**Imports** - *External*

In [1]:
%matplotlib widget
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
import scipy
from scipy.stats import pearsonr
import matplotlib.pyplot as plt
import common

*Internal*

In [2]:
from Data import Mmse # This file already has the data converted from string to numeric

**Some errors occur while calculating the correlations; they are reported as warnings. The next line tells NumPy to ignore them.**

In [3]:
# Tell NumPy to stay silent on divide-by-zero and invalid floating-point
# operations from here on. The call returns the previous settings, which the
# notebook shows as this cell's output.
np.seterr(divide='ignore', invalid='ignore')

{'divide': 'warn', 'over': 'warn', 'under': 'ignore', 'invalid': 'warn'}

**The path used to save the correlation file is:**

**Function used to calculate the correlation between two columns, using only the rows where both columns have a value:**

This function uses the Pearson product-moment correlation coefficient, where $C$ is the covariance matrix of the two columns:

\begin{equation}
    R_{ij} = \frac{ C_{ij} } { \sqrt{ C_{ii} * C_{jj} } }
\end{equation}


In [4]:
def getCorrP(x, y):
    """Pearson correlation of two columns over the rows where both are filled.

    A row is kept only when neither ``x[i]`` nor ``y[i]`` equals the empty
    string ``''``; kept values are converted with ``float``.

    Parameters
    ----------
    x, y : indexable sequences
        Columns to compare. ``y`` is indexed with every position of ``x``,
        so it must be at least as long as ``x``.

    Returns
    -------
    The result of ``scipy.stats.pearsonr`` (correlation coefficient and
    p-value) when at least two rows are kept, otherwise the pair ``(0, 0)``.
    """
    # Gather the usable rows as (x, y) float pairs in a single pass.
    pairs = [
        (float(x[k]), float(y[k]))
        for k in range(len(x))
        if x[k] != '' and y[k] != ''
    ]

    # pearsonr needs at least two observations; report (0, 0) otherwise.
    if len(pairs) <= 1:
        return 0, 0

    xValues = [pair[0] for pair in pairs]
    yValues = [pair[1] for pair in pairs]
    return pearsonr(xValues, yValues)
    

In [5]:
# Load the MMSE table and remove the columns that are not used in the
# correlation analysis.
data = Mmse().data

unusedLables = ['Phase', 'ID', 'RID', 'SITEID', 'VISCODE']
data = data.drop(columns=unusedLables)

dataLabels = data.columns.values
dataNumpy = data.to_numpy()

# Square label-by-label matrices, filled symmetrically in the loop below.
correlations = pd.DataFrame(index=dataLabels, columns=dataLabels)
pValues = pd.DataFrame(index=dataLabels, columns=dataLabels)

# Rows of the long-format table are collected in a plain list and turned into
# a DataFrame once at the end. Growing a DataFrame row by row is quadratic,
# and DataFrame.append no longer exists in pandas >= 2.0.
tableColumns = ["Labels", "Correlation", "p_Value"]
tableRows = []

for i, rowLabel in enumerate(dataLabels):
    # Diagonal: a column against itself is recorded as correlation 1, p-value 0.
    correlations.at[rowLabel, rowLabel] = 1
    pValues.at[rowLabel, rowLabel] = 0

    # Only the upper triangle is computed; each result is mirrored.
    for j in range(i + 1, len(dataLabels)):
        colLabel = dataLabels[j]
        p_r, p_p = getCorrP(dataNumpy[:, i], dataNumpy[:, j])

        correlations.at[rowLabel, colLabel] = p_r
        correlations.at[colLabel, rowLabel] = p_r
        pValues.at[rowLabel, colLabel] = p_p
        pValues.at[colLabel, rowLabel] = p_p

        tableRows.append([f"{rowLabel} -> {colLabel}", p_r, p_p])

# One row per unordered pair of columns, in the order the pairs were visited.
tableCorrP = pd.DataFrame(tableRows, columns=tableColumns)
        
        


 
    
    



MMSE ready!


In [6]:
# Preview the first ten label pairs. Leaving the frame as the cell's last
# expression lets the notebook render it with its rich table display instead
# of the plain-text output produced by print().
tableCorrP.head(10)

               Labels  Correlation        p_Value
0    MMDATE -> MMYEAR     0.454568   0.000000e+00
1   MMDATE -> MMMONTH     0.482985   0.000000e+00
2     MMDATE -> MMDAY     0.469111   0.000000e+00
3  MMDATE -> MMSEASON     0.267771  9.051544e-199
4  MMDATE -> MMHOSPIT     0.319725  3.915688e-287
5   MMDATE -> MMFLOOR     0.353742   0.000000e+00
6    MMDATE -> MMCITY     0.239923  8.314293e-159
7    MMDATE -> MMAREA     0.260183  2.188130e-187
8   MMDATE -> MMSTATE     0.182339   2.030427e-91
9    MMDATE -> MMBALL    -0.056695   3.923856e-10


In [7]:
# Colormap shared by the heatmaps below: seaborn's "coolwarm" palette,
# requested as a colormap object (as_cmap=True) so it can be passed as `cmap`.
aux = sns.color_palette("coolwarm", as_cmap=True)

# Correlation

In [8]:
# Close the current figure (if any) before drawing a new one.
plt.close()

# Draw on an explicit Axes instead of relying on pyplot's implicit state.
fig, ax = plt.subplots()
sns.heatmap(
    # The frame holds Python objects; convert to a float array so missing
    # entries become NaN, which heatmap leaves blank.
    correlations.astype(float).to_numpy(),
    annot=True,
    annot_kws={"size": 7},
    xticklabels=correlations.columns,
    yticklabels=correlations.index,
    cmap=aux,
    # Pearson coefficients lie in [-1, 1]; pin the colour scale to that range
    # and centre it on 0 so the neutral colour means "no correlation".
    vmin=-1,
    vmax=1,
    center=0,
    ax=ax,
)
# The trailing semicolon keeps the return value out of the cell output.
ax.set_title("Pearson correlation between MMSE columns");

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<AxesSubplot:>

# P_Values

In [9]:
# Close the current figure (if any) before drawing a new one.
plt.close()

# Draw on an explicit Axes instead of relying on pyplot's implicit state.
fig, ax = plt.subplots()
sns.heatmap(
    # The frame holds Python objects; convert to a float array so missing
    # entries become NaN, which heatmap leaves blank.
    pValues.astype(float).to_numpy(),
    annot=True,
    annot_kws={"size": 7},
    xticklabels=pValues.columns,
    yticklabels=pValues.index,
    cmap=aux,
    ax=ax,
)
# The trailing semicolon keeps the return value out of the cell output.
ax.set_title("p-values of the Pearson correlations between MMSE columns");

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<AxesSubplot:>