**Imports** - *External*

In [1]:
%matplotlib widget
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
import scipy
from scipy.stats import pearsonr
import matplotlib.pyplot as plt
import common

*Internal*

In [2]:
from Data import Mmse # This file already has the data converted from string to numeric

MMSE ready!
   Phase  ID  RID  SITEID VISCODE VISCODE2  MMDATE  MMYEAR  MMMONTH  MMDAY  \
0      1  10    2     107      sc       sc       1       1        1      1   
1      1  12    1      10       f        f       1       1        1      1   
2      1  14    3     107      sc       sc       0       1        0      0   
3      1  16    4      10      sc       sc       1       1        1      1   
4      1  18    5     107      sc       sc       1       1        1      1   

   ...  MMWATCH  MMPENCIL  MMREPEAT  MMHAND  MMFOLD  MMONFLR  MMREAD  MMWRITE  \
0  ...        1         1         1       1       1        1       1        1   
1  ...        1         1         1       1       1        1       1        1   
2  ...        1         1         1       1       1        1       1        1   
3  ...        1         1         1       1       1        1       1        1   
4  ...        1         1         1       0       1        1       1        1   

   MMDRAW  MMSCORE  
0       1  

**Some errors occur while calculating the correlations; they are shown as warnings. The next line silences these warnings.**

In [3]:
# Silence NumPy's divide-by-zero and invalid-operation floating-point warnings.
# The call returns the previous error settings, which the notebook echoes.
np.seterr(invalid='ignore', divide='ignore')

{'divide': 'warn', 'over': 'warn', 'under': 'ignore', 'invalid': 'warn'}

**The path used to save the correlation file is:**

**Function used to calculate the correlation between two columns, using only the rows where both columns have a value:**

This function uses the Pearson product-moment correlation coefficient, where $C$ is the covariance matrix of the two columns:

\begin{equation}
    R_{ij} = \frac{ C_{ij} } { \sqrt{ C_{ii} * C_{jj} } }
\end{equation}


In [5]:
def _isBlank(value):
    """Return True when ``value`` is None or an empty string."""
    return value is None or (isinstance(value, str) and value == '')


def getCorrP(x, y):
    """Pearson correlation between two columns, using only complete pairs.

    A pair ``(x[i], y[i])`` is used only when both entries are present.
    An entry counts as missing when it is ``None``, an empty string, or
    converts to NaN. Remaining entries are converted with ``float``.

    Parameters
    ----------
    x, y : sequences of equal length
        Column values (numbers or numeric strings).

    Returns
    -------
    (r, p)
        The result of ``scipy.stats.pearsonr`` on the complete pairs, which
        unpacks to the correlation coefficient and its p-value. When fewer
        than two complete pairs exist, ``(0, 0)`` is returned as a sentinel;
        the ``0`` p-value in that case does NOT indicate significance.
    """
    xData = []
    yData = []
    for xValue, yValue in zip(x, y):
        if _isBlank(xValue) or _isBlank(yValue):
            continue
        xNumber = float(xValue)
        yNumber = float(yValue)
        # A single NaN would turn the whole coefficient into NaN, so a pair
        # containing one is treated like any other missing pair.
        if np.isnan(xNumber) or np.isnan(yNumber):
            continue
        xData.append(xNumber)
        yData.append(yNumber)
    # pearsonr needs at least two observations.
    if len(xData) > 1:
        return pearsonr(xData, yData)
    return 0, 0
    

In [6]:
# Load the MMSE table and remove the columns that are excluded from the
# correlation analysis.
data = Mmse().data

unusedLables = ['Phase', 'ID', 'RID', 'SITEID', 'VISCODE', 'VISCODE2']
data = data.drop(columns=unusedLables)

dataLabels = data.columns.values
dataNumpy = data.to_numpy()

# Square label-by-label matrices holding the coefficient and p-value of
# every pair of columns.
correlations = pd.DataFrame(index=dataLabels, columns=dataLabels)

pValues = pd.DataFrame(index=dataLabels, columns=dataLabels)

# Rows of the long-format table are collected in a list and turned into a
# DataFrame once after the loop: DataFrame.append was removed in pandas 2.0
# and copied the whole frame on every call.
tableColumns = ["Labels", "Correlation", "p_Value"]
tableRows = []

for i in range(len(dataLabels)):
    # Diagonal: every column is perfectly correlated with itself.
    correlations.at[dataLabels[i], dataLabels[i]] = 1
    pValues.at[dataLabels[i], dataLabels[i]] = 0
    # Only the upper triangle is computed; results are mirrored because the
    # Pearson coefficient is symmetric.
    for j in range(i+1, len(dataLabels)):
        p_r, p_p = getCorrP(dataNumpy[:, i], dataNumpy[:, j])
        correlations.at[dataLabels[i], dataLabels[j]] = correlations.at[dataLabels[j], dataLabels[i]] = p_r
        pValues.at[dataLabels[i], dataLabels[j]] = pValues.at[dataLabels[j], dataLabels[i]] = p_p

        tableRows.append(dict(zip(tableColumns, [f"{dataLabels[i]} -> {dataLabels[j]}", p_r, p_p])))

tableCorrP = pd.DataFrame(tableRows, columns=tableColumns)
        
        


 
    
    



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[field] = [1 if x == '1' else 0 for x in data[field]]


MMSE ready!


In [7]:
# Show the first ten column pairs; a bare last expression uses the
# notebook's rich DataFrame display instead of plain text.
tableCorrP.head(10)

               Labels  Correlation        p_Value
0    MMDATE -> MMYEAR     0.430005   0.000000e+00
1   MMDATE -> MMMONTH     0.462811   0.000000e+00
2     MMDATE -> MMDAY     0.449816   0.000000e+00
3  MMDATE -> MMSEASON     0.246751  5.928726e-139
4  MMDATE -> MMHOSPIT     0.300016  1.305878e-207
5   MMDATE -> MMFLOOR     0.332895  5.215730e-258
6    MMDATE -> MMCITY     0.210679  5.664548e-101
7    MMDATE -> MMAREA     0.243508  2.886299e-135
8   MMDATE -> MMSTATE     0.133681   3.330092e-41
9    MMDATE -> MMBALL     0.065163   6.538303e-11


In [8]:
# "coolwarm" colormap object, passed as cmap to the heatmaps below
aux = sns.color_palette(palette="coolwarm", as_cmap=True)

# Correlation

In [9]:
# Close the current figure before drawing a new one.
plt.close()
fig, ax = plt.subplots()
sns.heatmap(
    # The matrix is stored with object dtype; convert it to a float array.
    correlations.astype(float).to_numpy(),
    annot=True,
    annot_kws={"size": 7},
    # Explicit labels so every column name is drawn on the axes.
    xticklabels=correlations.columns,
    yticklabels=correlations.index,
    cmap=aux,
    # Pearson r lies in [-1, 1]; fixing the range puts the neutral colour of
    # the diverging colormap at r = 0.
    vmin=-1,
    vmax=1,
    ax=ax,
)
# Trailing semicolon keeps the Text repr out of the cell output.
ax.set_title("MMSE: Pearson correlation coefficients");

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<AxesSubplot:>

# P_Values

In [10]:
# Close the current figure before drawing a new one.
plt.close()
fig, ax = plt.subplots()
sns.heatmap(
    # The matrix is stored with object dtype; convert it to a float array.
    pValues.astype(float).to_numpy(),
    annot=True,
    annot_kws={"size": 7},
    # Explicit labels so every column name is drawn on the axes.
    xticklabels=pValues.columns,
    yticklabels=pValues.index,
    cmap=aux,
    ax=ax,
)
# Trailing semicolon keeps the Text repr out of the cell output.
ax.set_title("MMSE: p-values of the Pearson correlations");

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<AxesSubplot:>