**Imports** - *External*

In [1]:
%matplotlib widget
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
import scipy
from scipy.stats import pearsonr
import matplotlib.pyplot as plt
import common

*Internal*

In [2]:
from Data import Moca # This file already has the data converted from string to numeric

**Some errors occur while calculating the correlations; these are shown as warnings. The next line tells NumPy to ignore them.**

In [3]:
# Tell NumPy to ignore floating-point "divide" and "invalid" errors instead of warning.
# np.seterr returns the previous settings, which is the dict echoed as this cell's output.
np.seterr(divide='ignore', invalid='ignore')

{'divide': 'warn', 'over': 'warn', 'under': 'ignore', 'invalid': 'warn'}

**Function used to calculate the correlation between two columns, using only the rows where both columns have a value:**

This function uses the Pearson product-moment correlation coefficient, where $C$ is the covariance matrix of the two columns:

\begin{equation}
    R_{ij} = \frac{ C_{ij} } { \sqrt{ C_{ii} * C_{jj} } }
\end{equation}


In [4]:
def getCorrP(x, y):
    """Pearson correlation between two columns, ignoring rows with a blank entry.

    A row is kept only when neither ``x[i]`` nor ``y[i]`` equals the empty
    string; kept entries are converted with ``float``.

    Args:
        x: Indexable sequence; its length determines how many rows are scanned.
        y: Indexable sequence with at least as many entries as ``x``.

    Returns:
        The result of ``scipy.stats.pearsonr`` (correlation coefficient and
        p-value) when at least two rows were kept, otherwise ``(0, 0)``.
    """
    # Both columns must have the same size; rows are addressed by x's indices.
    keptPairs = [
        (float(x[row]), float(y[row]))
        for row in range(len(x))
        if x[row] != '' and y[row] != ''
    ]
    if len(keptPairs) <= 1:
        # Not enough paired observations to compute a correlation.
        return 0, 0
    xValues = [pair[0] for pair in keptPairs]
    yValues = [pair[1] for pair in keptPairs]
    return pearsonr(xValues, yValues)
    

In [5]:
# Load the data exposed by Moca and drop the identifier columns that are not analysed.
data = Moca().data

unusedLables = ['ID', 'RID', 'SITEID', 'VISCODE']
data = data.drop(columns=unusedLables)

dataLabels = data.columns.values
dataNumpy = data.to_numpy()

# Square label-by-label matrices holding the correlation and p-value of every column pair.
correlations = pd.DataFrame(index=dataLabels, columns=dataLabels)
pValues = pd.DataFrame(index=dataLabels, columns=dataLabels)

# Rows of the long-format results table. They are collected in a plain list and
# turned into a DataFrame once, because DataFrame.append was removed in pandas 2.0
# and copied the whole frame on every call.
tableRows = []

for i in range(len(dataLabels)):
    # Diagonal: a column against itself is recorded as correlation 1 with p-value 0.
    correlations.at[dataLabels[i], dataLabels[i]] = 1
    pValues.at[dataLabels[i], dataLabels[i]] = 0
    # Only the upper triangle is computed; each result is mirrored into the lower one.
    for j in range(i+1, len(dataLabels)):
        p_r, p_p = getCorrP(dataNumpy[:, i], dataNumpy[:, j])
        correlations.at[dataLabels[i], dataLabels[j]] = correlations.at[dataLabels[j], dataLabels[i]] = p_r
        pValues.at[dataLabels[i], dataLabels[j]] = pValues.at[dataLabels[j], dataLabels[i]] = p_p

        tableRows.append((f"{dataLabels[i]} -> {dataLabels[j]}", p_r, p_p))

tableCorrP = pd.DataFrame(tableRows, columns=["Labels", "Correlation", "p_Value"])
        
        


 
    
    



Moca ready!


In [6]:
# Show the first ten pairwise results; a bare last expression uses the notebook's rich table display.
tableCorrP.head(10)

               Labels  Correlation        p_Value
0      TRAILS -> CUBE     0.217928   2.444723e-72
1  TRAILS -> CLOCKCON     0.168242   1.990343e-43
2   TRAILS -> CLOCKNO     0.275864  1.782019e-116
3  TRAILS -> CLOCKHAN     0.274557  2.379566e-115
4      TRAILS -> LION     0.091061   9.990937e-14
5     TRAILS -> RHINO     0.176211   1.563036e-47
6     TRAILS -> CAMEL     0.143207   8.112035e-32
7   TRAILS -> IMMT1W1     0.153148   3.393345e-36
8   TRAILS -> IMMT1W2     0.174918   7.474896e-47
9   TRAILS -> IMMT1W3     0.142976   1.017055e-31


In [7]:
# Colormap built from seaborn's "coolwarm" palette; `aux` is passed as `cmap` to the heatmaps below.
aux = sns.color_palette("coolwarm", as_cmap=True) # Changes the color of the graph

# Correlations

In [8]:
# Close the current figure so the heatmap is drawn on a fresh one.
plt.close()
# Annotated heatmap of the correlation matrix, labelled with the column names.
sns.heatmap(
    correlations.astype(float).to_numpy(),
    cmap=aux,
    annot=True,
    annot_kws={"size": 7},
    xticklabels=correlations.columns,
    yticklabels=correlations.index,
)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<AxesSubplot:>

# P_Values

In [9]:
# Close the current figure so the heatmap is drawn on a fresh one.
plt.close()
# Annotated heatmap of the p-value matrix, labelled with the column names.
sns.heatmap(
    pValues.astype(float).to_numpy(),
    cmap=aux,
    annot=True,
    annot_kws={"size": 7},
    xticklabels=pValues.columns,
    yticklabels=pValues.index,
)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<AxesSubplot:>