**Imports** - *External*

In [1]:
%matplotlib widget
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
import scipy
from scipy.stats import pearsonr
import matplotlib.pyplot as plt

*Internal*

In [2]:
from Cdr import Cdr # This file already has the data converted from string to numeric

**Some of the correlation computations trigger NumPy floating-point warnings (division by zero or invalid value). The next cell tells NumPy to ignore these warnings.**

In [3]:
# Tell NumPy to ignore divide-by-zero and invalid-value floating-point conditions from here on.
# np.seterr returns the settings that were active before the call; that dict is what this cell echoes.
np.seterr(divide='ignore', invalid='ignore')

{'divide': 'warn', 'over': 'warn', 'under': 'ignore', 'invalid': 'warn'}

**The path used to save the correlation file is:**

In [4]:
# Location for the saved correlation file.
# NOTE(review): no visible cell reads or writes this path — confirm whether a save step is missing.
path = "./dataFiles/Mocaorr"

**Function used to calculate the correlation between two columns, using only the rows where both columns have a value:**

This function uses the Pearson product-moment correlation coefficient:

\begin{equation}
    R_{ij} = \frac{ C_{ij} } { \sqrt{ C_{ii} * C_{jj} } }
\end{equation}


In [5]:
def getCorrP(x, y):
    """Pearson correlation of two columns over the rows where both have a value.

    A row is skipped when either entry is missing, i.e. it is the empty
    string ``''`` or a null marker recognised by ``pd.isna`` (``None``,
    ``NaN``, ``pd.NA``). Every retained entry is converted with ``float``.

    Parameters
    ----------
    x, y : indexable sequences
        Columns to compare. ``y`` is indexed with every position of ``x``,
        so both must have the same length.

    Returns
    -------
    (r, p)
        The result of ``scipy.stats.pearsonr`` on the retained pairs, or
        ``(0, 0)`` when fewer than two complete pairs remain.
    """
    xData = []
    yData = []
    for i in range(len(x)): # Both columns must have same size
        xValue, yValue = x[i], y[i]
        # Null markers are tested first: a single NaN would turn the whole
        # coefficient into NaN, and None cannot be converted by float().
        if pd.isna(xValue) or pd.isna(yValue):
            continue
        if xValue != '' and yValue != '':
            xData.append(float(xValue))
            yData.append(float(yValue))
    # pearsonr needs at least two observations; report "no correlation" otherwise.
    if len(xData) > 1:
        return pearsonr(xData, yData)
    return 0, 0
    

In [6]:
# Load the CDR data and remove the columns listed in unusedLables before correlating.
data = Cdr().data

unusedLables = ['Phase', 'ID', 'RID', 'SITEID', 'VISCODE', 'VISCODE2']
data = data.drop(columns=unusedLables)

dataLabels = data.columns.values
dataNumpy = data.to_numpy()

# Square label-by-label tables holding the coefficient and the p-value of every column pair.
correlations = pd.DataFrame(index=dataLabels, columns=dataLabels)

pValues = pd.DataFrame(index=dataLabels, columns=dataLabels)

# One record per column pair. The records are gathered in a list and turned into a
# DataFrame once after the loop: DataFrame.append was removed in pandas 2.0, and
# growing a frame row by row copies it on every iteration.
tableColumns = ["Labels", "Correlation", "p_Value"]
tableRows = []

for i in range(len(dataLabels)):
    # Diagonal: a column against itself is recorded as correlation 1 with p-value 0.
    correlations.at[dataLabels[i], dataLabels[i]] = 1
    pValues.at[dataLabels[i], dataLabels[i]] = 0
    # Only pairs with j > i are computed; the result is mirrored into both triangles.
    for j in range(i+1, len(dataLabels)):
        p_r, p_p = getCorrP(dataNumpy[:, i], dataNumpy[:, j])
        correlations.at[dataLabels[i], dataLabels[j]] = correlations.at[dataLabels[j], dataLabels[i]] = p_r
        pValues.at[dataLabels[i], dataLabels[j]] = pValues.at[dataLabels[j], dataLabels[i]] = p_p

        tableRows.append(dict(zip(tableColumns, [f"{dataLabels[i]} -> {dataLabels[j]}", p_r, p_p])))

# Passing columns= keeps the three headers even when there are no pairs at all.
tableCorrP = pd.DataFrame(tableRows, columns=tableColumns)
        
        


 
    
    



CDR ready!


In [7]:
# Show the first ten pairwise results (label pair, correlation, p-value) as plain text.
print(tableCorrP.head(10))

                 Labels  Correlation       p_Value
0  CDSOURCE -> CDMEMORY    -0.016861  6.530758e-02
1  CDSOURCE -> CDORIENT    -0.008400  3.585380e-01
2   CDSOURCE -> CDJUDGE     0.000029  9.974656e-01
3  CDSOURCE -> CDCOMMUN     0.001193  8.962565e-01
4    CDSOURCE -> CDHOME    -0.001105  9.038770e-01
5    CDSOURCE -> CDCARE     0.047830  1.686069e-07
6  CDSOURCE -> CDGLOBAL    -0.000959  9.165440e-01
7     CDSOURCE -> CDRSB     0.006613  4.697989e-01
8  CDMEMORY -> CDORIENT     0.844668  0.000000e+00
9   CDMEMORY -> CDJUDGE     0.812763  0.000000e+00


In [8]:
# The "coolwarm" palette as a colormap object (as_cmap=True); the heatmap cells pass it as cmap.
aux = sns.color_palette("coolwarm", as_cmap=True)

# Correlations

In [11]:
# Close the current figure so the heatmap is drawn on a fresh one.
plt.close()
# The frame is cast to float before plotting; tick labels are taken from the
# frame's own columns and index.
sns.heatmap(
    np.ma.filled(correlations.astype(float), np.nan),
    annot=True,
    annot_kws={"size": 7},
    xticklabels=correlations.columns,
    yticklabels=correlations.index,
    cmap=aux,
)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<AxesSubplot:>

# P_Values

In [13]:
# Close the current figure so the heatmap is drawn on a fresh one.
plt.close()
# The frame is cast to float before plotting; tick labels are taken from the
# frame's own columns and index.
sns.heatmap(
    np.ma.filled(pValues.astype(float), np.nan),
    annot=True,
    annot_kws={"size": 7},
    xticklabels=pValues.columns,
    yticklabels=pValues.index,
    cmap=aux,
)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<AxesSubplot:>