# Correlation ICA

In [1]:
%matplotlib widget
import numpy as np
import pandas as pd
import seaborn as sns
import scipy
from scipy.stats import pearsonr
import matplotlib.pyplot as plt

import common

In [2]:
# Columns of the cleaned table that are left out of the correlation analysis.
excludedColumns = ["RID", "VISCODE", "AGE", "PTEDUCAT", "PTGENDER", "DX"]

cleanedData = common.loadFile("CleanedData")
originalData = cleanedData.drop(columns=excludedColumns)

# The ICA table is used without its "DX" column.
icaData = common.loadFile("ICAData")
ICA = icaData.drop(columns="DX")

In [3]:
def getCorrP(x, y):
    """Pearson correlation of two row-aligned columns, skipping incomplete rows.

    A row is used only when both of its entries are present. An entry counts
    as missing when it is None, an empty or blank string, or NaN.

    Parameters
    ----------
    x, y : sequence
        Columns of numbers or numeric strings. They are expected to have the
        same length; if they differ, only the leading rows present in both
        are considered.

    Returns
    -------
    (r, p)
        The result of ``scipy.stats.pearsonr`` on the complete rows, or
        ``(0.0, 1.0)`` when fewer than two complete rows remain.
    """
    def _toFloat(value):
        # Returns None for a missing entry, otherwise the numeric value.
        if value is None:
            return None
        if isinstance(value, str) and value.strip() == '':
            return None
        number = float(value)
        return None if np.isnan(number) else number

    xData = []
    yData = []
    for xRaw, yRaw in zip(x, y):
        xValue = _toFloat(xRaw)
        yValue = _toFloat(yRaw)
        if xValue is not None and yValue is not None:
            xData.append(xValue)
            yData.append(yValue)

    if len(xData) > 1:
        return pearsonr(xData, yData)
    # Without data there is no evidence of correlation: report p = 1 rather
    # than p = 0, which would read as maximal significance.
    return 0.0, 1.0
    

In [4]:
# Column labels and raw values of both tables; columns are addressed by
# position in the loops below.
originalDataLabels = originalData.columns.values
originalDataNp = originalData.to_numpy()
ICADataLabels = ICA.columns.values
ICADataNp = ICA.to_numpy()

# Matrices indexed by ICA column (rows) and original column (columns),
# filled cell by cell.
correlations = pd.DataFrame(index=ICADataLabels, columns=originalDataLabels)
pValues = pd.DataFrame(index=ICADataLabels, columns=originalDataLabels)

# Long-format rows are collected in a list and converted once at the end:
# DataFrame.append was removed in pandas 2.0 and copied the whole table on
# every call.
corrRows = []
for i, icaLabel in enumerate(ICADataLabels):
    for j, originalLabel in enumerate(originalDataLabels):
        p_r, p_p = getCorrP(ICADataNp[:, i], originalDataNp[:, j])
        correlations.at[icaLabel, originalLabel] = p_r
        pValues.at[icaLabel, originalLabel] = p_p
        corrRows.append({
            "Labels": f"{icaLabel} -> {originalLabel}",
            "Correlation": p_r,
            "p_Value": p_p,
        })

tableCorrP = pd.DataFrame(corrRows, columns=["Labels", "Correlation", "p_Value"])
        


In [5]:
# Long-format table: one row per "<ICA column> -> <original column>" pair
# with its correlation and p-value.
tableCorrP

Unnamed: 0,Labels,Correlation,p_Value
0,IC1 -> TRAILS,0.209488,2.063526e-18
1,IC1 -> CUBE,0.137314,1.190410e-08
2,IC1 -> CLOCKCON,0.131596,4.719457e-08
3,IC1 -> CLOCKNO,0.175533,2.667676e-13
4,IC1 -> CLOCKHAN,0.205192,1.032487e-17
...,...,...,...
935,IC5 -> DIVATT2_PT,0.143683,2.398417e-09
936,IC5 -> DIVATT3_PT,0.096588,6.319706e-05
937,IC5 -> DIVATT4_PT,0.085103,4.266521e-04
938,IC5 -> STAFFASST,0.047137,5.130811e-02


# Correlations

In [6]:
# "coolwarm" palette as a matplotlib colormap, passed as `cmap` to the heatmaps.
aux = sns.color_palette("coolwarm", as_cmap=True)

In [15]:
# Close the current figure before drawing the annotated correlation matrix.
plt.close()
correlationValues = np.ma.filled(correlations.astype(float), np.nan)
sns.heatmap(
    correlationValues,
    annot=True,
    annot_kws={"size": 7},
    xticklabels=correlations.columns,
    yticklabels=correlations.index,
    cmap=aux,
)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<AxesSubplot:>

# P_Values

In [8]:
# Close the current figure before drawing the annotated p-value matrix.
plt.close()
plt.title('p Values')
pValueGrid = np.ma.filled(pValues.astype(float), np.nan)
sns.heatmap(
    pValueGrid,
    annot=True,
    annot_kws={"size": 7},
    xticklabels=pValues.columns,
    yticklabels=pValues.index,
    cmap=aux,
)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<AxesSubplot:title={'center':'p Values'}>

## Correlations above 60% with Bonferroni correction filter

In [9]:
# Thresholds used by the filtering cell below.
corrFilter = 0.6  # pairs whose absolute correlation is below this are hidden
pFilter = 0.05  # significance level; the filter divides it by a Bonferroni factor before comparing p-values

In [16]:
# Bonferroni correction: the significance level is divided by the number of
# tests actually performed, one per (ICA column, original column) pair.
numTests = len(ICADataLabels) * len(originalDataLabels)
bonferroniThreshold = pFilter / max(numTests, 1)

# A pair is kept only when it is both significant and strongly correlated.
# Phrasing the test positively means NaN correlations/p-values compare False
# and are therefore hidden instead of being shown as significant.
significant = (
    (pValues.astype(float) < bonferroniThreshold)
    & (correlations.astype(float).abs() >= corrFilter)
)
significantNp = significant.to_numpy()

# seaborn hides the cells where the mask is True.
mask = ~significantNp

# Select the kept rows of the long table in one step; DataFrame.append was
# removed in pandas 2.0. The original row index of tableCorrP is preserved.
keptLabels = [
    f"{icaLabel} -> {originalLabel}"
    for icaLabel, keepRow in zip(ICADataLabels, significantNp)
    for originalLabel, keep in zip(originalDataLabels, keepRow)
    if keep
]
filtredTableCorrP = tableCorrP[tableCorrP["Labels"].isin(keptLabels)].copy()

In [17]:
# Close the current figure, then draw only the cells that passed the filter;
# cells where `mask` is True are left blank.
plt.close()
aux = sns.color_palette("coolwarm", as_cmap=True)
plt.title(f'Correlation >{corrFilter*100}% with Bonferroni Correction Filter')
filteredGrid = np.ma.filled(correlations.astype(float), np.nan)
sns.heatmap(
    filteredGrid,
    annot=True,
    annot_kws={"size": 7},
    xticklabels=correlations.columns,
    yticklabels=correlations.index,
    cmap=aux,
    mask=mask,
    linewidths=0.5,
    linecolor='gray',
)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<AxesSubplot:title={'center':'Correlation >60.0% with Bonferroni Correction Filter'}>

In [12]:
# Rows of tableCorrP that passed both the correlation and the p-value filter.
filtredTableCorrP

Unnamed: 0,Labels,Correlation,p_Value
91,IC1 -> CDORIENT,-0.601351,1.101521e-168
93,IC1 -> CDCOMMUN,-0.704859,5.0683149999999996e-257
94,IC1 -> CDHOME,-0.687793,6.804842e-240
97,IC1 -> CDRSB,-0.720776,3.802727e-274
137,IC1 -> FAQFINAN,-0.708992,2.344377e-261
138,IC1 -> FAQFORM,-0.688079,3.5899459999999997e-240
139,IC1 -> FAQSHOP,-0.776746,0.0
140,IC1 -> FAQGAME,-0.687895,5.415109999999999e-240
142,IC1 -> FAQMEAL,-0.733096,3.0318939999999996e-288
143,IC1 -> FAQEVENT,-0.704982,3.771587e-257


In [13]:
# Full correlation matrix: one row per ICA column, one column per original column.
correlations


Unnamed: 0,TRAILS,CUBE,CLOCKCON,CLOCKNO,CLOCKHAN,LION,RHINO,CAMEL,IMMT1W1,IMMT1W2,...,ORGAN3_PT,ORGAN4_PT,ORGAN5_PT,ORGAN6_PT,DIVATT1_PT,DIVATT2_PT,DIVATT3_PT,DIVATT4_PT,STAFFASST,VALIDITY
IC1,0.209488,0.137314,0.131596,0.175533,0.205192,0.09534,0.179153,0.048835,0.09318,0.098495,...,-0.064197,0.033315,0.065789,0.011114,0.077682,0.093199,0.164585,0.060228,-0.310778,-0.300792
IC2,0.03953,0.058989,0.035873,0.062169,0.12546,-0.003731,0.067464,0.054821,0.040514,0.026794,...,0.25252,0.225402,0.278771,0.212288,0.271311,0.261011,0.188598,0.220573,-0.061573,-0.083567
IC3,-0.21451,-0.196969,-0.046953,-0.199978,-0.248388,-0.071654,-0.167672,-0.153455,-0.133787,-0.162527,...,0.383979,0.326091,0.262828,0.2419,0.341481,0.328301,0.325861,0.299394,0.296174,0.270393
IC4,-0.130292,-0.100399,-0.066797,-0.083589,-0.146422,-0.037626,-0.194252,-0.042383,-0.081287,-0.130154,...,0.003174,0.049991,0.01859,-0.027825,0.007283,-0.114433,-0.067764,0.001537,0.123872,0.084115
IC5,-0.118445,-0.079846,-0.07951,-0.0881,-0.107574,0.043596,0.012706,-0.012949,-0.078255,-0.036982,...,0.10229,0.113959,0.055495,0.083167,0.053803,0.143683,0.096588,0.085103,0.047137,0.052285


In [14]:
# Persist the correlation matrix under the name "ICADataCorrelation".
# NOTE(review): storage format and location are decided by common.saveFile — confirm there.
common.saveFile(correlations, "ICADataCorrelation")