# Correlation ICA

In [1]:
%matplotlib widget
import numpy as np
import pandas as pd
import seaborn as sns
import scipy
from scipy.stats import pearsonr
import matplotlib.pyplot as plt

import common

In [2]:
# Load both tables through the project helper and drop the columns that are
# excluded from the correlation analysis.
originalData = common.loadFile("CleanedData").drop(
    columns=["RID", "VISCODE", "AGE", "PTEDUCAT", "PTGENDER", "DX"]
)
ICA = common.loadFile("ICAData").drop(columns="DX")

In [3]:
def getCorrP(x, y):
    """Pearson correlation between two columns, skipping rows with missing values.

    A row ``i`` is used only when neither ``x[i]`` nor ``y[i]`` equals the
    empty string ``''``; the retained values are converted to ``float``.

    Parameters
    ----------
    x, y : indexable sequences
        Both columns must have the same size (``y`` is indexed with the
        positions of ``x``).

    Returns
    -------
    (r, p)
        The result of ``scipy.stats.pearsonr`` on the retained pairs. When
        fewer than two pairs remain no correlation can be computed and
        ``(0.0, 1.0)`` is returned: zero correlation with a p-value of 1, so
        the pair is never reported as statistically significant.
    """
    xData = []
    yData = []
    for i in range(len(x)):  # Both columns must have same size
        if x[i] != '' and y[i] != '':
            xData.append(float(x[i]))
            yData.append(float(y[i]))
    if len(xData) > 1:
        return pearsonr(xData, yData)
    # Not enough data: a p-value of 0 would wrongly read as "maximally
    # significant", so report the least significant value instead.
    return 0.0, 1.0
    

In [4]:
# Column labels and raw value matrices of both tables.
originalDataLabels = originalData.columns.values
originalDataNp = originalData.to_numpy()
ICADataLabels = ICA.columns.values
ICADataNp = ICA.to_numpy()

# Result matrices: one row per ICA column, one column per original column.
corrMatrix = np.empty((len(ICADataLabels), len(originalDataLabels)), dtype=float)
pMatrix = np.empty((len(ICADataLabels), len(originalDataLabels)), dtype=float)

# Long-format rows are collected in a list and turned into a DataFrame once;
# growing a DataFrame row by row is quadratic and DataFrame.append no longer
# exists in pandas >= 2.0.
corrRecords = []

for i, icLabel in enumerate(ICADataLabels):
    for j, origLabel in enumerate(originalDataLabels):
        p_r, p_p = getCorrP(ICADataNp[:, i], originalDataNp[:, j])
        corrMatrix[i, j] = p_r
        pMatrix[i, j] = p_p
        corrRecords.append({
            "Labels": f"{icLabel} -> {origLabel}",
            "Correlation": p_r,
            "p_Value": p_p,
        })

correlations = pd.DataFrame(corrMatrix, index=ICADataLabels, columns=originalDataLabels)
pValues = pd.DataFrame(pMatrix, index=ICADataLabels, columns=originalDataLabels)

# Same rows as the matrices above, in loop order, with a 0..N-1 index.
tableCorrP = pd.DataFrame(corrRecords, columns=["Labels", "Correlation", "p_Value"])
        


In [5]:
# Show the unfiltered long-format table: one row per "ICA column -> original column" pair.
tableCorrP

Unnamed: 0,Labels,Correlation,p_Value
0,IC1 -> TRAILS,-0.183297,2.189062e-14
1,IC1 -> CUBE,-0.120257,6.121499e-07
2,IC1 -> CLOCKCON,-0.123157,3.246826e-07
3,IC1 -> CLOCKNO,-0.151603,2.957378e-10
4,IC1 -> CLOCKHAN,-0.184707,1.373853e-14
...,...,...,...
1875,IC10 -> DIVATT2_PT,-0.348876,4.036486e-50
1876,IC10 -> DIVATT3_PT,-0.297482,2.777211e-36
1877,IC10 -> DIVATT4_PT,-0.306410,1.700969e-38
1878,IC10 -> STAFFASST,-0.117830,1.029125e-06


# Correlations

In [6]:
# "coolwarm" palette as a matplotlib colormap, passed as cmap to the heatmaps below.
aux = sns.color_palette("coolwarm", as_cmap=True)

In [7]:
# Heatmap of the Pearson correlation of every ICA column against every
# original column.
plt.figure()
plt.title('Correlations')
sns.heatmap(
    np.ma.filled(correlations.astype(float), np.nan),
    annot=True,
    annot_kws={"size": 7},
    xticklabels=correlations.columns,
    yticklabels=correlations.index,
    cmap=aux,
    center=0,  # diverging palette: keep the neutral colour at zero correlation
)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<AxesSubplot:>

# P_Values

In [8]:
# Heatmap of the p-value of every ICA column / original column pair.
plt.figure()
plt.title('p Values')
sns.heatmap(
    np.ma.filled(pValues.astype(float), np.nan),
    cmap=aux,
    annot=True,
    annot_kws={"size": 7},
    xticklabels=pValues.columns,
    yticklabels=pValues.index,
)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<AxesSubplot:title={'center':'p Values'}>

## Correlações acima de 30% e filtro com correção de Bonferroni

In [9]:
# Minimum absolute correlation a pair needs in order to be kept by the filter below.
corrFilter = 0.3
# Significance level; the filter below divides it before comparing it with the p-values.
pFilter = 0.05

In [10]:
# Bonferroni correction: the significance level is divided by the number of
# hypothesis tests actually performed, i.e. one per
# (ICA column, original column) pair.
nTests = len(ICADataLabels) * len(originalDataLabels)
bonferroniAlpha = pFilter / max(nTests, 1)

absCorr = correlations.astype(float).abs().to_numpy()
pVals = pValues.astype(float).to_numpy()

# True = hide the cell in the heatmap. A pair is hidden when it is not
# significant after the correction or when its correlation is too weak.
mask = (pVals >= bonferroniAlpha) | (absCorr < corrFilter)

# Rows of the long-format table that survive the filter, selected by label in
# a single pass (DataFrame.append in a loop is quadratic and was removed in
# pandas 2.0). Row order and original index values are preserved.
keptLabels = [
    f"{icLabel} -> {origLabel}"
    for i, icLabel in enumerate(ICADataLabels)
    for j, origLabel in enumerate(originalDataLabels)
    if not mask[i, j]
]
filtredTableCorrP = tableCorrP[tableCorrP["Labels"].isin(keptLabels)]

In [11]:
# Correlation heatmap restricted to the pairs that passed the filter; cells
# where mask is True are not drawn.
plt.figure()
aux = sns.color_palette("coolwarm", as_cmap=True)
plt.title(f'Correlation >{corrFilter*100}% with Bonferroni Correction Filter')
sns.heatmap(
    np.ma.filled(correlations.astype(float), np.nan),
    annot=True,
    annot_kws={"size": 7},
    xticklabels=correlations.columns,
    yticklabels=correlations.index,
    cmap=aux,
    center=0,  # diverging palette: keep the neutral colour at zero correlation
    mask=mask,
    linewidths=0.5,
    linecolor='gray',
)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<AxesSubplot:title={'center':'Correlation >30.0% with Bonferroni Correction Filter'}>

In [12]:
# Show the pairs that passed both the p-value and the correlation filter.
filtredTableCorrP

Unnamed: 0,Labels,Correlation,p_Value
36,IC1 -> DATE,-0.336566,1.471286e-46
37,IC1 -> MONTH,-0.390796,1.685560e-63
38,IC1 -> YEAR,-0.359294,2.916338e-53
39,IC1 -> DAY,-0.388337,1.166765e-62
42,IC1 -> MOCA,-0.398797,2.768696e-66
...,...,...,...
1856,IC10 -> LANG9_PT,-0.327738,4.207794e-44
1865,IC10 -> PLAN3_PT,-0.314304,1.616534e-40
1874,IC10 -> DIVATT1_PT,-0.346446,2.099117e-49
1875,IC10 -> DIVATT2_PT,-0.348876,4.036486e-50


In [13]:
# Show the correlation matrix (rows: ICA columns, columns: original columns).
correlations


Unnamed: 0,TRAILS,CUBE,CLOCKCON,CLOCKNO,CLOCKHAN,LION,RHINO,CAMEL,IMMT1W1,IMMT1W2,...,ORGAN3_PT,ORGAN4_PT,ORGAN5_PT,ORGAN6_PT,DIVATT1_PT,DIVATT2_PT,DIVATT3_PT,DIVATT4_PT,STAFFASST,VALIDITY
IC1,-0.183297,-0.120257,-0.123157,-0.151603,-0.184707,-0.091837,-0.167083,-0.035275,-0.079996,-0.091097,...,0.016641,-0.088422,-0.10897,-0.045816,-0.133437,-0.157996,-0.226778,-0.117534,0.28957,0.283862
IC2,-0.030461,-0.019394,0.01217,0.040278,0.034403,-0.003791,0.001038,0.04295,0.012718,-0.019501,...,0.044392,-0.005552,0.004966,0.070067,-0.004847,-0.038465,-0.048504,-0.042672,-0.027683,-0.009274
IC3,-0.099623,-0.070754,0.018695,-0.058412,-0.097256,0.016869,-0.044895,-0.028362,-0.034562,-0.018137,...,0.732969,0.286378,0.335493,0.236942,0.175435,0.175337,0.127494,0.171488,0.240628,0.223047
IC4,-0.115274,-0.17233,-0.049098,-0.028366,-0.144683,0.003953,-0.080112,-0.025479,-0.046638,-0.10512,...,-0.014191,0.011627,-0.070177,-0.026547,-0.153445,-0.117291,-0.010272,-0.144655,0.066785,0.060456
IC5,0.106087,0.11327,0.008982,0.099973,0.12517,0.09197,0.162313,0.126756,0.071994,0.131509,...,-0.160422,-0.277695,-0.210969,-0.165317,-0.340769,-0.283581,-0.34089,-0.291174,-0.189133,-0.145199
IC6,-0.042064,0.008446,-0.049922,0.020838,-0.019598,0.006032,-0.10367,0.019061,-0.04374,-0.069555,...,-0.0022,0.048126,0.017516,-0.010679,0.022371,-0.073234,-0.058693,0.032742,0.061977,0.029254
IC7,0.061158,0.014092,0.014549,-0.018666,-0.016256,0.016953,0.084678,-0.009712,-0.002736,0.00409,...,0.063557,-0.02585,0.041452,-0.000644,-0.020542,0.017412,-0.048213,-0.036224,-0.007936,-0.006658
IC8,0.016572,0.058382,-0.013929,0.046934,0.079283,0.039511,0.095375,0.023231,-0.007256,0.03191,...,0.19918,0.238082,0.188096,0.193654,0.257727,0.356106,0.291564,0.272117,0.01828,0.011204
IC9,-0.212685,-0.186458,-0.096161,-0.245062,-0.302435,-0.024173,-0.128173,-0.119123,-0.140533,-0.110065,...,-0.048128,-0.113079,-0.158466,-0.073683,-0.130939,-0.116341,-0.137416,-0.109205,0.192093,0.22338
IC10,0.225219,0.187617,0.076636,0.261819,0.225249,0.077181,0.13042,0.120997,0.11772,0.114849,...,-0.197142,-0.296744,-0.274204,-0.196117,-0.346446,-0.348876,-0.297482,-0.30641,-0.11783,-0.083935


In [14]:
# Persist the correlation matrix through the project helper under the name "ICADataCorrelation".
# NOTE(review): the helper's printed message says an existing file is replaced - confirm before re-running.
common.saveFile(correlations, "ICADataCorrelation")

Save file to CSV, carefull this is destructive. Will replace if it exists.

In [15]:
# common.saveFile(tableCorrP, "Correlações sem filtro", True)

In [16]:
# common.saveFile(filtredTableCorrP, "Correlações filtradas", True)

In [17]:
# Final look at the filtered pairs (same table as displayed earlier in the notebook).
filtredTableCorrP

Unnamed: 0,Labels,Correlation,p_Value
36,IC1 -> DATE,-0.336566,1.471286e-46
37,IC1 -> MONTH,-0.390796,1.685560e-63
38,IC1 -> YEAR,-0.359294,2.916338e-53
39,IC1 -> DAY,-0.388337,1.166765e-62
42,IC1 -> MOCA,-0.398797,2.768696e-66
...,...,...,...
1856,IC10 -> LANG9_PT,-0.327738,4.207794e-44
1865,IC10 -> PLAN3_PT,-0.314304,1.616534e-40
1874,IC10 -> DIVATT1_PT,-0.346446,2.099117e-49
1875,IC10 -> DIVATT2_PT,-0.348876,4.036486e-50
