# Correlation ICA

In [1]:
%matplotlib widget
import numpy as np
import pandas as pd
import seaborn as sns
import scipy
from scipy.stats import pearsonr
import matplotlib.pyplot as plt

import common

In [2]:
# Load the cleaned data set without the RID, VISCODE, AGE, PTEDUCAT, PTGENDER
# and DX columns, and the ICA data set without its DX column.
originalData = common.loadFile("CleanedData").drop(
    columns=["RID", "VISCODE", "AGE", "PTEDUCAT", "PTGENDER", "DX"]
)
ICA = common.loadFile("ICAData").drop(columns="DX")

In [3]:
def getCorrP(x, y):
    """Pearson correlation of two columns, using only rows where both values exist.

    Values are paired by position. A pair is skipped when either side is the
    empty string '' or converts to NaN; the remaining values are cast to float.
    If the inputs differ in length, pairing stops at the end of the shorter one.

    Parameters
    ----------
    x, y : sequence
        Column values: numbers or numeric strings, with '' or NaN marking
        missing entries.

    Returns
    -------
    (r, p)
        The result of scipy.stats.pearsonr on the complete pairs, or
        (0.0, 1.0) when fewer than two complete pairs are available.
    """
    xData = []
    yData = []
    for xRaw, yRaw in zip(x, y):
        if xRaw != '' and yRaw != '':
            xValue = float(xRaw)
            yValue = float(yRaw)
            # A single NaN would turn the whole coefficient into NaN, so NaN
            # is treated as missing exactly like the empty string.
            if not (np.isnan(xValue) or np.isnan(yValue)):
                xData.append(xValue)
                yData.append(yValue)
    if len(xData) > 1:
        return pearsonr(xData, yData)
    # Not enough data: report "no correlation, not significant". A p-value of
    # 0 here would wrongly read as maximal significance.
    return 0.0, 1.0
    

In [4]:
# Column labels and raw values of both tables; the label arrays are reused by later cells.
originalDataLabels = originalData.columns.values
originalDataNp = originalData.to_numpy()
ICADataLabels = ICA.columns.values
ICADataNp = ICA.to_numpy()

# Matrices with one row per ICA column and one column per original-data column.
correlations = pd.DataFrame(index=ICADataLabels, columns=originalDataLabels)
pValues = pd.DataFrame(index=ICADataLabels, columns=originalDataLabels)

# Long-format rows are collected in a plain list and converted once at the end:
# DataFrame.append was removed in pandas 2.0 and copied the whole table on
# every call, which made this loop quadratic.
corrRecords = []

for i, icaLabel in enumerate(ICADataLabels):
    for j, originalLabel in enumerate(originalDataLabels):
        p_r, p_p = getCorrP(ICADataNp[:, i], originalDataNp[:, j])
        correlations.at[icaLabel, originalLabel] = p_r
        pValues.at[icaLabel, originalLabel] = p_p

        corrRecords.append({
            "Labels": f"{icaLabel} -> {originalLabel}",
            "Correlation": p_r,
            "p_Value": p_p,
        })

# Passing the columns explicitly keeps the expected header even when there are no rows.
tableCorrP = pd.DataFrame(corrRecords, columns=["Labels", "Correlation", "p_Value"])
        


In [5]:
# Show the long-format table: one row per "<ICA column> -> <original column>" pair
# with its correlation and p-value.
tableCorrP

Unnamed: 0,Labels,Correlation,p_Value
0,IC1 -> TRAILS,0.060835,1.186470e-02
1,IC1 -> CUBE,0.014108,5.598928e-01
2,IC1 -> CLOCKCON,0.014265,5.555303e-01
3,IC1 -> CLOCKNO,-0.019268,4.258725e-01
4,IC1 -> CLOCKHAN,-0.016402,4.979001e-01
...,...,...,...
1875,IC10 -> DIVATT2_PT,-0.147096,9.865369e-10
1876,IC10 -> DIVATT3_PT,-0.218904,5.346086e-20
1877,IC10 -> DIVATT4_PT,-0.108562,6.818039e-06
1878,IC10 -> STAFFASST,0.301633,2.657203e-37


# Correlations

In [6]:
# "coolwarm" palette as a matplotlib colormap; passed as `cmap` to the heatmaps below.
aux = sns.color_palette("coolwarm", as_cmap=True)

In [7]:
# Close the current figure, then draw the annotated correlation matrix
# (rows: ICA columns, columns: original-data columns).
plt.close()
sns.heatmap(
    np.ma.filled(correlations.astype(float), np.nan),
    xticklabels=correlations.columns,
    yticklabels=correlations.index,
    cmap=aux,
    annot=True,
    annot_kws={"size": 7},
)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<AxesSubplot:>

# P_Values

In [8]:
# Close the current figure, then draw the annotated matrix of p-values
# under the title set on the new current axes.
plt.close()
plt.title('p Values')
sns.heatmap(
    np.ma.filled(pValues.astype(float), np.nan),
    xticklabels=pValues.columns,
    yticklabels=pValues.index,
    cmap=aux,
    annot=True,
    annot_kws={"size": 7},
)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<AxesSubplot:title={'center':'p Values'}>

## Correlations above 60% with Bonferroni correction filter

In [9]:
# Minimum absolute correlation a pair must reach to be kept by the filter cell below.
corrFilter = 0.6
# Significance level before the multiple-comparison correction applied in the filter cell.
pFilter = 0.05

In [10]:
# Bonferroni correction: the significance level is divided by the number of
# tests actually performed, i.e. one per (ICA column, original column) pair.
# The earlier divisor, len(originalDataLabels)**2, did not match the number of
# correlations computed in this notebook.
numTests = len(ICADataLabels) * len(originalDataLabels)
bonferroniAlpha = pFilter / max(numTests, 1)

# A pair is kept only if it is significant after correction AND strong enough.
# Comparisons against NaN are False, so undefined correlations are hidden too.
passes = (pValues.astype(float) < bonferroniAlpha) & (correlations.astype(float).abs() >= corrFilter)

# seaborn hides the cells where the mask is True.
mask = (~passes).to_numpy()

# Select the surviving rows of the long table in one step (original index kept).
# DataFrame.append was removed in pandas 2.0, so rows are not appended in a loop.
keptLabels = [
    f"{icaLabel} -> {originalLabel}"
    for icaLabel in ICADataLabels
    for originalLabel in originalDataLabels
    if passes.at[icaLabel, originalLabel]
]
filtredTableCorrP = tableCorrP[tableCorrP["Labels"].isin(keptLabels)].copy()

In [11]:
# Close the current figure and redraw the correlation matrix, hiding every
# cell flagged True in `mask`; thin gray lines separate the cells.
plt.close()
aux = sns.color_palette("coolwarm", as_cmap=True)
plt.title(f'Correlation >{corrFilter*100}% with Bonferroni Correction Filter')
sns.heatmap(
    np.ma.filled(correlations.astype(float), np.nan),
    mask=mask,
    xticklabels=correlations.columns,
    yticklabels=correlations.index,
    cmap=aux,
    annot=True,
    annot_kws={"size": 7},
    linewidths=0.5,
    linecolor='gray',
)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<AxesSubplot:title={'center':'Correlation >60.0% with Bonferroni Correction Filter'}>

In [12]:
# Show the rows of the long-format table that passed both the correlation
# and the corrected p-value filter.
filtredTableCorrP

Unnamed: 0,Labels,Correlation,p_Value
127,IC1 -> ORGAN2,0.807901,0.0
128,IC1 -> ORGAN3,0.737832,7.096401e-294
450,IC3 -> Q1,-0.62706,1.6088239999999998e-187
453,IC3 -> Q4,-0.662498,1.3868899999999999e-216
457,IC3 -> Q8,-0.623517,8.066964999999999e-185
463,IC3 -> ADAS11,-0.665671,2.205923e-219
464,IC3 -> ADAS13,-0.688724,8.495467e-241
679,IC4 -> VISSPAT1,-0.613903,1.157077e-177
680,IC4 -> VISSPAT2,-0.651744,2.376082e-207
1681,IC9 -> ORGAN2_PT,0.791846,0.0


In [13]:
# Show the correlation matrix (rows: ICA columns, columns: original-data columns).
correlations


Unnamed: 0,TRAILS,CUBE,CLOCKCON,CLOCKNO,CLOCKHAN,LION,RHINO,CAMEL,IMMT1W1,IMMT1W2,...,ORGAN3_PT,ORGAN4_PT,ORGAN5_PT,ORGAN6_PT,DIVATT1_PT,DIVATT2_PT,DIVATT3_PT,DIVATT4_PT,STAFFASST,VALIDITY
IC1,0.060835,0.014108,0.014265,-0.019268,-0.016402,0.016824,0.084747,-0.009841,-0.002954,0.004152,...,0.063936,-0.024984,0.042333,-2.6e-05,-0.019244,0.018951,-0.047258,-0.034917,-0.007834,-0.006647
IC2,-0.191889,-0.172494,-0.08655,-0.222874,-0.283318,-0.017082,-0.116707,-0.110598,-0.12973,-0.101209,...,-0.070403,-0.135821,-0.178906,-0.090714,-0.156735,-0.143858,-0.15563,-0.133519,0.17276,0.20647
IC3,0.100553,0.109836,0.006608,0.094096,0.120359,0.090783,0.159551,0.124097,0.06854,0.128775,...,-0.154288,-0.26863,-0.203247,-0.159048,-0.329856,-0.272344,-0.331779,-0.280827,-0.185558,-0.142457
IC4,0.032178,0.020765,-0.011421,-0.038419,-0.032832,0.004436,-0.000126,-0.042022,-0.011728,0.020528,...,-0.046107,0.002471,-0.007588,-0.072137,0.001302,0.034655,0.045212,0.039447,0.026493,0.008454
IC5,-0.011171,-0.054018,0.015815,-0.039894,-0.071546,-0.038002,-0.090864,-0.018731,0.011292,-0.028528,...,-0.200075,-0.239281,-0.187892,-0.194133,-0.26083,-0.358436,-0.293553,-0.275164,-0.024256,-0.017362
IC6,-0.234304,-0.198781,-0.079136,-0.272865,-0.24126,-0.076839,-0.13711,-0.129971,-0.125526,-0.122717,...,0.188375,0.295554,0.267395,0.192492,0.345118,0.348019,0.302296,0.305439,0.121602,0.08883
IC7,-0.042112,0.008067,-0.049796,0.021349,-0.019993,0.005562,-0.104871,0.018723,-0.043761,-0.070276,...,-0.001706,0.047835,0.016836,-0.011081,0.021545,-0.074732,-0.059261,0.031853,0.063847,0.030973
IC8,0.108482,0.16613,0.046829,0.020669,0.136366,-0.006192,0.074652,0.021452,0.042832,0.100975,...,0.015509,-0.008978,0.071908,0.027797,0.156724,0.119329,0.012366,0.147197,-0.062068,-0.056097
IC9,-0.102183,-0.07316,0.017934,-0.061642,-0.100419,0.016343,-0.046106,-0.029979,-0.036076,-0.019428,...,0.734157,0.288124,0.336952,0.23818,0.177614,0.177781,0.129575,0.173358,0.241754,0.224177
IC10,-0.200153,-0.134609,-0.129611,-0.171183,-0.204918,-0.095458,-0.176458,-0.044611,-0.089919,-0.099397,...,0.024607,-0.07994,-0.103114,-0.039964,-0.123838,-0.147096,-0.218904,-0.108562,0.301633,0.29585


In [14]:
# Hand the correlation matrix to the project helper under the name "ICADataCorrelation".
# NOTE(review): presumably this writes a file — confirm format and location in common.saveFile.
common.saveFile(correlations, "ICADataCorrelation")