# Correlation ICA

In [1]:
%matplotlib widget
import numpy as np
import pandas as pd
import seaborn as sns
import scipy
from scipy.stats import pearsonr
import matplotlib.pyplot as plt

import common

In [2]:
# Columns removed from the cleaned dataset before it is correlated against the ICA components.
droppedOriginalColumns = ["RID", "VISCODE", "AGE", "PTEDUCAT", "PTGENDER", "DX"]

# Load both tables through the project helper and discard the columns that are not correlated.
originalData = common.loadFile("CleanedData").drop(columns=droppedOriginalColumns)
ICA = common.loadFile("ICAData").drop(columns="DX")

In [3]:
def getCorrP(x, y):
    """Pearson correlation between two columns, ignoring incomplete rows.

    A row is skipped when either value is the empty string ``''`` or is NaN;
    every remaining value is converted with ``float``.

    Parameters
    ----------
    x, y : indexable sequences
        Both columns must have the same size; they are read by position.

    Returns
    -------
    The result of ``scipy.stats.pearsonr`` (unpackable as ``r, p``) when at
    least two complete rows exist. Otherwise ``(0.0, 1.0)``: no correlation
    and no evidence against the null hypothesis, so the pair can never be
    mistaken for a significant result.
    """
    xData = []
    yData = []
    for i in range(len(x)):  # Both columns must have same size
        if x[i] != '' and y[i] != '':
            xValue = float(x[i])
            yValue = float(y[i])
            # NaN is the only float that differs from itself; a single NaN
            # would otherwise turn the whole correlation into NaN.
            if xValue != xValue or yValue != yValue:
                continue
            xData.append(xValue)
            yData.append(yValue)
    if len(xData) > 1:
        return pearsonr(xData, yData)
    # Not enough data to run the test: report a non-significant p-value.
    return 0.0, 1.0
    

In [4]:
# Column labels and raw value matrices of both tables.
# NOTE(review): the two tables are paired row by row, so they are assumed to hold
# the same rows in the same order — confirm against the files produced upstream.
originalDataLabels = originalData.columns.values
originalDataNp = originalData.to_numpy()
ICADataLabels = ICA.columns.values
ICADataNp = ICA.to_numpy()

# One row per ICA component, one column per original variable.
correlations = pd.DataFrame(index=ICADataLabels, columns=originalDataLabels)
pValues = pd.DataFrame(index=ICADataLabels, columns=originalDataLabels)

# Rows of the long-format table are collected in a list and turned into a DataFrame
# once: growing a DataFrame inside the loop is quadratic, and DataFrame.append no
# longer exists in pandas >= 2.0.
tableRows = []
for i, icaLabel in enumerate(ICADataLabels):
    for j, originalLabel in enumerate(originalDataLabels):
        p_r, p_p = getCorrP(ICADataNp[:, i], originalDataNp[:, j])
        correlations.at[icaLabel, originalLabel] = p_r
        pValues.at[icaLabel, originalLabel] = p_p
        tableRows.append({
            "Labels": f"{icaLabel} -> {originalLabel}",
            "Correlation": p_r,
            "p_Value": p_p,
        })

tableCorrP = pd.DataFrame(tableRows, columns=["Labels", "Correlation", "p_Value"])
        


In [5]:
# Long-format table: one row per "IC -> variable" pair with its correlation and p-value.
tableCorrP

Unnamed: 0,Labels,Correlation,p_Value
0,IC1 -> TRAILS,-0.202362,2.924528e-17
1,IC1 -> CUBE,-0.136306,1.523725e-08
2,IC1 -> CLOCKCON,-0.130236,6.494614e-08
3,IC1 -> CLOCKNO,-0.173953,4.377127e-13
4,IC1 -> CLOCKHAN,-0.206802,5.669701e-18
...,...,...,...
1875,IC10 -> DIVATT2_PT,-0.350265,1.563710e-50
1876,IC10 -> DIVATT3_PT,-0.305553,2.795091e-38
1877,IC10 -> DIVATT4_PT,-0.307506,8.984736e-39
1878,IC10 -> STAFFASST,-0.117871,1.020195e-06


# Correlations

In [6]:
# "coolwarm" palette as a colormap object; passed as `cmap` to the heatmaps below.
aux = sns.color_palette("coolwarm", as_cmap=True)

In [7]:
# Annotated heatmap of the correlation of every ICA component (rows) with every
# original variable (columns).
plt.figure(figsize=[18, 10])
# Titled like the other two heatmaps so the figure can be identified on its own.
plt.title('Correlations')
sns.heatmap(np.ma.filled(correlations.astype(float), np.nan), annot=True, annot_kws={"size": 7}, xticklabels=correlations.columns, yticklabels=correlations.index, cmap=aux)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<AxesSubplot:>

# P_Values

In [8]:
# Annotated heatmap of the p-value of every ICA component / original variable pair.
plt.figure(figsize=[18, 10])
plt.title('p Values')
pValueMatrix = np.ma.filled(pValues.astype(float), np.nan)  # plain float array for seaborn
sns.heatmap(
    pValueMatrix,
    cmap=aux,
    annot=True,
    annot_kws={"size": 7},
    xticklabels=pValues.columns,
    yticklabels=pValues.index,
)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<AxesSubplot:title={'center':'p Values'}>

## Correlations above 30% with Bonferroni correction filter

In [9]:
# Minimum absolute correlation a pair needs in order to be kept by the filter.
corrFilter = 0.3
# Significance level; the filtering cell divides it by a Bonferroni factor before use.
pFilter = 0.05

In [10]:
# Bonferroni correction: the significance level is divided by the number of tests that
# were actually performed, i.e. one per (ICA component, original variable) pair.
bonferroniThreshold = pFilter / (len(ICADataLabels) * len(originalDataLabels))

# mask[ic, variable] is True when the cell must be hidden in the heatmap.
mask = pd.DataFrame(index=ICADataLabels, columns=originalDataLabels)
keptLabels = []
for icaLabel in ICADataLabels:
    for originalLabel in originalDataLabels:
        # Written as "keep only if both tests pass" so that a NaN correlation or
        # p-value (every comparison with NaN is False) ends up hidden, not kept.
        isSignificant = pValues.at[icaLabel, originalLabel] < bonferroniThreshold
        isStrong = abs(correlations.at[icaLabel, originalLabel]) >= corrFilter
        if isSignificant and isStrong:
            mask.at[icaLabel, originalLabel] = False
            keptLabels.append(f"{icaLabel} -> {originalLabel}")
        else:
            mask.at[icaLabel, originalLabel] = True  # Hide

# Select the surviving rows in one pass; this keeps the row order and index of
# tableCorrP and avoids DataFrame.append, which no longer exists in pandas >= 2.0.
filtredTableCorrP = tableCorrP[tableCorrP["Labels"].isin(keptLabels)].copy()

mask = mask.to_numpy()

In [11]:
# Correlation heatmap restricted to the pairs that passed the filter: every cell whose
# `mask` entry is True is left blank.
plt.figure(figsize=[18, 10])
aux = sns.color_palette("coolwarm", as_cmap=True)
plt.title(f'Correlation >{corrFilter*100}% with Bonferroni Correction Filter')
correlationMatrix = np.ma.filled(correlations.astype(float), np.nan)  # plain float array for seaborn
sns.heatmap(
    correlationMatrix,
    mask=mask,
    cmap=aux,
    annot=True,
    annot_kws={"size": 7},
    xticklabels=correlations.columns,
    yticklabels=correlations.index,
    linewidths=0.5,
    linecolor='gray',
)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<AxesSubplot:title={'center':'Correlation >30.0% with Bonferroni Correction Filter'}>

In [12]:
# Pairs that passed both the correlation and the p-value filter (index taken from tableCorrP).
filtredTableCorrP

Unnamed: 0,Labels,Correlation,p_Value
36,IC1 -> DATE,-0.353009,2.365220e-51
37,IC1 -> MONTH,-0.400587,6.436722e-67
38,IC1 -> YEAR,-0.371645,3.849815e-57
39,IC1 -> DAY,-0.400801,5.403730e-67
40,IC1 -> PLACE,-0.305314,3.209458e-38
...,...,...,...
1865,IC10 -> PLAN3_PT,-0.317879,1.872734e-41
1874,IC10 -> DIVATT1_PT,-0.347467,1.052027e-49
1875,IC10 -> DIVATT2_PT,-0.350265,1.563710e-50
1876,IC10 -> DIVATT3_PT,-0.305553,2.795091e-38


In [13]:
# Full correlation matrix: ICA components as rows, original variables as columns.
correlations


Unnamed: 0,TRAILS,CUBE,CLOCKCON,CLOCKNO,CLOCKHAN,LION,RHINO,CAMEL,IMMT1W1,IMMT1W2,...,ORGAN3_PT,ORGAN4_PT,ORGAN5_PT,ORGAN6_PT,DIVATT1_PT,DIVATT2_PT,DIVATT3_PT,DIVATT4_PT,STAFFASST,VALIDITY
IC1,-0.202362,-0.136306,-0.130236,-0.173953,-0.206802,-0.096399,-0.177641,-0.045896,-0.091009,-0.100398,...,0.027594,-0.075394,-0.098655,-0.03692,-0.118279,-0.141511,-0.214083,-0.103607,0.302489,0.296167
IC2,-0.032195,-0.020754,0.011383,0.038392,0.03281,-0.004478,5.7e-05,0.042031,0.011719,-0.020547,...,0.045949,-0.00262,0.00746,0.072026,-0.001428,-0.034846,-0.045431,-0.03957,-0.026442,-0.008407
IC3,0.041638,-0.008431,0.049641,-0.021892,0.019568,-0.005639,0.104737,-0.018995,0.043473,0.070058,...,0.002262,-0.046909,-0.016043,0.011735,-0.020445,0.07597,0.060355,-0.03083,-0.0637,-0.030883
IC4,0.109161,0.166706,0.047135,0.021346,0.137168,-0.005896,0.075281,0.021725,0.043147,0.101357,...,0.015534,-0.008731,0.07219,0.027969,0.157052,0.119833,0.012934,0.147546,-0.0628,-0.056822
IC5,-0.100422,-0.109664,-0.006613,-0.093939,-0.120201,-0.090702,-0.159385,-0.124014,-0.068505,-0.128674,...,0.154208,0.268489,0.203063,0.159006,0.329726,0.272324,0.331685,0.280761,0.185572,0.142517
IC6,0.011385,0.054191,-0.015754,0.040323,0.071975,0.038127,0.091033,0.019022,-0.011112,0.028604,...,0.200202,0.239619,0.188252,0.194299,0.26096,0.358539,0.293747,0.275272,0.023812,0.016825
IC7,-0.101691,-0.07275,0.01814,-0.061098,-0.099861,0.016469,-0.045797,-0.029734,-0.035801,-0.019176,...,0.733976,0.287854,0.336764,0.238021,0.177329,0.177488,0.129356,0.173093,0.241416,0.223865
IC8,-0.19295,-0.17308,-0.087168,-0.223825,-0.28423,-0.017423,-0.117331,-0.110861,-0.130267,-0.101627,...,-0.069182,-0.135063,-0.17829,-0.090044,-0.156022,-0.143053,-0.155425,-0.132764,0.174167,0.207789
IC9,-0.060676,-0.013968,-0.014173,0.019429,0.016587,-0.016755,-0.084621,0.009898,0.003027,-0.004067,...,-0.063861,0.02505,-0.042242,7.9e-05,0.01934,-0.018854,0.047376,0.035008,0.00768,0.006495
IC10,0.231588,0.196818,0.077314,0.270334,0.238314,0.075616,0.135055,0.129299,0.124283,0.121488,...,-0.189106,-0.297358,-0.269593,-0.193453,-0.347467,-0.350265,-0.305553,-0.307506,-0.117871,-0.084997


In [14]:
#common.saveFile(correlations, "ICADataCorrelation")

Save the tables to CSV. Careful: this is destructive — an existing file with the same name will be replaced.

In [15]:
# common.saveFile(tableCorrP, "Correlações sem filtro", True)

In [16]:
# common.saveFile(filtredTableCorrP, "Correlações filtradas", True)

In [17]:
# Filtered table shown once more; it is the frame the (commented-out) save cell above would write.
filtredTableCorrP

Unnamed: 0,Labels,Correlation,p_Value
36,IC1 -> DATE,-0.353009,2.365220e-51
37,IC1 -> MONTH,-0.400587,6.436722e-67
38,IC1 -> YEAR,-0.371645,3.849815e-57
39,IC1 -> DAY,-0.400801,5.403730e-67
40,IC1 -> PLACE,-0.305314,3.209458e-38
...,...,...,...
1865,IC10 -> PLAN3_PT,-0.317879,1.872734e-41
1874,IC10 -> DIVATT1_PT,-0.347467,1.052027e-49
1875,IC10 -> DIVATT2_PT,-0.350265,1.563710e-50
1876,IC10 -> DIVATT3_PT,-0.305553,2.795091e-38
