# Correlation ICA

In [1]:
%matplotlib widget
import numpy as np
import pandas as pd
import seaborn as sns
import scipy
from scipy.stats import pearsonr
import matplotlib.pyplot as plt

import common


# The way Pandas is being used in this file is deprecated. It will eventually stop working. The next lines hide all warnings:
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Columns left out of the correlation analysis (identifiers, demographics and
# diagnosis, judging by their names -- TODO confirm against the data dictionary).
excludedColumns = ["RID", "VISCODE", "AGE", "PTEDUCAT", "PTGENDER", "DX"]

# Cleaned source table without the excluded columns.
originalData = common.loadFile("CleanedData").drop(columns=excludedColumns)

# ICA table without the "DX" column.
ICA = common.loadFile("ICAData").drop(columns="DX")

In [3]:
def getCorrP(x, y):
    """Pearson correlation of two equally sized columns, skipping blank entries.

    A position is used only when neither ``x`` nor ``y`` holds an empty string
    there; the surviving values are converted to ``float``.

    Parameters:
        x, y: indexable sequences of the same length.

    Returns:
        The result of ``scipy.stats.pearsonr`` (correlation, p-value) when more
        than one complete pair exists, otherwise ``(0, 0)``.
    """
    # Index-based on purpose: y is read at the indices of x, so both columns
    # must have the same size.
    pairs = [
        (float(x[k]), float(y[k]))
        for k in range(len(x))
        if x[k] != '' and y[k] != ''
    ]

    # pearsonr needs at least two observations.
    if len(pairs) <= 1:
        return 0, 0

    xData, yData = (list(column) for column in zip(*pairs))
    return pearsonr(xData, yData)
    

In [4]:
# Column labels and raw value matrices of both tables.
originalDataLabels = originalData.columns.values
originalDataNp = originalData.to_numpy()
ICADataLabels = ICA.columns.values
ICADataNp = ICA.to_numpy()

# Wide results: one row per ICA column, one column per original-data column.
correlations = pd.DataFrame(index=ICADataLabels, columns=originalDataLabels)
pValues = pd.DataFrame(index=ICADataLabels, columns=originalDataLabels)

# Long results are collected as plain records and turned into a DataFrame once.
# DataFrame.append (used previously) copies the whole frame on every call and
# was removed in pandas 2.0.
corrRows = []
for i, icaLabel in enumerate(ICADataLabels):
    for j, originalLabel in enumerate(originalDataLabels):
        p_r, p_p = getCorrP(ICADataNp[:, i], originalDataNp[:, j])
        correlations.at[icaLabel, originalLabel] = p_r
        pValues.at[icaLabel, originalLabel] = p_p

        corrRows.append({
            "Labels": f"{icaLabel} -> {originalLabel}",
            "Correlation": p_r,
            "p_Value": p_p,
        })

# Passing `columns` keeps the expected layout even when there are no rows.
tableCorrP = pd.DataFrame(corrRows, columns=["Labels", "Correlation", "p_Value"])
        


In [5]:
# Show the long-format table: one row per "IC -> variable" pair with its
# correlation and p-value.
tableCorrP

Unnamed: 0,Labels,Correlation,p_Value
0,IC1 -> TRAILS,0.194929,0.0
1,IC1 -> CUBE,0.130092,0.0
2,IC1 -> CLOCKCON,0.127485,0.0
3,IC1 -> CLOCKNO,0.165136,0.0
4,IC1 -> CLOCKHAN,0.198284,0.0
...,...,...,...
1875,IC10 -> DIVATT2_PT,0.349455,0.0
1876,IC10 -> DIVATT3_PT,0.302323,0.0
1877,IC10 -> DIVATT4_PT,0.3068,0.0
1878,IC10 -> STAFFASST,0.120188,0.000001


# Correlations

In [6]:
# "coolwarm" palette as a matplotlib colormap, shared by the heatmaps below.
aux = sns.color_palette("coolwarm", as_cmap=True)

In [7]:
# Annotated heatmap of every ICA-column / original-column correlation.
plt.figure(figsize=[18, 10])

# The results frame is filled cell by cell, so it is cast to float before plotting.
correlationValues = np.ma.filled(correlations.astype(float), np.nan)

sns.heatmap(
    correlationValues,
    annot=True,
    annot_kws={"size": 7},
    xticklabels=correlations.columns,
    yticklabels=correlations.index,
    cmap=aux,
)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<AxesSubplot:>

# P_Values

In [8]:
# Annotated heatmap of the p-value of every ICA-column / original-column pair.
plt.figure(figsize=[18, 10])
plt.title('p Values')

# The results frame is filled cell by cell, so it is cast to float before plotting.
pValueMatrix = np.ma.filled(pValues.astype(float), np.nan)

sns.heatmap(
    pValueMatrix,
    annot=True,
    annot_kws={"size": 7},
    xticklabels=pValues.columns,
    yticklabels=pValues.index,
    cmap=aux,
)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<AxesSubplot:title={'center':'p Values'}>

## Correlations above 30% (absolute value) with Bonferroni Correction filter

In [9]:
# Minimum absolute correlation a pair needs in order to be kept.
corrFilter = 0.3
# Significance level before the multiple-comparison correction is applied.
pFilter = 0.05

In [10]:
# Corrected significance threshold.
# NOTE(review): the divisor is len(originalDataLabels)**2, while the number of
# tests actually performed is len(ICADataLabels) * len(originalDataLabels).
# Kept as-is to preserve the published results -- confirm which one is intended.
pThreshold = pFilter / (len(originalDataLabels) ** 2)

# The result frames are filled cell by cell, so cast them to float once.
pValuesFloat = pValues.astype(float)
correlationsFloat = correlations.astype(float)

# True = hide the cell: not significant after correction, or correlation too weak.
hiddenCells = (pValuesFloat >= pThreshold) | (correlationsFloat.abs() < corrFilter)
mask = hiddenCells.to_numpy()

# Labels of the pairs that passed both filters, in the same order used to
# build tableCorrP.
keptLabels = [
    f"{icaLabel} -> {originalLabel}"
    for icaLabel in ICADataLabels
    for originalLabel in originalDataLabels
    if not hiddenCells.at[icaLabel, originalLabel]
]

# Select all surviving rows at once (original row indices are preserved).
# DataFrame.append (used previously) was removed in pandas 2.0 and re-copied
# the frame on every iteration.
filtredTableCorrP = tableCorrP[tableCorrP["Labels"].isin(keptLabels)].copy()

In [11]:
# Correlation heatmap restricted to the pairs that passed both filters;
# cells flagged True in `mask` are hidden.
plt.figure(figsize=[18, 10])
aux = sns.color_palette("coolwarm", as_cmap=True)
plt.title(f'Correlation >{corrFilter*100}% with Bonferroni Correction Filter')

# The results frame is filled cell by cell, so it is cast to float before plotting.
filteredCorrelationValues = np.ma.filled(correlations.astype(float), np.nan)

sns.heatmap(
    filteredCorrelationValues,
    annot=True,
    annot_kws={"size": 7},
    xticklabels=correlations.columns,
    yticklabels=correlations.index,
    cmap=aux,
    mask=mask,
    linewidths=0.5,
    linecolor='gray',
)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<AxesSubplot:title={'center':'Correlation >30.0% with Bonferroni Correction Filter'}>

In [12]:
# Show the pairs that passed both the correlation and the corrected p-value filters.
filtredTableCorrP

Unnamed: 0,Labels,Correlation,p_Value
36,IC1 -> DATE,0.347023,0.0
37,IC1 -> MONTH,0.397156,0.0
38,IC1 -> YEAR,0.367182,0.0
39,IC1 -> DAY,0.396403,0.0
40,IC1 -> PLACE,0.300287,0.0
...,...,...,...
1865,IC10 -> PLAN3_PT,0.316898,0.0
1874,IC10 -> DIVATT1_PT,0.346784,0.0
1875,IC10 -> DIVATT2_PT,0.349455,0.0
1876,IC10 -> DIVATT3_PT,0.302323,0.0


In [13]:
# Show the correlation matrix (rows: ICA columns, columns: original-data columns).
correlations


Unnamed: 0,TRAILS,CUBE,CLOCKCON,CLOCKNO,CLOCKHAN,LION,RHINO,CAMEL,IMMT1W1,IMMT1W2,...,ORGAN3_PT,ORGAN4_PT,ORGAN5_PT,ORGAN6_PT,DIVATT1_PT,DIVATT2_PT,DIVATT3_PT,DIVATT4_PT,STAFFASST,VALIDITY
IC1,0.194929,0.130092,0.127485,0.165136,0.198284,0.094622,0.173641,0.041772,0.086729,0.096839,...,-0.023238,0.080856,0.103141,0.04064,0.124727,0.148505,0.219475,0.109518,-0.29778,-0.291745
IC2,0.06097,0.014134,0.014334,-0.019107,-0.016328,0.016793,0.084711,-0.009856,-0.002886,0.00413,...,0.063652,-0.025155,0.042106,-0.000196,-0.019443,0.018716,-0.047314,-0.035121,-0.007829,-0.006645
IC3,0.101855,0.110313,0.007285,0.095115,0.121307,0.09081,0.160446,0.124547,0.069521,0.129579,...,-0.156139,-0.271195,-0.205303,-0.160678,-0.332785,-0.274966,-0.33394,-0.283741,-0.186602,-0.1432
IC4,-0.11064,-0.168096,-0.04756,-0.023079,-0.139,0.005454,-0.076408,-0.022737,-0.044043,-0.102315,...,-0.014992,0.009866,-0.071342,-0.027347,-0.155647,-0.118633,-0.011659,-0.146354,0.063707,0.057616
IC5,-0.041718,0.008686,-0.049901,0.021567,-0.019316,0.006151,-0.103651,0.019387,-0.043522,-0.069478,...,-0.002288,0.047126,0.016442,-0.011334,0.020769,-0.074838,-0.060086,0.031335,0.062513,0.029865
IC6,0.013063,0.055552,-0.015091,0.04217,0.074288,0.038446,0.092579,0.020214,-0.009833,0.029855,...,0.199987,0.239143,0.188322,0.194216,0.260234,0.358264,0.293366,0.274544,0.022078,0.015126
IC7,-0.031486,-0.020203,0.011672,0.039094,0.033505,-0.004232,0.000568,0.042377,0.012104,-0.020135,...,0.045353,-0.003435,0.006801,0.071498,-0.002317,-0.035694,-0.046162,-0.040381,-0.027052,-0.008909
IC8,-0.199224,-0.177384,-0.090093,-0.230488,-0.290097,-0.019541,-0.120791,-0.113407,-0.133519,-0.104333,...,-0.062935,-0.128756,-0.172694,-0.085269,-0.148932,-0.135419,-0.15051,-0.12609,0.179985,0.212948
IC9,-0.101294,-0.072311,0.018197,-0.060536,-0.099287,0.016564,-0.045587,-0.029359,-0.035542,-0.018947,...,0.733823,0.287531,0.336536,0.237816,0.176876,0.176972,0.128793,0.172704,0.241232,0.223665
IC10,-0.231414,-0.195459,-0.078099,-0.269503,-0.236325,-0.077016,-0.135119,-0.127528,-0.123142,-0.12045,...,0.191357,0.296873,0.270335,0.194133,0.346784,0.349455,0.302323,0.3068,0.120188,0.086963


In [14]:
#common.saveFile(correlations, "ICADataCorrelation")

Save the tables to CSV. Careful: this is destructive and will replace the file if it already exists.

In [15]:
# common.saveFile(tableCorrP, "Correlações sem filtro", True)

In [16]:
# common.saveFile(filtredTableCorrP, "Correlações filtradas", True)

In [17]:
# Show the filtered table again (the table the commented-out save cell above would write).
filtredTableCorrP

Unnamed: 0,Labels,Correlation,p_Value
36,IC1 -> DATE,0.347023,0.0
37,IC1 -> MONTH,0.397156,0.0
38,IC1 -> YEAR,0.367182,0.0
39,IC1 -> DAY,0.396403,0.0
40,IC1 -> PLACE,0.300287,0.0
...,...,...,...
1865,IC10 -> PLAN3_PT,0.316898,0.0
1874,IC10 -> DIVATT1_PT,0.346784,0.0
1875,IC10 -> DIVATT2_PT,0.349455,0.0
1876,IC10 -> DIVATT3_PT,0.302323,0.0
