In [114]:
# Importing packages
import csv
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

## Loading the data

In [92]:
# Summary table restricted to the patients that have associated ECG values,
# indexed by patient ID
df_ECG = pd.read_csv(
    './database/data_description/ECG_data_summary_table.csv',
    encoding='latin-1',
).set_index('patient ID')

# Summary table covering all patients, indexed by patient ID
df_all = pd.read_csv(
    './database/data_description/GE-75_data_summary_table.csv',
    encoding='latin-1',
).set_index('patient ID')


In [93]:
# Optional step: trim both summary tables down to the variables we want to focus on.
# NOTE: the columns are selected by hard-coded POSITION, so this cell is not
# idempotent -- run it at most once after loading the data. Running it a second
# time would remove additional columns.
def _drop_unused_columns(df):
    """Return a copy of `df` without the two positionally selected column ranges.

    The columns at positions 12..936 are removed first (the brain perfusion
    variables); then the last 69 columns of what remains are removed (the
    MENTAL test variables). The input frame itself is left unmodified.
    """
    trimmed = df.drop(columns=df.columns[12:937])
    return trimmed.drop(columns=trimmed.columns[-69:])


df_all = _drop_unused_columns(df_all)
df_ECG = _drop_unused_columns(df_ECG)

# Strip columns with all NaN values.
# Each table is cleaned from itself; previously df_ECG was derived from df_all
# here, which silently replaced the ECG table with the all-patients table.
df_all = df_all.dropna(axis='columns', how='all')
df_ECG = df_ECG.dropna(axis='columns', how='all')

In [94]:
# Preview the first rows to see which variables (columns) are available
df_all.head(n=5)

Unnamed: 0_level_0,study,Group,template,Diabetes Duration,HTN or not,age,sex,HEIGHT (M),MASS (KG),BMI,...,Left Eye Diabetic Retinopathy,Diabetic Retinopathy (more advanced eye),Right Eye Macular Edema,Left Eye Macular Edema,Macular Edema (more advanced eye),RETINOPATHY-more advanced eye,Retinopathy Grading,Macular edema Grade-more advanced eye,GAIT - Walk 1 distance (m),GAIT - Walk 1 speed (m/s)
patient ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
S0250,GE75,DM,,7.0,,50,Male,1.79,114.65,35.782279,...,0.0,0.0,0.0,0.0,0.0,no,,no,812.76,1.127
S0254,GE75,CONTROL,Modified,0.0,ntn,69,Female,1.531,56.9,24.275148,...,0.0,0.0,0.0,0.0,0.0,no,0.0,no,1028.54,1.43
S0255,GE75,DM,,3.0,,78,Female,1.52,65.77,28.466932,...,,,,,,,,,784.65,1.09
S0256,GE75,DM,Modified,23.0,ntn,65,Female,1.575,73.5,29.62963,...,0.0,0.0,0.0,0.0,0.0,no,0.0,no,718.12,0.997
s0257,GE75,CONTROL,,0.0,,55,Male,1.7,70.31,24.32872,...,,,,,,,,,,


In [109]:
# Columns that go into the cross-correlation matrix (edit this list as needed)
variables = [
    'Group',
    'Diabetes Duration',
    'age',
    'sex',
    'Neuropathy AUTONOMIC SYMPTOMS',
]
df_all.loc[:, variables].head()

Unnamed: 0_level_0,Group,Diabetes Duration,age,sex,Neuropathy AUTONOMIC SYMPTOMS
patient ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
S0250,dm,7.0,50,male,0
S0254,control,0.0,69,female,0
S0255,dm,3.0,78,female,0
S0256,dm,23.0,65,female,0
s0257,control,0.0,55,male,0


In [111]:
# .corr() can only use numeric columns, so the categorical variables are encoded
# as 1/0 first (open question: is a plain binary encoding enough?).
# The encoded frame is built explicitly and assigned back in one step: calling
# replace(..., inplace=True) on df_all[variables] or df_all['col'] acts on a
# temporary object and is not guaranteed to modify df_all.
encoded = df_all[variables].apply(lambda col: col.astype(str).str.lower())  # all values -> lowercase strings
encoded = encoded.replace({'yes': 1, 'no': 0})
encoded['Group'] = encoded['Group'].replace({'dm': 1, 'control': 0})
encoded['sex'] = encoded['sex'].replace({'female': 1, 'male': 0})
# Missing values became the string 'nan' above and turn back into NaN here;
# any other unmapped text raises, which flags a category that still needs a code.
df_all[variables] = encoded.astype(float)

In [113]:
# Cross-correlation matrix of the selected variables, shown as a heat map
selected = df_all.loc[:, variables]
corr = selected.corr()
corr.style.background_gradient(cmap='coolwarm')  # any other matplotlib colormap name works too

Unnamed: 0,Group,Diabetes Duration,age,sex,Neuropathy AUTONOMIC SYMPTOMS
Group,1.0,0.343247,0.017992,0.039675,0.1643
Diabetes Duration,0.343247,1.0,0.061253,0.066441,0.249084
age,0.017992,0.061253,1.0,-0.039081,0.258808
sex,0.039675,0.066441,-0.039081,1.0,-0.094597
Neuropathy AUTONOMIC SYMPTOMS,0.1643,0.249084,0.258808,-0.094597,1.0


In [75]:
# Or just look at the cross correlation of all the numeric values.
# Numeric (and boolean) columns are selected explicitly: since pandas 2.0,
# DataFrame.corr() no longer skips non-numeric columns by default and raises
# on text columns instead.
corr = df_all.select_dtypes(include=['number', 'bool']).corr()
corr.style.background_gradient(cmap='coolwarm') #can set to different styles depending on what you want

  xa[xa < 0] = -1


Unnamed: 0,Diabetes Duration,age,HEIGHT (M),MASS (KG),BMI,Perfusion Vascular (whole brain) - baseline 5 whole,Perfusion Vascular (ACA Perf) - baseline 5 whole,Perfusion Vascular (ACHA) - baseline 5 whole,Perfusion Vascular (Lepto ACA) - baseline 5 whole,Perfusion Vascular (Lepto MCA) - baseline 5 whole,Perfusion Vascular (Lepto PCA) - baseline 5 whole,Perfusion Vascular (MCA Perf) - baseline 5 whole,Perfusion Vascular (POCA) - baseline 5 whole,PackYears,Years,ALCOHOL Dose/Week,HTN YRS PATIENT MEDICAL HISTORY,WBC K/uL,RBC m/uL,Hgb g/dL,Hct %,MCV fL,MCH pg,MCHC %,RDW%,Plt ct K/uL,GLUCOSE mg/dL,CHOLESTmg/dL,TRIGLYCmg/dL,HDL mg/dL,CHOL/HD Ratio,LDL CALCmg/dL,URINE CREAT mg/dL,URINE ALBUMIN mg/dL,Alb/Cre mg/g (URINE),ACR <5,ACR >5,ACR >10,Hb A1C%,SICAM (ng/ml),SVCAM (ng/ml),CRP (mg/L),IL-6 (pg/ml),TNFa (pg/ml),Right Eye Diabetic Retinopathy,Left Eye Diabetic Retinopathy,Diabetic Retinopathy (more advanced eye),Right Eye Macular Edema,Left Eye Macular Edema,Macular Edema (more advanced eye),Retinopathy Grading,GAIT - Walk 1 distance (m),GAIT - Walk 1 speed (m/s)
Diabetes Duration,1.0,0.061253,0.038241,0.022806,0.016606,-0.225235,-0.306757,-0.22182,-0.268127,-0.174443,-0.115755,-0.134746,-0.229977,-0.005937,0.054705,-0.035752,-0.074859,-0.014411,0.024218,-0.147769,-0.08586,-0.186045,-0.267757,-0.231594,0.362606,-0.082905,0.203484,0.061241,0.225531,0.025852,0.10875,-0.023769,0.27627,0.105035,-0.179579,-0.317911,-0.255429,-0.148755,0.355991,0.009293,0.213062,0.020845,0.369431,0.056446,0.187823,0.250961,0.275788,0.079955,0.184829,0.17009,0.286347,0.056164,-0.015964
age,0.061253,1.0,-0.108299,-0.337323,-0.33325,0.601698,0.675116,0.585247,0.659049,0.605913,0.483426,0.595544,0.590251,0.014989,0.116534,0.044273,-0.111695,-0.146606,-0.223039,-0.160087,-0.120974,0.20176,0.132088,-0.11826,0.00379,-0.145365,-0.139134,-0.010259,-0.090941,0.025772,-0.063776,-0.047564,-0.038893,0.042426,0.171844,0.144513,0.207413,0.244034,-0.176768,0.28262,0.103794,0.017296,-0.02341,0.108469,0.235832,0.413342,0.309456,0.311493,0.428562,0.384493,-0.159104,-0.151689,-0.276746
HEIGHT (M),0.038241,-0.108299,1.0,0.528951,-0.096679,0.143129,0.09686,0.117867,0.160454,0.292098,0.174337,0.404252,0.108211,0.067078,0.164184,0.290266,-0.026984,-0.036486,0.286212,0.454817,0.405341,0.125955,0.228592,0.235986,-0.139284,-0.324201,0.089617,-0.186842,0.058876,-0.307378,0.192749,-0.079632,0.133276,-0.210284,-0.156196,-0.232683,-0.033025,-0.199217,-0.166445,0.021082,0.335303,0.036245,-0.042231,0.110926,-0.126463,-0.062963,-0.058839,-0.155155,-0.062352,-0.09838,0.017063,0.027351,-0.03684
MASS (KG),0.022806,-0.337323,0.528951,1.0,0.786711,-0.740191,-0.799489,-0.729061,-0.781372,-0.724598,-0.642483,-0.689716,-0.733637,0.290535,0.219133,0.099364,0.234128,0.165709,0.228415,0.286823,0.246696,-0.023542,0.066461,0.193785,0.022766,-0.074991,0.222548,-0.258205,0.246161,-0.497235,0.35837,-0.204997,0.217998,-0.087395,-0.210668,-0.388242,-0.200834,-0.413751,0.103107,-0.0246,0.189792,0.215575,0.092132,0.086973,-0.033089,-0.194112,-0.091354,-0.160474,-0.227518,-0.208552,0.243053,-0.043105,-0.170357
BMI,0.016606,-0.33325,-0.096679,0.786711,1.0,-0.73252,-0.735338,-0.699825,-0.781243,-0.860906,-0.684966,-0.93871,-0.694285,0.300239,0.147478,-0.084131,0.314861,0.237539,0.061692,-0.00965,-0.019206,-0.143088,-0.11337,0.043883,0.140333,0.157872,0.187489,-0.156507,0.232245,-0.352343,0.266925,-0.171394,0.166515,0.043067,-0.149633,-0.380473,-0.214289,-0.359735,0.241591,-0.050083,-0.050211,0.236338,0.177817,0.021249,0.037433,-0.201354,-0.071366,-0.099065,-0.248977,-0.190234,0.268364,-0.069904,-0.182954
Perfusion Vascular (whole brain) - baseline 5 whole,-0.225235,0.601698,0.143129,-0.740191,-0.73252,1.0,0.995259,0.998625,0.993278,0.975652,0.987808,0.912966,0.998351,-0.125792,-0.125792,-0.072304,,-0.943648,-0.366201,-0.219233,-0.129028,0.557252,0.539658,-0.39288,-0.932425,0.767462,0.042882,0.006374,0.49329,-0.596082,0.31597,0.152988,-0.900967,1.0,1.0,,1.0,,0.108241,0.310299,-0.812068,-0.584871,-0.991788,0.967221,,-0.767153,-0.767153,,,,-0.767153,-0.883529,-0.011247
Perfusion Vascular (ACA Perf) - baseline 5 whole,-0.306757,0.675116,0.09686,-0.799489,-0.735338,0.995259,1.0,0.991126,0.997506,0.977304,0.968565,0.921612,0.991337,-0.167309,-0.167309,-0.054443,,-0.906988,-0.387659,-0.207509,-0.09594,0.615267,0.612817,-0.423585,-0.957457,0.730234,-0.010955,-0.087788,0.411298,-0.540964,0.237658,0.06946,-0.917858,1.0,1.0,,1.0,,0.054894,0.151789,-0.706512,-0.601022,-0.999392,0.995621,,-0.765629,-0.765629,,,,-0.765629,-0.858857,-0.03535
Perfusion Vascular (ACHA) - baseline 5 whole,-0.22182,0.585247,0.117867,-0.729061,-0.699825,0.998625,0.991126,1.0,0.9862,0.963243,0.992587,0.890933,0.999953,-0.153204,-0.153204,-0.120534,,-0.951712,-0.398534,-0.264756,-0.17947,0.562899,0.530893,-0.420542,-0.931218,0.800016,0.021808,0.022812,0.50299,-0.584284,0.314835,0.154883,-0.877008,1.0,1.0,,1.0,,0.086865,0.331896,-0.825164,-0.543851,-0.988613,0.961179,,-0.737242,-0.737242,,,,-0.737242,-0.906728,-0.046832
Perfusion Vascular (Lepto ACA) - baseline 5 whole,-0.268127,0.659049,0.160454,-0.781372,-0.781243,0.993278,0.997506,0.9862,1.0,0.98978,0.964406,0.946232,0.985797,-0.101216,-0.101216,0.013788,,-0.906972,-0.322336,-0.13942,-0.031749,0.572225,0.585013,-0.360719,-0.940104,0.692549,0.050568,-0.064908,0.435349,-0.584649,0.278282,0.109049,-0.941329,1.0,1.0,,1.0,,0.116206,0.213537,-0.749545,-0.654249,-0.99961,0.987789,,-0.809122,-0.809122,,,,-0.809122,-0.828789,0.03419
Perfusion Vascular (Lepto MCA) - baseline 5 whole,-0.174443,0.605913,0.292098,-0.724598,-0.860906,0.975652,0.977304,0.963243,0.98978,1.0,0.945143,0.980176,0.961546,0.039875,0.039875,0.146708,,-0.899537,-0.18414,-0.003592,0.091386,0.469035,0.509611,-0.225404,-0.88794,0.613273,0.180872,-0.003566,0.489845,-0.671861,0.367495,0.199481,-0.971758,1.0,1.0,,1.0,,0.245061,0.329145,-0.823514,-0.748246,-0.989048,0.961979,,-0.884258,-0.884258,,,,-0.884258,-0.760732,0.176026
