In [1]:
# Importing packages
import csv
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [14]:
# Summary table covering only the patients that have associated ECG values,
# keyed by the 'patient ID' column.
df_ECG = pd.read_csv(
    './database/data_description/ECG_data_summary_table.csv',
    encoding='latin-1',
).set_index('patient ID')

# Summary table covering all patients, keyed the same way.
df_all = pd.read_csv(
    './database/data_description/GE-75_data_summary_table.csv',
    encoding='latin-1',
).set_index('patient ID')

In [3]:
# ONLY RUN THIS ONCE: the columns are removed by hard-coded position, so running
# this cell again on the already-trimmed tables would remove further columns.
def _drop_unused_columns(frame):
    """Return `frame` restricted to the variables we want to focus on.

    Steps, in order:
      1. drop the columns at positions 12..936 (the brain perfusion variables),
      2. drop the last 69 columns of what remains (the MENTAL test variables),
      3. drop every column that contains only NaN values.

    The input frame is not modified; a new DataFrame is returned.
    """
    # Select by explicit column labels rather than passing a DataFrame slice as labels.
    trimmed = frame.drop(columns=frame.columns[12:937])
    # The "-69:" window is taken AFTER the first drop, exactly as in the original order.
    trimmed = trimmed.drop(columns=trimmed.columns[-69:])
    return trimmed.dropna(axis='columns', how='all')


df_all = _drop_unused_columns(df_all)
# Derive the ECG table from df_ECG itself (it was previously overwritten with df_all).
df_ECG = _drop_unused_columns(df_ECG)

In [12]:
# Easy way to check a variable present in the database (the output below shows the 'Diabetes Duration' column)

patient ID
S0250     7.0
S0254     0.0
S0255     3.0
S0256    23.0
s0257     0.0
         ... 
S0435     NaN
s0437     NaN
s0438     NaN
s0441     NaN
s0443     NaN
Name: Diabetes Duration, Length: 88, dtype: float64


In [5]:
# Column groups used to build the cross-correlation matrices; edit these lists
# to choose which variables each matrix contains.
neuropathy = [
    'Group',
    'Diabetes Duration',
    'age',
    'WBC K/uL',
    'RBC m/uL',
    'Hgb g/dL',
    'GLUCOSE mg/dL',
    'Hb A1C%',
    'Dizziness AUTONOMIC SYMPTOMS',
    'LDL CALCmg/dL',
    'HDL mg/dL',
    'Neuropathy AUTONOMIC SYMPTOMS',
]
retinopathy = [
    'Group',
    'Diabetes Duration',
    'age',
    'BMI',
    'Hb A1C%',
    'CRP (mg/L)',
    'URINE ALBUMIN mg/dL',
    'Hgb g/dL',
    'Retinopathy Grading',
]
general = [
    'Group',
    'Diabetes Duration',
    'age',
    'BMI',
    'Hb A1C%',
    'CRP (mg/L)',
    'Neuropathy AUTONOMIC SYMPTOMS',
    'WBC K/uL',
    'RBC m/uL',
    'Hgb g/dL',
    'GLUCOSE mg/dL',
    'URINE CREAT mg/dL',
    'URINE ALBUMIN mg/dL',
    'CHOLESTmg/dL',
    'LDL CALCmg/dL',
    'Retinopathy Grading',
]
# Preview the first rows of the "general" selection.
df_all.loc[:, general].head()

Unnamed: 0_level_0,Group,Diabetes Duration,age,BMI,Hb A1C%,CRP (mg/L),Neuropathy AUTONOMIC SYMPTOMS,WBC K/uL,RBC m/uL,Hgb g/dL,GLUCOSE mg/dL,URINE CREAT mg/dL,URINE ALBUMIN mg/dL,CHOLESTmg/dL,LDL CALCmg/dL,Retinopathy Grading
patient ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
S0250,DM,7.0,50,35.782279,9.4,0.24,NO,6.9,5.31,15.0,211.0,65.0,2.4,135.0,72.0,
S0254,CONTROL,0.0,69,24.275148,5.1,0.104,NO,6.7,4.5,14.2,95.0,137.0,8.0,176.0,98.0,0.0
S0255,DM,3.0,78,28.466932,,,No,,,,,,,,,
S0256,DM,23.0,65,29.62963,7.1,0.06,NO,4.4,3.58,10.3,111.0,69.0,0.3,194.0,97.0,0.0
s0257,CONTROL,0.0,55,24.32872,,,No,,,,,,,,,


In [6]:
# .corr() ignores non-numeric columns, so the categorical answers are encoded as
# numbers before correlating (open question: is a plain binary 1/0 encoding enough?).

# Per-column codes for the categorical variables (matched after lower-casing).
binary_codes = {
    'Dizziness AUTONOMIC SYMPTOMS': {'yes': 1, 'no': 0},
    'Neuropathy AUTONOMIC SYMPTOMS': {'yes': 1, 'no': 0},
    'Group': {'dm': 1, 'control': 0},
}

# The three selections overlap; convert every column exactly once.
# dict.fromkeys de-duplicates while keeping the first-seen order.
selected_columns = list(dict.fromkeys(neuropathy + retinopathy + general))

# Normalise to trimmed, lower-case strings so 'NO', 'No' and 'no ' all match one code.
# Missing values become the string 'nan', which the float cast below turns back into NaN.
normalized = df_all[selected_columns].apply(
    lambda column: column.astype(str).str.strip().str.lower()
)

# Replace on the frame itself: calling .replace(..., inplace=True) on df_all['col']
# acts on a temporary object and does not update df_all under pandas Copy-on-Write.
normalized = normalized.replace(binary_codes)

# Any value that is still non-numeric raises here instead of silently becoming NaN.
df_all[selected_columns] = normalized.astype(float)

In [7]:
# Correlation matrix of the neuropathy variables, rendered with a
# blue-to-red ('coolwarm') background gradient.
corr = df_all.loc[:, neuropathy].corr()
corr.style.background_gradient(cmap='coolwarm')

Unnamed: 0,Group,Diabetes Duration,age,WBC K/uL,RBC m/uL,Hgb g/dL,GLUCOSE mg/dL,Hb A1C%,Dizziness AUTONOMIC SYMPTOMS,LDL CALCmg/dL,HDL mg/dL,Neuropathy AUTONOMIC SYMPTOMS
Group,1.0,0.343247,0.017992,0.113213,-0.023473,-0.083338,0.281988,0.396138,0.111132,-0.391213,-0.154784,0.1643
Diabetes Duration,0.343247,1.0,0.061253,-0.014411,0.024218,-0.147769,0.203484,0.355991,0.192039,-0.023769,0.025852,0.249084
age,0.017992,0.061253,1.0,-0.146606,-0.223039,-0.160087,-0.139134,-0.176768,-0.140042,-0.047564,0.025772,0.258808
WBC K/uL,0.113213,-0.014411,-0.146606,1.0,0.296738,0.264809,0.132625,0.049006,-0.001069,-0.086752,-0.163838,0.126824
RBC m/uL,-0.023473,0.024218,-0.223039,0.296738,1.0,0.716593,0.132499,0.102889,-0.201831,0.071803,-0.272643,-0.054491
Hgb g/dL,-0.083338,-0.147769,-0.160087,0.264809,0.716593,1.0,0.114665,-0.153096,-0.115004,0.032661,-0.408949,-0.038547
GLUCOSE mg/dL,0.281988,0.203484,-0.139134,0.132625,0.132499,0.114665,1.0,0.668586,0.06083,-0.194991,-0.193689,0.10582
Hb A1C%,0.396138,0.355991,-0.176768,0.049006,0.102889,-0.153096,0.668586,1.0,0.042122,-0.119976,0.090505,-0.016274
Dizziness AUTONOMIC SYMPTOMS,0.111132,0.192039,-0.140042,-0.001069,-0.201831,-0.115004,0.06083,0.042122,1.0,-0.200963,-0.038829,0.044042
LDL CALCmg/dL,-0.391213,-0.023769,-0.047564,-0.086752,0.071803,0.032661,-0.194991,-0.119976,-0.200963,1.0,0.192064,-0.15509


In [8]:
# Correlation matrix of the retinopathy variables, rendered with a
# blue-to-red ('coolwarm') background gradient.
corr = df_all.loc[:, retinopathy].corr()
corr.style.background_gradient(cmap='coolwarm')

Unnamed: 0,Group,Diabetes Duration,age,BMI,Hb A1C%,CRP (mg/L),URINE ALBUMIN mg/dL,Hgb g/dL,Retinopathy Grading
Group,1.0,0.343247,0.017992,0.289253,0.396138,0.100444,0.006906,-0.083338,0.091844
Diabetes Duration,0.343247,1.0,0.061253,0.016606,0.355991,0.020845,0.105035,-0.147769,0.286347
age,0.017992,0.061253,1.0,-0.33325,-0.176768,0.017296,0.042426,-0.160087,-0.159104
BMI,0.289253,0.016606,-0.33325,1.0,0.241591,0.236338,0.043067,-0.00965,0.268364
Hb A1C%,0.396138,0.355991,-0.176768,0.241591,1.0,-0.070253,0.04461,-0.153096,0.083804
CRP (mg/L),0.100444,0.020845,0.017296,0.236338,-0.070253,1.0,-0.027008,0.059743,-0.086664
URINE ALBUMIN mg/dL,0.006906,0.105035,0.042426,0.043067,0.04461,-0.027008,1.0,0.049341,0.162017
Hgb g/dL,-0.083338,-0.147769,-0.160087,-0.00965,-0.153096,0.059743,0.049341,1.0,-0.237265
Retinopathy Grading,0.091844,0.286347,-0.159104,0.268364,0.083804,-0.086664,0.162017,-0.237265,1.0


In [9]:
# Correlation matrix of the general variable selection, rendered with a
# blue-to-red ('coolwarm') background gradient.
corr = df_all.loc[:, general].corr()
corr.style.background_gradient(cmap='coolwarm')

Unnamed: 0,Group,Diabetes Duration,age,BMI,Hb A1C%,CRP (mg/L),Neuropathy AUTONOMIC SYMPTOMS,WBC K/uL,RBC m/uL,Hgb g/dL,GLUCOSE mg/dL,URINE CREAT mg/dL,URINE ALBUMIN mg/dL,CHOLESTmg/dL,LDL CALCmg/dL,Retinopathy Grading
Group,1.0,0.343247,0.017992,0.289253,0.396138,0.100444,0.1643,0.113213,-0.023473,-0.083338,0.281988,0.054389,0.006906,-0.318247,-0.391213,0.091844
Diabetes Duration,0.343247,1.0,0.061253,0.016606,0.355991,0.020845,0.249084,-0.014411,0.024218,-0.147769,0.203484,0.27627,0.105035,0.061241,-0.023769,0.286347
age,0.017992,0.061253,1.0,-0.33325,-0.176768,0.017296,0.258808,-0.146606,-0.223039,-0.160087,-0.139134,-0.038893,0.042426,-0.010259,-0.047564,-0.159104
BMI,0.289253,0.016606,-0.33325,1.0,0.241591,0.236338,0.075762,0.237539,0.061692,-0.00965,0.187489,0.166515,0.043067,-0.156507,-0.171394,0.268364
Hb A1C%,0.396138,0.355991,-0.176768,0.241591,1.0,-0.070253,-0.016274,0.049006,0.102889,-0.153096,0.668586,0.035446,0.04461,-0.051962,-0.119976,0.083804
CRP (mg/L),0.100444,0.020845,0.017296,0.236338,-0.070253,1.0,0.014366,-0.002741,0.002701,0.059743,-0.067676,0.109179,-0.027008,-0.058683,-0.008927,-0.086664
Neuropathy AUTONOMIC SYMPTOMS,0.1643,0.249084,0.258808,0.075762,-0.016274,0.014366,1.0,0.126824,-0.054491,-0.038547,0.10582,0.12788,0.105988,-0.122466,-0.15509,0.248962
WBC K/uL,0.113213,-0.014411,-0.146606,0.237539,0.049006,-0.002741,0.126824,1.0,0.296738,0.264809,0.132625,0.107485,0.274773,-0.102522,-0.086752,-0.016108
RBC m/uL,-0.023473,0.024218,-0.223039,0.061692,0.102889,0.002701,-0.054491,0.296738,1.0,0.716593,0.132499,-0.102221,0.114197,0.009447,0.071803,-0.160092
Hgb g/dL,-0.083338,-0.147769,-0.160087,-0.00965,-0.153096,0.059743,-0.038547,0.264809,0.716593,1.0,0.114665,-0.089622,0.049341,-0.049035,0.032661,-0.237265
