# Create cohorts

## Define data directory

The top-level directory under which all the data for this experiment is stored.

In [1]:
import os

# Top-level data directory for this experiment.
# Defaults to the original location; set the MORTALITY_DATA_DIR environment
# variable to run the notebook against a different directory without editing
# this cell (an empty value is treated as "not set").
# os.path.join(..., '') guarantees a trailing separator, which matters because
# the cells below build file paths by plain string concatenation.
dataDirName = os.path.join(
    os.environ.get('MORTALITY_DATA_DIR')
    or '/home/yram0006/phd/chapter_2/workspace/mortality_data/standardisation_experiment/',
    '',
)

## Create raw data matrix

### Read data files

In [2]:
import pandas as pd


# Patient-level extract tables.
demographics = pd.read_csv(f'{dataDirName}extract/demographics_icd.csv')
deaths = pd.read_csv(f'{dataDirName}extract/deaths.csv')

# Outlier-corrected vitals: one table per summary statistic, read in the same
# order as the names on the left-hand side.
(vitals_min, vitals_max, vitals_avg,
 vitals_stddev, vitals_first, vitals_last) = (
    pd.read_csv(f'{dataDirName}outlier_removal/vitals_{stat}_icd_corrected.csv')
    for stat in ('min', 'max', 'avg', 'stddev', 'first', 'last')
)

# Outlier-corrected lab measurements: same layout as the vitals tables.
(lab_measurements_min, lab_measurements_max, lab_measurements_avg,
 lab_measurements_stddev, lab_measurements_first, lab_measurements_last) = (
    pd.read_csv(f'{dataDirName}outlier_removal/lab_measurements_{stat}_icd_corrected.csv')
    for stat in ('min', 'max', 'avg', 'stddev', 'first', 'last')
)

### Display data counts

In [3]:
# (rows, columns) of the two patient-level extract tables.
tuple(table.shape for table in (demographics, deaths))

((12241, 4), (12241, 3))

In [4]:
# (rows, columns) of each vitals summary table, in min/max/avg/stddev/first/last order.
tuple(
    table.shape
    for table in (vitals_min, vitals_max, vitals_avg, vitals_stddev, vitals_first, vitals_last)
)

((3942, 11), (3942, 11), (3942, 11), (3942, 11), (3942, 11), (3942, 11))

In [5]:
# (rows, columns) of each lab summary table, in min/max/avg/stddev/first/last order.
tuple(
    table.shape
    for table in (
        lab_measurements_min,
        lab_measurements_max,
        lab_measurements_avg,
        lab_measurements_stddev,
        lab_measurements_first,
        lab_measurements_last,
    )
)

((7448, 13), (7450, 13), (7450, 13), (7584, 8), (7450, 13), (7448, 13))

### Categorical value handling

In [6]:
# Encode gender as a binary indicator: 1 for 'M', 0 for every other value
# (missing values included).
# The already-encoded value 1 is accepted alongside 'M' so that re-running this
# cell leaves the column unchanged; comparing against 'M' alone would reset
# every row to 0 on a second run, because the column then holds 0/1 integers.
demographics['gender'] = demographics['gender'].isin(['M', 1]).astype('int64')

In [7]:
# One-hot encode ethnicity into eight 0/1 indicator columns and then remove the
# original text column. A row whose ethnicity matches none of the listed values
# (or is missing) gets 0 in every indicator.
# The guard makes the cell safe to re-run: once the text column has been
# dropped the indicators already exist, whereas an unguarded second run would
# fail with AttributeError on the missing 'ethnicity' column.
if 'ethnicity' in demographics.columns:
    ethnicityIndicators = [
        # (indicator column suffix, raw ethnicity value)
        ('WHITE', 'WHITE'),
        ('BLACK', 'BLACK/AFRICAN AMERICAN'),
        ('UNKNOWN', 'UNKNOWN'),
        ('OTHER', 'OTHER'),
        ('HISPANIC', 'HISPANIC/LATINO'),
        ('ASIAN', 'ASIAN'),
        ('UNABLE_TO_OBTAIN', 'UNABLE TO OBTAIN'),
        ('AMERICAN_INDIAN', 'AMERICAN INDIAN/ALASKA NATIVE'),
    ]
    for indicatorSuffix, rawValue in ethnicityIndicators:
        demographics['ethnicity_' + indicatorSuffix] = (
            demographics['ethnicity'] == rawValue
        ).astype('int64')

    demographics = demographics.drop(columns='ethnicity')

### Merge the data

In [8]:
import pandas as pd


# Start from demographics joined to deaths, then inner-join every summary table
# on person_id. Each table's columns are suffixed with '_<stat>' before the
# join, so its key arrives as 'person_id_<stat>'; those duplicate key columns
# are removed once all six tables of a group have been joined.
mergedDf = demographics.merge(deaths, how='inner', on='person_id')

for summaryTables in (
    (
        ('min', vitals_min),
        ('max', vitals_max),
        ('avg', vitals_avg),
        ('stddev', vitals_stddev),
        ('first', vitals_first),
        ('last', vitals_last),
    ),
    (
        ('min', lab_measurements_min),
        ('max', lab_measurements_max),
        ('avg', lab_measurements_avg),
        ('stddev', lab_measurements_stddev),
        ('first', lab_measurements_first),
        ('last', lab_measurements_last),
    ),
):
    for stat, table in summaryTables:
        mergedDf = mergedDf.merge(
            table.add_suffix(f'_{stat}'),
            how='inner',
            left_on='person_id',
            right_on=f'person_id_{stat}',
        )
    mergedDf = mergedDf.drop(columns=[f'person_id_{stat}' for stat, _ in summaryTables])

# Show every column when the merged frame is displayed.
pd.set_option('display.max_columns', None)
mergedDf


Unnamed: 0,person_id,age,gender,ethnicity_WHITE,ethnicity_BLACK,ethnicity_UNKNOWN,ethnicity_OTHER,ethnicity_HISPANIC,ethnicity_ASIAN,ethnicity_UNABLE_TO_OBTAIN,ethnicity_AMERICAN_INDIAN,anchor_time,death_datetime,heartrate_min,sysbp_min,diabp_min,meanbp_min,resprate_min,tempc_min,spo2_min,gcseye_min,gcsverbal_min,gcsmotor_min,heartrate_max,sysbp_max,diabp_max,meanbp_max,resprate_max,tempc_max,spo2_max,gcseye_max,gcsverbal_max,gcsmotor_max,heartrate_avg,sysbp_avg,diabp_avg,meanbp_avg,resprate_avg,tempc_avg,spo2_avg,gcseye_avg,gcsverbal_avg,gcsmotor_avg,heartrate_stddev,sysbp_stddev,diabp_stddev,meanbp_stddev,resprate_stddev,tempc_stddev,spo2_stddev,gcseye_stddev,gcsverbal_stddev,gcsmotor_stddev,heartrate_first,sysbp_first,diabp_first,meanbp_first,resprate_first,tempc_first,spo2_first,gcseye_first,gcsverbal_first,gcsmotor_first,heartrate_last,sysbp_last,diabp_last,meanbp_last,resprate_last,tempc_last,spo2_last,gcseye_last,gcsverbal_last,gcsmotor_last,chloride_serum_min,creatinine_min,sodium_serum_min,hemoglobin_min,platelet_count_min,urea_nitrogen_min,glucose_serum_min,bicarbonate_min,potassium_serum_min,anion_gap_min,leukocytes_blood_manual_min,hematocrit_min,chloride_serum_max,creatinine_max,sodium_serum_max,hemoglobin_max,platelet_count_max,urea_nitrogen_max,glucose_serum_max,bicarbonate_max,potassium_serum_max,anion_gap_max,leukocytes_blood_manual_max,hematocrit_max,chloride_serum_avg,creatinine_avg,sodium_serum_avg,hemoglobin_avg,platelet_count_avg,urea_nitrogen_avg,glucose_serum_avg,bicarbonate_avg,potassium_serum_avg,anion_gap_avg,leukocytes_blood_manual_avg,hematocrit_avg,chloride_serum_stddev,creatinine_stddev,sodium_serum_stddev,hemoglobin_stddev,glucose_serum_stddev,bicarbonate_stddev,potassium_serum_stddev,chloride_serum_first,creatinine_first,sodium_serum_first,hemoglobin_first,platelet_count_first,urea_nitrogen_first,glucose_serum_first,bicarbonate_first,potassium_serum_first,anion_gap_first,leukocytes_blood_manual_first,hematocrit_first,chloride_serum_l
ast,creatinine_last,sodium_serum_last,hemoglobin_last,platelet_count_last,urea_nitrogen_last,glucose_serum_last,bicarbonate_last,potassium_serum_last,anion_gap_last,leukocytes_blood_manual_last,hematocrit_last
0,-2144679073,82.0,0,0,0,0,1,0,0,0,0,2190-01-30 19:22:00,2194-04-23 19:27:00,64.0,101.0,51.0,64.0,13.0,36.555556,91.0,3.0,5.0,6.0,85.0,174.0,129.0,136.0,24.0,37.611111,100.0,4.0,5.0,6.0,72.240741,135.081633,69.469388,85.775510,20.142857,36.932540,95.403846,3.750000,5.000000,6.000000,4.460740,18.113610,13.817948,13.163251,2.666381,0.304423,1.950282,0.439155,0.000000,0.000000,71.0,113.0,59.0,73.0,19.0,36.555556,96.0,4.0,5.0,6.0,73.0,157.0,72.0,95.0,20.0,36.611111,95.0,4.0,5.0,6.0,107.0,0.5,136.0,9.9,213.70,8.99,91.0,23.0,3.6,9.34,8.013,29.567,108.0,0.6,139.0,10.2,286.01,10.36,108.0,26.0,3.8,15.40,13.7438,32.209,107.666667,0.533333,137.666667,10.033333,264.174185,9.461219,96.666667,24.333333,3.666667,11.248667,12.131120,30.582149,0.500000,0.050000,1.322876,0.132288,8.500000,1.322876,0.100000,107.0,0.6,136.0,10.0,210.79,10.69,108.0,23.0,3.6,10.78,14.140,29.936,108.0,0.5,138.0,10.2,203.68,8.38,91.0,24.0,3.8,10.12,8.021,31.159
1,-2142084288,84.0,1,1,0,0,0,0,0,0,0,2161-07-10 08:07:00,,68.0,73.0,43.0,13.0,15.0,36.666667,84.0,4.0,5.0,6.0,148.0,150.0,102.0,113.0,32.0,37.888889,100.0,4.0,5.0,6.0,87.507692,109.937500,69.656250,80.546875,22.646154,37.077160,94.818182,4.000000,5.000000,6.000000,14.932534,19.670414,13.342512,16.458901,4.253337,0.355922,2.897270,0.000000,0.000000,0.000000,91.0,121.0,72.0,86.0,26.0,37.500000,90.0,4.0,5.0,6.0,78.0,131.0,83.0,99.0,32.0,36.833333,97.0,4.0,5.0,6.0,105.0,0.8,138.0,10.3,188.54,18.45,97.0,20.0,3.5,10.40,11.008,31.624,110.0,1.5,143.0,12.0,302.17,32.87,155.0,25.0,5.0,19.29,18.9150,37.430,106.333333,1.116667,140.000000,11.120000,267.997107,20.783786,123.000000,21.666667,4.233333,14.515881,14.713161,34.056806,1.775251,0.220880,1.705606,0.731513,21.481493,1.775251,0.457927,105.0,1.5,141.0,12.0,252.50,25.43,109.0,22.0,4.3,17.04,15.695,36.850,105.0,0.8,139.0,10.7,224.08,19.19,115.0,22.0,3.5,13.99,11.381,32.871
2,-2133944014,50.0,1,1,0,0,0,0,0,0,0,2141-03-25 16:45:00,,66.0,91.0,47.0,57.0,0.0,36.388889,82.0,2.0,1.0,5.0,108.0,166.0,106.0,116.0,27.0,37.277778,98.0,4.0,5.0,6.0,82.170213,123.657143,62.514286,76.017544,17.293103,36.837963,92.770833,3.727273,3.909091,5.909091,9.732595,17.877105,10.881100,11.088527,4.409510,0.280382,3.561428,0.631085,1.823369,0.294245,81.0,120.0,57.0,72.0,14.0,37.166667,97.0,2.0,1.0,5.0,80.0,116.0,58.0,71.0,18.0,36.555556,93.0,4.0,5.0,6.0,82.0,0.5,128.0,12.7,234.50,14.87,151.0,33.0,2.7,10.87,11.375,38.256,92.0,1.0,138.0,14.5,311.50,25.20,245.0,37.0,4.4,14.71,22.3950,40.168,87.800000,0.660000,132.800000,13.600000,249.328976,16.774946,188.400000,34.800000,3.340000,12.253091,14.354030,40.652467,3.489667,0.195505,3.735714,0.804984,34.342070,1.686548,0.691536,82.0,1.0,128.0,13.6,326.95,24.55,151.0,33.0,2.7,16.86,21.421,41.408,89.0,0.5,130.0,12.7,280.93,13.68,245.0,33.0,3.8,11.41,14.449,39.413
3,-2133227983,52.0,0,1,0,0,0,0,0,0,0,2120-09-22 10:15:00,2121-08-28 15:15:00,70.0,62.0,46.0,22.0,10.0,36.333333,90.0,3.0,2.0,5.0,115.0,140.0,107.0,117.0,39.0,38.444444,100.0,4.0,5.0,6.0,85.535211,99.904110,68.452055,78.301370,18.878378,36.756173,96.148649,3.900000,3.230769,5.461538,10.420901,16.519487,13.273869,14.949624,5.736097,0.502043,1.970206,0.316228,0.926809,0.518875,104.0,133.0,107.0,117.0,28.0,37.277778,97.0,4.0,4.0,5.0,81.0,115.0,56.0,75.0,20.0,36.666667,100.0,4.0,2.0,6.0,98.0,0.2,136.0,9.0,256.99,10.18,101.0,26.0,2.5,10.38,10.228,27.846,105.0,0.3,141.0,11.3,443.91,18.70,134.0,30.0,5.3,16.61,16.0130,34.087,102.000000,0.266667,139.250000,10.233333,327.037567,10.013539,115.750000,27.750000,3.700000,13.015508,12.521768,32.163245,3.162278,0.057735,2.217356,1.159023,15.478480,1.707825,1.166190,98.0,0.3,136.0,11.3,347.32,10.09,101.0,26.0,3.5,15.29,15.541,34.692,105.0,0.2,141.0,9.0,312.39,12.73,134.0,30.0,3.5,9.50,11.962,27.864
4,-2132499549,68.0,0,1,0,0,0,0,0,0,0,2205-11-16 13:07:00,,75.0,69.0,39.0,51.0,13.0,36.444444,89.0,3.0,5.0,6.0,139.0,142.0,89.0,99.0,28.0,37.000000,100.0,4.0,5.0,6.0,90.225000,102.589744,62.461538,71.564103,18.675000,36.648148,96.290323,3.666667,5.000000,6.000000,12.753729,16.654968,11.493298,12.315748,3.294702,0.220582,2.688086,0.485071,0.000000,0.000000,97.0,111.0,71.0,80.0,23.0,36.500000,97.0,4.0,5.0,6.0,103.0,114.0,60.0,74.0,28.0,37.000000,96.0,3.0,5.0,6.0,109.0,1.8,136.0,8.9,234.30,40.80,84.0,15.0,3.6,12.70,10.982,27.742,109.0,1.9,138.0,11.4,279.28,45.56,92.0,16.0,4.1,24.09,21.5410,35.739,109.000000,1.850000,137.000000,10.150000,200.969449,40.456093,88.000000,15.500000,3.850000,16.313815,15.356374,30.944207,0.000000,0.057735,1.154701,1.443376,4.618802,0.707107,0.353553,109.0,1.9,138.0,11.4,233.20,38.26,84.0,15.0,4.1,17.11,12.674,35.415,109.0,1.8,136.0,8.9,229.85,36.16,92.0,16.0,3.6,14.88,16.717,27.688
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2966,2142664100,65.0,1,1,0,0,0,0,0,0,0,2180-08-11 04:23:00,,88.0,81.0,51.0,57.0,11.0,36.388889,76.0,4.0,4.0,6.0,172.0,200.0,128.0,131.0,40.0,38.166667,100.0,4.0,5.0,6.0,113.687500,116.675676,75.648649,84.648649,26.151899,36.931373,96.139241,4.000000,4.923077,6.000000,17.044222,20.407487,14.748165,14.763839,5.485566,0.487989,4.471379,0.000000,0.271746,0.000000,122.0,116.0,79.0,87.0,32.0,37.277778,99.0,4.0,5.0,6.0,98.0,144.0,85.0,99.0,22.0,36.388889,94.0,4.0,5.0,6.0,97.0,0.8,132.0,8.6,92.00,13.00,115.0,22.0,3.4,11.00,8.700,26.400,102.0,1.1,138.0,10.7,174.00,17.00,193.0,28.0,4.1,17.00,19.1000,33.000,100.333333,0.961111,135.500000,9.450000,132.000000,14.750000,145.111111,24.388889,3.827778,14.750000,12.075000,29.450000,2.169305,0.109216,2.332633,0.736646,34.289235,2.428722,0.290649,102.0,1.1,138.0,10.7,174.00,14.00,159.0,23.0,3.8,17.00,9.800,33.000,97.0,1.0,132.0,9.2,92.00,17.00,193.0,24.0,4.1,15.00,19.100,28.300
2967,2144053271,71.0,0,1,0,0,0,0,0,0,0,2139-06-14 18:06:00,,54.0,75.0,26.0,52.0,0.0,34.666667,89.0,1.0,1.0,1.0,143.0,145.0,82.0,103.0,29.0,37.055556,100.0,2.0,1.0,6.0,80.178947,109.391304,58.717391,72.510791,17.674074,35.804989,98.053191,1.055556,1.000000,3.944444,21.383519,19.188513,11.921340,10.562695,6.859822,0.564411,2.744954,0.235702,0.000000,2.287362,101.0,131.0,57.0,58.0,22.0,37.055556,92.0,1.0,1.0,1.0,79.0,91.0,55.0,91.0,20.0,36.611111,100.0,1.0,1.0,6.0,97.0,1.7,132.0,10.2,177.44,42.50,63.0,16.0,3.4,14.63,11.851,30.999,102.0,5.2,137.0,11.0,283.64,56.76,115.0,30.0,4.2,19.47,17.6984,33.845,99.285714,3.314286,134.428571,10.700000,199.301064,46.713987,92.833333,22.571429,3.942857,16.885085,14.011737,33.182570,2.058663,1.409998,2.070197,0.309839,21.046773,5.740416,0.304725,102.0,4.6,132.0,10.5,222.99,69.53,108.0,19.0,4.2,17.02,16.249,31.810,97.0,1.7,136.0,11.0,191.72,39.59,110.0,30.0,4.0,12.92,11.655,33.818
2968,2144497079,65.0,1,1,0,0,0,0,0,0,0,2150-04-26 14:47:00,,49.0,89.0,34.0,50.0,11.0,36.111111,84.0,3.0,5.0,6.0,98.0,143.0,112.0,286.0,32.0,37.111111,100.0,4.0,5.0,6.0,76.584416,116.772152,50.531646,75.721519,18.297297,36.604798,96.513514,3.923077,5.000000,6.000000,10.212578,11.464650,10.212813,32.481229,4.129633,0.286044,2.158629,0.271746,0.000000,0.000000,50.0,120.0,44.0,63.0,16.0,36.500000,92.0,4.0,5.0,6.0,85.0,104.0,45.0,63.0,17.0,37.111111,95.0,4.0,5.0,6.0,89.0,3.0,125.0,10.5,112.00,35.00,108.0,13.0,3.5,19.00,6.900,30.400,94.0,6.1,133.0,13.7,157.00,108.00,244.0,26.0,6.2,27.00,12.5000,41.100,91.909091,4.855556,129.363636,11.600000,138.000000,79.555556,184.818182,19.545455,4.445455,22.636364,9.100000,34.283333,1.578475,1.098668,3.020692,0.879049,41.097937,4.353875,0.936269,89.0,4.9,127.0,13.7,157.00,92.00,180.0,17.0,6.2,27.00,12.500,41.100,92.0,3.0,132.0,11.5,144.00,35.00,200.0,23.0,4.5,22.00,9.200,33.900
2969,2144648302,91.0,0,1,0,0,0,0,0,0,0,2143-12-22 17:48:00,2143-12-24 05:35:00,0.0,25.0,21.0,13.0,0.0,35.722222,76.0,3.0,1.0,5.0,145.0,119.0,85.0,92.0,30.0,36.555556,100.0,4.0,4.0,6.0,110.571429,81.627907,56.465116,63.260870,20.047619,35.993056,85.857143,3.200000,2.333333,5.200000,29.933375,21.599188,13.616077,18.682654,6.828750,0.297606,6.666300,0.421637,1.366260,0.447214,115.0,113.0,76.0,92.0,12.0,35.722222,78.0,4.0,4.0,6.0,0.0,25.0,21.0,13.0,0.0,36.111111,87.0,3.0,2.0,5.0,103.0,3.5,139.0,13.5,97.00,76.00,108.0,18.0,5.3,19.00,16.800,42.300,107.0,4.0,139.0,13.6,104.00,87.00,116.0,20.0,5.8,22.00,20.3000,42.800,105.400000,3.700000,139.000000,13.540000,100.500000,81.500000,111.200000,18.800000,5.500000,20.500000,18.550000,42.550000,2.190890,0.273861,0.000000,0.054772,4.381780,1.095445,0.273861,103.0,4.0,139.0,13.6,97.00,87.00,116.0,20.0,5.8,22.00,16.800,42.300,107.0,3.5,139.0,13.5,104.00,76.00,108.0,18.0,5.3,19.00,20.300,42.800


### Save raw data matrix

In [9]:
import os

# Save the raw (un-standardised) data matrix without the DataFrame index.
# The target directory is created first so the save does not fail when
# 'data_matrix/' has not been created yet under the data directory.
rawDataMatrixPath = dataDirName + 'data_matrix/data_matrix_raw.csv'
os.makedirs(os.path.dirname(rawDataMatrixPath), exist_ok=True)
mergedDf.to_csv(rawDataMatrixPath, index=False)

## Create standardised data matrix

### Standardise the data

In [10]:
import os
import subprocess


# Location of the EHRQC checkout whose virtualenv provides ehrqc.qc.Standardise.
# Defaults to the original path; set EHRQC_DIR to use a different checkout.
ehrqcDirName = os.environ.get('EHRQC_DIR') or '/superbugai-data/yash/chapter_1/workspace/EHRQC'
ehrqcPython = os.path.join(ehrqcDirName, '.venv', 'bin', 'python')

summaryStats = ['min', 'max', 'avg', 'stddev', 'first', 'last']
vitalsColumns = [
    'heartrate', 'sysbp', 'diabp', 'meanbp', 'resprate',
    'tempc', 'spo2', 'gcseye', 'gcsverbal', 'gcsmotor',
]
labColumns = [
    'chloride_serum', 'creatinine', 'sodium_serum', 'hemoglobin',
    'platelet_count', 'urea_nitrogen', 'glucose_serum', 'bicarbonate',
    'potassium_serum', 'anion_gap', 'leukocytes_blood_manual', 'hematocrit',
]
# The stddev lab table is standardised on a reduced set of columns.
labStddevColumns = [
    'chloride_serum', 'creatinine', 'sodium_serum', 'hemoglobin',
    'glucose_serum', 'bicarbonate', 'potassium_serum',
]

# (table name, columns to standardise), in the order the tables are processed.
standardiseJobs = [('vitals_' + stat, vitalsColumns) for stat in summaryStats]
standardiseJobs += [
    ('lab_measurements_' + stat, labStddevColumns if stat == 'stddev' else labColumns)
    for stat in summaryStats
]

# Make sure the output directory exists before the tool writes into it.
os.makedirs(dataDirName + 'standardised', exist_ok=True)

# Run the Standardise tool once per table, from inside the EHRQC checkout.
# Arguments are passed as a list (no shell), so paths are not subject to shell
# word-splitting or quoting problems. check=True stops at the first failing
# table by raising CalledProcessError; a ';'-chained shell command would carry
# on after a failure and report only the exit status of the last command,
# letting the next cells read stale or missing output files.
for tableName, columns in standardiseJobs:
    subprocess.run(
        [
            ehrqcPython, '-m', 'ehrqc.qc.Standardise',
            dataDirName + 'outlier_removal/' + tableName + '_icd_corrected.csv',
            dataDirName + 'standardised/' + tableName + '_icd.csv',
            '-c', ','.join(columns),
        ],
        cwd=ehrqcDirName,
        check=True,
    )

2023-09-21 19:41:30,997 - EHRQC - INFO - Parsing command line arguments
2023-09-21 19:41:30,998 - EHRQC - INFO - args.source_path: /home/yram0006/phd/chapter_2/workspace/mortality_data/standardisation_experiment/outlier_removal/vitals_min_icd_corrected.csv
2023-09-21 19:41:30,998 - EHRQC - INFO - args.save_path: /home/yram0006/phd/chapter_2/workspace/mortality_data/standardisation_experiment/standardised/vitals_min_icd.csv
2023-09-21 19:41:30,998 - EHRQC - INFO - args.columns: heartrate,sysbp,diabp,meanbp,resprate,tempc,spo2,gcseye,gcsverbal,gcsmotor
2023-09-21 19:41:30,999 - EHRQC - INFO - args.scaler_save_path: None
2023-09-21 19:41:31,060 - EHRQC - INFO - Done!!
2023-09-21 19:41:31,785 - EHRQC - INFO - Parsing command line arguments
2023-09-21 19:41:31,786 - EHRQC - INFO - args.source_path: /home/yram0006/phd/chapter_2/workspace/mortality_data/standardisation_experiment/outlier_removal/vitals_max_icd_corrected.csv
2023-09-21 19:41:31,786 - EHRQC - INFO - args.save_path: /home/yram00

0

### Read data files

In [11]:
import pandas as pd


# Patient-level extract tables (re-read so the encoding cells start from raw values).
demographics = pd.read_csv(f'{dataDirName}extract/demographics_icd.csv')
deaths = pd.read_csv(f'{dataDirName}extract/deaths.csv')

# Standardised vitals: one table per summary statistic, read in the same order
# as the names on the left-hand side.
(vitals_min, vitals_max, vitals_avg,
 vitals_stddev, vitals_first, vitals_last) = (
    pd.read_csv(f'{dataDirName}standardised/vitals_{stat}_icd.csv')
    for stat in ('min', 'max', 'avg', 'stddev', 'first', 'last')
)

# Standardised lab measurements: same layout as the vitals tables.
(lab_measurements_min, lab_measurements_max, lab_measurements_avg,
 lab_measurements_stddev, lab_measurements_first, lab_measurements_last) = (
    pd.read_csv(f'{dataDirName}standardised/lab_measurements_{stat}_icd.csv')
    for stat in ('min', 'max', 'avg', 'stddev', 'first', 'last')
)

### Display data counts

In [12]:
# Shape of the demographics table as a (rows, columns) tuple.
demographics.shape

(12241, 4)

In [13]:
# (rows, columns) of each standardised vitals table, in min/max/avg/stddev/first/last order.
tuple(
    table.shape
    for table in (vitals_min, vitals_max, vitals_avg, vitals_stddev, vitals_first, vitals_last)
)

((3942, 11), (3942, 11), (3942, 11), (3942, 11), (3942, 11), (3942, 11))

In [14]:
# (rows, columns) of each standardised lab table, in min/max/avg/stddev/first/last order.
tuple(
    table.shape
    for table in (
        lab_measurements_min,
        lab_measurements_max,
        lab_measurements_avg,
        lab_measurements_stddev,
        lab_measurements_first,
        lab_measurements_last,
    )
)

((7448, 13), (7450, 13), (7450, 13), (7584, 8), (7450, 13), (7448, 13))

### Categorical value handling

In [15]:
# Encode gender as a binary indicator: 1 for 'M', 0 for every other value
# (missing values included).
# The already-encoded value 1 is accepted alongside 'M' so that re-running this
# cell leaves the column unchanged; comparing against 'M' alone would reset
# every row to 0 on a second run, because the column then holds 0/1 integers.
demographics['gender'] = demographics['gender'].isin(['M', 1]).astype('int64')

In [16]:
# One-hot encode ethnicity into eight 0/1 indicator columns and then remove the
# original text column. A row whose ethnicity matches none of the listed values
# (or is missing) gets 0 in every indicator.
# The guard makes the cell safe to re-run: once the text column has been
# dropped the indicators already exist, whereas an unguarded second run would
# fail with AttributeError on the missing 'ethnicity' column.
if 'ethnicity' in demographics.columns:
    ethnicityIndicators = [
        # (indicator column suffix, raw ethnicity value)
        ('WHITE', 'WHITE'),
        ('BLACK', 'BLACK/AFRICAN AMERICAN'),
        ('UNKNOWN', 'UNKNOWN'),
        ('OTHER', 'OTHER'),
        ('HISPANIC', 'HISPANIC/LATINO'),
        ('ASIAN', 'ASIAN'),
        ('UNABLE_TO_OBTAIN', 'UNABLE TO OBTAIN'),
        ('AMERICAN_INDIAN', 'AMERICAN INDIAN/ALASKA NATIVE'),
    ]
    for indicatorSuffix, rawValue in ethnicityIndicators:
        demographics['ethnicity_' + indicatorSuffix] = (
            demographics['ethnicity'] == rawValue
        ).astype('int64')

    demographics = demographics.drop(columns='ethnicity')

### Merge the data

In [17]:
import pandas as pd


# Start from demographics joined to deaths, then inner-join every summary table
# on person_id. Each table's columns are suffixed with '_<stat>' before the
# join, so its key arrives as 'person_id_<stat>'; those duplicate key columns
# are removed once all six tables of a group have been joined.
mergedDf = demographics.merge(deaths, how='inner', on='person_id')

for summaryTables in (
    (
        ('min', vitals_min),
        ('max', vitals_max),
        ('avg', vitals_avg),
        ('stddev', vitals_stddev),
        ('first', vitals_first),
        ('last', vitals_last),
    ),
    (
        ('min', lab_measurements_min),
        ('max', lab_measurements_max),
        ('avg', lab_measurements_avg),
        ('stddev', lab_measurements_stddev),
        ('first', lab_measurements_first),
        ('last', lab_measurements_last),
    ),
):
    for stat, table in summaryTables:
        mergedDf = mergedDf.merge(
            table.add_suffix(f'_{stat}'),
            how='inner',
            left_on='person_id',
            right_on=f'person_id_{stat}',
        )
    mergedDf = mergedDf.drop(columns=[f'person_id_{stat}' for stat, _ in summaryTables])

# Show every column when the merged frame is displayed.
pd.set_option('display.max_columns', None)
mergedDf

Unnamed: 0,person_id,age,gender,ethnicity_WHITE,ethnicity_BLACK,ethnicity_UNKNOWN,ethnicity_OTHER,ethnicity_HISPANIC,ethnicity_ASIAN,ethnicity_UNABLE_TO_OBTAIN,ethnicity_AMERICAN_INDIAN,anchor_time,death_datetime,heartrate_min,sysbp_min,diabp_min,meanbp_min,resprate_min,tempc_min,spo2_min,gcseye_min,gcsverbal_min,gcsmotor_min,heartrate_max,sysbp_max,diabp_max,meanbp_max,resprate_max,tempc_max,spo2_max,gcseye_max,gcsverbal_max,gcsmotor_max,heartrate_avg,sysbp_avg,diabp_avg,meanbp_avg,resprate_avg,tempc_avg,spo2_avg,gcseye_avg,gcsverbal_avg,gcsmotor_avg,heartrate_stddev,sysbp_stddev,diabp_stddev,meanbp_stddev,resprate_stddev,tempc_stddev,spo2_stddev,gcseye_stddev,gcsverbal_stddev,gcsmotor_stddev,heartrate_first,sysbp_first,diabp_first,meanbp_first,resprate_first,tempc_first,spo2_first,gcseye_first,gcsverbal_first,gcsmotor_first,heartrate_last,sysbp_last,diabp_last,meanbp_last,resprate_last,tempc_last,spo2_last,gcseye_last,gcsverbal_last,gcsmotor_last,chloride_serum_min,creatinine_min,sodium_serum_min,hemoglobin_min,platelet_count_min,urea_nitrogen_min,glucose_serum_min,bicarbonate_min,potassium_serum_min,anion_gap_min,leukocytes_blood_manual_min,hematocrit_min,chloride_serum_max,creatinine_max,sodium_serum_max,hemoglobin_max,platelet_count_max,urea_nitrogen_max,glucose_serum_max,bicarbonate_max,potassium_serum_max,anion_gap_max,leukocytes_blood_manual_max,hematocrit_max,chloride_serum_avg,creatinine_avg,sodium_serum_avg,hemoglobin_avg,platelet_count_avg,urea_nitrogen_avg,glucose_serum_avg,bicarbonate_avg,potassium_serum_avg,anion_gap_avg,leukocytes_blood_manual_avg,hematocrit_avg,chloride_serum_stddev,creatinine_stddev,sodium_serum_stddev,hemoglobin_stddev,glucose_serum_stddev,bicarbonate_stddev,potassium_serum_stddev,chloride_serum_first,creatinine_first,sodium_serum_first,hemoglobin_first,platelet_count_first,urea_nitrogen_first,glucose_serum_first,bicarbonate_first,potassium_serum_first,anion_gap_first,leukocytes_blood_manual_first,hematocrit_first,chloride_serum_l
ast,creatinine_last,sodium_serum_last,hemoglobin_last,platelet_count_last,urea_nitrogen_last,glucose_serum_last,bicarbonate_last,potassium_serum_last,anion_gap_last,leukocytes_blood_manual_last,hematocrit_last
0,-2144679073,82.0,0,0,0,0,1,0,0,0,0,2190-01-30 19:22:00,2194-04-23 19:27:00,-0.272935,1.217839,0.923212,0.861995,0.878056,0.189531,0.317210,0.412686,1.239648,0.878002,-0.030098,-0.001912,0.069372,0.021625,-0.309184,-0.058288,-0.021353,0.369540,0.668464,0.340834,-0.671692,0.838949,0.734496,0.755155,0.000314,-0.003214,-0.100513,0.595537,1.041396,0.658422,-0.047480,-0.000181,0.026737,-0.025151,-0.700179,-0.113891,-0.042766,-0.055999,-0.695068,-0.770770,-1.241187,-0.101886,-0.094023,-0.278632,-0.162120,-0.089424,-0.015479,0.728786,0.909948,0.608757,-0.521836,1.716770,0.615999,0.848890,-0.031562,-0.049323,0.009546,0.659929,0.897364,0.536865,1.093201,-0.663598,0.163392,0.285519,0.319702,-0.795488,-0.012648,0.669085,-0.123994,-1.009209,-0.239107,0.134963,0.21506,-0.011713,-0.308786,-0.534630,0.256925,-1.006628,-0.024202,0.276622,-1.026043,-0.718985,-0.036992,-0.419631,-0.008149,-0.014662,-0.014572,-0.012889,0.455496,-0.952960,-0.020341,0.005902,-0.014080,-1.148345,-0.081704,-0.177350,-0.012315,-0.011540,-0.591288,-0.011750,-0.030978,-0.601296,-1.055479,0.743482,-0.694422,-0.192286,-0.406065,-0.196589,-0.891693,-0.019107,0.207140,-0.788143,-1.349868,-0.004275,-0.557117,0.656658,-0.695417,-0.086976,0.157615,0.001462,-0.897304,-0.015104,0.214698,-0.409186,-1.010112,-0.420646,0.120969
1,-2142084288,84.0,1,1,0,0,0,0,0,0,0,2161-07-10 08:07:00,,-0.073762,-0.427517,0.222531,-1.935277,1.165407,0.217700,-0.256094,1.199462,1.239648,0.878002,-0.004133,-0.014906,-0.026538,-0.053893,-0.057867,-0.039932,-0.021353,0.369540,0.668464,0.340834,-0.134020,-0.066258,0.750738,0.395407,0.616876,0.086614,-0.167659,0.860642,1.041396,0.658422,-0.001198,0.007219,0.014602,0.037586,-0.347878,-0.101605,-0.018478,-1.070385,-0.695068,-0.770770,-0.299668,0.243911,0.060522,0.376242,0.706462,0.306038,-0.056421,0.728786,0.909948,0.608757,-0.313781,0.683249,1.279241,1.023441,1.694040,-0.022363,0.236787,0.659929,0.897364,0.536865,0.793276,-0.435946,0.507732,0.486684,0.071532,-0.319467,-0.012130,0.065813,-0.301052,-0.690244,0.206996,0.491982,0.51092,-0.011635,0.432082,0.320030,0.384789,-0.170504,-0.021859,0.058379,0.292496,0.079856,-0.001346,0.443951,-0.009742,-0.014269,0.007673,-0.012234,0.490182,-0.444576,-0.018119,-0.016067,-0.011103,-0.272878,0.072240,0.463159,-0.011900,-0.011497,-0.344037,-0.011612,-0.029308,-0.225252,0.126483,0.473839,-0.173891,0.613881,0.464401,0.145551,-0.298895,-0.019045,0.012184,-0.025617,-0.016817,0.171130,0.485378,0.183622,-0.508203,0.113179,0.430819,0.184796,-0.427277,-0.013033,-0.196086,-0.900731,-0.159906,-0.072871,0.440591
2,-2133944014,50.0,1,1,0,0,0,0,0,0,0,2141-03-25 16:45:00,,-0.173349,0.630212,0.572872,0.478056,-0.989724,0.147277,-0.419896,-0.374090,-0.933000,0.421594,-0.020619,-0.006243,-0.012329,-0.044043,-0.214940,-0.080314,-0.030579,0.369540,0.668464,0.340834,-0.321996,0.427659,0.129935,0.083773,-0.701581,-0.061959,-0.402384,0.571436,0.394788,0.594722,-0.024180,-0.001306,-0.048224,-0.064646,-0.313207,-0.119627,-0.001443,0.387333,1.973704,-0.373017,-0.770428,0.200686,-0.117799,-0.329007,-0.782536,0.166463,-0.008655,-0.908147,-1.251745,0.069475,-0.230559,0.086987,-0.228127,-0.198415,-0.319163,-0.056063,-0.217696,0.659929,0.897364,0.536865,-2.655865,-0.663598,-1.213967,1.693676,0.524867,-0.499610,-0.007469,2.679992,-1.717519,-0.548816,0.261661,1.643051,-2.15182,-0.011678,-0.494002,1.507057,0.458612,-0.455402,-0.017373,2.677287,-0.366773,-0.860681,0.022642,0.896831,-0.031892,-0.014576,-0.060970,-0.010740,0.320804,-0.624573,-0.012598,0.092129,-0.015796,-0.879204,0.050828,1.678985,-0.011341,-0.011504,0.967452,-0.011595,-0.027653,-0.298987,0.897919,-2.627059,-0.463075,-1.482154,1.160773,0.756251,-0.334286,-0.016481,2.156698,-1.768534,-0.055147,0.817025,1.172635,-2.339231,-0.695417,-1.688223,1.523636,0.695706,-0.666856,-0.001811,2.063223,-0.409186,-0.726710,0.244681,1.661949
3,-2133227983,52.0,0,1,0,0,0,0,0,0,0,2120-09-22 10:15:00,2121-08-28 15:15:00,0.025824,-1.073907,0.485287,-1.441641,0.447030,0.133192,0.235309,0.412686,-0.389838,0.421594,-0.017734,-0.020320,-0.008777,-0.040759,0.162035,-0.003220,-0.021353,0.369540,0.668464,0.340834,-0.203487,-0.427468,0.646066,0.240908,-0.311127,-0.112761,-0.015123,0.754600,-0.007269,0.281123,-0.021138,-0.007759,0.012850,0.008855,-0.018708,-0.066744,-0.042255,-0.339944,0.661454,-0.069367,0.312319,0.762606,0.476605,1.937864,0.954628,0.212988,-0.008655,0.728786,0.369525,0.069475,-0.188948,0.047236,-0.348717,-0.023864,-0.031562,-0.042583,0.577649,0.659929,-0.728398,0.536865,-0.256463,-0.891251,0.163392,-0.167103,0.746702,-0.735608,-0.011785,1.272357,-2.071636,-0.696262,0.090816,-0.163738,-0.22873,-0.011739,0.061648,-0.012338,1.506291,-0.696842,-0.022906,1.149591,0.622131,-0.470502,-0.021350,-0.108999,-0.014921,-0.014841,0.000523,-0.012769,1.025862,-0.928160,-0.018731,0.034049,-0.013905,-0.674910,-0.058413,0.114105,-0.011448,-0.011538,-0.013437,-0.011513,-0.030080,-0.281300,2.465343,-0.469912,-0.867932,-0.192286,0.159738,0.923342,-0.915824,-0.019534,0.792007,-0.897076,-0.389475,0.153759,0.159994,0.183622,-0.882632,0.513491,-0.498075,0.978436,-0.708163,-0.011392,1.447048,-0.900731,-1.146321,-0.012734,-0.494190
4,-2132499549,68.0,0,1,0,0,0,0,0,0,0,2205-11-16 13:07:00,,0.274789,-0.662568,-0.127809,0.148965,0.878056,0.161361,0.153408,0.412686,1.239648,0.878002,-0.007843,-0.019237,-0.072717,-0.099860,-0.183525,-0.098670,-0.021353,0.369540,0.668464,0.340834,-0.038322,-0.330783,0.125350,-0.222640,-0.361220,-0.179859,0.001120,0.507168,1.041396,0.658422,-0.010828,-0.007115,-0.032598,-0.041284,-0.560693,-0.133894,-0.023843,0.050061,-0.695068,-0.770770,-0.017212,-0.188335,0.048634,0.073992,0.334212,-0.112686,-0.008655,0.728786,0.909948,0.608757,0.726493,0.007485,-0.107538,-0.067502,1.118839,-0.002144,0.123166,-0.255262,0.897364,0.536865,1.393127,0.322895,0.163392,-0.217395,0.522894,0.805172,-0.013252,-0.939640,-0.123994,0.001850,0.203124,-0.181789,0.36299,-0.011601,-0.494002,0.035143,0.203674,0.300861,-0.025000,-1.905801,-0.696408,1.065572,0.016755,0.164250,-0.006555,-0.013775,-0.020928,-0.012819,-0.117967,0.438712,-0.021073,-0.066869,-0.013117,0.208888,0.110589,-0.110609,-0.012478,-0.011538,-0.699933,-0.011448,-0.031477,-1.113163,-0.218183,1.013126,0.057455,0.130181,0.203261,-0.012764,0.217089,-0.020572,-1.352507,-0.243482,-0.001911,-0.169640,0.269008,0.814336,0.115846,-0.487288,-0.552716,0.236651,0.310592,-0.015018,-1.428436,-0.736882,0.035620,0.479430,-0.527049
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2966,2142664100,65.0,1,1,0,0,0,0,0,0,0,2180-08-11 04:23:00,,0.922099,0.042585,0.923212,0.478056,0.590705,0.147277,-0.911299,1.199462,0.696486,0.878002,0.005758,0.012165,0.065819,0.005208,0.193450,-0.021576,-0.021353,0.369540,0.668464,0.340834,0.787982,0.176321,1.271618,0.677623,1.480342,-0.003939,-0.016202,0.860642,0.995802,0.658422,0.008135,0.010722,0.050481,0.005318,-0.074325,-0.070097,0.021896,-1.070385,-0.297327,-0.770770,1.159687,0.027788,0.143739,0.426617,1.450961,0.212988,0.004993,0.728786,0.909948,0.608757,0.518438,1.200010,1.399830,1.023441,0.256038,-0.076283,-0.104075,0.659929,0.897364,0.536865,-0.406425,-0.435946,-0.525287,-0.368269,-0.880709,-0.593707,-0.010576,0.467995,-0.478110,-0.509698,-0.136778,-0.414710,-0.67252,-0.011670,-0.494002,-0.297224,-0.629342,-0.759988,-0.019965,0.713106,-0.696408,-0.390413,-0.000071,-0.288795,-0.016913,-0.014374,-0.035229,-0.013240,-0.743734,-0.715493,-0.016252,0.006360,-0.013233,-0.210144,-0.085050,-0.386047,-0.011771,-0.011525,0.061035,-0.011611,-0.027660,0.317957,-0.425908,0.069374,-0.405238,0.130181,-0.101402,-0.498371,-0.758575,-0.015993,0.207140,-0.570279,-0.025335,-0.493829,-0.095126,-1.077804,-0.383393,-1.287911,-0.388793,-1.002204,-0.522500,-0.006300,0.214698,0.082360,0.061983,0.726081,-0.412791
2967,2144053271,71.0,0,1,0,0,0,0,0,0,0,2139-06-14 18:06:00,,-0.770865,-0.309992,-1.266415,0.203813,-0.989724,-0.289349,0.153408,-1.160865,-0.933000,-1.404035,-0.006194,-0.017613,-0.097583,-0.086726,-0.152111,-0.094999,-0.021353,-2.341504,-1.697886,0.340834,-0.392124,-0.085922,-0.200104,-0.157504,-0.607748,-0.703571,0.203230,-2.261705,-1.329499,-0.781902,0.027313,0.004928,-0.021672,-0.074656,0.230757,-0.051864,-0.022384,-0.525946,-0.695068,2.321236,0.171092,0.676157,-0.117799,-1.034255,0.210129,0.119938,-0.042774,-1.726614,-1.251745,-2.087654,-0.272170,-0.906783,-0.409011,0.674339,-0.031562,-0.049323,0.577649,-2.085645,-1.270319,0.536865,-0.406425,0.247011,-0.525287,0.436393,-0.037955,0.890715,-0.015065,-0.738549,-0.478110,0.582608,0.332560,0.383505,-0.67252,-0.011316,-0.679219,-0.154781,0.238172,0.716880,-0.023853,1.149591,-0.586530,0.116820,-0.009732,-0.149027,-0.018165,-0.012790,-0.045443,-0.012487,-0.133104,0.719692,-0.020665,-0.008613,-0.012629,0.361963,0.030420,0.302005,-0.011807,-0.011204,-0.108504,-0.011709,-0.029364,3.070855,-0.379428,0.069374,1.619047,-0.837220,-0.188448,-0.096515,1.474674,-0.019107,-0.572683,-0.134549,-0.021076,0.233621,-0.274555,-1.077804,0.053441,-0.487288,0.594742,-0.106022,0.459732,-0.013464,1.447048,-0.081489,-0.394976,-0.044510,0.617391
2968,2144497079,65.0,1,1,0,0,0,0,0,0,0,2150-04-26 14:47:00,,-1.019831,0.512686,-0.565734,0.094116,0.590705,0.076853,-0.256094,0.412686,1.239648,0.878002,-0.024740,-0.018696,0.008984,0.514128,-0.057867,-0.091328,-0.021353,0.369540,0.668464,0.340834,-0.518717,0.179794,-0.911637,0.063405,-0.454248,-0.206785,0.026708,0.779071,1.041396,0.658422,-0.022059,-0.031786,-0.065282,0.342593,-0.375340,-0.118276,-0.037423,-0.442689,-0.695068,-0.770770,-2.229782,0.200686,-0.272345,-0.782381,-0.534370,-0.112686,-0.042774,0.728786,0.909948,0.608757,-0.022504,-0.390023,-1.011959,-0.547517,-0.462963,0.011336,0.009546,0.659929,0.897364,0.536865,-1.606127,1.233504,-1.730477,0.587266,-0.683435,0.513319,-0.011180,-1.341821,-0.301052,1.897587,-0.404887,0.279541,-1.85596,-0.011238,-1.420087,1.127208,-0.763852,2.620167,-0.017423,0.276622,1.611036,1.663162,-0.045566,1.050989,-0.026981,-0.011752,-0.093731,-0.011945,-0.689295,2.194280,-0.012900,-0.033542,-0.009988,1.903047,-0.262423,0.504916,-0.011964,-0.011281,0.505534,-0.011578,-0.026784,1.918271,1.706086,-1.683307,1.792557,-1.643388,1.204297,-0.637819,2.378350,-0.014711,-0.962595,2.044097,2.104140,-0.189268,1.126194,-1.866196,0.864704,-1.287911,0.867946,-0.534881,0.260155,-0.005695,0.009306,0.737753,1.599825,-0.298614,0.632700
2969,2144648302,91.0,0,1,0,0,0,0,0,0,0,2143-12-22 17:48:00,2143-12-24 05:35:00,-3.459691,-3.248127,-1.704340,-1.935277,-0.989724,-0.021740,-0.911299,0.412686,-0.933000,0.421594,-0.005370,-0.031690,-0.086926,-0.122843,-0.120696,-0.128039,-0.021353,0.369540,0.076876,0.340834,0.678240,-1.085425,-0.395879,-0.793931,-0.023143,-0.586757,-1.195029,0.012306,-0.539201,0.097864,0.065100,0.016387,0.021585,0.079918,0.223859,-0.115518,0.078192,-0.096463,1.304656,-0.166237,0.830155,-0.101886,0.108074,0.678491,-1.030702,-0.438361,-0.138307,0.728786,0.369525,0.608757,-3.559438,-3.530337,-2.459032,-2.729403,-2.907566,-0.109982,-0.899419,-0.255262,-0.728398,-0.060221,0.493351,1.612924,0.679902,2.096006,-0.831390,2.576414,-0.011180,-0.336368,2.885998,1.897587,1.069710,2.344939,0.06713,-0.011419,-0.308786,1.079727,-1.183209,1.840131,-0.023804,-1.032832,1.171523,0.636375,0.008201,1.332178,-0.010858,-0.012530,-0.001860,-0.010777,-1.029536,2.281586,-0.019115,-0.039683,-0.004448,1.330598,0.300997,2.028771,-0.011764,-0.011484,-1.445892,-0.011768,-0.031508,-0.790351,-0.481346,0.204196,1.272027,0.291414,1.160773,-1.129988,2.177265,-0.018618,-0.377728,1.608368,1.039403,0.295774,1.307131,0.498979,1.176729,0.113179,1.960762,-0.894360,2.042868,-0.013637,-1.017652,2.048540,0.940750,0.850287,2.294285


### Save standardised data matrix

In [18]:
# Write the merged standardised matrix to the data_matrix folder, leaving the
# DataFrame index out of the CSV.
standardisedMatrixPath = dataDirName + 'data_matrix/data_matrix_standardised.csv'
mergedDf.to_csv(standardisedMatrixPath, index=False)

## Create rescaled data matrix

### Rescale the data

In [19]:
import os
import subprocess


# Rescale every outlier-corrected aggregate file with the EHRQC Rescale module.
#
# Each file is processed by its own subprocess call with check=True, so a
# failing run raises CalledProcessError immediately. The previous single
# ';'-chained shell string only surfaced the exit status of the last command,
# which let earlier failures (including a failed `cd`) pass silently.
# Arguments are passed as a list, so paths are never re-parsed by a shell.

# Checkout of EHRQC; its virtualenv interpreter runs the module from this directory
ehrqcDir = '/superbugai-data/yash/chapter_1/workspace/EHRQC'
ehrqcPython = os.path.join(ehrqcDir, '.venv/bin/python')

aggregateNames = ('min', 'max', 'avg', 'stddev', 'first', 'last')

# Comma-separated column lists handed to the module's -c option
vitalsColumns = 'heartrate,sysbp,diabp,meanbp,resprate,tempc,spo2,gcseye,gcsverbal,gcsmotor'
labColumns = (
    'chloride_serum,creatinine,sodium_serum,hemoglobin,platelet_count,urea_nitrogen,'
    'glucose_serum,bicarbonate,potassium_serum,anion_gap,leukocytes_blood_manual,hematocrit'
)
# The stddev lab file is rescaled on a reduced column set
labStddevColumns = 'chloride_serum,creatinine,sodium_serum,hemoglobin,glucose_serum,bicarbonate,potassium_serum'

# (file stem, columns) pairs in the same order as before: vitals first, then labs
rescaleJobs = [('vitals_' + aggregate, vitalsColumns) for aggregate in aggregateNames]
rescaleJobs += [
    ('lab_measurements_' + aggregate, labStddevColumns if aggregate == 'stddev' else labColumns)
    for aggregate in aggregateNames
]

for fileStem, rescaleColumns in rescaleJobs:
    subprocess.run(
        [
            ehrqcPython, '-m', 'ehrqc.qc.Rescale',
            dataDirName + 'outlier_removal/' + fileStem + '_icd_corrected.csv',
            dataDirName + 'rescaled/' + fileStem + '_icd.csv',
            '-c', rescaleColumns,
        ],
        cwd=ehrqcDir,
        check=True,
    )

2023-09-21 19:41:41,655 - EHRQC - INFO - Parsing command line arguments
2023-09-21 19:41:41,656 - EHRQC - INFO - args.source_path: /home/yram0006/phd/chapter_2/workspace/mortality_data/standardisation_experiment/outlier_removal/vitals_min_icd_corrected.csv
2023-09-21 19:41:41,656 - EHRQC - INFO - args.save_path: /home/yram0006/phd/chapter_2/workspace/mortality_data/standardisation_experiment/rescaled/vitals_min_icd.csv
2023-09-21 19:41:41,656 - EHRQC - INFO - args.columns: heartrate,sysbp,diabp,meanbp,resprate,tempc,spo2,gcseye,gcsverbal,gcsmotor
2023-09-21 19:41:41,657 - EHRQC - INFO - args.scaler_save_path: None
2023-09-21 19:41:41,657 - EHRQC - INFO - args.min: 0
2023-09-21 19:41:41,657 - EHRQC - INFO - args.max: 1
2023-09-21 19:41:41,708 - EHRQC - INFO - Done!!
2023-09-21 19:41:42,416 - EHRQC - INFO - Parsing command line arguments
2023-09-21 19:41:42,418 - EHRQC - INFO - args.source_path: /home/yram0006/phd/chapter_2/workspace/mortality_data/standardisation_experiment/outlier_remo

0

In [20]:
import pandas as pd


# Demographics and deaths are reloaded from the extract folder; the vitals and
# lab aggregates now come from the rescaled folder.
demographics, deaths = (
    pd.read_csv(dataDirName + 'extract/' + extractName + '.csv')
    for extractName in ('demographics_icd', 'deaths')
)

aggregateNames = ('min', 'max', 'avg', 'stddev', 'first', 'last')

vitals_min, vitals_max, vitals_avg, vitals_stddev, vitals_first, vitals_last = (
    pd.read_csv(dataDirName + 'rescaled/vitals_' + aggregateName + '_icd.csv')
    for aggregateName in aggregateNames
)

(
    lab_measurements_min,
    lab_measurements_max,
    lab_measurements_avg,
    lab_measurements_stddev,
    lab_measurements_first,
    lab_measurements_last,
) = (
    pd.read_csv(dataDirName + 'rescaled/lab_measurements_' + aggregateName + '_icd.csv')
    for aggregateName in aggregateNames
)

### Display data counts

In [21]:
# (rows, columns) of the freshly reloaded demographics table
demographics.shape

(12241, 4)

In [22]:
# (rows, columns) of each rescaled vitals aggregate, in min/max/avg/stddev/first/last order
tuple(
    vitalsFrame.shape
    for vitalsFrame in (vitals_min, vitals_max, vitals_avg, vitals_stddev, vitals_first, vitals_last)
)

((3942, 11), (3942, 11), (3942, 11), (3942, 11), (3942, 11), (3942, 11))

In [23]:
# (rows, columns) of each rescaled lab aggregate, in min/max/avg/stddev/first/last order
tuple(
    labFrame.shape
    for labFrame in (
        lab_measurements_min,
        lab_measurements_max,
        lab_measurements_avg,
        lab_measurements_stddev,
        lab_measurements_first,
        lab_measurements_last,
    )
)

((7448, 13), (7450, 13), (7450, 13), (7584, 8), (7450, 13), (7448, 13))

### Categorical value handling

In [24]:
# Binary-encode gender: 'M' becomes 1, every other value (including missing) becomes 0
demographics['gender'] = demographics['gender'].eq('M').astype('int64')

In [25]:
# Indicator column -> raw ethnicity label it flags. Insertion order fixes the
# order in which the indicator columns are appended to the frame.
ethnicityIndicators = {
    'ethnicity_WHITE': 'WHITE',
    'ethnicity_BLACK': 'BLACK/AFRICAN AMERICAN',
    'ethnicity_UNKNOWN': 'UNKNOWN',
    'ethnicity_OTHER': 'OTHER',
    'ethnicity_HISPANIC': 'HISPANIC/LATINO',
    'ethnicity_ASIAN': 'ASIAN',
    'ethnicity_UNABLE_TO_OBTAIN': 'UNABLE TO OBTAIN',
    'ethnicity_AMERICAN_INDIAN': 'AMERICAN INDIAN/ALASKA NATIVE',
}

# 1 where the row's ethnicity equals the label, 0 otherwise
for indicatorColumn, ethnicityLabel in ethnicityIndicators.items():
    demographics[indicatorColumn] = demographics['ethnicity'].eq(ethnicityLabel).astype('int64')

# The raw text column is no longer needed once the indicators exist
demographics.drop(columns=['ethnicity'], inplace=True)

### Merge the data

In [26]:
import pandas as pd


# Build the rescaled data matrix: demographics + deaths, then every vitals
# aggregate, then every lab aggregate, all inner-joined on person_id.
aggregateSuffixes = ('min', 'max', 'avg', 'stddev', 'first', 'last')
vitalsFrames = (vitals_min, vitals_max, vitals_avg, vitals_stddev, vitals_first, vitals_last)
labFrames = (
    lab_measurements_min,
    lab_measurements_max,
    lab_measurements_avg,
    lab_measurements_stddev,
    lab_measurements_first,
    lab_measurements_last,
)

mergedDf = demographics.merge(deaths, how='inner', on='person_id')
for aggregateFrames in (vitalsFrames, labFrames):
    for aggregateSuffix, aggregateFrame in zip(aggregateSuffixes, aggregateFrames):
        # add_suffix also renames the key, so join on the suffixed key and
        # drop it straight away; only the original person_id is kept.
        suffixedKey = 'person_id_' + aggregateSuffix
        mergedDf = mergedDf.merge(
            aggregateFrame.add_suffix('_' + aggregateSuffix),
            how='inner',
            left_on='person_id',
            right_on=suffixedKey,
        ).drop(columns=[suffixedKey])

# Show every column when the merged frame is displayed
pd.set_option('display.max_columns', None)
mergedDf

Unnamed: 0,person_id,age,gender,ethnicity_WHITE,ethnicity_BLACK,ethnicity_UNKNOWN,ethnicity_OTHER,ethnicity_HISPANIC,ethnicity_ASIAN,ethnicity_UNABLE_TO_OBTAIN,ethnicity_AMERICAN_INDIAN,anchor_time,death_datetime,heartrate_min,sysbp_min,diabp_min,meanbp_min,resprate_min,tempc_min,spo2_min,gcseye_min,gcsverbal_min,gcsmotor_min,heartrate_max,sysbp_max,diabp_max,meanbp_max,resprate_max,tempc_max,spo2_max,gcseye_max,gcsverbal_max,gcsmotor_max,heartrate_avg,sysbp_avg,diabp_avg,meanbp_avg,resprate_avg,tempc_avg,spo2_avg,gcseye_avg,gcsverbal_avg,gcsmotor_avg,heartrate_stddev,sysbp_stddev,diabp_stddev,meanbp_stddev,resprate_stddev,tempc_stddev,spo2_stddev,gcseye_stddev,gcsverbal_stddev,gcsmotor_stddev,heartrate_first,sysbp_first,diabp_first,meanbp_first,resprate_first,tempc_first,spo2_first,gcseye_first,gcsverbal_first,gcsmotor_first,heartrate_last,sysbp_last,diabp_last,meanbp_last,resprate_last,tempc_last,spo2_last,gcseye_last,gcsverbal_last,gcsmotor_last,chloride_serum_min,creatinine_min,sodium_serum_min,hemoglobin_min,platelet_count_min,urea_nitrogen_min,glucose_serum_min,bicarbonate_min,potassium_serum_min,anion_gap_min,leukocytes_blood_manual_min,hematocrit_min,chloride_serum_max,creatinine_max,sodium_serum_max,hemoglobin_max,platelet_count_max,urea_nitrogen_max,glucose_serum_max,bicarbonate_max,potassium_serum_max,anion_gap_max,leukocytes_blood_manual_max,hematocrit_max,chloride_serum_avg,creatinine_avg,sodium_serum_avg,hemoglobin_avg,platelet_count_avg,urea_nitrogen_avg,glucose_serum_avg,bicarbonate_avg,potassium_serum_avg,anion_gap_avg,leukocytes_blood_manual_avg,hematocrit_avg,chloride_serum_stddev,creatinine_stddev,sodium_serum_stddev,hemoglobin_stddev,glucose_serum_stddev,bicarbonate_stddev,potassium_serum_stddev,chloride_serum_first,creatinine_first,sodium_serum_first,hemoglobin_first,platelet_count_first,urea_nitrogen_first,glucose_serum_first,bicarbonate_first,potassium_serum_first,anion_gap_first,leukocytes_blood_manual_first,hematocrit_first,chloride_serum_l
ast,creatinine_last,sodium_serum_last,hemoglobin_last,platelet_count_last,urea_nitrogen_last,glucose_serum_last,bicarbonate_last,potassium_serum_last,anion_gap_last,leukocytes_blood_manual_last,hematocrit_last
0,-2144679073,82.0,0,0,0,0,1,0,0,0,0,2190-01-30 19:22:00,2194-04-23 19:27:00,0.477612,0.647436,0.644231,0.746377,0.448276,0.958824,0.91,0.666667,1.00,1.0,0.000243,0.001086,0.014332,0.009218,0.008090,0.065896,0.005767,1.000000,1.00,1.0,0.022636,0.058034,0.137783,0.090005,0.303832,0.408465,0.099371,0.916667,1.000000,1.000000,0.000319,0.001370,0.012634,0.005398,0.012099,0.001929,0.000924,0.207020,0.000000,0.000000,0.392265,0.425532,0.009218,0.239782,0.111765,0.918310,0.009372,1.000000,1.00,1.0,0.403315,0.670940,0.327273,0.121354,0.289855,0.064607,0.95,1.000000,1.00,1.0,0.639777,0.030303,0.800000,0.495,0.221315,0.032612,0.000091,0.547619,0.360656,0.310233,0.032497,0.401314,0.373333,4.000005e-07,0.348485,0.319527,0.186261,0.036850,0.000074,0.466667,0.140845,0.168182,0.001092,0.300636,0.000404,2.986655e-06,0.002665,0.000039,0.221769,0.034536,0.000063,0.001843,0.000059,0.137665,0.009571,0.309224,0.000002,1.414216e-07,0.023761,3.500041e-07,0.000024,0.118153,0.030090,0.567901,0.021834,0.539326,0.364130,0.140471,0.038150,0.000103,0.425532,0.258824,0.169130,0.057659,0.369668,0.534483,0.015385,0.431034,0.372671,0.169522,0.030122,0.000071,0.466667,0.263889,0.1624,0.017126,0.347697
1,-2142084288,84.0,1,1,0,0,0,0,0,0,0,2161-07-10 08:07:00,,0.507463,0.467949,0.567308,0.376812,0.517241,0.960784,0.84,1.000000,1.00,1.0,0.000657,0.000879,0.010383,0.007119,0.013068,0.066417,0.005767,1.000000,1.00,1.0,0.033761,0.042013,0.138408,0.081294,0.362088,0.410178,0.098281,1.000000,1.000000,1.000000,0.001067,0.001488,0.012200,0.006750,0.019299,0.002255,0.001373,0.000000,0.000000,0.000000,0.502762,0.468085,0.011715,0.275204,0.152941,0.934272,0.008726,1.000000,1.00,1.0,0.430939,0.559829,0.377273,0.126021,0.463768,0.065023,0.97,1.000000,1.00,1.0,0.611961,0.045455,0.811429,0.515,0.194634,0.071224,0.000097,0.476190,0.344262,0.334884,0.044797,0.441647,0.400000,1.300002e-06,0.409091,0.426036,0.197368,0.125472,0.000121,0.444444,0.309859,0.256591,0.001505,0.415132,0.000385,7.653303e-06,0.002922,0.000047,0.225081,0.080750,0.000089,0.001589,0.000093,0.225024,0.011626,0.388918,0.000007,6.247436e-07,0.030636,1.935424e-06,0.000061,0.158557,0.137789,0.543210,0.061135,0.595506,0.472826,0.168942,0.096181,0.000104,0.404255,0.341176,0.305217,0.064045,0.497232,0.482759,0.030769,0.448276,0.403727,0.186928,0.074245,0.000095,0.422222,0.222222,0.2398,0.024391,0.383216
2,-2133944014,50.0,1,1,0,0,0,0,0,0,0,2141-03-25 16:45:00,,0.492537,0.583333,0.605769,0.695652,0.000000,0.955882,0.82,0.333333,0.00,0.8,0.000394,0.001017,0.010968,0.007393,0.009956,0.065272,0.005568,1.000000,1.00,1.0,0.029871,0.050755,0.114534,0.073749,0.237514,0.407345,0.094471,0.909091,0.727273,0.981818,0.000695,0.001352,0.009949,0.004547,0.020008,0.001776,0.001688,0.297496,0.644658,0.101929,0.447514,0.462766,0.008834,0.237057,0.082353,0.928638,0.009480,0.333333,0.00,0.8,0.441989,0.495726,0.263636,0.093349,0.260870,0.064503,0.93,1.000000,1.00,1.0,0.292072,0.030303,0.754286,0.635,0.243372,0.056612,0.000151,0.785714,0.213115,0.345814,0.046304,0.571686,0.160000,8.000010e-07,0.333333,0.573964,0.203780,0.095276,0.000211,0.711111,0.225352,0.152500,0.001784,0.475175,0.000125,3.999984e-06,0.002128,0.000064,0.208909,0.064388,0.000154,0.002841,0.000039,0.164521,0.011340,0.540194,0.000013,5.529728e-07,0.067101,2.129814e-06,0.000098,0.150634,0.208081,0.259259,0.039301,0.449438,0.559783,0.219761,0.092717,0.000146,0.638298,0.152941,0.301304,0.087561,0.581328,0.206897,0.015385,0.293103,0.527950,0.235435,0.051755,0.000225,0.666667,0.263889,0.1882,0.031025,0.518942
3,-2133227983,52.0,0,1,0,0,0,0,0,0,0,2120-09-22 10:15:00,2121-08-28 15:15:00,0.522388,0.397436,0.596154,0.442029,0.344828,0.954902,0.90,0.666667,0.25,0.8,0.000441,0.000793,0.011114,0.007484,0.017424,0.067458,0.005767,1.000000,1.00,1.0,0.032323,0.035620,0.134382,0.077553,0.274406,0.406377,0.100756,0.966667,0.557692,0.892308,0.000745,0.001250,0.012137,0.006131,0.026028,0.003181,0.000934,0.149071,0.327676,0.179743,0.574586,0.531915,0.018437,0.359673,0.164706,0.930516,0.009480,1.000000,0.75,0.8,0.447514,0.491453,0.254545,0.098016,0.289855,0.064711,1.00,1.000000,0.25,1.0,0.514604,0.015152,0.800000,0.450,0.267222,0.037469,0.000101,0.619048,0.180328,0.334419,0.041593,0.367569,0.333333,1.000001e-07,0.378788,0.384615,0.294784,0.069685,0.000100,0.555556,0.352113,0.195682,0.001273,0.341820,0.000324,8.533299e-07,0.002840,0.000041,0.276229,0.036790,0.000082,0.002169,0.000061,0.184907,0.009882,0.345487,0.000012,1.632996e-07,0.039828,3.066522e-06,0.000044,0.152535,0.350903,0.456790,0.008734,0.539326,0.434783,0.233666,0.035787,0.000096,0.489362,0.247059,0.267174,0.063413,0.457417,0.482759,0.000000,0.482759,0.298137,0.262278,0.047878,0.000114,0.600000,0.222222,0.1500,0.025648,0.279336
4,-2132499549,68.0,0,1,0,0,0,0,0,0,0,2205-11-16 13:07:00,,0.559701,0.442308,0.528846,0.652174,0.448276,0.956863,0.89,0.666667,1.00,1.0,0.000598,0.000810,0.008482,0.005841,0.010579,0.064751,0.005767,1.000000,1.00,1.0,0.035741,0.037331,0.114358,0.066329,0.269673,0.405097,0.101020,0.888889,1.000000,1.000000,0.000911,0.001260,0.010509,0.005051,0.014950,0.001397,0.001274,0.228665,0.000000,0.000000,0.535912,0.414894,0.011523,0.258856,0.135294,0.917371,0.009480,1.000000,1.00,1.0,0.569061,0.487179,0.272727,0.096849,0.405797,0.065335,0.96,0.666667,1.00,1.0,0.667594,0.095960,0.800000,0.445,0.243160,0.162449,0.000084,0.357143,0.360656,0.388372,0.044690,0.365529,0.386667,1.700002e-06,0.333333,0.390533,0.181636,0.175433,0.000058,0.244444,0.183099,0.365682,0.001715,0.378048,0.000422,1.351995e-05,0.002591,0.000040,0.167014,0.161045,0.000054,0.001001,0.000070,0.273097,0.012137,0.317528,0.000000,1.632996e-07,0.020741,3.818858e-06,0.000013,0.063155,0.106383,0.592593,0.078603,0.561798,0.440217,0.155768,0.146693,0.000079,0.255319,0.317647,0.306739,0.051639,0.470756,0.551724,0.082051,0.396552,0.291925,0.191852,0.143510,0.000072,0.288889,0.236111,0.2576,0.035929,0.275685
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2966,2142664100,65.0,1,1,0,0,0,0,0,0,0,2180-08-11 04:23:00,,0.656716,0.519231,0.644231,0.695652,0.379310,0.955882,0.76,1.000000,0.75,1.0,0.000815,0.001310,0.014185,0.008762,0.018046,0.066937,0.005767,1.000000,1.00,1.0,0.052837,0.046306,0.158438,0.088128,0.443673,0.408452,0.100739,1.000000,0.980769,1.000000,0.001218,0.001544,0.013485,0.006054,0.024891,0.003092,0.002119,0.000000,0.096077,0.000000,0.674033,0.441489,0.013059,0.277929,0.188235,0.930516,0.009695,1.000000,1.00,1.0,0.541436,0.615385,0.386364,0.126021,0.318841,0.064191,0.94,1.000000,1.00,1.0,0.500695,0.045455,0.777143,0.430,0.092259,0.048980,0.000115,0.523810,0.327869,0.348837,0.035318,0.339216,0.293333,9.000011e-07,0.333333,0.349112,0.109278,0.062992,0.000159,0.511111,0.183099,0.204545,0.001520,0.317982,0.000301,6.408863e-06,0.002426,0.000035,0.107265,0.056122,0.000111,0.001849,0.000069,0.231283,0.009527,0.283257,0.000008,3.089097e-07,0.041899,1.949005e-06,0.000097,0.216922,0.087455,0.506173,0.043668,0.561798,0.402174,0.115358,0.051181,0.000154,0.425532,0.282353,0.304348,0.039836,0.426199,0.344828,0.041026,0.327586,0.310559,0.074232,0.065306,0.000173,0.466667,0.305556,0.2600,0.041081,0.288382
2967,2144053271,71.0,0,1,0,0,0,0,0,0,0,2139-06-14 18:06:00,,0.402985,0.480769,0.403846,0.659420,0.000000,0.925490,0.89,0.000000,0.00,0.0,0.000625,0.000836,0.007458,0.006206,0.011201,0.064855,0.005767,0.333333,0.00,1.0,0.028420,0.041665,0.101843,0.067906,0.246380,0.395111,0.104300,0.018519,0.000000,0.588889,0.001528,0.001451,0.010900,0.004332,0.031126,0.003576,0.001301,0.111111,0.000000,0.792366,0.558011,0.521277,0.008834,0.198910,0.129412,0.926761,0.008941,0.000000,0.00,0.0,0.436464,0.388889,0.250000,0.116686,0.289855,0.064607,1.00,0.000000,0.00,1.0,0.500695,0.090909,0.777143,0.510,0.182863,0.169388,0.000063,0.380952,0.327869,0.433256,0.048259,0.429392,0.293333,5.000006e-06,0.318182,0.366864,0.184632,0.219528,0.000081,0.555556,0.197183,0.260682,0.001408,0.336513,0.000286,2.523418e-05,0.002308,0.000044,0.165569,0.186588,0.000059,0.001675,0.000076,0.288371,0.011068,0.368866,0.000008,3.988085e-06,0.037185,8.197657e-07,0.000060,0.512707,0.091691,0.506173,0.196507,0.494382,0.391304,0.148799,0.269803,0.000103,0.340426,0.329412,0.304783,0.066320,0.404244,0.344828,0.076923,0.396552,0.422360,0.159317,0.157510,0.000090,0.600000,0.291667,0.2184,0.024984,0.402863
2968,2144497079,65.0,1,1,0,0,0,0,0,0,0,2150-04-26 14:47:00,,0.365672,0.570513,0.480769,0.644928,0.379310,0.950980,0.84,0.666667,1.00,1.0,0.000329,0.000818,0.011846,0.022908,0.013068,0.064959,0.005767,1.000000,1.00,1.0,0.025801,0.046368,0.074480,0.073255,0.260883,0.404584,0.101435,0.974359,1.000000,1.000000,0.000730,0.000867,0.009338,0.013320,0.018738,0.001812,0.001023,0.128103,0.000000,0.000000,0.276243,0.462766,0.006338,0.212534,0.094118,0.917371,0.008941,1.000000,1.00,1.0,0.469613,0.444444,0.204545,0.084014,0.246377,0.065543,0.95,1.000000,1.00,1.0,0.389430,0.156566,0.737143,0.525,0.113468,0.138776,0.000108,0.309524,0.344262,0.534884,0.027926,0.417647,0.186667,5.900007e-06,0.257576,0.526627,0.097595,0.421260,0.000210,0.466667,0.478873,0.431818,0.000992,0.495614,0.000183,3.756429e-05,0.001749,0.000050,0.112463,0.320635,0.000151,0.001387,0.000106,0.442149,0.007160,0.394113,0.000006,3.107507e-06,0.054258,2.325773e-06,0.000117,0.388867,0.281720,0.345679,0.209607,0.438202,0.565217,0.103754,0.358268,0.000175,0.297872,0.564706,0.521739,0.050924,0.575646,0.258621,0.143590,0.327586,0.453416,0.118601,0.138776,0.000180,0.444444,0.361111,0.4000,0.019676,0.404564
2969,2144648302,91.0,0,1,0,0,0,0,0,0,0,2143-12-22 17:48:00,2143-12-24 05:35:00,0.000000,0.160256,0.355769,0.376812,0.000000,0.944118,0.76,0.666667,0.00,0.8,0.000638,0.000612,0.007897,0.005202,0.011823,0.063918,0.005767,1.000000,0.75,1.0,0.050567,0.023975,0.094314,0.052496,0.301616,0.397338,0.081607,0.733333,0.333333,0.840000,0.002139,0.001634,0.012450,0.007661,0.030985,0.001885,0.003160,0.198762,0.483046,0.154919,0.635359,0.425532,0.012483,0.291553,0.070588,0.904225,0.007433,1.000000,0.75,1.0,0.000000,0.106838,0.095455,0.025671,0.000000,0.063670,0.87,0.666667,0.25,0.8,0.584145,0.181818,0.817143,0.675,0.097561,0.306122,0.000108,0.428571,0.639344,0.534884,0.068583,0.650980,0.360000,3.800005e-06,0.348485,0.520710,0.061168,0.338583,0.000082,0.333333,0.422535,0.318182,0.001616,0.532895,0.000372,2.831989e-05,0.002812,0.000064,0.079976,0.328571,0.000077,0.001316,0.000170,0.385027,0.014678,0.583716,0.000008,7.745981e-07,0.000000,1.449155e-07,0.000012,0.097840,0.082404,0.518519,0.170306,0.573034,0.559783,0.062799,0.338583,0.000111,0.361702,0.517647,0.413043,0.068583,0.597786,0.517241,0.169231,0.448276,0.577640,0.084471,0.306122,0.000088,0.333333,0.472222,0.3400,0.043676,0.589212


### Save rescaled data matrix

In [27]:
# Write the merged rescaled matrix to the data_matrix folder, leaving the
# DataFrame index out of the CSV.
rescaledMatrixPath = dataDirName + 'data_matrix/data_matrix_rescaled.csv'
mergedDf.to_csv(rescaledMatrixPath, index=False)