In [1]:
#Import packages
import pandas as pd
import numpy as np
import re
import warnings
from collections import OrderedDict
warnings.filterwarnings('ignore')

## Load Data Files

In [2]:
adEvents  = pd.read_csv("../Data/Raw Data/adevents.csv", sep=",", index_col='DEIDNUM')
quality = pd.read_csv("../Data/Raw Data/quality.csv", sep=",", index_col='DEIDNUM')
patient  = pd.read_csv("../Data/Raw Data/patient.csv", sep=",", index_col='DEIDNUM')
mhist  = pd.read_csv("../Data/Raw Data/mhist.csv", sep=",", index_col='DEIDNUM')
physical  = pd.read_csv("../Data/Raw Data/physical.csv", sep=",", index_col='DEIDNUM')

hemoOrig = pd.read_csv("../Data/Raw Data/hemo.csv", sep=",", index_col='DEIDNUM')

ex = pd.read_csv("../Data/Raw Data/exercise.csv", sep=",", index_col='DEIDNUM')
labs = pd.read_csv("../Data/Raw Data/labs.csv", sep=",", index_col='DEIDNUM')
meds = pd.read_csv("../Data/Raw Data/meds.csv", sep=",", index_col='DEIDNUM').fillna(0)
ivmeds = pd.read_csv("../Data/Raw Data/ivmeds.csv", sep=",", index_col='DEIDNUM').fillna(0)


#added for additional variables needed for risk score comparison
mech = pd.read_csv("../Data/Raw Data/mechd.csv", sep=",", index_col='DEIDNUM').fillna(0)
procs = pd.read_csv("../Data/Raw Data/procs.csv", sep=",", index_col='DEIDNUM').fillna(0)

## Make Meds

In [3]:
grandList = []
idx = sorted(set(patient.index))
for i in idx:
    lst = []
    t = meds.loc[i]
    tiv = ivmeds.loc[i]
    #Ace
    aceB = t["ACEB"] + t['BENB'] + t['LISB'] + t['MONB'] + t['VALSAB'] + t['ANGIOTB'] + t['LOSAB'] + t['CANDB'] + t['CAPB'] + t['ENAB'] + t['FOSB'] + t['QUIB'] + t['RAMB'] + t['TRAB']  
    aceD = t["ACED"] + t['BEND'] + t['LISD'] + t['MOND'] + t['VALSAD'] + t['ANGIOTD'] + t['LOSAD'] + t['CANDD'] + t['CAPD'] + t['ENAD'] + t['FOSD'] + t['QUID'] + t['RAMD'] + t['TRAD']
    lst.append(1 if aceB >= 1 else 0)
    lst.append(1 if aceD >= 1 else 0)
    
    #BET
    betB = t['BETB'] + t['BISB'] + t['CARB'] + t['ATEB'] + t['PROB']
    betD = t['BETD'] + t['BISD'] + t['CARD'] + t['ATED'] + t['PROD']
    lst.append(1 if betB >= 1 else 0)
    lst.append(1 if betD >= 1 else 0)
    
    #NIT
    nitB = t['NITB'] + t['DINB'] + t['NIPB'] + t['BUMB'] + t['ETHB'] + t['NIGB'] + t['TOPB']
    nitD = t['NITD'] + t['DIND'] + t['NIPD'] + t['BUMD'] + t['ETHD'] + t['NIGD'] + t['TOPD']
    lst.append(1 if nitB >= 1 else 0)
    lst.append(1 if nitD >= 1 else 0)
    
    #Diuretics
    diurB = t['FURB'] + t['TORB'] + t['METB'] + t['DIURB'] 
    diurD = t['FURD'] + t['TORD'] + t['METD'] + t['DIURD']
    lst.append(1 if diurB >= 1 else 0)
    lst.append(1 if diurD >= 1 else 0)
    
    diurBDse = max([t['FURBDSE'], t['TORBDSE'], t['METBDSE'], t['OTHDBDSE'], t['BUMBDSE']])
    diurDDse = max([t['FURDDSE'], t['TORDDSE'], t['METDDSE'], t['OTHDDDSE'], t['BUMDDSE']])
    lst.append(diurBDse)
    lst.append(diurDDse)
    
    lst.append(t['FURBDSE'])
    lst.append(t['FURDDSE'])
    
    lst.append(t['BUMBDSE'])
    lst.append(t['BUMDDSE'])
    
    lst.append(t['TORBDSE'])
    lst.append(t['TORDDSE'])
    
    lst.append(1 if tiv['INOTRP'] >= 1 else 0)
    
    grandList.append(lst)


cols = ['ACE_B', 'ACE_D', 'BET_B', 'BET_D', 'NIT_B', 'NIT_D', 'DIUR_B', 'DIUR_D', 'DIURDSE_B', 'DIURDSE_D', 
        'FurosemideDse_B','FurosemideDse_D', 'Bumetanide_B', 'Bumetanide_D', 'Torsemide_B', 'Torsemide_D',
        'INOT']
medData = pd.DataFrame(grandList, columns =cols, index=patient.index)

medData.head(10)

Unnamed: 0_level_0,ACE_B,ACE_D,BET_B,BET_D,NIT_B,NIT_D,DIUR_B,DIUR_D,DIURDSE_B,DIURDSE_D,FurosemideDse_B,FurosemideDse_D,Bumetanide_B,Bumetanide_D,Torsemide_B,Torsemide_D,INOT
DEIDNUM,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
72,1,1,1,0,1,1,1,1,200.0,80.0,200.0,80.0,0.0,0.0,0.0,0.0,1
81,1,1,1,1,0,1,1,1,120.0,60.0,120.0,0.0,0.0,0.0,0.0,60.0,1
86,1,1,0,0,1,1,1,1,40.0,80.0,40.0,80.0,0.0,0.0,0.0,0.0,0
267,1,1,1,0,0,1,1,1,90.0,160.0,90.0,160.0,0.0,0.0,0.0,0.0,1
814,0,1,1,1,0,0,1,1,120.0,40.0,120.0,40.0,0.0,0.0,0.0,0.0,1
1020,1,1,0,1,1,1,1,1,100.0,160.0,100.0,160.0,0.0,0.0,0.0,0.0,0
1059,1,1,0,0,1,0,1,1,400.0,80.0,400.0,80.0,0.0,0.0,0.0,0.0,0
1084,1,1,0,1,0,1,1,1,160.0,80.0,160.0,80.0,0.0,0.0,0.0,0.0,0
1262,1,1,0,1,1,1,1,1,80.0,80.0,80.0,80.0,0.0,0.0,0.0,0.0,1
1312,1,1,1,1,0,0,1,1,600.0,50.0,600.0,40.0,0.0,0.0,0.0,0.0,0


## Make Lab Data

In [4]:
labUnits = pd.read_csv("../Data/Raw Data/labUnits.csv", index_col='DEIDNUM')

labUnits.loc[72]

Unnamed: 0_level_0,FORM,VISIT,PAGEID,PAGEREP,CRFPAGE,PERIOD,LAB,LABVAL,LABUNIT,LABDAY,LABDT
DEIDNUM,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
72,3-Month Follow-Up,,59,,58,7.0,1,,,,
72,3-Month Follow-Up,,59,,58,7.0,3,,,,
72,3-Month Follow-Up,,59,,58,7.0,5,,,,
72,3-Month Follow-Up,,59,,58,7.0,7,,,,
72,3-Month Follow-Up,,59,,58,7.0,9,,,,
...,...,...,...,...,...,...,...,...,...,...,...
72,2-Month Follow-Up,,52,,51,6.0,4,,,,52.0
72,2-Month Follow-Up,,52,,51,6.0,6,4.3,8.0,,52.0
72,2-Month Follow-Up,,52,,51,6.0,8,1.8,9.0,,52.0
72,2-Month Follow-Up,,52,,51,6.0,10,,,,52.0


In [5]:
#go thru lab vals (alb - wbc) using code nums
#then get units and convert to be standardized (ref crf annotations)
labData = pd.DataFrame(columns =[], index=patient.index)

#ALB
base = []
final = []
idx = sorted(set(labs.index))
for i in idx:
    pt = labUnits.loc[i]
    #get baseline
    pt = pt[pt['FORM'] == 'Baseline']
    if not pt.empty:
        pt = pt[pt['LAB'] == 12] #get alb lab val
        unit = pt['LABUNIT'].item()

        if unit == 1.0 and pt['LABVAL'].item() > 10:
            val = pt['LABVAL'].item() / 10
            base.append(val)
        else:
            base.append(pt['LABVAL'].item())
    else:
        base.append(np.nan)

    #get discharge
    pt = labUnits.loc[i]
    pt = pt[pt['FORM'] == 'Discharge']
    if not pt.empty:
        pt = pt[pt['LAB'] == 12] #get alb lab val
        unit = pt['LABUNIT'].item()

        if unit == 1.0 and pt['LABVAL'].item() > 10:
            val = pt['LABVAL'].item() / 10
            final.append(val)
        else:
            final.append(pt['LABVAL'].item())
    else:
        final.append(np.nan)

labData["ALB_B"] = base
labData["ALB_D"] = final

#ALT
base = []
final = []
idx = sorted(set(labs.index))
for i in idx:
    pt = labUnits.loc[i]
    #get baseline
    pt = pt[pt['FORM'] == 'Baseline']
    if not pt.empty:
        pt = pt[pt['LAB'] == 9] #get alb lab val
        base.append(pt['LABVAL'].item())
    else:
        base.append(np.nan)

    #get discharge
    pt = labUnits.loc[i]
    pt = pt[pt['FORM'] == 'Discharge']
    if not pt.empty:
        pt = pt[pt['LAB'] == 9] #get alb lab val
        final.append(pt['LABVAL'].item())
    else:
        final.append(np.nan)

labData["ALT_B"] = base
labData["ALT_D"] = final

#AST
base = []
final = []
idx = sorted(set(labs.index))
for i in idx:
    pt = labUnits.loc[i]
    #get baseline
    pt = pt[pt['FORM'] == 'Baseline']
    if not pt.empty:
        pt = pt[pt['LAB'] == 10] #get alb lab val
        base.append(pt['LABVAL'].item())
    else:
        base.append(np.nan)

    #get discharge
    pt = labUnits.loc[i]
    pt = pt[pt['FORM'] == 'Discharge']
    if not pt.empty:
        pt = pt[pt['LAB'] == 10] #get alb lab val
        final.append(pt['LABVAL'].item())
    else:
        final.append(np.nan)

labData["AST_B"] = base
labData["AST_D"] = final

#BUN
base = []
final = []
idx = sorted(set(labs.index))
for i in idx:
    pt = labUnits.loc[i]
    #get baseline
    pt = pt[pt['FORM'] == 'Baseline']
    if not pt.empty:
        pt = pt[pt['LAB'] == 7] #get alb lab val
        base.append(pt['LABVAL'].item())
    else:
        base.append(np.nan)

    #get discharge
    pt = labUnits.loc[i]
    pt = pt[pt['FORM'] == 'Discharge']
    if not pt.empty:
        pt = pt[pt['LAB'] == 7] #get alb lab val
        final.append(pt['LABVAL'].item())
    else:
        final.append(np.nan)

labData["BUN_B"] = base
labData["BUN_D"] = final

#CRT
base = []
final = []
idx = sorted(set(labs.index))
for i in idx:
    pt = labUnits.loc[i]
    #get baseline
    pt = pt[pt['FORM'] == 'Baseline']
    if not pt.empty:
        pt = pt[pt['LAB'] == 8] #get alb lab val
        base.append(pt['LABVAL'].item())
    else:
        base.append(np.nan)

    #get discharge
    pt = labUnits.loc[i]
    pt = pt[pt['FORM'] == 'Discharge']
    if not pt.empty:
        pt = pt[pt['LAB'] == 8] #get alb lab val
        final.append(pt['LABVAL'].item())
    else:
        final.append(np.nan)

labData["CRT_B"] = base
labData["CRT_D"] = final

#DIAL
base = []
final = []
idx = sorted(set(labs.index))
for i in idx:
    pt = labUnits.loc[i]
    #get baseline
    pt = pt[pt['FORM'] == 'Baseline']
    if not pt.empty:
        pt = pt[pt['LAB'] == 14] #get alb lab val
        unit = pt['LABUNIT'].item()

        if unit == 11.0:
            val = pt['LABVAL'].item() * 0.0585
            base.append(val)
        else:
            base.append(pt['LABVAL'].item())
    else:
        base.append(np.nan)

    #get discharge
    pt = labUnits.loc[i]
    pt = pt[pt['FORM'] == 'Discharge']
    if not pt.empty:
        pt = pt[pt['LAB'] == 14] #get alb lab val
        unit = pt['LABUNIT'].item()

        if unit == 11.0:
            val = pt['LABVAL'].item() * 0.0585
#             print("unit", unit, "val", pt['LABVAL'].item(), "new val", val)
            final.append(val)
        else:
            final.append(pt['LABVAL'].item())
    else:
        final.append(np.nan)

labData["DIAL_B"] = base
labData["DIAL_D"] = final

#HEC
base = []
final = []
idx = sorted(set(labs.index))
for i in idx:
    pt = labUnits.loc[i]
    #get baseline
    pt = pt[pt['FORM'] == 'Baseline']
    if not pt.empty:
        pt = pt[pt['LAB'] == 3] #get alb lab val
        base.append(pt['LABVAL'].item())
    else:
        base.append(np.nan)

    #get discharge
    pt = labUnits.loc[i]
    pt = pt[pt['FORM'] == 'Discharge']
    if not pt.empty:
        pt = pt[pt['LAB'] == 3] #get alb lab val
        final.append(pt['LABVAL'].item())
    else:
        final.append(np.nan)

labData["HEC_B"] = base
labData["HEC_D"] = final

#HEM
base = []
final = []
idx = sorted(set(labs.index))
for i in idx:
    pt = labUnits.loc[i]
    #get baseline
    pt = pt[pt['FORM'] == 'Baseline']
    if not pt.empty:
        pt = pt[pt['LAB'] == 1] #get alb lab val
        unit = pt['LABUNIT'].item()

        if unit == 2.0: #mmol/L
            val = pt['LABVAL'].item() * 68
            base.append(val)
        elif unit == 1.0 and pt['LABVAL'].item() > 50.0: #g/L
            val = pt['LABVAL'].item() * 0.1
            base.append(val)
        else:
            base.append(pt['LABVAL'].item())
    else:
        base.append(np.nan)

    #get discharge
    pt = labUnits.loc[i]
    pt = pt[pt['FORM'] == 'Discharge']
    if not pt.empty:
        pt = pt[pt['LAB'] == 1] #get alb lab val
        unit = pt['LABUNIT'].item()

        if unit == 2.0: #mmol/L
            val = pt['LABVAL'].item() * 68
#             print("unit mmol", unit, "val", pt['LABVAL'].item(), "new val", val)
            final.append(val)
        elif unit == 1.0 and pt['LABVAL'].item() > 50.0: #g/L
            val = pt['LABVAL'].item() * 0.1
            final.append(val)
#             print(i, "unit g/L", unit, "val", pt['LABVAL'].item(), "new val", val)
        else:
            final.append(pt['LABVAL'].item())
    else:
        final.append(np.nan)

labData["HEM_B"] = base
labData["HEM_D"] = final

#PLA
base = []
final = []
idx = sorted(set(labs.index))
for i in idx:
    pt = labUnits.loc[i]
    #get baseline
    pt = pt[pt['FORM'] == 'Baseline']
    if not pt.empty:
        pt = pt[pt['LAB'] == 2] #get alb lab val
        if pt['LABVAL'].item() > 1000:
            val = pt['LABVAL'].item() / 1000
            print("unit", unit, "val", pt['LABVAL'].item(), "new val", val)
            base.append(val)
        else:
            base.append(pt['LABVAL'].item())
    else:
        base.append(np.nan)

    #get discharge
    pt = labUnits.loc[i]
    pt = pt[pt['FORM'] == 'Discharge']
    if not pt.empty:
        pt = pt[pt['LAB'] == 2] #get alb lab val
        if pt['LABVAL'].item() > 1000:
            val = pt['LABVAL'].item() / 1000
            print("unit", unit, "val", pt['LABVAL'].item(), "new val", val)
            final.append(val)
        else:
            final.append(pt['LABVAL'].item())
    else:
        final.append(np.nan)

labData["PLA_B"] = base
labData["PLA_D"] = final

#POT
base = []
final = []
idx = sorted(set(labs.index))
for i in idx:
    pt = labUnits.loc[i]
    #get baseline
    pt = pt[pt['FORM'] == 'Baseline']
    if not pt.empty:
        pt = pt[pt['LAB'] == 6] #get alb lab val
        base.append(pt['LABVAL'].item())
    else:
        base.append(np.nan)

    #get discharge
    pt = labUnits.loc[i]
    pt = pt[pt['FORM'] == 'Discharge']
    if not pt.empty:
        pt = pt[pt['LAB'] == 6] #get alb lab val
        final.append(pt['LABVAL'].item())
    else:
        final.append(np.nan)

labData["POT_B"] = base
labData["POT_D"] = final

#SOD
base = []
final = []
idx = sorted(set(labs.index))
for i in idx:
    pt = labUnits.loc[i]
    #get baseline
    pt = pt[pt['FORM'] == 'Baseline']
    if not pt.empty:
        pt = pt[pt['LAB'] == 5] #get alb lab val
        base.append(pt['LABVAL'].item())
    else:
        base.append(np.nan)

    #get discharge
    pt = labUnits.loc[i]
    pt = pt[pt['FORM'] == 'Discharge']
    if not pt.empty:
        pt = pt[pt['LAB'] == 5] #get alb lab val
        final.append(pt['LABVAL'].item())
    else:
        final.append(np.nan)

labData["SOD_B"] = base
labData["SOD_D"] = final

#TALB
base = []
final = []
idx = sorted(set(labs.index))
for i in idx:
    pt = labUnits.loc[i]
    #get baseline
    pt = pt[pt['FORM'] == 'Baseline']
    if not pt.empty:
        pt = pt[pt['LAB'] == 13] #get alb lab val
        unit = pt['LABUNIT'].item()

        if unit == 11.0:
            val = pt['LABVAL'].item() * 0.0585
            base.append(val)
        else:
            base.append(pt['LABVAL'].item())
    else:
        base.append(np.nan)

    #get discharge
    pt = labUnits.loc[i]
    pt = pt[pt['FORM'] == 'Discharge']
    if not pt.empty:
        pt = pt[pt['LAB'] == 13] #get alb lab val
        unit = pt['LABUNIT'].item()

        if unit == 11.0:
            val = pt['LABVAL'].item() * 0.0585
#             print("unit", unit, "val", pt['LABVAL'].item(), "new val", val)
            final.append(val)
        else:
            final.append(pt['LABVAL'].item())
    else:
        final.append(np.nan)

labData["TALB_B"] = base
labData["TALB_D"] = final

#TOTP
base = []
final = []
idx = sorted(set(labs.index))
for i in idx:
    pt = labUnits.loc[i]
    #get baseline
    pt = pt[pt['FORM'] == 'Baseline']
    if not pt.empty:
        pt = pt[pt['LAB'] == 11] #get alb lab val
        unit = pt['LABUNIT'].item()

        if unit == 1.0 and pt['LABVAL'].item() > 50:
            val = pt['LABVAL'].item() * 0.1
            base.append(val)
        else:
            base.append(pt['LABVAL'].item())
    else:
        base.append(np.nan)

    #get discharge
    pt = labUnits.loc[i]
    pt = pt[pt['FORM'] == 'Discharge']
    if not pt.empty:
        pt = pt[pt['LAB'] == 11] #get alb lab val
        unit = pt['LABUNIT'].item()

        if unit == 1.0 and pt['LABVAL'].item() > 50:
            val = pt['LABVAL'].item() * 0.1
#             print("unit", unit, "val", pt['LABVAL'].item(), "new val", val)
            final.append(val)
        else:
            final.append(pt['LABVAL'].item())
    else:
        final.append(np.nan)

labData["TOTP_B"] = base
labData["TOTP_D"] = final

#WBC
base = []
final = []
idx = sorted(set(labs.index))
for i in idx:
    pt = labUnits.loc[i]
    #get baseline
    pt = pt[pt['FORM'] == 'Baseline']
    if not pt.empty:
        pt = pt[pt['LAB'] == 4] #get alb lab val
        unit = pt['LABUNIT'].item()

        if pt['LABVAL'].item() > 1000:
            val = pt['LABVAL'].item() / 1000
            base.append(val)
        else:
            base.append(pt['LABVAL'].item())
    else:
        base.append(np.nan)

    #get discharge
    pt = labUnits.loc[i]
    pt = pt[pt['FORM'] == 'Discharge']
    if not pt.empty:
        pt = pt[pt['LAB'] == 4] #get alb lab val
        unit = pt['LABUNIT'].item()

        if pt['LABVAL'].item() > 1000:
            val = pt['LABVAL'].item() / 1000
#             print("unit", unit, "val", pt['LABVAL'].item(), "new val", val)
            final.append(val)
        else:
            final.append(pt['LABVAL'].item())
    else:
        final.append(np.nan)


labData["WBC_B"] = base
labData["WBC_D"] = final


labData

Unnamed: 0_level_0,ALB_B,ALB_D,ALT_B,ALT_D,AST_B,AST_D,BUN_B,BUN_D,CRT_B,CRT_D,...,POT_B,POT_D,SOD_B,SOD_D,TALB_B,TALB_D,TOTP_B,TOTP_D,WBC_B,WBC_D
DEIDNUM,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
72,3.0,,21.0,,24.0,,39.0,49.0,1.50,1.70,...,3.5,3.7,141.0,135.0,0.700,,6.2,,4.50,4.60
81,3.9,3.6,14.0,14.0,18.0,19.0,49.0,32.0,2.10,1.70,...,4.9,4.3,136.0,134.0,1.200,0.9,6.7,6.4,5.10,7.00
86,3.7,3.5,16.0,17.0,21.0,25.0,16.0,22.0,1.60,1.80,...,4.5,4.8,140.0,135.0,0.390,0.4,7.1,7.2,9.05,11.54
267,2.8,,11.0,,42.0,,74.0,43.0,2.80,1.40,...,4.7,4.5,124.0,135.0,0.600,,5.8,,6.60,10.60
814,4.6,4.3,13.0,15.0,27.0,23.0,16.0,18.0,0.80,0.90,...,3.8,4.3,140.0,136.0,1.200,0.9,7.7,7.5,8.38,9.64
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
98078,4.1,,10.0,,26.0,,22.0,15.0,0.90,0.90,...,2.8,3.8,139.0,134.0,8.300,,8.3,,5.30,5.30
98508,4.3,,23.0,,19.0,,7.8,,73.00,85.00,...,4.1,4.6,134.0,138.0,0.936,,,,7.80,9.10
99302,4.1,3.8,32.0,39.0,35.0,36.0,27.0,35.0,1.60,1.50,...,3.8,4.1,134.0,138.0,0.600,0.4,7.4,7.0,9.58,10.11
99912,,3.8,26.0,24.0,26.0,22.0,12.0,14.0,1.08,1.18,...,3.5,4.1,142.0,141.0,1.700,1.3,,7.1,6.90,8.40


## Make Conditions

In [6]:
grandList = []
idx = sorted(set(patient.index))
for i in idx:
    lst = []
    t = mhist.loc[i]

    lst.append(t['AFIB'])
    lst.append(t['ALCHOE'])
    lst.append(t['ANGP'])
    lst.append(t['ARRH'])
    lst.append(t['CARREST'])
    lst.append(t['CVD'])
    lst.append(t['COPD'])
    lst.append(t['DEPR'])
    lst.append(t['DIAB'])
    lst.append(t['GOUT'])
    lst.append(t['HEPT'])
    lst.append(t['HTN'])
    lst.append(t['MALIG'])
    lst.append(t['RENALI'])
    lst.append(t['SMOKING'])
    lst.append(t['STERD'])
    
    #Stroke / TIA
    if t['STROKE'] == 1 or t['TIA'] == 1:
        lst.append(1)
    else:
        lst.append(0)
    
    #VAHD
    if t['VAHD'] == 1 or t['PVD'] == 1:
        lst.append(1)
    else:
        lst.append(0)

    lst.append(t['VF'])
    
    #VHD
    vhd = t['AOREG'] + t['AOST'] + t['MTST'] + t['PMRG'] + t['PTREG'] + t['VALVUE']
    lst.append(1 if vhd >= 1 else 0)
    
    #VT
    vt = t['SVT'] + t['TDP']
    lst.append(1 if vt >= 1 else 0)
    
    #ischemic
    isch = t['ISCHD'] + t['ISCHEME'] + t['MI'] + t['CYTOE']
    lst.append(1 if isch >= 1 else 0)
    
    #nonischemic
    nisch = t['FAMILE'] + t['HYPERE'] + t['IDIOPE'] + t['PERIPAE']
    lst.append(1 if nisch >= 1 else 0)
    
    #Devices / procedures
    lst.append(t['CABG'])
    lst.append(1 if t['HTRANS'] >= 1 else 0)
    lst.append(t['ICD'])
    lst.append(t['PACE'])
    lst.append(t['PTCI'])
    
    grandList.append(lst)
    


cols = ['AF', 'AlchE','ANGP','ARRH','CARREST','CVD','COPD','DEPR','DIAB','GOUT','HEPT','HTN','MALIG','RENAL','SMOKING',
        'STERD','StrokeTIA','VAHD','VF','VHD','VT', 'ISCH', 'NonISCH', 'CABG', 'HTRANS', 'ICD', 'PACE', 'PTCI']
conditions = pd.DataFrame(grandList, columns =cols, index=patient.index)

conditions.head(10)

Unnamed: 0_level_0,AF,AlchE,ANGP,ARRH,CARREST,CVD,COPD,DEPR,DIAB,GOUT,...,VF,VHD,VT,ISCH,NonISCH,CABG,HTRANS,ICD,PACE,PTCI
DEIDNUM,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
72,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0,0,1,0,1.0,1,0.0,1.0,0.0
81,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0,0,0,1,0.0,1,0.0,1.0,0.0
86,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0,0,0,1,0.0,1,0.0,0.0,0.0
267,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0,0,1,0,1.0,1,1.0,1.0,0.0
814,0.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0,1,1,0,0.0,1,1.0,1.0,1.0
1020,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,1,0,0,1,0.0,1,0.0,0.0,0.0
1059,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,...,0.0,1,1,0,0,0.0,1,0.0,0.0,0.0
1084,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0,0,1,1,0.0,1,0.0,0.0,0.0
1262,1.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,...,0.0,0,0,1,1,1.0,1,1.0,1.0,1.0
1312,0.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,...,1.0,0,0,0,0,0.0,1,1.0,0.0,0.0


## All Data

In [7]:
#  no hemo, just bps
allData = pd.DataFrame(columns =[], index=patient.index)

#demog
allData['Age'] = patient['AGE']
allData['Gender'] = patient['GENDER']

#Race
lst= []
idx = sorted(set(labs.index))
for i in idx:
    pt = patient.loc[i]
    if pt['RACE'] == 1.0:
        lst.append(1.0)
    else:
        lst.append(2.0)

allData["Race"] = lst


allData['Wt_B'] = patient['WTADM']
allData['Wt_D'] = patient['WTDIS']
allData['BMI_B'] = patient['bmiadm']
allData['BMI_D'] = patient['bmidis']

allData['InitialHospDays'] = patient['HOSPDAY']
allData['TotalHospDays'] = patient['HSPDAY']

allData['NYHA_B'] = quality['NYHAB']
allData['NYHA_D'] = quality['NYHAD']
allData['MLHFS'] = quality['MLHFSB']


#conditions
allData = pd.concat([allData, conditions], axis=1)

#ex
allData['SixFtWlk_B'] = ex['FTWLKB']
allData['SixFtWlk_D'] = ex['FTWLKD']
allData['VO2_B'] = ex['VO2B']
allData['VO2_D'] = ex['VO2D']

#labs
allData = pd.concat([allData, labData], axis=1)

# meds
allData = pd.concat([allData, medData], axis=1)


# #Hemo ish
allData['EjF_B'] = patient['EF1']
allData['EjF_D'] = patient['EF1']
allData['BPDIAS_B'] = physical['DIASBPB']
allData['BPDIAS_D'] = physical['DIASBPD']
allData['BPSYS_B'] = physical['SYSBPB']
allData['BPSYS_D'] = physical['SYSBPD']
allData['HR_B'] = physical['HRSUPB']
allData['HR_D'] = physical['HRSUPD']

#Invasive Hemo
var = ['RAP', 'PAS', 'PAD', 'PAMN', 'PCWP', 'CO', 'CI', 'SVR', 'MIXED', 'BPSYS', 
       'BPDIAS', 'HRTRT', 'RAT']

for v in var:
    idx = sorted(set(allData.index))
    chg = []
    base = []
    final = []
    for i in idx:
        t = hemoOrig.loc[i]
        
        base.append(t[v+"B"])
        final.append(t[v+"L"])
        chg.append(t[v+"L"] - t[v+"B"])
        
    allData[v+"_B"] = base
    allData[v+"_D"] = final
    allData[v+"_Chg"] = chg


# #Composite
var = ['MAP', 'MPAP', 'CPI', 'PP', 'PPP', 'PAPP', 'SVR', 'RAT', 'PPRatio']

idx = sorted(set(allData.index))

#MAP
chg = []
base = []
final = []
for i in idx:
    t = allData.loc[i]

    b = t['BPSYS_B'] + ((2 * t['BPDIAS_B']) / 3)
    f = t['BPSYS_D'] + ((2 * t['BPDIAS_D']) / 3)
    c = f - b

    base.append(b)
    final.append(f)
    chg.append(c)
    
allData['MAP_B'] = base
allData['MAP_D'] = final
allData['MAP_Chg'] = chg

#MPAP
chg = []
base = []
final = []
for i in idx:
    t = allData.loc[i]

    b = t['PAS_B'] + ((2 * t['PAD_B']) / 3)
    f = t['PAS_D'] + ((2 * t['PAD_D']) / 3)
    c = f - b

    base.append(b)
    final.append(f)
    chg.append(c)
    
allData['MPAP_B'] = base
allData['MPAP_D'] = final
allData['MPAP_Chg'] = chg

#CPI
chg = []
base = []
final = []
for i in idx:
    t = allData.loc[i]

    b = (t['CI_B'] * t['MAP_B']) / 451
    f = (t['CI_D'] * t['MAP_D']) / 451
    c = f - b

    base.append(b)
    final.append(f)
    chg.append(c)
    
allData['CPI_B'] = base
allData['CPI_D'] = final
allData['CPI_Chg'] = chg
    
#PP
chg = []
base = []
final = []
for i in idx:
    t = allData.loc[i]

    b = t['BPSYS_B'] - t['BPDIAS_B']
    f = t['BPSYS_D'] - t['BPDIAS_D']
    c = f - b

    base.append(b)
    final.append(f)
    chg.append(c)
    
allData['PP_B'] = base
allData['PP_D'] = final
allData['PP_Chg'] = chg

#PPP
chg = []
base = []
final = []
for i in idx:
    t = allData.loc[i]

    b = t['PP_B'] / t['BPSYS_B']
    f = t['PP_D'] / t['BPSYS_D']
    c = f - b

    base.append(b)
    final.append(f)
    chg.append(c)
    
allData['PPP_B'] = base
allData['PPP_D'] = final
allData['PPP_Chg'] = chg
    
#PAPP
chg = []
base = []
final = []
for i in idx:
    t = allData.loc[i]

    b = (t['PAS_B']  - t['PAD_B']) / t['PAS_B']
    f = (t['PAS_D']  - t['PAD_D']) / t['PAS_D']
    c = f - b

    base.append(b)
    final.append(f)
    chg.append(c)
    
allData['PAPP_B'] = base
allData['PAPP_D'] = final
allData['PAPP_Chg'] = chg

#SVR
chg = []
base = []
final = []
for i in idx:
    t = allData.loc[i]

    b = 80 * (t['MAP_B']  - t['RAP_B']) / t['CO_B']
    f = 80 * (t['MAP_D']  - t['RAP_D']) / t['CO_D']
    c = f - b

    base.append(b)
    final.append(f)
    chg.append(c)
    
allData['SVR_B'] = base
allData['SVR_D'] = final
allData['SVR_Chg'] = chg

#RAT
chg = []
base = []
final = []
for i in idx:
    t = allData.loc[i]

    b = t['RAP_B'] / t['PCWP_B']
    f = t['RAP_D'] / t['PCWP_D']
    c = f - b

    base.append(b)
    final.append(f)
    chg.append(c)
    
allData['RAT_B'] = base
allData['RAT_D'] = final
allData['RAT_Chg'] = chg

#PPRatio
chg = []
base = []
final = []
for i in idx:
    t = allData.loc[i]

    b = t['PP_B'] / t['HRTRT_B']
    f = t['PP_D'] / t['HRTRT_D']
    c = f - b

    base.append(b)
    final.append(f)
    chg.append(c)
    
allData['PPRatio_B'] = base
allData['PPRatio_D'] = final
allData['PPRatio_Chg'] = chg

#PAPi
chg = []
base = []
final = []
for i in idx:
    t = allData.loc[i]

    b = (t['PAS_B'] - t['PAD_B']) / t['RAP_B']
    f = (t['PAS_D'] - t['PAD_D']) / t['RAP_D']
    c = f - b

    base.append(b)
    final.append(f)
    chg.append(c)
    
allData['PAPi_B'] = base
allData['PAPi_D'] = final
allData['PAPi_Chg'] = chg

#SAPi
chg = []
base = []
final = []
for i in idx:
    t = allData.loc[i]

    b = (t['BPSYS_B'] - t['BPDIAS_B']) / t['PCWP_B']
    f = (t['BPSYS_D'] - t['BPDIAS_D']) / t['PCWP_D']
    c = f - b

    base.append(b)
    final.append(f)
    chg.append(c)
    
allData['SAPi_B'] = base
allData['SAPi_D'] = final
allData['SAPi_Chg'] = chg

#CPP
chg = []
base = []
final = []
for i in idx:
    t = allData.loc[i]

    b = t['BPDIAS_B'] - t['PCWP_B']
    f = t['BPDIAS_B'] - t['PCWP_B']
    c = f - b

    base.append(b)
    final.append(f)
    chg.append(c)
    
allData['CPP_B'] = base
allData['CPP_D'] = final
allData['CPP_Chg'] = chg

#PRAPRat
chg = []
base = []
final = []
for i in idx:
    t = allData.loc[i]

    b = t['PP_B'] / t['RAP_B']
    f = t['PP_D'] / t['RAP_D']
    c = f - b

    base.append(b)
    final.append(f)
    chg.append(c)
    
allData['PRAPRat_B'] = base
allData['PRAPRat_D'] = final
allData['PRAPRat_Chg'] = chg

allData.head(5)

Unnamed: 0_level_0,Age,Gender,Race,Wt_B,Wt_D,BMI_B,BMI_D,InitialHospDays,TotalHospDays,NYHA_B,...,PAPi_Chg,SAPi_B,SAPi_D,SAPi_Chg,CPP_B,CPP_D,CPP_Chg,PRAPRat_B,PRAPRat_D,PRAPRat_Chg
DEIDNUM,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
72,88,2,1.0,57.1,52.0,26.424175,24.064047,9.0,16,4.0,...,0.25,0.777778,1.0,0.222222,50.0,50.0,0.0,1.166667,1.333333,0.166667
81,69,1,1.0,102.363636,100.454545,32.307675,31.705134,5.0,13,4.0,...,-0.333333,0.944444,2.352941,1.408497,45.0,45.0,0.0,1.7,3.333333,1.633333
86,56,2,2.0,98.636364,97.0,40.016375,39.352509,3.0,3,4.0,...,,,,,,,,,,
267,82,1,1.0,82.2,81.7,30.938312,30.750122,15.0,14,4.0,...,,,,,,,,,,
814,58,1,1.0,82.1,69.9,24.515513,20.872525,4.0,4,4.0,...,2.333333,1.888889,3.5,1.611111,44.0,44.0,0.0,11.333333,21.0,9.666667


In [8]:
# add additional values needed for risk score comparison

#BNP
base = []
final = []
idx = sorted(set(allData.index))
for i in idx:
    try:
        pt = mech.loc[i]
        base.append(pt['BNPB'])
        final.append(pt['BNPD'])
    except:
        base.append(np.nan)
        final.append(np.nan)

allData['BNP_B'] = base
allData['BNP_D'] = final

#CPR
base = []
idx = sorted(set(allData.index))
for i in idx:
    try:
        pt = procs.loc[i]
        base.append(pt['CPRES'])
    except:
        base.append(np.nan)

allData['CPR'] = base

#MechVT
base = []
idx = sorted(set(allData.index))
for i in idx:
    try:
        pt = procs.loc[i]
        base.append(pt['MEVT'])
    except:
        base.append(np.nan)

allData['MEVT'] = base


allData

Unnamed: 0_level_0,Age,Gender,Race,Wt_B,Wt_D,BMI_B,BMI_D,InitialHospDays,TotalHospDays,NYHA_B,...,CPP_B,CPP_D,CPP_Chg,PRAPRat_B,PRAPRat_D,PRAPRat_Chg,BNP_B,BNP_D,CPR,MEVT
DEIDNUM,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
72,88,2,1.0,57.100000,52.000000,26.424175,24.064047,9.0,16,4.0,...,50.0,50.0,0.0,1.166667,1.333333,0.166667,3903.0,0.0,0.0,0.0
81,69,1,1.0,102.363636,100.454545,32.307675,31.705134,5.0,13,4.0,...,45.0,45.0,0.0,1.700000,3.333333,1.633333,0.0,188.0,0.0,0.0
86,56,2,2.0,98.636364,97.000000,40.016375,39.352509,3.0,3,4.0,...,,,,,,,13.2,4.0,0.0,0.0
267,82,1,1.0,82.200000,81.700000,30.938312,30.750122,15.0,14,4.0,...,,,,,,,341.0,0.0,0.0,0.0
814,58,1,1.0,82.100000,69.900000,24.515513,20.872525,4.0,4,4.0,...,44.0,44.0,0.0,11.333333,21.000000,9.666667,321.0,909.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
98078,51,2,2.0,57.272727,60.100000,21.036814,22.075298,4.0,34,4.0,...,,,,,,,631.0,0.0,0.0,0.0
98508,57,1,2.0,65.000000,64.200000,23.030045,22.746599,5.0,5,4.0,...,37.0,37.0,0.0,10.500000,,,,,0.0,0.0
99302,41,1,2.0,97.300000,97.400000,40.499480,40.541103,21.0,21,4.0,...,13.0,13.0,0.0,2.842105,7.818182,4.976077,13.0,16.2,0.0,0.0
99912,46,1,1.0,78.636364,78.200000,25.677180,25.534694,2.0,2,4.0,...,,,,,,,754.0,0.0,0.0,0.0


In [9]:
allData.to_csv('Preprocessed Data/ESCAPEAllData.csv', index=True)

In [10]:
## STOPPED HERE

In [11]:
# #Get only patients with hemo
# hemoComposite = hemo.loc[~(np.isnan(hemo)).all(axis=1)]
# print(hemoComposite.shape)
# hemoComposite.head(5)

## Make single point of care values
--> take base and discharge measurements and make them each one row

In [12]:
#Load original dataframes
allDataOrig = pd.read_csv("Preprocessed Data/ESCAPEAllData.csv", sep=",", index_col='DEIDNUM') #all feature 
allDataOrig

Unnamed: 0_level_0,Age,Gender,Race,Wt_B,Wt_D,BMI_B,BMI_D,InitialHospDays,TotalHospDays,NYHA_B,...,CPP_B,CPP_D,CPP_Chg,PRAPRat_B,PRAPRat_D,PRAPRat_Chg,BNP_B,BNP_D,CPR,MEVT
DEIDNUM,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
72,88,2,1.0,57.100000,52.000000,26.424175,24.064047,9.0,16,4.0,...,50.0,50.0,0.0,1.166667,1.333333,0.166667,3903.0,0.0,0.0,0.0
81,69,1,1.0,102.363636,100.454545,32.307675,31.705134,5.0,13,4.0,...,45.0,45.0,0.0,1.700000,3.333333,1.633333,0.0,188.0,0.0,0.0
86,56,2,2.0,98.636364,97.000000,40.016375,39.352509,3.0,3,4.0,...,,,,,,,13.2,4.0,0.0,0.0
267,82,1,1.0,82.200000,81.700000,30.938312,30.750122,15.0,14,4.0,...,,,,,,,341.0,0.0,0.0,0.0
814,58,1,1.0,82.100000,69.900000,24.515513,20.872525,4.0,4,4.0,...,44.0,44.0,0.0,11.333333,21.000000,9.666667,321.0,909.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
98078,51,2,2.0,57.272727,60.100000,21.036814,22.075298,4.0,34,4.0,...,,,,,,,631.0,0.0,0.0,0.0
98508,57,1,2.0,65.000000,64.200000,23.030045,22.746599,5.0,5,4.0,...,37.0,37.0,0.0,10.500000,,,,,0.0,0.0
99302,41,1,2.0,97.300000,97.400000,40.499480,40.541103,21.0,21,4.0,...,13.0,13.0,0.0,2.842105,7.818182,4.976077,13.0,16.2,0.0,0.0
99912,46,1,1.0,78.636364,78.200000,25.677180,25.534694,2.0,2,4.0,...,,,,,,,754.0,0.0,0.0,0.0


In [13]:
giantList = []
colNames = []
indexLst = []

colTest = []
for i in range(len(allDataOrig)):
    df = allDataOrig.iloc[i]
    row0 = []
    row1 = []
    row0.append(df.name)
    row1.append(df.name)
    for col in df.index:
        colTest.append(col)
        if "_B" in col:
            row0.append(df[col])
        elif "_D" in col:
            row1.append(df[col])
        elif "_Chg" in col or "_M3" in col or "_M6" in col:
            pass
        else: #value for both base and discharge
            row0.append(df[col])
            row1.append(df[col])
    
    giantList.append(row0)
    giantList.append(row1)

colNames.append('DEIDNUM')
for col in allDataOrig.columns:
    if "_B" in col:
        colNames.append(col.replace('_B', ''))
    elif "_D" in col:
        colNames.append(col.replace('_D', ''))
    elif "_Chg" in col:
        colNames.append(col.replace('_Chg', ''))
    else:
        colNames.append(col)
        
colNames = list(OrderedDict.fromkeys(colNames))

allDataSingleDF = pd.DataFrame(giantList, columns=colNames).set_index('DEIDNUM', drop=True)

In [14]:
allDataSingleDF.head(8)

Unnamed: 0_level_0,Age,Gender,Race,Wt,BMI,InitialHospDays,TotalHospDays,NYHA,MLHFS,AF,...,PPP,PAPP,PPRatio,PAPi,SAPi,CPP,PRAPRat,BNP,CPR,MEVT
DEIDNUM,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
72,88.0,2.0,1.0,57.1,26.424175,9.0,16.0,4.0,76.0,1.0,...,0.245614,0.428571,0.266667,0.75,0.777778,50.0,1.166667,3903.0,0.0,0.0
72,88.0,2.0,1.0,52.0,24.064047,9.0,16.0,3.0,76.0,1.0,...,0.235294,0.428571,0.272727,1.0,1.0,50.0,1.333333,0.0,0.0,0.0
81,69.0,1.0,1.0,102.363636,32.307675,5.0,13.0,4.0,21.0,1.0,...,0.2125,0.5,0.242857,2.0,0.944444,45.0,1.7,0.0,0.0,0.0
81,69.0,1.0,1.0,100.454545,31.705134,5.0,13.0,2.0,21.0,1.0,...,0.470588,0.571429,0.571429,1.666667,2.352941,45.0,3.333333,188.0,0.0,0.0
86,56.0,2.0,2.0,98.636364,40.016375,3.0,3.0,4.0,,0.0,...,,,,,,,,13.2,0.0,0.0
86,56.0,2.0,2.0,97.0,39.352509,3.0,3.0,4.0,,0.0,...,,,,,,,,4.0,0.0,0.0
267,82.0,1.0,1.0,82.2,30.938312,15.0,14.0,4.0,60.0,1.0,...,,,,,,,,341.0,0.0,0.0
267,82.0,1.0,1.0,81.7,30.750122,15.0,14.0,4.0,60.0,1.0,...,,,,,,,,0.0,0.0,0.0


In [15]:
#save to file
allDataSingleDF.to_csv("Preprocessed Data/ESCAPEAllDataSingleValue.csv")