In [1]:
import pandas as pd

In [2]:
vitals = pd.read_hdf("all_hourly_data.h5", 'vitals_labs_mean')
print(vitals.shape)

(2200954, 104)


In [3]:
interventions = pd.read_hdf("all_hourly_data.h5",'interventions')
print(interventions.shape)

(2200954, 14)


In [4]:
patients = pd.read_hdf("all_hourly_data.h5", 'patients')
print(patients.shape)

(34472, 28)


In [5]:
# Prepare labels for los > 7 task
patients["los_7"] = patients['los_icu'] > 7
patients.drop(columns=['los_icu'], inplace=True)

In [6]:
patients = patients[["gender","ethnicity","age","los_7","mort_icu"]].reset_index()

In [7]:
patients.los_7 = patients.los_7.apply(int)

In [8]:
print(f"class imbalance for length of stay prediction: {patients.los_7.mean()}")
print(f"class imbalance for icu mortality prediction: {patients.mort_icu.mean()}")

class imbalance for length of stay prediction: 0.053521698770016245
class imbalance for icu mortality prediction: 0.06558946391274077


WINDOW_SIZE determines how much information we want to use for the prediction tasks. For example, a window length of 24 means that we will use the sequence of the first 24 readings for vitals and interventions.

In [9]:
WINDOW_SIZE = 24

In [10]:
interventions = interventions.reset_index()

In [11]:
interventions = interventions[interventions.hours_in < WINDOW_SIZE]

In [12]:
# interventions.drop(columns=['level_0','index'], inplace=True)

In [13]:
interventions

Unnamed: 0,subject_id,hadm_id,icustay_id,hours_in,vent,vaso,adenosine,dobutamine,dopamine,epinephrine,isuprel,milrinone,norepinephrine,phenylephrine,vasopressin,colloid_bolus,crystalloid_bolus,nivdurations
0,3,145834,211552,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
1,3,145834,211552,1,1,1,0,0,1,0,0,0,0,1,0,0,0,0
2,3,145834,211552,2,1,1,0,0,1,0,0,0,0,1,0,0,0,0
3,3,145834,211552,3,1,1,0,0,0,0,0,0,0,1,0,0,0,0
4,3,145834,211552,4,1,1,0,0,0,0,0,0,1,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2200946,99999,113369,246512,19,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2200947,99999,113369,246512,20,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2200948,99999,113369,246512,21,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2200949,99999,113369,246512,22,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [14]:
vitals = vitals.reset_index()

In [15]:
vitals = vitals[vitals.hours_in < WINDOW_SIZE]

In [16]:
vitals.shape

(808539, 108)

In [17]:
vitals.columns = vitals.columns.to_flat_index()
vitals.columns = list(map(lambda x: x[0] + x[1] if x[1] == "" else x[0] + " " + x[1] , vitals.columns.tolist()))

In [18]:
vitals

Unnamed: 0,subject_id,hadm_id,icustay_id,hours_in,alanine aminotransferase mean,albumin mean,albumin ascites mean,albumin pleural mean,albumin urine mean,alkaline phosphate mean,...,total protein mean,total protein urine mean,troponin-i mean,troponin-t mean,venous pvo2 mean,weight mean,white blood cell count mean,white blood cell count urine mean,ph mean,ph urine mean
0,3,145834,211552,0,25.0,1.8,,,,73.0,...,,,,,,,14.842857,,7.40,5.0
1,3,145834,211552,1,,,,,,,...,,,,,,,,,,
2,3,145834,211552,2,,,,,,,...,,,,,,,,,7.26,
3,3,145834,211552,3,,,,,,,...,,,,,,,,,,
4,3,145834,211552,4,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2200946,99999,113369,246512,19,,,,,,,...,,,,,,,,,,
2200947,99999,113369,246512,20,,,,,,,...,,,,,,,,,,
2200948,99999,113369,246512,21,,,,,,,...,,,,,,,,,,
2200949,99999,113369,246512,22,,,,,,,...,,,,,,,,,,


In [19]:
import tensorflow as tf

In [22]:
vitals.isna().mean(axis=0)

subject_id                           0.000000
hadm_id                              0.000000
icustay_id                           0.000000
hours_in                             0.000000
alanine aminotransferase mean        0.968198
                                       ...   
weight mean                          0.954340
white blood cell count mean          0.883480
white blood cell count urine mean    0.995364
ph mean                              0.859961
ph urine mean                        0.978932
Length: 108, dtype: float64

In [23]:
patients

Unnamed: 0,subject_id,hadm_id,icustay_id,gender,ethnicity,age,los_7,mort_icu
0,3,145834,211552,M,WHITE,76.526792,0,0
1,4,185777,294638,F,WHITE,47.845047,0,0
2,6,107064,228232,F,WHITE,65.942297,0,0
3,9,150750,220597,M,UNKNOWN/NOT SPECIFIED,41.790228,0,1
4,11,194540,229441,F,WHITE,50.148295,0,0
...,...,...,...,...,...,...,...,...
34467,99983,117390,286606,M,UNKNOWN/NOT SPECIFIED,78.576624,0,0
34468,99991,151118,226241,M,WHITE,47.729259,0,0
34469,99992,197084,242052,F,WHITE,65.772155,0,0
34470,99995,137810,229633,F,WHITE,88.698942,0,0


In [29]:
subjects = interventions.subject_id.unique()

In [31]:
subjects.size

34472

In [53]:
for subject in subjects:
    interv_list = []
    tmp = interventions[interventions.subject_id == subject]
    for _, i in tmp.iterrows():
        interv_list.append(interventions.columns[i.eq(1).values].tolist())
    break

In [54]:
interv_list

[['vent'],
 ['hours_in', 'vent', 'vaso', 'dopamine', 'phenylephrine'],
 ['vent', 'vaso', 'dopamine', 'phenylephrine'],
 ['vent', 'vaso', 'phenylephrine'],
 ['vent', 'vaso', 'norepinephrine', 'phenylephrine'],
 ['vent', 'vaso', 'norepinephrine', 'phenylephrine'],
 ['vent', 'vaso', 'norepinephrine', 'phenylephrine'],
 ['vent', 'vaso', 'norepinephrine', 'phenylephrine'],
 ['vent', 'vaso', 'norepinephrine', 'phenylephrine'],
 ['vent', 'vaso', 'norepinephrine', 'phenylephrine'],
 ['vent', 'vaso', 'norepinephrine', 'phenylephrine'],
 ['vent', 'vaso', 'norepinephrine', 'phenylephrine'],
 ['vent', 'vaso', 'norepinephrine', 'phenylephrine'],
 ['vent', 'vaso', 'norepinephrine', 'phenylephrine'],
 ['vent', 'vaso', 'norepinephrine', 'phenylephrine'],
 ['vent', 'vaso', 'norepinephrine', 'phenylephrine', 'crystalloid_bolus'],
 ['vent', 'vaso', 'norepinephrine', 'phenylephrine'],
 ['vent', 'vaso', 'norepinephrine', 'phenylephrine'],
 ['vent', 'vaso', 'norepinephrine', 'phenylephrine', 'crystalloid_bo

In [33]:
interventions[interventions.subject_id == subject]

Unnamed: 0,subject_id,hadm_id,icustay_id,hours_in,vent,vaso,adenosine,dobutamine,dopamine,epinephrine,isuprel,milrinone,norepinephrine,phenylephrine,vasopressin,colloid_bolus,crystalloid_bolus,nivdurations
0,3,145834,211552,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
1,3,145834,211552,1,1,1,0,0,1,0,0,0,0,1,0,0,0,0
2,3,145834,211552,2,1,1,0,0,1,0,0,0,0,1,0,0,0,0
3,3,145834,211552,3,1,1,0,0,0,0,0,0,0,1,0,0,0,0
4,3,145834,211552,4,1,1,0,0,0,0,0,0,1,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2200946,99999,113369,246512,19,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2200947,99999,113369,246512,20,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2200948,99999,113369,246512,21,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2200949,99999,113369,246512,22,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [46]:
interventions.head(1).eq(1).values[0]

array([False, False, False, False,  True, False, False, False, False,
       False, False, False, False, False, False, False, False, False])

In [51]:
interventions.columns[interventions.head(1).eq(1).values[0]].tolist()

['vent']

In [50]:
for _, i in interventions.iterrows():
    print(interventions.columns[i.eq(1).values].tolist())

Index(['vent'], dtype='object')
Index(['hours_in', 'vent', 'vaso', 'dopamine', 'phenylephrine'], dtype='object')
Index(['vent', 'vaso', 'dopamine', 'phenylephrine'], dtype='object')
Index(['vent', 'vaso', 'phenylephrine'], dtype='object')
Index(['vent', 'vaso', 'norepinephrine', 'phenylephrine'], dtype='object')
Index(['vent', 'vaso', 'norepinephrine', 'phenylephrine'], dtype='object')
Index(['vent', 'vaso', 'norepinephrine', 'phenylephrine'], dtype='object')
Index(['vent', 'vaso', 'norepinephrine', 'phenylephrine'], dtype='object')
Index(['vent', 'vaso', 'norepinephrine', 'phenylephrine'], dtype='object')
Index(['vent', 'vaso', 'norepinephrine', 'phenylephrine'], dtype='object')
Index(['vent', 'vaso', 'norepinephrine', 'phenylephrine'], dtype='object')
Index(['vent', 'vaso', 'norepinephrine', 'phenylephrine'], dtype='object')
Index(['vent', 'vaso', 'norepinephrine', 'phenylephrine'], dtype='object')
Index(['vent', 'vaso', 'norepinephrine', 'phenylephrine'], dtype='object')
Index(['ven

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Index(['hours_in'], dtype='object')
Index([], dtype='object')
Index([], dtype='object')
Index(['nivdurations'], dtype='object')
Index(['nivdurations'], dtype='object')
Index(['nivdurations'], dtype='object')
Index(['nivdurations'], dtype='object')
Index(['nivdurations'], dtype='object')
Index(['nivdurations'], dtype='object')
Index(['nivdurations'], dtype='object')
Index(['nivdurations'], dtype='object')
Index(['nivdurations'], dtype='object')
Index(['nivdurations'], dtype='object')
Index(['nivdurations'], dtype='object')
Index(['nivdurations'], dtype='object')
Index(['nivdurations'], dtype='object')
Index(['nivdurations'], dtype='object')
Index(['nivdurations'], dtype='object')
Index(['nivdurations'], dtype='object')
Index(['nivdurations'], dtype='object')
Index(['nivdurations'], dtype='object')
Index(['nivdurations'], dtype='object')
Index(['nivdurations'], dtype='object')
Index(['vaso', 'dopamine'], dtype='object')
Index(['hours_in', 'vaso', 'dopamine'], dtype='object')
Index(['vaso

TypeError: object of type 'NoneType' has no len()