# DL Survival - Ventilation Outcomes
 Updated 21/11/21

In [1]:
import pandas as pd
import numpy as np
import math
import statistics
from datetime import datetime
import datetime as dt
from datetime import timedelta
import json
import miceforest as mf
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer

pd.set_option('display.max_columns', None)

## 1. Data cleaning

- Import MIMIC III + IV combined data
- Review column unique values, assign correct data types
- Impute missing values


### 1.1: Importing data

In [2]:
df = pd.read_csv('mimic_combined.csv')

In [3]:
pd.set_option('display.max_columns', None)
print(df.shape)
df.head(5)

(12332, 110)


Unnamed: 0.1,Unnamed: 0,hospital_expire_flag,los,spo2,free_calcium,outtime,meanbp,ptt,tidalvol,wcc,cvd,weight,bicarb,ggt,t1dm,temp,malig,subject_id,hr,baseexcess,diab_un,first_careunit,hadm_id,bilirubin_direct,liver_severe,ventrate,fibrinogen,arrhythmia,neutrophils,prbc,glucose,magnesium,po2,ext_time,alp,tricuspid,albumin,dementia,dischtime,pulmonary,t2dm,plts,lactate,bleed_time,admission_location,rr,mit,insulin,pvd,pud,lymphocytes,gender,cabg,smoking,reintubation,height,inr,bilirubin_total,diab_cc,creatinine,insurance,mi,specimen,deathtime,pt,aado2,hba1c,crp,pco2,aids,language,dod,dbp,reint_time,intime,rheum,bg_temp,sbp,chloride,fio2,sodium,last_careunit,infection,paraplegia,cardiac_index,marital_status,potassium,bilirubin_indirect,bun,dtoutput,ckd,copd,cryo,admission_type,met_ca,hb,ethnicity,admittime,ffp,inr_1,ccf,icustay_seq,ph,ast,alt,plt,aortic,vent_array,hematocrit,liver_mild
0,0,0,2.2769,"[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...",2198-02-02 19:06:39,"[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...",[],"[{'charttime': datetime.datetime(2198, 1, 31, ...",0,84.0,"[{'charttime': datetime.datetime(2198, 1, 31, ...",[],0,"[{'charttime': datetime.datetime(2198, 1, 31, ...",0,27328,"[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...",0,CSRU,195663,[],0,[],[],0,[],[],"[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 2, 2, 0...","[{'charttime': datetime.datetime(2198, 1, 31, ...",2198-01-31 22:00:00,[],0,[],0,2198-02-04 12:00:00,0,0,[],"[{'charttime': datetime.datetime(2198, 1, 31, ...",[],PHYS REFERRAL/NORMAL DELI,"[{'charttime': datetime.datetime(2198, 1, 31, ...",0,[],0,0,[],M,1,0,0,172.72,"[{'charttime': datetime.datetime(2198, 1, 31, ...",[],0,"[{'charttime': datetime.datetime(2198, 1, 31, ...",Private,1,[],,[],[],[],[],"[{'charttime': datetime.datetime(2198, 1, 31, ...",0,ENGL,,"[{'charttime': datetime.datetime(2198, 1, 31, ...",,2198-01-31 12:27:58,0,[],"[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...",[],"[{'charttime': datetime.datetime(2198, 1, 31, ...",CSRU,"[{'suspected_infection_time': None, 'antibioti...",0,[],SINGLE,"[{'charttime': datetime.datetime(2198, 1, 31, ...",[],"[{'charttime': datetime.datetime(2198, 1, 31, ...",[],0,0,[],ELECTIVE,0,"[{'charttime': datetime.datetime(2198, 1, 31, ...",white,2198-01-31 08:00:00,[],"[{'charttime': datetime.datetime(2198, 1, 31, ...",0,1,"[{'charttime': datetime.datetime(2198, 1, 31, ...",[],[],"[{'charttime': datetime.datetime(2198, 1, 31, ...",0,"[{'starttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...",0
1,1,0,2.2722,"[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...",2198-05-10 19:46:00,"[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...",[],"[{'charttime': datetime.datetime(2198, 5, 10, ...",0,60.0,"[{'charttime': datetime.datetime(2198, 5, 9, 3...",[],0,"[{'charttime': datetime.datetime(2198, 5, 8, 1...",0,6280,"[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...",0,CSRU,106984,[],0,[],[],0,[],"[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 9, 3...","[{'charttime': datetime.datetime(2198, 5, 8, 1...",2198-05-09 09:29:00,[],0,[],0,2198-05-15 13:49:00,0,0,"[{'charttime': datetime.datetime(2198, 5, 8, 1...",[],[],PHYS REFERRAL/NORMAL DELI,"[{'charttime': datetime.datetime(2198, 5, 8, 1...",0,"[{'charttime': datetime.datetime(2198, 5, 8, 1...",1,0,[],F,0,0,0,170.18,"[{'charttime': datetime.datetime(2198, 5, 8, 1...",[],0,"[{'charttime': datetime.datetime(2198, 5, 9, 3...",Self Pay,0,[],,[],[],[],[],"[{'charttime': datetime.datetime(2198, 5, 8, 1...",0,SPAN,,"[{'charttime': datetime.datetime(2198, 5, 8, 1...",,2198-05-08 13:14:00,0,[],"[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 9, 3...",[],"[{'charttime': datetime.datetime(2198, 5, 8, 1...",CSRU,"[{'suspected_infection_time': None, 'antibioti...",0,"[{'charttime': datetime.datetime(2198, 5, 8, 1...",DIVORCED,"[{'charttime': datetime.datetime(2198, 5, 8, 1...",[],"[{'charttime': datetime.datetime(2198, 5, 9, 3...",[],0,0,"[{'charttime': datetime.datetime(2198, 5, 8, 1...",ELECTIVE,0,"[{'charttime': datetime.datetime(2198, 5, 8, 1...",other,2198-05-08 07:15:00,"[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...",0,1,"[{'charttime': datetime.datetime(2198, 5, 8, 1...",[],[],"[{'charttime': datetime.datetime(2198, 5, 8, 1...",1,"[{'starttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...",0
2,2,0,2.1157,"[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...",2189-02-20 13:37:48,"[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...",0,57.0,"[{'charttime': datetime.datetime(2189, 2, 18, ...",[],0,"[{'charttime': datetime.datetime(2189, 2, 18, ...",0,15201,"[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...",0,CSRU,123613,[],0,[],"[{'charttime': datetime.datetime(2189, 2, 18, ...",1,[],"[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 19, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...",2189-02-19 09:00:00,[],0,[],0,2189-03-17 14:20:00,0,0,[],[],[],PHYS REFERRAL/NORMAL DELI,"[{'charttime': datetime.datetime(2189, 2, 18, ...",0,[],1,0,[],F,1,0,0,165.1,"[{'charttime': datetime.datetime(2189, 2, 18, ...",[],0,"[{'charttime': datetime.datetime(2189, 2, 18, ...",Medicare,0,[],,[],"[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 10, ...",[],"[{'charttime': datetime.datetime(2189, 2, 18, ...",0,,2191-12-14,"[{'charttime': datetime.datetime(2189, 2, 18, ...",,2189-02-18 10:51:08,0,[],"[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...",CSRU,"[{'suspected_infection_time': None, 'antibioti...",0,"[{'charttime': datetime.datetime(2189, 2, 18, ...",MARRIED,"[{'charttime': datetime.datetime(2189, 2, 18, ...",[],"[{'charttime': datetime.datetime(2189, 2, 18, ...",[],0,0,[],ELECTIVE,0,"[{'charttime': datetime.datetime(2189, 2, 18, ...",unknown,2189-02-18 08:00:00,[],"[{'charttime': datetime.datetime(2189, 2, 18, ...",1,1,"[{'charttime': datetime.datetime(2189, 2, 18, ...",[],[],"[{'charttime': datetime.datetime(2189, 2, 18, ...",1,"[{'starttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...",0
3,3,0,1.0738,"[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...",2118-01-26 12:33:02,"[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 26, ...",0,135.0,"[{'charttime': datetime.datetime(2118, 1, 25, ...",[],0,"[{'charttime': datetime.datetime(2118, 1, 25, ...",0,25226,"[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...",1,CSRU,126027,[],0,[],"[{'charttime': datetime.datetime(2118, 1, 25, ...",0,[],[],"[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...",2118-01-25 17:30:00,[],0,[],0,2118-01-29 13:00:00,0,0,[],[],[],PHYS REFERRAL/NORMAL DELI,"[{'charttime': datetime.datetime(2118, 1, 25, ...",0,[],0,0,[],M,1,0,0,190.5,"[{'charttime': datetime.datetime(2118, 1, 25, ...",[],0,"[{'charttime': datetime.datetime(2118, 1, 25, ...",Private,0,[],,[],"[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 14, ...",[],"[{'charttime': datetime.datetime(2118, 1, 25, ...",0,,,"[{'charttime': datetime.datetime(2118, 1, 25, ...",,2118-01-25 10:46:42,0,[],"[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...",CSRU,"[{'suspected_infection_time': None, 'antibioti...",0,"[{'charttime': datetime.datetime(2118, 1, 25, ...",MARRIED,"[{'charttime': datetime.datetime(2118, 1, 25, ...",[],"[{'charttime': datetime.datetime(2118, 1, 25, ...",[],0,0,[],ELECTIVE,0,"[{'charttime': datetime.datetime(2118, 1, 25, ...",unknown,2118-01-25 07:15:00,[],"[{'charttime': datetime.datetime(2118, 1, 25, ...",0,1,"[{'charttime': datetime.datetime(2118, 1, 25, ...",[],[],"[{'charttime': datetime.datetime(2118, 1, 25, ...",0,"[{'starttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...",0
4,4,0,2.0507,"[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...",2198-01-03 12:00:00,"[{'charttime': datetime.datetime(2198, 1, 1, 1...",[],"[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 2, 4...",0,70.0,"[{'charttime': datetime.datetime(2198, 1, 2, 4...",[],0,"[{'charttime': datetime.datetime(2198, 1, 1, 1...",0,19637,"[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...",1,CSRU,190332,[],0,[],[],1,[],"[{'charttime': datetime.datetime(2198, 1, 2, 6...","[{'charttime': datetime.datetime(2198, 1, 1, 1...",[],"[{'charttime': datetime.datetime(2198, 1, 1, 1...",2198-01-01 21:00:00,[],0,[],0,2198-01-09 13:07:00,0,0,[],"[{'charttime': datetime.datetime(2198, 1, 1, 1...",[],PHYS REFERRAL/NORMAL DELI,"[{'charttime': datetime.datetime(2198, 1, 1, 1...",0,[],0,0,[],M,0,0,0,175.26,"[{'charttime': datetime.datetime(2198, 1, 3, 4...",[],0,"[{'charttime': datetime.datetime(2198, 1, 2, 4...",Medicare,0,[],,[],"[{'charttime': datetime.datetime(2198, 1, 1, 1...",[],[],"[{'charttime': datetime.datetime(2198, 1, 1, 1...",0,,2203-12-06,"[{'charttime': datetime.datetime(2198, 1, 1, 1...",,2198-01-01 10:47:00,0,[],"[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 2, 4...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...",CSRU,"[{'suspected_infection_time': None, 'antibioti...",0,"[{'charttime': datetime.datetime(2198, 1, 1, 1...",MARRIED,"[{'charttime': datetime.datetime(2198, 1, 1, 1...",[],"[{'charttime': datetime.datetime(2198, 1, 2, 4...",[],0,0,[],ELECTIVE,0,"[{'charttime': datetime.datetime(2198, 1, 1, 1...",unknown,2198-01-01 07:15:00,[],"[{'charttime': datetime.datetime(2198, 1, 3, 4...",1,1,"[{'charttime': datetime.datetime(2198, 1, 1, 1...",[],[],"[{'charttime': datetime.datetime(2198, 1, 2, 4...",1,"[{'starttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...",0


#### 1.1.1: Column lists

In [4]:
#view and reorder columns
cols = list(df.columns)
new_cols = ['Unnamed: 0','hadm_id','subject_id','gender','ethnicity','marital_status','insurance','language','aortic','mit','tricuspid',
            'pulmonary','cabg','temp','bg_temp','hr','spo2','rr','sbp','dbp','meanbp','weight','height','cardiac_index','pt','ptt',
            'inr','inr_1','fibrinogen','hb','hematocrit','plts','wcc','lymphocytes','neutrophils','alp','ast','alt','ggt',
            'bilirubin_indirect','bilirubin_direct','bilirubin_total','chloride','magnesium','potassium','crp','bleed_time','albumin',
            'creatinine','free_calcium','sodium','bicarb','bun','hba1c','glucose','lactate','po2','pco2','baseexcess','ph','aado2',
            'fio2','ffp','insulin','cryo','prbc','infection','ventrate','tidalvol','vent_array','reintubation','liver_severe','liver_mild',
            'rheum','cvd','aids','ckd','copd','arrhythmia','pud','smoking','pvd','paraplegia','ccf','met_ca','t2dm','t1dm','malig','mi',
            'dementia','first_careunit','last_careunit','admission_location','admission_type','hospital_expire_flag','admittime',
            'dischtime','intime','outtime','ext_time','reint_time','los','icustay_seq','deathtime','plt','diab_un','diab_cc',
            'dtoutput','specimen','dod']

ptinfo=['Unnamed: 0','hadm_id','subject_id']

demographics=['gender','ethnicity','marital_status','insurance','language']

proceduretype=['aortic','mit','tricuspid','pulmonary','cabg']

vitals=['temp','bg_temp','hr','spo2','rr','sbp','dbp','meanbp','weight','height','cardiac_index']

labs=['pt','ptt','inr','inr_1','fibrinogen','hb','hematocrit','plts','wcc','lymphocytes','neutrophils','alp','ast','alt','ggt',
'bilirubin_indirect','bilirubin_direct','bilirubin_total','chloride','magnesium','potassium','crp','bleed_time',
'albumin','creatinine','free_calcium','sodium','bicarb','bun','hba1c','glucose','lactate']

bloodgases=['po2','pco2','baseexcess','ph','aado2','fio2']

products=['ffp','insulin','cryo','prbc','infection']

ventilation=['ventrate','tidalvol','vent_array','reintubation']

comorbidities=['liver_severe','liver_mild','rheum','cvd','aids','ckd','copd','arrhythmia','pud','smoking','pvd',
'paraplegia','ccf','met_ca','t2dm','t1dm','malig','mi','dementia']

adm_cat=['first_careunit','last_careunit','admission_location','admission_type','hospital_expire_flag']

adm_num=['admittime','dischtime','intime','outtime','ext_time','reint_time','los','icustay_seq','deathtime']

others=['plt','diab_un','diab_cc','dtoutput','specimen','dod']

timeseries=[*vitals,*labs,*bloodgases,*products,*ventilation,'plt','dtoutput']
timeseries = [i for i in timeseries if i not in ('weight','height','reintubation', 'infection', 'vent_array')]
    
timeseries_valuenames = {'cardiac_index':'ci',
                         'plts':'bloodproduct',
                         'ffp':'bloodproduct',
                         'insulin':'amount',
                         'cryo':'bloodproduct',
                         'prbc':'bloodproduct',
                         'dtoutput':'output'}

In [5]:
df = df[new_cols]
df.head(10)

Unnamed: 0.1,Unnamed: 0,hadm_id,subject_id,gender,ethnicity,marital_status,insurance,language,aortic,mit,tricuspid,pulmonary,cabg,temp,bg_temp,hr,spo2,rr,sbp,dbp,meanbp,weight,height,cardiac_index,pt,ptt,inr,inr_1,fibrinogen,hb,hematocrit,plts,wcc,lymphocytes,neutrophils,alp,ast,alt,ggt,bilirubin_indirect,bilirubin_direct,bilirubin_total,chloride,magnesium,potassium,crp,bleed_time,albumin,creatinine,free_calcium,sodium,bicarb,bun,hba1c,glucose,lactate,po2,pco2,baseexcess,ph,aado2,fio2,ffp,insulin,cryo,prbc,infection,ventrate,tidalvol,vent_array,reintubation,liver_severe,liver_mild,rheum,cvd,aids,ckd,copd,arrhythmia,pud,smoking,pvd,paraplegia,ccf,met_ca,t2dm,t1dm,malig,mi,dementia,first_careunit,last_careunit,admission_location,admission_type,hospital_expire_flag,admittime,dischtime,intime,outtime,ext_time,reint_time,los,icustay_seq,deathtime,plt,diab_un,diab_cc,dtoutput,specimen,dod
0,0,195663,27328,M,white,SINGLE,Private,ENGL,0,0,0,0,1,"[{'charttime': datetime.datetime(2198, 1, 31, ...",[],"[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...",84.0,172.72,[],[],"[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...",[],"[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...",[],"[{'charttime': datetime.datetime(2198, 1, 31, ...",[],[],[],[],[],[],[],[],[],"[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 2, 2, 0...","[{'charttime': datetime.datetime(2198, 1, 31, ...",[],[],[],"[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...",[],"[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...",[],[],[],[],[],[],"[{'suspected_infection_time': None, 'antibioti...",[],[],"[{'starttime': datetime.datetime(2198, 1, 31, ...",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,CSRU,CSRU,PHYS REFERRAL/NORMAL DELI,ELECTIVE,0,2198-01-31 08:00:00,2198-02-04 12:00:00,2198-01-31 12:27:58,2198-02-02 19:06:39,2198-01-31 22:00:00,,2.2769,1,,"[{'charttime': datetime.datetime(2198, 1, 31, ...",0,0,[],[],
1,1,106984,6280,F,other,DIVORCED,Self Pay,SPAN,1,0,0,0,0,"[{'charttime': datetime.datetime(2198, 5, 8, 1...",[],"[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...",60.0,170.18,"[{'charttime': datetime.datetime(2198, 5, 8, 1...",[],"[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...",[],"[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 10, ...",[],[],[],[],[],[],[],[],[],"[{'charttime': datetime.datetime(2198, 5, 9, 3...","[{'charttime': datetime.datetime(2198, 5, 9, 3...","[{'charttime': datetime.datetime(2198, 5, 8, 1...",[],[],[],"[{'charttime': datetime.datetime(2198, 5, 9, 3...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 9, 3...","[{'charttime': datetime.datetime(2198, 5, 9, 3...",[],"[{'charttime': datetime.datetime(2198, 5, 8, 1...",[],"[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...",[],[],"[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'suspected_infection_time': None, 'antibioti...",[],[],"[{'starttime': datetime.datetime(2198, 5, 8, 1...",0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,CSRU,CSRU,PHYS REFERRAL/NORMAL DELI,ELECTIVE,0,2198-05-08 07:15:00,2198-05-15 13:49:00,2198-05-08 13:14:00,2198-05-10 19:46:00,2198-05-09 09:29:00,,2.2722,1,,"[{'charttime': datetime.datetime(2198, 5, 8, 1...",0,0,[],[],
2,2,123613,15201,F,unknown,MARRIED,Medicare,,1,0,0,0,1,"[{'charttime': datetime.datetime(2189, 2, 18, ...",[],"[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...",57.0,165.1,"[{'charttime': datetime.datetime(2189, 2, 18, ...",[],"[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...",[],"[{'charttime': datetime.datetime(2189, 2, 18, ...",[],[],[],[],[],[],[],[],[],"[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 19, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...",[],[],[],"[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 10, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...",[],"[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...",[],[],[],"[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'suspected_infection_time': None, 'antibioti...",[],"[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'starttime': datetime.datetime(2189, 2, 18, ...",0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,CSRU,CSRU,PHYS REFERRAL/NORMAL DELI,ELECTIVE,0,2189-02-18 08:00:00,2189-03-17 14:20:00,2189-02-18 10:51:08,2189-02-20 13:37:48,2189-02-19 09:00:00,,2.1157,1,,"[{'charttime': datetime.datetime(2189, 2, 18, ...",0,0,[],[],2191-12-14
3,3,126027,25226,M,unknown,MARRIED,Private,,0,0,0,0,1,"[{'charttime': datetime.datetime(2118, 1, 25, ...",[],"[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...",135.0,190.5,"[{'charttime': datetime.datetime(2118, 1, 25, ...",[],"[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...",[],"[{'charttime': datetime.datetime(2118, 1, 26, ...",[],[],[],[],[],[],[],[],[],"[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...",[],[],[],"[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 14, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...",[],"[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...",[],[],[],[],"[{'suspected_infection_time': None, 'antibioti...",[],"[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'starttime': datetime.datetime(2118, 1, 25, ...",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,CSRU,CSRU,PHYS REFERRAL/NORMAL DELI,ELECTIVE,0,2118-01-25 07:15:00,2118-01-29 13:00:00,2118-01-25 10:46:42,2118-01-26 12:33:02,2118-01-25 17:30:00,,1.0738,1,,"[{'charttime': datetime.datetime(2118, 1, 25, ...",1,0,[],[],
4,4,190332,19637,M,unknown,MARRIED,Medicare,,1,0,0,0,0,"[{'charttime': datetime.datetime(2198, 1, 1, 1...",[],"[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...",70.0,175.26,"[{'charttime': datetime.datetime(2198, 1, 1, 1...",[],[],"[{'charttime': datetime.datetime(2198, 1, 3, 4...","[{'charttime': datetime.datetime(2198, 1, 3, 4...",[],"[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...",[],"[{'charttime': datetime.datetime(2198, 1, 2, 4...",[],[],[],[],[],[],[],[],[],"[{'charttime': datetime.datetime(2198, 1, 2, 4...",[],"[{'charttime': datetime.datetime(2198, 1, 1, 1...",[],[],[],"[{'charttime': datetime.datetime(2198, 1, 2, 4...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 2, 4...","[{'charttime': datetime.datetime(2198, 1, 2, 4...",[],"[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...",[],[],[],"[{'charttime': datetime.datetime(2198, 1, 2, 6...","[{'suspected_infection_time': None, 'antibioti...",[],"[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'starttime': datetime.datetime(2198, 1, 1, 1...",0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,CSRU,CSRU,PHYS REFERRAL/NORMAL DELI,ELECTIVE,0,2198-01-01 07:15:00,2198-01-09 13:07:00,2198-01-01 10:47:00,2198-01-03 12:00:00,2198-01-01 21:00:00,,2.0507,1,,"[{'charttime': datetime.datetime(2198, 1, 2, 4...",1,0,[],[],2203-12-06
5,5,115203,29498,F,unknown,SINGLE,Medicaid,ENGL,0,0,0,0,1,"[{'charttime': datetime.datetime(2130, 12, 8, ...",[],"[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...",73.3,162.56,"[{'charttime': datetime.datetime(2130, 12, 8, ...",[],"[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...",[],"[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...",[],[],[],[],[],[],[],"[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...",[],[],[],"[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 7, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...",[],[],[],"[{'charttime': datetime.datetime(2130, 12, 8, ...",[{'suspected_infection_time': datetime.datetim...,[],[],"[{'starttime': datetime.datetime(2130, 12, 8, ...",0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,CSRU,CSRU,TRANSFER FROM HOSP/EXTRAM,EMERGENCY,0,2130-12-06 18:30:00,2130-12-18 18:38:00,2130-12-08 10:42:40,2130-12-12 12:08:24,2130-12-09 12:00:00,,4.0595,1,,"[{'charttime': datetime.datetime(2130, 12, 8, ...",1,0,"[{'charttime': datetime.datetime(2130, 12, 8, ...",[],
6,6,181661,18498,M,white,WIDOWED,Medicare,,0,1,0,0,1,"[{'charttime': datetime.datetime(2113, 5, 11, ...","[{'charttime': datetime.datetime(2113, 5, 12, ...","[{'charttime': datetime.datetime(2113, 5, 11, ...","[{'charttime': datetime.datetime(2113, 5, 11, ...","[{'charttime': datetime.datetime(2113, 5, 11, ...","[{'charttime': datetime.datetime(2113, 5, 11, ...","[{'charttime': datetime.datetime(2113, 5, 11, ...","[{'charttime': datetime.datetime(2113, 5, 11, ...",101.15,,"[{'charttime': datetime.datetime(2113, 5, 11, ...",[],"[{'charttime': datetime.datetime(2113, 5, 12, ...","[{'charttime': datetime.datetime(2113, 5, 12, ...","[{'charttime': datetime.datetime(2113, 5, 12, ...",[],"[{'charttime': datetime.datetime(2113, 5, 11, ...","[{'charttime': datetime.datetime(2113, 5, 11, ...",[],"[{'charttime': datetime.datetime(2113, 5, 12, ...","[{'charttime': datetime.datetime(2113, 5, 12, ...","[{'charttime': datetime.datetime(2113, 5, 12, ...",[],[],[],[],[],[],"[{'charttime': datetime.datetime(2113, 5, 13, ...","[{'charttime': datetime.datetime(2113, 5, 12, ...","[{'charttime': datetime.datetime(2113, 5, 13, ...","[{'charttime': datetime.datetime(2113, 5, 12, ...",[],[],[],"[{'charttime': datetime.datetime(2113, 5, 12, ...",[],"[{'charttime': datetime.datetime(2113, 5, 12, ...","[{'charttime': datetime.datetime(2113, 5, 12, ...","[{'charttime': datetime.datetime(2113, 5, 12, ...",[],"[{'charttime': datetime.datetime(2113, 5, 12, ...",[],"[{'charttime': datetime.datetime(2113, 5, 12, ...","[{'charttime': datetime.datetime(2113, 5, 12, ...","[{'charttime': datetime.datetime(2113, 5, 12, ...","[{'charttime': datetime.datetime(2113, 5, 12, ...",[],[],[],[],[],[],"[{'suspected_infection_time': None, 'antibioti...",[],[],[],0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,CCU,CCU,EMERGENCY ROOM ADMIT,EMERGENCY,0,2113-05-07 12:16:00,2113-05-26 13:55:00,2113-05-11 15:46:43,2113-05-13 18:23:43,,,2.109,1,,"[{'charttime': datetime.datetime(2113, 5, 12, ...",0,0,[],[],
7,7,195614,29429,M,white,UNKNOWN (DEFAULT),Private,ENGL,0,1,0,0,0,"[{'charttime': datetime.datetime(2139, 10, 6, ...",[],"[{'charttime': datetime.datetime(2139, 10, 6, ...","[{'charttime': datetime.datetime(2139, 10, 6, ...","[{'charttime': datetime.datetime(2139, 10, 6, ...","[{'charttime': datetime.datetime(2139, 10, 6, ...","[{'charttime': datetime.datetime(2139, 10, 6, ...","[{'charttime': datetime.datetime(2139, 10, 6, ...",62.3,167.64,"[{'charttime': datetime.datetime(2139, 10, 6, ...",[],"[{'charttime': datetime.datetime(2139, 10, 6, ...","[{'charttime': datetime.datetime(2139, 10, 6, ...","[{'charttime': datetime.datetime(2139, 10, 6, ...","[{'charttime': datetime.datetime(2139, 10, 6, ...","[{'charttime': datetime.datetime(2139, 10, 6, ...","[{'charttime': datetime.datetime(2139, 10, 6, ...",[],"[{'charttime': datetime.datetime(2139, 10, 6, ...",[],[],[],[],[],[],[],[],[],"[{'charttime': datetime.datetime(2139, 10, 6, ...","[{'charttime': datetime.datetime(2139, 10, 7, ...","[{'charttime': datetime.datetime(2139, 10, 6, ...",[],[],[],"[{'charttime': datetime.datetime(2139, 10, 6, ...","[{'charttime': datetime.datetime(2139, 10, 6, ...","[{'charttime': datetime.datetime(2139, 10, 6, ...","[{'charttime': datetime.datetime(2139, 10, 6, ...","[{'charttime': datetime.datetime(2139, 10, 6, ...","[{'charttime': datetime.datetime(2139, 9, 15, ...","[{'charttime': datetime.datetime(2139, 10, 6, ...","[{'charttime': datetime.datetime(2139, 10, 6, ...","[{'charttime': datetime.datetime(2139, 10, 6, ...","[{'charttime': datetime.datetime(2139, 10, 6, ...","[{'charttime': datetime.datetime(2139, 10, 6, ...","[{'charttime': datetime.datetime(2139, 10, 6, ...",[],[],[],[],[],[],"[{'suspected_infection_time': None, 'antibioti...",[],[],"[{'starttime': datetime.datetime(2139, 10, 6, ...",0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,CSRU,CSRU,PHYS REFERRAL/NORMAL DELI,ELECTIVE,0,2139-10-06 07:15:00,2139-10-11 15:04:00,2139-10-06 09:31:07,2139-10-07 17:13:18,2139-10-06 16:20:00,,1.321,1,,"[{'charttime': datetime.datetime(2139, 10, 6, ...",0,0,[],[],
8,8,190585,28892,M,white,MARRIED,Private,ENGL,0,0,0,0,1,"[{'charttime': datetime.datetime(2130, 10, 5, ...",[],"[{'charttime': datetime.datetime(2130, 10, 5, ...","[{'charttime': datetime.datetime(2130, 10, 5, ...","[{'charttime': datetime.datetime(2130, 10, 5, ...","[{'charttime': datetime.datetime(2130, 10, 5, ...","[{'charttime': datetime.datetime(2130, 10, 5, ...","[{'charttime': datetime.datetime(2130, 10, 5, ...",79.9,165.1,"[{'charttime': datetime.datetime(2130, 10, 5, ...",[],"[{'charttime': datetime.datetime(2130, 10, 5, ...","[{'charttime': datetime.datetime(2130, 10, 5, ...","[{'charttime': datetime.datetime(2130, 10, 5, ...",[],"[{'charttime': datetime.datetime(2130, 10, 5, ...","[{'charttime': datetime.datetime(2130, 10, 5, ...",[],"[{'charttime': datetime.datetime(2130, 10, 5, ...",[],[],[],[],[],[],[],[],[],"[{'charttime': datetime.datetime(2130, 10, 5, ...","[{'charttime': datetime.datetime(2130, 10, 6, ...","[{'charttime': datetime.datetime(2130, 10, 5, ...",[],[],[],"[{'charttime': datetime.datetime(2130, 10, 5, ...","[{'charttime': datetime.datetime(2130, 10, 5, ...","[{'charttime': datetime.datetime(2130, 10, 5, ...","[{'charttime': datetime.datetime(2130, 10, 5, ...","[{'charttime': datetime.datetime(2130, 10, 5, ...","[{'charttime': datetime.datetime(2130, 9, 29, ...","[{'charttime': datetime.datetime(2130, 10, 5, ...","[{'charttime': datetime.datetime(2130, 10, 5, ...","[{'charttime': datetime.datetime(2130, 10, 5, ...","[{'charttime': datetime.datetime(2130, 10, 5, ...","[{'charttime': datetime.datetime(2130, 10, 5, ...","[{'charttime': datetime.datetime(2130, 10, 5, ...",[],[],[],[],[],[],"[{'suspected_infection_time': None, 'antibioti...",[],[],"[{'starttime': datetime.datetime(2130, 10, 5, ...",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,CSRU,CSRU,PHYS REFERRAL/NORMAL DELI,ELECTIVE,0,2130-10-05 07:15:00,2130-10-09 17:00:00,2130-10-05 08:50:46,2130-10-06 21:54:27,2130-10-05 13:30:00,,1.5442,1,,"[{'charttime': datetime.datetime(2130, 10, 5, ...",0,0,[],[],
9,9,190638,25989,M,white,SINGLE,Medicaid,ENGL,0,0,0,0,1,"[{'charttime': datetime.datetime(2142, 3, 7, 1...","[{'charttime': datetime.datetime(2142, 3, 7, 2...","[{'charttime': datetime.datetime(2142, 3, 7, 1...","[{'charttime': datetime.datetime(2142, 3, 7, 1...","[{'charttime': datetime.datetime(2142, 3, 7, 1...","[{'charttime': datetime.datetime(2142, 3, 7, 1...","[{'charttime': datetime.datetime(2142, 3, 7, 1...","[{'charttime': datetime.datetime(2142, 3, 7, 1...",133.1,175.26,"[{'charttime': datetime.datetime(2142, 3, 7, 1...",[],"[{'charttime': datetime.datetime(2142, 3, 9, 2...","[{'charttime': datetime.datetime(2142, 3, 9, 2...","[{'charttime': datetime.datetime(2142, 3, 9, 2...",[],"[{'charttime': datetime.datetime(2142, 3, 7, 9...","[{'charttime': datetime.datetime(2142, 3, 7, 9...",[],"[{'charttime': datetime.datetime(2142, 3, 7, 1...","[{'charttime': datetime.datetime(2142, 3, 9, 2...","[{'charttime': datetime.datetime(2142, 3, 9, 2...",[],[],[],[],[],[],[],"[{'charttime': datetime.datetime(2142, 3, 7, 1...","[{'charttime': datetime.datetime(2142, 3, 8, 2...","[{'charttime': datetime.datetime(2142, 3, 7, 9...",[],[],[],"[{'charttime': datetime.datetime(2142, 3, 7, 1...","[{'charttime': datetime.datetime(2142, 3, 7, 9...","[{'charttime': datetime.datetime(2142, 3, 7, 9...","[{'charttime': datetime.datetime(2142, 3, 7, 1...","[{'charttime': datetime.datetime(2142, 3, 7, 1...",[],"[{'charttime': datetime.datetime(2142, 3, 7, 9...",[],"[{'charttime': datetime.datetime(2142, 3, 7, 9...","[{'charttime': datetime.datetime(2142, 3, 7, 9...","[{'charttime': datetime.datetime(2142, 3, 7, 9...","[{'charttime': datetime.datetime(2142, 3, 7, 9...",[],"[{'charttime': datetime.datetime(2142, 3, 8, 3...",[],[],[],[],"[{'suspected_infection_time': None, 'antibioti...",[],"[{'charttime': datetime.datetime(2142, 3, 8, 3...","[{'starttime': datetime.datetime(2142, 3, 7, 1...",0,0,0,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,1,0,CSRU,CSRU,EMERGENCY ROOM ADMIT,EMERGENCY,0,2142-03-05 17:10:00,2142-03-13 16:40:00,2142-03-07 09:44:47,2142-03-11 11:41:29,2142-03-08 12:00:00,,4.081,1,,"[{'charttime': datetime.datetime(2142, 3, 7, 1...",0,0,[],[],


### 1.2: Cleaning data types

#### 1.2.0: NaN assignment

In [6]:
df = df.replace('NaT',np.datetime64('NaT'))
df = df.replace(['[]','NaN',np.datetime64('NaT')],np.NaN)

#### 1.2.1: Datetime columns

In [7]:
# set column types as datetime
time_cols = ['admittime','dischtime','intime','outtime','reint_time','ext_time','deathtime']
for col in time_cols:
    df[col] = pd.to_datetime(df[col], format='%Y-%m-%d %H:%M:%S')

#dod
df['dod'] = pd.to_datetime(df['dod'], format='%Y-%m-%d')

In [8]:
## CHECK FOR ROWS WHERE DEATHTIME < INTIME OR ADMITTIME

In [9]:
df[time_cols].dtypes

admittime     datetime64[ns]
dischtime     datetime64[ns]
intime        datetime64[ns]
outtime       datetime64[ns]
reint_time    datetime64[ns]
ext_time      datetime64[ns]
deathtime     datetime64[ns]
dtype: object

#### 1.2.2: Demographics

In [10]:
for x in demographics:
    print(x,': ',df[x].unique())

gender :  ['M' 'F']
ethnicity :  ['white' 'other' 'unknown' 'hispanic' 'black' 'asian' 'native' 'WHITE'
 'UNKNOWN' 'OTHER' 'BLACK/AFRICAN AMERICAN' 'HISPANIC/LATINO'
 'UNABLE TO OBTAIN' 'ASIAN' 'AMERICAN INDIAN/ALASKA NATIVE']
marital_status :  ['SINGLE' 'DIVORCED' 'MARRIED' 'WIDOWED' 'UNKNOWN (DEFAULT)' nan
 'SEPARATED']
insurance :  ['Private' 'Self Pay' 'Medicare' 'Medicaid' 'Government' 'Other']
language :  ['ENGL' 'SPAN' nan 'VIET' 'RUSS' 'HAIT' 'CANT' 'PORT' 'PTUN' 'ALBA' 'THAI'
 'ARAB' 'GREE' 'AMER' '*LEB' '*BEN' 'CAPE' 'POLI' 'ITAL' 'HIND' 'URDU'
 'KORE' 'GERM' 'TURK' 'ETHI' 'CAMB' 'MAND' '*GUJ' 'PERS' 'ENGLISH' '?']


In [11]:
#ethnicity
df.replace({'ethnicity':
                {'unknown': np.NaN,'UNKNOWN':np.NaN,'UNABLE TO OBTAIN':np.NaN,
                'OTHER':'other','WHITE':'white','BLACK/AFRICAN AMERICAN':'black','ASIAN':'asian',
                'HISPANIC/LATINO':'hispanic','AMERICAN INDIAN/ALASKA NATIVE':'native'
                }
            }, 
            inplace=True)
print(df['ethnicity'].unique())

['white' 'other' nan 'hispanic' 'black' 'asian' 'native']


In [12]:
#marital_status
df.replace({'marital_status':
                {'UNKNOWN (DEFAULT)': np.NaN
                }
            }, 
            inplace=True)
print(df['marital_status'].unique())

['SINGLE' 'DIVORCED' 'MARRIED' 'WIDOWED' nan 'SEPARATED']


In [13]:
#language
df.replace({'language':
                {'ENGLISH':'ENGL','?':np.NaN
                }
            }, 
            inplace=True)
print(df['marital_status'].unique())

['SINGLE' 'DIVORCED' 'MARRIED' 'WIDOWED' nan 'SEPARATED']


#### 1.2.3: ✔Procedure type

In [14]:
for x in proceduretype:
    print(x,': ',df[x].unique())

aortic :  [0 1]
mit :  [0 1]
tricuspid :  [0 1]
pulmonary :  [0 1]
cabg :  [1 0]


#### 1.2.4: **Vitals / Blood Gases / Products + infection / Ventilation


In [15]:
# wait for Jahan/others
# ventrate seems to be empty

#### 1.2.5: ✔Comorbidities

In [16]:
for x in comorbidities:
    print(x,': ',df[x].unique())

liver_severe :  [0 1]
liver_mild :  [0 1]
rheum :  [0 1]
cvd :  [0 1]
aids :  [0 1]
ckd :  [0 1]
copd :  [0 1]
arrhythmia :  [0 1]
pud :  [0 1]
smoking :  [0 1]
pvd :  [0 1]
paraplegia :  [0 1]
ccf :  [0 1]
met_ca :  [0 1]
t2dm :  [0 1]
t1dm :  [0 1]
malig :  [0 1]
mi :  [1 0]
dementia :  [0 1]


#### 1.2.6: Admissions (categorical)

In [17]:
for x in adm_cat:
    print(x,': ',df[x].unique())

first_careunit :  ['CSRU' 'CCU' 'TSICU' 'SICU' 'MICU'
 'Cardiac Vascular Intensive Care Unit (CVICU)' 'Coronary Care Unit (CCU)'
 'Trauma SICU (TSICU)' 'Medical Intensive Care Unit (MICU)'
 'Medical/Surgical Intensive Care Unit (MICU/SICU)'
 'Surgical Intensive Care Unit (SICU)'
 'Neuro Surgical Intensive Care Unit (Neuro SICU)' 'Neuro Intermediate']
last_careunit :  ['CSRU' 'CCU' 'MICU' 'SICU' 'TSICU'
 'Cardiac Vascular Intensive Care Unit (CVICU)' 'Coronary Care Unit (CCU)'
 'Medical Intensive Care Unit (MICU)' 'Trauma SICU (TSICU)'
 'Medical/Surgical Intensive Care Unit (MICU/SICU)'
 'Surgical Intensive Care Unit (SICU)'
 'Neuro Surgical Intensive Care Unit (Neuro SICU)']
admission_location :  ['PHYS REFERRAL/NORMAL DELI' 'TRANSFER FROM HOSP/EXTRAM'
 'EMERGENCY ROOM ADMIT' 'CLINIC REFERRAL/PREMATURE'
 'TRANSFER FROM OTHER HEALT' 'TRANSFER FROM SKILLED NUR'
 'PHYSICIAN REFERRAL' 'TRANSFER FROM HOSPITAL' 'EMERGENCY ROOM' 'PACU'
 'PROCEDURE SITE' 'TRANSFER FROM SKILLED NURSING FACILITY

In [18]:
#first_careunit
df.replace({'first_careunit':
                {'Cardiac Vascular Intensive Care Unit (CVICU)':'CVICU',
                'Coronary Care Unit (CCU)':'CCU',
                'Medical Intensive Care Unit (MICU)':'MICU',
                'Surgical Intensive Care Unit (SICU)':'SICU',
                'Neuro Intermediate':'Neuro Inter',
                'Medical/Surgical Intensive Care Unit (MICU/SICU)':'MICU/SICU',
                'Trauma SICU (TSICU)':'TSICU',
                'Neuro Surgical Intensive Care Unit (Neuro SICU)':'Neuro SICU'
                }
            }, 
            inplace=True)
print(df['first_careunit'].unique())

['CSRU' 'CCU' 'TSICU' 'SICU' 'MICU' 'CVICU' 'MICU/SICU' 'Neuro SICU'
 'Neuro Inter']


In [19]:
#last_careunit
df.replace({'last_careunit':
                {'Cardiac Vascular Intensive Care Unit (CVICU)':'CVICU',
                'Coronary Care Unit (CCU)':'CCU',
                'Medical Intensive Care Unit (MICU)':'MICU',
                'Surgical Intensive Care Unit (SICU)':'SICU',
                'Neuro Intermediate':'Neuro Inter',
                'Medical/Surgical Intensive Care Unit (MICU/SICU)':'MICU/SICU',
                'Trauma SICU (TSICU)':'TSICU',
                'Neuro Surgical Intensive Care Unit (Neuro SICU)':'Neuro SICU'
                }
            }, 
            inplace=True)
print(df['last_careunit'].unique())

['CSRU' 'CCU' 'MICU' 'SICU' 'TSICU' 'CVICU' 'MICU/SICU' 'Neuro SICU']


In [20]:
#admission_location
df.replace({'admission_location':
                {'TRANSFER FROM HOSP/EXTRAM':'TRANSFER FROM HOSPITAL',
                'PHYS REFERRAL/NORMAL DELI':'PHYSICIAN REFERRAL',
                'TRANSFER FROM SKILLED NUR':'TRANSFER FROM SKILLED NURSING FACILITY',
                'INFORMATION NOT AVAILABLE':np.NaN,
                'CLINIC REFERRAL':'CLINIC REFERRAL/PREMATURE',
                'EMERGENCY ROOM ADMIT':'EMERGENCY ROOM',
                }
            }, 
            inplace=True)
print(df['admission_location'].unique())

['PHYSICIAN REFERRAL' 'TRANSFER FROM HOSPITAL' 'EMERGENCY ROOM'
 'CLINIC REFERRAL/PREMATURE' 'TRANSFER FROM OTHER HEALT'
 'TRANSFER FROM SKILLED NURSING FACILITY' 'PACU' 'PROCEDURE SITE'
 'WALK-IN/SELF REFERRAL' nan 'INTERNAL TRANSFER TO OR FROM PSYCH'
 'AMBULATORY SURGERY TRANSFER']


#### 1.2.7: Others

In [21]:
# for x in others:
#     print(x,': ',df[x].unique())

### 1.3: Parsing time series data

In [22]:
df['vent_array'][14]

"[{'starttime': datetime.datetime(2184, 1, 18, 1, 43), 'endtime': datetime.datetime(2184, 1, 18, 4, 20), 'duration_hours': 2.6166666666666667}]"

In [23]:
def va_parser(row, output=6):
    """
    Takes row from `df` returns a list of starttime, endtime, vent duration 
    for first and (if applicable) second intubations

    Parameters
    ----------
    row : row in df
    output_ : select which output you want (use list index below) - e.g. args=[6] for all output when using df.apply()

    Returns
    -------
    single list variable containing  
        [0] int_time1: first intubation starttime
        [1] ext_time1: first intubation endtime
        [2] duration1: first intubation duration
        [3] int_time2: second intubation starttime
        [4] ext_time2: second intubation endtime 
        [5] duration2: second intubation duration
        [6] all

    """
    int_time1=np.NaN
    ext_time1=np.NaN
    duration1=np.NaN
    int_time2=np.NaN
    ext_time2=np.NaN
    duration2=np.NaN
    value = row['vent_array']
    list=[]
    '''a = value
    print(value)'''
    if value == np.NaN or pd.isna(value):
        return np.NaN
    a = value.replace("'",'"')
    a = a.replace('\n ...\n',',').replace('\n', ',').replace('...', '')
    a = a.replace('datetime.','"dt.')
    a = a.replace('),', ')",')
    a = json.loads(a)
    b = [(i['starttime'], i['endtime'], i['duration_hours']) for i in a]
    int_time1=dt.datetime.strptime(b[0][0],'dt.datetime(%Y, %m, %d, %H, %M)')
    ext_time1=dt.datetime.strptime(b[0][1],'dt.datetime(%Y, %m, %d, %H, %M)')
    duration1=b[0][2]
    
    if output==0:
        return int_time1
    if output==1:
        return ext_time1
    if output==2:
        return duration1

    if len(b)>=2:
        int_time2=dt.datetime.strptime(b[1][0],'dt.datetime(%Y, %m, %d, %H, %M)')
        ext_time2=dt.datetime.strptime(b[1][1],'dt.datetime(%Y, %m, %d, %H, %M)')
        duration2=b[1][2]
    if output==3:
        return int_time2
    if output==4:
        return ext_time2
    if output==5:
        return duration2
    if output==6:
        return int_time1, ext_time1, duration1, int_time2, ext_time2, duration2

In [24]:
df['int_time1']=df.apply(va_parser, args=[0], axis=1)
df['ext_time1']=df.apply(va_parser, args=[1], axis=1)
df['duration1']=df.apply(va_parser, args=[2], axis=1)
df['int_time2']=df.apply(va_parser, args=[3], axis=1)
df['ext_time2']=df.apply(va_parser, args=[4], axis=1)
df['duration2']=df.apply(va_parser, args=[5], axis=1)
df.head()

Unnamed: 0.1,Unnamed: 0,hadm_id,subject_id,gender,ethnicity,marital_status,insurance,language,aortic,mit,tricuspid,pulmonary,cabg,temp,bg_temp,hr,spo2,rr,sbp,dbp,meanbp,weight,height,cardiac_index,pt,ptt,inr,inr_1,fibrinogen,hb,hematocrit,plts,wcc,lymphocytes,neutrophils,alp,ast,alt,ggt,bilirubin_indirect,bilirubin_direct,bilirubin_total,chloride,magnesium,potassium,crp,bleed_time,albumin,creatinine,free_calcium,sodium,bicarb,bun,hba1c,glucose,lactate,po2,pco2,baseexcess,ph,aado2,fio2,ffp,insulin,cryo,prbc,infection,ventrate,tidalvol,vent_array,reintubation,liver_severe,liver_mild,rheum,cvd,aids,ckd,copd,arrhythmia,pud,smoking,pvd,paraplegia,ccf,met_ca,t2dm,t1dm,malig,mi,dementia,first_careunit,last_careunit,admission_location,admission_type,hospital_expire_flag,admittime,dischtime,intime,outtime,ext_time,reint_time,los,icustay_seq,deathtime,plt,diab_un,diab_cc,dtoutput,specimen,dod,int_time1,ext_time1,duration1,int_time2,ext_time2,duration2
0,0,195663,27328,M,white,SINGLE,Private,ENGL,0,0,0,0,1,"[{'charttime': datetime.datetime(2198, 1, 31, ...",,"[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...",84.0,172.72,,,"[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...",,"[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...",,"[{'charttime': datetime.datetime(2198, 1, 31, ...",,,,,,,,,,"[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 2, 2, 0...","[{'charttime': datetime.datetime(2198, 1, 31, ...",,,,"[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...",,"[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...",,,,,,,"[{'suspected_infection_time': None, 'antibioti...",,,"[{'starttime': datetime.datetime(2198, 1, 31, ...",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,CSRU,CSRU,PHYSICIAN REFERRAL,ELECTIVE,0,2198-01-31 08:00:00,2198-02-04 12:00:00,2198-01-31 12:27:58,2198-02-02 19:06:39,2198-01-31 22:00:00,NaT,2.2769,1,NaT,"[{'charttime': datetime.datetime(2198, 1, 31, ...",0,0,,,NaT,2198-01-31 17:00:00,2198-01-31 22:00:00,5.0,NaT,NaT,
1,1,106984,6280,F,other,DIVORCED,Self Pay,SPAN,1,0,0,0,0,"[{'charttime': datetime.datetime(2198, 5, 8, 1...",,"[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...",60.0,170.18,"[{'charttime': datetime.datetime(2198, 5, 8, 1...",,"[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...",,"[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 10, ...",,,,,,,,,,"[{'charttime': datetime.datetime(2198, 5, 9, 3...","[{'charttime': datetime.datetime(2198, 5, 9, 3...","[{'charttime': datetime.datetime(2198, 5, 8, 1...",,,,"[{'charttime': datetime.datetime(2198, 5, 9, 3...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 9, 3...","[{'charttime': datetime.datetime(2198, 5, 9, 3...",,"[{'charttime': datetime.datetime(2198, 5, 8, 1...",,"[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...",,,"[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'suspected_infection_time': None, 'antibioti...",,,"[{'starttime': datetime.datetime(2198, 5, 8, 1...",0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,CSRU,CSRU,PHYSICIAN REFERRAL,ELECTIVE,0,2198-05-08 07:15:00,2198-05-15 13:49:00,2198-05-08 13:14:00,2198-05-10 19:46:00,2198-05-09 09:29:00,NaT,2.2722,1,NaT,"[{'charttime': datetime.datetime(2198, 5, 8, 1...",0,0,,,NaT,2198-05-08 17:00:00,2198-05-09 09:29:00,16.483333,NaT,NaT,
2,2,123613,15201,F,,MARRIED,Medicare,,1,0,0,0,1,"[{'charttime': datetime.datetime(2189, 2, 18, ...",,"[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...",57.0,165.1,"[{'charttime': datetime.datetime(2189, 2, 18, ...",,"[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...",,"[{'charttime': datetime.datetime(2189, 2, 18, ...",,,,,,,,,,"[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 19, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...",,,,"[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 10, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...",,"[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...",,,,"[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'suspected_infection_time': None, 'antibioti...",,"[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'starttime': datetime.datetime(2189, 2, 18, ...",0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,CSRU,CSRU,PHYSICIAN REFERRAL,ELECTIVE,0,2189-02-18 08:00:00,2189-03-17 14:20:00,2189-02-18 10:51:08,2189-02-20 13:37:48,2189-02-19 09:00:00,NaT,2.1157,1,NaT,"[{'charttime': datetime.datetime(2189, 2, 18, ...",0,0,,,2191-12-14,2189-02-18 13:00:00,2189-02-19 09:00:00,20.0,NaT,NaT,
3,3,126027,25226,M,,MARRIED,Private,,0,0,0,0,1,"[{'charttime': datetime.datetime(2118, 1, 25, ...",,"[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...",135.0,190.5,"[{'charttime': datetime.datetime(2118, 1, 25, ...",,"[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...",,"[{'charttime': datetime.datetime(2118, 1, 26, ...",,,,,,,,,,"[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...",,,,"[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 14, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...",,"[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...",,,,,"[{'suspected_infection_time': None, 'antibioti...",,"[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'starttime': datetime.datetime(2118, 1, 25, ...",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,CSRU,CSRU,PHYSICIAN REFERRAL,ELECTIVE,0,2118-01-25 07:15:00,2118-01-29 13:00:00,2118-01-25 10:46:42,2118-01-26 12:33:02,2118-01-25 17:30:00,NaT,1.0738,1,NaT,"[{'charttime': datetime.datetime(2118, 1, 25, ...",1,0,,,NaT,2118-01-25 13:00:00,2118-01-25 17:30:00,4.5,NaT,NaT,
4,4,190332,19637,M,,MARRIED,Medicare,,1,0,0,0,0,"[{'charttime': datetime.datetime(2198, 1, 1, 1...",,"[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...",70.0,175.26,"[{'charttime': datetime.datetime(2198, 1, 1, 1...",,,"[{'charttime': datetime.datetime(2198, 1, 3, 4...","[{'charttime': datetime.datetime(2198, 1, 3, 4...",,"[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...",,"[{'charttime': datetime.datetime(2198, 1, 2, 4...",,,,,,,,,,"[{'charttime': datetime.datetime(2198, 1, 2, 4...",,"[{'charttime': datetime.datetime(2198, 1, 1, 1...",,,,"[{'charttime': datetime.datetime(2198, 1, 2, 4...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 2, 4...","[{'charttime': datetime.datetime(2198, 1, 2, 4...",,"[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...",,,,"[{'charttime': datetime.datetime(2198, 1, 2, 6...","[{'suspected_infection_time': None, 'antibioti...",,"[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'starttime': datetime.datetime(2198, 1, 1, 1...",0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,CSRU,CSRU,PHYSICIAN REFERRAL,ELECTIVE,0,2198-01-01 07:15:00,2198-01-09 13:07:00,2198-01-01 10:47:00,2198-01-03 12:00:00,2198-01-01 21:00:00,NaT,2.0507,1,NaT,"[{'charttime': datetime.datetime(2198, 1, 2, 4...",1,0,,,2203-12-06,2198-01-01 13:00:00,2198-01-01 21:00:00,8.0,NaT,NaT,


In [25]:
def inf_parser(value, timeLimits=None):
    if value == np.NaN or pd.isna(value):
        return 0
    a = value.replace("'", '"')
    a = a.replace('\n ...\n',',').replace('\n', ',').replace('...', '')
    a = a.replace('datetime.', '"dt.')
    a = a.replace('": None', '": "None"')
    for valuename in ['antibiotic','antibiotic_time']:
        a = a.replace(f'), "{valuename}"', f')", "{valuename}"')
    a = json.loads(a)
    b = [(eval(i['suspected_infection_time']), eval(i['antibiotic_time']), i['antibiotic'], i['specimen'], 
               i['positiveculture']) for i in a]
    sus = [i[0] for i in b]
    abx = [i[1] for i in b]
    pos = [i[4] for i in b]
    inf_ = 0
    for i in range(len(sus)):
        if sus[i]==None:
            sus[i]=dt.datetime(1000, 1, 1, 0, 0)
        if abx[i]==None:
            abx[i]=dt.datetime(1000, 1, 1, 0, 0)
        if ((sus[i]>= timeLimits[0] and sus[i]<=timeLimits[1]) or (abx[i]>= timeLimits[0] and abx[i]<=timeLimits[1])) and pos[i] == 1.0:
            temp = 1
            break
        else:
            continue
    return inf_

In [26]:
def ts_parser(value, timeDelta=None, timeLimits=None, valuename='value'):
    # timeDelta is timedelta in hours from earliest entry
    # timeLimits = (startTime, endTime)
    # if both timeDelta and timeLimits are provided, timeDelta overrules.
    # if both are None, then all timepoints are accepted
    if value == np.NaN or pd.isna(value):
        return np.NaN, np.NaN, np.NaN
    a = value.replace("'", '"')
    a = a.replace('\n ...\n',',').replace('\n', ',').replace('...', '')
    a = a.replace('datetime.', '"dt.')
    a = a.replace(f'), "{valuename}"', f')", "{valuename}"')
    a = a.replace('"unit": None', '"unit": "None"')
    a = a.replace('starttime', 'charttime')
    a = json.loads(a)
    b = [(eval(i['charttime']), i[valuename]) for i in a]
    
    if timeDelta:
        startTime = min(b, key=lambda x:x[0])[0]
        inc_b = [i[1] for i in b if i[0] <= startTime + dt.timedelta(hours=timeDelta)]
    else:
        if timeLimits:
            inc_b = [i[1] for i in b if i[0] >= timeLimits[0] and i[0] <= timeLimits[1]]
        else:
            inc_b = [i[1] for i in b]
    if len(inc_b) == 0:
        return np.NaN, np.NaN, np.NaN
    
    return sum(inc_b) / len(inc_b), max(inc_b), min(inc_b)

In [27]:
# delete all rows where int_time1 OR ext_time1 are missing
df = df[~(pd.isnull(df['int_time1']) | pd.isnull(df['ext_time1']))]
df = df.reset_index()
df = df.drop(axis=1, columns=['index', 'Unnamed: 0'], inplace=False)
df

Unnamed: 0,hadm_id,subject_id,gender,ethnicity,marital_status,insurance,language,aortic,mit,tricuspid,pulmonary,cabg,temp,bg_temp,hr,spo2,rr,sbp,dbp,meanbp,weight,height,cardiac_index,pt,ptt,inr,inr_1,fibrinogen,hb,hematocrit,plts,wcc,lymphocytes,neutrophils,alp,ast,alt,ggt,bilirubin_indirect,bilirubin_direct,bilirubin_total,chloride,magnesium,potassium,crp,bleed_time,albumin,creatinine,free_calcium,sodium,bicarb,bun,hba1c,glucose,lactate,po2,pco2,baseexcess,ph,aado2,fio2,ffp,insulin,cryo,prbc,infection,ventrate,tidalvol,vent_array,reintubation,liver_severe,liver_mild,rheum,cvd,aids,ckd,copd,arrhythmia,pud,smoking,pvd,paraplegia,ccf,met_ca,t2dm,t1dm,malig,mi,dementia,first_careunit,last_careunit,admission_location,admission_type,hospital_expire_flag,admittime,dischtime,intime,outtime,ext_time,reint_time,los,icustay_seq,deathtime,plt,diab_un,diab_cc,dtoutput,specimen,dod,int_time1,ext_time1,duration1,int_time2,ext_time2,duration2
0,195663,27328,M,white,SINGLE,Private,ENGL,0,0,0,0,1,"[{'charttime': datetime.datetime(2198, 1, 31, ...",,"[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...",84.00,172.72,,,"[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...",,"[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...",,"[{'charttime': datetime.datetime(2198, 1, 31, ...",,,,,,,,,,"[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 2, 2, 0...","[{'charttime': datetime.datetime(2198, 1, 31, ...",,,,"[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...",,"[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...",,,,,,,"[{'suspected_infection_time': None, 'antibioti...",,,"[{'starttime': datetime.datetime(2198, 1, 31, ...",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,CSRU,CSRU,PHYSICIAN REFERRAL,ELECTIVE,0,2198-01-31 08:00:00,2198-02-04 12:00:00,2198-01-31 12:27:58,2198-02-02 19:06:39,2198-01-31 22:00:00,NaT,2.276900,1,NaT,"[{'charttime': datetime.datetime(2198, 1, 31, ...",0,0,,,NaT,2198-01-31 17:00:00,2198-01-31 22:00:00,5.000000,NaT,NaT,
1,106984,6280,F,other,DIVORCED,Self Pay,SPAN,1,0,0,0,0,"[{'charttime': datetime.datetime(2198, 5, 8, 1...",,"[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...",60.00,170.18,"[{'charttime': datetime.datetime(2198, 5, 8, 1...",,"[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...",,"[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 10, ...",,,,,,,,,,"[{'charttime': datetime.datetime(2198, 5, 9, 3...","[{'charttime': datetime.datetime(2198, 5, 9, 3...","[{'charttime': datetime.datetime(2198, 5, 8, 1...",,,,"[{'charttime': datetime.datetime(2198, 5, 9, 3...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 9, 3...","[{'charttime': datetime.datetime(2198, 5, 9, 3...",,"[{'charttime': datetime.datetime(2198, 5, 8, 1...",,"[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...",,,"[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'suspected_infection_time': None, 'antibioti...",,,"[{'starttime': datetime.datetime(2198, 5, 8, 1...",0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,CSRU,CSRU,PHYSICIAN REFERRAL,ELECTIVE,0,2198-05-08 07:15:00,2198-05-15 13:49:00,2198-05-08 13:14:00,2198-05-10 19:46:00,2198-05-09 09:29:00,NaT,2.272200,1,NaT,"[{'charttime': datetime.datetime(2198, 5, 8, 1...",0,0,,,NaT,2198-05-08 17:00:00,2198-05-09 09:29:00,16.483333,NaT,NaT,
2,123613,15201,F,,MARRIED,Medicare,,1,0,0,0,1,"[{'charttime': datetime.datetime(2189, 2, 18, ...",,"[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...",57.00,165.10,"[{'charttime': datetime.datetime(2189, 2, 18, ...",,"[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...",,"[{'charttime': datetime.datetime(2189, 2, 18, ...",,,,,,,,,,"[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 19, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...",,,,"[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 10, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...",,"[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...",,,,"[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'suspected_infection_time': None, 'antibioti...",,"[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'starttime': datetime.datetime(2189, 2, 18, ...",0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,CSRU,CSRU,PHYSICIAN REFERRAL,ELECTIVE,0,2189-02-18 08:00:00,2189-03-17 14:20:00,2189-02-18 10:51:08,2189-02-20 13:37:48,2189-02-19 09:00:00,NaT,2.115700,1,NaT,"[{'charttime': datetime.datetime(2189, 2, 18, ...",0,0,,,2191-12-14,2189-02-18 13:00:00,2189-02-19 09:00:00,20.000000,NaT,NaT,
3,126027,25226,M,,MARRIED,Private,,0,0,0,0,1,"[{'charttime': datetime.datetime(2118, 1, 25, ...",,"[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...",135.00,190.50,"[{'charttime': datetime.datetime(2118, 1, 25, ...",,"[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...",,"[{'charttime': datetime.datetime(2118, 1, 26, ...",,,,,,,,,,"[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...",,,,"[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 14, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...",,"[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...",,,,,"[{'suspected_infection_time': None, 'antibioti...",,"[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'starttime': datetime.datetime(2118, 1, 25, ...",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,CSRU,CSRU,PHYSICIAN REFERRAL,ELECTIVE,0,2118-01-25 07:15:00,2118-01-29 13:00:00,2118-01-25 10:46:42,2118-01-26 12:33:02,2118-01-25 17:30:00,NaT,1.073800,1,NaT,"[{'charttime': datetime.datetime(2118, 1, 25, ...",1,0,,,NaT,2118-01-25 13:00:00,2118-01-25 17:30:00,4.500000,NaT,NaT,
4,190332,19637,M,,MARRIED,Medicare,,1,0,0,0,0,"[{'charttime': datetime.datetime(2198, 1, 1, 1...",,"[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...",70.00,175.26,"[{'charttime': datetime.datetime(2198, 1, 1, 1...",,,"[{'charttime': datetime.datetime(2198, 1, 3, 4...","[{'charttime': datetime.datetime(2198, 1, 3, 4...",,"[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...",,"[{'charttime': datetime.datetime(2198, 1, 2, 4...",,,,,,,,,,"[{'charttime': datetime.datetime(2198, 1, 2, 4...",,"[{'charttime': datetime.datetime(2198, 1, 1, 1...",,,,"[{'charttime': datetime.datetime(2198, 1, 2, 4...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 2, 4...","[{'charttime': datetime.datetime(2198, 1, 2, 4...",,"[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...",,,,"[{'charttime': datetime.datetime(2198, 1, 2, 6...","[{'suspected_infection_time': None, 'antibioti...",,"[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'starttime': datetime.datetime(2198, 1, 1, 1...",0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,CSRU,CSRU,PHYSICIAN REFERRAL,ELECTIVE,0,2198-01-01 07:15:00,2198-01-09 13:07:00,2198-01-01 10:47:00,2198-01-03 12:00:00,2198-01-01 21:00:00,NaT,2.050700,1,NaT,"[{'charttime': datetime.datetime(2198, 1, 2, 4...",1,0,,,2203-12-06,2198-01-01 13:00:00,2198-01-01 21:00:00,8.000000,NaT,NaT,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9469,22051087,14971805,M,white,MARRIED,Other,ENGL,1,0,0,0,0,"[{'charttime': datetime.datetime(2121, 5, 3, 1...",,"[{'charttime': datetime.datetime(2121, 5, 2, 1...","[{'charttime': datetime.datetime(2121, 5, 2, 1...","[{'charttime': datetime.datetime(2121, 5, 2, 1...","[{'charttime': datetime.datetime(2121, 5, 2, 1...","[{'charttime': datetime.datetime(2121, 5, 2, 1...","[{'charttime': datetime.datetime(2121, 5, 2, 1...",96.80,178.00,,"[{'charttime': datetime.datetime(2121, 5, 2, 1...","[{'charttime': datetime.datetime(2121, 5, 2, 1...","[{'charttime': datetime.datetime(2121, 5, 2, 1...","[{'charttime': datetime.datetime(2121, 5, 2, 1...","[{'charttime': datetime.datetime(2121, 5, 2, 1...","[{'charttime': datetime.datetime(2121, 5, 2, 9...","[{'charttime': datetime.datetime(2121, 5, 2, 9...",,"[{'charttime': datetime.datetime(2121, 5, 2, 1...",,,,,,,,,,"[{'charttime': datetime.datetime(2121, 5, 2, 1...","[{'charttime': datetime.datetime(2121, 5, 3, 1...","[{'charttime': datetime.datetime(2121, 5, 2, 9...",,,,"[{'charttime': datetime.datetime(2121, 5, 2, 1...","[{'charttime': datetime.datetime(2121, 5, 2, 1...","[{'charttime': datetime.datetime(2121, 5, 2, 1...","[{'charttime': datetime.datetime(2121, 5, 2, 1...","[{'charttime': datetime.datetime(2121, 5, 2, 1...","[{'charttime': datetime.datetime(2121, 5, 1, 2...","[{'charttime': datetime.datetime(2121, 5, 2, 9...","[{'charttime': datetime.datetime(2121, 5, 2, 1...","[{'charttime': datetime.datetime(2121, 5, 2, 9...","[{'charttime': datetime.datetime(2121, 5, 2, 9...","[{'charttime': datetime.datetime(2121, 5, 2, 9...","[{'charttime': datetime.datetime(2121, 5, 2, 9...",,,,"[{'charttime': datetime.datetime(2121, 5, 2, 1...",,,[{'suspected_infection_time': datetime.datetim...,,,"[{'starttime': datetime.datetime(2121, 5, 2, 1...",0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,CVICU,CVICU,PHYSICIAN REFERRAL,ELECTIVE,0,2121-05-01 19:37:00,2121-05-06 16:50:00,2121-05-02 09:33:46,2121-05-04 18:19:07,2121-05-02 18:01:00,NaT,2.364826,1,NaT,"[{'charttime': datetime.datetime(2121, 5, 2, 1...",0,0,,,NaT,2121-05-02 12:00:00,2121-05-02 18:01:00,6.000000,NaT,NaT,
9470,21555454,15547313,M,,,Medicare,ENGL,0,0,0,0,1,"[{'charttime': datetime.datetime(2111, 11, 19,...",,"[{'charttime': datetime.datetime(2111, 11, 19,...","[{'charttime': datetime.datetime(2111, 11, 19,...","[{'charttime': datetime.datetime(2111, 11, 19,...","[{'charttime': datetime.datetime(2111, 11, 19,...","[{'charttime': datetime.datetime(2111, 11, 19,...","[{'charttime': datetime.datetime(2111, 11, 19,...",85.10,168.00,,"[{'charttime': datetime.datetime(2111, 11, 19,...","[{'charttime': datetime.datetime(2111, 11, 19,...","[{'charttime': datetime.datetime(2111, 11, 19,...","[{'charttime': datetime.datetime(2111, 11, 19,...","[{'charttime': datetime.datetime(2111, 11, 19,...","[{'charttime': datetime.datetime(2111, 11, 19,...","[{'charttime': datetime.datetime(2111, 11, 19,...",,"[{'charttime': datetime.datetime(2111, 11, 19,...","[{'charttime': datetime.datetime(2111, 11, 19,...","[{'charttime': datetime.datetime(2111, 11, 19,...",,,,,,,,"[{'charttime': datetime.datetime(2111, 11, 19,...","[{'charttime': datetime.datetime(2111, 11, 20,...","[{'charttime': datetime.datetime(2111, 11, 19,...",,,,"[{'charttime': datetime.datetime(2111, 11, 19,...","[{'charttime': datetime.datetime(2111, 11, 19,...","[{'charttime': datetime.datetime(2111, 11, 19,...","[{'charttime': datetime.datetime(2111, 11, 19,...","[{'charttime': datetime.datetime(2111, 11, 19,...","[{'charttime': datetime.datetime(2111, 11, 18,...","[{'charttime': datetime.datetime(2111, 11, 19,...","[{'charttime': datetime.datetime(2111, 11, 19,...","[{'charttime': datetime.datetime(2111, 11, 19,...","[{'charttime': datetime.datetime(2111, 11, 19,...","[{'charttime': datetime.datetime(2111, 11, 19,...","[{'charttime': datetime.datetime(2111, 11, 19,...",,,,"[{'charttime': datetime.datetime(2111, 11, 19,...",,,[{'suspected_infection_time': datetime.datetim...,,,"[{'starttime': datetime.datetime(2111, 11, 19,...",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,CVICU,CVICU,TRANSFER FROM HOSPITAL,URGENT,0,2111-11-18 18:24:00,2111-11-23 17:00:00,2111-11-19 17:29:25,2111-11-22 17:29:35,2111-11-20 04:30:00,NaT,3.000116,1,NaT,"[{'charttime': datetime.datetime(2111, 11, 19,...",0,0,,,NaT,2111-11-19 19:00:00,2111-11-20 04:30:00,9.000000,NaT,NaT,
9471,22084741,16252024,M,white,,Medicare,ENGL,0,0,0,0,1,"[{'charttime': datetime.datetime(2156, 3, 1, 1...",,"[{'charttime': datetime.datetime(2156, 3, 1, 1...","[{'charttime': datetime.datetime(2156, 3, 1, 1...","[{'charttime': datetime.datetime(2156, 3, 1, 1...","[{'charttime': datetime.datetime(2156, 3, 1, 1...","[{'charttime': datetime.datetime(2156, 3, 1, 1...","[{'charttime': datetime.datetime(2156, 3, 1, 1...",100.00,175.00,,"[{'charttime': datetime.datetime(2156, 3, 1, 1...","[{'charttime': datetime.datetime(2156, 3, 1, 1...","[{'charttime': datetime.datetime(2156, 3, 1, 1...","[{'charttime': datetime.datetime(2156, 3, 1, 1...","[{'charttime': datetime.datetime(2156, 3, 1, 1...","[{'charttime': datetime.datetime(2156, 3, 1, 1...","[{'charttime': datetime.datetime(2156, 3, 1, 1...",,"[{'charttime': datetime.datetime(2156, 3, 1, 1...","[{'charttime': datetime.datetime(2156, 3, 1, 1...","[{'charttime': datetime.datetime(2156, 3, 1, 1...",,,,,,,,"[{'charttime': datetime.datetime(2156, 3, 1, 1...",,"[{'charttime': datetime.datetime(2156, 3, 1, 1...",,,,"[{'charttime': datetime.datetime(2156, 3, 1, 1...","[{'charttime': datetime.datetime(2156, 3, 1, 1...","[{'charttime': datetime.datetime(2156, 3, 1, 1...","[{'charttime': datetime.datetime(2156, 3, 1, 1...","[{'charttime': datetime.datetime(2156, 3, 1, 1...","[{'charttime': datetime.datetime(2156, 2, 26, ...","[{'charttime': datetime.datetime(2156, 3, 1, 1...","[{'charttime': datetime.datetime(2156, 3, 1, 1...","[{'charttime': datetime.datetime(2156, 3, 1, 1...","[{'charttime': datetime.datetime(2156, 3, 1, 1...","[{'charttime': datetime.datetime(2156, 3, 1, 1...","[{'charttime': datetime.datetime(2156, 3, 1, 1...",,,,"[{'charttime': datetime.datetime(2156, 3, 1, 1...",,,[{'suspected_infection_time': datetime.datetim...,,,"[{'starttime': datetime.datetime(2156, 3, 1, 1...",0,0,0,0,0,0,1,0,1,0,1,0,0,1,0,1,0,0,1,0,CVICU,CVICU,TRANSFER FROM HOSPITAL,URGENT,0,2156-02-26 18:43:00,2156-03-06 18:30:00,2156-03-01 09:26:32,2156-03-02 17:09:51,2156-03-01 22:27:00,NaT,1.321748,1,NaT,"[{'charttime': datetime.datetime(2156, 3, 1, 1...",0,1,,,NaT,2156-03-01 16:00:00,2156-03-01 22:27:00,6.000000,NaT,NaT,
9472,25588352,18504988,M,white,SINGLE,Medicare,ENGL,0,0,0,0,1,"[{'charttime': datetime.datetime(2127, 3, 5, 1...","[{'charttime': datetime.datetime(2127, 3, 6, 1...","[{'charttime': datetime.datetime(2127, 3, 5, 1...","[{'charttime': datetime.datetime(2127, 3, 5, 1...","[{'charttime': datetime.datetime(2127, 3, 5, 1...","[{'charttime': datetime.datetime(2127, 3, 5, 1...","[{'charttime': datetime.datetime(2127, 3, 5, 1...","[{'charttime': datetime.datetime(2127, 3, 5, 1...",112.05,173.00,,"[{'charttime': datetime.datetime(2127, 3, 5, 1...","[{'charttime': datetime.datetime(2127, 3, 5, 1...","[{'charttime': datetime.datetime(2127, 3, 5, 1...","[{'charttime': datetime.datetime(2127, 3, 5, 1...","[{'charttime': datetime.datetime(2127, 3, 5, 1...","[{'charttime': datetime.datetime(2127, 3, 5, 1...","[{'charttime': datetime.datetime(2127, 3, 5, 1...",,"[{'charttime': datetime.datetime(2127, 3, 5, 1...",,,"[{'charttime': datetime.datetime(2127, 3, 6, 1...","[{'charttime': datetime.datetime(2127, 3, 6, 1...","[{'charttime': datetime.datetime(2127, 3, 6, 1...",,,,"[{'charttime': datetime.datetime(2127, 3, 6, 1...","[{'charttime': datetime.datetime(2127, 3, 5, 1...","[{'charttime': datetime.datetime(2127, 3, 6, 1...","[{'charttime': datetime.datetime(2127, 3, 5, 1...",,,"[{'charttime': datetime.datetime(2127, 3, 6, 1...","[{'charttime': datetime.datetime(2127, 3, 5, 1...","[{'charttime': datetime.datetime(2127, 3, 5, 1...","[{'charttime': datetime.datetime(2127, 3, 5, 1...","[{'charttime': datetime.datetime(2127, 3, 5, 1...","[{'charttime': datetime.datetime(2127, 3, 5, 1...","[{'charttime': datetime.datetime(2127, 3, 5, 0...","[{'charttime': datetime.datetime(2127, 3, 5, 1...","[{'charttime': datetime.datetime(2127, 3, 5, 1...","[{'charttime': datetime.datetime(2127, 3, 5, 1...","[{'charttime': datetime.datetime(2127, 3, 5, 1...","[{'charttime': datetime.datetime(2127, 3, 5, 1...","[{'charttime': datetime.datetime(2127, 3, 5, 1...",,,,"[{'charttime': datetime.datetime(2127, 3, 5, 1...",,,[{'suspected_infection_time': datetime.datetim...,,,"[{'starttime': datetime.datetime(2127, 3, 5, 1...",1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,CVICU,CVICU,PROCEDURE SITE,EW EMER.,0,2127-03-04 15:15:00,2127-03-15 15:55:00,2127-03-05 10:07:40,2127-03-11 10:31:22,2127-03-06 12:00:00,2127-03-07 13:00:00,6.016458,1,NaT,"[{'charttime': datetime.datetime(2127, 3, 5, 1...",0,0,,,NaT,2127-03-05 12:00:00,2127-03-06 12:00:00,24.000000,2127-03-07 13:00:00,2127-03-08 05:00:00,16.0


### 1.4: Handling missing data

#### 1.4.0 Assessing for missing data

In [28]:
def missing_values_table(df): 
    mis_val = df.isnull().sum()
    mis_val_percent = 100 * df.isnull().sum() / len(df)
    mis_val_table = pd.concat([mis_val, mis_val_percent], axis=1)
    mis_val_table_ren_columns = mis_val_table.rename(columns = {0: 'Missing Values', 1: '% Missing Values'})
    return mis_val_table_ren_columns

missing_data = missing_values_table(df)

In [29]:
#set limit and get list of variables missing above limit in `missing_cols`
# missing_limit = 50 #allen
missing_limit = 70 #marcel
missing_cols = missing_data.loc[missing_data['% Missing Values']>missing_limit].index.tolist()
print(missing_cols)
missing_data = missing_data.loc[missing_data['% Missing Values']>missing_limit]
missing_data = missing_data.sort_values(by=['% Missing Values'])
pd.set_option('display.max_rows', None)
missing_data

['bg_temp', 'plts', 'alp', 'ast', 'alt', 'ggt', 'bilirubin_indirect', 'bilirubin_direct', 'bilirubin_total', 'crp', 'bleed_time', 'albumin', 'aado2', 'ffp', 'cryo', 'ventrate', 'tidalvol', 'reint_time', 'deathtime', 'dtoutput', 'specimen', 'dod', 'int_time2', 'ext_time2', 'duration2']


Unnamed: 0,Missing Values,% Missing Values
bg_temp,6787,71.638168
bilirubin_total,7320,77.264091
ast,7324,77.306312
alt,7331,77.380198
tidalvol,7360,77.686299
alp,7364,77.72852
plts,7785,82.172261
albumin,7879,83.16445
ffp,7928,83.681655
aado2,8018,84.631623


In [30]:
pd.reset_option('display.max_rows')

In [31]:
dfDroppedMissing = df[[i for i in df.columns if i not in missing_data.index and i not in ['duration2','int_time2','ext_time2','aado2','fio2','deathtime']]]
print(list(dfDroppedMissing.columns))

['hadm_id', 'subject_id', 'gender', 'ethnicity', 'marital_status', 'insurance', 'language', 'aortic', 'mit', 'tricuspid', 'pulmonary', 'cabg', 'temp', 'hr', 'spo2', 'rr', 'sbp', 'dbp', 'meanbp', 'weight', 'height', 'cardiac_index', 'pt', 'ptt', 'inr', 'inr_1', 'fibrinogen', 'hb', 'hematocrit', 'wcc', 'lymphocytes', 'neutrophils', 'chloride', 'magnesium', 'potassium', 'creatinine', 'free_calcium', 'sodium', 'bicarb', 'bun', 'hba1c', 'glucose', 'lactate', 'po2', 'pco2', 'baseexcess', 'ph', 'insulin', 'prbc', 'infection', 'vent_array', 'reintubation', 'liver_severe', 'liver_mild', 'rheum', 'cvd', 'aids', 'ckd', 'copd', 'arrhythmia', 'pud', 'smoking', 'pvd', 'paraplegia', 'ccf', 'met_ca', 't2dm', 't1dm', 'malig', 'mi', 'dementia', 'first_careunit', 'last_careunit', 'admission_location', 'admission_type', 'hospital_expire_flag', 'admittime', 'dischtime', 'intime', 'outtime', 'ext_time', 'los', 'icustay_seq', 'plt', 'diab_un', 'diab_cc', 'int_time1', 'ext_time1', 'duration1']


#### 1.4.1 Beginning imputation

In [32]:
dfForImpute = pd.DataFrame([0 for i in range(dfDroppedMissing.shape[0])])

# generating timeseries summary values
for column in timeseries:
    if column not in dfDroppedMissing.columns:
        continue
    x = timeseries_valuenames[column] if column in timeseries_valuenames else "value"
    meanList = []
    maxList = []
    minList = []
    for i in range(len(dfDroppedMissing[column])):
        inTime = dfDroppedMissing['int_time1'][i].to_pydatetime()
        y = ts_parser(dfDroppedMissing[column][i], timeLimits=(inTime, inTime+dt.timedelta(hours=24)), valuename=x)
        meanList.append(y[0])
        maxList.append(y[1])
        minList.append(y[2])
    dfForImpute[column+'_mean'] = meanList
    dfForImpute[column+'_max'] = maxList
    dfForImpute[column+'_min'] = minList

dfForImpute = dfForImpute[[i for i in dfForImpute.columns if i != 0]]

  dfForImpute[column+'_mean'] = meanList
  dfForImpute[column+'_max'] = maxList
  dfForImpute[column+'_min'] = minList


In [33]:
# generating infection during ventilation binary values

x = timeseries_valuenames[column] if column in timeseries_valuenames else "value"
infList = []
column='infection'
for i in range(len(df[column])):
    y = inf_parser(df[column][i], timeLimits=(df['int_time1'][i].to_pydatetime(), df['ext_time1'][i].to_pydatetime()))
    infList.append(y)
dfForImpute['infection_vent'] = infList

In [34]:
# add on non-time data for imputation
dfForImpute = dfForImpute.copy()
extraColumns = [i for i in dfDroppedMissing.columns if i not in list(dfForImpute.columns) + timeseries + ['infection', 'vent_array', 'int_time1', 'ext_time1'] + ptinfo + adm_num]
print(extraColumns)
for i in extraColumns:
    if i in ('weight', 'height', 'duration1','duration2'):
        dfForImpute[i] = df[i]
    else:
        dfForImpute[i] = df[i].astype('category')
dfForImpute = dfForImpute.copy()

['gender', 'ethnicity', 'marital_status', 'insurance', 'language', 'aortic', 'mit', 'tricuspid', 'pulmonary', 'cabg', 'weight', 'height', 'reintubation', 'liver_severe', 'liver_mild', 'rheum', 'cvd', 'aids', 'ckd', 'copd', 'arrhythmia', 'pud', 'smoking', 'pvd', 'paraplegia', 'ccf', 'met_ca', 't2dm', 't1dm', 'malig', 'mi', 'dementia', 'first_careunit', 'last_careunit', 'admission_location', 'admission_type', 'hospital_expire_flag', 'diab_un', 'diab_cc', 'duration1']


In [35]:
# before imputation again
dfForImpute

Unnamed: 0,temp_mean,temp_max,temp_min,hr_mean,hr_max,hr_min,spo2_mean,spo2_max,spo2_min,rr_mean,rr_max,rr_min,sbp_mean,sbp_max,sbp_min,dbp_mean,dbp_max,dbp_min,meanbp_mean,meanbp_max,meanbp_min,cardiac_index_mean,cardiac_index_max,cardiac_index_min,pt_mean,pt_max,pt_min,ptt_mean,ptt_max,ptt_min,inr_mean,inr_max,inr_min,inr_1_mean,inr_1_max,inr_1_min,fibrinogen_mean,fibrinogen_max,fibrinogen_min,hb_mean,hb_max,hb_min,hematocrit_mean,hematocrit_max,hematocrit_min,wcc_mean,wcc_max,wcc_min,lymphocytes_mean,lymphocytes_max,lymphocytes_min,neutrophils_mean,neutrophils_max,neutrophils_min,chloride_mean,chloride_max,chloride_min,magnesium_mean,magnesium_max,magnesium_min,potassium_mean,potassium_max,potassium_min,creatinine_mean,creatinine_max,creatinine_min,free_calcium_mean,free_calcium_max,free_calcium_min,sodium_mean,sodium_max,sodium_min,bicarb_mean,bicarb_max,bicarb_min,bun_mean,bun_max,bun_min,hba1c_mean,hba1c_max,hba1c_min,glucose_mean,glucose_max,glucose_min,lactate_mean,lactate_max,lactate_min,po2_mean,po2_max,po2_min,pco2_mean,pco2_max,pco2_min,baseexcess_mean,baseexcess_max,baseexcess_min,ph_mean,ph_max,ph_min,insulin_mean,insulin_max,insulin_min,prbc_mean,prbc_max,prbc_min,plt_mean,plt_max,plt_min,infection_vent,gender,ethnicity,marital_status,insurance,language,aortic,mit,tricuspid,pulmonary,cabg,weight,height,reintubation,liver_severe,liver_mild,rheum,cvd,aids,ckd,copd,arrhythmia,pud,smoking,pvd,paraplegia,ccf,met_ca,t2dm,t1dm,malig,mi,dementia,first_careunit,last_careunit,admission_location,admission_type,hospital_expire_flag,diab_un,diab_cc,duration1
0,37.145834,37.944446,36.499998,89.393939,109.0,63.0,97.967742,100.0,94.0,24.382353,43.0,14.0,115.666667,161.0,98.0,60.515152,69.0,50.0,75.712121,86.0,64.0,,,,,,,29.80,29.8,29.8,1.20,1.2,1.2,1.20,1.2,1.2,,,,12.050000,12.1,12.0,,,,13.900000,14.7,13.1,,,,,,,104.500000,107.0,102.0,,,,3.666667,3.7,3.6,0.80,0.8,0.8,1.150000,1.15,1.15,138.000000,140.0,136.0,25.5,26.0,25.0,12.0,13.0,11.0,,,,124.875000,168.0,87.0,,,,0.500000,1.0,0.0,41.000000,42.0,40.0,0.500000,1.0,0.0,7.400000,7.40,7.40,,,,,,,261.500000,268.0,255.0,0,M,white,SINGLE,Private,ENGL,0,0,0,0,1,84.00,172.72,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,CSRU,CSRU,PHYSICIAN REFERRAL,ELECTIVE,0,0,0,5.000000
1,37.023530,38.500000,35.200001,92.277778,119.0,76.0,97.276316,100.0,65.0,12.807692,23.0,7.5,98.000000,134.0,75.0,53.222222,75.0,35.0,67.861111,101.0,45.0,2.222764,2.91716,1.71598,,,,42.80,47.5,38.1,1.60,1.8,1.4,1.60,1.8,1.4,,,,7.320000,10.0,6.0,22.0,30.0,18.0,,,,,,,,,,108.000000,108.0,108.0,2.10,2.1,2.1,4.375000,4.8,3.6,0.60,0.6,0.6,1.128000,1.21,1.00,142.000000,142.0,142.0,26.0,26.0,26.0,12.0,12.0,12.0,,,,145.562500,195.0,118.0,,,,0.818182,4.0,-1.0,42.818182,50.0,36.0,0.818182,4.0,-1.0,7.398182,7.50,7.35,262.500000,314.000000,211.000000,375.0,375.0,375.0,122.500000,141.0,104.0,0,F,other,DIVORCED,Self Pay,SPAN,1,0,0,0,0,60.00,170.18,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,CSRU,CSRU,PHYSICIAN REFERRAL,ELECTIVE,0,0,0,16.483333
2,36.883784,37.799999,35.400002,85.027027,88.0,70.0,99.636364,100.0,93.0,15.342105,26.0,9.0,121.000000,170.0,89.0,58.459459,88.0,45.0,80.108108,121.0,57.0,2.108540,3.19018,1.55215,,,,59.30,59.3,59.3,1.60,1.6,1.6,1.60,1.6,1.6,,,,10.060000,11.9,6.5,28.0,35.0,20.0,21.850000,22.8,20.9,,,,,,,111.000000,113.0,109.0,2.60,2.6,2.6,4.285714,4.9,3.1,1.20,1.2,1.2,1.156000,1.32,0.95,140.333333,143.0,139.0,26.5,28.0,25.0,19.0,19.0,19.0,,,,120.629630,188.0,92.0,,,,1.166667,3.0,-1.0,41.333333,48.0,34.0,1.166667,3.0,-1.0,7.404286,7.48,7.32,,,,375.0,375.0,375.0,144.500000,148.0,141.0,0,F,,MARRIED,Medicare,,1,0,0,0,1,57.00,165.10,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,CSRU,CSRU,PHYSICIAN REFERRAL,ELECTIVE,0,0,0,20.000000
3,37.532258,38.700001,36.700001,87.939394,104.0,73.0,97.531250,100.0,93.0,16.212121,28.0,10.0,113.191176,162.0,85.0,56.823529,78.0,40.0,73.575758,99.0,56.0,2.899448,4.16974,2.32841,,,,28.30,28.3,28.3,1.20,1.2,1.2,1.20,1.2,1.2,,,,13.800000,13.8,13.8,,,,14.000000,14.0,14.0,,,,,,,106.000000,106.0,106.0,1.85,1.9,1.8,3.800000,4.0,3.6,0.55,0.7,0.4,1.125000,1.17,1.10,139.000000,140.0,138.0,27.5,28.0,27.0,12.5,14.0,11.0,,,,126.533333,150.0,71.0,,,,0.625000,2.0,-2.0,45.750000,55.0,37.0,0.625000,2.0,-2.0,7.377778,7.46,7.32,,,,,,,161.000000,161.0,161.0,0,M,,MARRIED,Private,,0,0,0,0,1,135.00,190.50,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,CSRU,CSRU,PHYSICIAN REFERRAL,ELECTIVE,0,1,0,4.500000
4,36.880362,37.400002,35.055555,87.240000,108.0,64.0,99.083333,100.0,94.0,13.397959,19.0,4.0,113.440000,144.0,69.0,63.810000,83.0,49.0,79.446667,104.0,57.0,2.386696,3.72973,1.94595,,,,,,,,,,,,,,,,9.812500,11.2,8.6,30.0,34.0,28.0,11.600000,11.6,11.6,,,,,,,108.000000,108.0,108.0,,,,4.420000,4.8,3.5,1.20,1.2,1.2,1.027500,1.22,0.85,137.000000,137.0,137.0,23.0,23.0,23.0,16.0,16.0,16.0,,,,134.230769,158.0,115.0,1.50,1.5,1.5,-3.000000,-1.0,-7.0,44.600000,58.0,38.0,-3.000000,-1.0,-7.0,7.318000,7.36,7.27,,,,187.5,375.0,0.0,95.000000,95.0,95.0,0,M,,MARRIED,Medicare,,1,0,0,0,0,70.00,175.26,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,CSRU,CSRU,PHYSICIAN REFERRAL,ELECTIVE,0,1,0,8.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9469,,,,71.035714,83.0,63.0,97.586207,100.0,94.0,19.666667,27.0,14.0,102.155172,126.0,75.5,52.482759,76.0,44.0,67.224138,82.0,54.0,,,,12.4,12.6,12.2,28.95,31.1,26.8,1.15,1.2,1.1,1.15,1.2,1.1,,,,12.333333,13.1,11.4,,,,16.333333,18.9,14.2,,,,,,,105.500000,108.0,103.0,2.00,2.0,2.0,4.000000,4.2,3.8,0.65,0.7,0.6,1.120000,1.20,1.04,137.666667,140.0,135.0,26.5,27.0,26.0,9.5,10.0,9.0,,,,130.333333,141.0,112.0,,,,140.000000,155.0,110.0,44.000000,45.0,43.0,-0.333333,0.0,-1.0,7.370000,7.39,7.36,4.853254,20.000000,0.733333,,,,178.333333,186.0,172.0,0,M,white,MARRIED,Other,ENGL,1,0,0,0,0,96.80,178.00,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,CVICU,CVICU,PHYSICIAN REFERRAL,ELECTIVE,0,0,0,6.000000
9470,37.481667,37.830000,37.170000,88.285714,101.0,78.0,98.517241,100.0,96.0,20.413793,28.0,14.0,111.666667,142.0,85.0,52.150000,71.0,42.0,69.966667,94.0,53.0,,,,13.3,13.3,13.3,29.90,29.9,29.9,1.20,1.2,1.2,1.20,1.2,1.2,,,,10.150000,10.7,9.6,,,,7.750000,9.4,6.1,,,,,,,107.333333,112.0,103.0,2.00,2.0,2.0,4.300000,4.8,3.9,0.65,0.7,0.6,1.096667,1.12,1.08,136.000000,137.0,135.0,21.5,23.0,20.0,11.5,13.0,10.0,,,,110.750000,158.0,89.0,0.90,0.9,0.9,185.333333,296.0,85.0,40.000000,42.0,38.0,-3.000000,0.0,-5.0,7.350000,7.41,7.32,3.493849,10.000000,1.000000,,,,98.000000,112.0,84.0,0,M,,,Medicare,ENGL,0,0,0,0,1,85.10,168.00,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,CVICU,CVICU,TRANSFER FROM HOSPITAL,URGENT,0,0,0,9.000000
9471,36.685556,37.060000,36.330000,70.500000,82.0,60.0,96.333333,100.0,92.0,19.222222,25.0,12.0,118.948718,155.0,90.0,59.256410,84.0,42.0,76.000000,104.0,61.0,,,,12.6,12.6,12.6,23.40,23.4,23.4,1.20,1.2,1.2,1.20,1.2,1.2,,,,9.333333,9.8,9.0,28.0,28.0,28.0,14.700000,16.0,13.4,,,,,,,109.500000,111.0,108.0,,,,4.333333,4.4,4.3,1.30,1.4,1.2,1.285000,1.37,1.20,138.500000,139.0,138.0,21.0,21.0,21.0,40.0,41.0,39.0,,,,120.000000,125.0,117.0,1.20,1.2,1.2,136.600000,192.0,96.0,41.800000,44.0,39.0,-2.400000,-1.0,-3.0,7.335000,7.36,7.31,5.596820,10.831461,2.000000,,,,154.000000,167.0,141.0,0,M,white,,Medicare,ENGL,0,0,0,0,1,100.00,175.00,0,0,0,0,0,0,1,0,1,0,1,0,0,1,0,1,0,0,1,0,CVICU,CVICU,TRANSFER FROM HOSPITAL,URGENT,0,0,1,6.000000
9472,36.691429,37.060000,36.440000,78.571429,96.0,68.0,98.178571,100.0,94.0,14.089286,29.0,6.0,119.214286,163.0,84.0,55.178571,77.0,40.0,74.357143,107.0,52.0,,,,13.9,13.9,13.9,29.70,29.7,29.7,1.30,1.3,1.3,1.30,1.3,1.3,,,,10.200000,10.2,10.2,,,,8.800000,8.8,8.8,,,,,,,107.000000,107.0,107.0,2.00,2.0,2.0,3.857143,4.1,3.7,0.60,0.6,0.6,1.120000,1.20,1.07,132.500000,135.0,130.0,24.0,24.0,24.0,7.0,7.0,7.0,,,,101.000000,141.0,87.0,0.85,0.9,0.8,133.777778,261.0,95.0,40.222222,43.0,35.0,-2.111111,0.0,-4.0,7.356000,7.42,7.31,5.552349,10.000000,2.000000,,,,146.000000,146.0,146.0,0,M,white,SINGLE,Medicare,ENGL,0,0,0,0,1,112.05,173.00,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,CVICU,CVICU,PROCEDURE SITE,EW EMER.,0,0,0,24.000000


In [36]:
dfForImpute2 = dfForImpute[[i for i in dfForImpute.columns if i not in ['int_time1', 'ext_time1', 'duration1','int_time2', 'ext_time2','duration2']]]

kds = mf.ImputationKernel(
  dfForImpute2,
  datasets=1,
  save_all_iterations=True,
  random_state=1991
)

# Run the MICE algorithm for 3 iterations
kds.mice(3)

print(kds)

dfImputed = kds.complete_data(dataset=0, inplace=False)
print(dfImputed.isnull().sum(0))

# after imputation
dfImputed.head(10)

              Class: ImputationKernel
           Datasets: 1
         Iterations: 3
  Imputed Variables: 114
save_all_iterations: True
temp_mean               0
temp_max                0
temp_min                0
hr_mean                 0
hr_max                  0
                       ..
admission_location      0
admission_type          0
hospital_expire_flag    0
diab_un                 0
diab_cc                 0
Length: 148, dtype: int64


Unnamed: 0,temp_mean,temp_max,temp_min,hr_mean,hr_max,hr_min,spo2_mean,spo2_max,spo2_min,rr_mean,rr_max,rr_min,sbp_mean,sbp_max,sbp_min,dbp_mean,dbp_max,dbp_min,meanbp_mean,meanbp_max,meanbp_min,cardiac_index_mean,cardiac_index_max,cardiac_index_min,pt_mean,pt_max,pt_min,ptt_mean,ptt_max,ptt_min,inr_mean,inr_max,inr_min,inr_1_mean,inr_1_max,inr_1_min,fibrinogen_mean,fibrinogen_max,fibrinogen_min,hb_mean,hb_max,hb_min,hematocrit_mean,hematocrit_max,hematocrit_min,wcc_mean,wcc_max,wcc_min,lymphocytes_mean,lymphocytes_max,lymphocytes_min,neutrophils_mean,neutrophils_max,neutrophils_min,chloride_mean,chloride_max,chloride_min,magnesium_mean,magnesium_max,magnesium_min,potassium_mean,potassium_max,potassium_min,creatinine_mean,creatinine_max,creatinine_min,free_calcium_mean,free_calcium_max,free_calcium_min,sodium_mean,sodium_max,sodium_min,bicarb_mean,bicarb_max,bicarb_min,bun_mean,bun_max,bun_min,hba1c_mean,hba1c_max,hba1c_min,glucose_mean,glucose_max,glucose_min,lactate_mean,lactate_max,lactate_min,po2_mean,po2_max,po2_min,pco2_mean,pco2_max,pco2_min,baseexcess_mean,baseexcess_max,baseexcess_min,ph_mean,ph_max,ph_min,insulin_mean,insulin_max,insulin_min,prbc_mean,prbc_max,prbc_min,plt_mean,plt_max,plt_min,infection_vent,gender,ethnicity,marital_status,insurance,language,aortic,mit,tricuspid,pulmonary,cabg,weight,height,reintubation,liver_severe,liver_mild,rheum,cvd,aids,ckd,copd,arrhythmia,pud,smoking,pvd,paraplegia,ccf,met_ca,t2dm,t1dm,malig,mi,dementia,first_careunit,last_careunit,admission_location,admission_type,hospital_expire_flag,diab_un,diab_cc
0,37.145834,37.944446,36.499998,89.393939,109.0,63.0,97.967742,100.0,94.0,24.382353,43.0,14.0,115.666667,161.0,98.0,60.515152,69.0,50.0,75.712121,86.0,64.0,2.759261,3.125,2.47685,14.45,14.5,14.2,29.8,29.8,29.8,1.2,1.2,1.2,1.2,1.2,1.2,527.0,562.0,504.0,12.05,12.1,12.0,35.0,36.0,35.0,13.9,14.7,13.1,17.1,16.8,16.4,80.1,81.0,80.0,104.5,107.0,102.0,1.8,2.0,1.6,3.666667,3.7,3.6,0.8,0.8,0.8,1.15,1.15,1.15,138.0,140.0,136.0,25.5,26.0,25.0,12.0,13.0,11.0,6.4,6.4,6.6,124.875,168.0,87.0,1.657143,1.9,1.5,0.5,1.0,0.0,41.0,42.0,40.0,0.5,1.0,0.0,7.4,7.4,7.4,335.333333,335.0,264.0,843.75,750.0,750.0,261.5,268.0,255.0,0,M,white,SINGLE,Private,ENGL,0,0,0,0,1,84.0,172.72,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,CSRU,CSRU,PHYSICIAN REFERRAL,ELECTIVE,0,0,0
1,37.02353,38.5,35.200001,92.277778,119.0,76.0,97.276316,100.0,65.0,12.807692,23.0,7.5,98.0,134.0,75.0,53.222222,75.0,35.0,67.861111,101.0,45.0,2.222764,2.91716,1.71598,17.35,19.7,15.6,42.8,47.5,38.1,1.6,1.8,1.4,1.6,1.8,1.4,188.5,208.0,173.0,7.32,10.0,6.0,22.0,30.0,18.0,19.566667,23.3,16.4,8.3,8.3,8.0,80.1,80.0,80.3,108.0,108.0,108.0,2.1,2.1,2.1,4.375,4.8,3.6,0.6,0.6,0.6,1.128,1.21,1.0,142.0,142.0,142.0,26.0,26.0,26.0,12.0,12.0,12.0,6.3,6.1,6.1,145.5625,195.0,118.0,1.4,1.6,1.2,0.818182,4.0,-1.0,42.818182,50.0,36.0,0.818182,4.0,-1.0,7.398182,7.5,7.35,262.5,314.0,211.0,375.0,375.0,375.0,122.5,141.0,104.0,0,F,other,DIVORCED,Self Pay,SPAN,1,0,0,0,0,60.0,170.18,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,CSRU,CSRU,PHYSICIAN REFERRAL,ELECTIVE,0,0,0
2,36.883784,37.799999,35.400002,85.027027,88.0,70.0,99.636364,100.0,93.0,15.342105,26.0,9.0,121.0,170.0,89.0,58.459459,88.0,45.0,80.108108,121.0,57.0,2.10854,3.19018,1.55215,17.633333,17.7,17.5,59.3,59.3,59.3,1.6,1.6,1.6,1.6,1.6,1.6,138.0,173.0,124.0,10.06,11.9,6.5,28.0,35.0,20.0,21.85,22.8,20.9,12.2,12.5,12.0,84.8,84.8,84.7,111.0,113.0,109.0,2.6,2.6,2.6,4.285714,4.9,3.1,1.2,1.2,1.2,1.156,1.32,0.95,140.333333,143.0,139.0,26.5,28.0,25.0,19.0,19.0,19.0,9.9,9.1,9.1,120.62963,188.0,92.0,1.4,1.7,1.1,1.166667,3.0,-1.0,41.333333,48.0,34.0,1.166667,3.0,-1.0,7.404286,7.48,7.32,170.0,167.230173,0.0,375.0,375.0,375.0,144.5,148.0,141.0,0,F,asian,MARRIED,Medicare,CANT,1,0,0,0,1,57.0,165.1,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,CSRU,CSRU,PHYSICIAN REFERRAL,ELECTIVE,0,0,0
3,37.532258,38.700001,36.700001,87.939394,104.0,73.0,97.53125,100.0,93.0,16.212121,28.0,10.0,113.191176,162.0,85.0,56.823529,78.0,40.0,73.575758,99.0,56.0,2.899448,4.16974,2.32841,13.8,13.7,13.7,28.3,28.3,28.3,1.2,1.2,1.2,1.2,1.2,1.2,230.0,254.0,212.0,13.8,13.8,13.8,40.166667,41.0,40.0,14.0,14.0,14.0,8.1,8.3,7.0,88.0,88.0,88.1,106.0,106.0,106.0,1.85,1.9,1.8,3.8,4.0,3.6,0.55,0.7,0.4,1.125,1.17,1.1,139.0,140.0,138.0,27.5,28.0,27.0,12.5,14.0,11.0,5.6,5.6,5.6,126.533333,150.0,71.0,1.785714,2.5,1.3,0.625,2.0,-2.0,45.75,55.0,37.0,0.625,2.0,-2.0,7.377778,7.46,7.32,229.666667,240.0,0.0,1184.0,1400.0,1000.0,161.0,161.0,161.0,0,M,other,MARRIED,Private,PTUN,0,0,0,0,1,135.0,190.5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,CSRU,CSRU,PHYSICIAN REFERRAL,ELECTIVE,0,1,0
4,36.880362,37.400002,35.055555,87.24,108.0,64.0,99.083333,100.0,94.0,13.397959,19.0,4.0,113.44,144.0,69.0,63.81,83.0,49.0,79.446667,104.0,57.0,2.386696,3.72973,1.94595,13.2,13.5,13.1,30.5,30.5,30.0,1.175,1.2,1.2,1.175,1.2,1.2,377.0,388.0,352.0,9.8125,11.2,8.6,30.0,34.0,28.0,11.6,11.6,11.6,15.0,15.2,14.8,61.0,63.0,56.0,108.0,108.0,108.0,3.6,3.8,3.4,4.42,4.8,3.5,1.2,1.2,1.2,1.0275,1.22,0.85,137.0,137.0,137.0,23.0,23.0,23.0,16.0,16.0,16.0,6.4,6.3,6.4,134.230769,158.0,115.0,1.5,1.5,1.5,-3.0,-1.0,-7.0,44.6,58.0,38.0,-3.0,-1.0,-7.0,7.318,7.36,7.27,162.666667,210.0,0.0,187.5,375.0,0.0,95.0,95.0,95.0,0,M,white,MARRIED,Medicare,HIND,1,0,0,0,0,70.0,175.26,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,CSRU,CSRU,PHYSICIAN REFERRAL,ELECTIVE,0,1,0
5,36.87,37.400002,35.599998,83.52381,91.0,73.0,99.871795,100.0,98.0,13.777778,22.0,9.0,107.976744,156.0,78.0,53.232558,66.0,45.0,66.790698,89.0,57.0,2.138372,3.44086,1.08602,14.1,13.6,14.0,46.9,46.9,46.9,1.3,1.3,1.3,1.3,1.3,1.3,139.5,138.0,132.0,9.866667,10.4,9.4,29.0,29.0,29.0,19.4,21.5,17.3,17.2,17.9,16.2,78.5,78.5,78.5,113.5,114.0,113.0,2.6,2.6,2.6,4.171429,4.9,3.5,0.7,0.8,0.6,1.144,1.2,1.04,138.5,141.0,136.0,22.5,24.0,21.0,16.5,18.0,15.0,6.4,6.3,6.4,127.26087,226.0,89.0,2.65,2.8,2.4,-4.0,0.0,-7.0,43.75,49.0,37.0,-4.0,0.0,-7.0,7.298889,7.37,7.24,175.5,243.0,0.0,375.0,375.0,375.0,155.5,167.0,144.0,0,F,hispanic,SINGLE,Medicaid,ENGL,0,0,0,0,1,73.3,162.56,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,CSRU,CSRU,TRANSFER FROM HOSPITAL,EMERGENCY,0,1,0
6,36.430556,36.799999,36.0,82.068966,95.0,76.0,96.757576,100.0,91.0,18.545455,29.0,14.0,105.532258,128.0,85.0,58.822581,69.0,48.0,72.935484,88.0,58.0,3.134502,4.22807,1.92398,14.0,14.2,13.7,39.1,39.1,39.1,1.3,1.3,1.3,1.3,1.3,1.3,316.0,313.0,319.0,10.066667,11.0,8.7,26.0,26.0,26.0,11.7,13.2,10.2,13.8,14.1,13.1,84.0,84.8,83.0,111.0,115.0,107.0,2.2,2.2,2.2,4.2,4.4,3.9,0.8,0.8,0.8,1.1,1.17,1.0,137.5,139.0,136.0,21.5,23.0,20.0,9.0,9.0,9.0,9.1,9.4,9.1,111.342857,150.0,90.0,1.9,1.9,1.9,-4.166667,0.0,-9.0,33.5,39.0,25.0,-4.166667,0.0,-9.0,7.378571,7.42,7.35,234.2,310.0,2.833839,392.200003,700.0,281.0,167.0,178.0,156.0,0,M,white,MARRIED,Private,ENGL,0,1,0,0,0,62.3,167.64,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,CSRU,CSRU,PHYSICIAN REFERRAL,ELECTIVE,0,0,0
7,37.221914,38.299999,35.099998,85.25,91.0,63.0,99.142857,100.0,97.0,16.277778,25.0,11.0,110.833333,178.0,88.0,63.944444,92.0,51.0,81.382353,144.0,62.0,2.853204,3.35897,1.93333,13.5,13.5,13.5,37.9,37.9,37.9,1.2,1.2,1.2,1.2,1.2,1.2,401.0,421.0,399.0,9.666667,10.3,8.8,26.0,26.0,26.0,10.233333,11.5,8.7,8.5,8.9,8.3,83.5,84.1,83.0,108.333333,115.0,104.0,2.1,2.1,2.1,4.6,5.5,3.5,0.733333,0.9,0.5,1.05,1.05,1.05,135.666667,136.0,135.0,22.333333,23.0,21.0,13.333333,14.0,12.0,6.1,6.1,6.1,115.833333,162.0,84.0,1.3,1.8,1.0,-2.0,0.0,-4.0,39.0,40.0,38.0,-2.0,0.0,-4.0,7.375,7.42,7.33,145.666667,240.0,0.0,375.0,375.0,375.0,207.333333,233.0,173.0,0,M,white,MARRIED,Private,ENGL,0,0,0,0,1,79.9,165.1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,CSRU,CSRU,PHYSICIAN REFERRAL,ELECTIVE,0,0,0
8,37.694613,38.722221,35.900002,119.320755,144.0,94.0,95.8,100.0,89.0,22.149123,33.0,12.0,115.584906,169.0,85.0,55.320755,77.0,38.0,73.924528,106.0,55.0,3.311734,4.12214,2.09924,15.75,16.7,14.6,80.225,150.0,40.2,1.42,1.5,1.3,1.42,1.5,1.3,289.0,303.0,283.0,13.0,13.0,13.0,36.5,39.0,35.0,12.3,12.3,12.3,10.7,11.0,10.8,71.9,71.6,71.0,104.5,106.0,103.0,1.7,1.7,1.7,4.466667,4.8,4.0,0.85,0.9,0.8,1.174,1.28,1.13,135.5,136.0,135.0,26.5,27.0,26.0,11.0,13.0,9.0,10.0,9.4,8.6,114.030303,138.0,88.0,3.833333,4.6,3.1,0.538462,3.0,-2.0,44.461538,58.0,37.0,0.538462,3.0,-2.0,7.378462,7.44,7.28,25.884186,51.621526,10.0,275.0,278.0,278.0,189.0,189.0,189.0,0,M,white,SINGLE,Medicaid,ENGL,0,0,0,0,1,133.1,175.26,0,0,0,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,1,0,CSRU,CSRU,EMERGENCY ROOM,EMERGENCY,0,0,0
9,37.224,38.599998,34.900002,90.194444,102.0,79.0,97.684211,100.0,65.0,14.038462,27.0,8.0,106.973684,134.0,76.0,52.184211,76.0,41.0,76.552632,99.0,60.0,2.4699,2.8655,2.04678,15.8,17.7,14.6,35.7,36.1,35.3,1.45,1.6,1.3,1.45,1.6,1.3,141.0,147.0,135.0,10.25,12.9,7.6,23.0,23.0,23.0,17.1,17.1,17.1,15.6,19.0,11.0,80.6,82.8,79.5,113.5,115.0,112.0,3.3,4.2,2.4,4.133333,4.6,3.7,0.7,0.8,0.6,1.161667,1.21,1.1,139.5,140.0,139.0,19.0,21.0,17.0,10.5,12.0,9.0,8.1,8.6,8.0,113.979167,198.0,71.0,4.55,5.8,3.3,-2.0,2.0,-5.0,37.0,45.0,30.0,-2.0,2.0,-5.0,7.385833,7.5,7.3,59.799936,51.621526,0.0,375.0,375.0,375.0,154.5,163.0,146.0,0,F,white,MARRIED,Medicare,CANT,0,0,0,0,1,64.0,157.48,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,CCU,CSRU,TRANSFER FROM HOSPITAL,EMERGENCY,0,0,0


In [37]:
y = np.asarray([i.to_pydatetime() for i in df["int_time1"]])
z = np.asarray([i.to_pydatetime() for i in df["outtime"]])
dfImputed['icu_stay_duration'] = [i.total_seconds() for i in z-y]
dfImputed[[i for i in adm_num if i != 'reint_time']] = df[[i for i in adm_num if i != 'reint_time']] #marcel
dfImputed[['hadm_id','subject_id']] = df[['hadm_id','subject_id']]
dfImputed[['int_time1', 'ext_time1', 'duration1','int_time2', 'ext_time2','duration2']] = df[['int_time1', 'ext_time1', 'duration1','int_time2', 'ext_time2','duration2']] #marcel not #allen
dfImputed

Unnamed: 0,temp_mean,temp_max,temp_min,hr_mean,hr_max,hr_min,spo2_mean,spo2_max,spo2_min,rr_mean,rr_max,rr_min,sbp_mean,sbp_max,sbp_min,dbp_mean,dbp_max,dbp_min,meanbp_mean,meanbp_max,meanbp_min,cardiac_index_mean,cardiac_index_max,cardiac_index_min,pt_mean,pt_max,pt_min,ptt_mean,ptt_max,ptt_min,inr_mean,inr_max,inr_min,inr_1_mean,inr_1_max,inr_1_min,fibrinogen_mean,fibrinogen_max,fibrinogen_min,hb_mean,hb_max,hb_min,hematocrit_mean,hematocrit_max,hematocrit_min,wcc_mean,wcc_max,wcc_min,lymphocytes_mean,lymphocytes_max,lymphocytes_min,neutrophils_mean,neutrophils_max,neutrophils_min,chloride_mean,chloride_max,chloride_min,magnesium_mean,magnesium_max,magnesium_min,potassium_mean,potassium_max,potassium_min,creatinine_mean,creatinine_max,creatinine_min,free_calcium_mean,free_calcium_max,free_calcium_min,sodium_mean,sodium_max,sodium_min,bicarb_mean,bicarb_max,bicarb_min,bun_mean,bun_max,bun_min,hba1c_mean,hba1c_max,hba1c_min,glucose_mean,glucose_max,glucose_min,lactate_mean,lactate_max,lactate_min,po2_mean,po2_max,po2_min,pco2_mean,pco2_max,pco2_min,baseexcess_mean,baseexcess_max,baseexcess_min,ph_mean,ph_max,ph_min,insulin_mean,insulin_max,insulin_min,prbc_mean,prbc_max,prbc_min,plt_mean,plt_max,plt_min,infection_vent,gender,ethnicity,marital_status,insurance,language,aortic,mit,tricuspid,pulmonary,cabg,weight,height,reintubation,liver_severe,liver_mild,rheum,cvd,aids,ckd,copd,arrhythmia,pud,smoking,pvd,paraplegia,ccf,met_ca,t2dm,t1dm,malig,mi,dementia,first_careunit,last_careunit,admission_location,admission_type,hospital_expire_flag,diab_un,diab_cc,icu_stay_duration,admittime,dischtime,intime,outtime,ext_time,los,icustay_seq,deathtime,hadm_id,subject_id,int_time1,ext_time1,duration1,int_time2,ext_time2,duration2
0,37.145834,37.944446,36.499998,89.393939,109.0,63.0,97.967742,100.0,94.0,24.382353,43.0,14.0,115.666667,161.0,98.0,60.515152,69.0,50.0,75.712121,86.0,64.0,2.759261,3.12500,2.47685,14.450000,14.5,14.2,29.80,29.8,29.8,1.200,1.2,1.2,1.200,1.2,1.2,527.0,562.00,504.0,12.050000,12.1,12.0,35.000000,36.0,35.0,13.900000,14.7,13.1,17.1,16.8,16.4,80.1,81.0,80.0,104.500000,107.0,102.0,1.80,2.0,1.6,3.666667,3.7,3.6,0.80,0.8,0.8,1.150000,1.15,1.15,138.000000,140.0,136.0,25.5,26.0,25.0,12.0,13.0,11.0,6.4,6.4,6.6,124.875000,168.0,87.0,1.657143,1.9,1.5,0.500000,1.0,0.0,41.000000,42.0,40.0,0.500000,1.0,0.0,7.400000,7.40,7.40,335.333333,335.000000,264.000000,843.750000,750.0,750.00000,261.500000,268.0,255.0,0,M,white,SINGLE,Private,ENGL,0,0,0,0,1,84.00,172.72,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,CSRU,CSRU,PHYSICIAN REFERRAL,ELECTIVE,0,0,0,180399.0,2198-01-31 08:00:00,2198-02-04 12:00:00,2198-01-31 12:27:58,2198-02-02 19:06:39,2198-01-31 22:00:00,2.276900,1,NaT,195663,27328,2198-01-31 17:00:00,2198-01-31 22:00:00,5.000000,NaT,NaT,
1,37.023530,38.500000,35.200001,92.277778,119.0,76.0,97.276316,100.0,65.0,12.807692,23.0,7.5,98.000000,134.0,75.0,53.222222,75.0,35.0,67.861111,101.0,45.0,2.222764,2.91716,1.71598,17.350000,19.7,15.6,42.80,47.5,38.1,1.600,1.8,1.4,1.600,1.8,1.4,188.5,208.00,173.0,7.320000,10.0,6.0,22.000000,30.0,18.0,19.566667,23.3,16.4,8.3,8.3,8.0,80.1,80.0,80.3,108.000000,108.0,108.0,2.10,2.1,2.1,4.375000,4.8,3.6,0.60,0.6,0.6,1.128000,1.21,1.00,142.000000,142.0,142.0,26.0,26.0,26.0,12.0,12.0,12.0,6.3,6.1,6.1,145.562500,195.0,118.0,1.400000,1.6,1.2,0.818182,4.0,-1.0,42.818182,50.0,36.0,0.818182,4.0,-1.0,7.398182,7.50,7.35,262.500000,314.000000,211.000000,375.000000,375.0,375.00000,122.500000,141.0,104.0,0,F,other,DIVORCED,Self Pay,SPAN,1,0,0,0,0,60.00,170.18,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,CSRU,CSRU,PHYSICIAN REFERRAL,ELECTIVE,0,0,0,182760.0,2198-05-08 07:15:00,2198-05-15 13:49:00,2198-05-08 13:14:00,2198-05-10 19:46:00,2198-05-09 09:29:00,2.272200,1,NaT,106984,6280,2198-05-08 17:00:00,2198-05-09 09:29:00,16.483333,NaT,NaT,
2,36.883784,37.799999,35.400002,85.027027,88.0,70.0,99.636364,100.0,93.0,15.342105,26.0,9.0,121.000000,170.0,89.0,58.459459,88.0,45.0,80.108108,121.0,57.0,2.108540,3.19018,1.55215,17.633333,17.7,17.5,59.30,59.3,59.3,1.600,1.6,1.6,1.600,1.6,1.6,138.0,173.00,124.0,10.060000,11.9,6.5,28.000000,35.0,20.0,21.850000,22.8,20.9,12.2,12.5,12.0,84.8,84.8,84.7,111.000000,113.0,109.0,2.60,2.6,2.6,4.285714,4.9,3.1,1.20,1.2,1.2,1.156000,1.32,0.95,140.333333,143.0,139.0,26.5,28.0,25.0,19.0,19.0,19.0,9.9,9.1,9.1,120.629630,188.0,92.0,1.400000,1.7,1.1,1.166667,3.0,-1.0,41.333333,48.0,34.0,1.166667,3.0,-1.0,7.404286,7.48,7.32,170.000000,167.230173,0.000000,375.000000,375.0,375.00000,144.500000,148.0,141.0,0,F,asian,MARRIED,Medicare,CANT,1,0,0,0,1,57.00,165.10,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,CSRU,CSRU,PHYSICIAN REFERRAL,ELECTIVE,0,0,0,175068.0,2189-02-18 08:00:00,2189-03-17 14:20:00,2189-02-18 10:51:08,2189-02-20 13:37:48,2189-02-19 09:00:00,2.115700,1,NaT,123613,15201,2189-02-18 13:00:00,2189-02-19 09:00:00,20.000000,NaT,NaT,
3,37.532258,38.700001,36.700001,87.939394,104.0,73.0,97.531250,100.0,93.0,16.212121,28.0,10.0,113.191176,162.0,85.0,56.823529,78.0,40.0,73.575758,99.0,56.0,2.899448,4.16974,2.32841,13.800000,13.7,13.7,28.30,28.3,28.3,1.200,1.2,1.2,1.200,1.2,1.2,230.0,254.00,212.0,13.800000,13.8,13.8,40.166667,41.0,40.0,14.000000,14.0,14.0,8.1,8.3,7.0,88.0,88.0,88.1,106.000000,106.0,106.0,1.85,1.9,1.8,3.800000,4.0,3.6,0.55,0.7,0.4,1.125000,1.17,1.10,139.000000,140.0,138.0,27.5,28.0,27.0,12.5,14.0,11.0,5.6,5.6,5.6,126.533333,150.0,71.0,1.785714,2.5,1.3,0.625000,2.0,-2.0,45.750000,55.0,37.0,0.625000,2.0,-2.0,7.377778,7.46,7.32,229.666667,240.000000,0.000000,1184.000000,1400.0,1000.00000,161.000000,161.0,161.0,0,M,other,MARRIED,Private,PTUN,0,0,0,0,1,135.00,190.50,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,CSRU,CSRU,PHYSICIAN REFERRAL,ELECTIVE,0,1,0,84782.0,2118-01-25 07:15:00,2118-01-29 13:00:00,2118-01-25 10:46:42,2118-01-26 12:33:02,2118-01-25 17:30:00,1.073800,1,NaT,126027,25226,2118-01-25 13:00:00,2118-01-25 17:30:00,4.500000,NaT,NaT,
4,36.880362,37.400002,35.055555,87.240000,108.0,64.0,99.083333,100.0,94.0,13.397959,19.0,4.0,113.440000,144.0,69.0,63.810000,83.0,49.0,79.446667,104.0,57.0,2.386696,3.72973,1.94595,13.200000,13.5,13.1,30.50,30.5,30.0,1.175,1.2,1.2,1.175,1.2,1.2,377.0,388.00,352.0,9.812500,11.2,8.6,30.000000,34.0,28.0,11.600000,11.6,11.6,15.0,15.2,14.8,61.0,63.0,56.0,108.000000,108.0,108.0,3.60,3.8,3.4,4.420000,4.8,3.5,1.20,1.2,1.2,1.027500,1.22,0.85,137.000000,137.0,137.0,23.0,23.0,23.0,16.0,16.0,16.0,6.4,6.3,6.4,134.230769,158.0,115.0,1.500000,1.5,1.5,-3.000000,-1.0,-7.0,44.600000,58.0,38.0,-3.000000,-1.0,-7.0,7.318000,7.36,7.27,162.666667,210.000000,0.000000,187.500000,375.0,0.00000,95.000000,95.0,95.0,0,M,white,MARRIED,Medicare,HIND,1,0,0,0,0,70.00,175.26,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,CSRU,CSRU,PHYSICIAN REFERRAL,ELECTIVE,0,1,0,169200.0,2198-01-01 07:15:00,2198-01-09 13:07:00,2198-01-01 10:47:00,2198-01-03 12:00:00,2198-01-01 21:00:00,2.050700,1,NaT,190332,19637,2198-01-01 13:00:00,2198-01-01 21:00:00,8.000000,NaT,NaT,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9469,36.813333,37.110000,36.500000,71.035714,83.0,63.0,97.586207,100.0,94.0,19.666667,27.0,14.0,102.155172,126.0,75.5,52.482759,76.0,44.0,67.224138,82.0,54.0,2.528107,3.02703,1.87727,12.400000,12.6,12.2,28.95,31.1,26.8,1.150,1.2,1.1,1.150,1.2,1.1,210.0,212.00,214.0,12.333333,13.1,11.4,31.000000,32.0,31.0,16.333333,18.9,14.2,8.6,8.9,8.5,80.4,80.5,80.5,105.500000,108.0,103.0,2.00,2.0,2.0,4.000000,4.2,3.8,0.65,0.7,0.6,1.120000,1.20,1.04,137.666667,140.0,135.0,26.5,27.0,26.0,9.5,10.0,9.0,5.5,5.5,5.5,130.333333,141.0,112.0,2.780000,3.4,2.3,140.000000,155.0,110.0,44.000000,45.0,43.0,-0.333333,0.0,-1.0,7.370000,7.39,7.36,4.853254,20.000000,0.733333,425.000000,558.0,350.00001,178.333333,186.0,172.0,0,M,white,MARRIED,Other,ENGL,1,0,0,0,0,96.80,178.00,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,CVICU,CVICU,PHYSICIAN REFERRAL,ELECTIVE,0,0,0,195547.0,2121-05-01 19:37:00,2121-05-06 16:50:00,2121-05-02 09:33:46,2121-05-04 18:19:07,2121-05-02 18:01:00,2.364826,1,NaT,22051087,14971805,2121-05-02 12:00:00,2121-05-02 18:01:00,6.000000,NaT,NaT,
9470,37.481667,37.830000,37.170000,88.285714,101.0,78.0,98.517241,100.0,96.0,20.413793,28.0,14.0,111.666667,142.0,85.0,52.150000,71.0,42.0,69.966667,94.0,53.0,2.756978,3.79464,1.94872,13.300000,13.3,13.3,29.90,29.9,29.9,1.200,1.2,1.2,1.200,1.2,1.2,655.0,649.85,595.0,10.150000,10.7,9.6,30.500000,32.0,29.0,7.750000,9.4,6.1,6.7,6.8,6.7,90.6,91.0,90.0,107.333333,112.0,103.0,2.00,2.0,2.0,4.300000,4.8,3.9,0.65,0.7,0.6,1.096667,1.12,1.08,136.000000,137.0,135.0,21.5,23.0,20.0,11.5,13.0,10.0,6.4,6.4,6.2,110.750000,158.0,89.0,0.900000,0.9,0.9,185.333333,296.0,85.0,40.000000,42.0,38.0,-3.000000,0.0,-5.0,7.350000,7.41,7.32,3.493849,10.000000,1.000000,350.500000,375.0,350.00001,98.000000,112.0,84.0,0,M,white,MARRIED,Medicare,ENGL,0,0,0,0,1,85.10,168.00,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,CVICU,CVICU,TRANSFER FROM HOSPITAL,URGENT,0,0,0,253775.0,2111-11-18 18:24:00,2111-11-23 17:00:00,2111-11-19 17:29:25,2111-11-22 17:29:35,2111-11-20 04:30:00,3.000116,1,NaT,21555454,15547313,2111-11-19 19:00:00,2111-11-20 04:30:00,9.000000,NaT,NaT,
9471,36.685556,37.060000,36.330000,70.500000,82.0,60.0,96.333333,100.0,92.0,19.222222,25.0,12.0,118.948718,155.0,90.0,59.256410,84.0,42.0,76.000000,104.0,61.0,3.026018,4.44444,1.86458,12.600000,12.6,12.6,23.40,23.4,23.4,1.200,1.2,1.2,1.200,1.2,1.2,168.0,180.00,152.0,9.333333,9.8,9.0,28.000000,28.0,28.0,14.700000,16.0,13.4,11.6,11.6,11.3,83.6,84.0,83.2,109.500000,111.0,108.0,2.30,2.4,2.2,4.333333,4.4,4.3,1.30,1.4,1.2,1.285000,1.37,1.20,138.500000,139.0,138.0,21.0,21.0,21.0,40.0,41.0,39.0,7.0,6.6,7.0,120.000000,125.0,117.0,1.200000,1.2,1.2,136.600000,192.0,96.0,41.800000,44.0,39.0,-2.400000,-1.0,-3.0,7.335000,7.36,7.31,5.596820,10.831461,2.000000,374.999994,375.0,375.00000,154.000000,167.0,141.0,0,M,white,MARRIED,Medicare,ENGL,0,0,0,0,1,100.00,175.00,0,0,0,0,0,0,1,0,1,0,1,0,0,1,0,1,0,0,1,0,CVICU,CVICU,TRANSFER FROM HOSPITAL,URGENT,0,0,1,90591.0,2156-02-26 18:43:00,2156-03-06 18:30:00,2156-03-01 09:26:32,2156-03-02 17:09:51,2156-03-01 22:27:00,1.321748,1,NaT,22084741,16252024,2156-03-01 16:00:00,2156-03-01 22:27:00,6.000000,NaT,NaT,
9472,36.691429,37.060000,36.440000,78.571429,96.0,68.0,98.178571,100.0,94.0,14.089286,29.0,6.0,119.214286,163.0,84.0,55.178571,77.0,40.0,74.357143,107.0,52.0,2.568054,2.94898,1.92090,13.900000,13.9,13.9,29.70,29.7,29.7,1.300,1.3,1.3,1.300,1.3,1.3,260.0,282.00,244.0,10.200000,10.2,10.2,30.000000,30.0,30.0,8.800000,8.8,8.8,8.4,8.8,8.3,78.2,77.9,77.9,107.000000,107.0,107.0,2.00,2.0,2.0,3.857143,4.1,3.7,0.60,0.6,0.6,1.120000,1.20,1.07,132.500000,135.0,130.0,24.0,24.0,24.0,7.0,7.0,7.0,5.3,5.3,5.2,101.000000,141.0,87.0,0.850000,0.9,0.8,133.777778,261.0,95.0,40.222222,43.0,35.0,-2.111111,0.0,-4.0,7.356000,7.42,7.31,5.552349,10.000000,2.000000,375.000000,375.0,375.00000,146.000000,146.0,146.0,0,M,white,SINGLE,Medicare,ENGL,0,0,0,0,1,112.05,173.00,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,CVICU,CVICU,PROCEDURE SITE,EW EMER.,0,0,0,513082.0,2127-03-04 15:15:00,2127-03-15 15:55:00,2127-03-05 10:07:40,2127-03-11 10:31:22,2127-03-06 12:00:00,6.016458,1,NaT,25588352,18504988,2127-03-05 12:00:00,2127-03-06 12:00:00,24.000000,2127-03-07 13:00:00,2127-03-08 05:00:00,16.0


In [38]:
# adding on critical rows that were removed when yeeting columns with high missing values

for i in ('dod', 'deathtime'):
    dfImputed[i] = df[i]

In [39]:
dfImputed.to_csv('imputed.csv')