# DL Survival - Ventilation Outcomes
 Updated 21/11/21

In [1]:
import pandas as pd
import numpy as np
import math
import statistics
from datetime import datetime
import datetime as dt
from datetime import timedelta
import json
import miceforest as mf

from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer

pd.set_option('display.max_columns', None)

## 1. Data cleaning

- Import MIMIC III data
- Review column unique values, assign correct data types
- Impute missing values


### 1.1: Importing data

In [2]:
df = pd.read_csv('mimic_combined.csv')
print(df.shape)
df.head(10)

#### 1.1.1: Column lists

In [50]:
#view and reorder columns
cols = list(df.columns)
new_cols = ['Unnamed: 0','hadm_id','subject_id','gender','ethnicity','marital_status','insurance','language','aortic','mit','tricuspid',
            'pulmonary','cabg','temp','bg_temp','hr','spo2','rr','sbp','dbp','meanbp','weight','height','cardiac_index','pt','ptt',
            'inr','inr_1','fibrinogen','hb','hematocrit','plts','wcc','lymphocytes','neutrophils','alp','ast','alt','ggt',
            'bilirubin_indirect','bilirubin_direct','bilirubin_total','chloride','magnesium','potassium','crp','bleed_time','albumin',
            'creatinine','free_calcium','sodium','bicarb','bun','hba1c','glucose','lactate','po2','pco2','baseexcess','ph','aado2',
            'fio2','ffp','insulin','cryo','prbc','infection','ventrate','tidalvol','vent_array','reintubation','liver_severe','liver_mild',
            'rheum','cvd','aids','ckd','copd','arrhythmia','pud','smoking','pvd','paraplegia','ccf','met_ca','t2dm','t1dm','malig','mi',
            'dementia','first_careunit','last_careunit','admission_location','admission_type','hospital_expire_flag','admittime',
            'dischtime','intime','outtime','ext_time','reint_time','los','icustay_seq','deathtime','plt','diab_un','diab_cc',
            'dtoutput','specimen','dod']

ptinfo=['Unnamed: 0','hadm_id','subject_id']

demographics=['gender','ethnicity','marital_status','insurance','language']

proceduretype=['aortic','mit','tricuspid','pulmonary','cabg']

vitals=['temp','bg_temp','hr','spo2','rr','sbp','dbp','meanbp','weight','height','cardiac_index']

labs=['pt','ptt','inr','inr_1','fibrinogen','hb','hematocrit','plts','wcc','lymphocytes','neutrophils','alp','ast','alt','ggt',
'bilirubin_indirect','bilirubin_direct','bilirubin_total','chloride','magnesium','potassium','crp','bleed_time',
'albumin','creatinine','free_calcium','sodium','bicarb','bun','hba1c','glucose','lactate']

bloodgases=['po2','pco2','baseexcess','ph','aado2','fio2']

products=['ffp','insulin','cryo','prbc','infection']

ventilation=['ventrate','tidalvol','vent_array','reintubation']

comorbidities=['liver_severe','liver_mild','rheum','cvd','aids','ckd','copd','arrhythmia','pud','smoking','pvd',
'paraplegia','ccf','met_ca','t2dm','t1dm','malig','mi','dementia']

adm_cat=['first_careunit','last_careunit','admission_location','admission_type','hospital_expire_flag']

adm_num=['admittime','dischtime','intime','outtime','ext_time','reint_time','los','icustay_seq','deathtime']

others=['plt','diab_un','diab_cc','dtoutput','specimen','dod']

timeseries=[*vitals,*labs,*bloodgases,*products,*ventilation,'plt','dtoutput']
timeseries = [i for i in timeseries if i not in ('weight','height','reintubation', 'infection', 'vent_array')]
    
timeseries_valuenames = {'cardiac_index':'ci',
                         'plts':'bloodproduct',
                         'ffp':'bloodproduct',
                         'insulin':'amount',
                         'cryo':'bloodproduct',
                         'prbc':'bloodproduct',
                         'dtoutput':'output'}

In [5]:
df = df[new_cols]
df.head(10)

Unnamed: 0.1,Unnamed: 0,hadm_id,subject_id,gender,ethnicity,marital_status,insurance,language,aortic,mit,tricuspid,pulmonary,cabg,temp,bg_temp,hr,spo2,rr,sbp,dbp,meanbp,weight,height,cardiac_index,pt,ptt,inr,inr_1,fibrinogen,hb,hematocrit,plts,wcc,lymphocytes,neutrophils,alp,ast,alt,ggt,bilirubin_indirect,bilirubin_direct,bilirubin_total,chloride,magnesium,potassium,crp,bleed_time,albumin,creatinine,free_calcium,sodium,bicarb,bun,hba1c,glucose,lactate,po2,pco2,baseexcess,ph,aado2,fio2,ffp,insulin,cryo,prbc,infection,ventrate,tidalvol,vent_array,reintubation,liver_severe,liver_mild,rheum,cvd,aids,ckd,copd,arrhythmia,pud,smoking,pvd,paraplegia,ccf,met_ca,t2dm,t1dm,malig,mi,dementia,first_careunit,last_careunit,admission_location,admission_type,hospital_expire_flag,admittime,dischtime,intime,outtime,ext_time,reint_time,los,icustay_seq,deathtime,plt,diab_un,diab_cc,dtoutput,specimen,dod
0,0,195663,27328,M,white,SINGLE,Private,ENGL,0,0,0,0,1,"[{'charttime': datetime.datetime(2198, 1, 31, ...",[],"[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...",84.0,172.72,[],[],"[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...",[],"[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...",[],"[{'charttime': datetime.datetime(2198, 1, 31, ...",[],[],[],[],[],[],[],[],[],"[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 2, 2, 0...","[{'charttime': datetime.datetime(2198, 1, 31, ...",[],[],[],"[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...",[],"[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...",[],[],[],[],[],[],"[{'suspected_infection_time': None, 'antibioti...",[],[],"[{'starttime': datetime.datetime(2198, 1, 31, ...",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,CSRU,CSRU,PHYS REFERRAL/NORMAL DELI,ELECTIVE,0,2198-01-31 08:00:00,2198-02-04 12:00:00,2198-01-31 12:27:58,2198-02-02 19:06:39,2198-01-31 22:00:00,,2.2769,1,,"[{'charttime': datetime.datetime(2198, 1, 31, ...",0,0,[],[],
1,1,106984,6280,F,other,DIVORCED,Self Pay,SPAN,1,0,0,0,0,"[{'charttime': datetime.datetime(2198, 5, 8, 1...",[],"[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...",60.0,170.18,"[{'charttime': datetime.datetime(2198, 5, 8, 1...",[],"[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...",[],"[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 10, ...",[],[],[],[],[],[],[],[],[],"[{'charttime': datetime.datetime(2198, 5, 9, 3...","[{'charttime': datetime.datetime(2198, 5, 9, 3...","[{'charttime': datetime.datetime(2198, 5, 8, 1...",[],[],[],"[{'charttime': datetime.datetime(2198, 5, 9, 3...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 9, 3...","[{'charttime': datetime.datetime(2198, 5, 9, 3...",[],"[{'charttime': datetime.datetime(2198, 5, 8, 1...",[],"[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...",[],[],"[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'suspected_infection_time': None, 'antibioti...",[],[],"[{'starttime': datetime.datetime(2198, 5, 8, 1...",0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,CSRU,CSRU,PHYS REFERRAL/NORMAL DELI,ELECTIVE,0,2198-05-08 07:15:00,2198-05-15 13:49:00,2198-05-08 13:14:00,2198-05-10 19:46:00,2198-05-09 09:29:00,,2.2722,1,,"[{'charttime': datetime.datetime(2198, 5, 8, 1...",0,0,[],[],
2,2,123613,15201,F,unknown,MARRIED,Medicare,,1,0,0,0,1,"[{'charttime': datetime.datetime(2189, 2, 18, ...",[],"[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...",57.0,165.1,"[{'charttime': datetime.datetime(2189, 2, 18, ...",[],"[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...",[],"[{'charttime': datetime.datetime(2189, 2, 18, ...",[],[],[],[],[],[],[],[],[],"[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 19, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...",[],[],[],"[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 10, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...",[],"[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...",[],[],[],"[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'suspected_infection_time': None, 'antibioti...",[],"[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'starttime': datetime.datetime(2189, 2, 18, ...",0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,CSRU,CSRU,PHYS REFERRAL/NORMAL DELI,ELECTIVE,0,2189-02-18 08:00:00,2189-03-17 14:20:00,2189-02-18 10:51:08,2189-02-20 13:37:48,2189-02-19 09:00:00,,2.1157,1,,"[{'charttime': datetime.datetime(2189, 2, 18, ...",0,0,[],[],2191-12-14
3,3,126027,25226,M,unknown,MARRIED,Private,,0,0,0,0,1,"[{'charttime': datetime.datetime(2118, 1, 25, ...",[],"[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...",135.0,190.5,"[{'charttime': datetime.datetime(2118, 1, 25, ...",[],"[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...",[],"[{'charttime': datetime.datetime(2118, 1, 26, ...",[],[],[],[],[],[],[],[],[],"[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...",[],[],[],"[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 14, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...",[],"[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...",[],[],[],[],"[{'suspected_infection_time': None, 'antibioti...",[],"[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'starttime': datetime.datetime(2118, 1, 25, ...",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,CSRU,CSRU,PHYS REFERRAL/NORMAL DELI,ELECTIVE,0,2118-01-25 07:15:00,2118-01-29 13:00:00,2118-01-25 10:46:42,2118-01-26 12:33:02,2118-01-25 17:30:00,,1.0738,1,,"[{'charttime': datetime.datetime(2118, 1, 25, ...",1,0,[],[],
4,4,190332,19637,M,unknown,MARRIED,Medicare,,1,0,0,0,0,"[{'charttime': datetime.datetime(2198, 1, 1, 1...",[],"[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...",70.0,175.26,"[{'charttime': datetime.datetime(2198, 1, 1, 1...",[],[],"[{'charttime': datetime.datetime(2198, 1, 3, 4...","[{'charttime': datetime.datetime(2198, 1, 3, 4...",[],"[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...",[],"[{'charttime': datetime.datetime(2198, 1, 2, 4...",[],[],[],[],[],[],[],[],[],"[{'charttime': datetime.datetime(2198, 1, 2, 4...",[],"[{'charttime': datetime.datetime(2198, 1, 1, 1...",[],[],[],"[{'charttime': datetime.datetime(2198, 1, 2, 4...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 2, 4...","[{'charttime': datetime.datetime(2198, 1, 2, 4...",[],"[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...",[],[],[],"[{'charttime': datetime.datetime(2198, 1, 2, 6...","[{'suspected_infection_time': None, 'antibioti...",[],"[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'starttime': datetime.datetime(2198, 1, 1, 1...",0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,CSRU,CSRU,PHYS REFERRAL/NORMAL DELI,ELECTIVE,0,2198-01-01 07:15:00,2198-01-09 13:07:00,2198-01-01 10:47:00,2198-01-03 12:00:00,2198-01-01 21:00:00,,2.0507,1,,"[{'charttime': datetime.datetime(2198, 1, 2, 4...",1,0,[],[],2203-12-06
5,5,115203,29498,F,unknown,SINGLE,Medicaid,ENGL,0,0,0,0,1,"[{'charttime': datetime.datetime(2130, 12, 8, ...",[],"[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...",73.3,162.56,"[{'charttime': datetime.datetime(2130, 12, 8, ...",[],"[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...",[],"[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...",[],[],[],[],[],[],[],"[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...",[],[],[],"[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 7, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...",[],[],[],"[{'charttime': datetime.datetime(2130, 12, 8, ...",[{'suspected_infection_time': datetime.datetim...,[],[],"[{'starttime': datetime.datetime(2130, 12, 8, ...",0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,CSRU,CSRU,TRANSFER FROM HOSP/EXTRAM,EMERGENCY,0,2130-12-06 18:30:00,2130-12-18 18:38:00,2130-12-08 10:42:40,2130-12-12 12:08:24,2130-12-09 12:00:00,,4.0595,1,,"[{'charttime': datetime.datetime(2130, 12, 8, ...",1,0,"[{'charttime': datetime.datetime(2130, 12, 8, ...",[],
6,6,181661,18498,M,white,WIDOWED,Medicare,,0,1,0,0,1,"[{'charttime': datetime.datetime(2113, 5, 11, ...","[{'charttime': datetime.datetime(2113, 5, 12, ...","[{'charttime': datetime.datetime(2113, 5, 11, ...","[{'charttime': datetime.datetime(2113, 5, 11, ...","[{'charttime': datetime.datetime(2113, 5, 11, ...","[{'charttime': datetime.datetime(2113, 5, 11, ...","[{'charttime': datetime.datetime(2113, 5, 11, ...","[{'charttime': datetime.datetime(2113, 5, 11, ...",101.15,,"[{'charttime': datetime.datetime(2113, 5, 11, ...",[],"[{'charttime': datetime.datetime(2113, 5, 12, ...","[{'charttime': datetime.datetime(2113, 5, 12, ...","[{'charttime': datetime.datetime(2113, 5, 12, ...",[],"[{'charttime': datetime.datetime(2113, 5, 11, ...","[{'charttime': datetime.datetime(2113, 5, 11, ...",[],"[{'charttime': datetime.datetime(2113, 5, 12, ...","[{'charttime': datetime.datetime(2113, 5, 12, ...","[{'charttime': datetime.datetime(2113, 5, 12, ...",[],[],[],[],[],[],"[{'charttime': datetime.datetime(2113, 5, 13, ...","[{'charttime': datetime.datetime(2113, 5, 12, ...","[{'charttime': datetime.datetime(2113, 5, 13, ...","[{'charttime': datetime.datetime(2113, 5, 12, ...",[],[],[],"[{'charttime': datetime.datetime(2113, 5, 12, ...",[],"[{'charttime': datetime.datetime(2113, 5, 12, ...","[{'charttime': datetime.datetime(2113, 5, 12, ...","[{'charttime': datetime.datetime(2113, 5, 12, ...",[],"[{'charttime': datetime.datetime(2113, 5, 12, ...",[],"[{'charttime': datetime.datetime(2113, 5, 12, ...","[{'charttime': datetime.datetime(2113, 5, 12, ...","[{'charttime': datetime.datetime(2113, 5, 12, ...","[{'charttime': datetime.datetime(2113, 5, 12, ...",[],[],[],[],[],[],"[{'suspected_infection_time': None, 'antibioti...",[],[],[],0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,CCU,CCU,EMERGENCY ROOM ADMIT,EMERGENCY,0,2113-05-07 12:16:00,2113-05-26 13:55:00,2113-05-11 15:46:43,2113-05-13 18:23:43,,,2.109,1,,"[{'charttime': datetime.datetime(2113, 5, 12, ...",0,0,[],[],
7,7,195614,29429,M,white,UNKNOWN (DEFAULT),Private,ENGL,0,1,0,0,0,"[{'charttime': datetime.datetime(2139, 10, 6, ...",[],"[{'charttime': datetime.datetime(2139, 10, 6, ...","[{'charttime': datetime.datetime(2139, 10, 6, ...","[{'charttime': datetime.datetime(2139, 10, 6, ...","[{'charttime': datetime.datetime(2139, 10, 6, ...","[{'charttime': datetime.datetime(2139, 10, 6, ...","[{'charttime': datetime.datetime(2139, 10, 6, ...",62.3,167.64,"[{'charttime': datetime.datetime(2139, 10, 6, ...",[],"[{'charttime': datetime.datetime(2139, 10, 6, ...","[{'charttime': datetime.datetime(2139, 10, 6, ...","[{'charttime': datetime.datetime(2139, 10, 6, ...","[{'charttime': datetime.datetime(2139, 10, 6, ...","[{'charttime': datetime.datetime(2139, 10, 6, ...","[{'charttime': datetime.datetime(2139, 10, 6, ...",[],"[{'charttime': datetime.datetime(2139, 10, 6, ...",[],[],[],[],[],[],[],[],[],"[{'charttime': datetime.datetime(2139, 10, 6, ...","[{'charttime': datetime.datetime(2139, 10, 7, ...","[{'charttime': datetime.datetime(2139, 10, 6, ...",[],[],[],"[{'charttime': datetime.datetime(2139, 10, 6, ...","[{'charttime': datetime.datetime(2139, 10, 6, ...","[{'charttime': datetime.datetime(2139, 10, 6, ...","[{'charttime': datetime.datetime(2139, 10, 6, ...","[{'charttime': datetime.datetime(2139, 10, 6, ...","[{'charttime': datetime.datetime(2139, 9, 15, ...","[{'charttime': datetime.datetime(2139, 10, 6, ...","[{'charttime': datetime.datetime(2139, 10, 6, ...","[{'charttime': datetime.datetime(2139, 10, 6, ...","[{'charttime': datetime.datetime(2139, 10, 6, ...","[{'charttime': datetime.datetime(2139, 10, 6, ...","[{'charttime': datetime.datetime(2139, 10, 6, ...",[],[],[],[],[],[],"[{'suspected_infection_time': None, 'antibioti...",[],[],"[{'starttime': datetime.datetime(2139, 10, 6, ...",0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,CSRU,CSRU,PHYS REFERRAL/NORMAL DELI,ELECTIVE,0,2139-10-06 07:15:00,2139-10-11 15:04:00,2139-10-06 09:31:07,2139-10-07 17:13:18,2139-10-06 16:20:00,,1.321,1,,"[{'charttime': datetime.datetime(2139, 10, 6, ...",0,0,[],[],
8,8,190585,28892,M,white,MARRIED,Private,ENGL,0,0,0,0,1,"[{'charttime': datetime.datetime(2130, 10, 5, ...",[],"[{'charttime': datetime.datetime(2130, 10, 5, ...","[{'charttime': datetime.datetime(2130, 10, 5, ...","[{'charttime': datetime.datetime(2130, 10, 5, ...","[{'charttime': datetime.datetime(2130, 10, 5, ...","[{'charttime': datetime.datetime(2130, 10, 5, ...","[{'charttime': datetime.datetime(2130, 10, 5, ...",79.9,165.1,"[{'charttime': datetime.datetime(2130, 10, 5, ...",[],"[{'charttime': datetime.datetime(2130, 10, 5, ...","[{'charttime': datetime.datetime(2130, 10, 5, ...","[{'charttime': datetime.datetime(2130, 10, 5, ...",[],"[{'charttime': datetime.datetime(2130, 10, 5, ...","[{'charttime': datetime.datetime(2130, 10, 5, ...",[],"[{'charttime': datetime.datetime(2130, 10, 5, ...",[],[],[],[],[],[],[],[],[],"[{'charttime': datetime.datetime(2130, 10, 5, ...","[{'charttime': datetime.datetime(2130, 10, 6, ...","[{'charttime': datetime.datetime(2130, 10, 5, ...",[],[],[],"[{'charttime': datetime.datetime(2130, 10, 5, ...","[{'charttime': datetime.datetime(2130, 10, 5, ...","[{'charttime': datetime.datetime(2130, 10, 5, ...","[{'charttime': datetime.datetime(2130, 10, 5, ...","[{'charttime': datetime.datetime(2130, 10, 5, ...","[{'charttime': datetime.datetime(2130, 9, 29, ...","[{'charttime': datetime.datetime(2130, 10, 5, ...","[{'charttime': datetime.datetime(2130, 10, 5, ...","[{'charttime': datetime.datetime(2130, 10, 5, ...","[{'charttime': datetime.datetime(2130, 10, 5, ...","[{'charttime': datetime.datetime(2130, 10, 5, ...","[{'charttime': datetime.datetime(2130, 10, 5, ...",[],[],[],[],[],[],"[{'suspected_infection_time': None, 'antibioti...",[],[],"[{'starttime': datetime.datetime(2130, 10, 5, ...",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,CSRU,CSRU,PHYS REFERRAL/NORMAL DELI,ELECTIVE,0,2130-10-05 07:15:00,2130-10-09 17:00:00,2130-10-05 08:50:46,2130-10-06 21:54:27,2130-10-05 13:30:00,,1.5442,1,,"[{'charttime': datetime.datetime(2130, 10, 5, ...",0,0,[],[],
9,9,190638,25989,M,white,SINGLE,Medicaid,ENGL,0,0,0,0,1,"[{'charttime': datetime.datetime(2142, 3, 7, 1...","[{'charttime': datetime.datetime(2142, 3, 7, 2...","[{'charttime': datetime.datetime(2142, 3, 7, 1...","[{'charttime': datetime.datetime(2142, 3, 7, 1...","[{'charttime': datetime.datetime(2142, 3, 7, 1...","[{'charttime': datetime.datetime(2142, 3, 7, 1...","[{'charttime': datetime.datetime(2142, 3, 7, 1...","[{'charttime': datetime.datetime(2142, 3, 7, 1...",133.1,175.26,"[{'charttime': datetime.datetime(2142, 3, 7, 1...",[],"[{'charttime': datetime.datetime(2142, 3, 9, 2...","[{'charttime': datetime.datetime(2142, 3, 9, 2...","[{'charttime': datetime.datetime(2142, 3, 9, 2...",[],"[{'charttime': datetime.datetime(2142, 3, 7, 9...","[{'charttime': datetime.datetime(2142, 3, 7, 9...",[],"[{'charttime': datetime.datetime(2142, 3, 7, 1...","[{'charttime': datetime.datetime(2142, 3, 9, 2...","[{'charttime': datetime.datetime(2142, 3, 9, 2...",[],[],[],[],[],[],[],"[{'charttime': datetime.datetime(2142, 3, 7, 1...","[{'charttime': datetime.datetime(2142, 3, 8, 2...","[{'charttime': datetime.datetime(2142, 3, 7, 9...",[],[],[],"[{'charttime': datetime.datetime(2142, 3, 7, 1...","[{'charttime': datetime.datetime(2142, 3, 7, 9...","[{'charttime': datetime.datetime(2142, 3, 7, 9...","[{'charttime': datetime.datetime(2142, 3, 7, 1...","[{'charttime': datetime.datetime(2142, 3, 7, 1...",[],"[{'charttime': datetime.datetime(2142, 3, 7, 9...",[],"[{'charttime': datetime.datetime(2142, 3, 7, 9...","[{'charttime': datetime.datetime(2142, 3, 7, 9...","[{'charttime': datetime.datetime(2142, 3, 7, 9...","[{'charttime': datetime.datetime(2142, 3, 7, 9...",[],"[{'charttime': datetime.datetime(2142, 3, 8, 3...",[],[],[],[],"[{'suspected_infection_time': None, 'antibioti...",[],"[{'charttime': datetime.datetime(2142, 3, 8, 3...","[{'starttime': datetime.datetime(2142, 3, 7, 1...",0,0,0,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,1,0,CSRU,CSRU,EMERGENCY ROOM ADMIT,EMERGENCY,0,2142-03-05 17:10:00,2142-03-13 16:40:00,2142-03-07 09:44:47,2142-03-11 11:41:29,2142-03-08 12:00:00,,4.081,1,,"[{'charttime': datetime.datetime(2142, 3, 7, 1...",0,0,[],[],


### 1.2: Cleaning data types

#### 1.2.0: NaN assignment

In [6]:
df = df.replace('NaT',np.datetime64('NaT'))
df = df.replace(['[]','NaN',np.datetime64('NaT')],np.NaN)

#### 1.2.1: Datetime columns

In [7]:
# set column types as datetime
time_cols = ['admittime','dischtime','intime','outtime','reint_time','ext_time','deathtime']
for col in time_cols:
    df[col] = pd.to_datetime(df[col], format='%Y-%m-%d %H:%M:%S')

#dod
df['dod'] = pd.to_datetime(df['dod'], format='%Y-%m-%d')

In [8]:
## CHECK FOR ROWS WHERE DEATHTIME < INTIME OR ADMITTIME

In [9]:
df[time_cols].dtypes

admittime     datetime64[ns]
dischtime     datetime64[ns]
intime        datetime64[ns]
outtime       datetime64[ns]
reint_time    datetime64[ns]
ext_time      datetime64[ns]
deathtime     datetime64[ns]
dtype: object

#### 1.2.2: Demographics

In [10]:
for x in demographics:
    print(x,': ',df[x].unique())

gender :  ['M' 'F']
ethnicity :  ['white' 'other' 'unknown' 'hispanic' 'black' 'asian' 'native' 'WHITE'
 'UNKNOWN' 'OTHER' 'BLACK/AFRICAN AMERICAN' 'HISPANIC/LATINO'
 'UNABLE TO OBTAIN' 'ASIAN' 'AMERICAN INDIAN/ALASKA NATIVE']
marital_status :  ['SINGLE' 'DIVORCED' 'MARRIED' 'WIDOWED' 'UNKNOWN (DEFAULT)' nan
 'SEPARATED']
insurance :  ['Private' 'Self Pay' 'Medicare' 'Medicaid' 'Government' 'Other']
language :  ['ENGL' 'SPAN' nan 'VIET' 'RUSS' 'HAIT' 'CANT' 'PORT' 'PTUN' 'ALBA' 'THAI'
 'ARAB' 'GREE' 'AMER' '*LEB' '*BEN' 'CAPE' 'POLI' 'ITAL' 'HIND' 'URDU'
 'KORE' 'GERM' 'TURK' 'ETHI' 'CAMB' 'MAND' '*GUJ' 'PERS' 'ENGLISH' '?']


In [11]:
#ethnicity
df.replace({'ethnicity':
                {'unknown': np.NaN,'UNKNOWN':np.NaN,'UNABLE TO OBTAIN':np.NaN,
                'OTHER':'other','WHITE':'white','BLACK/AFRICAN AMERICAN':'black','ASIAN':'asian',
                'HISPANIC/LATINO':'hispanic','AMERICAN INDIAN/ALASKA NATIVE':'native'
                }
            }, 
            inplace=True)
print(df['ethnicity'].unique())

['white' 'other' nan 'hispanic' 'black' 'asian' 'native']


In [12]:
#marital_status
df.replace({'marital_status':
                {'UNKNOWN (DEFAULT)': np.NaN
                }
            }, 
            inplace=True)
print(df['marital_status'].unique())

['SINGLE' 'DIVORCED' 'MARRIED' 'WIDOWED' nan 'SEPARATED']


In [13]:
#language
df.replace({'language':
                {'ENGLISH':'ENGL','?':np.NaN
                }
            }, 
            inplace=True)
print(df['marital_status'].unique())

['SINGLE' 'DIVORCED' 'MARRIED' 'WIDOWED' nan 'SEPARATED']


#### 1.2.3: ✔Procedure type

In [14]:
for x in proceduretype:
    print(x,': ',df[x].unique())

aortic :  [0 1]
mit :  [0 1]
tricuspid :  [0 1]
pulmonary :  [0 1]
cabg :  [1 0]


#### 1.2.4: **Vitals / Blood Gases / Products + infection / Ventilation


In [15]:
# wait for Jahan/others
# ventrate seems to be empty

#### 1.2.5: ✔Comorbidities

In [16]:
for x in comorbidities:
    print(x,': ',df[x].unique())

liver_severe :  [0 1]
liver_mild :  [0 1]
rheum :  [0 1]
cvd :  [0 1]
aids :  [0 1]
ckd :  [0 1]
copd :  [0 1]
arrhythmia :  [0 1]
pud :  [0 1]
smoking :  [0 1]
pvd :  [0 1]
paraplegia :  [0 1]
ccf :  [0 1]
met_ca :  [0 1]
t2dm :  [0 1]
t1dm :  [0 1]
malig :  [0 1]
mi :  [1 0]
dementia :  [0 1]


#### 1.2.6: Admissions (categorical)

In [17]:
for x in adm_cat:
    print(x,': ',df[x].unique())

first_careunit :  ['CSRU' 'CCU' 'TSICU' 'SICU' 'MICU'
 'Cardiac Vascular Intensive Care Unit (CVICU)' 'Coronary Care Unit (CCU)'
 'Trauma SICU (TSICU)' 'Medical Intensive Care Unit (MICU)'
 'Medical/Surgical Intensive Care Unit (MICU/SICU)'
 'Surgical Intensive Care Unit (SICU)'
 'Neuro Surgical Intensive Care Unit (Neuro SICU)' 'Neuro Intermediate']
last_careunit :  ['CSRU' 'CCU' 'MICU' 'SICU' 'TSICU'
 'Cardiac Vascular Intensive Care Unit (CVICU)' 'Coronary Care Unit (CCU)'
 'Medical Intensive Care Unit (MICU)' 'Trauma SICU (TSICU)'
 'Medical/Surgical Intensive Care Unit (MICU/SICU)'
 'Surgical Intensive Care Unit (SICU)'
 'Neuro Surgical Intensive Care Unit (Neuro SICU)']
admission_location :  ['PHYS REFERRAL/NORMAL DELI' 'TRANSFER FROM HOSP/EXTRAM'
 'EMERGENCY ROOM ADMIT' 'CLINIC REFERRAL/PREMATURE'
 'TRANSFER FROM OTHER HEALT' 'TRANSFER FROM SKILLED NUR'
 'PHYSICIAN REFERRAL' 'TRANSFER FROM HOSPITAL' 'EMERGENCY ROOM' 'PACU'
 'PROCEDURE SITE' 'TRANSFER FROM SKILLED NURSING FACILITY

In [18]:
#first_careunit
df.replace({'first_careunit':
                {'Cardiac Vascular Intensive Care Unit (CVICU)':'CVICU',
                'Coronary Care Unit (CCU)':'CCU',
                'Medical Intensive Care Unit (MICU)':'MICU',
                'Surgical Intensive Care Unit (SICU)':'SICU',
                'Neuro Intermediate':'Neuro Inter',
                'Medical/Surgical Intensive Care Unit (MICU/SICU)':'MICU/SICU',
                'Trauma SICU (TSICU)':'TSICU',
                'Neuro Surgical Intensive Care Unit (Neuro SICU)':'Neuro SICU'
                }
            }, 
            inplace=True)
print(df['first_careunit'].unique())

['CSRU' 'CCU' 'TSICU' 'SICU' 'MICU' 'CVICU' 'MICU/SICU' 'Neuro SICU'
 'Neuro Inter']


In [19]:
#last_careunit
df.replace({'last_careunit':
                {'Cardiac Vascular Intensive Care Unit (CVICU)':'CVICU',
                'Coronary Care Unit (CCU)':'CCU',
                'Medical Intensive Care Unit (MICU)':'MICU',
                'Surgical Intensive Care Unit (SICU)':'SICU',
                'Neuro Intermediate':'Neuro Inter',
                'Medical/Surgical Intensive Care Unit (MICU/SICU)':'MICU/SICU',
                'Trauma SICU (TSICU)':'TSICU',
                'Neuro Surgical Intensive Care Unit (Neuro SICU)':'Neuro SICU'
                }
            }, 
            inplace=True)
print(df['last_careunit'].unique())

['CSRU' 'CCU' 'MICU' 'SICU' 'TSICU' 'CVICU' 'MICU/SICU' 'Neuro SICU']


In [20]:
#admission_location
df.replace({'admission_location':
                {'TRANSFER FROM HOSP/EXTRAM':'TRANSFER FROM HOSPITAL',
                'PHYS REFERRAL/NORMAL DELI':'PHYSICIAN REFERRAL',
                'TRANSFER FROM SKILLED NUR':'TRANSFER FROM SKILLED NURSING FACILITY',
                'INFORMATION NOT AVAILABLE':np.NaN,
                'CLINIC REFERRAL':'CLINIC REFERRAL/PREMATURE',
                'EMERGENCY ROOM ADMIT':'EMERGENCY ROOM',
                }
            }, 
            inplace=True)
print(df['admission_location'].unique())

['PHYSICIAN REFERRAL' 'TRANSFER FROM HOSPITAL' 'EMERGENCY ROOM'
 'CLINIC REFERRAL/PREMATURE' 'TRANSFER FROM OTHER HEALT'
 'TRANSFER FROM SKILLED NURSING FACILITY' 'PACU' 'PROCEDURE SITE'
 'WALK-IN/SELF REFERRAL' nan 'INTERNAL TRANSFER TO OR FROM PSYCH'
 'AMBULATORY SURGERY TRANSFER']


#### 1.2.7: Others

In [21]:
# for x in others:
#     print(x,': ',df[x].unique())

### Parsing time series data

In [22]:
df['vent_array'][14]

"[{'starttime': datetime.datetime(2184, 1, 18, 1, 43), 'endtime': datetime.datetime(2184, 1, 18, 4, 20), 'duration_hours': 2.6166666666666667}]"

In [23]:
def va_parser(row, output=6):
    """
    Takes row index from `df` returns a list of starttime, endtime, vent duration 
    for first and (if applicable) second intubations

    Parameters
    ----------
    row : row in df
    output_ : select which output you want (use list index below) - e.g. args=[6] for all output when using df.apply()

    Returns
    -------
    single list variable containing  
        [0] int_time1: first intubation starttime
        [1] ext_time1: first intubation endtime
        [2] duration1: first intubation duration
        [3] int_time2: second intubation starttime
        [4] ext_time2: second intubation endtime 
        [5] duration2: second intubation duration
        [6] all

    """
    int_time1=np.NaN
    ext_time1=np.NaN
    duration1=np.NaN
    int_time2=np.NaN
    ext_time2=np.NaN
    duration2=np.NaN
    value = row['vent_array']
    list=[]
    '''a = value
    print(value)'''
    if value == np.NaN or pd.isna(value):
        return np.NaN
    a = value.replace("'",'"')
    a = a.replace('\n ...\n',',').replace('\n', ',').replace('...', '')
    a = a.replace('datetime.','"dt.')
    a = a.replace('),', ')",')
    a = json.loads(a)
    b = [(i['starttime'], i['endtime'], i['duration_hours']) for i in a]
    int_time1=dt.datetime.strptime(b[0][0],'dt.datetime(%Y, %m, %d, %H, %M)')
    ext_time1=dt.datetime.strptime(b[0][1],'dt.datetime(%Y, %m, %d, %H, %M)')
    duration1=b[0][2]
    
    if output==0:
        return int_time1
    if output==1:
        return ext_time1
    if output==2:
        return duration1

    if len(b)>=2:
        int_time2=dt.datetime.strptime(b[1][0],'dt.datetime(%Y, %m, %d, %H, %M)')
        ext_time2=dt.datetime.strptime(b[1][1],'dt.datetime(%Y, %m, %d, %H, %M)')
        duration2=b[1][2]
    if output==3:
        return int_time2
    if output==4:
        return ext_time2
    if output==5:
        return duration2
    if output==6:
        return int_time1, ext_time1, duration1, int_time2, ext_time2, duration2

In [24]:
df['int_time1']=df.apply(va_parser, args=[0], axis=1)
df['ext_time1']=df.apply(va_parser, args=[1], axis=1)
df['duration1']=df.apply(va_parser, args=[2], axis=1)
df['int_time2']=df.apply(va_parser, args=[3], axis=1)
df['ext_time2']=df.apply(va_parser, args=[4], axis=1)
df['duration2']=df.apply(va_parser, args=[5], axis=1)
df.head()

Unnamed: 0.1,Unnamed: 0,hadm_id,subject_id,gender,ethnicity,marital_status,insurance,language,aortic,mit,tricuspid,pulmonary,cabg,temp,bg_temp,hr,spo2,rr,sbp,dbp,meanbp,weight,height,cardiac_index,pt,ptt,inr,inr_1,fibrinogen,hb,hematocrit,plts,wcc,lymphocytes,neutrophils,alp,ast,alt,ggt,bilirubin_indirect,bilirubin_direct,bilirubin_total,chloride,magnesium,potassium,crp,bleed_time,albumin,creatinine,free_calcium,sodium,bicarb,bun,hba1c,glucose,lactate,po2,pco2,baseexcess,ph,aado2,fio2,ffp,insulin,cryo,prbc,infection,ventrate,tidalvol,vent_array,reintubation,liver_severe,liver_mild,rheum,cvd,aids,ckd,copd,arrhythmia,pud,smoking,pvd,paraplegia,ccf,met_ca,t2dm,t1dm,malig,mi,dementia,first_careunit,last_careunit,admission_location,admission_type,hospital_expire_flag,admittime,dischtime,intime,outtime,ext_time,reint_time,los,icustay_seq,deathtime,plt,diab_un,diab_cc,dtoutput,specimen,dod,int_time1,ext_time1,duration1,int_time2,ext_time2,duration2
0,0,195663,27328,M,white,SINGLE,Private,ENGL,0,0,0,0,1,"[{'charttime': datetime.datetime(2198, 1, 31, ...",,"[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...",84.0,172.72,,,"[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...",,"[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...",,"[{'charttime': datetime.datetime(2198, 1, 31, ...",,,,,,,,,,"[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 2, 2, 0...","[{'charttime': datetime.datetime(2198, 1, 31, ...",,,,"[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...",,"[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...",,,,,,,"[{'suspected_infection_time': None, 'antibioti...",,,"[{'starttime': datetime.datetime(2198, 1, 31, ...",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,CSRU,CSRU,PHYSICIAN REFERRAL,ELECTIVE,0,2198-01-31 08:00:00,2198-02-04 12:00:00,2198-01-31 12:27:58,2198-02-02 19:06:39,2198-01-31 22:00:00,NaT,2.2769,1,NaT,"[{'charttime': datetime.datetime(2198, 1, 31, ...",0,0,,,NaT,2198-01-31 17:00:00,2198-01-31 22:00:00,5.0,NaT,NaT,
1,1,106984,6280,F,other,DIVORCED,Self Pay,SPAN,1,0,0,0,0,"[{'charttime': datetime.datetime(2198, 5, 8, 1...",,"[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...",60.0,170.18,"[{'charttime': datetime.datetime(2198, 5, 8, 1...",,"[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...",,"[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 10, ...",,,,,,,,,,"[{'charttime': datetime.datetime(2198, 5, 9, 3...","[{'charttime': datetime.datetime(2198, 5, 9, 3...","[{'charttime': datetime.datetime(2198, 5, 8, 1...",,,,"[{'charttime': datetime.datetime(2198, 5, 9, 3...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 9, 3...","[{'charttime': datetime.datetime(2198, 5, 9, 3...",,"[{'charttime': datetime.datetime(2198, 5, 8, 1...",,"[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...",,,"[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'suspected_infection_time': None, 'antibioti...",,,"[{'starttime': datetime.datetime(2198, 5, 8, 1...",0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,CSRU,CSRU,PHYSICIAN REFERRAL,ELECTIVE,0,2198-05-08 07:15:00,2198-05-15 13:49:00,2198-05-08 13:14:00,2198-05-10 19:46:00,2198-05-09 09:29:00,NaT,2.2722,1,NaT,"[{'charttime': datetime.datetime(2198, 5, 8, 1...",0,0,,,NaT,2198-05-08 17:00:00,2198-05-09 09:29:00,16.483333,NaT,NaT,
2,2,123613,15201,F,,MARRIED,Medicare,,1,0,0,0,1,"[{'charttime': datetime.datetime(2189, 2, 18, ...",,"[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...",57.0,165.1,"[{'charttime': datetime.datetime(2189, 2, 18, ...",,"[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...",,"[{'charttime': datetime.datetime(2189, 2, 18, ...",,,,,,,,,,"[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 19, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...",,,,"[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 10, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...",,"[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...",,,,"[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'suspected_infection_time': None, 'antibioti...",,"[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'starttime': datetime.datetime(2189, 2, 18, ...",0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,CSRU,CSRU,PHYSICIAN REFERRAL,ELECTIVE,0,2189-02-18 08:00:00,2189-03-17 14:20:00,2189-02-18 10:51:08,2189-02-20 13:37:48,2189-02-19 09:00:00,NaT,2.1157,1,NaT,"[{'charttime': datetime.datetime(2189, 2, 18, ...",0,0,,,2191-12-14,2189-02-18 13:00:00,2189-02-19 09:00:00,20.0,NaT,NaT,
3,3,126027,25226,M,,MARRIED,Private,,0,0,0,0,1,"[{'charttime': datetime.datetime(2118, 1, 25, ...",,"[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...",135.0,190.5,"[{'charttime': datetime.datetime(2118, 1, 25, ...",,"[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...",,"[{'charttime': datetime.datetime(2118, 1, 26, ...",,,,,,,,,,"[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...",,,,"[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 14, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...",,"[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...",,,,,"[{'suspected_infection_time': None, 'antibioti...",,"[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'starttime': datetime.datetime(2118, 1, 25, ...",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,CSRU,CSRU,PHYSICIAN REFERRAL,ELECTIVE,0,2118-01-25 07:15:00,2118-01-29 13:00:00,2118-01-25 10:46:42,2118-01-26 12:33:02,2118-01-25 17:30:00,NaT,1.0738,1,NaT,"[{'charttime': datetime.datetime(2118, 1, 25, ...",1,0,,,NaT,2118-01-25 13:00:00,2118-01-25 17:30:00,4.5,NaT,NaT,
4,4,190332,19637,M,,MARRIED,Medicare,,1,0,0,0,0,"[{'charttime': datetime.datetime(2198, 1, 1, 1...",,"[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...",70.0,175.26,"[{'charttime': datetime.datetime(2198, 1, 1, 1...",,,"[{'charttime': datetime.datetime(2198, 1, 3, 4...","[{'charttime': datetime.datetime(2198, 1, 3, 4...",,"[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...",,"[{'charttime': datetime.datetime(2198, 1, 2, 4...",,,,,,,,,,"[{'charttime': datetime.datetime(2198, 1, 2, 4...",,"[{'charttime': datetime.datetime(2198, 1, 1, 1...",,,,"[{'charttime': datetime.datetime(2198, 1, 2, 4...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 2, 4...","[{'charttime': datetime.datetime(2198, 1, 2, 4...",,"[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...",,,,"[{'charttime': datetime.datetime(2198, 1, 2, 6...","[{'suspected_infection_time': None, 'antibioti...",,"[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'starttime': datetime.datetime(2198, 1, 1, 1...",0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,CSRU,CSRU,PHYSICIAN REFERRAL,ELECTIVE,0,2198-01-01 07:15:00,2198-01-09 13:07:00,2198-01-01 10:47:00,2198-01-03 12:00:00,2198-01-01 21:00:00,NaT,2.0507,1,NaT,"[{'charttime': datetime.datetime(2198, 1, 2, 4...",1,0,,,2203-12-06,2198-01-01 13:00:00,2198-01-01 21:00:00,8.0,NaT,NaT,


In [25]:
def infection_parser(value, timelimit):
    if value == np.NaN:
        return np.NaN
    else:
        a = value
        a = a.replace('\n ','')
        a = a.replace('[','')
        a = a.replace(']','')
        a = a.replace("{'charttime': datetime.datetime",'')
        split = a.split('}')

In [26]:
# df_infection

In [27]:
# def ts_parser(value, timelimit):
#     """
#     Takes single string of timeseries data in MIMIC format and returns the mean, max, min values   
#     Parameters
#     ----------
#     value : single string of timeseries data in MIMIC format
#     timelimit : time (in hours) from the first data entry to include data up to

#     Returns
#     -------
#     avg : mean of all values within specified time period
#     max_: maximum of all values within specified time period
#     min_: minimum of all values within specified time period
#     """
#     if value == np.NaN:
#         return np.NaN, np.NaN, np.NaN
#     else:
#         a = value
#         a = a.replace('\n ','')
#         a = a.replace('[','')
#         a = a.replace(']','')
#         a = a.replace("{'charttime': datetime.datetime",'')
#         split = a.split('}')
#         del split[-1]
#         times = []
#         values = []
#         for n in range(0,len(split)):
#             subsplit = split[n].split(", 'value'")
#             t = datetime.strptime(subsplit[0],'(%Y, %m, %d, %H, %M)')
#             times.append(t)
#             v = float(subsplit[1].replace(': ',''))
#             values.append(v)
#         starttime = times[0]
#         endtime = times[0] + timedelta(hours=timelimit)
#         #find the average
#         incl_values = []
#         for n in range(0,len(split)):
#             if times[n] > starttime and times[n] < endtime: 
#                 incl_values.append(values[n])
#         print(incl_values)
#         avg = statistics.mean(incl_values)
#         max_ = max(incl_values)
#         min_ = min(incl_values)
#         return avg, max_, min_

def ts_parser2(value, timeDelta=None, timeLimits=None, valuename='value'):
    # timeDelta is timedelta in hours from earliest entry
    # timeLimits = (startTime, endTime)
    # if both timeDelta and timeLimits are provided, timeDelta overrules.
    # if both are None, then all timepoints are accepted
    
    if value == np.NaN or pd.isna(value):
        return np.NaN, np.NaN, np.NaN
    
    a = value.replace("'", '"')
    a = a.replace('\n ...\n',',').replace('\n', ',').replace('...', '')
    a = a.replace('datetime.', '"dt.')
    a = a.replace(f'), "{valuename}"', f')", "{valuename}"')
    a = a.replace('"unit": None', '"unit": "None"')
    a = a.replace('starttime', 'charttime')
    a = json.loads(a)
    b = [(eval(i['charttime']), i[valuename]) for i in a]
    
    if timeDelta:
        startTime = min(b, key=lambda x:x[0])[0]
        inc_b = [i[1] for i in b if i[0] <= startTime + dt.timedelta(hours=timeDelta)]
    else:
        if timeLimits:
            inc_b = [i[1] for i in b if i[0] >= timeLimits[0] and i[0] <= timeLimits[1]]
        else:
            inc_b = [i[1] for i in b]
    if len(inc_b) == 0:
        return np.NaN, np.NaN, np.NaN
    
    return sum(inc_b) / len(inc_b), max(inc_b), min(inc_b)

# test_x = df[timeseries].iloc[0,0]
# print(ts_parser(test_x,12))
# print(ts_parser2(test_x, timeDelta=12))
# print()
# test_y = df['bg_temp'][9]
# print(test_y)
# print('Parser1: ', ts_parser(test_y, 36))
# print('Parser2: ', ts_parser2(test_y, timeDelta=36))

### 1.3: Handling missing data

#### 1.3.0 Assessing for missing data

In [28]:
# formula for checking % missing values
def missing_values_table(df): 
    mis_val = df.isnull().sum()
    mis_val_percent = 100 * df.isnull().sum() / len(df)
    mis_val_table = pd.concat([mis_val, mis_val_percent], axis=1)
    mis_val_table_ren_columns = mis_val_table.rename(columns = {0: 'Missing Values', 1: '% Missing Values'})
    return mis_val_table_ren_columns

missing_data = missing_values_table(df)

In [29]:
#set limit and get list of variables missing above limit in `missing_cols`
missing_limit = 0
missing_cols = missing_data.loc[missing_data['% Missing Values']>missing_limit].index.tolist()
print(missing_cols)
missing_data = missing_data.loc[missing_data['% Missing Values']>missing_limit]
missing_data = missing_data.sort_values(by=['% Missing Values'])
pd.set_option('display.max_rows', None)
missing_data

['bg_temp', 'cardiac_index', 'plts', 'lymphocytes', 'neutrophils', 'alp', 'ast', 'alt', 'ggt', 'bilirubin_indirect', 'bilirubin_direct', 'bilirubin_total', 'crp', 'bleed_time', 'albumin', 'aado2', 'fio2', 'ffp', 'cryo', 'prbc', 'ventrate', 'tidalvol', 'reint_time', 'deathtime', 'dtoutput', 'specimen', 'dod', 'int_time2', 'ext_time2', 'duration2']


Unnamed: 0,Missing Values,% Missing Values
lymphocytes,6819,55.295167
neutrophils,6820,55.303276
prbc,6850,55.546546
cardiac_index,8743,70.896854
fio2,8956,72.624067
bg_temp,9324,75.608174
bilirubin_total,9659,78.324684
ast,9668,78.397665
alt,9678,78.478754
alp,9722,78.83555


In [30]:
pd.reset_option('display.max_rows')

In [31]:
# missing_data.loc[time_cols,:]

In [32]:
df = df.drop(axis=1, columns=list(missing_data.index), inplace=False)
print(list(df.columns))

['Unnamed: 0', 'hadm_id', 'subject_id', 'gender', 'ethnicity', 'marital_status', 'insurance', 'language', 'aortic', 'mit', 'tricuspid', 'pulmonary', 'cabg', 'temp', 'hr', 'spo2', 'rr', 'sbp', 'dbp', 'meanbp', 'weight', 'height', 'pt', 'ptt', 'inr', 'inr_1', 'fibrinogen', 'hb', 'hematocrit', 'wcc', 'chloride', 'magnesium', 'potassium', 'creatinine', 'free_calcium', 'sodium', 'bicarb', 'bun', 'hba1c', 'glucose', 'lactate', 'po2', 'pco2', 'baseexcess', 'ph', 'insulin', 'infection', 'vent_array', 'reintubation', 'liver_severe', 'liver_mild', 'rheum', 'cvd', 'aids', 'ckd', 'copd', 'arrhythmia', 'pud', 'smoking', 'pvd', 'paraplegia', 'ccf', 'met_ca', 't2dm', 't1dm', 'malig', 'mi', 'dementia', 'first_careunit', 'last_careunit', 'admission_location', 'admission_type', 'hospital_expire_flag', 'admittime', 'dischtime', 'intime', 'outtime', 'ext_time', 'los', 'icustay_seq', 'plt', 'diab_un', 'diab_cc', 'int_time1', 'ext_time1', 'duration1']


In [33]:
# option 2: impute data based on median


In [34]:
# option 3: multiple imputation

# x = missing_data.loc[missing_data['% Missing Values']> 0]
# x.loc[[i for i in x.index if i not in time_cols],:]

#### 1.3.1 Creating summary fields for time-series data

In [35]:
# checking that ts_parser2() works for the timeseries columns

# for j in timeseries:
#     for i in range(len(df[j])):
#         try:
#             if j in timeseries_valuenames:
#                 ts_parser2(df[j][i], timeDelta=36, valuename=timeseries_valuenames[j])
#             else:
#                 ts_parser2(df[j][i], timeDelta=36)
#         except:
#             print(j, i)
#             break
#     print(j, 'Fine')

#### 1.3.2 Beginning imputation

In [56]:
dfForImpute = df[['gender', 'ethnicity', 'marital_status', 'language', 'admission_location']]

for i in ['gender', 'ethnicity', 'marital_status', 'language', 'admission_location']:
    dfForImpute[i] = df[i].astype('category')

# generating timeseries summary values
for column in timeseries:
    if column not in df.columns:
        continue
    x = timeseries_valuenames[column] if column in timeseries_valuenames else "value"
    meanList = []
    maxList = []
    minList = []
    for i in range(len(df[column])):
        y = ts_parser2(df[column][i], timeLimits=(df['intime'][i].to_pydatetime(), df['outtime'][i].to_pydatetime()), valuename=x)
        meanList.append(y[0])
        maxList.append(y[1])
        minList.append(y[2])
    dfForImpute[column+'_mean'] = meanList
    dfForImpute[column+'_max'] = maxList
    dfForImpute[column+'_min'] = minList

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfForImpute[i] = df[i].astype('category')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfForImpute[column+'_mean']=meanList
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfForImpute[column+'_max']=maxList
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_index

In [59]:
# add on non-time data for imputation
extraColumns = [i for i in df.columns if i not in list(dfForImpute.columns) + timeseries + ['infection', 'vent_array', 'int_time1', 'ext_time1'] + ptinfo + adm_num]
for i in extraColumns:
    if i in ('weight', 'height', 'duration1'):
        dfForImpute[i] = df[i]
    else:
        dfForImpute[i] = df[i].astype('category')
dfForImpute = dfForImpute.copy()

In [60]:
# before imputation again
dfForImpute

Unnamed: 0,ethnicity,marital_status,language,admission_location,temp_mean,temp_max,temp_min,hr_mean,hr_max,hr_min,spo2_mean,spo2_max,spo2_min,rr_mean,rr_max,rr_min,sbp_mean,sbp_max,sbp_min,dbp_mean,dbp_max,dbp_min,meanbp_mean,meanbp_max,meanbp_min,pt_mean,pt_max,pt_min,ptt_mean,ptt_max,ptt_min,inr_mean,inr_max,inr_min,inr_1_mean,inr_1_max,inr_1_min,fibrinogen_mean,fibrinogen_max,fibrinogen_min,hb_mean,hb_max,hb_min,hematocrit_mean,hematocrit_max,hematocrit_min,wcc_mean,wcc_max,wcc_min,chloride_mean,chloride_max,chloride_min,magnesium_mean,magnesium_max,magnesium_min,potassium_mean,potassium_max,potassium_min,creatinine_mean,creatinine_max,creatinine_min,free_calcium_mean,free_calcium_max,free_calcium_min,sodium_mean,sodium_max,sodium_min,bicarb_mean,bicarb_max,bicarb_min,bun_mean,bun_max,bun_min,hba1c_mean,hba1c_max,hba1c_min,glucose_mean,glucose_max,glucose_min,lactate_mean,lactate_max,lactate_min,po2_mean,po2_max,po2_min,pco2_mean,pco2_max,pco2_min,baseexcess_mean,baseexcess_max,baseexcess_min,ph_mean,ph_max,ph_min,insulin_mean,insulin_max,insulin_min,plt_mean,plt_max,plt_min,gender,insurance,aortic,mit,tricuspid,pulmonary,cabg,weight,height,reintubation,liver_severe,liver_mild,rheum,cvd,aids,ckd,copd,arrhythmia,pud,smoking,pvd,paraplegia,ccf,met_ca,t2dm,t1dm,malig,mi,dementia,first_careunit,last_careunit,admission_type,hospital_expire_flag,diab_un,diab_cc,duration1
0,white,SINGLE,ENGL,PHYSICIAN REFERRAL,37.261905,37.944446,36.499998,87.000000,109.0,63.0,97.963636,100.0,92.0,23.862069,43.0,13.0,111.333333,161.0,91.0,59.035088,69.0,43.0,72.622807,86.0,57.000000,,,,29.800000,29.8,29.8,1.200000,1.2,1.2,1.200000,1.2,1.2,,,,12.200000,13.0,11.4,38.500000,39.0,38.0,12.666667,14.7,10.2,103.400000,107.0,100.0,2.000,2.0,2.0,3.816667,4.1,3.6,0.833333,0.9,0.8,1.133333,1.15,1.12,138.400000,140.0,136.0,27.666667,32.0,25.0,11.666667,13.0,11.0,,,,122.170213,168.0,87.0,2.600,2.8,2.4,0.750000,2.0,0.0,44.250000,48.0,40.0,0.750000,2.0,0.0,7.385000,7.40,7.36,,,,239.666667,268.0,196.0,M,Private,0,0,0,0,1,84.00,172.72,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,CSRU,CSRU,ELECTIVE,0,0,0,5.000000
1,other,DIVORCED,SPAN,PHYSICIAN REFERRAL,37.043305,38.500000,35.200001,90.157407,119.0,76.0,97.339286,100.0,65.0,14.210526,25.0,7.5,99.149533,134.0,75.0,52.696262,75.0,35.0,67.266356,101.0,45.000000,,,,41.166667,47.5,37.9,1.500000,1.8,1.3,1.500000,1.8,1.3,,,,7.116667,10.0,6.0,21.000000,30.0,18.0,10.100000,10.1,10.1,106.000000,108.0,104.0,1.850,2.1,1.6,4.200000,4.9,3.4,0.550000,0.6,0.5,1.026250,1.21,0.69,141.333333,143.0,139.0,26.500000,27.0,26.0,13.000000,14.0,12.0,,,,136.300000,195.0,105.0,,,,0.294118,4.0,-3.0,40.764706,50.0,35.0,0.294118,4.0,-3.0,7.401667,7.50,7.35,175.000000,314.000000,0.000000,112.666667,141.0,93.0,F,Self Pay,1,0,0,0,0,60.00,170.18,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,CSRU,CSRU,ELECTIVE,0,0,0,16.483333
2,,MARRIED,,PHYSICIAN REFERRAL,36.881572,37.799999,35.400002,79.419355,88.0,57.0,98.736842,100.0,93.0,19.112903,39.0,9.0,118.258065,170.0,89.0,52.096774,88.0,22.0,74.623657,121.0,48.666698,,,,54.750000,59.3,50.2,2.150000,2.7,1.6,2.150000,2.7,1.6,175.0,175.0,175.0,9.600000,11.9,6.5,28.000000,35.0,20.0,17.975000,22.8,9.4,109.000000,113.0,105.0,2.600,2.6,2.6,4.363636,5.2,3.1,1.333333,1.6,1.2,1.165000,1.32,0.95,140.000000,143.0,139.0,27.333333,29.0,25.0,19.666667,21.0,19.0,,,,124.789474,188.0,92.0,,,,1.571429,4.0,-1.0,40.142857,48.0,33.0,1.571429,4.0,-1.0,7.413333,7.52,7.32,,,,114.250000,148.0,67.0,F,Medicare,1,0,0,0,1,57.00,165.10,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,CSRU,CSRU,ELECTIVE,0,0,0,20.000000
3,,MARRIED,,PHYSICIAN REFERRAL,37.532258,38.700001,36.700001,87.939394,104.0,73.0,97.531250,100.0,93.0,16.212121,28.0,10.0,113.191176,162.0,85.0,56.823529,78.0,40.0,73.575758,99.0,56.000000,,,,26.700000,28.3,25.1,1.350000,1.5,1.2,1.350000,1.5,1.2,201.0,201.0,201.0,12.600000,13.8,11.1,36.333333,40.0,33.0,14.000000,14.0,14.0,106.000000,106.0,106.0,1.850,1.9,1.8,3.742857,4.4,3.2,0.550000,0.7,0.4,1.151667,1.24,1.10,137.750000,140.0,135.0,27.500000,28.0,27.0,12.500000,14.0,11.0,,,,136.222222,203.0,71.0,,,,0.454545,2.0,-2.0,46.363636,55.0,37.0,0.454545,2.0,-2.0,7.372500,7.46,7.32,,,,149.500000,161.0,138.0,M,Private,0,0,0,0,1,135.00,190.50,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,CSRU,CSRU,ELECTIVE,0,1,0,4.500000
4,,MARRIED,,PHYSICIAN REFERRAL,36.859811,37.400002,35.055555,92.706667,122.0,64.0,97.852941,100.0,94.0,15.168919,24.0,4.0,109.439189,144.0,69.0,61.601351,83.0,42.0,76.846849,104.0,57.000000,,,,,,,1.100000,1.1,1.1,1.100000,1.1,1.1,,,,9.463636,11.2,7.9,28.888889,34.0,24.0,9.900000,11.6,8.2,106.000000,108.0,104.0,,,,4.337500,4.8,3.5,1.300000,1.4,1.2,1.085000,1.25,0.85,136.750000,137.0,136.0,25.000000,27.0,23.0,21.500000,27.0,16.0,,,,137.857143,230.0,108.0,1.500,1.5,1.5,-2.500000,0.0,-7.0,44.166667,58.0,38.0,-2.500000,0.0,-7.0,7.330000,7.40,7.27,,,,90.500000,95.0,86.0,M,Medicare,1,0,0,0,0,70.00,175.26,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,CSRU,CSRU,ELECTIVE,0,1,0,8.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12327,white,,ENGL,TRANSFER FROM HOSPITAL,36.685556,37.060000,36.330000,70.378378,82.0,60.0,96.270270,100.0,92.0,19.162162,25.0,12.0,118.975610,155.0,90.0,59.292683,84.0,42.0,76.048780,104.0,61.000000,13.550000,14.5,12.6,24.800000,26.2,23.4,1.250000,1.3,1.2,1.250000,1.3,1.2,282.0,282.0,282.0,9.500000,12.4,8.8,29.000000,37.0,26.0,13.533333,16.0,11.2,110.200000,112.0,108.0,,,,4.375000,4.9,3.4,1.300000,1.4,1.2,1.291429,1.40,1.16,139.000000,144.0,137.0,21.000000,21.0,21.0,40.000000,41.0,39.0,,,,111.600000,131.0,77.0,1.300,1.6,1.2,157.200000,300.0,61.0,41.900000,47.0,37.0,-2.000000,0.0,-3.0,7.341818,7.40,7.31,5.596820,10.831461,2.000000,142.666667,167.0,120.0,M,Medicare,0,0,0,0,1,100.00,175.00,0,0,0,0,0,0,1,0,1,0,1,0,0,1,0,1,0,0,1,0,CVICU,CVICU,URGENT,0,0,1,6.000000
12328,white,SINGLE,ENGL,TRANSFER FROM HOSPITAL,37.017917,37.500000,36.500000,77.545455,88.0,66.0,97.272727,100.0,92.0,13.382353,20.0,9.0,114.906250,134.0,89.0,52.687500,67.0,39.0,71.593750,89.0,56.000000,13.466667,13.7,13.2,27.200000,28.9,24.8,1.166667,1.2,1.1,1.166667,1.2,1.1,,,,11.100000,13.5,9.7,29.000000,29.0,29.0,12.266667,13.8,10.2,107.500000,109.0,106.0,2.300,2.3,2.3,4.980000,5.8,4.4,0.800000,0.9,0.7,1.320000,1.58,1.16,137.600000,141.0,135.0,26.000000,28.0,24.0,19.500000,21.0,18.0,,,,148.000000,203.0,105.0,2.600,2.6,2.6,215.666667,271.0,158.0,43.666667,49.0,38.0,-2.000000,-1.0,-3.0,7.355000,7.41,7.29,3.945752,10.000000,1.649718,135.333333,154.0,113.0,F,Other,0,0,0,0,1,84.70,160.00,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,0,CVICU,CVICU,URGENT,0,1,0,
12329,white,SINGLE,ENGL,PHYSICIAN REFERRAL,36.494444,37.100000,35.800000,86.291667,101.0,75.0,96.200000,100.0,94.0,14.312500,21.0,11.0,113.080000,141.0,91.0,53.320000,71.0,41.0,70.680000,97.0,11.000000,13.166667,15.1,11.9,26.166667,26.8,25.5,1.233333,1.4,1.1,1.233333,1.4,1.1,192.0,192.0,192.0,10.442857,12.7,9.2,31.500000,38.0,29.0,14.366667,15.4,12.8,105.250000,107.0,104.0,,,,4.271429,4.7,3.9,0.550000,0.6,0.5,1.148333,1.25,1.08,136.250000,139.0,135.0,23.500000,24.0,23.0,11.500000,12.0,11.0,,,,150.333333,180.0,122.0,1.825,2.0,1.7,186.750000,359.0,92.0,41.875000,56.0,36.0,-2.375000,0.0,-5.0,7.345556,7.42,7.24,9.423242,50.000000,2.000000,100.666667,114.0,91.0,F,Medicare,1,0,0,0,0,78.50,163.00,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,CVICU,CVICU,SURGICAL SAME DAY ADMISSION,0,1,0,
12330,white,SINGLE,ENGL,PROCEDURE SITE,36.899189,37.560000,36.110000,86.031646,113.0,65.0,96.430380,100.0,91.0,15.649682,29.0,6.0,123.195783,180.0,74.0,55.891566,91.0,36.0,74.388554,107.0,12.000000,14.540000,17.7,12.7,29.360000,31.2,26.9,1.380000,1.7,1.2,1.380000,1.7,1.2,152.0,152.0,152.0,10.766667,12.2,10.0,31.000000,32.0,30.0,9.122222,12.5,5.1,103.363636,107.0,101.0,2.025,2.3,1.8,3.800000,5.4,3.2,0.600000,0.7,0.5,1.112632,1.20,1.03,133.750000,137.0,130.0,24.000000,25.0,22.0,11.428571,19.0,7.0,,,,103.173913,147.0,87.0,0.980,1.8,0.8,138.666667,288.0,62.0,39.208333,44.0,31.0,0.250000,4.0,-4.0,7.408929,7.50,7.31,5.552349,10.000000,2.000000,159.666667,224.0,127.0,M,Medicare,0,0,0,0,1,112.05,173.00,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,CVICU,CVICU,EW EMER.,0,0,0,24.000000


In [61]:
dfForImpute2 = dfForImpute[dfForImpute.columns[:]]

kds = mf.ImputationKernel(
  dfForImpute2,
  datasets=1,
  save_all_iterations=True,
  random_state=1991
)

# Run the MICE algorithm for 3 iterations
kds.mice(3)

print(kds)

dfImputed = kds.complete_data(dataset=0, inplace=False)
print(dfImputed.isnull().sum(0))

# after imputation
dfImputed.head(10)

              Class: ImputationKernel
           Datasets: 1
         Iterations: 3
  Imputed Variables: 103
save_all_iterations: True
ethnicity               0
marital_status          0
language                0
admission_location      0
temp_mean               0
                       ..
admission_type          0
hospital_expire_flag    0
diab_un                 0
diab_cc                 0
duration1               0
Length: 136, dtype: int64


Unnamed: 0,ethnicity,marital_status,language,admission_location,temp_mean,temp_max,temp_min,hr_mean,hr_max,hr_min,spo2_mean,spo2_max,spo2_min,rr_mean,rr_max,rr_min,sbp_mean,sbp_max,sbp_min,dbp_mean,dbp_max,dbp_min,meanbp_mean,meanbp_max,meanbp_min,pt_mean,pt_max,pt_min,ptt_mean,ptt_max,ptt_min,inr_mean,inr_max,inr_min,inr_1_mean,inr_1_max,inr_1_min,fibrinogen_mean,fibrinogen_max,fibrinogen_min,hb_mean,hb_max,hb_min,hematocrit_mean,hematocrit_max,hematocrit_min,wcc_mean,wcc_max,wcc_min,chloride_mean,chloride_max,chloride_min,magnesium_mean,magnesium_max,magnesium_min,potassium_mean,potassium_max,potassium_min,creatinine_mean,creatinine_max,creatinine_min,free_calcium_mean,free_calcium_max,free_calcium_min,sodium_mean,sodium_max,sodium_min,bicarb_mean,bicarb_max,bicarb_min,bun_mean,bun_max,bun_min,hba1c_mean,hba1c_max,hba1c_min,glucose_mean,glucose_max,glucose_min,lactate_mean,lactate_max,lactate_min,po2_mean,po2_max,po2_min,pco2_mean,pco2_max,pco2_min,baseexcess_mean,baseexcess_max,baseexcess_min,ph_mean,ph_max,ph_min,insulin_mean,insulin_max,insulin_min,plt_mean,plt_max,plt_min,gender,insurance,aortic,mit,tricuspid,pulmonary,cabg,weight,height,reintubation,liver_severe,liver_mild,rheum,cvd,aids,ckd,copd,arrhythmia,pud,smoking,pvd,paraplegia,ccf,met_ca,t2dm,t1dm,malig,mi,dementia,first_careunit,last_careunit,admission_type,hospital_expire_flag,diab_un,diab_cc,duration1
0,white,SINGLE,ENGL,PHYSICIAN REFERRAL,37.261905,37.944446,36.499998,87.0,109.0,63.0,97.963636,100.0,92.0,23.862069,43.0,13.0,111.333333,161.0,91.0,59.035088,69.0,43.0,72.622807,86.0,57.0,12.66,12.6,12.5,29.8,29.8,29.8,1.2,1.2,1.2,1.2,1.2,1.2,455.0,651.0,277.0,12.2,13.0,11.4,38.5,39.0,38.0,12.666667,14.7,10.2,103.4,107.0,100.0,2.0,2.0,2.0,3.816667,4.1,3.6,0.833333,0.9,0.8,1.133333,1.15,1.12,138.4,140.0,136.0,27.666667,32.0,25.0,11.666667,13.0,11.0,6.0,6.0,6.0,122.170213,168.0,87.0,2.6,2.8,2.4,0.75,2.0,0.0,44.25,48.0,40.0,0.75,2.0,0.0,7.385,7.4,7.36,188.666667,314.0,0.39193,239.666667,268.0,196.0,M,Private,0,0,0,0,1,84.0,172.72,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,CSRU,CSRU,ELECTIVE,0,0,0,5.0
1,other,DIVORCED,SPAN,PHYSICIAN REFERRAL,37.043305,38.5,35.200001,90.157407,119.0,76.0,97.339286,100.0,65.0,14.210526,25.0,7.5,99.149533,134.0,75.0,52.696262,75.0,35.0,67.266356,101.0,45.0,16.775,19.7,14.2,41.166667,47.5,37.9,1.5,1.8,1.3,1.5,1.8,1.3,278.0,277.0,276.0,7.116667,10.0,6.0,21.0,30.0,18.0,10.1,10.1,10.1,106.0,108.0,104.0,1.85,2.1,1.6,4.2,4.9,3.4,0.55,0.6,0.5,1.02625,1.21,0.69,141.333333,143.0,139.0,26.5,27.0,26.0,13.0,14.0,12.0,6.1,6.1,6.1,136.3,195.0,105.0,0.85,0.9,0.8,0.294118,4.0,-3.0,40.764706,50.0,35.0,0.294118,4.0,-3.0,7.401667,7.5,7.35,175.0,314.0,0.0,112.666667,141.0,93.0,F,Self Pay,1,0,0,0,0,60.0,170.18,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,CSRU,CSRU,ELECTIVE,0,0,0,16.483333
2,white,MARRIED,CANT,PHYSICIAN REFERRAL,36.881572,37.799999,35.400002,79.419355,88.0,57.0,98.736842,100.0,93.0,19.112903,39.0,9.0,118.258065,170.0,89.0,52.096774,88.0,22.0,74.623657,121.0,48.666698,23.377778,29.1,17.7,54.75,59.3,50.2,2.15,2.7,1.6,2.15,2.7,1.6,175.0,175.0,175.0,9.6,11.9,6.5,28.0,35.0,20.0,17.975,22.8,9.4,109.0,113.0,105.0,2.6,2.6,2.6,4.363636,5.2,3.1,1.333333,1.6,1.2,1.165,1.32,0.95,140.0,143.0,139.0,27.333333,29.0,25.0,19.666667,21.0,19.0,6.1,6.1,6.1,124.789474,188.0,92.0,1.2,1.7,0.9,1.571429,4.0,-1.0,40.142857,48.0,33.0,1.571429,4.0,-1.0,7.413333,7.52,7.32,169.333333,294.0,0.0,114.25,148.0,67.0,F,Medicare,1,0,0,0,1,57.0,165.1,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,CSRU,CSRU,ELECTIVE,0,0,0,20.0
3,white,MARRIED,PTUN,PHYSICIAN REFERRAL,37.532258,38.700001,36.700001,87.939394,104.0,73.0,97.53125,100.0,93.0,16.212121,28.0,10.0,113.191176,162.0,85.0,56.823529,78.0,40.0,73.575758,99.0,56.0,14.9,16.7,13.6,26.7,28.3,25.1,1.35,1.5,1.2,1.35,1.5,1.2,201.0,201.0,201.0,12.6,13.8,11.1,36.333333,40.0,33.0,14.0,14.0,14.0,106.0,106.0,106.0,1.85,1.9,1.8,3.742857,4.4,3.2,0.55,0.7,0.4,1.151667,1.24,1.1,137.75,140.0,135.0,27.5,28.0,27.0,12.5,14.0,11.0,6.3,6.3,6.3,136.222222,203.0,71.0,1.975,2.7,1.2,0.454545,2.0,-2.0,46.363636,55.0,37.0,0.454545,2.0,-2.0,7.3725,7.46,7.32,2.0,83.825678,0.0,149.5,161.0,138.0,M,Private,0,0,0,0,1,135.0,190.5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,CSRU,CSRU,ELECTIVE,0,1,0,4.5
4,white,MARRIED,CANT,PHYSICIAN REFERRAL,36.859811,37.400002,35.055555,92.706667,122.0,64.0,97.852941,100.0,94.0,15.168919,24.0,4.0,109.439189,144.0,69.0,61.601351,83.0,42.0,76.846849,104.0,57.0,13.15,13.5,12.8,36.5,44.7,30.9,1.1,1.1,1.1,1.1,1.1,1.1,231.0,275.0,198.0,9.463636,11.2,7.9,28.888889,34.0,24.0,9.9,11.6,8.2,106.0,108.0,104.0,3.0,3.5,2.6,4.3375,4.8,3.5,1.3,1.4,1.2,1.085,1.25,0.85,136.75,137.0,136.0,25.0,27.0,23.0,21.5,27.0,16.0,6.3,6.3,6.3,137.857143,230.0,108.0,1.5,1.5,1.5,-2.5,0.0,-7.0,44.166667,58.0,38.0,-2.5,0.0,-7.0,7.33,7.4,7.27,162.666667,314.0,0.0,90.5,95.0,86.0,M,Medicare,1,0,0,0,0,70.0,175.26,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,CSRU,CSRU,ELECTIVE,0,1,0,8.0
5,asian,SINGLE,ENGL,TRANSFER FROM HOSPITAL,36.953644,37.799999,35.555556,81.570248,92.0,64.0,98.815126,100.0,91.0,12.792,27.0,7.0,118.800813,185.0,78.0,53.735772,73.0,43.0,71.721311,110.0,55.0,12.75,13.6,12.0,39.133333,46.9,33.5,1.2,1.3,1.1,1.2,1.3,1.1,239.0,239.0,239.0,8.69,10.4,7.2,23.75,29.0,22.0,16.533333,21.5,13.3,109.428571,114.0,105.0,2.533333,2.6,2.5,4.246667,5.0,3.5,0.94,1.3,0.6,1.16,1.28,1.04,136.857143,141.0,134.0,22.0,24.0,21.0,24.6,33.0,15.0,6.4,6.4,6.4,126.592593,226.0,81.0,2.266667,2.8,0.8,-1.692308,6.0,-7.0,41.692308,49.0,31.0,-1.692308,6.0,-7.0,7.349286,7.56,7.24,145.5,304.0,0.0,138.166667,188.0,106.0,F,Medicaid,0,0,0,0,1,73.3,162.56,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,CSRU,CSRU,EMERGENCY,0,1,0,22.583333
6,white,WIDOWED,MAND,EMERGENCY ROOM,36.253968,36.944444,34.499999,108.043478,127.0,90.0,95.702703,99.0,93.0,24.23913,30.0,16.0,119.409091,146.0,88.0,56.931818,90.0,28.0,77.757579,108.0,48.0,17.1,18.3,15.7,74.28,150.0,33.1,1.55,1.7,1.4,1.55,1.7,1.4,343.0,499.0,240.0,12.725,13.2,12.0,38.5,39.0,38.0,10.5,10.9,10.1,100.5,102.0,99.0,2.1,2.1,2.1,4.0,4.1,3.9,0.95,1.0,0.9,1.105625,1.18,1.0,135.5,138.0,133.0,26.0,26.0,26.0,28.5,31.0,26.0,6.0,6.0,6.0,116.5,126.0,107.0,3.025,3.3,3.4,1.5,3.0,0.0,43.5,46.0,41.0,1.5,3.0,0.0,7.42,7.42,7.42,150.666667,294.0,0.0,179.5,181.0,178.0,M,Medicare,0,1,0,0,1,101.15,177.8,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,CCU,CCU,EMERGENCY,0,0,0,15.283333
7,white,MARRIED,ENGL,PHYSICIAN REFERRAL,36.430556,36.799999,36.0,81.774194,95.0,76.0,96.657143,100.0,91.0,18.6,29.0,14.0,105.234375,128.0,85.0,58.703125,69.0,48.0,72.625,88.0,58.0,15.45,15.7,14.8,37.35,39.1,35.6,1.35,1.4,1.3,1.35,1.4,1.3,174.0,174.0,174.0,9.342857,11.0,8.4,26.75,28.0,26.0,11.733333,13.2,10.2,109.666667,115.0,107.0,2.2,2.2,2.2,4.4125,5.2,3.9,0.8,0.8,0.8,1.124,1.22,1.0,136.333333,139.0,134.0,21.5,23.0,20.0,9.0,9.0,9.0,5.6,5.6,5.6,118.489796,160.0,90.0,2.85,3.8,1.9,-2.888889,0.0,-9.0,34.888889,42.0,25.0,-2.888889,0.0,-9.0,7.386,7.43,7.35,149.333333,232.0,0.0,169.666667,178.0,156.0,M,Private,0,1,0,0,0,62.3,167.64,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,CSRU,CSRU,ELECTIVE,0,0,0,2.583333
8,white,MARRIED,ENGL,PHYSICIAN REFERRAL,37.166667,38.299999,35.099998,84.891304,91.0,63.0,98.266667,100.0,93.0,16.434783,25.0,11.0,109.444444,178.0,88.0,62.666667,92.0,48.0,78.860465,144.0,62.0,13.9,14.1,13.7,37.9,37.9,37.9,1.2,1.2,1.2,1.2,1.2,1.2,139.0,141.0,138.0,8.983333,10.3,7.7,24.666667,28.0,23.0,10.233333,11.5,8.7,108.75,115.0,104.0,2.1,2.1,2.1,4.7375,6.6,3.5,0.733333,0.9,0.5,1.085,1.12,1.05,136.0,137.0,135.0,22.333333,23.0,21.0,13.333333,14.0,12.0,5.4,5.4,5.4,114.083333,162.0,55.0,2.2,2.2,2.2,0.0,3.0,-4.0,38.6,41.0,35.0,0.0,3.0,-4.0,7.41,7.48,7.33,160.333333,293.0,0.0,207.333333,233.0,173.0,M,Private,0,0,0,0,1,79.9,165.1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,CSRU,CSRU,ELECTIVE,0,0,0,2.5
9,white,SINGLE,ENGL,EMERGENCY ROOM,37.906996,39.111112,35.900002,114.581395,144.0,94.0,94.114504,100.0,87.0,24.583969,46.0,12.0,117.492063,169.0,85.0,53.888889,77.0,33.0,73.766936,106.0,53.333302,14.1,14.1,14.1,27.9,27.9,27.9,1.2,1.2,1.2,1.2,1.2,1.2,241.0,242.0,242.0,11.41,14.3,9.5,32.4,43.0,29.0,11.48,15.5,8.6,101.4,106.0,98.0,1.84,2.0,1.7,4.135294,4.8,3.3,0.84,0.9,0.8,1.147273,1.28,1.08,134.857143,136.0,133.0,26.6,27.0,26.0,13.0,16.0,9.0,8.4,8.4,8.6,116.453333,173.0,88.0,1.825,2.4,1.3,0.684211,3.0,-2.0,45.052632,62.0,37.0,0.684211,3.0,-2.0,7.386818,7.46,7.27,152.333333,246.0,0.0,142.0,189.0,111.0,M,Medicaid,0,0,0,0,1,133.1,175.26,0,0,0,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,1,0,CSRU,CSRU,EMERGENCY,0,0,0,23.0


In [92]:
y = np.asarray([i.to_pydatetime() for i in df["intime"]])
z = np.asarray([i.to_pydatetime() for i in df["outtime"]])
dfImputed['icu_stay_duration'] = [i.total_seconds() for i in z-y]
dfImputed['hadm_id','subject_id']
dfImputed

Unnamed: 0,ethnicity,marital_status,language,admission_location,temp_mean,temp_max,temp_min,hr_mean,hr_max,hr_min,spo2_mean,spo2_max,spo2_min,rr_mean,rr_max,rr_min,sbp_mean,sbp_max,sbp_min,dbp_mean,dbp_max,dbp_min,meanbp_mean,meanbp_max,meanbp_min,pt_mean,pt_max,pt_min,ptt_mean,ptt_max,ptt_min,inr_mean,inr_max,inr_min,inr_1_mean,inr_1_max,inr_1_min,fibrinogen_mean,fibrinogen_max,fibrinogen_min,hb_mean,hb_max,hb_min,hematocrit_mean,hematocrit_max,hematocrit_min,wcc_mean,wcc_max,wcc_min,chloride_mean,chloride_max,chloride_min,magnesium_mean,magnesium_max,magnesium_min,potassium_mean,potassium_max,potassium_min,creatinine_mean,creatinine_max,creatinine_min,free_calcium_mean,free_calcium_max,free_calcium_min,sodium_mean,sodium_max,sodium_min,bicarb_mean,bicarb_max,bicarb_min,bun_mean,bun_max,bun_min,hba1c_mean,hba1c_max,hba1c_min,glucose_mean,glucose_max,glucose_min,lactate_mean,lactate_max,lactate_min,po2_mean,po2_max,po2_min,pco2_mean,pco2_max,pco2_min,baseexcess_mean,baseexcess_max,baseexcess_min,ph_mean,ph_max,ph_min,insulin_mean,insulin_max,insulin_min,plt_mean,plt_max,plt_min,gender,insurance,aortic,mit,tricuspid,pulmonary,cabg,weight,height,reintubation,liver_severe,liver_mild,rheum,cvd,aids,ckd,copd,arrhythmia,pud,smoking,pvd,paraplegia,ccf,met_ca,t2dm,t1dm,malig,mi,dementia,first_careunit,last_careunit,admission_type,hospital_expire_flag,diab_un,diab_cc,duration1,icu_stay_duration
0,white,SINGLE,ENGL,PHYSICIAN REFERRAL,37.261905,37.944446,36.499998,87.000000,109.0,63.0,97.963636,100.0,92.0,23.862069,43.0,13.0,111.333333,161.0,91.0,59.035088,69.0,43.0,72.622807,86.0,57.000000,12.660000,12.6,12.5,29.800000,29.8,29.8,1.200000,1.2,1.2,1.200000,1.2,1.2,455.0,651.0,277.0,12.200000,13.0,11.4,38.500000,39.0,38.0,12.666667,14.7,10.2,103.400000,107.0,100.0,2.000000,2.0,2.0,3.816667,4.1,3.6,0.833333,0.9,0.8,1.133333,1.15,1.12,138.400000,140.0,136.0,27.666667,32.0,25.0,11.666667,13.0,11.0,6.0,6.0,6.0,122.170213,168.0,87.0,2.600,2.8,2.4,0.750000,2.0,0.0,44.250000,48.0,40.0,0.750000,2.0,0.0,7.385000,7.40,7.36,188.666667,314.000000,0.391930,239.666667,268.0,196.0,M,Private,0,0,0,0,1,84.00,172.72,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,CSRU,CSRU,ELECTIVE,0,0,0,5.000000,196721.0
1,other,DIVORCED,SPAN,PHYSICIAN REFERRAL,37.043305,38.500000,35.200001,90.157407,119.0,76.0,97.339286,100.0,65.0,14.210526,25.0,7.5,99.149533,134.0,75.0,52.696262,75.0,35.0,67.266356,101.0,45.000000,16.775000,19.7,14.2,41.166667,47.5,37.9,1.500000,1.8,1.3,1.500000,1.8,1.3,278.0,277.0,276.0,7.116667,10.0,6.0,21.000000,30.0,18.0,10.100000,10.1,10.1,106.000000,108.0,104.0,1.850000,2.1,1.6,4.200000,4.9,3.4,0.550000,0.6,0.5,1.026250,1.21,0.69,141.333333,143.0,139.0,26.500000,27.0,26.0,13.000000,14.0,12.0,6.1,6.1,6.1,136.300000,195.0,105.0,0.850,0.9,0.8,0.294118,4.0,-3.0,40.764706,50.0,35.0,0.294118,4.0,-3.0,7.401667,7.50,7.35,175.000000,314.000000,0.000000,112.666667,141.0,93.0,F,Self Pay,1,0,0,0,0,60.00,170.18,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,CSRU,CSRU,ELECTIVE,0,0,0,16.483333,196320.0
2,white,MARRIED,CANT,PHYSICIAN REFERRAL,36.881572,37.799999,35.400002,79.419355,88.0,57.0,98.736842,100.0,93.0,19.112903,39.0,9.0,118.258065,170.0,89.0,52.096774,88.0,22.0,74.623657,121.0,48.666698,23.377778,29.1,17.7,54.750000,59.3,50.2,2.150000,2.7,1.6,2.150000,2.7,1.6,175.0,175.0,175.0,9.600000,11.9,6.5,28.000000,35.0,20.0,17.975000,22.8,9.4,109.000000,113.0,105.0,2.600000,2.6,2.6,4.363636,5.2,3.1,1.333333,1.6,1.2,1.165000,1.32,0.95,140.000000,143.0,139.0,27.333333,29.0,25.0,19.666667,21.0,19.0,6.1,6.1,6.1,124.789474,188.0,92.0,1.200,1.7,0.9,1.571429,4.0,-1.0,40.142857,48.0,33.0,1.571429,4.0,-1.0,7.413333,7.52,7.32,169.333333,294.000000,0.000000,114.250000,148.0,67.0,F,Medicare,1,0,0,0,1,57.00,165.10,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,CSRU,CSRU,ELECTIVE,0,0,0,20.000000,182800.0
3,white,MARRIED,PTUN,PHYSICIAN REFERRAL,37.532258,38.700001,36.700001,87.939394,104.0,73.0,97.531250,100.0,93.0,16.212121,28.0,10.0,113.191176,162.0,85.0,56.823529,78.0,40.0,73.575758,99.0,56.000000,14.900000,16.7,13.6,26.700000,28.3,25.1,1.350000,1.5,1.2,1.350000,1.5,1.2,201.0,201.0,201.0,12.600000,13.8,11.1,36.333333,40.0,33.0,14.000000,14.0,14.0,106.000000,106.0,106.0,1.850000,1.9,1.8,3.742857,4.4,3.2,0.550000,0.7,0.4,1.151667,1.24,1.10,137.750000,140.0,135.0,27.500000,28.0,27.0,12.500000,14.0,11.0,6.3,6.3,6.3,136.222222,203.0,71.0,1.975,2.7,1.2,0.454545,2.0,-2.0,46.363636,55.0,37.0,0.454545,2.0,-2.0,7.372500,7.46,7.32,2.000000,83.825678,0.000000,149.500000,161.0,138.0,M,Private,0,0,0,0,1,135.00,190.50,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,CSRU,CSRU,ELECTIVE,0,1,0,4.500000,92780.0
4,white,MARRIED,CANT,PHYSICIAN REFERRAL,36.859811,37.400002,35.055555,92.706667,122.0,64.0,97.852941,100.0,94.0,15.168919,24.0,4.0,109.439189,144.0,69.0,61.601351,83.0,42.0,76.846849,104.0,57.000000,13.150000,13.5,12.8,36.500000,44.7,30.9,1.100000,1.1,1.1,1.100000,1.1,1.1,231.0,275.0,198.0,9.463636,11.2,7.9,28.888889,34.0,24.0,9.900000,11.6,8.2,106.000000,108.0,104.0,3.000000,3.5,2.6,4.337500,4.8,3.5,1.300000,1.4,1.2,1.085000,1.25,0.85,136.750000,137.0,136.0,25.000000,27.0,23.0,21.500000,27.0,16.0,6.3,6.3,6.3,137.857143,230.0,108.0,1.500,1.5,1.5,-2.500000,0.0,-7.0,44.166667,58.0,38.0,-2.500000,0.0,-7.0,7.330000,7.40,7.27,162.666667,314.000000,0.000000,90.500000,95.0,86.0,M,Medicare,1,0,0,0,0,70.00,175.26,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,CSRU,CSRU,ELECTIVE,0,1,0,8.000000,177180.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12327,white,MARRIED,ENGL,TRANSFER FROM HOSPITAL,36.685556,37.060000,36.330000,70.378378,82.0,60.0,96.270270,100.0,92.0,19.162162,25.0,12.0,118.975610,155.0,90.0,59.292683,84.0,42.0,76.048780,104.0,61.000000,13.550000,14.5,12.6,24.800000,26.2,23.4,1.250000,1.3,1.2,1.250000,1.3,1.2,282.0,282.0,282.0,9.500000,12.4,8.8,29.000000,37.0,26.0,13.533333,16.0,11.2,110.200000,112.0,108.0,2.333333,2.7,2.1,4.375000,4.9,3.4,1.300000,1.4,1.2,1.291429,1.40,1.16,139.000000,144.0,137.0,21.000000,21.0,21.0,40.000000,41.0,39.0,5.6,5.6,5.6,111.600000,131.0,77.0,1.300,1.6,1.2,157.200000,300.0,61.0,41.900000,47.0,37.0,-2.000000,0.0,-3.0,7.341818,7.40,7.31,5.596820,10.831461,2.000000,142.666667,167.0,120.0,M,Medicare,0,0,0,0,1,100.00,175.00,0,0,0,0,0,0,1,0,1,0,1,0,0,1,0,1,0,0,1,0,CVICU,CVICU,URGENT,0,0,1,6.000000,114199.0
12328,white,SINGLE,ENGL,TRANSFER FROM HOSPITAL,37.017917,37.500000,36.500000,77.545455,88.0,66.0,97.272727,100.0,92.0,13.382353,20.0,9.0,114.906250,134.0,89.0,52.687500,67.0,39.0,71.593750,89.0,56.000000,13.466667,13.7,13.2,27.200000,28.9,24.8,1.166667,1.2,1.1,1.166667,1.2,1.1,183.0,203.0,167.0,11.100000,13.5,9.7,29.000000,29.0,29.0,12.266667,13.8,10.2,107.500000,109.0,106.0,2.300000,2.3,2.3,4.980000,5.8,4.4,0.800000,0.9,0.7,1.320000,1.58,1.16,137.600000,141.0,135.0,26.000000,28.0,24.0,19.500000,21.0,18.0,5.7,5.7,5.7,148.000000,203.0,105.0,2.600,2.6,2.6,215.666667,271.0,158.0,43.666667,49.0,38.0,-2.000000,-1.0,-3.0,7.355000,7.41,7.29,3.945752,10.000000,1.649718,135.333333,154.0,113.0,F,Other,0,0,0,0,1,84.70,160.00,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,0,CVICU,CVICU,URGENT,0,1,0,4.000000,108410.0
12329,white,SINGLE,ENGL,PHYSICIAN REFERRAL,36.494444,37.100000,35.800000,86.291667,101.0,75.0,96.200000,100.0,94.0,14.312500,21.0,11.0,113.080000,141.0,91.0,53.320000,71.0,41.0,70.680000,97.0,11.000000,13.166667,15.1,11.9,26.166667,26.8,25.5,1.233333,1.4,1.1,1.233333,1.4,1.1,192.0,192.0,192.0,10.442857,12.7,9.2,31.500000,38.0,29.0,14.366667,15.4,12.8,105.250000,107.0,104.0,2.400000,2.7,2.2,4.271429,4.7,3.9,0.550000,0.6,0.5,1.148333,1.25,1.08,136.250000,139.0,135.0,23.500000,24.0,23.0,11.500000,12.0,11.0,9.2,9.1,9.0,150.333333,180.0,122.0,1.825,2.0,1.7,186.750000,359.0,92.0,41.875000,56.0,36.0,-2.375000,0.0,-5.0,7.345556,7.42,7.24,9.423242,50.000000,2.000000,100.666667,114.0,91.0,F,Medicare,1,0,0,0,0,78.50,163.00,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,CVICU,CVICU,SURGICAL SAME DAY ADMISSION,0,1,0,4.700000,96410.0
12330,white,SINGLE,ENGL,PROCEDURE SITE,36.899189,37.560000,36.110000,86.031646,113.0,65.0,96.430380,100.0,91.0,15.649682,29.0,6.0,123.195783,180.0,74.0,55.891566,91.0,36.0,74.388554,107.0,12.000000,14.540000,17.7,12.7,29.360000,31.2,26.9,1.380000,1.7,1.2,1.380000,1.7,1.2,152.0,152.0,152.0,10.766667,12.2,10.0,31.000000,32.0,30.0,9.122222,12.5,5.1,103.363636,107.0,101.0,2.025000,2.3,1.8,3.800000,5.4,3.2,0.600000,0.7,0.5,1.112632,1.20,1.03,133.750000,137.0,130.0,24.000000,25.0,22.0,11.428571,19.0,7.0,5.8,5.8,5.8,103.173913,147.0,87.0,0.980,1.8,0.8,138.666667,288.0,62.0,39.208333,44.0,31.0,0.250000,4.0,-4.0,7.408929,7.50,7.31,5.552349,10.000000,2.000000,159.666667,224.0,127.0,M,Medicare,0,0,0,0,1,112.05,173.00,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,CVICU,CVICU,EW EMER.,0,0,0,24.000000,519822.0


In [93]:
dfImputed.to_csv('imputed.csv')