# DL Survival - Ventilation Outcomes
 Updated 21/11/21

In [1]:
import pandas as pd
import numpy as np
import math
import statistics
from datetime import datetime
import datetime as dt
from datetime import timedelta
import json

## 1. Data cleaning

- Import MIMIC III data
- Review column unique values, assign correct data types
- Impute missing values


### 1.1: Importing data

In [2]:
df = pd.read_csv('mimic_combined.csv')

In [3]:
pd.set_option('display.max_columns', None)
print(df.shape)
df.head(10)

(12332, 110)


Unnamed: 0.1,Unnamed: 0,hospital_expire_flag,los,spo2,free_calcium,outtime,meanbp,ptt,tidalvol,wcc,cvd,weight,bicarb,ggt,t1dm,temp,malig,subject_id,hr,baseexcess,diab_un,first_careunit,hadm_id,bilirubin_direct,liver_severe,ventrate,fibrinogen,arrhythmia,neutrophils,prbc,glucose,magnesium,po2,ext_time,alp,tricuspid,albumin,dementia,dischtime,pulmonary,t2dm,plts,lactate,bleed_time,admission_location,rr,mit,insulin,pvd,pud,lymphocytes,gender,cabg,smoking,reintubation,height,inr,bilirubin_total,diab_cc,creatinine,insurance,mi,specimen,deathtime,pt,aado2,hba1c,crp,pco2,aids,language,dod,dbp,reint_time,intime,rheum,bg_temp,sbp,chloride,fio2,sodium,last_careunit,infection,paraplegia,cardiac_index,marital_status,potassium,bilirubin_indirect,bun,dtoutput,ckd,copd,cryo,admission_type,met_ca,hb,ethnicity,admittime,ffp,inr_1,ccf,icustay_seq,ph,ast,alt,plt,aortic,vent_array,hematocrit,liver_mild
0,0,0,2.2769,"[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...",2198-02-02 19:06:39,"[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...",[],"[{'charttime': datetime.datetime(2198, 1, 31, ...",0,84.0,"[{'charttime': datetime.datetime(2198, 1, 31, ...",[],0,"[{'charttime': datetime.datetime(2198, 1, 31, ...",0,27328,"[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...",0,CSRU,195663,[],0,[],[],0,[],[],"[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 2, 2, 0...","[{'charttime': datetime.datetime(2198, 1, 31, ...",2198-01-31 22:00:00,[],0,[],0,2198-02-04 12:00:00,0,0,[],"[{'charttime': datetime.datetime(2198, 1, 31, ...",[],PHYS REFERRAL/NORMAL DELI,"[{'charttime': datetime.datetime(2198, 1, 31, ...",0,[],0,0,[],M,1,0,0,172.72,"[{'charttime': datetime.datetime(2198, 1, 31, ...",[],0,"[{'charttime': datetime.datetime(2198, 1, 31, ...",Private,1,[],,[],[],[],[],"[{'charttime': datetime.datetime(2198, 1, 31, ...",0,ENGL,,"[{'charttime': datetime.datetime(2198, 1, 31, ...",,2198-01-31 12:27:58,0,[],"[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...",[],"[{'charttime': datetime.datetime(2198, 1, 31, ...",CSRU,"[{'suspected_infection_time': None, 'antibioti...",0,[],SINGLE,"[{'charttime': datetime.datetime(2198, 1, 31, ...",[],"[{'charttime': datetime.datetime(2198, 1, 31, ...",[],0,0,[],ELECTIVE,0,"[{'charttime': datetime.datetime(2198, 1, 31, ...",white,2198-01-31 08:00:00,[],"[{'charttime': datetime.datetime(2198, 1, 31, ...",0,1,"[{'charttime': datetime.datetime(2198, 1, 31, ...",[],[],"[{'charttime': datetime.datetime(2198, 1, 31, ...",0,"[{'starttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...",0
1,1,0,2.2722,"[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...",2198-05-10 19:46:00,"[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...",[],"[{'charttime': datetime.datetime(2198, 5, 10, ...",0,60.0,"[{'charttime': datetime.datetime(2198, 5, 9, 3...",[],0,"[{'charttime': datetime.datetime(2198, 5, 8, 1...",0,6280,"[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...",0,CSRU,106984,[],0,[],[],0,[],"[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 9, 3...","[{'charttime': datetime.datetime(2198, 5, 8, 1...",2198-05-09 09:29:00,[],0,[],0,2198-05-15 13:49:00,0,0,"[{'charttime': datetime.datetime(2198, 5, 8, 1...",[],[],PHYS REFERRAL/NORMAL DELI,"[{'charttime': datetime.datetime(2198, 5, 8, 1...",0,"[{'charttime': datetime.datetime(2198, 5, 8, 1...",1,0,[],F,0,0,0,170.18,"[{'charttime': datetime.datetime(2198, 5, 8, 1...",[],0,"[{'charttime': datetime.datetime(2198, 5, 9, 3...",Self Pay,0,[],,[],[],[],[],"[{'charttime': datetime.datetime(2198, 5, 8, 1...",0,SPAN,,"[{'charttime': datetime.datetime(2198, 5, 8, 1...",,2198-05-08 13:14:00,0,[],"[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 9, 3...",[],"[{'charttime': datetime.datetime(2198, 5, 8, 1...",CSRU,"[{'suspected_infection_time': None, 'antibioti...",0,"[{'charttime': datetime.datetime(2198, 5, 8, 1...",DIVORCED,"[{'charttime': datetime.datetime(2198, 5, 8, 1...",[],"[{'charttime': datetime.datetime(2198, 5, 9, 3...",[],0,0,"[{'charttime': datetime.datetime(2198, 5, 8, 1...",ELECTIVE,0,"[{'charttime': datetime.datetime(2198, 5, 8, 1...",other,2198-05-08 07:15:00,"[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...",0,1,"[{'charttime': datetime.datetime(2198, 5, 8, 1...",[],[],"[{'charttime': datetime.datetime(2198, 5, 8, 1...",1,"[{'starttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...",0
2,2,0,2.1157,"[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...",2189-02-20 13:37:48,"[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...",0,57.0,"[{'charttime': datetime.datetime(2189, 2, 18, ...",[],0,"[{'charttime': datetime.datetime(2189, 2, 18, ...",0,15201,"[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...",0,CSRU,123613,[],0,[],"[{'charttime': datetime.datetime(2189, 2, 18, ...",1,[],"[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 19, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...",2189-02-19 09:00:00,[],0,[],0,2189-03-17 14:20:00,0,0,[],[],[],PHYS REFERRAL/NORMAL DELI,"[{'charttime': datetime.datetime(2189, 2, 18, ...",0,[],1,0,[],F,1,0,0,165.1,"[{'charttime': datetime.datetime(2189, 2, 18, ...",[],0,"[{'charttime': datetime.datetime(2189, 2, 18, ...",Medicare,0,[],,[],"[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 10, ...",[],"[{'charttime': datetime.datetime(2189, 2, 18, ...",0,,2191-12-14,"[{'charttime': datetime.datetime(2189, 2, 18, ...",,2189-02-18 10:51:08,0,[],"[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...",CSRU,"[{'suspected_infection_time': None, 'antibioti...",0,"[{'charttime': datetime.datetime(2189, 2, 18, ...",MARRIED,"[{'charttime': datetime.datetime(2189, 2, 18, ...",[],"[{'charttime': datetime.datetime(2189, 2, 18, ...",[],0,0,[],ELECTIVE,0,"[{'charttime': datetime.datetime(2189, 2, 18, ...",unknown,2189-02-18 08:00:00,[],"[{'charttime': datetime.datetime(2189, 2, 18, ...",1,1,"[{'charttime': datetime.datetime(2189, 2, 18, ...",[],[],"[{'charttime': datetime.datetime(2189, 2, 18, ...",1,"[{'starttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...",0
3,3,0,1.0738,"[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...",2118-01-26 12:33:02,"[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 26, ...",0,135.0,"[{'charttime': datetime.datetime(2118, 1, 25, ...",[],0,"[{'charttime': datetime.datetime(2118, 1, 25, ...",0,25226,"[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...",1,CSRU,126027,[],0,[],"[{'charttime': datetime.datetime(2118, 1, 25, ...",0,[],[],"[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...",2118-01-25 17:30:00,[],0,[],0,2118-01-29 13:00:00,0,0,[],[],[],PHYS REFERRAL/NORMAL DELI,"[{'charttime': datetime.datetime(2118, 1, 25, ...",0,[],0,0,[],M,1,0,0,190.5,"[{'charttime': datetime.datetime(2118, 1, 25, ...",[],0,"[{'charttime': datetime.datetime(2118, 1, 25, ...",Private,0,[],,[],"[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 14, ...",[],"[{'charttime': datetime.datetime(2118, 1, 25, ...",0,,,"[{'charttime': datetime.datetime(2118, 1, 25, ...",,2118-01-25 10:46:42,0,[],"[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...",CSRU,"[{'suspected_infection_time': None, 'antibioti...",0,"[{'charttime': datetime.datetime(2118, 1, 25, ...",MARRIED,"[{'charttime': datetime.datetime(2118, 1, 25, ...",[],"[{'charttime': datetime.datetime(2118, 1, 25, ...",[],0,0,[],ELECTIVE,0,"[{'charttime': datetime.datetime(2118, 1, 25, ...",unknown,2118-01-25 07:15:00,[],"[{'charttime': datetime.datetime(2118, 1, 25, ...",0,1,"[{'charttime': datetime.datetime(2118, 1, 25, ...",[],[],"[{'charttime': datetime.datetime(2118, 1, 25, ...",0,"[{'starttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...",0
4,4,0,2.0507,"[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...",2198-01-03 12:00:00,"[{'charttime': datetime.datetime(2198, 1, 1, 1...",[],"[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 2, 4...",0,70.0,"[{'charttime': datetime.datetime(2198, 1, 2, 4...",[],0,"[{'charttime': datetime.datetime(2198, 1, 1, 1...",0,19637,"[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...",1,CSRU,190332,[],0,[],[],1,[],"[{'charttime': datetime.datetime(2198, 1, 2, 6...","[{'charttime': datetime.datetime(2198, 1, 1, 1...",[],"[{'charttime': datetime.datetime(2198, 1, 1, 1...",2198-01-01 21:00:00,[],0,[],0,2198-01-09 13:07:00,0,0,[],"[{'charttime': datetime.datetime(2198, 1, 1, 1...",[],PHYS REFERRAL/NORMAL DELI,"[{'charttime': datetime.datetime(2198, 1, 1, 1...",0,[],0,0,[],M,0,0,0,175.26,"[{'charttime': datetime.datetime(2198, 1, 3, 4...",[],0,"[{'charttime': datetime.datetime(2198, 1, 2, 4...",Medicare,0,[],,[],"[{'charttime': datetime.datetime(2198, 1, 1, 1...",[],[],"[{'charttime': datetime.datetime(2198, 1, 1, 1...",0,,2203-12-06,"[{'charttime': datetime.datetime(2198, 1, 1, 1...",,2198-01-01 10:47:00,0,[],"[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 2, 4...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...",CSRU,"[{'suspected_infection_time': None, 'antibioti...",0,"[{'charttime': datetime.datetime(2198, 1, 1, 1...",MARRIED,"[{'charttime': datetime.datetime(2198, 1, 1, 1...",[],"[{'charttime': datetime.datetime(2198, 1, 2, 4...",[],0,0,[],ELECTIVE,0,"[{'charttime': datetime.datetime(2198, 1, 1, 1...",unknown,2198-01-01 07:15:00,[],"[{'charttime': datetime.datetime(2198, 1, 3, 4...",1,1,"[{'charttime': datetime.datetime(2198, 1, 1, 1...",[],[],"[{'charttime': datetime.datetime(2198, 1, 2, 4...",1,"[{'starttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...",0
5,5,0,4.0595,"[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...",2130-12-12 12:08:24,"[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...",[],"[{'charttime': datetime.datetime(2130, 12, 8, ...",0,73.3,"[{'charttime': datetime.datetime(2130, 12, 8, ...",[],0,"[{'charttime': datetime.datetime(2130, 12, 8, ...",0,29498,"[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...",1,CSRU,115203,[],0,[],"[{'charttime': datetime.datetime(2130, 12, 8, ...",1,"[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...",2130-12-09 12:00:00,[],0,[],0,2130-12-18 18:38:00,0,0,[],"[{'charttime': datetime.datetime(2130, 12, 8, ...",[],TRANSFER FROM HOSP/EXTRAM,"[{'charttime': datetime.datetime(2130, 12, 8, ...",0,[],0,0,"[{'charttime': datetime.datetime(2130, 12, 8, ...",F,1,0,0,162.56,"[{'charttime': datetime.datetime(2130, 12, 8, ...",[],0,"[{'charttime': datetime.datetime(2130, 12, 8, ...",Medicaid,0,[],,[],"[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 7, ...",[],"[{'charttime': datetime.datetime(2130, 12, 8, ...",0,ENGL,,"[{'charttime': datetime.datetime(2130, 12, 8, ...",,2130-12-08 10:42:40,0,[],"[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...",CSRU,[{'suspected_infection_time': datetime.datetim...,0,"[{'charttime': datetime.datetime(2130, 12, 8, ...",SINGLE,"[{'charttime': datetime.datetime(2130, 12, 8, ...",[],"[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...",0,0,[],EMERGENCY,0,"[{'charttime': datetime.datetime(2130, 12, 8, ...",unknown,2130-12-06 18:30:00,[],"[{'charttime': datetime.datetime(2130, 12, 8, ...",0,1,"[{'charttime': datetime.datetime(2130, 12, 8, ...",[],[],"[{'charttime': datetime.datetime(2130, 12, 8, ...",0,"[{'starttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...",0
6,6,0,2.109,"[{'charttime': datetime.datetime(2113, 5, 11, ...",[],2113-05-13 18:23:43,"[{'charttime': datetime.datetime(2113, 5, 11, ...","[{'charttime': datetime.datetime(2113, 5, 12, ...",[],"[{'charttime': datetime.datetime(2113, 5, 12, ...",0,101.15,"[{'charttime': datetime.datetime(2113, 5, 12, ...",[],0,"[{'charttime': datetime.datetime(2113, 5, 11, ...",0,18498,"[{'charttime': datetime.datetime(2113, 5, 11, ...","[{'charttime': datetime.datetime(2113, 5, 12, ...",0,CCU,181661,[],0,[],[],1,"[{'charttime': datetime.datetime(2113, 5, 12, ...",[],"[{'charttime': datetime.datetime(2113, 5, 12, ...","[{'charttime': datetime.datetime(2113, 5, 13, ...","[{'charttime': datetime.datetime(2113, 5, 12, ...",,[],0,[],0,2113-05-26 13:55:00,0,0,[],[],[],EMERGENCY ROOM ADMIT,"[{'charttime': datetime.datetime(2113, 5, 11, ...",1,[],0,0,"[{'charttime': datetime.datetime(2113, 5, 12, ...",M,1,0,0,,"[{'charttime': datetime.datetime(2113, 5, 12, ...","[{'charttime': datetime.datetime(2113, 5, 13, ...",0,"[{'charttime': datetime.datetime(2113, 5, 12, ...",Medicare,1,[],,[],[],[],[],"[{'charttime': datetime.datetime(2113, 5, 12, ...",0,,,"[{'charttime': datetime.datetime(2113, 5, 11, ...",,2113-05-11 15:46:43,0,"[{'charttime': datetime.datetime(2113, 5, 12, ...","[{'charttime': datetime.datetime(2113, 5, 11, ...","[{'charttime': datetime.datetime(2113, 5, 12, ...",[],"[{'charttime': datetime.datetime(2113, 5, 12, ...",CCU,"[{'suspected_infection_time': None, 'antibioti...",0,"[{'charttime': datetime.datetime(2113, 5, 11, ...",WIDOWED,"[{'charttime': datetime.datetime(2113, 5, 12, ...",[],"[{'charttime': datetime.datetime(2113, 5, 12, ...",[],0,0,[],EMERGENCY,0,"[{'charttime': datetime.datetime(2113, 5, 11, ...",white,2113-05-07 12:16:00,[],"[{'charttime': datetime.datetime(2113, 5, 12, ...",1,1,"[{'charttime': datetime.datetime(2113, 5, 12, ...",[],[],"[{'charttime': datetime.datetime(2113, 5, 12, ...",0,[],"[{'charttime': datetime.datetime(2113, 5, 11, ...",0
7,7,0,1.321,"[{'charttime': datetime.datetime(2139, 10, 6, ...","[{'charttime': datetime.datetime(2139, 10, 6, ...",2139-10-07 17:13:18,"[{'charttime': datetime.datetime(2139, 10, 6, ...","[{'charttime': datetime.datetime(2139, 10, 6, ...",[],"[{'charttime': datetime.datetime(2139, 10, 6, ...",0,62.3,"[{'charttime': datetime.datetime(2139, 10, 6, ...",[],0,"[{'charttime': datetime.datetime(2139, 10, 6, ...",0,29429,"[{'charttime': datetime.datetime(2139, 10, 6, ...","[{'charttime': datetime.datetime(2139, 10, 6, ...",0,CSRU,195614,[],0,[],"[{'charttime': datetime.datetime(2139, 10, 6, ...",0,[],[],"[{'charttime': datetime.datetime(2139, 10, 6, ...","[{'charttime': datetime.datetime(2139, 10, 7, ...","[{'charttime': datetime.datetime(2139, 10, 6, ...",2139-10-06 16:20:00,[],0,[],0,2139-10-11 15:04:00,0,0,[],"[{'charttime': datetime.datetime(2139, 10, 6, ...",[],PHYS REFERRAL/NORMAL DELI,"[{'charttime': datetime.datetime(2139, 10, 6, ...",1,[],0,0,[],M,0,0,0,167.64,"[{'charttime': datetime.datetime(2139, 10, 6, ...",[],0,"[{'charttime': datetime.datetime(2139, 10, 6, ...",Private,0,[],,[],[],"[{'charttime': datetime.datetime(2139, 9, 15, ...",[],"[{'charttime': datetime.datetime(2139, 10, 6, ...",0,ENGL,,"[{'charttime': datetime.datetime(2139, 10, 6, ...",,2139-10-06 09:31:07,0,[],"[{'charttime': datetime.datetime(2139, 10, 6, ...","[{'charttime': datetime.datetime(2139, 10, 6, ...",[],"[{'charttime': datetime.datetime(2139, 10, 6, ...",CSRU,"[{'suspected_infection_time': None, 'antibioti...",0,"[{'charttime': datetime.datetime(2139, 10, 6, ...",UNKNOWN (DEFAULT),"[{'charttime': datetime.datetime(2139, 10, 6, ...",[],"[{'charttime': datetime.datetime(2139, 10, 6, ...",[],0,1,[],ELECTIVE,0,"[{'charttime': datetime.datetime(2139, 10, 6, ...",white,2139-10-06 07:15:00,[],"[{'charttime': datetime.datetime(2139, 10, 6, ...",0,1,"[{'charttime': datetime.datetime(2139, 10, 6, ...",[],[],"[{'charttime': datetime.datetime(2139, 10, 6, ...",0,"[{'starttime': datetime.datetime(2139, 10, 6, ...","[{'charttime': datetime.datetime(2139, 10, 6, ...",0
8,8,0,1.5442,"[{'charttime': datetime.datetime(2130, 10, 5, ...","[{'charttime': datetime.datetime(2130, 10, 5, ...",2130-10-06 21:54:27,"[{'charttime': datetime.datetime(2130, 10, 5, ...","[{'charttime': datetime.datetime(2130, 10, 5, ...",[],"[{'charttime': datetime.datetime(2130, 10, 5, ...",0,79.9,"[{'charttime': datetime.datetime(2130, 10, 5, ...",[],0,"[{'charttime': datetime.datetime(2130, 10, 5, ...",0,28892,"[{'charttime': datetime.datetime(2130, 10, 5, ...","[{'charttime': datetime.datetime(2130, 10, 5, ...",0,CSRU,190585,[],0,[],[],0,[],[],"[{'charttime': datetime.datetime(2130, 10, 5, ...","[{'charttime': datetime.datetime(2130, 10, 6, ...","[{'charttime': datetime.datetime(2130, 10, 5, ...",2130-10-05 13:30:00,[],0,[],0,2130-10-09 17:00:00,0,0,[],"[{'charttime': datetime.datetime(2130, 10, 5, ...",[],PHYS REFERRAL/NORMAL DELI,"[{'charttime': datetime.datetime(2130, 10, 5, ...",0,[],0,0,[],M,1,0,0,165.1,"[{'charttime': datetime.datetime(2130, 10, 5, ...",[],0,"[{'charttime': datetime.datetime(2130, 10, 5, ...",Private,0,[],,[],[],"[{'charttime': datetime.datetime(2130, 9, 29, ...",[],"[{'charttime': datetime.datetime(2130, 10, 5, ...",0,ENGL,,"[{'charttime': datetime.datetime(2130, 10, 5, ...",,2130-10-05 08:50:46,0,[],"[{'charttime': datetime.datetime(2130, 10, 5, ...","[{'charttime': datetime.datetime(2130, 10, 5, ...",[],"[{'charttime': datetime.datetime(2130, 10, 5, ...",CSRU,"[{'suspected_infection_time': None, 'antibioti...",0,"[{'charttime': datetime.datetime(2130, 10, 5, ...",MARRIED,"[{'charttime': datetime.datetime(2130, 10, 5, ...",[],"[{'charttime': datetime.datetime(2130, 10, 5, ...",[],0,0,[],ELECTIVE,0,"[{'charttime': datetime.datetime(2130, 10, 5, ...",white,2130-10-05 07:15:00,[],"[{'charttime': datetime.datetime(2130, 10, 5, ...",0,1,"[{'charttime': datetime.datetime(2130, 10, 5, ...",[],[],"[{'charttime': datetime.datetime(2130, 10, 5, ...",0,"[{'starttime': datetime.datetime(2130, 10, 5, ...","[{'charttime': datetime.datetime(2130, 10, 5, ...",0
9,9,0,4.081,"[{'charttime': datetime.datetime(2142, 3, 7, 1...","[{'charttime': datetime.datetime(2142, 3, 7, 9...",2142-03-11 11:41:29,"[{'charttime': datetime.datetime(2142, 3, 7, 1...","[{'charttime': datetime.datetime(2142, 3, 9, 2...","[{'charttime': datetime.datetime(2142, 3, 8, 3...","[{'charttime': datetime.datetime(2142, 3, 7, 1...",0,133.1,"[{'charttime': datetime.datetime(2142, 3, 7, 1...",[],0,"[{'charttime': datetime.datetime(2142, 3, 7, 1...",0,25989,"[{'charttime': datetime.datetime(2142, 3, 7, 1...","[{'charttime': datetime.datetime(2142, 3, 7, 9...",0,CSRU,190638,[],0,[],[],1,"[{'charttime': datetime.datetime(2142, 3, 9, 2...",[],"[{'charttime': datetime.datetime(2142, 3, 7, 9...","[{'charttime': datetime.datetime(2142, 3, 8, 2...","[{'charttime': datetime.datetime(2142, 3, 7, 9...",2142-03-08 12:00:00,[],0,[],0,2142-03-13 16:40:00,0,0,[],[],[],EMERGENCY ROOM ADMIT,"[{'charttime': datetime.datetime(2142, 3, 7, 1...",0,[],0,0,"[{'charttime': datetime.datetime(2142, 3, 9, 2...",M,1,0,0,175.26,"[{'charttime': datetime.datetime(2142, 3, 9, 2...",[],0,"[{'charttime': datetime.datetime(2142, 3, 7, 1...",Medicaid,1,[],,[],[],[],[],"[{'charttime': datetime.datetime(2142, 3, 7, 9...",0,ENGL,,"[{'charttime': datetime.datetime(2142, 3, 7, 1...",,2142-03-07 09:44:47,0,"[{'charttime': datetime.datetime(2142, 3, 7, 2...","[{'charttime': datetime.datetime(2142, 3, 7, 1...","[{'charttime': datetime.datetime(2142, 3, 7, 1...","[{'charttime': datetime.datetime(2142, 3, 8, 3...","[{'charttime': datetime.datetime(2142, 3, 7, 9...",CSRU,"[{'suspected_infection_time': None, 'antibioti...",0,"[{'charttime': datetime.datetime(2142, 3, 7, 1...",SINGLE,"[{'charttime': datetime.datetime(2142, 3, 7, 9...",[],"[{'charttime': datetime.datetime(2142, 3, 7, 1...",[],0,1,[],EMERGENCY,0,"[{'charttime': datetime.datetime(2142, 3, 7, 9...",white,2142-03-05 17:10:00,[],"[{'charttime': datetime.datetime(2142, 3, 9, 2...",1,1,"[{'charttime': datetime.datetime(2142, 3, 7, 9...",[],[],"[{'charttime': datetime.datetime(2142, 3, 7, 1...",0,"[{'starttime': datetime.datetime(2142, 3, 7, 1...","[{'charttime': datetime.datetime(2142, 3, 7, 9...",0


#### 1.1.1: Column lists

In [4]:
#view and reorder columns
cols = list(df.columns)
new_cols = ['Unnamed: 0','hadm_id','subject_id','gender','ethnicity','marital_status','insurance','language','aortic','mit','tricuspid',
            'pulmonary','cabg','temp','bg_temp','hr','spo2','rr','sbp','dbp','meanbp','weight','height','cardiac_index','pt','ptt',
            'inr','inr_1','fibrinogen','hb','hematocrit','plts','wcc','lymphocytes','neutrophils','alp','ast','alt','ggt',
            'bilirubin_indirect','bilirubin_direct','bilirubin_total','chloride','magnesium','potassium','crp','bleed_time','albumin',
            'creatinine','free_calcium','sodium','bicarb','bun','hba1c','glucose','lactate','po2','pco2','baseexcess','ph','aado2',
            'fio2','ffp','insulin','cryo','prbc','infection','ventrate','tidalvol','vent_array','reintubation','liver_severe','liver_mild',
            'rheum','cvd','aids','ckd','copd','arrhythmia','pud','smoking','pvd','paraplegia','ccf','met_ca','t2dm','t1dm','malig','mi',
            'dementia','first_careunit','last_careunit','admission_location','admission_type','hospital_expire_flag','admittime',
            'dischtime','intime','outtime','ext_time','reint_time','los','icustay_seq','deathtime','plt','diab_un','diab_cc',
            'dtoutput','specimen','dod']


ptinfo=['Unnamed:0','hadm_id','subject_id']

demographics=['gender','ethnicity','marital_status','insurance','language']

proceduretype=['aortic','mit','tricuspid','pulmonary','cabg']

vitals=['temp','bg_temp','hr','spo2','rr','sbp','dbp','meanbp','weight','height','cardiac_index']

labs=['pt','ptt','inr','inr_1','fibrinogen','hb','hematocrit','plts','wcc','lymphocytes','neutrophils','alp','ast','alt','ggt',
'bilirubin_indirect','bilirubin_direct','bilirubin_total','chloride','magnesium','potassium','crp','bleed_time',
'albumin','creatinine','free_calcium','sodium','bicarb','bun','hba1c','glucose','lactate']

bloodgases=['po2','pco2','baseexcess','ph','aado2','fio2']

products=['ffp','insulin','cryo','prbc','infection']

ventilation=['ventrate','tidalvol','vent_array','reintubation']

comorbidities=['liver_severe','liver_mild','rheum','cvd','aids','ckd','copd','arrhythmia','pud','smoking','pvd',
'paraplegia','ccf','met_ca','t2dm','t1dm','malig','mi','dementia']

adm_cat=['first_careunit','last_careunit','admission_location','admission_type','hospital_expire_flag']

adm_num=['admittime','dischtime','intime','outtime','ext_time','reint_time','los','icustay_seq','deathtime']

others=['plt','diab_un','diab_cc','dtoutput','specimen','dod']

timeseries=[*vitals,*labs,*bloodgases,*products,*ventilation,'plt','dtoutput']
for x in ['weight','height','reintubation']:
    timeseries.remove(x)

In [5]:
df = df[new_cols]
df.head(10)

Unnamed: 0.1,Unnamed: 0,hadm_id,subject_id,gender,ethnicity,marital_status,insurance,language,aortic,mit,tricuspid,pulmonary,cabg,temp,bg_temp,hr,spo2,rr,sbp,dbp,meanbp,weight,height,cardiac_index,pt,ptt,inr,inr_1,fibrinogen,hb,hematocrit,plts,wcc,lymphocytes,neutrophils,alp,ast,alt,ggt,bilirubin_indirect,bilirubin_direct,bilirubin_total,chloride,magnesium,potassium,crp,bleed_time,albumin,creatinine,free_calcium,sodium,bicarb,bun,hba1c,glucose,lactate,po2,pco2,baseexcess,ph,aado2,fio2,ffp,insulin,cryo,prbc,infection,ventrate,tidalvol,vent_array,reintubation,liver_severe,liver_mild,rheum,cvd,aids,ckd,copd,arrhythmia,pud,smoking,pvd,paraplegia,ccf,met_ca,t2dm,t1dm,malig,mi,dementia,first_careunit,last_careunit,admission_location,admission_type,hospital_expire_flag,admittime,dischtime,intime,outtime,ext_time,reint_time,los,icustay_seq,deathtime,plt,diab_un,diab_cc,dtoutput,specimen,dod
0,0,195663,27328,M,white,SINGLE,Private,ENGL,0,0,0,0,1,"[{'charttime': datetime.datetime(2198, 1, 31, ...",[],"[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...",84.0,172.72,[],[],"[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...",[],"[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...",[],"[{'charttime': datetime.datetime(2198, 1, 31, ...",[],[],[],[],[],[],[],[],[],"[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 2, 2, 0...","[{'charttime': datetime.datetime(2198, 1, 31, ...",[],[],[],"[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...",[],"[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...","[{'charttime': datetime.datetime(2198, 1, 31, ...",[],[],[],[],[],[],"[{'suspected_infection_time': None, 'antibioti...",[],[],"[{'starttime': datetime.datetime(2198, 1, 31, ...",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,CSRU,CSRU,PHYS REFERRAL/NORMAL DELI,ELECTIVE,0,2198-01-31 08:00:00,2198-02-04 12:00:00,2198-01-31 12:27:58,2198-02-02 19:06:39,2198-01-31 22:00:00,,2.2769,1,,"[{'charttime': datetime.datetime(2198, 1, 31, ...",0,0,[],[],
1,1,106984,6280,F,other,DIVORCED,Self Pay,SPAN,1,0,0,0,0,"[{'charttime': datetime.datetime(2198, 5, 8, 1...",[],"[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...",60.0,170.18,"[{'charttime': datetime.datetime(2198, 5, 8, 1...",[],"[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...",[],"[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 10, ...",[],[],[],[],[],[],[],[],[],"[{'charttime': datetime.datetime(2198, 5, 9, 3...","[{'charttime': datetime.datetime(2198, 5, 9, 3...","[{'charttime': datetime.datetime(2198, 5, 8, 1...",[],[],[],"[{'charttime': datetime.datetime(2198, 5, 9, 3...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 9, 3...","[{'charttime': datetime.datetime(2198, 5, 9, 3...",[],"[{'charttime': datetime.datetime(2198, 5, 8, 1...",[],"[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...",[],[],"[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'charttime': datetime.datetime(2198, 5, 8, 1...","[{'suspected_infection_time': None, 'antibioti...",[],[],"[{'starttime': datetime.datetime(2198, 5, 8, 1...",0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,CSRU,CSRU,PHYS REFERRAL/NORMAL DELI,ELECTIVE,0,2198-05-08 07:15:00,2198-05-15 13:49:00,2198-05-08 13:14:00,2198-05-10 19:46:00,2198-05-09 09:29:00,,2.2722,1,,"[{'charttime': datetime.datetime(2198, 5, 8, 1...",0,0,[],[],
2,2,123613,15201,F,unknown,MARRIED,Medicare,,1,0,0,0,1,"[{'charttime': datetime.datetime(2189, 2, 18, ...",[],"[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...",57.0,165.1,"[{'charttime': datetime.datetime(2189, 2, 18, ...",[],"[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...",[],"[{'charttime': datetime.datetime(2189, 2, 18, ...",[],[],[],[],[],[],[],[],[],"[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 19, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...",[],[],[],"[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 10, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...",[],"[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'charttime': datetime.datetime(2189, 2, 18, ...",[],[],[],"[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'suspected_infection_time': None, 'antibioti...",[],"[{'charttime': datetime.datetime(2189, 2, 18, ...","[{'starttime': datetime.datetime(2189, 2, 18, ...",0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,CSRU,CSRU,PHYS REFERRAL/NORMAL DELI,ELECTIVE,0,2189-02-18 08:00:00,2189-03-17 14:20:00,2189-02-18 10:51:08,2189-02-20 13:37:48,2189-02-19 09:00:00,,2.1157,1,,"[{'charttime': datetime.datetime(2189, 2, 18, ...",0,0,[],[],2191-12-14
3,3,126027,25226,M,unknown,MARRIED,Private,,0,0,0,0,1,"[{'charttime': datetime.datetime(2118, 1, 25, ...",[],"[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...",135.0,190.5,"[{'charttime': datetime.datetime(2118, 1, 25, ...",[],"[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...",[],"[{'charttime': datetime.datetime(2118, 1, 26, ...",[],[],[],[],[],[],[],[],[],"[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...",[],[],[],"[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 14, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...",[],"[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'charttime': datetime.datetime(2118, 1, 25, ...",[],[],[],[],"[{'suspected_infection_time': None, 'antibioti...",[],"[{'charttime': datetime.datetime(2118, 1, 25, ...","[{'starttime': datetime.datetime(2118, 1, 25, ...",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,CSRU,CSRU,PHYS REFERRAL/NORMAL DELI,ELECTIVE,0,2118-01-25 07:15:00,2118-01-29 13:00:00,2118-01-25 10:46:42,2118-01-26 12:33:02,2118-01-25 17:30:00,,1.0738,1,,"[{'charttime': datetime.datetime(2118, 1, 25, ...",1,0,[],[],
4,4,190332,19637,M,unknown,MARRIED,Medicare,,1,0,0,0,0,"[{'charttime': datetime.datetime(2198, 1, 1, 1...",[],"[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...",70.0,175.26,"[{'charttime': datetime.datetime(2198, 1, 1, 1...",[],[],"[{'charttime': datetime.datetime(2198, 1, 3, 4...","[{'charttime': datetime.datetime(2198, 1, 3, 4...",[],"[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...",[],"[{'charttime': datetime.datetime(2198, 1, 2, 4...",[],[],[],[],[],[],[],[],[],"[{'charttime': datetime.datetime(2198, 1, 2, 4...",[],"[{'charttime': datetime.datetime(2198, 1, 1, 1...",[],[],[],"[{'charttime': datetime.datetime(2198, 1, 2, 4...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 2, 4...","[{'charttime': datetime.datetime(2198, 1, 2, 4...",[],"[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'charttime': datetime.datetime(2198, 1, 1, 1...",[],[],[],"[{'charttime': datetime.datetime(2198, 1, 2, 6...","[{'suspected_infection_time': None, 'antibioti...",[],"[{'charttime': datetime.datetime(2198, 1, 1, 1...","[{'starttime': datetime.datetime(2198, 1, 1, 1...",0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,CSRU,CSRU,PHYS REFERRAL/NORMAL DELI,ELECTIVE,0,2198-01-01 07:15:00,2198-01-09 13:07:00,2198-01-01 10:47:00,2198-01-03 12:00:00,2198-01-01 21:00:00,,2.0507,1,,"[{'charttime': datetime.datetime(2198, 1, 2, 4...",1,0,[],[],2203-12-06
5,5,115203,29498,F,unknown,SINGLE,Medicaid,ENGL,0,0,0,0,1,"[{'charttime': datetime.datetime(2130, 12, 8, ...",[],"[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...",73.3,162.56,"[{'charttime': datetime.datetime(2130, 12, 8, ...",[],"[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...",[],"[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...",[],[],[],[],[],[],[],"[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...",[],[],[],"[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 7, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...","[{'charttime': datetime.datetime(2130, 12, 8, ...",[],[],[],"[{'charttime': datetime.datetime(2130, 12, 8, ...",[{'suspected_infection_time': datetime.datetim...,[],[],"[{'starttime': datetime.datetime(2130, 12, 8, ...",0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,CSRU,CSRU,TRANSFER FROM HOSP/EXTRAM,EMERGENCY,0,2130-12-06 18:30:00,2130-12-18 18:38:00,2130-12-08 10:42:40,2130-12-12 12:08:24,2130-12-09 12:00:00,,4.0595,1,,"[{'charttime': datetime.datetime(2130, 12, 8, ...",1,0,"[{'charttime': datetime.datetime(2130, 12, 8, ...",[],
6,6,181661,18498,M,white,WIDOWED,Medicare,,0,1,0,0,1,"[{'charttime': datetime.datetime(2113, 5, 11, ...","[{'charttime': datetime.datetime(2113, 5, 12, ...","[{'charttime': datetime.datetime(2113, 5, 11, ...","[{'charttime': datetime.datetime(2113, 5, 11, ...","[{'charttime': datetime.datetime(2113, 5, 11, ...","[{'charttime': datetime.datetime(2113, 5, 11, ...","[{'charttime': datetime.datetime(2113, 5, 11, ...","[{'charttime': datetime.datetime(2113, 5, 11, ...",101.15,,"[{'charttime': datetime.datetime(2113, 5, 11, ...",[],"[{'charttime': datetime.datetime(2113, 5, 12, ...","[{'charttime': datetime.datetime(2113, 5, 12, ...","[{'charttime': datetime.datetime(2113, 5, 12, ...",[],"[{'charttime': datetime.datetime(2113, 5, 11, ...","[{'charttime': datetime.datetime(2113, 5, 11, ...",[],"[{'charttime': datetime.datetime(2113, 5, 12, ...","[{'charttime': datetime.datetime(2113, 5, 12, ...","[{'charttime': datetime.datetime(2113, 5, 12, ...",[],[],[],[],[],[],"[{'charttime': datetime.datetime(2113, 5, 13, ...","[{'charttime': datetime.datetime(2113, 5, 12, ...","[{'charttime': datetime.datetime(2113, 5, 13, ...","[{'charttime': datetime.datetime(2113, 5, 12, ...",[],[],[],"[{'charttime': datetime.datetime(2113, 5, 12, ...",[],"[{'charttime': datetime.datetime(2113, 5, 12, ...","[{'charttime': datetime.datetime(2113, 5, 12, ...","[{'charttime': datetime.datetime(2113, 5, 12, ...",[],"[{'charttime': datetime.datetime(2113, 5, 12, ...",[],"[{'charttime': datetime.datetime(2113, 5, 12, ...","[{'charttime': datetime.datetime(2113, 5, 12, ...","[{'charttime': datetime.datetime(2113, 5, 12, ...","[{'charttime': datetime.datetime(2113, 5, 12, ...",[],[],[],[],[],[],"[{'suspected_infection_time': None, 'antibioti...",[],[],[],0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,CCU,CCU,EMERGENCY ROOM ADMIT,EMERGENCY,0,2113-05-07 12:16:00,2113-05-26 13:55:00,2113-05-11 15:46:43,2113-05-13 18:23:43,,,2.109,1,,"[{'charttime': datetime.datetime(2113, 5, 12, ...",0,0,[],[],
7,7,195614,29429,M,white,UNKNOWN (DEFAULT),Private,ENGL,0,1,0,0,0,"[{'charttime': datetime.datetime(2139, 10, 6, ...",[],"[{'charttime': datetime.datetime(2139, 10, 6, ...","[{'charttime': datetime.datetime(2139, 10, 6, ...","[{'charttime': datetime.datetime(2139, 10, 6, ...","[{'charttime': datetime.datetime(2139, 10, 6, ...","[{'charttime': datetime.datetime(2139, 10, 6, ...","[{'charttime': datetime.datetime(2139, 10, 6, ...",62.3,167.64,"[{'charttime': datetime.datetime(2139, 10, 6, ...",[],"[{'charttime': datetime.datetime(2139, 10, 6, ...","[{'charttime': datetime.datetime(2139, 10, 6, ...","[{'charttime': datetime.datetime(2139, 10, 6, ...","[{'charttime': datetime.datetime(2139, 10, 6, ...","[{'charttime': datetime.datetime(2139, 10, 6, ...","[{'charttime': datetime.datetime(2139, 10, 6, ...",[],"[{'charttime': datetime.datetime(2139, 10, 6, ...",[],[],[],[],[],[],[],[],[],"[{'charttime': datetime.datetime(2139, 10, 6, ...","[{'charttime': datetime.datetime(2139, 10, 7, ...","[{'charttime': datetime.datetime(2139, 10, 6, ...",[],[],[],"[{'charttime': datetime.datetime(2139, 10, 6, ...","[{'charttime': datetime.datetime(2139, 10, 6, ...","[{'charttime': datetime.datetime(2139, 10, 6, ...","[{'charttime': datetime.datetime(2139, 10, 6, ...","[{'charttime': datetime.datetime(2139, 10, 6, ...","[{'charttime': datetime.datetime(2139, 9, 15, ...","[{'charttime': datetime.datetime(2139, 10, 6, ...","[{'charttime': datetime.datetime(2139, 10, 6, ...","[{'charttime': datetime.datetime(2139, 10, 6, ...","[{'charttime': datetime.datetime(2139, 10, 6, ...","[{'charttime': datetime.datetime(2139, 10, 6, ...","[{'charttime': datetime.datetime(2139, 10, 6, ...",[],[],[],[],[],[],"[{'suspected_infection_time': None, 'antibioti...",[],[],"[{'starttime': datetime.datetime(2139, 10, 6, ...",0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,CSRU,CSRU,PHYS REFERRAL/NORMAL DELI,ELECTIVE,0,2139-10-06 07:15:00,2139-10-11 15:04:00,2139-10-06 09:31:07,2139-10-07 17:13:18,2139-10-06 16:20:00,,1.321,1,,"[{'charttime': datetime.datetime(2139, 10, 6, ...",0,0,[],[],
8,8,190585,28892,M,white,MARRIED,Private,ENGL,0,0,0,0,1,"[{'charttime': datetime.datetime(2130, 10, 5, ...",[],"[{'charttime': datetime.datetime(2130, 10, 5, ...","[{'charttime': datetime.datetime(2130, 10, 5, ...","[{'charttime': datetime.datetime(2130, 10, 5, ...","[{'charttime': datetime.datetime(2130, 10, 5, ...","[{'charttime': datetime.datetime(2130, 10, 5, ...","[{'charttime': datetime.datetime(2130, 10, 5, ...",79.9,165.1,"[{'charttime': datetime.datetime(2130, 10, 5, ...",[],"[{'charttime': datetime.datetime(2130, 10, 5, ...","[{'charttime': datetime.datetime(2130, 10, 5, ...","[{'charttime': datetime.datetime(2130, 10, 5, ...",[],"[{'charttime': datetime.datetime(2130, 10, 5, ...","[{'charttime': datetime.datetime(2130, 10, 5, ...",[],"[{'charttime': datetime.datetime(2130, 10, 5, ...",[],[],[],[],[],[],[],[],[],"[{'charttime': datetime.datetime(2130, 10, 5, ...","[{'charttime': datetime.datetime(2130, 10, 6, ...","[{'charttime': datetime.datetime(2130, 10, 5, ...",[],[],[],"[{'charttime': datetime.datetime(2130, 10, 5, ...","[{'charttime': datetime.datetime(2130, 10, 5, ...","[{'charttime': datetime.datetime(2130, 10, 5, ...","[{'charttime': datetime.datetime(2130, 10, 5, ...","[{'charttime': datetime.datetime(2130, 10, 5, ...","[{'charttime': datetime.datetime(2130, 9, 29, ...","[{'charttime': datetime.datetime(2130, 10, 5, ...","[{'charttime': datetime.datetime(2130, 10, 5, ...","[{'charttime': datetime.datetime(2130, 10, 5, ...","[{'charttime': datetime.datetime(2130, 10, 5, ...","[{'charttime': datetime.datetime(2130, 10, 5, ...","[{'charttime': datetime.datetime(2130, 10, 5, ...",[],[],[],[],[],[],"[{'suspected_infection_time': None, 'antibioti...",[],[],"[{'starttime': datetime.datetime(2130, 10, 5, ...",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,CSRU,CSRU,PHYS REFERRAL/NORMAL DELI,ELECTIVE,0,2130-10-05 07:15:00,2130-10-09 17:00:00,2130-10-05 08:50:46,2130-10-06 21:54:27,2130-10-05 13:30:00,,1.5442,1,,"[{'charttime': datetime.datetime(2130, 10, 5, ...",0,0,[],[],
9,9,190638,25989,M,white,SINGLE,Medicaid,ENGL,0,0,0,0,1,"[{'charttime': datetime.datetime(2142, 3, 7, 1...","[{'charttime': datetime.datetime(2142, 3, 7, 2...","[{'charttime': datetime.datetime(2142, 3, 7, 1...","[{'charttime': datetime.datetime(2142, 3, 7, 1...","[{'charttime': datetime.datetime(2142, 3, 7, 1...","[{'charttime': datetime.datetime(2142, 3, 7, 1...","[{'charttime': datetime.datetime(2142, 3, 7, 1...","[{'charttime': datetime.datetime(2142, 3, 7, 1...",133.1,175.26,"[{'charttime': datetime.datetime(2142, 3, 7, 1...",[],"[{'charttime': datetime.datetime(2142, 3, 9, 2...","[{'charttime': datetime.datetime(2142, 3, 9, 2...","[{'charttime': datetime.datetime(2142, 3, 9, 2...",[],"[{'charttime': datetime.datetime(2142, 3, 7, 9...","[{'charttime': datetime.datetime(2142, 3, 7, 9...",[],"[{'charttime': datetime.datetime(2142, 3, 7, 1...","[{'charttime': datetime.datetime(2142, 3, 9, 2...","[{'charttime': datetime.datetime(2142, 3, 9, 2...",[],[],[],[],[],[],[],"[{'charttime': datetime.datetime(2142, 3, 7, 1...","[{'charttime': datetime.datetime(2142, 3, 8, 2...","[{'charttime': datetime.datetime(2142, 3, 7, 9...",[],[],[],"[{'charttime': datetime.datetime(2142, 3, 7, 1...","[{'charttime': datetime.datetime(2142, 3, 7, 9...","[{'charttime': datetime.datetime(2142, 3, 7, 9...","[{'charttime': datetime.datetime(2142, 3, 7, 1...","[{'charttime': datetime.datetime(2142, 3, 7, 1...",[],"[{'charttime': datetime.datetime(2142, 3, 7, 9...",[],"[{'charttime': datetime.datetime(2142, 3, 7, 9...","[{'charttime': datetime.datetime(2142, 3, 7, 9...","[{'charttime': datetime.datetime(2142, 3, 7, 9...","[{'charttime': datetime.datetime(2142, 3, 7, 9...",[],"[{'charttime': datetime.datetime(2142, 3, 8, 3...",[],[],[],[],"[{'suspected_infection_time': None, 'antibioti...",[],"[{'charttime': datetime.datetime(2142, 3, 8, 3...","[{'starttime': datetime.datetime(2142, 3, 7, 1...",0,0,0,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,1,0,CSRU,CSRU,EMERGENCY ROOM ADMIT,EMERGENCY,0,2142-03-05 17:10:00,2142-03-13 16:40:00,2142-03-07 09:44:47,2142-03-11 11:41:29,2142-03-08 12:00:00,,4.081,1,,"[{'charttime': datetime.datetime(2142, 3, 7, 1...",0,0,[],[],


### 1.2: Cleaning data types

#### 1.2.0: NaN assignment

In [6]:
df = df.replace('NaT',np.datetime64('NaT'))
df = df.replace(['[]','NaN',np.datetime64('NaT')],np.NaN)

#### 1.2.1: Datetime columns
+ add vent_duration column

In [7]:
# set column types as datetime
time_cols = ['admittime','dischtime','intime','outtime','reint_time','ext_time','deathtime']
for col in time_cols:
    df[col] = pd.to_datetime(df[col], format='%Y-%m-%d %H:%M:%S')

#dod
df['dod'] = pd.to_datetime(df['dod'], format='%Y-%m-%d')

In [8]:
# define function for getting ventilation duration (1st ventilation)


### NOTE: NEED TO EDIT FORMULA FOR VENT DURATION BASED ON JAHAN'S FORMULA ###


def get_vent_duration(row):
    time_s = (row['ext_time']-row['intime']).total_seconds()
    if math.isnan(time_s):
        time_s = (row['deathtime']-row['intime']).total_seconds()
    time_min = time_s / 60
    time_h = time_min / 60
    return time_h

In [9]:
# create ~NEW COLUMN~ for vent_duration
df['vent_duration'] = df.apply(get_vent_duration, axis=1)

In [10]:
## CHECK FOR ROWS WHERE DEATHTIME < INTIME OR ADMITTIME
xtime_cols = ['ext_time','vent_duration','admittime','dischtime','intime','outtime','reint_time','deathtime','dod']
df.loc[df['vent_duration'] < 0][xtime_cols]

Unnamed: 0,ext_time,vent_duration,admittime,dischtime,intime,outtime,reint_time,deathtime,dod
110,2125-10-02 12:05:00,-0.731944,2125-10-02 12:47:00,2125-10-17 14:20:00,2125-10-02 12:48:55,2125-10-05 10:53:40,NaT,NaT,NaT
725,NaT,-0.642222,2162-04-02 12:33:00,2162-04-02 12:00:00,2162-04-02 12:38:32,2162-04-03 05:42:37,NaT,2162-04-02 12:00:00,2162-04-02
762,2130-04-27 14:33:00,-1.233333,2130-04-27 07:15:00,2130-05-03 13:21:00,2130-04-27 15:47:00,2130-05-01 10:17:00,NaT,NaT,NaT
1275,2193-05-24 10:15:00,-0.283333,2193-05-24 13:30:00,2193-05-29 13:33:00,2193-05-24 10:32:00,2193-05-27 09:50:00,NaT,NaT,NaT
1330,NaT,-3.383333,2153-10-12 09:49:00,2153-10-12 06:29:00,2153-10-12 09:52:00,2153-10-13 06:29:00,NaT,2153-10-12 06:29:00,2153-10-12
1967,2109-07-18 11:33:00,-0.233333,2109-07-16 11:15:00,2109-07-22 12:57:00,2109-07-18 11:47:00,2109-07-19 16:53:00,NaT,NaT,NaT
3660,NaT,-3.271944,2110-01-06 15:15:00,2110-01-06 12:00:00,2110-01-06 15:16:19,2110-01-07 07:03:31,NaT,2110-01-06 12:00:00,2110-01-06
4182,NaT,-10.9125,2136-03-19 22:54:00,2136-03-19 12:00:00,2136-03-19 22:54:45,2136-03-20 05:55:41,NaT,2136-03-19 12:00:00,2136-03-19


In [11]:
df[xtime_cols].dtypes

ext_time         datetime64[ns]
vent_duration           float64
admittime        datetime64[ns]
dischtime        datetime64[ns]
intime           datetime64[ns]
outtime          datetime64[ns]
reint_time       datetime64[ns]
deathtime        datetime64[ns]
dod              datetime64[ns]
dtype: object

#### 1.2.2: Demographics

In [12]:
for x in demographics:
    print(x,': ',df[x].unique())

gender :  ['M' 'F']
ethnicity :  ['white' 'other' 'unknown' 'hispanic' 'black' 'asian' 'native' 'WHITE'
 'UNKNOWN' 'OTHER' 'BLACK/AFRICAN AMERICAN' 'HISPANIC/LATINO'
 'UNABLE TO OBTAIN' 'ASIAN' 'AMERICAN INDIAN/ALASKA NATIVE']
marital_status :  ['SINGLE' 'DIVORCED' 'MARRIED' 'WIDOWED' 'UNKNOWN (DEFAULT)' nan
 'SEPARATED']
insurance :  ['Private' 'Self Pay' 'Medicare' 'Medicaid' 'Government' 'Other']
language :  ['ENGL' 'SPAN' nan 'VIET' 'RUSS' 'HAIT' 'CANT' 'PORT' 'PTUN' 'ALBA' 'THAI'
 'ARAB' 'GREE' 'AMER' '*LEB' '*BEN' 'CAPE' 'POLI' 'ITAL' 'HIND' 'URDU'
 'KORE' 'GERM' 'TURK' 'ETHI' 'CAMB' 'MAND' '*GUJ' 'PERS' 'ENGLISH' '?']


In [13]:
#ethnicity
df.replace({'ethnicity':
                {'unknown': np.NaN,'UNKNOWN':np.NaN,'UNABLE TO OBTAIN':np.NaN,
                'OTHER':'other','WHITE':'white','BLACK/AFRICAN AMERICAN':'black','ASIAN':'asian',
                'HISPANIC/LATINO':'hispanic','AMERICAN INDIAN/ALASKA NATIVE':'native'
                }
            }, 
            inplace=True)
print(df['ethnicity'].unique())

['white' 'other' nan 'hispanic' 'black' 'asian' 'native']


In [14]:
#marital_status
df.replace({'marital_status':
                {'UNKNOWN (DEFAULT)': np.NaN
                }
            }, 
            inplace=True)
print(df['marital_status'].unique())

['SINGLE' 'DIVORCED' 'MARRIED' 'WIDOWED' nan 'SEPARATED']


In [15]:
#language
df.replace({'language':
                {'ENGLISH':'ENGL','?':np.NaN
                }
            }, 
            inplace=True)
print(df['marital_status'].unique())

['SINGLE' 'DIVORCED' 'MARRIED' 'WIDOWED' nan 'SEPARATED']


#### 1.2.3: ✔Procedure type

In [16]:
for x in proceduretype:
    print(x,': ',df[x].unique())

aortic :  [0 1]
mit :  [0 1]
tricuspid :  [0 1]
pulmonary :  [0 1]
cabg :  [1 0]


#### 1.2.4: **Vitals / Blood Gases / Products + infection / Ventilation


In [17]:
# wait for Jahan/others
# ventrate seems to be empty

#### 1.2.5: ✔Comorbidities

In [18]:
for x in comorbidities:
    print(x,': ',df[x].unique())

liver_severe :  [0 1]
liver_mild :  [0 1]
rheum :  [0 1]
cvd :  [0 1]
aids :  [0 1]
ckd :  [0 1]
copd :  [0 1]
arrhythmia :  [0 1]
pud :  [0 1]
smoking :  [0 1]
pvd :  [0 1]
paraplegia :  [0 1]
ccf :  [0 1]
met_ca :  [0 1]
t2dm :  [0 1]
t1dm :  [0 1]
malig :  [0 1]
mi :  [1 0]
dementia :  [0 1]


#### 1.2.6: Admissions (categorical)

In [19]:
for x in adm_cat:
    print(x,': ',df[x].unique())

first_careunit :  ['CSRU' 'CCU' 'TSICU' 'SICU' 'MICU'
 'Cardiac Vascular Intensive Care Unit (CVICU)' 'Coronary Care Unit (CCU)'
 'Trauma SICU (TSICU)' 'Medical Intensive Care Unit (MICU)'
 'Medical/Surgical Intensive Care Unit (MICU/SICU)'
 'Surgical Intensive Care Unit (SICU)'
 'Neuro Surgical Intensive Care Unit (Neuro SICU)' 'Neuro Intermediate']
last_careunit :  ['CSRU' 'CCU' 'MICU' 'SICU' 'TSICU'
 'Cardiac Vascular Intensive Care Unit (CVICU)' 'Coronary Care Unit (CCU)'
 'Medical Intensive Care Unit (MICU)' 'Trauma SICU (TSICU)'
 'Medical/Surgical Intensive Care Unit (MICU/SICU)'
 'Surgical Intensive Care Unit (SICU)'
 'Neuro Surgical Intensive Care Unit (Neuro SICU)']
admission_location :  ['PHYS REFERRAL/NORMAL DELI' 'TRANSFER FROM HOSP/EXTRAM'
 'EMERGENCY ROOM ADMIT' 'CLINIC REFERRAL/PREMATURE'
 'TRANSFER FROM OTHER HEALT' 'TRANSFER FROM SKILLED NUR'
 'PHYSICIAN REFERRAL' 'TRANSFER FROM HOSPITAL' 'EMERGENCY ROOM' 'PACU'
 'PROCEDURE SITE' 'TRANSFER FROM SKILLED NURSING FACILITY

In [20]:
#first_careunit
df.replace({'first_careunit':
                {'Cardiac Vascular Intensive Care Unit (CVICU)':'CVICU',
                'Coronary Care Unit (CCU)':'CCU',
                'Medical Intensive Care Unit (MICU)':'MICU',
                'Surgical Intensive Care Unit (SICU)':'SICU',
                'Neuro Intermediate':'Neuro Inter',
                'Medical/Surgical Intensive Care Unit (MICU/SICU)':'MICU/SICU',
                'Trauma SICU (TSICU)':'TSICU',
                'Neuro Surgical Intensive Care Unit (Neuro SICU)':'Neuro SICU'
                }
            }, 
            inplace=True)
print(df['first_careunit'].unique())

['CSRU' 'CCU' 'TSICU' 'SICU' 'MICU' 'CVICU' 'MICU/SICU' 'Neuro SICU'
 'Neuro Inter']


In [21]:
#last_careunit
df.replace({'last_careunit':
                {'Cardiac Vascular Intensive Care Unit (CVICU)':'CVICU',
                'Coronary Care Unit (CCU)':'CCU',
                'Medical Intensive Care Unit (MICU)':'MICU',
                'Surgical Intensive Care Unit (SICU)':'SICU',
                'Neuro Intermediate':'Neuro Inter',
                'Medical/Surgical Intensive Care Unit (MICU/SICU)':'MICU/SICU',
                'Trauma SICU (TSICU)':'TSICU',
                'Neuro Surgical Intensive Care Unit (Neuro SICU)':'Neuro SICU'
                }
            }, 
            inplace=True)
print(df['last_careunit'].unique())

['CSRU' 'CCU' 'MICU' 'SICU' 'TSICU' 'CVICU' 'MICU/SICU' 'Neuro SICU']


In [22]:
#admission_location
df.replace({'admission_location':
                {'TRANSFER FROM HOSP/EXTRAM':'TRANSFER FROM HOSPITAL',
                'PHYS REFERRAL/NORMAL DELI':'PHYSICIAN REFERRAL',
                'TRANSFER FROM SKILLED NUR':'TRANSFER FROM SKILLED NURSING FACILITY',
                'INFORMATION NOT AVAILABLE':np.NaN,
                'CLINIC REFERRAL':'CLINIC REFERRAL/PREMATURE',
                'EMERGENCY ROOM ADMIT':'EMERGENCY ROOM',
                }
            }, 
            inplace=True)
print(df['admission_location'].unique())

['PHYSICIAN REFERRAL' 'TRANSFER FROM HOSPITAL' 'EMERGENCY ROOM'
 'CLINIC REFERRAL/PREMATURE' 'TRANSFER FROM OTHER HEALT'
 'TRANSFER FROM SKILLED NURSING FACILITY' 'PACU' 'PROCEDURE SITE'
 'WALK-IN/SELF REFERRAL' nan 'INTERNAL TRANSFER TO OR FROM PSYCH'
 'AMBULATORY SURGERY TRANSFER']


#### 1.2.7: Others

In [23]:
# for x in others:
#     print(x,': ',df[x].unique())

### Parsing time series data

In [24]:
# df['vent_array'][0]

In [25]:
# def ventarray_parser(value):
#     int_time1=np.NaN
#     ext_time1=np.NaN
#     int_time2=np.NaN
#     ext_time2=np.NaN
#     if value == np.NaN:
#         return np
#     else:
#         a = value
#         for i in ['\n ',
#                   '[',']',
#                   "{'starttime': datetime.datetime",
#                   " 'endtime': datetime.datetime",
#                   " 'duration_hours': "]:
#             a = a.replace(i,'')
#         split = a.split('}')
#         del split[-1]
#         int_time1=np.NaN
#         ext_time1=np.NaN
#         int_time2=np.NaN
#         ext_time2=np.NaN
#         if len(split) == 1:
#             pass
#         elif len(split) == 2:
#             pass
#         else:
#             raise ValueError("length of vent_array is wonky")

# test_x = df['vent_array'][13]
# ventarray_parser(test_x)

In [26]:
def infection_parser(value, timelimit):
    if value == np.NaN:
        return np.NaN
    else:
        a = value
        a = a.replace('\n ','')
        a = a.replace('[','')
        a = a.replace(']','')
        a = a.replace("{'charttime': datetime.datetime",'')
        split = a.split('}')

In [38]:
def ts_parser(value, timelimit):
    """
    Takes single string of timeseries data in MIMIC format and returns the mean, max, min values   
    Parameters
    ----------
    value : single string of timeseries data in MIMIC format
    timelimit : time (in hours) from the first data entry to include data up to

    Returns
    -------
    avg : mean of all values within specified time period
    max_: maximum of all values within specified time period
    min_: minimum of all values within specified time period
    """
    if value == np.NaN:
        return np.NaN
    else:
        a = value
        a = a.replace('\n ','')
        a = a.replace('[','')
        a = a.replace(']','')
        a = a.replace("{'charttime': datetime.datetime",'')
        split = a.split('}')
        del split[-1]
        times = []
        values = []
        for n in range(0,len(split)):
            subsplit = split[n].split(", 'value'")
            t = datetime.strptime(subsplit[0],'(%Y, %m, %d, %H, %M)')
            times.append(t)
            v = float(subsplit[1].replace(': ',''))
            values.append(v)
        starttime = times[0]
        endtime = times[0] + timedelta(hours=timelimit)
        #find the average
        incl_values = []
        for n in range(0,len(split)):
            if times[n] > starttime and times[n] < endtime: 
                incl_values.append(values[n])
        print(incl_values)
        avg = statistics.mean(incl_values)
        max_ = max(incl_values)
        min_ = min(incl_values)
        return avg, max_, min_

def ts_parser2(value, timelimit, valuename='value'):
    if value == np.NaN or pd.isna(value):
        return np.NaN
    a = value.replace("'", '"')
    a = a.replace('\n ...\n',',').replace('\n', ',').replace('...', '')
    a = a.replace('datetime.', '"dt.')
    a = a.replace(f'), "{valuename}"', f')", "{valuename}"')
    a = json.loads(a)
    b = [(eval(i['charttime']), i[valuename]) for i in a]
    
    startTime = min(b, key=lambda x:x[0])[0]
    inc_b = [i[1] for i in b if i[0] <= startTime + dt.timedelta(hours=timelimit)]
    return sum(inc_b) / len(inc_b), max(inc_b), min(inc_b)

test_x = df[timeseries].iloc[0,0]
print(ts_parser(test_x,12))
print(ts_parser2(test_x,12))
print()
test_y = df['bg_temp'][9]
print(test_y)
print('Parser1: ', ts_parser(test_y, 36))
print('Parser2: ', ts_parser2(test_y, 36))

[37.11111280653212, 37.3888905843099, 37.444445292154946, 37.94444613986545, 37.5]
(37.47777896457248, 37.94444613986545, 37.11111280653212)
(37.43518617418077, 37.94444613986545, 37.11111280653212)

[{'charttime': datetime.datetime(2142, 3, 7, 20, 29), 'value': 38.6}
 {'charttime': datetime.datetime(2142, 3, 7, 22, 11), 'value': 38.6}
 {'charttime': datetime.datetime(2142, 3, 7, 23, 50), 'value': 38.6}
 {'charttime': datetime.datetime(2142, 3, 8, 2, 37), 'value': 38.5}
 {'charttime': datetime.datetime(2142, 3, 8, 3, 50), 'value': 38.1}
 {'charttime': datetime.datetime(2142, 3, 8, 5, 55), 'value': 37.9}]
[38.6, 38.6, 38.5, 38.1, 37.9]
Parser1:  (38.34, 38.6, 37.9)
Parser2:  (38.38333333333333, 38.6, 37.9)


### 1.3: Handling missing data

#### 1.3.0 Assessing for missing data

In [28]:
# formula for checking % missing values
def missing_values_table(df): 
    mis_val = df.isnull().sum()
    mis_val_percent = 100 * df.isnull().sum() / len(df)
    mis_val_table = pd.concat([mis_val, mis_val_percent], axis=1)
    mis_val_table_ren_columns = mis_val_table.rename(columns = {0: 'Missing Values', 1: '% Missing Values'})
    return mis_val_table_ren_columns

missing_data = missing_values_table(df)

In [29]:
#set limit and get list of variables missing above limit in `missing_cols`
missing_limit = 20
missing_cols = missing_data.loc[missing_data['% Missing Values']>missing_limit].index.tolist()
print(missing_cols)
missing_data.loc[missing_data['% Missing Values']>missing_limit]

['language', 'bg_temp', 'cardiac_index', 'pt', 'fibrinogen', 'plts', 'lymphocytes', 'neutrophils', 'alp', 'ast', 'alt', 'ggt', 'bilirubin_indirect', 'bilirubin_direct', 'bilirubin_total', 'crp', 'bleed_time', 'albumin', 'lactate', 'aado2', 'fio2', 'ffp', 'insulin', 'cryo', 'prbc', 'ventrate', 'tidalvol', 'vent_array', 'ext_time', 'reint_time', 'deathtime', 'dtoutput', 'specimen', 'dod', 'vent_duration']


Unnamed: 0,Missing Values,% Missing Values
language,3551,28.795005
bg_temp,9324,75.608174
cardiac_index,8743,70.896854
pt,4339,35.184885
fibrinogen,3911,31.714239
plts,10465,84.860525
lymphocytes,6819,55.295167
neutrophils,6820,55.303276
alp,9722,78.83555
ast,9668,78.397665


In [30]:
missing_data.loc[time_cols,:]

Unnamed: 0,Missing Values,% Missing Values
admittime,0,0.0
dischtime,0,0.0
intime,0,0.0
outtime,0,0.0
reint_time,11703,94.899449
ext_time,2858,23.175478
deathtime,12123,98.305222


In [31]:
# option 1: delete all rows in `missing_cols` (set inplace to true to execute)

df.drop(columns=missing_cols, inplace=False)
print(list(df.columns))

# reset index
#df.reset_index(drop=True, inplace=True)

['Unnamed: 0', 'hadm_id', 'subject_id', 'gender', 'ethnicity', 'marital_status', 'insurance', 'language', 'aortic', 'mit', 'tricuspid', 'pulmonary', 'cabg', 'temp', 'bg_temp', 'hr', 'spo2', 'rr', 'sbp', 'dbp', 'meanbp', 'weight', 'height', 'cardiac_index', 'pt', 'ptt', 'inr', 'inr_1', 'fibrinogen', 'hb', 'hematocrit', 'plts', 'wcc', 'lymphocytes', 'neutrophils', 'alp', 'ast', 'alt', 'ggt', 'bilirubin_indirect', 'bilirubin_direct', 'bilirubin_total', 'chloride', 'magnesium', 'potassium', 'crp', 'bleed_time', 'albumin', 'creatinine', 'free_calcium', 'sodium', 'bicarb', 'bun', 'hba1c', 'glucose', 'lactate', 'po2', 'pco2', 'baseexcess', 'ph', 'aado2', 'fio2', 'ffp', 'insulin', 'cryo', 'prbc', 'infection', 'ventrate', 'tidalvol', 'vent_array', 'reintubation', 'liver_severe', 'liver_mild', 'rheum', 'cvd', 'aids', 'ckd', 'copd', 'arrhythmia', 'pud', 'smoking', 'pvd', 'paraplegia', 'ccf', 'met_ca', 't2dm', 't1dm', 'malig', 'mi', 'dementia', 'first_careunit', 'last_careunit', 'admission_locatio

In [32]:
# option 2: impute data based on median


In [33]:
# option 3: multiple imputation

x = missing_data.loc[missing_data['% Missing Values']>missing_limit]
x.loc[[i for i in x.index if i not in time_cols],:]

Unnamed: 0,Missing Values,% Missing Values
language,3551,28.795005
bg_temp,9324,75.608174
cardiac_index,8743,70.896854
pt,4339,35.184885
fibrinogen,3911,31.714239
plts,10465,84.860525
lymphocytes,6819,55.295167
neutrophils,6820,55.303276
alp,9722,78.83555
ast,9668,78.397665


#### 1.3.1 Creating summary fields for time-series data

In [34]:
df['cardiac_index'][1]

"[{'charttime': datetime.datetime(2198, 5, 8, 17, 15), 'ci': 2.8994100093841553}\n {'charttime': datetime.datetime(2198, 5, 8, 19, 30), 'ci': 1.8461500406265259}\n {'charttime': datetime.datetime(2198, 5, 8, 20, 5), 'ci': 2.479290008544922}\n {'charttime': datetime.datetime(2198, 5, 8, 20, 45), 'ci': 1.9940799474716187}\n {'charttime': datetime.datetime(2198, 5, 8, 21, 0), 'ci': 1.9940799474716187}\n {'charttime': datetime.datetime(2198, 5, 8, 22, 0), 'ci': 2.573960065841675}\n {'charttime': datetime.datetime(2198, 5, 8, 23, 0), 'ci': 1.715980052947998}\n {'charttime': datetime.datetime(2198, 5, 9, 1, 0), 'ci': 1.8224899768829346}\n {'charttime': datetime.datetime(2198, 5, 9, 1, 30), 'ci': 1.7692300081253052}\n {'charttime': datetime.datetime(2198, 5, 9, 1, 45), 'ci': 1.7692300081253052}\n {'charttime': datetime.datetime(2198, 5, 9, 2, 0), 'ci': 1.8461500406265259}\n {'charttime': datetime.datetime(2198, 5, 9, 3, 30), 'ci': 2.236690044403076}\n {'charttime': datetime.datetime(2198, 5, 

In [35]:
timeseries_valuenames = {'cardiac_index', 'ci'}

for j in timeseries:
    for i in range(len(df[j])):
        #print(df['temp'][i])
        #print(i, ts_parser2(df['temp'][i],12))
        try:
            if j in timeseries_valuenames:
                ts_parser2(df[j][i], 12, timeseries_valuenames[j])
            else:
                ts_parser2(df[j][i], 12)
        except:
            print(j, i)
            break
    print(j, 'Fine')

temp Fine
bg_temp Fine
hr Fine
spo2 Fine
rr Fine
sbp Fine
dbp Fine
meanbp Fine
cardiac_index 0
cardiac_index Fine
pt Fine
ptt Fine
inr Fine
inr_1 Fine
fibrinogen Fine
hb Fine
hematocrit Fine
plts 1
plts Fine
wcc Fine
lymphocytes Fine
neutrophils Fine
alp Fine
ast Fine
alt Fine
ggt Fine
bilirubin_indirect Fine
bilirubin_direct Fine
bilirubin_total Fine
chloride Fine
magnesium Fine
potassium Fine
crp Fine
bleed_time Fine
albumin Fine
creatinine Fine
free_calcium Fine
sodium Fine
bicarb Fine
bun Fine
hba1c Fine
glucose Fine
lactate Fine
po2 Fine
pco2 Fine
baseexcess Fine
ph Fine
aado2 Fine
fio2 Fine
ffp 1
ffp Fine
insulin 1
insulin Fine
cryo 1
cryo Fine
prbc 1
prbc Fine
infection 0
infection Fine
ventrate Fine
tidalvol Fine
vent_array 0
vent_array Fine
plt Fine
dtoutput 5
dtoutput Fine
