# DL Survival - Readmission time
 Updated 25/11/21

In [1]:
import pandas as pd
import numpy as np
import math
import statistics
from datetime import datetime
import datetime as dt
from datetime import timedelta
import json
import miceforest as mf

pd.set_option('display.max_columns', None)

## 1. Data cleaning

- Import MIMIC III combined data
- Review column unique values, assign correct data types
- Impute missing values


### 1.1: Importing data

In [2]:
df = pd.read_csv('../mimiciii_v2.csv')
print(df.shape)
df.head(5)

(6556, 123)


Unnamed: 0,subject_id,hadm_id,stay_id,age,gender,ethnicity,height,weight,admission_type,admission_location,admittime,dischtime,intime,outtime,insurance,marital_status,language,icustay_seq,los,first_careunit,last_careunit,dbsource,hospital_expire_flag,deathtime,dod,sofa,postop_intime,cabg,aortic,mitral,tricuspid,pulmonary,aki,ph,bicarb,baseexcess,chloride,free_calcium,glucose,potassium,sodium,lactate,hematocrit,hb,pco2,po2,bg_temp,fio2,ventrate,tidalvol,aado2,specimen,neutrophils,lymphocytes,wcc,plt,crp,albumin,creatinine,bun,magnesium,alt,alp,ast,ggt,bilirubin_total,bilirubin_direct,bilirubin_indirect,inr,fibrinogen,bleed_time,ptt,pt,hba1c,prbc,plts,ffp,cryo,mi,arrhythmia,ccf,pvd,cvd,dementia,copd,rheum,pud,liver_mild,diab_un,diab_cc,t1dm,t2dm,paraplegia,ckd,malig,liver_severe,met_ca,aids,smoking,dtoutput,echo,fluid,infection,intropes,insulin,pasp,padp,mpap,readmissions,readmit_times,first_readmission,vent_array,reintubation,reint_time,ext_time,hr,sbp,dbp,meanbp,rr,temp,spo2,cardiac_index
0,28772,132385,293560,19,F,white,170.18,115.4,EMERGENCY,EMERGENCY ROOM ADMIT,2148-12-15T00:19:00,2149-01-04T15:45:00,2148-12-23T13:00:34,2148-12-24T16:42:20,Private,SINGLE,ENGL,2,1.154,CSRU,CSRU,carevue,0,,,2,2148-12-23T15:00:00,0,0,1,0,0,"[{'charttime': '2148-12-23T15:35:00', 'aki_sta...","[{'charttime': '2148-12-23T15:58:00', 'value':...","[{'charttime': '2148-12-23T15:52:00', 'value':...","[{'charttime': '2148-12-23T15:58:00', 'value':...","[{'charttime': '2148-12-23T15:52:00', 'value':...","[{'charttime': '2148-12-23T15:58:00', 'value':...","[{'charttime': '2148-12-23T15:58:00', 'value':...","[{'charttime': '2148-12-23T15:58:00', 'value':...","[{'charttime': '2148-12-23T15:58:00', 'value':...",[],[],"[{'charttime': '2148-12-23T15:52:00', 'value':...","[{'charttime': '2148-12-23T15:58:00', 'value':...","[{'charttime': '2148-12-23T15:58:00', 'value':...",[],[],[],[],[],[],[],[],"[{'charttime': '2148-12-23T15:52:00', 'value':...","[{'charttime': '2148-12-23T15:52:00', 'value':...",[],[],"[{'charttime': '2148-12-23T15:52:00', 'value':...","[{'charttime': '2148-12-23T15:52:00', 'value':...","[{'charttime': '2148-12-24T02:47:00', 'value':...",[],[],[],[],[],[],[],"[{'charttime': '2148-12-23T15:52:00', 'value':...",[],[],"[{'charttime': '2148-12-23T15:52:00', 'value':...",[],"[{'charttime': '2148-12-20T14:20:00', 'value':...",[],[],[],[],0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"[{'charttime': '2148-12-23T15:35:00', 'output'...","[{'CHARTTIME': None, 'STORETIME': None, 'CATEG...","[{'starttime': '2148-12-23T14:35:00', 'endtime...",[{'suspected_infection_time': '2148-12-20T12:2...,[],[],[],"[{'charttime': '2148-12-23T15:35:00', 'PADP': ...","[{'charttime': '2148-12-23T15:35:00', 'mPAP': ...",,[],,"[{'starttime': '2148-12-23T15:00:00', 'endtime...",0,,2148-12-23 18:00:00,"[{'charttime': '2148-12-23T15:35:00', 'value':...","[{'charttime': '2148-12-23T16:00:00', 'value':...","[{'charttime': '2148-12-23T16:00:00', 'value':...","[{'charttime': '2148-12-23T15:35:00', 'value':...","[{'charttime': '2148-12-23T15:00:00', 'value':...","[{'charttime': '2148-12-23T15:45:00', 'value':...","[{'charttime': '2148-12-23T16:00:00', 'value':...","[{'charttime': '2148-12-23T15:45:00', 'ci': 2...."
1,73457,184405,251108,21,F,hispanic,152.4,60.0,ELECTIVE,PHYS REFERRAL/NORMAL DELI,2198-12-28T12:00:00,2199-01-04T20:05:00,2198-12-28T10:37:23,2198-12-29T10:14:22,Medicaid,SINGLE,ENGL,1,0.984,CSRU,CSRU,metavision,0,,,6,2198-12-28T13:00:00,0,0,1,0,0,"[{'charttime': '2198-12-28T13:59:00', 'aki_sta...","[{'charttime': '2198-12-28T13:38:00', 'value':...","[{'charttime': '2198-12-28T13:59:00', 'value':...","[{'charttime': '2198-12-28T13:38:00', 'value':...","[{'charttime': '2198-12-28T13:38:00', 'value':...","[{'charttime': '2198-12-28T13:38:00', 'value':...","[{'charttime': '2198-12-28T13:38:00', 'value':...","[{'charttime': '2198-12-28T13:38:00', 'value':...","[{'charttime': '2198-12-28T13:38:00', 'value':...","[{'charttime': '2198-12-28T13:38:00', 'value':...","[{'charttime': '2198-12-28T13:38:00', 'value':...","[{'charttime': '2198-12-28T13:38:00', 'value':...","[{'charttime': '2198-12-28T13:38:00', 'value':...","[{'charttime': '2198-12-28T13:38:00', 'value':...",[],[],[],[],[],[],[],[],"[{'charttime': '2198-12-28T13:59:00', 'value':...","[{'charttime': '2198-12-28T13:59:00', 'value':...",[],[],"[{'charttime': '2198-12-28T13:59:00', 'value':...","[{'charttime': '2198-12-28T13:59:00', 'value':...","[{'charttime': '2198-12-29T02:12:00', 'value':...",[],[],[],[],[],[],[],"[{'charttime': '2198-12-28T13:59:00', 'value':...",[],[],"[{'charttime': '2198-12-28T13:59:00', 'value':...",[],[],"[{'charttime': '2198-12-28T15:12:00', 'bloodpr...",[],[],[],0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"[{'charttime': '2198-12-28T13:45:00', 'output'...","[{'CHARTTIME': None, 'STORETIME': None, 'CATEG...","[{'starttime': '2198-12-28T12:45:00', 'endtime...",[{'suspected_infection_time': '2198-12-28T13:5...,"[{'starttime': '2198-12-28T13:55:00', 'endtime...",[],"[{'charttime': '2198-12-28T13:47:00', 'PASP': ...","[{'charttime': '2198-12-28T13:47:00', 'PADP': ...","[{'charttime': '2198-12-28T13:47:00', 'mPAP': ...",,[],,"[{'starttime': '2198-12-28T13:00:00', 'endtime...",0,,2198-12-28 18:50:00,[],[],[],[],[],[],[],[]
2,93887,137808,242592,21,F,hispanic,160.02,74.4,ELECTIVE,PHYS REFERRAL/NORMAL DELI,2188-02-04T00:39:00,2188-02-09T16:41:00,2188-02-05T11:15:53,2188-02-07T18:14:19,Medicaid,SINGLE,ENGL,1,2.2906,CSRU,CSRU,metavision,0,,,2,2188-02-05T16:40:00,0,0,0,1,0,"[{'charttime': '2188-02-05T17:00:00', 'aki_sta...","[{'charttime': '2188-02-05T17:07:00', 'value':...","[{'charttime': '2188-02-05T17:00:00', 'value':...","[{'charttime': '2188-02-05T17:07:00', 'value':...","[{'charttime': '2188-02-05T17:00:00', 'value':...","[{'charttime': '2188-02-05T17:07:00', 'value':...","[{'charttime': '2188-02-05T17:07:00', 'value':...","[{'charttime': '2188-02-05T17:07:00', 'value':...","[{'charttime': '2188-02-05T17:07:00', 'value':...",[],[],"[{'charttime': '2188-02-05T17:00:00', 'value':...","[{'charttime': '2188-02-05T17:07:00', 'value':...","[{'charttime': '2188-02-05T17:07:00', 'value':...",[],[],[],[],[],[],[],[],"[{'charttime': '2188-02-05T17:00:00', 'value':...","[{'charttime': '2188-02-05T17:00:00', 'value':...",[],[],"[{'charttime': '2188-02-05T17:00:00', 'value':...","[{'charttime': '2188-02-05T17:00:00', 'value':...","[{'charttime': '2188-02-06T03:19:00', 'value':...",[],[],[],[],[],[],[],"[{'charttime': '2188-02-05T17:00:00', 'value':...",[],[],"[{'charttime': '2188-02-05T17:00:00', 'value':...",[],"[{'charttime': '2188-01-16T07:05:00', 'value':...","[{'charttime': '2188-02-07T14:10:00', 'bloodpr...",[],[],[],0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"[{'charttime': '2188-02-05T17:01:00', 'output'...","[{'CHARTTIME': None, 'STORETIME': None, 'CATEG...","[{'starttime': '2188-02-05T15:36:00', 'endtime...",[{'suspected_infection_time': '2188-02-04T22:4...,"[{'starttime': '2188-02-05T16:35:00', 'endtime...",[],[],[],[],,[],,"[{'starttime': '2188-02-05T16:40:00', 'endtime...",0,,2188-02-05 21:00:00,[],[],[],[],[],[],[],[]
3,16927,109148,254842,22,M,white,190.5,71.1,EMERGENCY,EMERGENCY ROOM ADMIT,2166-09-24T20:15:00,2166-10-20T13:30:00,2166-10-06T09:41:05,2166-10-07T17:19:43,Private,SINGLE,,2,1.3185,CSRU,CSRU,carevue,0,,,5,2166-10-06T11:30:00,0,1,1,0,0,"[{'charttime': '2166-10-06T03:48:00', 'aki_sta...","[{'charttime': '2166-10-06T12:01:00', 'value':...","[{'charttime': '2166-10-06T11:53:00', 'value':...","[{'charttime': '2166-10-06T12:01:00', 'value':...","[{'charttime': '2166-10-06T11:53:00', 'value':...","[{'charttime': '2166-10-06T12:01:00', 'value':...","[{'charttime': '2166-10-06T12:01:00', 'value':...","[{'charttime': '2166-10-06T12:01:00', 'value':...","[{'charttime': '2166-10-06T12:01:00', 'value':...",[],[],"[{'charttime': '2166-10-06T11:53:00', 'value':...","[{'charttime': '2166-10-06T12:01:00', 'value':...","[{'charttime': '2166-10-06T12:01:00', 'value':...",[],[],[],[],[],[],[],[],"[{'charttime': '2166-10-06T11:53:00', 'value':...","[{'charttime': '2166-10-06T11:53:00', 'value':...",[],[],"[{'charttime': '2166-10-06T11:53:00', 'value':...","[{'charttime': '2166-10-06T11:53:00', 'value':...","[{'charttime': '2166-10-06T17:59:00', 'value':...",[],[],[],[],[],[],[],"[{'charttime': '2166-10-06T11:53:00', 'value':...",[],[],"[{'charttime': '2166-10-06T11:53:00', 'value':...",[],[],"[{'charttime': '2166-10-07T00:30:00', 'bloodpr...",[],[],[],0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"[{'charttime': '2166-10-06T11:30:00', 'output'...","[{'CHARTTIME': None, 'STORETIME': None, 'CATEG...","[{'starttime': '2166-09-27T15:00:00', 'endtime...",[{'suspected_infection_time': '2166-09-24T11:1...,"[{'starttime': '2166-10-06T11:30:00', 'endtime...",[],[],"[{'charttime': '2166-10-06T11:30:00', 'PADP': ...","[{'charttime': '2166-10-06T11:30:00', 'mPAP': ...",,[],,"[{'starttime': '2166-10-06T11:30:00', 'endtime...",0,,2166-10-06 14:11:00,"[{'charttime': '2166-10-06T11:30:00', 'value':...","[{'charttime': '2166-10-06T11:30:00', 'value':...","[{'charttime': '2166-10-06T11:30:00', 'value':...","[{'charttime': '2166-10-06T11:30:00', 'value':...","[{'charttime': '2166-10-06T11:30:00', 'value':...","[{'charttime': '2166-10-06T11:30:00', 'value':...","[{'charttime': '2166-10-06T11:30:00', 'value':...","[{'charttime': '2166-10-06T12:00:00', 'ci': 5...."
4,4846,154439,264181,22,M,asian,182.88,66.0,ELECTIVE,PHYS REFERRAL/NORMAL DELI,2111-10-05T07:15:00,2111-10-13T15:30:00,2111-10-05T13:02:50,2111-10-09T10:13:52,Private,SINGLE,,1,3.8827,CSRU,CSRU,carevue,0,,,7,2111-10-05T14:00:00,0,0,1,0,0,"[{'charttime': '2111-10-05T14:30:00', 'aki_sta...","[{'charttime': '2111-10-05T14:52:00', 'value':...","[{'charttime': '2111-10-05T14:44:00', 'value':...","[{'charttime': '2111-10-05T14:52:00', 'value':...","[{'charttime': '2111-10-05T14:44:00', 'value':...","[{'charttime': '2111-10-05T14:52:00', 'value':...","[{'charttime': '2111-10-05T14:52:00', 'value':...","[{'charttime': '2111-10-05T14:52:00', 'value':...","[{'charttime': '2111-10-05T14:52:00', 'value':...",[],"[{'charttime': '2111-10-07T09:11:00', 'value':...","[{'charttime': '2111-10-06T03:10:00', 'value':...","[{'charttime': '2111-10-05T14:52:00', 'value':...","[{'charttime': '2111-10-05T14:52:00', 'value':...","[{'charttime': '2111-10-06T11:26:00', 'value':...",[],[],[],[],[],[],[],"[{'charttime': '2111-10-06T03:10:00', 'value':...","[{'charttime': '2111-10-05T14:44:00', 'value':...",[],[],"[{'charttime': '2111-10-05T14:44:00', 'value':...","[{'charttime': '2111-10-05T14:44:00', 'value':...","[{'charttime': '2111-10-07T02:09:00', 'value':...",[],[],[],[],[],[],[],"[{'charttime': '2111-10-05T14:44:00', 'value':...",[],[],"[{'charttime': '2111-10-05T14:44:00', 'value':...",[],"[{'charttime': '2111-09-29T12:00:00', 'value':...",[],[],[],[],0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"[{'charttime': '2111-10-05T15:00:00', 'output'...","[{'CHARTTIME': None, 'STORETIME': None, 'CATEG...","[{'starttime': '2111-10-05T13:30:00', 'endtime...","[{'suspected_infection_time': None, 'antibioti...","[{'starttime': '2111-10-05T16:15:00', 'endtime...",[],[],"[{'charttime': '2111-10-05T14:15:00', 'PADP': ...","[{'charttime': '2111-10-05T14:15:00', 'mPAP': ...",,[],,"[{'starttime': '2111-10-05T14:00:00', 'endtime...",0,,2111-10-05 17:15:00,"[{'charttime': '2111-10-05T14:15:00', 'value':...","[{'charttime': '2111-10-05T14:15:00', 'value':...","[{'charttime': '2111-10-05T14:15:00', 'value':...","[{'charttime': '2111-10-05T14:15:00', 'value':...","[{'charttime': '2111-10-05T14:00:00', 'value':...","[{'charttime': '2111-10-05T14:15:00', 'value':...","[{'charttime': '2111-10-05T14:15:00', 'value':...","[{'charttime': '2111-10-05T14:15:00', 'ci': 3...."


#### 1.1.1: Column lists

In [3]:
print(df.columns.tolist())

['subject_id', 'hadm_id', 'stay_id', 'age', 'gender', 'ethnicity', 'height', 'weight', 'admission_type', 'admission_location', 'admittime', 'dischtime', 'intime', 'outtime', 'insurance', 'marital_status', 'language', 'icustay_seq', 'los', 'first_careunit', 'last_careunit', 'dbsource', 'hospital_expire_flag', 'deathtime', 'dod', 'sofa', 'postop_intime', 'cabg', 'aortic', 'mitral', 'tricuspid', 'pulmonary', 'aki', 'ph', 'bicarb', 'baseexcess', 'chloride', 'free_calcium', 'glucose', 'potassium', 'sodium', 'lactate', 'hematocrit', 'hb', 'pco2', 'po2', 'bg_temp', 'fio2', 'ventrate', 'tidalvol', 'aado2', 'specimen', 'neutrophils', 'lymphocytes', 'wcc', 'plt', 'crp', 'albumin', 'creatinine', 'bun', 'magnesium', 'alt', 'alp', 'ast', 'ggt', 'bilirubin_total', 'bilirubin_direct', 'bilirubin_indirect', 'inr', 'fibrinogen', 'bleed_time', 'ptt', 'pt', 'hba1c', 'prbc', 'plts', 'ffp', 'cryo', 'mi', 'arrhythmia', 'ccf', 'pvd', 'cvd', 'dementia', 'copd', 'rheum', 'pud', 'liver_mild', 'diab_un', 'diab_c

In [4]:
# removing columns

# removing spurious stay_id_X columns
df = df[[i for i in df.columns if i not in [f'stay_id_{i}' for i in range(1,15)]]]

# removing hospital_expire_flag as it is just a bool of whether deathtime is present
df = df[[i for i in df.columns if i != 'hospital_expire_flag']]

In [5]:
#view and reorder columns
colCats = {
    'admInfo': ['subject_id', 'hadm_id', 'stay_id'],
    'ptInfoCont': ['age', 'height', 'weight'],
    'demographics': ['gender','ethnicity','marital_status','insurance','language'],
    'admCat': ['first_careunit','last_careunit','admission_location','admission_type', 'dbsource'],
    'admTime': ['admittime', 'dischtime', 'intime', 'outtime', 'reint_time', 'ext_time', 'postop_intime', 'deathtime', 'dod'],
    'admNum': ['los','icustay_seq'],
    'markersTs': ['aki', 'ph', 'bicarb', 'baseexcess', 'chloride', 'free_calcium', 'glucose', 'potassium', 'sodium', 'lactate', 
                  'hematocrit', 'hb', 'pco2', 'po2', 'bg_temp', 'fio2', 'ventrate', 'tidalvol', 'aado2', 'specimen', 'neutrophils', 
                  'lymphocytes', 'wcc', 'plt', 'crp', 'albumin', 'creatinine', 'bun', 'magnesium', 'alt', 'alp', 'ast', 'ggt', 
                  'bilirubin_total', 'bilirubin_direct', 'bilirubin_indirect', 'inr', 'fibrinogen', 'bleed_time', 'ptt', 'pt', 'hba1c', 'prbc', 'plts', 'ffp', 'cryo'],
    'procedureType': ['aortic','mitral','tricuspid','pulmonary','cabg'],
    'comorbids': ['liver_severe','liver_mild','rheum','cvd','aids','ckd','copd','arrhythmia','pud','smoking','pvd',
                      'paraplegia','ccf','met_ca','t2dm','t1dm','malig','mi','dementia'],
    'diabUnits': ['diab_un', 'diab_cc'],
    'otherTs': ['dtoutput', 'echo', 'fluid', 'infection', 'intropes', 'insulin', 'pasp', 'padp', 'mpap', 'vent_array'],
    'vitalsTs': ['hr', 'sbp', 'dbp', 'meanbp', 'rr', 'temp', 'spo2', 'cardiac_index'],
    'misc': ['sofa', 'reintubation'],
    'readmitInfo': ['readmit_times', 'readmissions', 'first_readmission']
}
colCatsSquashed = [j for k in [colCats[i] for i in colCats] for j in k]

#print uncategorised and/or unaccounted columns
print([i for i in df.columns if i not in colCatsSquashed])
print([i for i in colCatsSquashed if i not in df.columns])

df[[i for i in df.columns if i not in colCatsSquashed]].head(5)

timeseries=[i for i in [*colCats['markersTs'],*colCats['vitalsTs'],*colCats['otherTs']] if i not in ('vent_array', 'fluid', 'infection', 'intropes', 'echo', 'aki')]
    
timeseries_valuenames = {'cardiac_index':'ci',
                         'plts':'bloodproduct',
                         'ffp':'bloodproduct',
                         'insulin':'amount',
                         'cryo':'bloodproduct',
                         'prbc':'bloodproduct',
                         'dtoutput':'output',
                         'pasp': 'PASP',
                         'padp': 'PADP',
                         'mpap': 'mPAP'
                        }

[]
[]


### 1.2: Cleaning data types

#### 1.2.0: NaN assignment

In [6]:
df = df.replace('NaT',np.datetime64('NaT'))
df = df.replace(['[]','NaN',np.datetime64('NaT')],np.NaN)

#### 1.2.1: Datetime columns

In [7]:
# set column types as datetime
for col in colCats['admTime']:
    df[col] = pd.to_datetime(df[col], format='%Y-%m-%dT%H:%M:%S')

In [8]:
## CHECK FOR ROWS WHERE DEATHTIME < INTIME OR ADMITTIME
# TODO AGAIN

#### 1.2.2: Demographics

In [9]:
for x in colCats['demographics']:
    print(x,': ',df[x].unique())

gender :  ['F' 'M']
ethnicity :  ['white' 'hispanic' 'asian' 'unknown' 'black' 'other' 'native']
marital_status :  ['SINGLE' 'MARRIED' 'DIVORCED' nan 'SEPARATED' 'UNKNOWN (DEFAULT)'
 'WIDOWED' 'LIFE PARTNER']
insurance :  ['Private' 'Medicaid' 'Government' 'Medicare' 'Self Pay']
language :  ['ENGL' nan 'SPAN' 'PORT' 'CANT' 'CAPE' 'PTUN' 'THAI' 'RUSS' 'PERS' 'GERM'
 'HAIT' 'MAND' 'VIET' 'ETHI' 'ARAB' 'ALBA' 'CAMB' 'HIND' '*BEN' 'AMER'
 'KORE' '*GUJ' 'GREE' 'ITAL' 'POLI' 'URDU' '*LEB' 'TURK' 'JAPA' '*ARM'
 '*FIL']


In [10]:
#ethnicity
df.replace({'ethnicity':
                {'unknown': np.NaN,'UNKNOWN':np.NaN,'UNABLE TO OBTAIN':np.NaN,
                'OTHER':'other','WHITE':'white','BLACK/AFRICAN AMERICAN':'black','ASIAN':'asian',
                'HISPANIC/LATINO':'hispanic','AMERICAN INDIAN/ALASKA NATIVE':'native'
                }
            }, 
            inplace=True)
print(df['ethnicity'].unique())

#marital_status
df.replace({'marital_status':
                {'UNKNOWN (DEFAULT)': np.NaN
                }
            }, 
            inplace=True)
print(df['marital_status'].unique())

#language
df.replace({'language':
                {'ENGLISH':'ENGL','?':np.NaN
                }
            }, 
            inplace=True)
print(df['language'].unique())

['white' 'hispanic' 'asian' nan 'black' 'other' 'native']
['SINGLE' 'MARRIED' 'DIVORCED' nan 'SEPARATED' 'WIDOWED' 'LIFE PARTNER']
['ENGL' nan 'SPAN' 'PORT' 'CANT' 'CAPE' 'PTUN' 'THAI' 'RUSS' 'PERS' 'GERM'
 'HAIT' 'MAND' 'VIET' 'ETHI' 'ARAB' 'ALBA' 'CAMB' 'HIND' '*BEN' 'AMER'
 'KORE' '*GUJ' 'GREE' 'ITAL' 'POLI' 'URDU' '*LEB' 'TURK' 'JAPA' '*ARM'
 '*FIL']


#### 1.2.3: ✔Procedure type

In [11]:
for x in colCats['procedureType']:
    print(x,': ',df[x].unique())

aortic :  [0 1]
mitral :  [1 0]
tricuspid :  [0 1]
pulmonary :  [0 1]
cabg :  [0 1]


#### 1.2.5: ✔Comorbidities

In [12]:
for x in colCats['comorbids']:
    print(x,': ',df[x].unique())

liver_severe :  [0 1]
liver_mild :  [0 1]
rheum :  [0 1]
cvd :  [0 1]
aids :  [0 1]
ckd :  [0 1]
copd :  [0 1]
arrhythmia :  [1 0]
pud :  [0 1]
smoking :  [0 1]
pvd :  [0 1]
paraplegia :  [0 1]
ccf :  [0 1]
met_ca :  [0 1]
t2dm :  [0]
t1dm :  [0]
malig :  [0 1]
mi :  [0 1]
dementia :  [0 1]


#### 1.2.6: Admissions (categorical)

In [13]:
for x in colCats['admCat']:
    print(x,': ',df[x].unique())

first_careunit :  ['CSRU' 'CCU' 'MICU' 'SICU' 'TSICU']
last_careunit :  ['CSRU' 'SICU' 'MICU' 'CCU' 'TSICU']
admission_location :  ['EMERGENCY ROOM ADMIT' 'PHYS REFERRAL/NORMAL DELI'
 'TRANSFER FROM HOSP/EXTRAM' 'CLINIC REFERRAL/PREMATURE'
 'TRANSFER FROM OTHER HEALT' 'TRANSFER FROM SKILLED NUR']
admission_type :  ['EMERGENCY' 'ELECTIVE' 'URGENT']
dbsource :  ['carevue' 'metavision' 'both']


#### 1.2.7: Others

In [14]:
# for x in others:
#     print(x,': ',df[x].unique())

### 1.3: Parsing time series data

In [15]:
hoursRange = 24

In [16]:
def va_parser(row):
    """
    Takes row from `df` returns a list of starttime, endtime, vent duration 
    for first and (if applicable) second intubations
    
    Returns
        0 int_time1: first intubation starttime
        1 ext_time1: first intubation endtime
        2 duration1: first intubation duration
        3 int_time2: second intubation starttime
        4 ext_time2: second intubation endtime 
        5 duration2: second intubation duration
    """
    [int_time1, ext_time1, duration1, int_time2, ext_time2, duration2] = [np.NaN for i in range(6)]
    value = row['vent_array']
    if value == np.NaN or pd.isna(value):
        return [np.NaN for i in range(6)]
    
    a = value.replace("'",'"')
    a = a.replace('\n ...\n',',').replace('\n', ',').replace('...', '')
    a = a.replace('),', ')",')
    a = json.loads(a)
    b = [(i['starttime'], i['endtime'], i['duration_hours']) for i in a]
    int_time1=dt.datetime.strptime(b[0][0],'%Y-%m-%dT%H:%M:%S')
    ext_time1=dt.datetime.strptime(b[0][0],'%Y-%m-%dT%H:%M:%S')
    duration1=float(b[0][2])
    
    if len(b)>=2:
        int_time2=dt.datetime.strptime(b[1][0],'%Y-%m-%dT%H:%M:%S')
        ext_time2=dt.datetime.strptime(b[1][1],'%Y-%m-%dT%H:%M:%S')
        duration2=float(b[1][2])
    
    return int_time1, ext_time1, duration1, int_time2, ext_time2, duration2

for col in ['int_time1', 'ext_time1', 'duration1', 'int_time2', 'ext_time2', 'duration2']:
    df[col] = np.zeros(df.shape[0])

for i in range(df.shape[0]):
    x = va_parser(df.loc[i])
    for i in range(6):
        df[['int_time1', 'ext_time1', 'duration1', 'int_time2', 'ext_time2', 'duration2'][i]] = x[i]

colCats['ventTimes'] = ['int_time1', 'ext_time1', 'duration1', 'int_time2', 'ext_time2', 'duration2']
df.head(5)

Unnamed: 0,subject_id,hadm_id,stay_id,age,gender,ethnicity,height,weight,admission_type,admission_location,admittime,dischtime,intime,outtime,insurance,marital_status,language,icustay_seq,los,first_careunit,last_careunit,dbsource,deathtime,dod,sofa,postop_intime,cabg,aortic,mitral,tricuspid,pulmonary,aki,ph,bicarb,baseexcess,chloride,free_calcium,glucose,potassium,sodium,lactate,hematocrit,hb,pco2,po2,bg_temp,fio2,ventrate,tidalvol,aado2,specimen,neutrophils,lymphocytes,wcc,plt,crp,albumin,creatinine,bun,magnesium,alt,alp,ast,ggt,bilirubin_total,bilirubin_direct,bilirubin_indirect,inr,fibrinogen,bleed_time,ptt,pt,hba1c,prbc,plts,ffp,cryo,mi,arrhythmia,ccf,pvd,cvd,dementia,copd,rheum,pud,liver_mild,diab_un,diab_cc,t1dm,t2dm,paraplegia,ckd,malig,liver_severe,met_ca,aids,smoking,dtoutput,echo,fluid,infection,intropes,insulin,pasp,padp,mpap,readmissions,readmit_times,first_readmission,vent_array,reintubation,reint_time,ext_time,hr,sbp,dbp,meanbp,rr,temp,spo2,cardiac_index,int_time1,ext_time1,duration1,int_time2,ext_time2,duration2
0,28772,132385,293560,19,F,white,170.18,115.4,EMERGENCY,EMERGENCY ROOM ADMIT,2148-12-15 00:19:00,2149-01-04 15:45:00,2148-12-23 13:00:34,2148-12-24 16:42:20,Private,SINGLE,ENGL,2,1.154,CSRU,CSRU,carevue,NaT,NaT,2,2148-12-23 15:00:00,0,0,1,0,0,"[{'charttime': '2148-12-23T15:35:00', 'aki_sta...","[{'charttime': '2148-12-23T15:58:00', 'value':...","[{'charttime': '2148-12-23T15:52:00', 'value':...","[{'charttime': '2148-12-23T15:58:00', 'value':...","[{'charttime': '2148-12-23T15:52:00', 'value':...","[{'charttime': '2148-12-23T15:58:00', 'value':...","[{'charttime': '2148-12-23T15:58:00', 'value':...","[{'charttime': '2148-12-23T15:58:00', 'value':...","[{'charttime': '2148-12-23T15:58:00', 'value':...",,,"[{'charttime': '2148-12-23T15:52:00', 'value':...","[{'charttime': '2148-12-23T15:58:00', 'value':...","[{'charttime': '2148-12-23T15:58:00', 'value':...",,,,,,,,,"[{'charttime': '2148-12-23T15:52:00', 'value':...","[{'charttime': '2148-12-23T15:52:00', 'value':...",,,"[{'charttime': '2148-12-23T15:52:00', 'value':...","[{'charttime': '2148-12-23T15:52:00', 'value':...","[{'charttime': '2148-12-24T02:47:00', 'value':...",,,,,,,,"[{'charttime': '2148-12-23T15:52:00', 'value':...",,,"[{'charttime': '2148-12-23T15:52:00', 'value':...",,"[{'charttime': '2148-12-20T14:20:00', 'value':...",,,,,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"[{'charttime': '2148-12-23T15:35:00', 'output'...","[{'CHARTTIME': None, 'STORETIME': None, 'CATEG...","[{'starttime': '2148-12-23T14:35:00', 'endtime...",[{'suspected_infection_time': '2148-12-20T12:2...,,,,"[{'charttime': '2148-12-23T15:35:00', 'PADP': ...","[{'charttime': '2148-12-23T15:35:00', 'mPAP': ...",,,,"[{'starttime': '2148-12-23T15:00:00', 'endtime...",0,NaT,2148-12-23 18:00:00,"[{'charttime': '2148-12-23T15:35:00', 'value':...","[{'charttime': '2148-12-23T16:00:00', 'value':...","[{'charttime': '2148-12-23T16:00:00', 'value':...","[{'charttime': '2148-12-23T15:35:00', 'value':...","[{'charttime': '2148-12-23T15:00:00', 'value':...","[{'charttime': '2148-12-23T15:45:00', 'value':...","[{'charttime': '2148-12-23T16:00:00', 'value':...","[{'charttime': '2148-12-23T15:45:00', 'ci': 2....",2197-06-09 14:00:00,2197-06-09 14:00:00,5.283333,,,
1,73457,184405,251108,21,F,hispanic,152.4,60.0,ELECTIVE,PHYS REFERRAL/NORMAL DELI,2198-12-28 12:00:00,2199-01-04 20:05:00,2198-12-28 10:37:23,2198-12-29 10:14:22,Medicaid,SINGLE,ENGL,1,0.984,CSRU,CSRU,metavision,NaT,NaT,6,2198-12-28 13:00:00,0,0,1,0,0,"[{'charttime': '2198-12-28T13:59:00', 'aki_sta...","[{'charttime': '2198-12-28T13:38:00', 'value':...","[{'charttime': '2198-12-28T13:59:00', 'value':...","[{'charttime': '2198-12-28T13:38:00', 'value':...","[{'charttime': '2198-12-28T13:38:00', 'value':...","[{'charttime': '2198-12-28T13:38:00', 'value':...","[{'charttime': '2198-12-28T13:38:00', 'value':...","[{'charttime': '2198-12-28T13:38:00', 'value':...","[{'charttime': '2198-12-28T13:38:00', 'value':...","[{'charttime': '2198-12-28T13:38:00', 'value':...","[{'charttime': '2198-12-28T13:38:00', 'value':...","[{'charttime': '2198-12-28T13:38:00', 'value':...","[{'charttime': '2198-12-28T13:38:00', 'value':...","[{'charttime': '2198-12-28T13:38:00', 'value':...",,,,,,,,,"[{'charttime': '2198-12-28T13:59:00', 'value':...","[{'charttime': '2198-12-28T13:59:00', 'value':...",,,"[{'charttime': '2198-12-28T13:59:00', 'value':...","[{'charttime': '2198-12-28T13:59:00', 'value':...","[{'charttime': '2198-12-29T02:12:00', 'value':...",,,,,,,,"[{'charttime': '2198-12-28T13:59:00', 'value':...",,,"[{'charttime': '2198-12-28T13:59:00', 'value':...",,,"[{'charttime': '2198-12-28T15:12:00', 'bloodpr...",,,,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"[{'charttime': '2198-12-28T13:45:00', 'output'...","[{'CHARTTIME': None, 'STORETIME': None, 'CATEG...","[{'starttime': '2198-12-28T12:45:00', 'endtime...",[{'suspected_infection_time': '2198-12-28T13:5...,"[{'starttime': '2198-12-28T13:55:00', 'endtime...",,"[{'charttime': '2198-12-28T13:47:00', 'PASP': ...","[{'charttime': '2198-12-28T13:47:00', 'PADP': ...","[{'charttime': '2198-12-28T13:47:00', 'mPAP': ...",,,,"[{'starttime': '2198-12-28T13:00:00', 'endtime...",0,NaT,2198-12-28 18:50:00,,,,,,,,,2197-06-09 14:00:00,2197-06-09 14:00:00,5.283333,,,
2,93887,137808,242592,21,F,hispanic,160.02,74.4,ELECTIVE,PHYS REFERRAL/NORMAL DELI,2188-02-04 00:39:00,2188-02-09 16:41:00,2188-02-05 11:15:53,2188-02-07 18:14:19,Medicaid,SINGLE,ENGL,1,2.2906,CSRU,CSRU,metavision,NaT,NaT,2,2188-02-05 16:40:00,0,0,0,1,0,"[{'charttime': '2188-02-05T17:00:00', 'aki_sta...","[{'charttime': '2188-02-05T17:07:00', 'value':...","[{'charttime': '2188-02-05T17:00:00', 'value':...","[{'charttime': '2188-02-05T17:07:00', 'value':...","[{'charttime': '2188-02-05T17:00:00', 'value':...","[{'charttime': '2188-02-05T17:07:00', 'value':...","[{'charttime': '2188-02-05T17:07:00', 'value':...","[{'charttime': '2188-02-05T17:07:00', 'value':...","[{'charttime': '2188-02-05T17:07:00', 'value':...",,,"[{'charttime': '2188-02-05T17:00:00', 'value':...","[{'charttime': '2188-02-05T17:07:00', 'value':...","[{'charttime': '2188-02-05T17:07:00', 'value':...",,,,,,,,,"[{'charttime': '2188-02-05T17:00:00', 'value':...","[{'charttime': '2188-02-05T17:00:00', 'value':...",,,"[{'charttime': '2188-02-05T17:00:00', 'value':...","[{'charttime': '2188-02-05T17:00:00', 'value':...","[{'charttime': '2188-02-06T03:19:00', 'value':...",,,,,,,,"[{'charttime': '2188-02-05T17:00:00', 'value':...",,,"[{'charttime': '2188-02-05T17:00:00', 'value':...",,"[{'charttime': '2188-01-16T07:05:00', 'value':...","[{'charttime': '2188-02-07T14:10:00', 'bloodpr...",,,,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"[{'charttime': '2188-02-05T17:01:00', 'output'...","[{'CHARTTIME': None, 'STORETIME': None, 'CATEG...","[{'starttime': '2188-02-05T15:36:00', 'endtime...",[{'suspected_infection_time': '2188-02-04T22:4...,"[{'starttime': '2188-02-05T16:35:00', 'endtime...",,,,,,,,"[{'starttime': '2188-02-05T16:40:00', 'endtime...",0,NaT,2188-02-05 21:00:00,,,,,,,,,2197-06-09 14:00:00,2197-06-09 14:00:00,5.283333,,,
3,16927,109148,254842,22,M,white,190.5,71.1,EMERGENCY,EMERGENCY ROOM ADMIT,2166-09-24 20:15:00,2166-10-20 13:30:00,2166-10-06 09:41:05,2166-10-07 17:19:43,Private,SINGLE,,2,1.3185,CSRU,CSRU,carevue,NaT,NaT,5,2166-10-06 11:30:00,0,1,1,0,0,"[{'charttime': '2166-10-06T03:48:00', 'aki_sta...","[{'charttime': '2166-10-06T12:01:00', 'value':...","[{'charttime': '2166-10-06T11:53:00', 'value':...","[{'charttime': '2166-10-06T12:01:00', 'value':...","[{'charttime': '2166-10-06T11:53:00', 'value':...","[{'charttime': '2166-10-06T12:01:00', 'value':...","[{'charttime': '2166-10-06T12:01:00', 'value':...","[{'charttime': '2166-10-06T12:01:00', 'value':...","[{'charttime': '2166-10-06T12:01:00', 'value':...",,,"[{'charttime': '2166-10-06T11:53:00', 'value':...","[{'charttime': '2166-10-06T12:01:00', 'value':...","[{'charttime': '2166-10-06T12:01:00', 'value':...",,,,,,,,,"[{'charttime': '2166-10-06T11:53:00', 'value':...","[{'charttime': '2166-10-06T11:53:00', 'value':...",,,"[{'charttime': '2166-10-06T11:53:00', 'value':...","[{'charttime': '2166-10-06T11:53:00', 'value':...","[{'charttime': '2166-10-06T17:59:00', 'value':...",,,,,,,,"[{'charttime': '2166-10-06T11:53:00', 'value':...",,,"[{'charttime': '2166-10-06T11:53:00', 'value':...",,,"[{'charttime': '2166-10-07T00:30:00', 'bloodpr...",,,,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"[{'charttime': '2166-10-06T11:30:00', 'output'...","[{'CHARTTIME': None, 'STORETIME': None, 'CATEG...","[{'starttime': '2166-09-27T15:00:00', 'endtime...",[{'suspected_infection_time': '2166-09-24T11:1...,"[{'starttime': '2166-10-06T11:30:00', 'endtime...",,,"[{'charttime': '2166-10-06T11:30:00', 'PADP': ...","[{'charttime': '2166-10-06T11:30:00', 'mPAP': ...",,,,"[{'starttime': '2166-10-06T11:30:00', 'endtime...",0,NaT,2166-10-06 14:11:00,"[{'charttime': '2166-10-06T11:30:00', 'value':...","[{'charttime': '2166-10-06T11:30:00', 'value':...","[{'charttime': '2166-10-06T11:30:00', 'value':...","[{'charttime': '2166-10-06T11:30:00', 'value':...","[{'charttime': '2166-10-06T11:30:00', 'value':...","[{'charttime': '2166-10-06T11:30:00', 'value':...","[{'charttime': '2166-10-06T11:30:00', 'value':...","[{'charttime': '2166-10-06T12:00:00', 'ci': 5....",2197-06-09 14:00:00,2197-06-09 14:00:00,5.283333,,,
4,4846,154439,264181,22,M,asian,182.88,66.0,ELECTIVE,PHYS REFERRAL/NORMAL DELI,2111-10-05 07:15:00,2111-10-13 15:30:00,2111-10-05 13:02:50,2111-10-09 10:13:52,Private,SINGLE,,1,3.8827,CSRU,CSRU,carevue,NaT,NaT,7,2111-10-05 14:00:00,0,0,1,0,0,"[{'charttime': '2111-10-05T14:30:00', 'aki_sta...","[{'charttime': '2111-10-05T14:52:00', 'value':...","[{'charttime': '2111-10-05T14:44:00', 'value':...","[{'charttime': '2111-10-05T14:52:00', 'value':...","[{'charttime': '2111-10-05T14:44:00', 'value':...","[{'charttime': '2111-10-05T14:52:00', 'value':...","[{'charttime': '2111-10-05T14:52:00', 'value':...","[{'charttime': '2111-10-05T14:52:00', 'value':...","[{'charttime': '2111-10-05T14:52:00', 'value':...",,"[{'charttime': '2111-10-07T09:11:00', 'value':...","[{'charttime': '2111-10-06T03:10:00', 'value':...","[{'charttime': '2111-10-05T14:52:00', 'value':...","[{'charttime': '2111-10-05T14:52:00', 'value':...","[{'charttime': '2111-10-06T11:26:00', 'value':...",,,,,,,,"[{'charttime': '2111-10-06T03:10:00', 'value':...","[{'charttime': '2111-10-05T14:44:00', 'value':...",,,"[{'charttime': '2111-10-05T14:44:00', 'value':...","[{'charttime': '2111-10-05T14:44:00', 'value':...","[{'charttime': '2111-10-07T02:09:00', 'value':...",,,,,,,,"[{'charttime': '2111-10-05T14:44:00', 'value':...",,,"[{'charttime': '2111-10-05T14:44:00', 'value':...",,"[{'charttime': '2111-09-29T12:00:00', 'value':...",,,,,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"[{'charttime': '2111-10-05T15:00:00', 'output'...","[{'CHARTTIME': None, 'STORETIME': None, 'CATEG...","[{'starttime': '2111-10-05T13:30:00', 'endtime...","[{'suspected_infection_time': None, 'antibioti...","[{'starttime': '2111-10-05T16:15:00', 'endtime...",,,"[{'charttime': '2111-10-05T14:15:00', 'PADP': ...","[{'charttime': '2111-10-05T14:15:00', 'mPAP': ...",,,,"[{'starttime': '2111-10-05T14:00:00', 'endtime...",0,NaT,2111-10-05 17:15:00,"[{'charttime': '2111-10-05T14:15:00', 'value':...","[{'charttime': '2111-10-05T14:15:00', 'value':...","[{'charttime': '2111-10-05T14:15:00', 'value':...","[{'charttime': '2111-10-05T14:15:00', 'value':...","[{'charttime': '2111-10-05T14:00:00', 'value':...","[{'charttime': '2111-10-05T14:15:00', 'value':...","[{'charttime': '2111-10-05T14:15:00', 'value':...","[{'charttime': '2111-10-05T14:15:00', 'ci': 3....",2197-06-09 14:00:00,2197-06-09 14:00:00,5.283333,,,


In [17]:
def ts_parser(value, timeDelta=None, timeLimits=None, valuename='value'):
    '''
    # timeDelta is timedelta in hours from earliest entry
    # timeLimits = (startTime, endTime)
    # if both timeDelta and timeLimits are provided, timeDelta overrules.
    # if both are None, then all timepoints are accepted
    # returns arithmetic mean, max, min, first
    '''
    if value == np.NaN or pd.isna(value):
        return np.NaN, np.NaN, np.NaN, np.NaN
    a = value.replace("'", '"')
    a = a.replace('\n ...\n',',').replace('\n', ',').replace('...', '')
    a = a.replace('datetime.', '"dt.')
    a = a.replace(f'), "{valuename}"', f')", "{valuename}"')
    a = a.replace('"unit": None', '"unit": "None"')
    a = a.replace('starttime', 'charttime')
    a = json.loads(a)
    b = [(dt.datetime.strptime(i['charttime'],'%Y-%m-%dT%H:%M:%S'), i[valuename]) for i in a]
    
    if timeDelta:
        startTime = min(b, key=lambda x:x[0])[0]
        inc_b = [i[1] for i in b if i[0] <= startTime + dt.timedelta(hours=timeDelta)]
    else:
        if timeLimits:
            inc_b = [i[1] for i in b if i[0] >= timeLimits[0] and i[0] <= timeLimits[1]]
        else:
            inc_b = [i[1] for i in b]
    if len(inc_b) == 0:
        return np.NaN, np.NaN, np.NaN, np.NaN
    
    return sum(inc_b) / len(inc_b), max(inc_b), min(inc_b), sorted(inc_b)[0]

In [18]:
# delete all rows where int_time1 OR ext_time1 are missing ## NOTE: no need as postop_intime has now taken this bit over
df = df[~(pd.isnull(df['postop_intime']) | pd.isnull(df['outtime']))]
df = df.reset_index()
df.head(5)

Unnamed: 0,index,subject_id,hadm_id,stay_id,age,gender,ethnicity,height,weight,admission_type,admission_location,admittime,dischtime,intime,outtime,insurance,marital_status,language,icustay_seq,los,first_careunit,last_careunit,dbsource,deathtime,dod,sofa,postop_intime,cabg,aortic,mitral,tricuspid,pulmonary,aki,ph,bicarb,baseexcess,chloride,free_calcium,glucose,potassium,sodium,lactate,hematocrit,hb,pco2,po2,bg_temp,fio2,ventrate,tidalvol,aado2,specimen,neutrophils,lymphocytes,wcc,plt,crp,albumin,creatinine,bun,magnesium,alt,alp,ast,ggt,bilirubin_total,bilirubin_direct,bilirubin_indirect,inr,fibrinogen,bleed_time,ptt,pt,hba1c,prbc,plts,ffp,cryo,mi,arrhythmia,ccf,pvd,cvd,dementia,copd,rheum,pud,liver_mild,diab_un,diab_cc,t1dm,t2dm,paraplegia,ckd,malig,liver_severe,met_ca,aids,smoking,dtoutput,echo,fluid,infection,intropes,insulin,pasp,padp,mpap,readmissions,readmit_times,first_readmission,vent_array,reintubation,reint_time,ext_time,hr,sbp,dbp,meanbp,rr,temp,spo2,cardiac_index,int_time1,ext_time1,duration1,int_time2,ext_time2,duration2
0,0,28772,132385,293560,19,F,white,170.18,115.4,EMERGENCY,EMERGENCY ROOM ADMIT,2148-12-15 00:19:00,2149-01-04 15:45:00,2148-12-23 13:00:34,2148-12-24 16:42:20,Private,SINGLE,ENGL,2,1.154,CSRU,CSRU,carevue,NaT,NaT,2,2148-12-23 15:00:00,0,0,1,0,0,"[{'charttime': '2148-12-23T15:35:00', 'aki_sta...","[{'charttime': '2148-12-23T15:58:00', 'value':...","[{'charttime': '2148-12-23T15:52:00', 'value':...","[{'charttime': '2148-12-23T15:58:00', 'value':...","[{'charttime': '2148-12-23T15:52:00', 'value':...","[{'charttime': '2148-12-23T15:58:00', 'value':...","[{'charttime': '2148-12-23T15:58:00', 'value':...","[{'charttime': '2148-12-23T15:58:00', 'value':...","[{'charttime': '2148-12-23T15:58:00', 'value':...",,,"[{'charttime': '2148-12-23T15:52:00', 'value':...","[{'charttime': '2148-12-23T15:58:00', 'value':...","[{'charttime': '2148-12-23T15:58:00', 'value':...",,,,,,,,,"[{'charttime': '2148-12-23T15:52:00', 'value':...","[{'charttime': '2148-12-23T15:52:00', 'value':...",,,"[{'charttime': '2148-12-23T15:52:00', 'value':...","[{'charttime': '2148-12-23T15:52:00', 'value':...","[{'charttime': '2148-12-24T02:47:00', 'value':...",,,,,,,,"[{'charttime': '2148-12-23T15:52:00', 'value':...",,,"[{'charttime': '2148-12-23T15:52:00', 'value':...",,"[{'charttime': '2148-12-20T14:20:00', 'value':...",,,,,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"[{'charttime': '2148-12-23T15:35:00', 'output'...","[{'CHARTTIME': None, 'STORETIME': None, 'CATEG...","[{'starttime': '2148-12-23T14:35:00', 'endtime...",[{'suspected_infection_time': '2148-12-20T12:2...,,,,"[{'charttime': '2148-12-23T15:35:00', 'PADP': ...","[{'charttime': '2148-12-23T15:35:00', 'mPAP': ...",,,,"[{'starttime': '2148-12-23T15:00:00', 'endtime...",0,NaT,2148-12-23 18:00:00,"[{'charttime': '2148-12-23T15:35:00', 'value':...","[{'charttime': '2148-12-23T16:00:00', 'value':...","[{'charttime': '2148-12-23T16:00:00', 'value':...","[{'charttime': '2148-12-23T15:35:00', 'value':...","[{'charttime': '2148-12-23T15:00:00', 'value':...","[{'charttime': '2148-12-23T15:45:00', 'value':...","[{'charttime': '2148-12-23T16:00:00', 'value':...","[{'charttime': '2148-12-23T15:45:00', 'ci': 2....",2197-06-09 14:00:00,2197-06-09 14:00:00,5.283333,,,
1,1,73457,184405,251108,21,F,hispanic,152.4,60.0,ELECTIVE,PHYS REFERRAL/NORMAL DELI,2198-12-28 12:00:00,2199-01-04 20:05:00,2198-12-28 10:37:23,2198-12-29 10:14:22,Medicaid,SINGLE,ENGL,1,0.984,CSRU,CSRU,metavision,NaT,NaT,6,2198-12-28 13:00:00,0,0,1,0,0,"[{'charttime': '2198-12-28T13:59:00', 'aki_sta...","[{'charttime': '2198-12-28T13:38:00', 'value':...","[{'charttime': '2198-12-28T13:59:00', 'value':...","[{'charttime': '2198-12-28T13:38:00', 'value':...","[{'charttime': '2198-12-28T13:38:00', 'value':...","[{'charttime': '2198-12-28T13:38:00', 'value':...","[{'charttime': '2198-12-28T13:38:00', 'value':...","[{'charttime': '2198-12-28T13:38:00', 'value':...","[{'charttime': '2198-12-28T13:38:00', 'value':...","[{'charttime': '2198-12-28T13:38:00', 'value':...","[{'charttime': '2198-12-28T13:38:00', 'value':...","[{'charttime': '2198-12-28T13:38:00', 'value':...","[{'charttime': '2198-12-28T13:38:00', 'value':...","[{'charttime': '2198-12-28T13:38:00', 'value':...",,,,,,,,,"[{'charttime': '2198-12-28T13:59:00', 'value':...","[{'charttime': '2198-12-28T13:59:00', 'value':...",,,"[{'charttime': '2198-12-28T13:59:00', 'value':...","[{'charttime': '2198-12-28T13:59:00', 'value':...","[{'charttime': '2198-12-29T02:12:00', 'value':...",,,,,,,,"[{'charttime': '2198-12-28T13:59:00', 'value':...",,,"[{'charttime': '2198-12-28T13:59:00', 'value':...",,,"[{'charttime': '2198-12-28T15:12:00', 'bloodpr...",,,,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"[{'charttime': '2198-12-28T13:45:00', 'output'...","[{'CHARTTIME': None, 'STORETIME': None, 'CATEG...","[{'starttime': '2198-12-28T12:45:00', 'endtime...",[{'suspected_infection_time': '2198-12-28T13:5...,"[{'starttime': '2198-12-28T13:55:00', 'endtime...",,"[{'charttime': '2198-12-28T13:47:00', 'PASP': ...","[{'charttime': '2198-12-28T13:47:00', 'PADP': ...","[{'charttime': '2198-12-28T13:47:00', 'mPAP': ...",,,,"[{'starttime': '2198-12-28T13:00:00', 'endtime...",0,NaT,2198-12-28 18:50:00,,,,,,,,,2197-06-09 14:00:00,2197-06-09 14:00:00,5.283333,,,
2,2,93887,137808,242592,21,F,hispanic,160.02,74.4,ELECTIVE,PHYS REFERRAL/NORMAL DELI,2188-02-04 00:39:00,2188-02-09 16:41:00,2188-02-05 11:15:53,2188-02-07 18:14:19,Medicaid,SINGLE,ENGL,1,2.2906,CSRU,CSRU,metavision,NaT,NaT,2,2188-02-05 16:40:00,0,0,0,1,0,"[{'charttime': '2188-02-05T17:00:00', 'aki_sta...","[{'charttime': '2188-02-05T17:07:00', 'value':...","[{'charttime': '2188-02-05T17:00:00', 'value':...","[{'charttime': '2188-02-05T17:07:00', 'value':...","[{'charttime': '2188-02-05T17:00:00', 'value':...","[{'charttime': '2188-02-05T17:07:00', 'value':...","[{'charttime': '2188-02-05T17:07:00', 'value':...","[{'charttime': '2188-02-05T17:07:00', 'value':...","[{'charttime': '2188-02-05T17:07:00', 'value':...",,,"[{'charttime': '2188-02-05T17:00:00', 'value':...","[{'charttime': '2188-02-05T17:07:00', 'value':...","[{'charttime': '2188-02-05T17:07:00', 'value':...",,,,,,,,,"[{'charttime': '2188-02-05T17:00:00', 'value':...","[{'charttime': '2188-02-05T17:00:00', 'value':...",,,"[{'charttime': '2188-02-05T17:00:00', 'value':...","[{'charttime': '2188-02-05T17:00:00', 'value':...","[{'charttime': '2188-02-06T03:19:00', 'value':...",,,,,,,,"[{'charttime': '2188-02-05T17:00:00', 'value':...",,,"[{'charttime': '2188-02-05T17:00:00', 'value':...",,"[{'charttime': '2188-01-16T07:05:00', 'value':...","[{'charttime': '2188-02-07T14:10:00', 'bloodpr...",,,,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"[{'charttime': '2188-02-05T17:01:00', 'output'...","[{'CHARTTIME': None, 'STORETIME': None, 'CATEG...","[{'starttime': '2188-02-05T15:36:00', 'endtime...",[{'suspected_infection_time': '2188-02-04T22:4...,"[{'starttime': '2188-02-05T16:35:00', 'endtime...",,,,,,,,"[{'starttime': '2188-02-05T16:40:00', 'endtime...",0,NaT,2188-02-05 21:00:00,,,,,,,,,2197-06-09 14:00:00,2197-06-09 14:00:00,5.283333,,,
3,3,16927,109148,254842,22,M,white,190.5,71.1,EMERGENCY,EMERGENCY ROOM ADMIT,2166-09-24 20:15:00,2166-10-20 13:30:00,2166-10-06 09:41:05,2166-10-07 17:19:43,Private,SINGLE,,2,1.3185,CSRU,CSRU,carevue,NaT,NaT,5,2166-10-06 11:30:00,0,1,1,0,0,"[{'charttime': '2166-10-06T03:48:00', 'aki_sta...","[{'charttime': '2166-10-06T12:01:00', 'value':...","[{'charttime': '2166-10-06T11:53:00', 'value':...","[{'charttime': '2166-10-06T12:01:00', 'value':...","[{'charttime': '2166-10-06T11:53:00', 'value':...","[{'charttime': '2166-10-06T12:01:00', 'value':...","[{'charttime': '2166-10-06T12:01:00', 'value':...","[{'charttime': '2166-10-06T12:01:00', 'value':...","[{'charttime': '2166-10-06T12:01:00', 'value':...",,,"[{'charttime': '2166-10-06T11:53:00', 'value':...","[{'charttime': '2166-10-06T12:01:00', 'value':...","[{'charttime': '2166-10-06T12:01:00', 'value':...",,,,,,,,,"[{'charttime': '2166-10-06T11:53:00', 'value':...","[{'charttime': '2166-10-06T11:53:00', 'value':...",,,"[{'charttime': '2166-10-06T11:53:00', 'value':...","[{'charttime': '2166-10-06T11:53:00', 'value':...","[{'charttime': '2166-10-06T17:59:00', 'value':...",,,,,,,,"[{'charttime': '2166-10-06T11:53:00', 'value':...",,,"[{'charttime': '2166-10-06T11:53:00', 'value':...",,,"[{'charttime': '2166-10-07T00:30:00', 'bloodpr...",,,,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"[{'charttime': '2166-10-06T11:30:00', 'output'...","[{'CHARTTIME': None, 'STORETIME': None, 'CATEG...","[{'starttime': '2166-09-27T15:00:00', 'endtime...",[{'suspected_infection_time': '2166-09-24T11:1...,"[{'starttime': '2166-10-06T11:30:00', 'endtime...",,,"[{'charttime': '2166-10-06T11:30:00', 'PADP': ...","[{'charttime': '2166-10-06T11:30:00', 'mPAP': ...",,,,"[{'starttime': '2166-10-06T11:30:00', 'endtime...",0,NaT,2166-10-06 14:11:00,"[{'charttime': '2166-10-06T11:30:00', 'value':...","[{'charttime': '2166-10-06T11:30:00', 'value':...","[{'charttime': '2166-10-06T11:30:00', 'value':...","[{'charttime': '2166-10-06T11:30:00', 'value':...","[{'charttime': '2166-10-06T11:30:00', 'value':...","[{'charttime': '2166-10-06T11:30:00', 'value':...","[{'charttime': '2166-10-06T11:30:00', 'value':...","[{'charttime': '2166-10-06T12:00:00', 'ci': 5....",2197-06-09 14:00:00,2197-06-09 14:00:00,5.283333,,,
4,4,4846,154439,264181,22,M,asian,182.88,66.0,ELECTIVE,PHYS REFERRAL/NORMAL DELI,2111-10-05 07:15:00,2111-10-13 15:30:00,2111-10-05 13:02:50,2111-10-09 10:13:52,Private,SINGLE,,1,3.8827,CSRU,CSRU,carevue,NaT,NaT,7,2111-10-05 14:00:00,0,0,1,0,0,"[{'charttime': '2111-10-05T14:30:00', 'aki_sta...","[{'charttime': '2111-10-05T14:52:00', 'value':...","[{'charttime': '2111-10-05T14:44:00', 'value':...","[{'charttime': '2111-10-05T14:52:00', 'value':...","[{'charttime': '2111-10-05T14:44:00', 'value':...","[{'charttime': '2111-10-05T14:52:00', 'value':...","[{'charttime': '2111-10-05T14:52:00', 'value':...","[{'charttime': '2111-10-05T14:52:00', 'value':...","[{'charttime': '2111-10-05T14:52:00', 'value':...",,"[{'charttime': '2111-10-07T09:11:00', 'value':...","[{'charttime': '2111-10-06T03:10:00', 'value':...","[{'charttime': '2111-10-05T14:52:00', 'value':...","[{'charttime': '2111-10-05T14:52:00', 'value':...","[{'charttime': '2111-10-06T11:26:00', 'value':...",,,,,,,,"[{'charttime': '2111-10-06T03:10:00', 'value':...","[{'charttime': '2111-10-05T14:44:00', 'value':...",,,"[{'charttime': '2111-10-05T14:44:00', 'value':...","[{'charttime': '2111-10-05T14:44:00', 'value':...","[{'charttime': '2111-10-07T02:09:00', 'value':...",,,,,,,,"[{'charttime': '2111-10-05T14:44:00', 'value':...",,,"[{'charttime': '2111-10-05T14:44:00', 'value':...",,"[{'charttime': '2111-09-29T12:00:00', 'value':...",,,,,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"[{'charttime': '2111-10-05T15:00:00', 'output'...","[{'CHARTTIME': None, 'STORETIME': None, 'CATEG...","[{'starttime': '2111-10-05T13:30:00', 'endtime...","[{'suspected_infection_time': None, 'antibioti...","[{'starttime': '2111-10-05T16:15:00', 'endtime...",,,"[{'charttime': '2111-10-05T14:15:00', 'PADP': ...","[{'charttime': '2111-10-05T14:15:00', 'mPAP': ...",,,,"[{'starttime': '2111-10-05T14:00:00', 'endtime...",0,NaT,2111-10-05 17:15:00,"[{'charttime': '2111-10-05T14:15:00', 'value':...","[{'charttime': '2111-10-05T14:15:00', 'value':...","[{'charttime': '2111-10-05T14:15:00', 'value':...","[{'charttime': '2111-10-05T14:15:00', 'value':...","[{'charttime': '2111-10-05T14:00:00', 'value':...","[{'charttime': '2111-10-05T14:15:00', 'value':...","[{'charttime': '2111-10-05T14:15:00', 'value':...","[{'charttime': '2111-10-05T14:15:00', 'ci': 3....",2197-06-09 14:00:00,2197-06-09 14:00:00,5.283333,,,


### 1.4: Handling missing data

#### 1.4.0 Assessing for missing data

In [19]:
def missing_values_table(df): 
    mis_val = df.isnull().sum()
    mis_val_percent = 100 * df.isnull().sum() / len(df)
    mis_val_table = pd.concat([mis_val, mis_val_percent], axis=1)
    mis_val_table_ren_columns = mis_val_table.rename(columns = {0: 'Missing Values', 1: '% Missing Values'})
    return mis_val_table_ren_columns

missing_data = missing_values_table(df)

#set limit and get list of variables missing above limit in `missing_cols`
# missing_limit = 50 #allen
missing_limit = 70 #marcel
missing_cols = missing_data.loc[missing_data['% Missing Values']>missing_limit].index.tolist()
print(missing_cols)
missing_data = missing_data.loc[missing_data['% Missing Values']>missing_limit]
missing_data = missing_data.sort_values(by=['% Missing Values'])
pd.set_option('display.max_rows', None)
print(missing_data)
pd.reset_option('display.max_rows')

['deathtime', 'dod', 'bg_temp', 'fio2', 'ventrate', 'tidalvol', 'aado2', 'specimen', 'neutrophils', 'lymphocytes', 'crp', 'albumin', 'alt', 'alp', 'ast', 'ggt', 'bilirubin_total', 'bilirubin_direct', 'bilirubin_indirect', 'fibrinogen', 'bleed_time', 'pt', 'plts', 'ffp', 'cryo', 'insulin', 'readmissions', 'readmit_times', 'first_readmission', 'reint_time', 'int_time2', 'ext_time2', 'duration2']
                    Missing Values  % Missing Values
bg_temp                       4657         71.034167
fio2                          4768         72.727273
dod                           5145         78.477730
bilirubin_total               5299         80.826724
alt                           5315         81.070775
ast                           5318         81.116534
alp                           5337         81.406345
cryo                          5502         83.923124
insulin                       5502         83.923124
ffp                           5502         83.923124
tidalvol            

In [20]:
dfDroppedMissing = df[[i for i in df.columns if i not in missing_data.index and i not in ['duration2','int_time2','ext_time2','aado2','fio2','deathtime']]]
print(list(dfDroppedMissing.columns))

['index', 'subject_id', 'hadm_id', 'stay_id', 'age', 'gender', 'ethnicity', 'height', 'weight', 'admission_type', 'admission_location', 'admittime', 'dischtime', 'intime', 'outtime', 'insurance', 'marital_status', 'language', 'icustay_seq', 'los', 'first_careunit', 'last_careunit', 'dbsource', 'sofa', 'postop_intime', 'cabg', 'aortic', 'mitral', 'tricuspid', 'pulmonary', 'aki', 'ph', 'bicarb', 'baseexcess', 'chloride', 'free_calcium', 'glucose', 'potassium', 'sodium', 'lactate', 'hematocrit', 'hb', 'pco2', 'po2', 'wcc', 'plt', 'creatinine', 'bun', 'magnesium', 'inr', 'ptt', 'hba1c', 'prbc', 'mi', 'arrhythmia', 'ccf', 'pvd', 'cvd', 'dementia', 'copd', 'rheum', 'pud', 'liver_mild', 'diab_un', 'diab_cc', 't1dm', 't2dm', 'paraplegia', 'ckd', 'malig', 'liver_severe', 'met_ca', 'aids', 'smoking', 'dtoutput', 'echo', 'fluid', 'infection', 'intropes', 'pasp', 'padp', 'mpap', 'vent_array', 'reintubation', 'ext_time', 'hr', 'sbp', 'dbp', 'meanbp', 'rr', 'temp', 'spo2', 'cardiac_index', 'int_time

#### 1.4.1 Beginning imputation

In [23]:
dfForImpute = pd.DataFrame([0 for i in range(dfDroppedMissing.shape[0])])

# generating timeseries summary values
for column in timeseries:
    if column not in dfDroppedMissing.columns:
        continue
    x = timeseries_valuenames[column] if column in timeseries_valuenames else "value"
    meanList = []
    maxList = []
    minList = []
    for i in range(len(dfDroppedMissing[column])):
        inTime = dfDroppedMissing['postop_intime'][i].to_pydatetime()
        outTime = dfDroppedMissing['outtime'][i].to_pydatetime()
        y = ts_parser(dfDroppedMissing[column][i], timeLimits=(inTime, outTime), valuename=x)
        meanList.append(y[0])
        maxList.append(y[1])
        minList.append(y[2])
    dfForImpute[column+'_mean'] = meanList
    dfForImpute[column+'_max'] = maxList
    dfForImpute[column+'_min'] = minList
    dfForImpute = dfForImpute.copy()

dfForImpute = dfForImpute[[i for i in dfForImpute.columns if i not in (0, '0')]]
dfForImpute.head(5)

Unnamed: 0,ph_mean,ph_max,ph_min,bicarb_mean,bicarb_max,bicarb_min,baseexcess_mean,baseexcess_max,baseexcess_min,chloride_mean,chloride_max,chloride_min,free_calcium_mean,free_calcium_max,free_calcium_min,glucose_mean,glucose_max,glucose_min,potassium_mean,potassium_max,potassium_min,sodium_mean,sodium_max,sodium_min,lactate_mean,lactate_max,lactate_min,hematocrit_mean,hematocrit_max,hematocrit_min,hb_mean,hb_max,hb_min,pco2_mean,pco2_max,pco2_min,po2_mean,po2_max,po2_min,wcc_mean,wcc_max,wcc_min,plt_mean,plt_max,plt_min,creatinine_mean,creatinine_max,creatinine_min,bun_mean,bun_max,bun_min,magnesium_mean,magnesium_max,magnesium_min,inr_mean,inr_max,inr_min,ptt_mean,ptt_max,ptt_min,hba1c_mean,hba1c_max,hba1c_min,prbc_mean,prbc_max,prbc_min,hr_mean,hr_max,hr_min,sbp_mean,sbp_max,sbp_min,dbp_mean,dbp_max,dbp_min,meanbp_mean,meanbp_max,meanbp_min,rr_mean,rr_max,rr_min,temp_mean,temp_max,temp_min,spo2_mean,spo2_max,spo2_min,cardiac_index_mean,cardiac_index_max,cardiac_index_min,dtoutput_mean,dtoutput_max,dtoutput_min,pasp_mean,pasp_max,pasp_min,padp_mean,padp_max,padp_min,mpap_mean,mpap_max,mpap_min
0,7.343333,7.38,7.3,26.0,27.0,25.0,0.0,2.0,-2.0,107.0,109.0,105.0,1.176667,1.27,1.1,121.518519,152.0,88.0,4.466667,4.7,4.0,138.0,138.0,138.0,,,,,,,9.5,9.9,9.1,47.5,55.0,37.0,0.0,2.0,-2.0,19.1,21.0,17.2,332.5,356.0,309.0,0.55,0.6,0.5,9.5,11.0,8.0,2.15,2.3,2.0,1.3,1.3,1.3,41.9,41.9,41.9,,,,,,,93.066667,101.0,83.0,100.160714,128.0,88.0,53.339286,74.0,39.0,65.633333,91.0,54.0,18.666667,28.0,8.0,37.52657,38.200001,35.900002,98.185185,100.0,95.0,2.786307,3.40249,2.11618,28.0,80.0,0.0,,,,15.409091,21.0,11.0,25.727273,35.0,22.0
1,7.352,7.4,7.29,21.5,23.0,20.0,-2.4,-1.0,-4.0,113.666667,119.0,107.0,1.246667,1.33,1.13,101.0,131.0,67.0,3.72,4.5,3.3,139.0,145.0,137.0,2.65,3.5,1.8,22.0,22.0,22.0,7.933333,9.9,6.7,39.8,46.0,35.0,-2.4,-1.0,-4.0,11.9,15.5,8.3,155.5,166.0,145.0,0.45,0.5,0.4,9.0,9.0,9.0,1.8,1.8,1.8,1.7,1.9,1.5,34.65,38.2,31.1,,,,211.94445,350.00001,-64.166669,,,,,,,,,,,,,,,,,,,,,,,,,28.181818,90.0,5.0,23.48,31.0,10.0,4.28,14.0,-4.0,11.76,20.0,4.0
2,7.38,7.44,7.32,24.0,25.0,23.0,-1.5,-1.0,-2.0,107.0,110.0,105.0,1.15,1.21,1.06,107.833333,127.0,98.0,3.9,4.1,3.6,136.666667,138.0,136.0,,,,,,,8.033333,8.9,7.4,41.0,46.0,36.0,-1.5,-1.0,-2.0,8.266667,9.8,7.3,168.0,181.0,155.0,0.533333,0.6,0.5,6.333333,7.0,5.0,1.65,1.7,1.6,1.2,1.2,1.2,43.8,43.8,43.8,,,,350.000008,350.00001,350.000004,,,,,,,,,,,,,,,,,,,,,,,,,40.454545,70.0,0.0,,,,,,,,,
3,7.41625,7.46,7.28,24.0,24.0,24.0,-0.25,0.0,-2.0,106.5,109.0,104.0,1.06,1.13,0.96,98.615385,130.0,69.0,3.975,4.6,3.6,138.0,139.0,137.0,,,,,,,9.1,9.6,8.6,36.625,51.0,30.0,-0.25,0.0,-2.0,26.4,34.7,18.1,231.5,241.0,222.0,0.65,0.7,0.6,15.0,15.0,15.0,1.95,2.1,1.8,1.65,1.8,1.5,39.85,47.8,31.9,,,,375.0,375.0,375.0,103.666667,123.0,83.0,120.74359,151.0,106.0,70.794872,87.0,49.0,86.846154,100.0,66.0,21.1,29.0,12.0,37.008602,38.599998,35.400002,95.046512,100.0,86.0,4.057592,5.39267,3.4555,22.083333,60.0,0.0,,,,21.16129,33.0,14.0,35.387097,49.0,25.0
4,7.361429,7.43,7.26,28.625,32.0,24.0,0.0,6.0,-5.0,100.222222,111.0,91.0,1.088889,1.18,0.9,106.184211,138.0,51.0,4.042857,4.6,3.1,131.25,143.0,120.0,,,,26.0,26.0,26.0,9.285714,11.4,8.6,47.6,63.0,41.0,0.0,6.0,-5.0,8.566667,13.6,6.0,98.142857,132.0,68.0,0.5625,0.6,0.5,10.5,16.0,7.0,1.925,2.6,1.3,1.25,1.5,1.1,31.75,43.3,23.3,,,,,,,80.693694,101.0,69.0,106.294643,135.0,78.0,50.861607,86.0,27.0,68.355854,94.0,49.0,18.219828,27.0,11.0,37.018849,37.799999,35.555556,97.145631,100.0,94.0,3.95824,5.21978,3.07692,19.069767,80.0,0.0,,,,12.830769,22.0,3.0,24.646154,36.0,17.0


In [24]:
# parsing infection

def inf_parser_abx(value, timeLimits=None):
    if value == np.NaN or pd.isna(value):
        return 0

    a = value.replace("'", '"')
    a = a.replace('\n ...\n',',').replace('\n', ',').replace('...', '')
    a = a.replace('": None', '": "None"')
    for valuename in ['antibiotic','antibiotic_time']:
        a = a.replace(f'), "{valuename}"', f')", "{valuename}"')
    a = json.loads(a)
    b = [(dt.datetime.strptime(i['suspected_infection_time'], '%Y-%m-%dT%H:%M:%S') if i['suspected_infection_time'] not in ("None", None) else np.NaN, 
          dt.datetime.strptime(i['antibiotic_time'], '%Y-%m-%dT%H:%M:%S') if i['antibiotic_time'] not in ("None", None) else np.NaN,
          i['antibiotic'] if i['antibiotic'] not in ("None", None) else np.NaN,
          i['specimen'], i['positiveculture']) for i in a]
    b = [i for i in b if not pd.isnull(i[1])]
    if timeLimits == None:
        inc_b = b.copy()
    else:
        inc_b = [i for i in b if not pd.isnull(i[1]) and timeLimits[0] <= i[1] <= timeLimits[1]]
    return len(list(set([i[2] for i in b])))

infList = []
for i in range(len(df['infection'])):
    y = inf_parser_abx(df['infection'][i], timeLimits=(df['postop_intime'][i].to_pydatetime(), df['postop_intime'][i].to_pydatetime() + dt.timedelta(hours=hoursRange)))
    infList.append(y)

dfForImpute['abx_no'] = infList
dfForImpute['abx_bool'] = (dfForImpute['abx_no'] > 0.5).astype('int')
dfForImpute['abx_no'] = dfForImpute['abx_no'].astype('category')
dfForImpute['abx_bool'] = dfForImpute['abx_bool'].astype('category')

In [25]:
# parsing aki

def aki_parser(value, timeLimits=None):
    if value == np.NaN or pd.isna(value):
        return {'creat':[np.NaN for i in range(4)], 'uo':[np.NaN for i in range(4)]}

    a = value.replace("'", '"')
    a = a.replace('\n ...\n',',').replace('\n', ',').replace('...', '')
    a = a.replace('": None', '": "None"')
    a = json.loads(a)
    b = [(dt.datetime.strptime(i['charttime'], '%Y-%m-%dT%H:%M:%S'),
          i['aki_stage_creat'] if i['aki_stage_creat'] not in ("None", None) else np.NaN,
          i['aki_stage_uo'] if i['aki_stage_uo'] not in ("None", None) else np.NaN) for i in a if i['charttime'] not in ('None', None)]
    
    if timeLimits == None:
        creat = [i[1] for i in b if not pd.isnull(i[1])]
        uo = [i[2] for i in b if not pd.isnull(i[2])]
    else:
        creat = [i[1] for i in b if not pd.isnull(i[1]) and timeLimits[0] <= i[0] <= timeLimits[1]]
        uo = [i[2] for i in b if not pd.isnull(i[2]) and timeLimits[0] <= i[0] <= timeLimits[1]]
    return {'creat':(sum(creat)/len(creat), max(creat), min(creat), sorted(creat)[0]) if len(creat)>0 else tuple([np.NaN for i in range(4)]),
            'uo':(sum(uo)/len(uo), max(uo), min(uo), sorted(uo)[0]) if len(uo)>0 else tuple([np.NaN for i in range(4)])}

creatList = []
uoList = []
for i in range(len(df['aki'])):
    y = aki_parser(df['aki'][i], timeLimits=(df['postop_intime'][i].to_pydatetime(), df['postop_intime'][i].to_pydatetime() + dt.timedelta(hours=hoursRange)))
    creatList.append(y['creat'][0])
    uoList.append(y['uo'][0])
dfForImpute['aki_stage_creat_mean'] = creatList
dfForImpute['aki_stage_creat_mean'] = dfForImpute['aki_stage_creat_mean'].astype('category')
dfForImpute['aki_stage_uo_mean'] = uoList
dfForImpute['aki_stage_uo_mean'] = dfForImpute['aki_stage_uo_mean'].astype('category')

In [26]:
# add on non-time data for imputation
# dfForImpute = dfForImpute.copy()
# extraColumns = [i for i in dfDroppedMissing.columns if i not in list(dfForImpute.columns) + timeseries + ['infection', 'vent_array', 'int_time1', 'ext_time1'] + ptinfo + adm_num]
# print(extraColumns)
# for i in extraColumns:
#     if i in ('weight', 'height', 'duration1','duration2'):
#         dfForImpute[i] = df[i]
#     else:
#         dfForImpute[i] = df[i].astype('category')
# dfForImpute = dfForImpute.copy()

dfForImpute.head(5)
extraColumns = [i for i in dfDroppedMissing.columns if i not in [*dfForImpute.columns.tolist(), 
                                                                 *timeseries, 
                                                                 *colCats['admInfo'], *colCats['admNum'], *colCats['admTime'], *colCats['diabUnits'],
                                                                 *colCats['ventTimes'], *colCats['readmitInfo'],
                                                                 'index', 'aki', 'vent_array', 'echo', 'fluid', 'infection', 'intropes',]]
for i in extraColumns:
    if i in ('weight', 'height', 'age'):
        dfForImpute[i] = df[i]
    else:
        dfForImpute[i] = df[i].astype('category')
dfForImpute = dfForImpute.copy()

In [27]:
# before imputation again
pd.set_option('display.max_rows', None)
print(dfForImpute.dtypes)
pd.reset_option('display.max_rows')


ph_mean                  float64
ph_max                   float64
ph_min                   float64
bicarb_mean              float64
bicarb_max               float64
bicarb_min               float64
baseexcess_mean          float64
baseexcess_max           float64
baseexcess_min           float64
chloride_mean            float64
chloride_max             float64
chloride_min             float64
free_calcium_mean        float64
free_calcium_max         float64
free_calcium_min         float64
glucose_mean             float64
glucose_max              float64
glucose_min              float64
potassium_mean           float64
potassium_max            float64
potassium_min            float64
sodium_mean              float64
sodium_max               float64
sodium_min               float64
lactate_mean             float64
lactate_max              float64
lactate_min              float64
hematocrit_mean          float64
hematocrit_max           float64
hematocrit_min           float64
hb_mean   

In [28]:
dfForImpute2 = dfForImpute[[i for i in dfForImpute.columns]]

kds = mf.ImputationKernel(
  dfForImpute2,
  datasets=1,
  save_all_iterations=True,
  random_state=1991
)

# Run the MICE algorithm for 3 iterations
kds.mice(3)

print(kds)

dfImputed = kds.complete_data(dataset=0, inplace=False)
print(dfImputed.isnull().sum(0))

# after imputation
dfImputed.head(10)

              Class: ImputationKernel
           Datasets: 1
         Iterations: 3
  Imputed Variables: 109
save_all_iterations: True
ph_mean         0
ph_max          0
ph_min          0
bicarb_mean     0
bicarb_max      0
               ..
liver_severe    0
met_ca          0
aids            0
smoking         0
reintubation    0
Length: 145, dtype: int64


Unnamed: 0,ph_mean,ph_max,ph_min,bicarb_mean,bicarb_max,bicarb_min,baseexcess_mean,baseexcess_max,baseexcess_min,chloride_mean,chloride_max,chloride_min,free_calcium_mean,free_calcium_max,free_calcium_min,glucose_mean,glucose_max,glucose_min,potassium_mean,potassium_max,potassium_min,sodium_mean,sodium_max,sodium_min,lactate_mean,lactate_max,lactate_min,hematocrit_mean,hematocrit_max,hematocrit_min,hb_mean,hb_max,hb_min,pco2_mean,pco2_max,pco2_min,po2_mean,po2_max,po2_min,wcc_mean,wcc_max,wcc_min,plt_mean,plt_max,plt_min,creatinine_mean,creatinine_max,creatinine_min,bun_mean,bun_max,bun_min,magnesium_mean,magnesium_max,magnesium_min,inr_mean,inr_max,inr_min,ptt_mean,ptt_max,ptt_min,hba1c_mean,hba1c_max,hba1c_min,prbc_mean,prbc_max,prbc_min,hr_mean,hr_max,hr_min,sbp_mean,sbp_max,sbp_min,dbp_mean,dbp_max,dbp_min,meanbp_mean,meanbp_max,meanbp_min,rr_mean,rr_max,rr_min,temp_mean,temp_max,temp_min,spo2_mean,spo2_max,spo2_min,cardiac_index_mean,cardiac_index_max,cardiac_index_min,dtoutput_mean,dtoutput_max,dtoutput_min,pasp_mean,pasp_max,pasp_min,padp_mean,padp_max,padp_min,mpap_mean,mpap_max,mpap_min,abx_no,abx_bool,aki_stage_creat_mean,aki_stage_uo_mean,age,gender,ethnicity,height,weight,admission_type,admission_location,insurance,marital_status,language,first_careunit,last_careunit,dbsource,sofa,cabg,aortic,mitral,tricuspid,pulmonary,mi,arrhythmia,ccf,pvd,cvd,dementia,copd,rheum,pud,liver_mild,t1dm,t2dm,paraplegia,ckd,malig,liver_severe,met_ca,aids,smoking,reintubation
0,7.343333,7.38,7.3,26.0,27.0,25.0,0.0,2.0,-2.0,107.0,109.0,105.0,1.176667,1.27,1.1,121.518519,152.0,88.0,4.466667,4.7,4.0,138.0,138.0,138.0,1.1,1.1,1.0,27.666667,26.0,27.0,9.5,9.9,9.1,47.5,55.0,37.0,0.0,2.0,-2.0,19.1,21.0,17.2,332.5,356.0,309.0,0.55,0.6,0.5,9.5,11.0,8.0,2.15,2.3,2.0,1.3,1.3,1.3,41.9,41.9,41.9,7.6,7.2,7.3,375.0,375.0,375.0,93.066667,101.0,83.0,100.160714,128.0,88.0,53.339286,74.0,39.0,65.633333,91.0,54.0,18.666667,28.0,8.0,37.52657,38.200001,35.900002,98.185185,100.0,95.0,2.786307,3.40249,2.11618,28.0,80.0,0.0,26.421875,37.0,20.0,15.409091,21.0,11.0,25.727273,35.0,22.0,1,1,0.0,0.7,19,F,white,170.18,115.4,EMERGENCY,EMERGENCY ROOM ADMIT,Private,SINGLE,ENGL,CSRU,CSRU,carevue,2,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,7.352,7.4,7.29,21.5,23.0,20.0,-2.4,-1.0,-4.0,113.666667,119.0,107.0,1.246667,1.33,1.13,101.0,131.0,67.0,3.72,4.5,3.3,139.0,145.0,137.0,2.65,3.5,1.8,22.0,22.0,22.0,7.933333,9.9,6.7,39.8,46.0,35.0,-2.4,-1.0,-4.0,11.9,15.5,8.3,155.5,166.0,145.0,0.45,0.5,0.4,9.0,9.0,9.0,1.8,1.8,1.8,1.7,1.9,1.5,34.65,38.2,31.1,10.8,9.8,9.8,211.94445,350.00001,-64.166669,90.666667,112.0,67.0,110.617647,170.5,60.5,56.180556,90.0,31.0,74.486821,139.0,43.0,22.86875,38.0,12.0,37.644117,38.5,36.099998,98.689655,100.0,95.0,2.790728,3.91579,2.09948,28.181818,90.0,5.0,23.48,31.0,10.0,4.28,14.0,-4.0,11.76,20.0,4.0,1,1,0.0,0.0,21,F,hispanic,152.4,60.0,ELECTIVE,PHYS REFERRAL/NORMAL DELI,Medicaid,SINGLE,ENGL,CSRU,CSRU,metavision,6,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,7.38,7.44,7.32,24.0,25.0,23.0,-1.5,-1.0,-2.0,107.0,110.0,105.0,1.15,1.21,1.06,107.833333,127.0,98.0,3.9,4.1,3.6,136.666667,138.0,136.0,2.3,2.4,2.2,21.333333,22.0,22.0,8.033333,8.9,7.4,41.0,46.0,36.0,-1.5,-1.0,-2.0,8.266667,9.8,7.3,168.0,181.0,155.0,0.533333,0.6,0.5,6.333333,7.0,5.0,1.65,1.7,1.6,1.2,1.2,1.2,43.8,43.8,43.8,6.4,6.4,6.4,350.000008,350.00001,350.000004,84.313433,102.0,65.0,119.711538,159.0,96.0,51.74359,94.0,38.5,73.209878,103.0,57.0,16.05,26.0,9.0,36.773256,37.555555,35.099998,98.627119,100.0,94.0,2.391406,3.08547,1.7971,40.454545,70.0,0.0,54.744681,70.0,53.0,24.3125,59.0,15.0,47.59375,150.0,33.0,1,1,0.0,0.0,21,F,hispanic,160.02,74.4,ELECTIVE,PHYS REFERRAL/NORMAL DELI,Medicaid,SINGLE,ENGL,CSRU,CSRU,metavision,2,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
3,7.41625,7.46,7.28,24.0,24.0,24.0,-0.25,0.0,-2.0,106.5,109.0,104.0,1.06,1.13,0.96,98.615385,130.0,69.0,3.975,4.6,3.6,138.0,139.0,137.0,1.2,1.2,1.1,25.0,27.0,24.0,9.1,9.6,8.6,36.625,51.0,30.0,-0.25,0.0,-2.0,26.4,34.7,18.1,231.5,241.0,222.0,0.65,0.7,0.6,15.0,15.0,15.0,1.95,2.1,1.8,1.65,1.8,1.5,39.85,47.8,31.9,8.7,8.7,8.0,375.0,375.0,375.0,103.666667,123.0,83.0,120.74359,151.0,106.0,70.794872,87.0,49.0,86.846154,100.0,66.0,21.1,29.0,12.0,37.008602,38.599998,35.400002,95.046512,100.0,86.0,4.057592,5.39267,3.4555,22.083333,60.0,0.0,46.56,66.0,40.0,21.16129,33.0,14.0,35.387097,49.0,25.0,3,1,0.0,0.0,22,M,white,190.5,71.1,EMERGENCY,EMERGENCY ROOM ADMIT,Private,SINGLE,ENGL,CSRU,CSRU,carevue,5,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,7.361429,7.43,7.26,28.625,32.0,24.0,0.0,6.0,-5.0,100.222222,111.0,91.0,1.088889,1.18,0.9,106.184211,138.0,51.0,4.042857,4.6,3.1,131.25,143.0,120.0,1.2,1.6,0.9,26.0,26.0,26.0,9.285714,11.4,8.6,47.6,63.0,41.0,0.0,6.0,-5.0,8.566667,13.6,6.0,98.142857,132.0,68.0,0.5625,0.6,0.5,10.5,16.0,7.0,1.925,2.6,1.3,1.25,1.5,1.1,31.75,43.3,23.3,5.4,5.4,5.5,328.125,375.0,0.0,80.693694,101.0,69.0,106.294643,135.0,78.0,50.861607,86.0,27.0,68.355854,94.0,49.0,18.219828,27.0,11.0,37.018849,37.799999,35.555556,97.145631,100.0,94.0,3.95824,5.21978,3.07692,19.069767,80.0,0.0,25.75,48.0,13.0,12.830769,22.0,3.0,24.646154,36.0,17.0,2,1,0.0,0.0,22,M,asian,182.88,66.0,ELECTIVE,PHYS REFERRAL/NORMAL DELI,Private,SINGLE,CANT,CSRU,CSRU,carevue,7,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5,7.396667,7.48,7.29,27.0,28.0,26.0,0.75,3.0,0.0,100.333333,106.0,96.0,1.128333,1.16,1.1,102.714286,133.0,68.0,4.8125,5.2,4.2,133.0,137.0,129.0,1.35,1.4,1.3,27.714286,29.0,27.0,8.825,9.3,8.0,45.25,60.0,37.0,0.75,3.0,0.0,10.85,11.5,9.9,329.75,360.0,300.0,0.75,0.8,0.7,20.5,26.0,15.0,2.175,2.5,1.9,1.3,1.3,1.3,47.3,47.3,47.3,6.8,6.1,6.0,277.999992,277.999992,277.999992,84.644068,132.0,62.0,136.121469,181.0,86.0,58.765976,101.0,31.0,80.611112,143.0,43.0,26.488449,48.0,10.0,37.808844,39.38889,36.400002,97.363636,100.0,83.0,2.471725,4.08,2.00483,57.222222,130.0,10.0,26.740741,37.0,15.0,17.296296,26.0,8.0,21.666667,31.0,12.0,1,1,0.0,0.0,22,M,hispanic,177.8,86.6,EMERGENCY,TRANSFER FROM HOSP/EXTRAM,Medicaid,SINGLE,ENGL,CSRU,CSRU,metavision,4,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
6,7.415,7.49,7.34,24.0,25.0,23.0,0.0,2.0,-2.0,108.5,111.0,106.0,1.19,1.19,1.19,123.0,140.0,98.0,4.2,4.4,4.1,137.5,138.0,137.0,1.242857,1.5,0.9,31.666667,32.0,32.0,10.0,10.7,9.3,38.0,44.0,32.0,0.0,2.0,-2.0,10.5,12.1,8.9,168.0,168.0,168.0,0.65,0.7,0.6,13.0,15.0,11.0,2.071429,2.4,1.8,1.1,1.1,1.1,27.3,27.3,27.3,8.7,8.7,9.8,187.499982,279.99999,0.0,93.613699,111.0,75.0,111.231383,154.0,80.0,56.322581,74.0,36.0,74.283687,97.0,53.0,16.535714,30.0,9.0,37.55698,38.555556,36.299999,97.466667,100.0,94.0,2.05308,2.58929,1.45148,7.727273,20.0,0.0,19.761905,28.0,14.0,14.714286,22.0,9.0,17.190476,24.0,14.0,1,1,0.0,0.090909,23,F,white,165.1,69.8,ELECTIVE,PHYS REFERRAL/NORMAL DELI,Private,SINGLE,ENGL,CSRU,CSRU,metavision,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
7,7.385,7.43,7.35,27.75,31.0,24.0,1.6,3.0,0.0,103.5,107.0,100.0,1.16,1.24,1.08,111.414634,156.0,70.0,3.9625,4.6,3.3,138.25,141.0,137.0,2.7,3.1,2.2,25.0,26.0,25.0,7.966667,8.4,7.4,45.8,55.0,39.0,1.6,3.0,0.0,9.466667,11.4,8.5,266.5,296.0,227.0,0.625,0.7,0.5,13.25,17.0,10.0,1.9,2.0,1.8,1.2,1.4,1.0,31.7,34.9,28.5,7.0,7.0,7.0,375.0,375.0,375.0,108.150943,121.0,93.0,112.830189,132.0,93.0,63.169811,96.0,46.0,79.477983,105.333,62.0,18.82,29.0,9.0,36.804889,37.599998,35.099998,97.367347,100.0,91.0,3.547739,4.62312,3.01508,19.270833,90.0,0.0,31.0,40.0,20.0,8.176471,20.0,4.0,28.058824,32.0,19.0,3,1,0.0,0.0,23,M,white,182.88,78.0,EMERGENCY,PHYS REFERRAL/NORMAL DELI,Private,SINGLE,ENGL,CSRU,CSRU,carevue,3,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
8,7.41,7.46,7.36,23.0,23.0,23.0,-0.5,0.0,-2.0,109.0,109.0,109.0,1.0825,1.18,1.02,94.6875,117.0,46.0,3.825,3.9,3.7,139.0,139.0,139.0,1.6,1.6,1.3,30.5,35.0,26.0,10.033333,11.6,8.7,36.75,40.0,32.0,-0.5,0.0,-2.0,17.5,17.5,17.5,119.5,120.0,119.0,0.5,0.5,0.5,8.0,8.0,8.0,1.7,1.7,1.7,1.45,1.6,1.3,30.6,32.0,29.2,7.3,7.0,7.0,416.666675,570.0,350.00001,91.895765,112.0,76.0,108.842105,146.0,80.0,56.253086,71.0,43.0,73.734937,112.0,58.0,17.511628,29.0,9.0,37.908449,38.799999,36.400002,96.455446,100.0,70.0,4.256099,4.60209,3.64674,28.315789,120.0,0.0,24.882353,45.0,16.0,17.16129,25.0,10.0,32.956522,54.0,21.0,1,1,0.0,0.0,23,F,hispanic,165.1,54.5,ELECTIVE,PHYS REFERRAL/NORMAL DELI,Private,SINGLE,PORT,CSRU,CSRU,carevue,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
9,7.357778,7.41,7.3,22.0,23.0,21.0,-1.285714,3.0,-3.0,105.0,110.0,100.0,1.232857,1.32,1.15,111.307692,137.0,85.0,5.341667,6.6,3.7,134.333333,137.0,131.0,1.55,1.7,1.2,25.125,26.0,25.0,8.45,8.8,8.1,43.428571,47.0,38.0,-1.285714,3.0,-3.0,21.3,25.9,16.7,280.0,303.0,257.0,1.5,1.6,1.3,16.666667,19.0,13.0,2.0,2.4,1.6,1.4,1.6,1.3,34.533333,44.7,28.3,10.8,10.8,10.8,187.5,375.0,0.0,82.705882,99.0,68.0,105.097938,140.0,75.0,51.953608,72.0,29.0,67.873684,88.0,37.0,18.116667,32.0,8.0,37.006481,37.799999,35.5,98.738095,100.0,95.0,3.33617,3.77143,2.45714,13.108108,50.0,0.0,20.851852,28.0,15.0,12.434783,16.0,9.0,24.608696,34.0,17.0,4,1,0.5,0.0,23,M,white,167.64,65.0,EMERGENCY,TRANSFER FROM HOSP/EXTRAM,Government,SINGLE,ENGL,CSRU,CSRU,carevue,6,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [29]:
y = np.asarray([i.to_pydatetime() for i in df["postop_intime"]])
z = np.asarray([i.to_pydatetime() for i in df["outtime"]])
dfImputed['icu_stay_hrs'] = [i.total_seconds()/3600 for i in z-y]
# dfImputed[[i for i in adm_num if i != 'reint_time']] = df[[i for i in adm_num if i != 'reint_time']] #marcel
dfImputed[colCats['admInfo']] = df[colCats['admInfo']]
dfImputed[colCats['admTime']] = df[colCats['admTime']]
dfImputed[colCats['admNum']] = df[colCats['admNum']]
dfImputed[colCats['ventTimes']] = df[colCats['ventTimes']]
dfImputed[colCats['readmitInfo']] = df[colCats['readmitInfo']]
dfImputed.head(5)

Unnamed: 0,ph_mean,ph_max,ph_min,bicarb_mean,bicarb_max,bicarb_min,baseexcess_mean,baseexcess_max,baseexcess_min,chloride_mean,chloride_max,chloride_min,free_calcium_mean,free_calcium_max,free_calcium_min,glucose_mean,glucose_max,glucose_min,potassium_mean,potassium_max,potassium_min,sodium_mean,sodium_max,sodium_min,lactate_mean,lactate_max,lactate_min,hematocrit_mean,hematocrit_max,hematocrit_min,hb_mean,hb_max,hb_min,pco2_mean,pco2_max,pco2_min,po2_mean,po2_max,po2_min,wcc_mean,wcc_max,wcc_min,plt_mean,plt_max,plt_min,creatinine_mean,creatinine_max,creatinine_min,bun_mean,bun_max,bun_min,magnesium_mean,magnesium_max,magnesium_min,inr_mean,inr_max,inr_min,ptt_mean,ptt_max,ptt_min,hba1c_mean,hba1c_max,hba1c_min,prbc_mean,prbc_max,prbc_min,hr_mean,hr_max,hr_min,sbp_mean,sbp_max,sbp_min,dbp_mean,dbp_max,dbp_min,meanbp_mean,meanbp_max,meanbp_min,rr_mean,rr_max,rr_min,temp_mean,temp_max,temp_min,spo2_mean,spo2_max,spo2_min,cardiac_index_mean,cardiac_index_max,cardiac_index_min,dtoutput_mean,dtoutput_max,dtoutput_min,pasp_mean,pasp_max,pasp_min,padp_mean,padp_max,padp_min,mpap_mean,mpap_max,mpap_min,abx_no,abx_bool,aki_stage_creat_mean,aki_stage_uo_mean,age,gender,ethnicity,height,weight,admission_type,admission_location,insurance,marital_status,language,first_careunit,last_careunit,dbsource,sofa,cabg,aortic,mitral,tricuspid,pulmonary,mi,arrhythmia,ccf,pvd,cvd,dementia,copd,rheum,pud,liver_mild,t1dm,t2dm,paraplegia,ckd,malig,liver_severe,met_ca,aids,smoking,reintubation,icu_stay_hrs,subject_id,hadm_id,stay_id,admittime,dischtime,intime,outtime,reint_time,ext_time,postop_intime,deathtime,dod,los,icustay_seq,int_time1,ext_time1,duration1,int_time2,ext_time2,duration2,readmit_times,readmissions,first_readmission
0,7.343333,7.38,7.3,26.0,27.0,25.0,0.0,2.0,-2.0,107.0,109.0,105.0,1.176667,1.27,1.1,121.518519,152.0,88.0,4.466667,4.7,4.0,138.0,138.0,138.0,1.1,1.1,1.0,27.666667,26.0,27.0,9.5,9.9,9.1,47.5,55.0,37.0,0.0,2.0,-2.0,19.1,21.0,17.2,332.5,356.0,309.0,0.55,0.6,0.5,9.5,11.0,8.0,2.15,2.3,2.0,1.3,1.3,1.3,41.9,41.9,41.9,7.6,7.2,7.3,375.0,375.0,375.0,93.066667,101.0,83.0,100.160714,128.0,88.0,53.339286,74.0,39.0,65.633333,91.0,54.0,18.666667,28.0,8.0,37.52657,38.200001,35.900002,98.185185,100.0,95.0,2.786307,3.40249,2.11618,28.0,80.0,0.0,26.421875,37.0,20.0,15.409091,21.0,11.0,25.727273,35.0,22.0,1,1,0.0,0.7,19,F,white,170.18,115.4,EMERGENCY,EMERGENCY ROOM ADMIT,Private,SINGLE,ENGL,CSRU,CSRU,carevue,2,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,25.705556,28772,132385,293560,2148-12-15 00:19:00,2149-01-04 15:45:00,2148-12-23 13:00:34,2148-12-24 16:42:20,NaT,2148-12-23 18:00:00,2148-12-23 15:00:00,NaT,NaT,1.154,2,2197-06-09 14:00:00,2197-06-09 14:00:00,5.283333,,,,,,
1,7.352,7.4,7.29,21.5,23.0,20.0,-2.4,-1.0,-4.0,113.666667,119.0,107.0,1.246667,1.33,1.13,101.0,131.0,67.0,3.72,4.5,3.3,139.0,145.0,137.0,2.65,3.5,1.8,22.0,22.0,22.0,7.933333,9.9,6.7,39.8,46.0,35.0,-2.4,-1.0,-4.0,11.9,15.5,8.3,155.5,166.0,145.0,0.45,0.5,0.4,9.0,9.0,9.0,1.8,1.8,1.8,1.7,1.9,1.5,34.65,38.2,31.1,10.8,9.8,9.8,211.94445,350.00001,-64.166669,90.666667,112.0,67.0,110.617647,170.5,60.5,56.180556,90.0,31.0,74.486821,139.0,43.0,22.86875,38.0,12.0,37.644117,38.5,36.099998,98.689655,100.0,95.0,2.790728,3.91579,2.09948,28.181818,90.0,5.0,23.48,31.0,10.0,4.28,14.0,-4.0,11.76,20.0,4.0,1,1,0.0,0.0,21,F,hispanic,152.4,60.0,ELECTIVE,PHYS REFERRAL/NORMAL DELI,Medicaid,SINGLE,ENGL,CSRU,CSRU,metavision,6,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,21.239444,73457,184405,251108,2198-12-28 12:00:00,2199-01-04 20:05:00,2198-12-28 10:37:23,2198-12-29 10:14:22,NaT,2198-12-28 18:50:00,2198-12-28 13:00:00,NaT,NaT,0.984,1,2197-06-09 14:00:00,2197-06-09 14:00:00,5.283333,,,,,,
2,7.38,7.44,7.32,24.0,25.0,23.0,-1.5,-1.0,-2.0,107.0,110.0,105.0,1.15,1.21,1.06,107.833333,127.0,98.0,3.9,4.1,3.6,136.666667,138.0,136.0,2.3,2.4,2.2,21.333333,22.0,22.0,8.033333,8.9,7.4,41.0,46.0,36.0,-1.5,-1.0,-2.0,8.266667,9.8,7.3,168.0,181.0,155.0,0.533333,0.6,0.5,6.333333,7.0,5.0,1.65,1.7,1.6,1.2,1.2,1.2,43.8,43.8,43.8,6.4,6.4,6.4,350.000008,350.00001,350.000004,84.313433,102.0,65.0,119.711538,159.0,96.0,51.74359,94.0,38.5,73.209878,103.0,57.0,16.05,26.0,9.0,36.773256,37.555555,35.099998,98.627119,100.0,94.0,2.391406,3.08547,1.7971,40.454545,70.0,0.0,54.744681,70.0,53.0,24.3125,59.0,15.0,47.59375,150.0,33.0,1,1,0.0,0.0,21,F,hispanic,160.02,74.4,ELECTIVE,PHYS REFERRAL/NORMAL DELI,Medicaid,SINGLE,ENGL,CSRU,CSRU,metavision,2,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,49.571944,93887,137808,242592,2188-02-04 00:39:00,2188-02-09 16:41:00,2188-02-05 11:15:53,2188-02-07 18:14:19,NaT,2188-02-05 21:00:00,2188-02-05 16:40:00,NaT,NaT,2.2906,1,2197-06-09 14:00:00,2197-06-09 14:00:00,5.283333,,,,,,
3,7.41625,7.46,7.28,24.0,24.0,24.0,-0.25,0.0,-2.0,106.5,109.0,104.0,1.06,1.13,0.96,98.615385,130.0,69.0,3.975,4.6,3.6,138.0,139.0,137.0,1.2,1.2,1.1,25.0,27.0,24.0,9.1,9.6,8.6,36.625,51.0,30.0,-0.25,0.0,-2.0,26.4,34.7,18.1,231.5,241.0,222.0,0.65,0.7,0.6,15.0,15.0,15.0,1.95,2.1,1.8,1.65,1.8,1.5,39.85,47.8,31.9,8.7,8.7,8.0,375.0,375.0,375.0,103.666667,123.0,83.0,120.74359,151.0,106.0,70.794872,87.0,49.0,86.846154,100.0,66.0,21.1,29.0,12.0,37.008602,38.599998,35.400002,95.046512,100.0,86.0,4.057592,5.39267,3.4555,22.083333,60.0,0.0,46.56,66.0,40.0,21.16129,33.0,14.0,35.387097,49.0,25.0,3,1,0.0,0.0,22,M,white,190.5,71.1,EMERGENCY,EMERGENCY ROOM ADMIT,Private,SINGLE,ENGL,CSRU,CSRU,carevue,5,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,29.828611,16927,109148,254842,2166-09-24 20:15:00,2166-10-20 13:30:00,2166-10-06 09:41:05,2166-10-07 17:19:43,NaT,2166-10-06 14:11:00,2166-10-06 11:30:00,NaT,NaT,1.3185,2,2197-06-09 14:00:00,2197-06-09 14:00:00,5.283333,,,,,,
4,7.361429,7.43,7.26,28.625,32.0,24.0,0.0,6.0,-5.0,100.222222,111.0,91.0,1.088889,1.18,0.9,106.184211,138.0,51.0,4.042857,4.6,3.1,131.25,143.0,120.0,1.2,1.6,0.9,26.0,26.0,26.0,9.285714,11.4,8.6,47.6,63.0,41.0,0.0,6.0,-5.0,8.566667,13.6,6.0,98.142857,132.0,68.0,0.5625,0.6,0.5,10.5,16.0,7.0,1.925,2.6,1.3,1.25,1.5,1.1,31.75,43.3,23.3,5.4,5.4,5.5,328.125,375.0,0.0,80.693694,101.0,69.0,106.294643,135.0,78.0,50.861607,86.0,27.0,68.355854,94.0,49.0,18.219828,27.0,11.0,37.018849,37.799999,35.555556,97.145631,100.0,94.0,3.95824,5.21978,3.07692,19.069767,80.0,0.0,25.75,48.0,13.0,12.830769,22.0,3.0,24.646154,36.0,17.0,2,1,0.0,0.0,22,M,asian,182.88,66.0,ELECTIVE,PHYS REFERRAL/NORMAL DELI,Private,SINGLE,CANT,CSRU,CSRU,carevue,7,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,92.231111,4846,154439,264181,2111-10-05 07:15:00,2111-10-13 15:30:00,2111-10-05 13:02:50,2111-10-09 10:13:52,NaT,2111-10-05 17:15:00,2111-10-05 14:00:00,NaT,NaT,3.8827,1,2197-06-09 14:00:00,2197-06-09 14:00:00,5.283333,,,,,,


In [30]:
dfImputed.to_csv('imputedReadmission.csv')