# DL Survival - Ventilation Outcomes
#### Updated 17/11/21

In [16]:
import pandas as pd
import numpy as np
import math
import datetime

## 1. Data cleaning

- Import MIMIC-III data
- Assign correct data types
- Impute missing values


In [8]:
# Load the combined MIMIC extract from the working directory into `data`.
# NOTE(review): the column listing printed further down includes 'Unnamed: 0',
# which usually means the CSV was written together with its index; consider
# index_col=0 here -- confirm nothing downstream relies on that column first.
data = pd.read_csv('mimic_combined.csv')

In [9]:
# view columns -- printed in three slices (0-49, 50-99, 100+) so each Index
# repr stays short enough to read
for column_slice in (slice(0, 50), slice(50, 100), slice(100, None)):
    print(data.columns[column_slice])

Index(['Unnamed: 0', 'liver_severe', 'temp', 'po2', 'rheum', 'first_careunit',
       'ffp', 'cvd', 'bilirubin_indirect', 'mit', 'inr_1', 'aids', 'chloride',
       'admission_location', 'hb', 'ckd', 'magnesium', 'bilirubin_total',
       'ventrate', 'plts', 'pco2', 'ext_time', 'baseexcess', 'copd',
       'lymphocytes', 'marital_status', 'dischtime', 'dbp', 'admittime', 'crp',
       'bleed_time', 'albumin', 'hospital_expire_flag', 'arrhythmia',
       'potassium', 'icustay_seq', 'ethnicity', 'tidalvol', 'weight', 'pt',
       'cardiac_index', 'bilirubin_direct', 'plt', 'aortic', 'alp', 'pud',
       'fibrinogen', 'creatinine', 'outtime', 'bg_temp'],
      dtype='object')
Index(['hadm_id', 'tricuspid', 'smoking', 'aado2', 'pvd', 'vent_array',
       'free_calcium', 'ph', 'meanbp', 'insulin', 'paraplegia', 'alt', 'los',
       'reint_time', 'ccf', 'insurance', 'admission_type', 'intime',
       'reintubation', 'diab_un', 'language', 'dtoutput', 'rr', 'sodium',
       'liver_mild', 'spe

In [10]:
# Convert every timestamp column to datetime, parsing with the fixed
# '%Y-%m-%d %H:%M:%S' layout. `time_cols` is reused by later cells.
time_cols = [
    'admittime',
    'dischtime',
    'intime',
    'outtime',
    'reint_time',
    'ext_time',
    'deathtime',
]
for col in time_cols:
    data[col] = data[col].pipe(pd.to_datetime, format='%Y-%m-%d %H:%M:%S')

In [19]:
# define function for getting ventilation duration (1st ventilation)
def get_vent_duration(row):
    """Return the time from ``row['intime']`` to ``row['ext_time']`` in hours.

    When that difference is NaN (for example because ``ext_time`` is NaT),
    the interval from ``row['intime']`` to ``row['deathtime']`` is used
    instead.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide 'ext_time', 'intime' and 'deathtime' as datetime-like
        values (NaT allowed).

    Returns
    -------
    float
        Duration in hours. NaN if neither interval can be computed. The value
        is negative when the end timestamp precedes 'intime'; no validation
        or clipping is done here.
    """
    end = row['ext_time']
    start = row['intime']
    elapsed_s = (end - start).total_seconds()
    if math.isnan(elapsed_s):
        # No usable ext_time interval: measure up to deathtime instead.
        elapsed_s = (row['deathtime'] - start).total_seconds()
    # seconds -> minutes -> hours
    return elapsed_s / 60 / 60

In [26]:
# Compute the duration (hours) row by row and store it as 'vent_duration',
# then preview it next to the timestamps it is derived from.
data['vent_duration'] = data.apply(func=get_vent_duration, axis=1)
timing_preview_cols = [
    'ext_time', 'vent_duration', 'admittime', 'dischtime',
    'intime', 'outtime', 'reint_time', 'deathtime',
]
data[timing_preview_cols]

Unnamed: 0,ext_time,vent_duration,admittime,dischtime,intime,outtime,reint_time,deathtime
0,2172-01-06 20:00:00,8.255556,2172-01-03 20:49:00,2172-01-11 13:14:00,2172-01-06 11:44:40,2172-01-07 11:08:40,NaT,NaT
1,2175-11-24 06:15:00,18.209167,2175-11-22 17:01:00,2175-11-30 14:55:00,2175-11-23 12:02:27,2175-11-27 17:02:16,NaT,NaT
2,2169-11-17 15:00:00,5.370278,2169-11-16 13:00:00,2169-11-21 14:10:00,2169-11-17 09:37:47,2169-11-18 16:21:23,NaT,NaT
3,2181-06-11 19:30:00,4.516944,2181-06-11 12:30:00,2181-06-15 11:00:00,2181-06-11 14:58:59,2181-06-12 09:39:24,NaT,NaT
4,2152-07-05 15:30:00,6.056667,2152-07-03 18:10:00,2152-07-13 11:35:00,2152-07-05 09:26:36,2152-07-10 14:10:28,NaT,NaT
...,...,...,...,...,...,...,...,...
12327,2182-08-18 18:00:00,7.654444,2182-08-17 14:54:00,2182-08-25 19:49:00,2182-08-18 10:20:44,2182-08-20 17:42:26,NaT,NaT
12328,NaT,,2183-07-10 11:45:00,2183-07-14 15:06:00,2183-07-10 12:27:45,2183-07-11 15:02:27,NaT,NaT
12329,NaT,,2114-01-28 00:00:00,2114-02-02 17:52:00,2114-01-29 08:49:30,2114-01-30 18:24:30,NaT,NaT
12330,NaT,,2137-07-09 18:08:00,2137-07-20 13:00:00,2137-07-09 18:57:18,2137-07-10 15:40:30,NaT,NaT


In [None]:
## CHECK FOR ROWS WHERE DEATHTIME < INTIME OR ADMITTIME

In [29]:
# Inspect row 1449: in the output below its deathtime is earlier than intime
# (and ext_time is NaT), so the deathtime fallback gives a negative vent_duration.
data.loc[1449,['ext_time','vent_duration','admittime','dischtime','intime','outtime','reint_time','deathtime']]

ext_time                         NaT
vent_duration               -10.9125
admittime        2136-03-19 22:54:00
dischtime        2136-03-19 12:00:00
intime           2136-03-19 22:54:45
outtime          2136-03-20 05:55:41
reint_time                       NaT
deathtime        2136-03-19 12:00:00
Name: 1449, dtype: object

In [24]:
# Sanity check: every timestamp column should now report datetime64[ns]
data.dtypes[time_cols]

admittime     datetime64[ns]
dischtime     datetime64[ns]
intime        datetime64[ns]
outtime       datetime64[ns]
reint_time    datetime64[ns]
ext_time      datetime64[ns]
deathtime     datetime64[ns]
dtype: object

In [None]:
# Preview selected ventilation, vital-sign and reintubation columns side by side
data.loc[:, ['vent_array', 'hr', 'sbp', 'reintubation', 'reint_time', 'ext_time', 'intime']]