# MIMIC-IV data - dataset construction: admissions

Code taken from the GRU-ODE-Bayes preprocessing pipeline, simplified and adapted for MIMIC-IV v1.0.

In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
from datetime import timedelta
import numpy as np

In [None]:
# Load the hospital admissions table (gzip-compressed CSV) and preview
# its first rows.
fn = '/path/core/admissions.csv.gz'
adm = pd.read_csv(filepath_or_buffer=fn, compression='gzip')
adm.head()

In [None]:
# Attach each patient's anchor_age to their admissions. The inner merge also
# drops admissions whose subject_id does not appear in the patients table.
patients_df=pd.read_csv('/path/core/patients.csv.gz')
adm_dob=pd.merge(patients_df[["subject_id","anchor_age"]],adm,on="subject_id")

# Keep only patients with exactly one distinct hospital admission (hadm_id).
# The count is taken on the raw admissions table, before the merge.
df=adm.groupby("subject_id")["hadm_id"].nunique()
subj_ids=list(df[df==1].index)
adm_1=adm_dob.loc[adm_dob["subject_id"].isin(subj_ids)]
print("Number of patients remaining in the dataframe: ")
print(len(adm_1.index))

In [None]:
# Length of each hospital admission, measured from admittime to dischtime.
# NOTE: this is the duration of the whole admission, not of an ICU stay.
adm_1=adm_1.copy()  # own copy, so the column assignments below do not target a slice
adm_1['admittime']=pd.to_datetime(adm_1["admittime"], format='%Y-%m-%d %H:%M:%S')
adm_1['dischtime']=pd.to_datetime(adm_1["dischtime"], format='%Y-%m-%d %H:%M:%S')

adm_1["elapsed_time"]=adm_1["dischtime"]-adm_1["admittime"]
# .dt.days keeps only the whole-day component of the timedelta, so the filter
# below retains admissions lasting at least 3 days and less than 30 days.
adm_1["elapsed_days"]=adm_1["elapsed_time"].dt.days

adm_2=adm_1.loc[(adm_1["elapsed_days"]<30) & (adm_1["elapsed_days"]>2)]
print("Number of patients remaining in the dataframe: ")
print(len(adm_2.index))

In [None]:
# Restrict the cohort to patients whose anchor_age is strictly above 15.
adm_2_15 = adm_2[adm_2["anchor_age"].gt(15)].copy()
print("Number of patients remaining in the dataframe: ")
print(adm_2_15.shape[0])

In [None]:
fn = '/path/icu/chartevents.csv.gz'
# chartevents is too large to load in one go, so stream it in chunks.
# Only the hadm_id column is needed here; restricting the parser to it with
# usecols avoids decoding every other column of this huge file.
seen_hadm_ids = set()
for chunk in pd.read_csv(fn, usecols=['hadm_id'], chunksize=1000000):
    # Drop missing ids: they can never match an admission, and NaN values are
    # not reliably de-duplicated.
    seen_hadm_ids.update(chunk['hadm_id'].dropna().unique())

# Sorted array of the distinct admission ids that have at least one chart event.
ids = np.array(sorted(seen_hadm_ids))

In [None]:
# Keep only the admissions whose hadm_id was found in chartevents.
adm_2_15_chart = adm_2_15[adm_2_15["hadm_id"].isin(ids)].copy()
print("Number of patients remaining in the dataframe: ")
print(adm_2_15_chart.shape[0])

In [None]:
# Persist the filtered admissions; the DataFrame index is written as the
# first (unnamed) column.
adm_2_15_chart.to_csv(path_or_buf="/path/processed/admissions_processed.csv")