In [1]:
import pandas as pd
from datetime import datetime, timedelta
from tqdm import tqdm
import pickle
import os

In [2]:
# Load the five preprocessed MIMIC-III tables, in the order they are unpacked below.
# NOTE: pickle.load can execute arbitrary code; only point these paths at trusted files.
III_PICKLE_PATHS = [
    './mimiciii/III_label.pickle',
    './mimiciii/III_lab.pickle',
    './mimiciii/III_prescrip.pickle',
    './mimiciii/III_input.pickle',
    './mimiciii/III_icustay.pickle',
]
III_tables = []
for path in III_PICKLE_PATHS:
    with open(path, 'rb') as f:
        III_tables.append(pickle.load(f))
III_label, III_lab, III_prescrip, III_input, III_icustay = III_tables

In [3]:
# Load the five preprocessed MIMIC-IV tables, in the order they are unpacked below.
# NOTE: pickle.load can execute arbitrary code; only point these paths at trusted files.
IV_PICKLE_PATHS = [
    './mimiciv/IV_label.pickle',
    './mimiciv/IV_lab.pickle',
    './mimiciv/IV_prescrip.pickle',
    './mimiciv/IV_input.pickle',
    './mimiciv/IV_icustay.pickle',
]
IV_tables = []
for path in IV_PICKLE_PATHS:
    with open(path, 'rb') as f:
        IV_tables.append(pickle.load(f))
IV_label, IV_lab, IV_prescrip, IV_input, IV_icustay = IV_tables

## Look-up table

## Dictionary

In [4]:
# For each dataset version: (time column, payload columns) of every source table.
_CHARTTIME_SCHEMAS = {
    'III': {
        'lab': ('CHARTTIME', ['ITEMID', 'VALUE', 'VALUEUOM', 'FLAG']),
        'prescrip': ('STARTDATE', ['DRUG_TYPE', 'DRUG', 'PROD_STRENGTH', 'DOSE_VAL_RX', 'DOSE_UNIT_RX', 'FORM_VAL_DISP', 'FORM_UNIT_DISP', 'ROUTE']),
        'input': ('CHARTTIME', ['ITEMID', 'AMOUNT', 'AMOUNTUOM', 'ORIGINALAMOUNT']),
    },
    'IV': {
        'lab': ('charttime', ['itemid', 'value', 'valueuom', 'ref_range_lower', 'ref_range_upper', 'flag']),
        'prescrip': ('starttime', ['pharmacy_id', 'drug_type', 'drug', 'formulary_drug_cd', 'gsn', 'ndc', 'prod_strength', 'dose_val_rx', 'dose_unit_rx', 'form_val_disp', 'form_unit_disp', 'route']),
        'input': ('starttime', ['itemid', 'amount', 'amountuom', 'rate', 'rateuom', 'originalamount', 'originalrate']),
    },
}


def _group_rows_by_time(frame, time_col, columns):
    """Return ``(times, rows_by_time)`` for one source table.

    ``times`` is the table's time column as a plain list. ``rows_by_time``
    maps each non-null time to the list of ``{column: value}`` dicts of the
    rows carrying that time, in the table's row order.
    """
    times = list(frame[time_col])
    rows_by_time = {}
    for pos, stamp in enumerate(times):
        # A null time never compares equal to anything, so such rows
        # cannot be attached to any time step.
        if pd.isna(stamp):
            continue
        row = {col: frame[col].iloc[pos] for col in columns}
        rows_by_time.setdefault(stamp, []).append(row)
    return times, rows_by_time


def make_charttime_data(data_type, lab, prescrip, input_):
    """Merge lab, prescription and input rows into one time-ordered list.

    Parameters
    ----------
    data_type : str
        ``'III'`` or ``'IV'``; selects the column names used for each table.
    lab, prescrip, input_ : pandas.DataFrame
        Tables holding the time column and payload columns that the chosen
        ``data_type`` expects.

    Returns
    -------
    list of dict
        One dict per distinct time found in any of the three tables, sorted
        ascending. Each dict has the keys ``'time'``, ``'labs'``,
        ``'prescrips'`` and ``'inputs'``; the last three are lists of
        ``{column: value}`` dicts for the rows recorded at that time (empty
        when a table has no row at that time). A null time can still show up
        as an entry, but its lists are always empty.

    Raises
    ------
    ValueError
        If ``data_type`` is neither ``'III'`` nor ``'IV'``.
    """
    if data_type not in _CHARTTIME_SCHEMAS:
        raise ValueError(f"data_type must be 'III' or 'IV', got {data_type!r}")
    schema = _CHARTTIME_SCHEMAS[data_type]

    # One pass per table instead of re-filtering every table for every time.
    lab_times, labs_at = _group_rows_by_time(lab, *schema['lab'])
    prescrip_times, prescrips_at = _group_rows_by_time(prescrip, *schema['prescrip'])
    input_times, inputs_at = _group_rows_by_time(input_, *schema['input'])

    unique_time_list = list(set(lab_times + prescrip_times + input_times))
    unique_time_list.sort()

    return [
        {
            'time': time,
            'labs': labs_at.get(time, []),
            'prescrips': prescrips_at.get(time, []),
            'inputs': inputs_at.get(time, []),
        }
        for time in unique_time_list
    ]

In [5]:
def make_stayid_data(data_type, lab_data, prescrip_data, input_data, label_data, icustay_data):
    """Build one record per ICU stay listed in ``label_data``.

    Parameters
    ----------
    data_type : str
        ``'III'`` or ``'IV'``; selects the stay-id and admission-time column
        names and is forwarded to ``make_charttime_data``.
    lab_data, prescrip_data, input_data : pandas.DataFrame
        Event tables; each is filtered on the stay-id column per stay.
    label_data : pandas.DataFrame
        Must contain the stay-id column and a ``labels`` column. Its unique
        stay ids drive the iteration.
    icustay_data : pandas.DataFrame
        Must contain the stay-id column and the admission-time column.

    Returns
    -------
    list of dict
        One dict per stay id with the keys ``'icustay_id'``, ``'label'``
        (first matching ``labels`` value), ``'intime'`` (first matching
        admission time) and ``'data'`` (output of ``make_charttime_data``
        for that stay's rows).

    Raises
    ------
    ValueError
        If ``data_type`` is neither ``'III'`` nor ``'IV'``.
    IndexError
        If a stay id from ``label_data`` has no row in ``icustay_data``
        (raised by ``.iloc[0]`` on the empty selection).
    """
    if data_type == 'III':
        stay_col = 'ICUSTAY_ID'
        intime_col = 'INTIME'
    elif data_type == 'IV':
        stay_col = 'stay_id'
        intime_col = 'intime'
    else:
        raise ValueError(f"data_type must be 'III' or 'IV', got {data_type!r}")

    final = []
    # `stay_id` rather than `id`, so the builtin is not shadowed.
    for stay_id in tqdm(label_data[stay_col].unique()):
        lab = lab_data[lab_data[stay_col] == stay_id]
        prescrip = prescrip_data[prescrip_data[stay_col] == stay_id]
        input_ = input_data[input_data[stay_col] == stay_id]

        data = make_charttime_data(data_type, lab, prescrip, input_)

        final.append({
            'icustay_id': stay_id,
            'label': label_data[label_data[stay_col] == stay_id]['labels'].iloc[0],
            'intime': icustay_data[icustay_data[stay_col] == stay_id][intime_col].iloc[0],
            'data': data,
        })
    return final

In [6]:
# Assemble the per-stay records for MIMIC-IV.
IV_final = make_stayid_data(
    data_type='IV',
    lab_data=IV_lab,
    prescrip_data=IV_prescrip,
    input_data=IV_input,
    label_data=IV_label,
    icustay_data=IV_icustay,
)

100%|██████████| 46159/46159 [19:44<00:00, 38.98it/s]


In [22]:
# Assemble the per-stay records for MIMIC-III.
III_final = make_stayid_data(
    data_type='III',
    lab_data=III_lab,
    prescrip_data=III_prescrip,
    input_data=III_input,
    label_data=III_label,
    icustay_data=III_icustay,
)

100%|██████████| 34987/34987 [10:57<00:00, 53.25it/s] 


In [None]:
# First ICU-stay record in III_final
III_final[0]

In [None]:
# Structure of a 'data' entry: keys of the first time step of the first stay
III_final[0]['data'][0].keys()

dict_keys(['time', 'labs', 'prescrips', 'inputs'])

In [23]:
# Persist both per-stay record lists; MIMIC-III is written first, then MIMIC-IV.
for out_path, records in (
    ('./mimiciii.pickle', III_final),
    ('./mimiciv.pickle', IV_final),
):
    with open(out_path, 'wb') as f:
        pickle.dump(records, f)