# Construct Master Dataset by linking different tables in MIMIC-IV-ED

Note: 

1. MIMIC-IV-ED should be downloaded into the "data/ed" folder

2.

3.

## Prepare Python library and raw data

In [None]:
import argparse
from helpers import *
from medcode_utils import commorbidity

In [None]:
## Input / output locations (hard-coded here in place of command-line arguments)
# Root folder holding the raw MIMIC-IV tables
mimic_iv_path = "C:/Users/XFE/Documents/mimic4ed-benchmark/data/"
# Alternative: mimic_iv_path = '/Users/jin/Desktop/Temporal_AutoScore/Data/mimic-iv-1.0'
# Folder that receives the generated master dataset
output_path = "C:/Users/XFE/Documents/mimic4ed-benchmark/data_processed/"
# Alternative: output_path = '/Users/jin/Desktop/Temporal_AutoScore/Output/mimic-iv-1.0'

In [None]:
## Auxiliary parameters: one sub-directory per MIMIC-IV module.
## ASSUMPTION: the "ed" directory sits directly under "mimic_iv_path",
## next to "core", "hosp" and "icu".
## ZJ: Make this assumption bold
mimic_iv_core_path, mimic_iv_hosp_path, mimic_iv_icu_path, mimic_iv_ed_path = (
    os.path.join(mimic_iv_path, module_dir)
    for module_dir in ("core", "hosp", "icu", "ed")
)

# Table name -> CSV file name, one dictionary per MIMIC-IV module.
# Every file is simply "<table name>.csv"; key order is kept as listed.
icu_filename_dict = {
    table: table + ".csv"
    for table in (
        "chartevents",
        "datetimeevents",
        "d_items",
        "icustays",
        "inputevents",
        "outputevents",
        "procedureevents",
    )
}
core_filename_dict = {
    table: table + ".csv"
    for table in ("patients", "admissions", "transfers")
}
hosp_filename_dict = {
    table: table + ".csv"
    for table in (
        "d_hcpcs",
        "d_icd_diagnoses",
        "d_labitems",
        "emar",
        "hcpcsevents",
        "microbiologyevents",
        "poe",
        "prescriptions",
        "services",
        "diagnoses_icd",
        "d_icd_procedures",
        "drgcodes",
        "emar_detail",
        "labevents",
        "pharmacy",
        "poe_detail",
        "procedures_icd",
    )
}
ed_filename_dict = {
    table: table + ".csv"
    for table in (
        "diagnosis",
        "edstays",
        "medrecon",
        "pyxis",
        "triage",
        "vitalsign",
    )
}


# Output column name -> search pattern for the chief-complaint text.
# "|" separates alternative spellings within one pattern.
complaint_dict = {
    "chiefcom_chest_pain": "chest pain",
    "chiefcom_abdominal_pain": "abdominal pain|abd pain",
    "chiefcom_headache": "headache",
    "chiefcom_shortness_of_breath": "shortness of breath",
    "chiefcom_back_pain": "back pain",
    "chiefcom_cough": "cough",
    "chiefcom_nausea_vomiting": "nausea|vomiting",
    "chiefcom_fever_chills": "fever|chills",
    "chiefcom_syncope": "syncope",
    "chiefcom_dizziness": "dizziness",
}

## Health-utilization and outcome time windows.
## Values are in days unless a comment on the line says otherwise.

# Window for the ICU-transfer outcome -- this one is in HOURS, not days
icu_transfer_timerange = 12

# Look-back windows for past ED visits / admissions / ICU visits are not set
# here; the earlier single-value settings are kept for reference only:
# past_ed_visits_timerange = 365
# past_admissions_timerange = 365
# past_icu_visits_timerange = 365

# Window for the future ED revisit outcome
next_ed_visit_timerange = 3



## Load raw data tables with the pandas library

In [None]:
## Load the main tables, each through its reader from `helpers`
df_edstays = read_edstays_table(
    os.path.join(mimic_iv_ed_path, ed_filename_dict["edstays"])
)
df_patients = read_patients_table(
    os.path.join(mimic_iv_core_path, core_filename_dict["patients"])
)
df_admissions = read_admissions_table(
    os.path.join(mimic_iv_core_path, core_filename_dict["admissions"])
)
df_icustays = read_icustays_table(
    os.path.join(mimic_iv_icu_path, icu_filename_dict["icustays"])
)
df_triage = read_triage_table(
    os.path.join(mimic_iv_ed_path, ed_filename_dict["triage"])
)
df_vitalsign = read_vitalsign_table(
    os.path.join(mimic_iv_ed_path, ed_filename_dict["vitalsign"])
)
df_pyxis = read_pyxis_table(
    os.path.join(mimic_iv_ed_path, ed_filename_dict["pyxis"])
)
# NOTE(review): medrecon is loaded with the pyxis reader -- confirm this is
# intentional and not a copy-paste of the line above.
df_medrecon = read_pyxis_table(
    os.path.join(mimic_iv_ed_path, ed_filename_dict["medrecon"])
)

## Hospital diagnosis table, the source of the ICD codes
df_diagnoses = read_diagnoses_table(
    os.path.join(mimic_iv_hosp_path, hosp_filename_dict["diagnoses_icd"])
)


## ED root table, demographic and outcomes

In [None]:
## Build the master table: edstays + patients first, then + admissions
## (the triage table is merged in a later cell)
df_master = merge_edstays_admissions_on_subject(
    merge_edstays_patients_on_subject(df_edstays, df_patients),
    df_admissions,
)

In [None]:
## Add age, mortality, ED length-of-stay and ICU-transfer outcome columns.
## The helpers that only need the master table are applied in this fixed order.
for add_feature in (
    add_age,
    add_inhospital_mortality,
    add_ed_mortality,
    add_before_ed_mortality,
    add_ed_los,
):
    df_master = add_feature(df_master)
df_master = add_outcome_icu_transfer(df_master, df_icustays, icu_transfer_timerange)

# Sort the master table by subject_id, then intime, for the later steps.
df_master = df_master.sort_values(by=['subject_id', 'intime'])
# NOTE(review): reset_index() without drop=True keeps the previous index as a
# column, which then ends up in the exported CSV -- confirm that is intended.
df_master = df_master.reset_index()


## Health Utilization

In [None]:
## Past ED visits, computed once per look-back window (30, 90 and 365)
for ed_window in (30, 90, 365):
    df_master = generate_past_ed_visits(df_master, past_ed_visits_timerange=ed_window)

In [None]:
## Outcome: future ED revisit variables, using next_ed_visit_timerange as the window
df_master = generate_future_ed_visits(df_master, next_ed_visit_timerange)

In [None]:
## Past hospital admissions, computed once per look-back window (30, 90 and 365)
for admission_window in (30, 90, 365):
    df_master = generate_past_admissions(
        df_master, df_admissions, past_admissions_timerange=admission_window
    )

In [None]:
## Past ICU visits, computed once per look-back window (30, 90 and 365)
for icu_window in (30, 90, 365):
    df_master = generate_past_icu_visits(
        df_master, df_icustays, past_icu_visits_timerange=icu_window
    )

## Triage Information

In [None]:
## Merging with the triage table. TODO(review): revise the variable names to triage_*?
df_master = merge_edstays_triage_on_subject(df_master, df_triage) ## note: changed to merge into the master table

In [None]:
## Encode the 10 chief complaints listed in complaint_dict
df_master = encode_chief_complaints(df_master, complaint_dict)

In [None]:
## XF comments (open items):
## - Give a parameter for the time window: time/days = 30/90/365
## - Process chief complaints
## - Other outcomes
## - ED revisit / time to next revisit?

## Comorbidities from diagnosis ICD

In [None]:
# Add comorbidity information to the master table from the diagnosis ICD table.
# This function takes about 10 min.
# Look-back window is five years: 365 * 5 (presumably days -- confirm against
# medcode_utils.commorbidity). The previous value, 356 * 5, transposed the
# digits of 365 and silently shortened the window by 45.
df_master = commorbidity(df_master, df_diagnoses, df_admissions, timerange=365 * 5)


## ED Vital signs

In [None]:
# Merge ED vital-sign information into the master table.
# options=['last'] presumably selects the last recorded value -- confirm
# against merge_vitalsign_info_on_edstay in helpers.
df_master = merge_vitalsign_info_on_edstay(df_master, df_vitalsign, options=['last'])

## Medication

In [None]:
# Add medication counts derived from the pyxis table (df_pyxis) to the master table.
df_master = merge_med_count_on_edstay(df_master, df_pyxis)

In [None]:
# Add counts derived from the medrecon table (df_medrecon) to the master table.
df_master = merge_medrecon_count_on_edstay(df_master, df_medrecon)

## Review the master dataset and output

In [None]:
# Write the master dataset to "<output_path>/master_dataset.csv".
# Create the output folder first: without it, to_csv fails on a missing
# directory only after all the long-running steps have finished.
# (`os` is the same name this notebook already uses to build its paths.)
os.makedirs(output_path, exist_ok=True)

# To export only a 100-row preview instead:
#df_master.head(100).to_csv(os.path.join(output_path, 'master_dataset_part.csv'), index=False)
# Full dataset:
df_master.to_csv(os.path.join(output_path, 'master_dataset.csv'), index=False)