# Extract ICD codes to a list

## Prepare Python libraries and raw data

In [1]:
import argparse
from helpers import *
from medcode_utils import icd_list,diagnosis_with_time
from medcodes.diagnoses.icd_conversion import convert_9to10_list, convert_9to10


In [2]:
## Input and output locations (hard-coded here; originally labelled "argparse" parameters)

## Root directory of the raw MIMIC-IV 1.0 data.
## Alternative locations used on other machines:
#mimic_iv_path = '/Users/jin/Desktop/Temporal_AutoScore/Data/mimic-iv-1.0'
#mimic_iv_path = '/Users/siqili/Desktop/National University of Singapore/Liu Nan - WP_MIMIC_Benchmark/OriginalData/data'
mimic_iv_path = "/dataset/mimiciv/1.0/"

## Directory that receives the generated files.
## Alternative locations used on other machines:
#output_path = '/Users/jin/Desktop/Temporal_AutoScore/Output/mimic-iv-1.0'
#output_path = '/Users/siqili/Desktop/National University of Singapore/Liu Nan - WP_MIMIC_Benchmark'
output_path = "/home/jun/data/MIMIC/processed"

In [3]:
## Defining auxiliary parameters: one directory per MIMIC-IV module.
## NOTE: the "ed" directory is assumed to sit directly under "mimic_iv_path",
## next to "core", "hosp" and "icu".
## TODO(ZJ): make this assumption more prominent.
mimic_iv_core_path, mimic_iv_hosp_path, mimic_iv_icu_path, mimic_iv_ed_path = [
    os.path.join(mimic_iv_path, module_dir)
    for module_dir in ('core', 'hosp', 'icu', 'ed')
]

## Lookup tables, one per MIMIC-IV module, mapping a table name to its CSV file name.
## Every entry follows the pattern "<table name>.csv".
icu_filename_dict = {
    table: table + ".csv"
    for table in (
        "chartevents",
        "datetimeevents",
        "d_items",
        "icustays",
        "inputevents",
        "outputevents",
        "procedureevents",
    )
}
core_filename_dict = {
    table: table + ".csv"
    for table in ("patients", "admissions", "transfers")
}
hosp_filename_dict = {
    table: table + ".csv"
    for table in (
        "d_hcpcs",
        "d_icd_diagnoses",
        "d_labitems",
        "emar",
        "hcpcsevents",
        "microbiologyevents",
        "poe",
        "prescriptions",
        "services",
        "diagnoses_icd",
        "d_icd_procedures",
        "drgcodes",
        "emar_detail",
        "labevents",
        "pharmacy",
        "poe_detail",
        "procedures_icd",
    )
}
ed_filename_dict = {
    table: table + ".csv"
    for table in ("diagnosis", "edstays", "medrecon", "pyxis", "triage", "vitalsign")
}


## Load raw data tables through pandas library

In [4]:
## Load the three source tables used by the ICD extraction below.
## Each reader comes from `helpers` and is given the full path to its CSV file.

## ED stays (ed module)
df_edstays = read_edstays_table(
    os.path.join(mimic_iv_ed_path, ed_filename_dict['edstays'])
)

## Hospital admissions (core module)
df_admissions = read_admissions_table(
    os.path.join(mimic_iv_core_path, core_filename_dict["admissions"])
)

## ICD diagnoses (hosp module)
df_diagnoses = read_diagnoses_table(
    os.path.join(mimic_iv_hosp_path, hosp_filename_dict['diagnoses_icd'])
)


## Get diagnosis ICD list

In [13]:
import pickle

## Suffix used in the names of the files written by this cell.
version = 'v9_3digit'

## Build the ICD list dataset and its encoding map with digit3=True.
## NOTE(review): 356*5 looks like a typo for 365*5 (presumably five years in days).
## Confirm against `icd_list` before changing: it would alter the generated dataset.
df_master, icd_encode_map = icd_list(
    df_edstays,
    df_diagnoses,
    df_admissions,
    timerange=356 * 5,
    digit3=True,
)

## Optional 100-row sample export, kept disabled:
#df_master.head(100).to_csv(os.path.join(output_path, 'master_dataset_part.csv'), index=False)

## Full dataset as CSV.
df_master.to_csv(os.path.join(output_path, f'icd_list_dataset_{version}.csv'), index=False)

## Encoding map as a pickle file (the file name has no extension).
with open(os.path.join(output_path, f'icd_encode_map_{version}'), 'wb') as f:
    pickle.dump(icd_encode_map, f)

print(f'Number of unique ICD codes {version}: ', len(icd_encode_map))

Number of unique ICD codes v9_3digit:  5571


In [5]:
import pickle

## Suffix used in the names of the files written by this cell.
version = 'v9'

## Build the ICD list dataset and its encoding map with digit3=False.
## NOTE(review): 356*5 looks like a typo for 365*5 (presumably five years in days).
## Confirm against `icd_list` before changing: it would alter the generated dataset.
df_master, icd_encode_map = icd_list(
    df_edstays,
    df_diagnoses,
    df_admissions,
    timerange=356 * 5,
    digit3=False,
)

## Optional 100-row sample export, kept disabled:
#df_master.head(100).to_csv(os.path.join(output_path, 'master_dataset_part.csv'), index=False)

## Full dataset as CSV.
df_master.to_csv(os.path.join(output_path, f'icd_list_dataset_{version}.csv'), index=False)

## Encoding map as a pickle file (the file name has no extension).
with open(os.path.join(output_path, f'icd_encode_map_{version}'), 'wb') as f:
    pickle.dump(icd_encode_map, f)

print(f'Number of unique ICD codes {version}: ', len(icd_encode_map))

Number of unique ICD codes v9:  5679


In [15]:
import pickle

## Suffix used in the names of the files written by this cell.
version = 'v10'

## Build the ICD list dataset and its encoding map with digit3=False and version=10.
## NOTE(review): 356*5 looks like a typo for 365*5 (presumably five years in days).
## Confirm against `icd_list` before changing: it would alter the generated dataset.
df_master, icd_encode_map = icd_list(
    df_edstays,
    df_diagnoses,
    df_admissions,
    timerange=356 * 5,
    digit3=False,
    version=10,
)

## Optional 100-row sample export, kept disabled:
#df_master.head(100).to_csv(os.path.join(output_path, 'master_dataset_part.csv'), index=False)

## Full dataset as CSV.
df_master.to_csv(os.path.join(output_path, f'icd_list_dataset_{version}.csv'), index=False)

## Encoding map as a pickle file (the file name has no extension).
with open(os.path.join(output_path, f'icd_encode_map_{version}'), 'wb') as f:
    pickle.dump(icd_encode_map, f)

print(f'Number of unique ICD codes {version}: ', len(icd_encode_map))

Number of unique ICD codes v10:  7930
