# MIMIC-Multimodal: Master Dataset Generation

The data structure of the master dataset is inspired by:
Soenksen, L. R. et al. Integrated multimodal artificial intelligence framework for healthcare applications. npj Digit. Med. 5, 149 (2022).

For more details, please visit:
https://physionet.org/content/haim-multimodal/1.0.1/

For data access and description, please visit:
https://mimic.mit.edu/

MIMIC-IV https://physionet.org/content/mimiciv/2.2/#files-panel \
MIMIC-CXR https://physionet.org/content/mimic-cxr/2.0.0/#files-panel \
MIMIC-CXR-JPG https://physionet.org/content/mimic-cxr-jpg/2.0.0/ \
MIMIC-IV-Note https://physionet.org/content/mimic-iv-note/2.2/note/#files-panel 


In [1]:
import numpy as np
import pandas as pd
import pickle
import datetime as dt
from pandasql import sqldf
from data_utils import *

import warnings
warnings.filterwarnings("ignore")

In [2]:
# File paths (all relative, so they resolve against the current working directory)

# MIMIC-IV (hosp and icu modules)
mimiciv_hosp_path = '../mimic-iv-2.2/hosp/'
mimiciv_icu_path = '../mimic-iv-2.2/icu/'
# MIMIC-CXR & MIMIC-CXR-JPG
mimiciv_cxr_path = '../mimic-cxr/'
mimiciv_cxr_jpg_path = '../mimic-cxr-jpg/'
# MIMIC-IV-Note
mimiciv_note_path = '../mimic-iv-note/note/'

## Load Data
For memory efficiency, we first load all files into **Dask DataFrames**. \
When processing is required, we call **ddf.compute()** to convert the data into a Pandas DataFrame.

### read files by folder

In [3]:
# MIMIC-IV hosp module: load the tables found in the hosp folder into a fresh dict
dfs_hosp = read_folder({}, mimiciv_hosp_path)

Read files in folder ../mimic-iv-2.2/hosp/


100%|██████████| 21/21 [16:28<00:00, 47.08s/it] 


In [4]:
# MIMIC-IV icu module
dfs_icu = {}
# chartevents is read eagerly with pandas instead of Dask: computing such a large
# Dask DataFrame later would require a great amount of time and memory.
dfs_icu['chartevents'] = pd.read_csv(mimiciv_icu_path+'chartevents.csv.gz', compression='gzip')
# NOTE(review): this relies on read_folder keeping the pre-loaded 'chartevents'
# entry rather than replacing it with a lazily-read one — confirm in data_utils.
dfs_icu = read_folder(dfs_icu,mimiciv_icu_path)

Read files in folder ../mimic-iv-2.2/icu/


100%|██████████| 8/8 [01:12<00:00,  9.12s/it]


In [5]:
# MIMIC-CXR (record / study lists) and MIMIC-CXR-JPG (metadata, labels, split)
dfs_cxr = read_folder({}, mimiciv_cxr_path)
dfs_cxr_jpg = read_folder({}, mimiciv_cxr_jpg_path)

Read files in folder ../mimic-cxr/


100%|██████████| 6/6 [00:01<00:00,  4.20it/s]


Read files in folder ../mimic-cxr-jpg/


100%|██████████| 5/5 [00:01<00:00,  2.94it/s]


In [6]:
# MIMIC-IV-Note: load the tables found in the note folder into a fresh dict
dfs_note = read_folder({}, mimiciv_note_path)

Read files in folder ../mimic-iv-note/note/


100%|██████████| 4/4 [02:08<00:00, 32.02s/it]


### datetime conversion

In [7]:
# Run the shared datetime conversion over the hosp, ICU and note table dictionaries
dfs_hosp, dfs_icu, dfs_note = [
    convert_datetime(tables) for tables in (dfs_hosp, dfs_icu, dfs_note)
]

In [8]:
# Convert the time-related variables in the CXR metadata.
# The cell is safe to re-run: the table is only computed while it still is a Dask
# DataFrame, and the conversion is skipped once 'StudyDatetime' exists.
cxr_metadata = dfs_cxr_jpg['mimic-cxr-2.0.0-metadata']
if hasattr(cxr_metadata, 'compute'):
    cxr_metadata = cxr_metadata.compute()
    dfs_cxr_jpg['mimic-cxr-2.0.0-metadata'] = cxr_metadata

if 'StudyDatetime' not in cxr_metadata.columns:
    # StudyDate is stored as a YYYYMMDD number
    study_date = pd.to_datetime(cxr_metadata['StudyDate'].astype('int'), format='%Y%m%d')
    # StudyTime is stored as an HHMMSS.fff number; zero-pad it to a fixed width so
    # early-morning times (e.g. 83353.531) still parse as HHMMSS
    study_time_text = cxr_metadata['StudyTime'].map('{:010.3f}'.format)
    # Drop the fractional seconds, as the original strftime('%H%M%S') round-trip did
    study_clock = pd.to_datetime(study_time_text, format='%H%M%S.%f').dt.floor('s')

    cxr_metadata['StudyDate'] = study_date
    cxr_metadata['StudyTime'] = study_clock.dt.time
    # Date at midnight + time of day elapsed since midnight
    cxr_metadata['StudyDatetime'] = study_date + (study_clock - study_clock.dt.normalize())

## ID combinations

### get ID lists from each module

In [9]:
# Materialise the tables needed for the ID lists as pandas DataFrames.
# Tables that no longer expose .compute() (already materialised) are left untouched,
# so re-running this cell does not raise.
for _tables, _name in (
    (dfs_hosp, 'admissions'),       # MIMIC-IV hosp
    (dfs_icu, 'icustays'),          # MIMIC-IV icu
    (dfs_cxr, 'cxr-record-list'),   # MIMIC-CXR
    (dfs_note, 'discharge'),        # MIMIC-IV-Note
    (dfs_note, 'radiology'),        # MIMIC-IV-Note
):
    if hasattr(_tables[_name], 'compute'):
        _tables[_name] = _tables[_name].compute()

In [10]:
# ICU module: one row per ICU stay, joined with the admission / ED timestamps
icu_info = (
    dfs_icu['icustays'][['subject_id','hadm_id','stay_id','intime','outtime']]
    .merge(
        dfs_hosp['admissions'][['subject_id','hadm_id','admittime','dischtime','edregtime','edouttime']],
        on=['subject_id','hadm_id'],
        how='left',
    )
)
# Earliest entering time for each hospitalization (ICU in-time, admission or ED registration)
icu_info = icu_info.assign(
    earliest_intime=lambda stays: stays[['intime','admittime','edregtime']].min(axis=1)
)

# CXR module: identifiers and study timestamps of every image
cxr_info = dfs_cxr_jpg['mimic-cxr-2.0.0-metadata'][
    ['subject_id','study_id','dicom_id','StudyDate','StudyTime','StudyDatetime']
].copy()

# Note module: identifiers of discharge summaries (ds_*) and radiology reports (rad_*)
note_ds_info = dfs_note['discharge'][['note_id','subject_id','hadm_id','charttime']].rename(
    columns={'note_id':'ds_note_id','charttime':'ds_charttime'}
)
note_rad_info = dfs_note['radiology'][['note_id','subject_id','hadm_id','charttime']].rename(
    columns={'note_id':'rad_note_id','charttime':'rad_charttime'}
)

### merge IDs by key identifiers and time

In [11]:
def pysqldf(q):
    """Run the SQL query ``q`` through pandasql, resolving table names in this notebook's globals."""
    return sqldf(q, globals())

In [12]:
# Build the table of distinct ID combinations for every ICU stay.
# Chest X-rays are matched on subject_id and study time, radiology reports on
# subject_id, hadm_id and chart time, discharge summaries on subject_id and hadm_id only.
# The time window runs from earliest_intime up to the ICU outtime.

## Join on MIMIC-IV, MIMIC-CXR and MIMIC-IV-Note
sql_query = """
select distinct key_subject_id as subject_id,key_hadm_id as hadm_id,stay_id,study_id,dicom_id,ds_note_id,rad_note_id
from 
(
    select subject_id as key_subject_id,hadm_id as key_hadm_id,stay_id,intime,outtime,admittime,dischtime,earliest_intime
    from icu_info
) as i
left join cxr_info c
on i.key_subject_id = c.subject_id and c.StudyDatetime >= i.earliest_intime and c.StudyDatetime <= i.outtime
left join note_ds_info ds
on i.key_subject_id = ds.subject_id and i.key_hadm_id = ds.hadm_id
left join note_rad_info rad
on i.key_subject_id = rad.subject_id and i.key_hadm_id = rad.hadm_id and rad.rad_charttime >= i.earliest_intime and rad.rad_charttime <= i.outtime
"""
# Left joins keep ICU stays that have no matching image or note (their IDs are then null)
list_ids = pysqldf(sql_query)

In [14]:
# Key identifiers: the distinct (subject_id, hadm_id, stay_id) triples found in list_ids
key_ids = (
    list_ids[['subject_id','hadm_id','stay_id']]
    .drop_duplicates()
    .reset_index(drop=True)
)
key_ids

Unnamed: 0,subject_id,hadm_id,stay_id
0,10000032.0,29079034.0,39553978.0
1,10000980.0,26913865.0,39765666.0
2,10001217.0,24597018.0,37067082.0
3,10001217.0,27703517.0,34592300.0
4,10001725.0,25563031.0,31205490.0
...,...,...,...
73176,19999442.0,26785317.0,32336619.0
73177,19999625.0,25304202.0,31070865.0
73178,19999828.0,25744818.0,36075953.0
73179,19999840.0,21033226.0,38978960.0


### summary

In [15]:
# Report the number of distinct identifiers of each kind in the combined ID table
print('For patients admitted to ICU')
for _label, _column in (
    ('Number of unique patients:', 'subject_id'),
    ('Number of unique hospital admissions:', 'hadm_id'),
    ('Number of unique ICU stays:', 'stay_id'),
    ('Number of unique chest xray studies:', 'study_id'),
    ('Number of unique chest xray images:', 'dicom_id'),
    ('Number of unique discharge summaries:', 'ds_note_id'),
    ('Number of unique radiology reports:', 'rad_note_id'),
):
    print(_label, list_ids[_column].nunique())

For patients admitted to ICU
Number of unique patients: 50920
Number of unique hospital admissions: 66239
Number of unique ICU stays: 73181
Number of unique chest xray studies: 72180
Number of unique chest xray images: 86661
Number of unique discharge summaries: 65330
Number of unique radiology reports: 412664


## Extract information for each unique ICU stay

### functions

In [18]:
# Get full MIMIC-IV patient records using key_identifiers
def get_patient_icustay(key_subject_id, key_hadm_id, key_stay_id):
    """
    Gather every record linked to one ICU stay and wrap it in a Patient_ICU object.

    The tables are read from the notebook-level dictionaries (dfs_hosp, dfs_icu,
    dfs_cxr, dfs_cxr_jpg, dfs_note) and from the ID table list_ids.

    Inputs:
    key_subject_id -> subject_id is unique to a patient
    key_hadm_id    -> hadm_id is unique to a patient hospital stay
    key_stay_id    -> stay_id is unique to a patient ward stay
    Outputs:
    Patient_ICUstay -> ICU patient stay structure
    """
    # NOTE(review): tables that are still Dask DataFrames when this runs look like they
    # are stored unevaluated (the example at the end of the notebook only displays the
    # tables that were computed beforehand) — confirm against Patient_ICU.

    def rows_matching(table, **wanted):
        # Keep the rows whose ID columns equal every requested value
        selector = None
        for column, value in wanted.items():
            hit = table[column] == value
            selector = hit if selector is None else selector & hit
        return table[selector]

    # Identifier sets used by the different modules (dict order = filter order)
    by_patient = {'subject_id': key_subject_id}
    by_admission = {'subject_id': key_subject_id, 'hadm_id': key_hadm_id}
    by_stay = {'subject_id': key_subject_id, 'hadm_id': key_hadm_id, 'stay_id': key_stay_id}

    ## Table of identifiers
    df_core = rows_matching(list_ids, **by_stay)

    ## Hosp - tables are selected on subject_id & hadm_id (patients on subject_id only)
    # The OMR table is left out: its miscellaneous information is less detailed
    # than what the chartevents table already provides
    df_admissions = rows_matching(dfs_hosp['admissions'], **by_admission)
    df_patients = rows_matching(dfs_hosp['patients'], **by_patient)
    df_transfers = rows_matching(dfs_hosp['transfers'], **by_admission)
    df_diagnoses_icd = rows_matching(dfs_hosp['diagnoses_icd'], **by_admission).merge(
        dfs_hosp['d_icd_diagnoses'], how='left', on=['icd_code', 'icd_version'])
    df_procedures_icd = rows_matching(dfs_hosp['procedures_icd'], **by_admission).merge(
        dfs_hosp['d_icd_procedures'], how='left', on=['icd_code', 'icd_version'])
    df_drgcodes = rows_matching(dfs_hosp['drgcodes'], **by_admission)
    df_services = rows_matching(dfs_hosp['services'], **by_admission)
    df_labevents = rows_matching(dfs_hosp['labevents'], **by_admission).merge(
        dfs_hosp['d_labitems'], how='left', on='itemid')
    df_hcpcsevents = rows_matching(dfs_hosp['hcpcsevents'], **by_admission).merge(
        dfs_hosp['d_hcpcs'], how='left', left_on='hcpcs_cd', right_on='code')
    df_microbiologyevents = rows_matching(dfs_hosp['microbiologyevents'], **by_admission)
    df_emar = rows_matching(dfs_hosp['emar'], **by_admission).merge(
        dfs_hosp['emar_detail'], how='left', on='emar_id')
    df_poe = rows_matching(dfs_hosp['poe'], **by_admission).merge(
        dfs_hosp['poe_detail'], how='left', on='poe_id')
    df_prescriptions = rows_matching(dfs_hosp['prescriptions'], **by_admission).merge(
        dfs_hosp['pharmacy'], how='left', on='pharmacy_id')

    ## ICU - tables are selected on subject_id & hadm_id & stay_id
    def icu_events(table_name):
        # Event rows of this stay with the d_items description columns merged in
        return rows_matching(dfs_icu[table_name], **by_stay).merge(
            dfs_icu['d_items'], how='left', on='itemid')

    df_icustays = rows_matching(dfs_icu['icustays'], **by_stay)
    df_procedureevents = icu_events('procedureevents')
    df_outputevents = icu_events('outputevents')
    df_inputevents = icu_events('inputevents')
    df_datetimeevents = icu_events('datetimeevents')
    df_chartevents = icu_events('chartevents')
    df_ingredientevents = icu_events('ingredientevents')

    ## CXR
    # study_id / dicom_id values attached to this ICU stay in the ID table
    study_id_list = df_core['study_id'].unique()
    dicom_id_list = df_core['dicom_id'].unique()

    def cxr_rows(table, per_image):
        # Study-level tables are filtered on study_id; image-level ones also on dicom_id
        selector = (table.subject_id == key_subject_id) & (table.study_id.isin(study_id_list))
        if per_image:
            selector = selector & (table.dicom_id.isin(dicom_id_list))
        return table[selector]

    # Tables from MIMIC-CXR
    df_cxr_image_path = cxr_rows(dfs_cxr['cxr-record-list'], per_image=True)
    df_cxr_text_path = cxr_rows(dfs_cxr['cxr-study-list'], per_image=False)
    # Tables from MIMIC-CXR-JPG
    df_cxr_metadata = cxr_rows(dfs_cxr_jpg['mimic-cxr-2.0.0-metadata'], per_image=True)
    df_cxr_chexpert = cxr_rows(dfs_cxr_jpg['mimic-cxr-2.0.0-chexpert'], per_image=False)
    df_cxr_negbio = cxr_rows(dfs_cxr_jpg['mimic-cxr-2.0.0-negbio'], per_image=False)
    df_cxr_split = cxr_rows(dfs_cxr_jpg['mimic-cxr-2.0.0-split'], per_image=True)

    ## Notes
    ds_note_id_list = df_core['ds_note_id'].unique()
    rad_note_id_list = df_core['rad_note_id'].unique()

    def note_rows(table, note_ids):
        # Notes of this admission whose note_id is attached to the stay
        selector = ((table.subject_id == key_subject_id) &
                    (table.hadm_id == key_hadm_id) &
                    (table.note_id.isin(note_ids)))
        return table[selector]

    df_dsnotes = note_rows(dfs_note['discharge'], ds_note_id_list)
    df_radnotes = note_rows(dfs_note['radiology'], rad_note_id_list).merge(
        dfs_note['radiology_detail'], how='left', on='note_id')

    # Create patient object and return (argument order must match Patient_ICU)
    return Patient_ICU(
        df_core, df_admissions, df_patients, df_transfers, df_diagnoses_icd, df_procedures_icd, df_drgcodes,
        df_services, df_labevents, df_hcpcsevents, df_microbiologyevents, df_emar, df_poe, df_prescriptions,
        df_icustays, df_procedureevents, df_outputevents, df_inputevents, df_datetimeevents, df_chartevents, df_ingredientevents,
        df_cxr_split, df_cxr_metadata, df_cxr_chexpert, df_cxr_negbio, df_cxr_image_path, df_cxr_text_path,
        df_dsnotes, df_radnotes,
    )

In [19]:
# Extract all single ICU stay records
def generate_master_dataset(key_ids, storage_path):
    """
    Extract every ICU stay listed in key_ids and save each one as a pickle file.

    Inputs:
      key_ids -> Dataframe with all unique available records by key identifiers
                 (columns subject_id, hadm_id and stay_id are read)
      storage_path -> Path prefix the pickle files are written to; the file name
                      'ICUstay_<stay_id>.pkl' is appended as-is, so a folder path
                      must end with a path separator

    Outputs:
      nfiles -> Number of single patient files produced
    """
    nfiles = len(key_ids)
    with tqdm(total = nfiles) as pbar:

        # Iterate through all ICU stays
        for _, content in key_ids.iterrows():
            key_subject_id = content['subject_id']
            key_hadm_id = content['hadm_id']
            key_stay_id = content['stay_id']

            # stay_id is cast to int so the file name carries no decimal part
            filename = f'ICUstay_{int(key_stay_id)}'+'.pkl'
            icustay = get_patient_icustay(key_subject_id,key_hadm_id,key_stay_id)

            # Save object; the context manager closes the file even if pickling fails,
            # instead of leaving one open handle per ICU stay
            with open(storage_path+filename,'wb') as pkl_file:
                pickle.dump(icustay, pkl_file)

            # Update progress bar
            pbar.update(1)

    return nfiles

### extract and save patient ICU stay information

In [21]:
# d_items and radiology_detail are merged into every extracted ICU stay, so they are
# materialised once here. Tables that no longer expose .compute() are left untouched,
# so re-running this cell does not raise.
for _tables, _name in ((dfs_icu, 'd_items'), (dfs_note, 'radiology_detail')):
    if hasattr(_tables[_name], 'compute'):
        _tables[_name] = _tables[_name].compute()

In [22]:
# First batch: the first 30000 ICU stays of key_ids
ICU_path = 'D:/Master Dataset/'
generate_master_dataset(key_ids=key_ids.iloc[:30000], storage_path=ICU_path)

100%|██████████| 30000/30000 [23:50:38<00:00,  2.86s/it]   


In [23]:
# Second batch: every ICU stay from position 30000 onwards
generate_master_dataset(key_ids=key_ids.iloc[30000:], storage_path=ICU_path)

100%|██████████| 43181/43181 [35:38:34<00:00,  2.97s/it]   


Extraction takes about 3 s per ICU stay, i.e. roughly 60 hours for all 73,181 stays.

## Examples

In [2]:
# Load one extracted ICU stay. Only unpickle files you produced yourself:
# pickle.load can execute arbitrary code stored in an untrusted file.
with open('D:/Master Dataset/ICUstay_30000646.pkl','rb') as pkl_file:
    ICUstay_test = pickle.load(pkl_file)

In [3]:
# Names of the tables stored on the ICU stay object
vars(ICUstay_test).keys()

dict_keys(['core', 'admissions', 'patients', 'transfers', 'diagnoses_icd', 'procedures_icd', 'drgcodes', 'services', 'labevents', 'hcpcsevents', 'microbiologyevents', 'emar', 'poe', 'prescriptions', 'icustays', 'procedureevents', 'outputevents', 'inputevents', 'datetimeevents', 'chartevents', 'ingredientevents', 'cxr_split', 'cxr_metadata', 'cxr_chexpert', 'cxr_negbio', 'cxr_image_path', 'cxr_text_path', 'dsnotes', 'radnotes'])

In [5]:
# Preview the first rows of every attribute that is stored as a pandas DataFrame;
# other attributes are skipped but still counted by the progress bar
stay_tables = vars(ICUstay_test)
with tqdm(total=len(stay_tables.keys())) as pbar:
    for attribute, value in stay_tables.items():
        if isinstance(value,pd.DataFrame):
            print(attribute)
            display(value.head())
        pbar.update(1)

  0%|          | 0/29 [00:00<?, ?it/s]

core


Unnamed: 0,subject_id,hadm_id,stay_id,study_id,dicom_id,ds_note_id,rad_note_id
506636,12207593.0,22795209.0,30000646.0,56369155.0,7f124ef7-a1714162-b7e9da18-5862d297-3fd8f3a9,12207593-DS-22,12207593-RR-33
506637,12207593.0,22795209.0,30000646.0,56369155.0,7f124ef7-a1714162-b7e9da18-5862d297-3fd8f3a9,12207593-DS-22,12207593-RR-34
506638,12207593.0,22795209.0,30000646.0,56369155.0,7f124ef7-a1714162-b7e9da18-5862d297-3fd8f3a9,12207593-DS-22,12207593-RR-35
506639,12207593.0,22795209.0,30000646.0,56369155.0,7f124ef7-a1714162-b7e9da18-5862d297-3fd8f3a9,12207593-DS-22,12207593-RR-36
506640,12207593.0,22795209.0,30000646.0,56369155.0,7f124ef7-a1714162-b7e9da18-5862d297-3fd8f3a9,12207593-DS-22,12207593-RR-37


admissions


Unnamed: 0,subject_id,hadm_id,admittime,dischtime,deathtime,admission_type,admit_provider_id,admission_location,discharge_location,insurance,language,marital_status,race,edregtime,edouttime,hospital_expire_flag
94098,12207593.0,22795209.0,2194-04-27 18:43:00,2194-05-06 02:29:00,2194-05-06 02:29:00,EW EMER.,P75X31,EMERGENCY ROOM,DIED,Medicaid,?,MARRIED,ASIAN - CHINESE,2194-04-27 13:49:00,2194-04-27 22:03:00,1.0


icustays


Unnamed: 0,subject_id,hadm_id,stay_id,first_careunit,last_careunit,intime,outtime,los
16210,12207593.0,22795209.0,30000646.0,Coronary Care Unit (CCU),Coronary Care Unit (CCU),2194-04-29 01:39:22,2194-05-03 18:23:48,4.697523


chartevents


Unnamed: 0,subject_id,hadm_id,stay_id,caregiver_id,charttime,storetime,itemid,value,valuenum,valueuom,warning,label,abbreviation,linksto,category,unitname,param_type,lownormalvalue,highnormalvalue
0,12207593,22795209,30000646,2234.0,2194-05-01 06:00:00,2194-05-01 08:43:00,220045,98,98.0,bpm,0.0,Heart Rate,HR,chartevents,Routine Vital Signs,bpm,Numeric,,
1,12207593,22795209,30000646,2234.0,2194-05-01 06:00:00,2194-05-01 08:43:00,220179,100,100.0,mmHg,0.0,Non Invasive Blood Pressure systolic,NBPs,chartevents,Routine Vital Signs,mmHg,Numeric,,
2,12207593,22795209,30000646,2234.0,2194-05-01 06:00:00,2194-05-01 08:43:00,220180,67,67.0,mmHg,0.0,Non Invasive Blood Pressure diastolic,NBPd,chartevents,Routine Vital Signs,mmHg,Numeric,,
3,12207593,22795209,30000646,2234.0,2194-05-01 06:00:00,2194-05-01 08:43:00,220181,73,73.0,mmHg,0.0,Non Invasive Blood Pressure mean,NBPm,chartevents,Routine Vital Signs,mmHg,Numeric,,
4,12207593,22795209,30000646,2234.0,2194-05-01 06:00:00,2194-05-01 08:43:00,220210,23,23.0,insp/min,0.0,Respiratory Rate,RR,chartevents,Respiratory,insp/min,Numeric,,


cxr_metadata


Unnamed: 0,dicom_id,subject_id,study_id,PerformedProcedureStepDescription,ViewPosition,Rows,Columns,StudyDate,StudyTime,ProcedureCodeSequence_CodeMeaning,ViewCodeSequence_CodeMeaning,PatientOrientationCodeSequence_CodeMeaning,StudyDatetime
82844,7f124ef7-a1714162-b7e9da18-5862d297-3fd8f3a9,12207593.0,56369155.0,CHEST (PORTABLE AP),AP,2544.0,3056.0,2194-04-30,08:33:53,CHEST (PORTABLE AP),antero-posterior,Erect,2194-04-30 08:33:53
82845,49d77ea6-5f9a013f-d0e6c33e-bb3aae40-88d57b43,12207593.0,57440284.0,CHEST (PORTABLE AP),AP,2544.0,3056.0,2194-04-29,06:25:10,CHEST (PORTABLE AP),antero-posterior,Erect,2194-04-29 06:25:10
82846,40f24d85-94293a52-b7e4961d-d2ad9561-864efe77,12207593.0,57520242.0,CHEST (PORTABLE AP),AP,2539.0,3050.0,2194-05-02,10:45:39,CHEST (PORTABLE AP),antero-posterior,Erect,2194-05-02 10:45:39
82847,f725e388-3b557432-84d5882b-4c406d73-68132627,12207593.0,57688955.0,CHEST (PORTABLE AP),AP,2539.0,3050.0,2194-05-01,08:06:11,CHEST (PORTABLE AP),antero-posterior,Erect,2194-05-01 08:06:11


cxr_image_path


Unnamed: 0,subject_id,study_id,dicom_id,path
82844,12207593.0,56369155.0,7f124ef7-a1714162-b7e9da18-5862d297-3fd8f3a9,files/p12/p12207593/s56369155/7f124ef7-a171416...
82845,12207593.0,57440284.0,49d77ea6-5f9a013f-d0e6c33e-bb3aae40-88d57b43,files/p12/p12207593/s57440284/49d77ea6-5f9a013...
82846,12207593.0,57520242.0,40f24d85-94293a52-b7e4961d-d2ad9561-864efe77,files/p12/p12207593/s57520242/40f24d85-94293a5...
82847,12207593.0,57688955.0,f725e388-3b557432-84d5882b-4c406d73-68132627,files/p12/p12207593/s57688955/f725e388-3b55743...


dsnotes


Unnamed: 0,note_id,subject_id,hadm_id,note_type,note_seq,charttime,storetime,text
72331,12207593-DS-22,12207593.0,22795209.0,DS,22.0,2194-05-06,2194-05-15 17:32:00,\nName: ___ Unit No: ...


radnotes


Unnamed: 0,note_id,subject_id_x,hadm_id,note_type,note_seq,charttime,storetime,text,subject_id_y,field_name,field_value,field_ordinal
0,12207593-RR-33,12207593.0,22795209.0,RR,33.0,2194-04-29 06:17:00,2194-04-29 08:48:00,PORTABLE CHEST ___ \n\nCOMPARISON: Radiograph...,12207593.0,exam_code,C12,1.0
1,12207593-RR-33,12207593.0,22795209.0,RR,33.0,2194-04-29 06:17:00,2194-04-29 08:48:00,PORTABLE CHEST ___ \n\nCOMPARISON: Radiograph...,12207593.0,exam_name,CHEST (PORTABLE AP),1.0
2,12207593-RR-34,12207593.0,22795209.0,RR,34.0,2194-04-30 08:16:00,2194-04-30 10:35:00,INDICATION: ___ man with history of non small...,12207593.0,exam_code,C12,1.0
3,12207593-RR-34,12207593.0,22795209.0,RR,34.0,2194-04-30 08:16:00,2194-04-30 10:35:00,INDICATION: ___ man with history of non small...,12207593.0,exam_name,CHEST (PORTABLE AP),1.0
4,12207593-RR-35,12207593.0,22795209.0,RR,35.0,2194-05-01 07:14:00,2194-05-01 09:14:00,"HISTORY: NSLC, to assess for change.\n\nFINDI...",12207593.0,exam_code,C12,1.0


100%|██████████| 29/29 [00:00<00:00, 531.87it/s]
