In [2]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# --- Data locations ---------------------------------------------------------
# Root folder of the local MIMIC-IV 3.1 extract. Set the MIMIC_IV_DIR
# environment variable to run the notebook on another machine; when it is not
# set, the original hard-coded location is used.
DATA_PATH = os.environ.get("MIMIC_IV_DIR", r"D:\ICUDATASET\mimic-iv-3.1")
ADMISSIONS_PATH = f"{DATA_PATH}/hosp/admissions.csv"
PATIENTS_PATH = f"{DATA_PATH}/hosp/patients.csv"
ICU_PATH = f"{DATA_PATH}/icu/icustays.csv"
DIAGNOSES_PATH = f"{DATA_PATH}/hosp/diagnoses_icd.csv"
PRESCRIPTIONS_PATH = f"{DATA_PATH}/hosp/prescriptions.csv"
LABS_PATH = f"{DATA_PATH}/hosp/labevents.csv"
D_LABITEMS_PATH = f"{DATA_PATH}/hosp/d_labitems.csv"  # maps lab item IDs to names
CHARTEVENTS_PATH = f"{DATA_PATH}/icu/chartevents.csv"  # vital signs and GCS
D_ITEMS_PATH = f"{DATA_PATH}/icu/d_items.csv"  # maps chartevent item IDs to names
INPUTEVENTS_PATH = f"{DATA_PATH}/icu/inputevents.csv"  # IV intake
OUTPUTEVENTS_PATH = f"{DATA_PATH}/icu/outputevents.csv"  # output
PROCEDURES_ICD_PATH = f"{DATA_PATH}/hosp/procedures_icd.csv"  # procedures

# --- Row caps for the large event tables -------------------------------------
# Only the first N rows of each capped file are read, so the resulting frames
# are partial samples of those tables and may not contain records for every
# patient selected later. Set a cap to None to read the whole file (memory
# permitting).
SAMPLE_NROWS = 100_000
CHARTEVENTS_NROWS = 10_000_000

# ICD codes are identifiers, not numbers. Reading them as text stops pandas
# from inferring a numeric dtype (which would strip leading zeros or leave a
# mixed int/str column) and keeps `.str` prefix matching on `icd_code` reliable.
ICD_DTYPES = {"icd_code": str}

# --- Load tables -------------------------------------------------------------
admissions = pd.read_csv(ADMISSIONS_PATH)
patients = pd.read_csv(PATIENTS_PATH)
icustays = pd.read_csv(ICU_PATH)
diagnoses = pd.read_csv(DIAGNOSES_PATH, dtype=ICD_DTYPES)
prescriptions = pd.read_csv(PRESCRIPTIONS_PATH, low_memory=False, nrows=SAMPLE_NROWS)
labevents = pd.read_csv(LABS_PATH, low_memory=False, nrows=SAMPLE_NROWS)
d_labitems = pd.read_csv(D_LABITEMS_PATH)
chartevents = pd.read_csv(CHARTEVENTS_PATH, low_memory=False, nrows=CHARTEVENTS_NROWS)
d_items = pd.read_csv(D_ITEMS_PATH)
inputevents = pd.read_csv(INPUTEVENTS_PATH, nrows=SAMPLE_NROWS)
outputevents = pd.read_csv(OUTPUTEVENTS_PATH, nrows=SAMPLE_NROWS)
procedures = pd.read_csv(PROCEDURES_ICD_PATH, dtype=ICD_DTYPES)

In [3]:
# Show the distinct item identifiers present in the ICU item dictionary.
print(pd.unique(d_items["itemid"]))

[220001 220003 220045 ... 230174 230176 230177]


In [4]:
# Preview the first five ICU stay records.
icustays.head(n=5)

Unnamed: 0,subject_id,hadm_id,stay_id,first_careunit,last_careunit,intime,outtime,los
0,10000032,29079034,39553978,Medical Intensive Care Unit (MICU),Medical Intensive Care Unit (MICU),2180-07-23 14:00:00,2180-07-23 23:50:47,0.410266
1,10000690,25860671,37081114,Medical Intensive Care Unit (MICU),Medical Intensive Care Unit (MICU),2150-11-02 19:37:00,2150-11-06 17:03:17,3.893252
2,10000980,26913865,39765666,Medical Intensive Care Unit (MICU),Medical Intensive Care Unit (MICU),2189-06-27 08:42:00,2189-06-27 20:38:27,0.497535
3,10001217,24597018,37067082,Surgical Intensive Care Unit (SICU),Surgical Intensive Care Unit (SICU),2157-11-20 19:18:02,2157-11-21 22:08:00,1.118032
4,10001217,27703517,34592300,Surgical Intensive Care Unit (SICU),Surgical Intensive Care Unit (SICU),2157-12-19 15:42:24,2157-12-20 14:27:41,0.948113


In [5]:
# Preview the first five hospital admission records.
admissions.head(n=5)

Unnamed: 0,subject_id,hadm_id,admittime,dischtime,deathtime,admission_type,admit_provider_id,admission_location,discharge_location,insurance,language,marital_status,race,edregtime,edouttime,hospital_expire_flag
0,10000032,22595853,2180-05-06 22:23:00,2180-05-07 17:15:00,,URGENT,P49AFC,TRANSFER FROM HOSPITAL,HOME,Medicaid,English,WIDOWED,WHITE,2180-05-06 19:17:00,2180-05-06 23:30:00,0
1,10000032,22841357,2180-06-26 18:27:00,2180-06-27 18:49:00,,EW EMER.,P784FA,EMERGENCY ROOM,HOME,Medicaid,English,WIDOWED,WHITE,2180-06-26 15:54:00,2180-06-26 21:31:00,0
2,10000032,25742920,2180-08-05 23:44:00,2180-08-07 17:50:00,,EW EMER.,P19UTS,EMERGENCY ROOM,HOSPICE,Medicaid,English,WIDOWED,WHITE,2180-08-05 20:58:00,2180-08-06 01:44:00,0
3,10000032,29079034,2180-07-23 12:35:00,2180-07-25 17:55:00,,EW EMER.,P06OTX,EMERGENCY ROOM,HOME,Medicaid,English,WIDOWED,WHITE,2180-07-23 05:54:00,2180-07-23 14:00:00,0
4,10000068,25022803,2160-03-03 23:16:00,2160-03-04 06:26:00,,EU OBSERVATION,P39NWO,EMERGENCY ROOM,,,English,SINGLE,WHITE,2160-03-03 21:55:00,2160-03-04 06:26:00,0


COHORT CREATION

In [6]:
# --- Cohort creation ----------------------------------------------------------
# Inclusion: at least one diagnosis code for acute ischemic stroke (AIS), i.e.
# ICD-10 I63.* or an ICD-9 code beginning with 433, 434 or 436.
AIS_ICD9_PREFIXES = ("433", "434", "436")
AIS_ICD10_PREFIX = "I63"
# Minimum ICU length of stay. `los` in icustays is expressed in fractional
# days, so 2 corresponds to 48 hours.
MIN_ICU_LOS_DAYS = 2

# Match on a text view of the codes so the prefix test also works if the column
# was loaded with a numeric or mixed dtype (non-string entries would otherwise
# yield NaN from `.str.startswith` instead of True/False).
dx_code_text = diagnoses["icd_code"].astype(str)
is_ais_icd9 = (diagnoses["icd_version"] == 9) & dx_code_text.str.startswith(AIS_ICD9_PREFIXES)
is_ais_icd10 = (diagnoses["icd_version"] == 10) & dx_code_text.str.startswith(AIS_ICD10_PREFIX)
ais_diagnoses = diagnoses[is_ais_icd9 | is_ais_icd10]

# Attach ICU stays. An admission with several AIS codes and/or several ICU
# stays produces one row per (diagnosis, stay) combination here.
ais_icu = pd.merge(ais_diagnoses, icustays, on=["subject_id", "hadm_id"])

# Keep only the first ICU stay of each hospitalization. `intime` holds
# "YYYY-MM-DD HH:MM:SS" strings, which sort chronologically as text. `seq_num`
# breaks ties between diagnosis rows of the same stay, so the retained row is
# deterministic (the AIS code with the lowest seq_num) rather than dependent on
# the sort algorithm.
ais_icu = ais_icu.sort_values(by=["intime", "seq_num"])
ais_icu = ais_icu.drop_duplicates(subset=["hadm_id"], keep="first")

# Exclude stays shorter than 48 hours (patients who left the ICU or died
# within 48 hours of ICU admission).
ais_icu_48h = ais_icu[ais_icu["los"] >= MIN_ICU_LOS_DAYS]

# Add the in-hospital mortality outcome from the admissions table.
ais_final = pd.merge(
    ais_icu_48h,
    admissions[["subject_id", "hadm_id", "hospital_expire_flag"]],
    on=["subject_id", "hadm_id"]
)
print("Final cohort size:", len(ais_final))
print("Patients died:", ais_final["hospital_expire_flag"].sum())
print("Patients survived:", (ais_final['hospital_expire_flag'] == 0).sum())

Final cohort size: 3387
Patients died: 628
Patients survived: 2759


In [7]:
# Preview the first five rows of the final AIS cohort.
ais_final.head(n=5)

Unnamed: 0,subject_id,hadm_id,seq_num,icd_code,icd_version,stay_id,first_careunit,last_careunit,intime,outtime,los,hospital_expire_flag
0,14816979,21650344,1,43411,9,38466660,Surgical Intensive Care Unit (SICU),Surgical Intensive Care Unit (SICU),2110-01-30 13:15:53,2110-02-03 18:18:26,4.210104,0
1,12264134,25257503,1,I63312,10,37673110,Neuro Intermediate,Neuro Intermediate,2110-03-02 16:04:26,2110-03-07 22:18:13,5.259572,0
2,11993259,23072371,1,43491,9,32269643,Surgical Intensive Care Unit (SICU),Surgical Intensive Care Unit (SICU),2110-03-27 16:48:00,2110-03-29 22:17:13,2.228623,0
3,17907596,21801758,3,43411,9,34260029,Surgical Intensive Care Unit (SICU),Surgical Intensive Care Unit (SICU),2110-04-12 18:19:00,2110-05-01 08:34:09,18.593854,0
4,18801749,29949595,1,I63412,10,36111383,Neuro Intermediate,Neuro Intermediate,2110-04-20 16:50:00,2110-04-23 18:12:27,3.057257,0


In [8]:
# Preview the first five patient records.
patients.head(n=5)

Unnamed: 0,subject_id,gender,anchor_age,anchor_year,anchor_year_group,dod
0,10000032,F,52,2180,2014 - 2016,2180-09-09
1,10000048,F,23,2126,2008 - 2010,
2,10000058,F,33,2168,2020 - 2022,
3,10000068,F,19,2160,2008 - 2010,
4,10000084,M,72,2160,2017 - 2019,2161-02-13


Data 