In [1]:
from pyhealth.datasets import MIMIC4Dataset, split_by_sample
from pyhealth.tasks import InHospitalMortalityMIMIC4

# --- Configuration: everything a reader might need to change ---------------
# Root of the local MIMIC-IV 2.2 download (machine-specific path).
EHR_ROOT = "/srv/local/data/physionet.org/files/mimiciv/2.2/"
# Tables loaded from the EHR root.
EHR_TABLES = ["diagnoses_icd", "procedures_icd", "prescriptions", "labevents"]
# Directory where set_task is asked to cache the generated samples.
TASK_CACHE_DIR = "../../test_cache_mortality_m4"
# Train / validation / test fractions, in that order.
SPLIT_RATIOS = [0.7, 0.1, 0.2]
# Pin the split so the partition (and therefore every metric reported further
# down) is the same on each Restart & Run All instead of changing per run.
SPLIT_SEED = 42

# --- Load the dataset -------------------------------------------------------
# dev=True asks the library for its development mode; the run log for this
# cell reports 1000 patients being processed.
dataset = MIMIC4Dataset(
    ehr_root=EHR_ROOT,
    ehr_tables=EHR_TABLES,
    dev=True,
)

# --- Build task samples -----------------------------------------------------
task = InHospitalMortalityMIMIC4()
samples = dataset.set_task(task, num_workers=10, cache_dir=TASK_CACHE_DIR)

# --- Split ------------------------------------------------------------------
# NOTE(review): this splits at the sample level. Samples carry both a
# patient_id and an admission_id, so a patient with several admissions could
# presumably land in more than one split — confirm whether a patient-level
# split is wanted for this task.
train_dataset, val_dataset, test_dataset = split_by_sample(
    dataset=samples, ratios=SPLIT_RATIOS, seed=SPLIT_SEED
)




Memory usage Starting MIMIC4Dataset init: 668.4 MB
Initializing MIMIC4EHRDataset with tables: ['diagnoses_icd', 'procedures_icd', 'prescriptions', 'labevents'] (dev mode: True)
Using default EHR config: /home/johnwu3/projects/PyHealth_Branch_Testing/PyHealth/pyhealth/datasets/configs/mimic4_ehr.yaml
Memory usage Before initializing mimic4_ehr: 668.4 MB
Initializing mimic4_ehr dataset from /srv/local/data/physionet.org/files/mimiciv/2.2/ (dev mode: False)
Scanning table: diagnoses_icd from /srv/local/data/physionet.org/files/mimiciv/2.2/hosp/diagnoses_icd.csv.gz
Joining with table: /srv/local/data/physionet.org/files/mimiciv/2.2/hosp/admissions.csv.gz
Original path does not exist. Using alternative: /srv/local/data/physionet.org/files/mimiciv/2.2/hosp/admissions.csv
Scanning table: procedures_icd from /srv/local/data/physionet.org/files/mimiciv/2.2/hosp/procedures_icd.csv.gz
Joining with table: /srv/local/data/physionet.org/files/mimiciv/2.2/hosp/admissions.csv.gz
Original path does not

Collecting samples for InHospitalMortalityMIMIC4 from 10 workers: 100%|██████████| 1000/1000 [00:03<00:00, 304.76it/s]

Caching samples to ../../test_cache_mortality_m4/InHospitalMortalityMIMIC4.parquet
Failed to cache samples: failed to determine supertype of list[datetime[μs]] and object
Label mortality vocab: {0: 0, 1: 1}



Processing samples: 100%|██████████| 723/723 [00:00<00:00, 1726.48it/s]

Generated 723 samples for task InHospitalMortalityMIMIC4





In [2]:
# Show only a short preview of one patient's events: the full list is long and
# buries the rest of the notebook under row-level output.
# NOTE(review): MIMIC-IV is distributed under a data use agreement — confirm
# that row-level output may be kept in a notebook that is shared or committed.
PREVIEW_EVENTS = 5
first_patient_id = dataset.unique_patient_ids[0]
dataset.get_patient(first_patient_id).get_events()[:PREVIEW_EVENTS]

Found 1000 unique patient IDs


[Event(event_type='patients', timestamp=datetime.datetime(2025, 10, 7, 18, 30, 27, 968268), attr_dict={'gender': 'F', 'anchor_age': '64', 'anchor_year': '2115', 'anchor_year_group': '2008 - 2010', 'dod': None}),
 Event(event_type='prescriptions', timestamp=datetime.datetime(2115, 5, 19, 8, 0), attr_dict={'hadm_id': '29020064', 'drug': 'BuPROPion (Sustained Release)', 'ndc': '00173094755', 'prod_strength': '100mg SR Tab', 'dose_val_rx': '200', 'dose_unit_rx': 'mg', 'route': 'PO', 'stoptime': '2115-05-21 21:00:00'}),
 Event(event_type='prescriptions', timestamp=datetime.datetime(2115, 5, 19, 10, 0), attr_dict={'hadm_id': '29020064', 'drug': 'anastrozole', 'ndc': '00310020137', 'prod_strength': '1mg Tablet', 'dose_val_rx': '1', 'dose_unit_rx': 'mg', 'route': 'ORAL', 'stoptime': '2115-05-21 21:00:00'}),
 Event(event_type='prescriptions', timestamp=datetime.datetime(2115, 5, 19, 10, 0), attr_dict={'hadm_id': '29020064', 'drug': 'Simvastatin', 'ndc': '51079045420', 'prod_strength': '10mg Tab

In [3]:
# Look at one processed sample to check which keys the task produced and how
# the feature and label values are laid out before building dataloaders.
first_sample = samples[0]
first_sample

{'patient_id': '11625970',
 'admission_id': '20671706',
 'labs': tensor([[  0.0000,   0.0000, 136.0000,   0.0000,   0.0000,   0.0000,   4.3000,
            0.0000,   0.0000,   0.0000,  97.0000,   0.0000,   0.0000,   0.0000,
            0.0000,   0.0000, 160.0000,   0.0000,   0.0000,   0.0000,   2.0000,
           11.0000,   0.0000,   0.0000,   0.0000,   0.0000,   3.3000]]),
 'mortality': tensor([0.])}

In [4]:
from pyhealth.datasets import get_dataloader
from pyhealth.models import RNN
from pyhealth.trainer import Trainer

# Same batch size for every split.
BATCH_SIZE = 32

# Only the training loader is shuffled; validation and test keep their order.
train_dataloader = get_dataloader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_dataloader, test_dataloader = (
    get_dataloader(held_out, batch_size=BATCH_SIZE, shuffle=False)
    for held_out in (val_dataset, test_dataset)
)

# The model is built from the full sample dataset, not from a single split.
model = RNN(dataset=samples)

trainer = Trainer(model=model, metrics=["roc_auc"])

# Score the untrained model on the test split first, as a baseline to compare
# the trained model against.
untrained_test_scores = trainer.evaluate(test_dataloader)
print(untrained_test_scores)

# Train for 10 epochs, tracking validation roc_auc as the monitored metric.
trainer.train(
    train_dataloader=train_dataloader,
    val_dataloader=val_dataloader,
    epochs=10,
    monitor="roc_auc",
    optimizer_params={"lr": 1e-4},
)


  from .autonotebook import tqdm as notebook_tqdm


RNN(
  (embedding_model): EmbeddingModel(embedding_layers=ModuleDict(
    (labs): Linear(in_features=27, out_features=128, bias=True)
  ))
  (rnn): ModuleDict(
    (labs): RNNLayer(
      (dropout_layer): Dropout(p=0.5, inplace=False)
      (rnn): GRU(128, 128, batch_first=True)
    )
  )
  (fc): Linear(in_features=128, out_features=1, bias=True)
)
Metrics: ['roc_auc']
Device: cuda



Evaluation: 100%|██████████| 5/5 [00:00<00:00,  7.76it/s]

{'roc_auc': 0.6621438263229308, 'loss': 0.6749732613563537}
Training:
Batch size: 32
Optimizer: <class 'torch.optim.adam.Adam'>
Optimizer params: {'lr': 0.0001}
Weight decay: 0.0
Max grad norm: None
Val dataloader: <torch.utils.data.dataloader.DataLoader object at 0x7ff8ea199f90>
Monitor: roc_auc
Monitor criterion: max
Epochs: 10




Epoch 0 / 10: 100%|██████████| 16/16 [00:00<00:00, 23.26it/s]

--- Train epoch-0, step-16 ---
loss: 0.6445



Evaluation: 100%|██████████| 3/3 [00:00<00:00, 14.04it/s]

--- Eval epoch-0, step-16 ---
roc_auc: 0.3944
loss: 0.6112
New best roc_auc score (0.3944) at epoch-0, step-16




Epoch 1 / 10: 100%|██████████| 16/16 [00:00<00:00, 226.97it/s]

--- Train epoch-1, step-32 ---
loss: 0.5758



Evaluation: 100%|██████████| 3/3 [00:00<00:00, 591.80it/s]

--- Eval epoch-1, step-32 ---
roc_auc: 0.3803
loss: 0.5449




Epoch 2 / 10: 100%|██████████| 16/16 [00:00<00:00, 276.68it/s]

--- Train epoch-2, step-48 ---
loss: 0.5070



Evaluation: 100%|██████████| 3/3 [00:00<00:00, 644.62it/s]

--- Eval epoch-2, step-48 ---
roc_auc: 0.3451
loss: 0.4745




Epoch 3 / 10: 100%|██████████| 16/16 [00:00<00:00, 259.23it/s]

--- Train epoch-3, step-64 ---
loss: 0.4340



Evaluation: 100%|██████████| 3/3 [00:00<00:00, 623.38it/s]

--- Eval epoch-3, step-64 ---
roc_auc: 0.4225
loss: 0.3852
New best roc_auc score (0.4225) at epoch-3, step-64








Epoch 4 / 10: 100%|██████████| 16/16 [00:00<00:00, 209.86it/s]

--- Train epoch-4, step-80 ---





loss: 0.3323


Evaluation: 100%|██████████| 3/3 [00:00<00:00, 554.34it/s]

--- Eval epoch-4, step-80 ---





roc_auc: 0.4225
loss: 0.2752



Epoch 5 / 10: 100%|██████████| 16/16 [00:00<00:00, 153.47it/s]

--- Train epoch-5, step-96 ---
loss: 0.2137



Evaluation: 100%|██████████| 3/3 [00:00<00:00, 617.11it/s]

--- Eval epoch-5, step-96 ---
roc_auc: 0.4225
loss: 0.1670




Epoch 6 / 10: 100%|██████████| 16/16 [00:00<00:00, 243.23it/s]


--- Train epoch-6, step-112 ---
loss: 0.1249


Evaluation: 100%|██████████| 3/3 [00:00<00:00, 648.24it/s]

--- Eval epoch-6, step-112 ---
roc_auc: 0.4225
loss: 0.1178




Epoch 7 / 10: 100%|██████████| 16/16 [00:00<00:00, 259.20it/s]

--- Train epoch-7, step-128 ---
loss: 0.0980



Evaluation: 100%|██████████| 3/3 [00:00<00:00, 553.61it/s]

--- Eval epoch-7, step-128 ---
roc_auc: 0.4225
loss: 0.1035




Epoch 8 / 10: 100%|██████████| 16/16 [00:00<00:00, 251.54it/s]

--- Train epoch-8, step-144 ---
loss: 0.0860



Evaluation: 100%|██████████| 3/3 [00:00<00:00, 643.27it/s]

--- Eval epoch-8, step-144 ---
roc_auc: 0.4225
loss: 0.0961




Epoch 9 / 10: 100%|██████████| 16/16 [00:00<00:00, 238.19it/s]

--- Train epoch-9, step-160 ---
loss: 0.0881



Evaluation: 100%|██████████| 3/3 [00:00<00:00, 652.81it/s]


--- Eval epoch-9, step-160 ---
roc_auc: 0.4225
loss: 0.0912
Loaded best model
