In [1]:
import os

from pyhealth.datasets import MIMIC3Dataset
from pyhealth.datasets import split_by_patient, get_dataloader
from pyhealth.models import Transformer, RNN, RETAIN, MLP
from pyhealth.tasks import mortality_prediction_mimic3_fn, readmission_prediction_mimic3_fn, drug_recommendation_mimic3_fn, length_of_stay_prediction_mimic3_fn
from pyhealth.trainer import Trainer

# Directory holding the MIMIC-III v1.4 tables. Export MIMIC3_ROOT to point the
# notebook at a different copy; when it is unset the original path is used.
MIMIC3_ROOT = os.environ.get(
    "MIMIC3_ROOT", '/data/physionet.org/files/mimiciii/1.4/'
)

# Build the PyHealth dataset from the diagnosis, procedure and prescription
# tables.
dataset = MIMIC3Dataset(
    root=MIMIC3_ROOT,
    tables=["DIAGNOSES_ICD", "PROCEDURES_ICD", "PRESCRIPTIONS"],
    code_mapping={
        # Optional ICD-9 -> CCS mappings, left disabled for this run:
        # "ICD9CM": "CCSCM", 
        # "ICD9PROC": "CCSPROC",
        # NDC drug codes are mapped to ATC with target level 3.
        "NDC": ("ATC", {"target_kwargs": {"level": 3}})
        },
    # NOTE(review): dev mode -- the run log shows 1000 patients being processed,
    # presumably a subset of the full data; confirm before reporting results.
    dev=True,
    # NOTE(review): presumably forces the tables to be re-parsed on every run
    # (the log prints per-table parse timings) -- confirm whether this can be
    # turned off once a valid cache exists.
    refresh_cache=True
)


  from .autonotebook import tqdm as notebook_tqdm


INFO: Pandarallel will run on 64 workers.
INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.
finish basic patient information parsing : 1.295158863067627s
finish parsing DIAGNOSES_ICD : 1.5444056987762451s
finish parsing PROCEDURES_ICD : 2.137986421585083s
finish parsing PRESCRIPTIONS : 9.635247707366943s


Mapping codes: 100%|██████████| 1000/1000 [00:01<00:00, 560.06it/s]


In [2]:
# Settings shared by the split and the three loaders.
SPLIT_SEED = 42  # fixed so a fresh kernel reproduces the same train/val/test split
BATCH_SIZE = 32

# Apply the readmission-prediction task function to obtain the sample dataset.
mimic3_ds = dataset.set_task(readmission_prediction_mimic3_fn)

# 80% / 10% / 10% train / validation / test split. Without an explicit seed the
# partition differs between runs, so the reported test metrics would not be
# comparable across re-executions of the notebook.
train_dataset, val_dataset, test_dataset = split_by_patient(
    mimic3_ds, [0.8, 0.1, 0.1], seed=SPLIT_SEED
)

# Only the training loader shuffles; validation and test keep a fixed order.
train_dataloader = get_dataloader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_dataloader = get_dataloader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_dataloader = get_dataloader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

Generating samples for readmission_prediction_mimic3_fn: 100%|██████████| 1000/1000 [00:00<00:00, 51093.97it/s]


In [3]:
# Binary readmission classifier that loads the pretrained "KG/transe" code
# embeddings. In the model summary printed when training starts, the embedding
# tables are 512 wide and a per-feature Linear(512 -> 256) maps them to
# embedding_dim.
model_w_pre = Transformer(
    dataset=mimic3_ds,
    mode="binary",
    label_key="label",
    feature_keys=["conditions", "procedures", "drugs"],
    embedding_dim=256,
    pretrained_emb="KG/transe",
)

Loading pretrained embedding for conditions...
Loading pretrained embedding for procedures...
Loading pretrained embedding for drugs...


In [37]:
# Train the pretrained-embedding model for 15 epochs. Validation pr_auc is the
# monitored metric; the training log ends with "Loaded best model".
trainer = Trainer(model=model_w_pre)
trainer.train(
    epochs=15,
    monitor="pr_auc",
    optimizer_params={"lr": 1e-4},
    train_dataloader=train_dataloader,
    val_dataloader=val_dataloader,
)

# Score the trained model on the held-out test loader and show the metrics.
metrics_w_pre = trainer.evaluate(test_dataloader)
print(metrics_w_pre)

Transformer(
  (embeddings): ModuleDict(
    (conditions): Embedding(4031, 512, padding_idx=0)
    (procedures): Embedding(1276, 512, padding_idx=0)
    (drugs): Embedding(194, 512, padding_idx=0)
  )
  (linear_layers): ModuleDict(
    (conditions): Linear(in_features=512, out_features=256, bias=True)
    (procedures): Linear(in_features=512, out_features=256, bias=True)
    (drugs): Linear(in_features=512, out_features=256, bias=True)
  )
  (transformer): ModuleDict(
    (conditions): TransformerLayer(
      (transformer): ModuleList(
        (0): TransformerBlock(
          (attention): MultiHeadedAttention(
            (linear_layers): ModuleList(
              (0): Linear(in_features=256, out_features=256, bias=False)
              (1): Linear(in_features=256, out_features=256, bias=False)
              (2): Linear(in_features=256, out_features=256, bias=False)
            )
            (output_linear): Linear(in_features=256, out_features=256, bias=False)
            (attention): 

Epoch 0 / 15: 100%|██████████| 243/243 [00:02<00:00, 81.28it/s]

--- Train epoch-0, step-243 ---
loss: 2.6863



Evaluation: 100%|██████████| 31/31 [00:00<00:00, 259.36it/s]

--- Eval epoch-0, step-243 ---
pr_auc: 0.5940
roc_auc: 0.5701
f1: 0.6520
loss: 0.7162
New best pr_auc score (0.5940) at epoch-0, step-243




Epoch 1 / 15: 100%|██████████| 243/243 [00:02<00:00, 86.60it/s]

--- Train epoch-1, step-486 ---
loss: 1.3624



Evaluation: 100%|██████████| 31/31 [00:00<00:00, 262.85it/s]

--- Eval epoch-1, step-486 ---
pr_auc: 0.6169
roc_auc: 0.5924
f1: 0.6379
loss: 0.7208
New best pr_auc score (0.6169) at epoch-1, step-486




Epoch 2 / 15: 100%|██████████| 243/243 [00:02<00:00, 86.38it/s]

--- Train epoch-2, step-729 ---
loss: 1.0040



Evaluation: 100%|██████████| 31/31 [00:00<00:00, 255.64it/s]

--- Eval epoch-2, step-729 ---
pr_auc: 0.5691
roc_auc: 0.5596
f1: 0.6870
loss: 1.0541




Epoch 3 / 15: 100%|██████████| 243/243 [00:02<00:00, 87.16it/s]

--- Train epoch-3, step-972 ---
loss: 0.9451



Evaluation: 100%|██████████| 31/31 [00:00<00:00, 263.53it/s]

--- Eval epoch-3, step-972 ---
pr_auc: 0.6197
roc_auc: 0.5927
f1: 0.6518
loss: 0.6995
New best pr_auc score (0.6197) at epoch-3, step-972




Epoch 4 / 15: 100%|██████████| 243/243 [00:02<00:00, 87.41it/s]

--- Train epoch-4, step-1215 ---
loss: 0.9677



Evaluation: 100%|██████████| 31/31 [00:00<00:00, 263.31it/s]

--- Eval epoch-4, step-1215 ---
pr_auc: 0.6192
roc_auc: 0.6033
f1: 0.4757
loss: 0.7513




Epoch 5 / 15: 100%|██████████| 243/243 [00:02<00:00, 87.05it/s]

--- Train epoch-5, step-1458 ---
loss: 0.9120



Evaluation: 100%|██████████| 31/31 [00:00<00:00, 257.26it/s]

--- Eval epoch-5, step-1458 ---
pr_auc: 0.6010
roc_auc: 0.5879
f1: 0.1362
loss: 0.9770




Epoch 6 / 15: 100%|██████████| 243/243 [00:02<00:00, 85.58it/s]

--- Train epoch-6, step-1701 ---
loss: 0.8684



Evaluation: 100%|██████████| 31/31 [00:00<00:00, 166.78it/s]

--- Eval epoch-6, step-1701 ---
pr_auc: 0.6192
roc_auc: 0.6064
f1: 0.6898
loss: 0.7518




Epoch 7 / 15: 100%|██████████| 243/243 [00:02<00:00, 83.36it/s]

--- Train epoch-7, step-1944 ---
loss: 0.8355



Evaluation: 100%|██████████| 31/31 [00:00<00:00, 149.13it/s]

--- Eval epoch-7, step-1944 ---
pr_auc: 0.6051
roc_auc: 0.5946
f1: 0.1900
loss: 0.7860




Epoch 8 / 15: 100%|██████████| 243/243 [00:03<00:00, 72.72it/s]

--- Train epoch-8, step-2187 ---
loss: 0.8441



Evaluation: 100%|██████████| 31/31 [00:00<00:00, 260.71it/s]

--- Eval epoch-8, step-2187 ---
pr_auc: 0.6311
roc_auc: 0.6174
f1: 0.6282
loss: 0.6806
New best pr_auc score (0.6311) at epoch-8, step-2187








Epoch 9 / 15: 100%|██████████| 243/243 [00:03<00:00, 71.47it/s]

--- Train epoch-9, step-2430 ---
loss: 0.8071



Evaluation: 100%|██████████| 31/31 [00:00<00:00, 256.04it/s]

--- Eval epoch-9, step-2430 ---
pr_auc: 0.6187
roc_auc: 0.6039
f1: 0.4269
loss: 0.6895




Epoch 10 / 15: 100%|██████████| 243/243 [00:03<00:00, 79.10it/s]


--- Train epoch-10, step-2673 ---
loss: 0.7709


Evaluation: 100%|██████████| 31/31 [00:00<00:00, 134.46it/s]

--- Eval epoch-10, step-2673 ---
pr_auc: 0.6393
roc_auc: 0.6198
f1: 0.6864
loss: 0.7150
New best pr_auc score (0.6393) at epoch-10, step-2673




Epoch 11 / 15: 100%|██████████| 243/243 [00:03<00:00, 69.03it/s]

--- Train epoch-11, step-2916 ---
loss: 0.7761



Evaluation: 100%|██████████| 31/31 [00:00<00:00, 257.32it/s]

--- Eval epoch-11, step-2916 ---
pr_auc: 0.6172
roc_auc: 0.6027
f1: 0.3242
loss: 0.7546




Epoch 12 / 15: 100%|██████████| 243/243 [00:03<00:00, 78.62it/s]

--- Train epoch-12, step-3159 ---
loss: 0.7396



Evaluation: 100%|██████████| 31/31 [00:00<00:00, 183.00it/s]

--- Eval epoch-12, step-3159 ---
pr_auc: 0.6332
roc_auc: 0.6176
f1: 0.4724
loss: 0.7049




Epoch 13 / 15: 100%|██████████| 243/243 [00:03<00:00, 80.60it/s]

--- Train epoch-13, step-3402 ---
loss: 0.7206



Evaluation: 100%|██████████| 31/31 [00:00<00:00, 253.90it/s]

--- Eval epoch-13, step-3402 ---
pr_auc: 0.6470
roc_auc: 0.6315
f1: 0.6496
loss: 0.6777
New best pr_auc score (0.6470) at epoch-13, step-3402




Epoch 14 / 15: 100%|██████████| 243/243 [00:03<00:00, 74.24it/s]

--- Train epoch-14, step-3645 ---
loss: 0.6901



Evaluation: 100%|██████████| 31/31 [00:00<00:00, 257.77it/s]

--- Eval epoch-14, step-3645 ---
pr_auc: 0.6423
roc_auc: 0.6159
f1: 0.6416
loss: 0.6913
Loaded best model



Evaluation: 100%|██████████| 32/32 [00:00<00:00, 256.77it/s]

{'pr_auc': 0.7206943820433741, 'roc_auc': 0.6867013897339791, 'f1': 0.6968174204355109, 'loss': 0.6363492039963603}





In [32]:
# Baseline binary readmission classifier without pretrained embeddings. Its
# printed summary shows 256-wide embedding tables and an empty linear_layers
# ModuleDict.
model_no_pre = Transformer(
    dataset=mimic3_ds,
    mode="binary",
    label_key="label",
    feature_keys=["conditions", "procedures", "drugs"],
    embedding_dim=256,
)

In [33]:
# Train the baseline model with the same schedule as the pretrained-embedding
# run (15 epochs, lr 1e-4, pr_auc monitored on the validation loader).
trainer = Trainer(model=model_no_pre)
trainer.train(
    epochs=15,
    monitor="pr_auc",
    optimizer_params={"lr": 1e-4},
    train_dataloader=train_dataloader,
    val_dataloader=val_dataloader,
)

# Score the trained baseline on the held-out test loader and show the metrics.
metrics_no_pre = trainer.evaluate(test_dataloader)
print(metrics_no_pre)

Transformer(
  (embeddings): ModuleDict(
    (conditions): Embedding(4031, 256, padding_idx=0)
    (procedures): Embedding(1276, 256, padding_idx=0)
    (drugs): Embedding(194, 256, padding_idx=0)
  )
  (linear_layers): ModuleDict()
  (transformer): ModuleDict(
    (conditions): TransformerLayer(
      (transformer): ModuleList(
        (0): TransformerBlock(
          (attention): MultiHeadedAttention(
            (linear_layers): ModuleList(
              (0): Linear(in_features=256, out_features=256, bias=False)
              (1): Linear(in_features=256, out_features=256, bias=False)
              (2): Linear(in_features=256, out_features=256, bias=False)
            )
            (output_linear): Linear(in_features=256, out_features=256, bias=False)
            (attention): Attention()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (feed_forward): PositionwiseFeedForward(
            (w_1): Linear(in_features=256, out_features=1024, bias=True)
          

Epoch 0 / 15: 100%|██████████| 243/243 [00:03<00:00, 66.17it/s]

--- Train epoch-0, step-243 ---
loss: 1.2175



Evaluation: 100%|██████████| 31/31 [00:00<00:00, 227.09it/s]

--- Eval epoch-0, step-243 ---
pr_auc: 0.6090
roc_auc: 0.5884
f1: 0.6103
loss: 0.7941
New best pr_auc score (0.6090) at epoch-0, step-243




Epoch 1 / 15: 100%|██████████| 243/243 [00:02<00:00, 95.22it/s]

--- Train epoch-1, step-486 ---
loss: 1.0263



Evaluation: 100%|██████████| 31/31 [00:00<00:00, 270.01it/s]

--- Eval epoch-1, step-486 ---
pr_auc: 0.6337
roc_auc: 0.6155
f1: 0.6277
loss: 0.7491
New best pr_auc score (0.6337) at epoch-1, step-486




Epoch 2 / 15: 100%|██████████| 243/243 [00:02<00:00, 93.65it/s]

--- Train epoch-2, step-729 ---
loss: 0.9085



Evaluation: 100%|██████████| 31/31 [00:00<00:00, 267.19it/s]

--- Eval epoch-2, step-729 ---
pr_auc: 0.6457
roc_auc: 0.6255
f1: 0.6238
loss: 0.7093
New best pr_auc score (0.6457) at epoch-2, step-729




Epoch 3 / 15: 100%|██████████| 243/243 [00:03<00:00, 76.83it/s]

--- Train epoch-3, step-972 ---
loss: 0.8412



Evaluation: 100%|██████████| 31/31 [00:00<00:00, 198.09it/s]

--- Eval epoch-3, step-972 ---
pr_auc: 0.6532
roc_auc: 0.6369
f1: 0.6521
loss: 0.7020
New best pr_auc score (0.6532) at epoch-3, step-972








Epoch 4 / 15: 100%|██████████| 243/243 [00:02<00:00, 91.48it/s]

--- Train epoch-4, step-1215 ---
loss: 0.7811



Evaluation: 100%|██████████| 31/31 [00:00<00:00, 276.12it/s]

--- Eval epoch-4, step-1215 ---
pr_auc: 0.6603
roc_auc: 0.6385
f1: 0.6323
loss: 0.6859
New best pr_auc score (0.6603) at epoch-4, step-1215




Epoch 5 / 15: 100%|██████████| 243/243 [00:02<00:00, 99.31it/s] 

--- Train epoch-5, step-1458 ---
loss: 0.7451



Evaluation: 100%|██████████| 31/31 [00:00<00:00, 270.40it/s]

--- Eval epoch-5, step-1458 ---
pr_auc: 0.6580
roc_auc: 0.6382
f1: 0.6291
loss: 0.6849




Epoch 6 / 15: 100%|██████████| 243/243 [00:02<00:00, 93.74it/s]

--- Train epoch-6, step-1701 ---
loss: 0.7158



Evaluation: 100%|██████████| 31/31 [00:00<00:00, 224.82it/s]

--- Eval epoch-6, step-1701 ---
pr_auc: 0.6562
roc_auc: 0.6378
f1: 0.6371
loss: 0.6838




Epoch 7 / 15: 100%|██████████| 243/243 [00:02<00:00, 90.81it/s]


--- Train epoch-7, step-1944 ---
loss: 0.6777


Evaluation: 100%|██████████| 31/31 [00:00<00:00, 278.12it/s]

--- Eval epoch-7, step-1944 ---
pr_auc: 0.6590
roc_auc: 0.6417
f1: 0.6172
loss: 0.6782




Epoch 8 / 15: 100%|██████████| 243/243 [00:02<00:00, 95.73it/s]

--- Train epoch-8, step-2187 ---
loss: 0.6581



Evaluation: 100%|██████████| 31/31 [00:00<00:00, 281.33it/s]

--- Eval epoch-8, step-2187 ---
pr_auc: 0.6592
roc_auc: 0.6462
f1: 0.6437
loss: 0.6810




Epoch 9 / 15: 100%|██████████| 243/243 [00:03<00:00, 73.40it/s]

--- Train epoch-9, step-2430 ---
loss: 0.6387



Evaluation: 100%|██████████| 31/31 [00:00<00:00, 272.67it/s]

--- Eval epoch-9, step-2430 ---
pr_auc: 0.6576
roc_auc: 0.6428
f1: 0.6589
loss: 0.6868




Epoch 10 / 15: 100%|██████████| 243/243 [00:02<00:00, 84.12it/s]

--- Train epoch-10, step-2673 ---
loss: 0.6261



Evaluation: 100%|██████████| 31/31 [00:00<00:00, 267.80it/s]

--- Eval epoch-10, step-2673 ---
pr_auc: 0.6570
roc_auc: 0.6419
f1: 0.6217
loss: 0.6812




Epoch 11 / 15: 100%|██████████| 243/243 [00:03<00:00, 73.52it/s]

--- Train epoch-11, step-2916 ---
loss: 0.6116



Evaluation: 100%|██████████| 31/31 [00:00<00:00, 276.41it/s]

--- Eval epoch-11, step-2916 ---
pr_auc: 0.6522
roc_auc: 0.6367
f1: 0.5841
loss: 0.6851




Epoch 12 / 15: 100%|██████████| 243/243 [00:02<00:00, 92.59it/s]

--- Train epoch-12, step-3159 ---
loss: 0.6066



Evaluation: 100%|██████████| 31/31 [00:00<00:00, 278.50it/s]

--- Eval epoch-12, step-3159 ---
pr_auc: 0.6551
roc_auc: 0.6330
f1: 0.5935
loss: 0.6867




Epoch 13 / 15: 100%|██████████| 243/243 [00:03<00:00, 77.78it/s]

--- Train epoch-13, step-3402 ---
loss: 0.5941



Evaluation: 100%|██████████| 31/31 [00:00<00:00, 226.16it/s]

--- Eval epoch-13, step-3402 ---
pr_auc: 0.6570
roc_auc: 0.6378
f1: 0.6110
loss: 0.6855




Epoch 14 / 15: 100%|██████████| 243/243 [00:02<00:00, 90.75it/s]

--- Train epoch-14, step-3645 ---
loss: 0.5834



Evaluation: 100%|██████████| 31/31 [00:00<00:00, 269.48it/s]

--- Eval epoch-14, step-3645 ---
pr_auc: 0.6611
roc_auc: 0.6421
f1: 0.6327
loss: 0.6925
New best pr_auc score (0.6611) at epoch-14, step-3645
Loaded best model



Evaluation: 100%|██████████| 32/32 [00:00<00:00, 264.14it/s]

{'pr_auc': 0.697333603874223, 'roc_auc': 0.6722280364429949, 'f1': 0.6672694394213382, 'loss': 0.6479178862646222}



