In [1]:
from pyhealth.datasets import MIMIC3Dataset
from pyhealth.datasets import split_by_patient, get_dataloader
from pyhealth.models import Transformer, RNN, RETAIN
from pyhealth.tasks import mortality_prediction_mimic3_fn, drug_recommendation_mimic3_fn
from pyhealth.trainer import Trainer

dataset = MIMIC3Dataset(
    root='/data/physionet.org/files/mimiciii/1.4/',
    tables=["DIAGNOSES_ICD", "PROCEDURES_ICD", "PRESCRIPTIONS"],
    code_mapping={
        # "ICD9CM": "CCSCM", 
        # "ICD9PROC": "CCSPROC",
        "NDC": ("ATC", {"target_kwargs": {"level": 4}})
        },
    dev=False,
    refresh_cache=True
)


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
mimic3_ds = dataset.set_task(mortality_prediction_mimic3_fn)

train_dataset, val_dataset, test_dataset = split_by_patient(
    mimic3_ds, [0.8, 0.1, 0.1]
)
train_dataloader = get_dataloader(train_dataset, batch_size=32, shuffle=True)
val_dataloader = get_dataloader(val_dataset, batch_size=32, shuffle=False)
test_dataloader = get_dataloader(test_dataset, batch_size=32, shuffle=False)

Generating samples for mortality_prediction_mimic3_fn: 100%|██████████| 46520/46520 [00:00<00:00, 53705.41it/s] 


In [3]:
model_w_pre = RNN(
    dataset=mimic3_ds,
    feature_keys=["conditions", "procedures", "drugs"],
    label_key="label",
    mode="binary",
    pretrained_emb="KG",
    embedding_dim=256,
)

Loading pretrained embedding for conditions...


FileNotFoundError: [Errno 2] No such file or directory: '/home/pj20/.cache/pyhealth/medcode/embeddings/KG/special_tokens/special_tokens.json'

In [5]:
# STEP 4: define trainer
trainer = Trainer(model=model_w_pre)
trainer.train(
    train_dataloader=train_dataloader,
    val_dataloader=val_dataloader,
    epochs=10,
    monitor="roc_auc",
)

# STEP 5: evaluate
print(trainer.evaluate(test_dataloader))

RNN(
  (embeddings): ModuleDict(
    (conditions): Embedding(4031, 1536, padding_idx=0)
    (procedures): Embedding(1276, 1536, padding_idx=0)
    (drugs): Embedding(472, 1536, padding_idx=0)
  )
  (linear_layers): ModuleDict(
    (conditions): Linear(in_features=1536, out_features=256, bias=True)
    (procedures): Linear(in_features=1536, out_features=256, bias=True)
    (drugs): Linear(in_features=1536, out_features=256, bias=True)
  )
  (rnn): ModuleDict(
    (conditions): RNNLayer(
      (dropout_layer): Dropout(p=0.5, inplace=False)
      (rnn): GRU(256, 128, batch_first=True)
    )
    (procedures): RNNLayer(
      (dropout_layer): Dropout(p=0.5, inplace=False)
      (rnn): GRU(256, 128, batch_first=True)
    )
    (drugs): RNNLayer(
      (dropout_layer): Dropout(p=0.5, inplace=False)
      (rnn): GRU(256, 128, batch_first=True)
    )
  )
  (fc): Linear(in_features=384, out_features=1, bias=True)
)
Metrics: None
Device: cuda

Training:
Batch size: 32
Optimizer: <class 'torch.opt

Epoch 0 / 10: 100%|██████████| 245/245 [00:02<00:00, 104.30it/s]

--- Train epoch-0, step-245 ---
loss: 0.2499



Evaluation: 100%|██████████| 31/31 [00:00<00:00, 316.70it/s]

--- Eval epoch-0, step-245 ---
pr_auc: 0.1056
roc_auc: 0.6051
f1: 0.0000
loss: 0.2285
New best roc_auc score (0.6051) at epoch-0, step-245








Epoch 1 / 10: 100%|██████████| 245/245 [00:02<00:00, 118.08it/s]

--- Train epoch-1, step-490 ---
loss: 0.2203



Evaluation: 100%|██████████| 31/31 [00:00<00:00, 297.62it/s]

--- Eval epoch-1, step-490 ---
pr_auc: 0.0990
roc_auc: 0.6326
f1: 0.0000
loss: 0.2438
New best roc_auc score (0.6326) at epoch-1, step-490








Epoch 2 / 10: 100%|██████████| 245/245 [00:02<00:00, 108.92it/s]

--- Train epoch-2, step-735 ---
loss: 0.1885



Evaluation: 100%|██████████| 31/31 [00:00<00:00, 290.95it/s]

--- Eval epoch-2, step-735 ---
pr_auc: 0.0943
roc_auc: 0.6108
f1: 0.0294





loss: 0.2527



Epoch 3 / 10: 100%|██████████| 245/245 [00:02<00:00, 109.23it/s]

--- Train epoch-3, step-980 ---
loss: 0.1575



Evaluation: 100%|██████████| 31/31 [00:00<00:00, 220.30it/s]

--- Eval epoch-3, step-980 ---
pr_auc: 0.0800
roc_auc: 0.5842
f1: 0.0000
loss: 0.2945




Epoch 4 / 10: 100%|██████████| 245/245 [00:02<00:00, 103.89it/s]

--- Train epoch-4, step-1225 ---
loss: 0.1422



Evaluation: 100%|██████████| 31/31 [00:00<00:00, 292.67it/s]

--- Eval epoch-4, step-1225 ---
pr_auc: 0.0811
roc_auc: 0.5730
f1: 0.0000
loss: 0.3052




Epoch 5 / 10: 100%|██████████| 245/245 [00:02<00:00, 118.75it/s]

--- Train epoch-5, step-1470 ---
loss: 0.1248



Evaluation: 100%|██████████| 31/31 [00:00<00:00, 291.59it/s]

--- Eval epoch-5, step-1470 ---
pr_auc: 0.0760
roc_auc: 0.5693
f1: 0.0000
loss: 0.3454




Epoch 6 / 10: 100%|██████████| 245/245 [00:02<00:00, 113.34it/s]

--- Train epoch-6, step-1715 ---
loss: 0.1128



Evaluation: 100%|██████████| 31/31 [00:00<00:00, 285.59it/s]

--- Eval epoch-6, step-1715 ---
pr_auc: 0.0750
roc_auc: 0.5675
f1: 0.0230
loss: 0.3628




Epoch 7 / 10: 100%|██████████| 245/245 [00:01<00:00, 127.39it/s]

--- Train epoch-7, step-1960 ---
loss: 0.1004



Evaluation: 100%|██████████| 31/31 [00:00<00:00, 230.47it/s]

--- Eval epoch-7, step-1960 ---





pr_auc: 0.0783
roc_auc: 0.5633
f1: 0.0435
loss: 0.3941



Epoch 8 / 10: 100%|██████████| 245/245 [00:02<00:00, 108.35it/s]

--- Train epoch-8, step-2205 ---
loss: 0.0891



Evaluation: 100%|██████████| 31/31 [00:00<00:00, 293.65it/s]

--- Eval epoch-8, step-2205 ---
pr_auc: 0.0793
roc_auc: 0.5623
f1: 0.0426
loss: 0.4021




Epoch 9 / 10: 100%|██████████| 245/245 [00:02<00:00, 110.88it/s]

--- Train epoch-9, step-2450 ---
loss: 0.0859



Evaluation: 100%|██████████| 31/31 [00:00<00:00, 299.16it/s]

--- Eval epoch-9, step-2450 ---
pr_auc: 0.0725
roc_auc: 0.5369
f1: 0.0404
loss: 0.4394
Loaded best model



Evaluation: 100%|██████████| 30/30 [00:00<00:00, 291.37it/s]

{'pr_auc': 0.08721309061196421, 'roc_auc': 0.5811944141051645, 'f1': 0.0, 'loss': 0.2529959950596094}





In [11]:
model_no_pre = RNN(
    dataset=mimic3_ds,
    feature_keys=["conditions", "procedures", "drugs"],
    label_key="label",
    mode="binary",
    embedding_dim=256,
)

In [12]:
# STEP 4: define trainer
trainer = Trainer(model=model_no_pre)
trainer.train(
    train_dataloader=train_dataloader,
    val_dataloader=val_dataloader,
    epochs=10,
    monitor="roc_auc",
)

# STEP 5: evaluate
print(trainer.evaluate(test_dataloader))

RNN(
  (embeddings): ModuleDict(
    (conditions): Embedding(4031, 256, padding_idx=0)
    (procedures): Embedding(1276, 256, padding_idx=0)
    (drugs): Embedding(472, 256, padding_idx=0)
  )
  (linear_layers): ModuleDict()
  (rnn): ModuleDict(
    (conditions): RNNLayer(
      (dropout_layer): Dropout(p=0.5, inplace=False)
      (rnn): GRU(256, 128, batch_first=True)
    )
    (procedures): RNNLayer(
      (dropout_layer): Dropout(p=0.5, inplace=False)
      (rnn): GRU(256, 128, batch_first=True)
    )
    (drugs): RNNLayer(
      (dropout_layer): Dropout(p=0.5, inplace=False)
      (rnn): GRU(256, 128, batch_first=True)
    )
  )
  (fc): Linear(in_features=384, out_features=1, bias=True)
)
Metrics: None
Device: cuda

Training:
Batch size: 32
Optimizer: <class 'torch.optim.adam.Adam'>
Optimizer params: {'lr': 0.001}
Weight decay: 0.0
Max grad norm: None
Val dataloader: <torch.utils.data.dataloader.DataLoader object at 0x7ff8f40ef130>
Monitor: roc_auc
Monitor criterion: max
Epochs: 10

Epoch 0 / 10: 100%|██████████| 245/245 [00:02<00:00, 108.49it/s]

--- Train epoch-0, step-245 ---
loss: 0.2682



Evaluation: 100%|██████████| 31/31 [00:00<00:00, 320.92it/s]

--- Eval epoch-0, step-245 ---
pr_auc: 0.0857
roc_auc: 0.5966
f1: 0.0000
loss: 0.2269
New best roc_auc score (0.5966) at epoch-0, step-245




Epoch 1 / 10: 100%|██████████| 245/245 [00:02<00:00, 117.09it/s]

--- Train epoch-1, step-490 ---
loss: 0.2399



Evaluation: 100%|██████████| 31/31 [00:00<00:00, 304.03it/s]

--- Eval epoch-1, step-490 ---
pr_auc: 0.1045
roc_auc: 0.6664
f1: 0.0000
loss: 0.2243
New best roc_auc score (0.6664) at epoch-1, step-490




Epoch 2 / 10: 100%|██████████| 245/245 [00:01<00:00, 140.74it/s]

--- Train epoch-2, step-735 ---
loss: 0.2268



Evaluation: 100%|██████████| 31/31 [00:00<00:00, 320.79it/s]

--- Eval epoch-2, step-735 ---
pr_auc: 0.1159
roc_auc: 0.6684
f1: 0.0000
loss: 0.2242
New best roc_auc score (0.6684) at epoch-2, step-735




Epoch 3 / 10: 100%|██████████| 245/245 [00:01<00:00, 135.04it/s]

--- Train epoch-3, step-980 ---
loss: 0.2155



Evaluation: 100%|██████████| 31/31 [00:00<00:00, 319.88it/s]

--- Eval epoch-3, step-980 ---
pr_auc: 0.0996
roc_auc: 0.6543
f1: 0.0000
loss: 0.2319




Epoch 4 / 10: 100%|██████████| 245/245 [00:01<00:00, 132.37it/s]

--- Train epoch-4, step-1225 ---
loss: 0.2023



Evaluation: 100%|██████████| 31/31 [00:00<00:00, 322.48it/s]

--- Eval epoch-4, step-1225 ---
pr_auc: 0.0995
roc_auc: 0.6632
f1: 0.0000
loss: 0.2403




Epoch 5 / 10: 100%|██████████| 245/245 [00:01<00:00, 132.08it/s]

--- Train epoch-5, step-1470 ---
loss: 0.1892



Evaluation: 100%|██████████| 31/31 [00:00<00:00, 324.87it/s]

--- Eval epoch-5, step-1470 ---
pr_auc: 0.0883
roc_auc: 0.6207
f1: 0.0286
loss: 0.2617




Epoch 6 / 10: 100%|██████████| 245/245 [00:01<00:00, 137.77it/s]

--- Train epoch-6, step-1715 ---
loss: 0.1713



Evaluation: 100%|██████████| 31/31 [00:00<00:00, 193.78it/s]

--- Eval epoch-6, step-1715 ---
pr_auc: 0.0921
roc_auc: 0.6307
f1: 0.0294
loss: 0.2663




Epoch 7 / 10: 100%|██████████| 245/245 [00:01<00:00, 137.64it/s]

--- Train epoch-7, step-1960 ---
loss: 0.1557



Evaluation: 100%|██████████| 31/31 [00:00<00:00, 328.55it/s]


--- Eval epoch-7, step-1960 ---
pr_auc: 0.0966
roc_auc: 0.6492
f1: 0.0267
loss: 0.2688



Epoch 8 / 10: 100%|██████████| 245/245 [00:01<00:00, 137.83it/s]

--- Train epoch-8, step-2205 ---
loss: 0.1467



Evaluation: 100%|██████████| 31/31 [00:00<00:00, 320.07it/s]

--- Eval epoch-8, step-2205 ---
pr_auc: 0.0889
roc_auc: 0.6352
f1: 0.0263
loss: 0.2841




Epoch 9 / 10: 100%|██████████| 245/245 [00:01<00:00, 136.64it/s]

--- Train epoch-9, step-2450 ---
loss: 0.1380



Evaluation: 100%|██████████| 31/31 [00:00<00:00, 320.66it/s]

--- Eval epoch-9, step-2450 ---
pr_auc: 0.0906
roc_auc: 0.6324
f1: 0.0267
loss: 0.2902
Loaded best model



Evaluation: 100%|██████████| 30/30 [00:00<00:00, 314.23it/s]

{'pr_auc': 0.09722132504096945, 'roc_auc': 0.5907902948978, 'f1': 0.0, 'loss': 0.23656417181094488}



