In [2]:
# Name of the prediction task; it is interpolated into the pickled dataset paths.
task = "lenofstay"

# Ratios swept when loading validation sets (the file suffix is computed as `1 - ratio`).
ratios = [0.1, 0.2, 0.3, 0.4, 0.5, 0.7, 0.9]

In [3]:
from pyhealth.datasets import split_by_patient, get_dataloader
import pickle

# Read the pre-built sample dataset for the selected task from disk.
# NOTE: pickle.load can execute arbitrary code stored in the file -- only open trusted pickles.
sample_path = f'/data/pj20/exp_data/ccscm_ccsproc/sample_dataset_mimic3_{task}_th015.pkl'
with open(sample_path, 'rb') as sample_file:
    sample_dataset = pickle.load(sample_file)

# Split with ratios 0.8 / 0.1 / 0.1 via split_by_patient; the second split is discarded.
# NOTE(review): `train_ratio` presumably sub-samples the training split -- confirm against
# the installed split_by_patient signature.
split_ratios = [0.8, 0.1, 0.1]
train_dataset, _, test_dataset = split_by_patient(
    sample_dataset,
    split_ratios,
    train_ratio=1.0,
    seed=528,
)

# Only the training loader is shuffled; the test loader is created with shuffle=False.
train_loader = get_dataloader(train_dataset, shuffle=True, batch_size=64)
test_loader = get_dataloader(test_dataset, shuffle=False, batch_size=64)

In [4]:
from pyhealth.trainer import Trainer
import torch
from pyhealth.models import Transformer, RETAIN, SafeDrug, MICRON, CNN, RNN, GAMENet
from collections import defaultdict


# Load the validation set stored for every ratio and build one loader per ratio.
#
# Previously `val_dataset` / `val_loader` were simply rebound on each pass, so the data
# for every ratio except the final one was thrown away as soon as it was loaded.  Each
# loader is now also kept in `val_loaders`, keyed by its ratio.  `val_dataset` and
# `val_loader` still end up bound to the last ratio, exactly as before.  Keeping the
# loaders means all loaded datasets stay in memory.
#
# NOTE(review): `1-ratio` is interpolated with its full float repr -- 1-0.7 yields
# 0.30000000000000004 and 1-0.9 yields 0.09999999999999998 -- so these paths resolve only
# if the files were written with the very same expression; confirm before rounding.
# NOTE: pickle.load can execute arbitrary code; only load files from a trusted source.
val_loaders = {}
for ratio in ratios:
    with open(f'/data/pj20/exp_data/ccscm_ccsproc_atc3/val_dataset_mimic3_{task}_th015_{1-ratio}.pkl', 'rb') as f:
        val_dataset = pickle.load(f)
    val_loader = get_dataloader(val_dataset, batch_size=64, shuffle=False)
    val_loaders[ratio] = val_loader


In [7]:
from pyhealth.trainer import Trainer
import torch
from pyhealth.models import Transformer, RETAIN, SafeDrug, MICRON, CNN, RNN, GAMENet
from collections import defaultdict

# Train every selected model N_RUNS times and collect the evaluation metrics of each run.
#
# Metric list and checkpoint-selection metric per prediction mode.  Switch MODE instead of
# commenting Trainer blocks in and out (the model's feature_keys / label_key below must be
# adapted as well when the mode changes).
METRICS_BY_MODE = {
    "binary": (
        ["pr_auc", "roc_auc", "accuracy", "f1", "jaccard"],
        "accuracy",
    ),
    "multilabel": (
        ["pr_auc_samples", "roc_auc_samples", "f1_samples", "jaccard_samples"],
        "pr_auc_samples",
    ),
    "multiclass": (
        ["roc_auc_weighted_ovr", "cohen_kappa", "accuracy", "f1_weighted"],
        "roc_auc_weighted_ovr",
    ),
}
MODE = "multilabel"
N_RUNS = 1   # trainings per model; with a single run the std across runs is always 0
EPOCHS = 5

# Maps model class name -> list holding one metrics dict per run.
results = defaultdict(list)

# The device does not depend on the loop, so it is resolved once.  'cuda:1' is only valid
# when at least two GPUs are visible; fall back to the first GPU, then to the CPU.
if torch.cuda.device_count() > 1:
    device = torch.device('cuda:1')
elif torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

metrics, monitor = METRICS_BY_MODE[MODE]

# NOTE(review): the model is configured for label_key="drugs" while `task` is set to
# "lenofstay" in the first cell -- confirm that sample_dataset really carries a "drugs" label.
for i in range(N_RUNS):
    for model_ in [
        # Transformer, 
        # RETAIN,
        # SafeDrug,
        # MICRON,
        GAMENet
        ]:
        try:
            # Generic constructor shared by the task-agnostic models.
            model = model_(
                dataset=sample_dataset,
                feature_keys=["conditions", "procedures"],
                label_key="drugs",
                mode=MODE,
            )
        except Exception as exc:
            # Some models reject these keyword arguments and are built from the dataset
            # alone.  `Exception` (not a bare except) keeps Ctrl-C / SystemExit working,
            # and the reason for the fallback is reported rather than hidden.
            print(
                f"{model_.__name__}: generic constructor failed "
                f"({type(exc).__name__}: {exc}); retrying with dataset only"
            )
            model = model_(dataset=sample_dataset)

        trainer = Trainer(model=model, device=device, metrics=list(metrics))
        trainer.train(
            train_dataloader=train_loader,
            val_dataloader=val_loader,
            epochs=EPOCHS,
            monitor=monitor,
        )

        # NOTE(review): scores are computed on val_loader, the same data used for
        # monitoring during training; test_loader is never used -- confirm this is intended.
        results[model_.__name__].append(trainer.evaluate(val_loader))

GAMENet(
  (embeddings): ModuleDict(
    (conditions): Embedding(283, 128, padding_idx=0)
    (procedures): Embedding(223, 128, padding_idx=0)
  )
  (cond_rnn): GRU(128, 128, batch_first=True)
  (proc_rnn): GRU(128, 128, batch_first=True)
  (query): Sequential(
    (0): ReLU()
    (1): Linear(in_features=256, out_features=128, bias=True)
  )
  (gamenet): GAMENetLayer(
    (ehr_gcn): GCN(
      (gcn1): GCNLayer()
      (dropout_layer): Dropout(p=0.5, inplace=False)
      (gcn2): GCNLayer()
    )
    (ddi_gcn): GCN(
      (gcn1): GCNLayer()
      (dropout_layer): Dropout(p=0.5, inplace=False)
      (gcn2): GCNLayer()
    )
    (fc): Linear(in_features=384, out_features=197, bias=True)
    (bce_loss_fn): BCEWithLogitsLoss()
  )
)
Metrics: ['pr_auc_samples', 'roc_auc_samples', 'f1_samples', 'jaccard_samples']
Device: cuda:1

Training:
Batch size: 64
Optimizer: <class 'torch.optim.adam.Adam'>
Optimizer params: {'lr': 0.001}
Weight decay: 0.0
Max grad norm: None
Val dataloader: <torch.utils.

In [12]:
# Mean of every metric across the runs recorded for each model.
avg_results = defaultdict(dict)

for model_name, runs in results.items():
    n_runs = len(runs)
    # The first run's dict supplies the metric names.
    for metric in runs[0]:
        total = sum(run[metric] for run in runs)
        avg_results[model_name][metric] = total / n_runs

In [13]:
import numpy as np
# Standard deviation of every metric across the runs recorded for each model
# (np.std with its default arguments; a single run always yields 0.0).
variation_results = defaultdict(dict)

for model_name, runs in results.items():
    # The first run's dict supplies the metric names.
    for metric in runs[0]:
        per_run_values = [run[metric] for run in runs]
        variation_results[model_name][metric] = np.std(per_run_values)

In [14]:
# Display the per-model mean of each metric.
avg_results

defaultdict(dict,
            {'GAMENet': {'pr_auc_samples': 0.4980838198236469,
              'roc_auc_samples': 0.7424090396318291,
              'f1_samples': 0.4728838360695048,
              'jaccard_samples': 0.32078592771277264,
              'loss': 0.6370396333582261}})

In [11]:
# Display the per-model standard deviation of each metric.
variation_results

defaultdict(dict,
            {'GAMENet': {'pr_auc_samples': 0.0,
              'roc_auc_samples': 0.0,
              'f1_samples': 0.0,
              'jaccard_samples': 0.0,
              'loss': 0.0}})