In [1]:
import pandas as pd
import numpy as np
from pyhealth.datasets import eICUDataset

# Path: preprocess/preprocessing.ipynb
dataset_path = "../train/eicu/"
output_path = "../preData/"

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# load patient data
df = pd.read_csv(dataset_path + "patient.csv")
df_new = pd.DataFrame()
df_new['hospitalid'] = df['hospitalid'].unique()
df_new['region'] = pd.Series(dtype='int')
df_new.head()
# Save 
df_new.to_csv(dataset_path + "hospital.csv", index=False)

In [3]:
eicu_base = eICUDataset(
    root=dataset_path,
    tables = ['treatment', 'medication', 'lab'],
)

In [8]:
from pyhealth.tasks import mortality_prediction_eicu_fn
from pyhealth.datasets import split_by_patient, get_dataloader

eicu_base.stat()    

eicusample = eicu_base.set_task(task_fn=mortality_prediction_eicu_fn)

train_ds, val_ds, test_ds = split_by_patient(eicusample, 0.8, 0.1, 0.1)
train_loader = get_dataloader(train_ds, batch_size=32, shuffle=True)
val_loader = get_dataloader(val_ds, batch_size=32, shuffle=False)
test_loader = get_dataloader(test_ds, batch_size=32, shuffle=False)



Statistics of base dataset (dev=False):
	- Dataset: eICUDataset
	- Number of patients: 101851
	- Number of visits: 101851
	- Number of visits per patient: 1.0000
	- Number of events per visit in treatment: 6.3922
	- Number of events per visit in medication: 7.7206
	- Number of events per visit in lab: 34.6887



Generating samples for mortality_prediction_eicu_fn: 100%|██████████| 101851/101851 [00:00<00:00, 1065776.32it/s]


IndexError: list index out of range

In [None]:
from pyhealth.models import Transformer

model = Transformer(
    dataset=eicusample,
    feature_keys=["lab", "medication", "treatment"],
    label_key="label",
    mode="binary",
)

from pyhealth.trainer import Trainer

trainer = Trainer(model=model)
trainer.train(
    train_dataloader=train_loader,
    val_dataloader=val_loader,
    epochs=50,
    monitor="pr_auc_samples",
)

