In [23]:
import pandas as pd
from pandas import DataFrame as df
import numpy as np
import torch
from torch import nn
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.nn.utils.rnn import pad_sequence

In [2]:
# Pick the compute device once: the GPU when PyTorch reports CUDA as
# available, otherwise the CPU. The bare name on the last line displays it.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

'cpu'

In [24]:
# Read the four source tables from the local data/ directory.
patients = pd.read_csv("data/patients.csv")
encounters = pd.read_csv("data/admissions.csv")
lab_events = pd.read_csv("data/lab_events.csv")
labels = pd.read_csv("data/labels.csv")

# Report each table's (rows, columns) shape, one per line.
print(patients.shape, encounters.shape, lab_events.shape, labels.shape, sep="\n")

(100, 2)
(265, 52)
(32620, 6)
(265, 3)


In [122]:
class PatientEncounter(Dataset):
    """Dataset with one sample per row of ``encounters`` (a patient-encounter pair).

    Args:
        patients: DataFrame with a ``subject_id`` column.
        encounters: DataFrame with ``subject_id`` and ``hadm_id`` columns; its
            rows define the samples and their order.
        lab_events: DataFrame with ``subject_id`` and ``hadm_id`` columns.
        labels: DataFrame with ``subject_id``, ``hadm_id`` and
            ``READMIT_ONE_WEEK`` columns.

    Each item is ``(X, y)`` where ``X`` is the list
    ``[patient_rows, encounter_rows, lab_event_rows]`` (every matching row,
    with all of its columns, as a tensor) and ``y`` holds the matching
    ``READMIT_ONE_WEEK`` values. All tensors are moved to the module-level
    ``device``.

    NOTE(review): ``torch.from_numpy`` only accepts numeric arrays, so this
    presumably expects every column of the three feature tables to be
    numeric -- confirm against the CSV schemas.
    """

    def __init__(self, patients, encounters, lab_events, labels):
        super().__init__()
        self.patients = patients
        self.encounters = encounters
        self.lab_events = lab_events
        self.labels = labels

        self.patient_ids = encounters.subject_id
        self.encounter_ids = encounters.hadm_id

    def __len__(self):
        """Return the number of patient-encounter pairs."""
        return len(self.patient_ids)

    @staticmethod
    def _encounter_rows(table, patient_id, encounter_id):
        """Return the rows of ``table`` matching both ``subject_id`` and ``hadm_id``."""
        mask = (table.subject_id == patient_id) & (table.hadm_id == encounter_id)
        return table.loc[mask]

    def __getitem__(self, index):
        """Return ``(X, y)`` for the encounter at *position* ``index``."""
        # Use positional access: plain ``series[index]`` is a label lookup and
        # fails (or picks the wrong row) when ``encounters`` does not carry a
        # default 0..n-1 index, e.g. after filtering or a train/test split.
        patient_id = self.patient_ids.iloc[index]
        encounter_id = self.encounter_ids.iloc[index]

        # Load data for the given patient-encounter
        patient_rows = self.patients.loc[self.patients.subject_id == patient_id]
        encounter_rows = self._encounter_rows(self.encounters, patient_id, encounter_id)
        lab_event_rows = self._encounter_rows(self.lab_events, patient_id, encounter_id)
        X = [
            torch.from_numpy(rows.values).to(device)
            for rows in (patient_rows, encounter_rows, lab_event_rows)
        ]

        # Read the labels handed to the constructor rather than whatever
        # global ``labels`` happens to exist in the notebook namespace.
        label_rows = self._encounter_rows(self.labels, patient_id, encounter_id)
        y = torch.from_numpy(label_rows.READMIT_ONE_WEEK.values).to(device)

        return X, y

# Custom collate function for samples whose feature tensors differ in size.
def collate_fn(batch):
    """Combine ``(X, y)`` samples into one batch.

    The per-sample ``X`` entries are returned as a plain Python list, in
    batch order, without stacking or padding. The ``y`` tensors are
    concatenated along dimension 0.
    """
    features, targets = zip(*batch)
    return list(features), torch.cat(targets, dim=0)

# Wrap the four tables in the dataset and iterate over it in its original
# order, 64 samples at a time, using the custom collate function.
train_data = PatientEncounter(
    patients=patients,
    encounters=encounters,
    lab_events=lab_events,
    labels=labels,
)
train_dataloader = DataLoader(
    dataset=train_data,
    batch_size=64,
    shuffle=False,
    collate_fn=collate_fn,
)

In [147]:
class LabModule(nn.Module):
    """Embed lab events into a 5-d representation.

    ``forward`` reads column 0 of its input as the lab type and column 1 as
    the lab value, projects them to 10 and 5 features respectively, and maps
    the concatenated 15 features to 5 outputs.
    """

    def __init__(self):
        super().__init__()

        # Lab type: 1-d input -> 10-d representation. (An nn.Embedding over
        # lab-type indices was considered as an alternative to this Linear.)
        self.lab_type = nn.Linear(in_features=1, out_features=10)
        # Lab value: 1-d input -> 5-d representation.
        self.lab_value = nn.Linear(in_features=1, out_features=5)
        # 10-d lab type + 5-d lab value -> 5-d output.
        self.layer_out = nn.Linear(in_features=15, out_features=5)

    def forward(self, x):
        """Return the 5-d representation for each row of ``x``."""
        # Indexing with ``None`` restores the feature axis dropped by the
        # column selection, so each Linear layer sees one feature per row.
        type_repr = self.lab_type(x[:, 0, None])
        value_repr = self.lab_value(x[:, 1, None])
        combined = torch.cat((type_repr, value_repr), dim=1)
        return self.layer_out(combined)

class PatientEncounterModel(nn.Module):
    """Map one patient-encounter's features to a single sigmoid output.

    ``forward`` takes a sequence ``x`` and uses:
        x[0]  patient features
        x[1]  encounter features
        x[2]  lab events (passed through ``LabModule`` first)

    Further inputs are planned but not used yet: x[3] vitals,
    x[4] prescriptions, x[5] diagnoses, x[6] procedures, x[7] notes.
    """

    def __init__(self):
        super().__init__()
        self.lab_module = LabModule()
        # 59 input features; this matches the notebook's example input of
        # 2 patient + 52 encounter columns plus the 5 lab-module outputs.
        self.layer_out = nn.Linear(in_features=59, out_features=1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return the sigmoid of the linear output layer for the inputs in ``x``."""
        lab_repr = self.lab_module(x[2])
        # Concatenate along the feature axis; this requires x[0], x[1] and the
        # lab representation to have the same number of rows.
        # NOTE(review): an encounter with several lab-event rows would
        # presumably break this concatenation -- confirm how lab rows are
        # meant to be pooled.
        combined = torch.cat((x[0], x[1], lab_repr), dim=1)
        logits = self.layer_out(combined)
        return self.sigmoid(logits)

# Build the full model and place it on the selected device.
model = PatientEncounterModel().to(device)
model

# Smoke test with one synthetic sample: a list of three random tensors shaped
# (1, 2) for the patient, (1, 52) for the encounter and (1, 2) for lab events.
X_example = [torch.randn(1, n_features).to(device) for n_features in (2, 52, 2)]
model(X_example)

tensor([[0.2363]], grad_fn=<SigmoidBackward0>)

In [6]:
# Toy-experiment settings.
n_input = 2
n_out = 1
batch_size = 100
learning_rate = 0.01

# Random inputs drawn from a standard normal distribution.
data_x = torch.randn(batch_size, n_input)
# Random binary targets stored as floats: 1.0 wherever a uniform [0, 1) draw
# falls below 0.5, else 0.0.
data_y = torch.rand(size=(batch_size, 1)).lt(0.5).float()
print(data_x.size(), data_y.size(), sep="\n")

torch.Size([100, 2])
torch.Size([100, 1])


In [150]:
# Shape check of the two input projections inside the lab sub-module.
# ``model`` is a PatientEncounterModel, so the ``lab_type`` / ``lab_value``
# layers are reached through ``model.lab_module``; accessing them directly on
# ``model`` raises AttributeError on a fresh kernel.
# The inputs are moved to ``device`` because the model's parameters live there.
a = model.lab_module.lab_type(data_x[:, 0].unsqueeze(1).to(device))
b = model.lab_module.lab_value(data_x[:, 1].unsqueeze(1).to(device))
print(a.shape)
print(b.shape)
print(torch.cat((a, b), dim=1).shape)

torch.Size([100, 10])
torch.Size([100, 5])
torch.Size([100, 15])


In [125]:
# Selecting a single column drops the column axis, so concatenating the two
# columns along dim 0 produces one flat 1-D tensor rather than two features
# per row.
print(data_x.select(1, 0).shape)
print(torch.cat([data_x[:, col] for col in range(2)], dim=0).shape)

torch.Size([100])
torch.Size([200])


In [158]:
# Shape check of the lab sub-module on the synthetic (batch, 2) data.
# The full model expects a list of three tensors, so the raw tensor is fed to
# ``model.lab_module`` instead; its output is (batch, 5). The module is called
# directly rather than through ``.forward`` so that registered hooks run, and
# the input is moved to ``device`` because the model's parameters live there.
print(model.lab_module(data_x.to(device)).shape)

torch.Size([100, 5])
