In [2]:
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset
from torch.utils.data.dataset import random_split
import pickle

### Load preprocessed data to save time

In [10]:
with open("los.pkl","rb") as f:
    los_7 = pickle.load(f)

with open("mort.pkl","rb") as f:
    mort = pickle.load(f)
    
with open("intervention_seq.pkl","rb") as f:
    seqs = pickle.load(f)

In [11]:
class CustomDataset(Dataset):
    
    def __init__(self, seqs, labels):
        self.x = seqs
        self.y = labels
    
    def __len__(self):
        
        """
        TODO: Return the number of samples (i.e. patients).
        """
        
        # your code here
        return len(self.y)
    
    def __getitem__(self, index):
        
        """
        TODO: Generates one sample of data.
        
        Note that you DO NOT need to covert them to tensor as we will do this later.
        """
        
        # your code here
        return self.x[index], self.y[index]
    

In [7]:
dataset = CustomDataset(seqs, los_7)

In [8]:
len(dataset)

34472

In [9]:
def collate_fn(data):
    """
    TODO: Collate the the list of samples into batches. For each patient, you need to pad the diagnosis
        sequences to the sample shape (max # visits, max # diagnosis codes). The padding infomation
        is stored in `mask`.
    
    Arguments:
        data: a list of samples fetched from `CustomDataset`
        
    Outputs:
        x: a tensor of shape (# patiens, max # visits, max # diagnosis codes) of type torch.long
        masks: a tensor of shape (# patiens, max # visits, max # diagnosis codes) of type torch.bool
        rev_x: same as x but in reversed time. This will be used in our RNN model for masking 
        rev_masks: same as mask but in reversed time. This will be used in our RNN model for masking
        y: a tensor of shape (# patiens) of type torch.float
        
    Note that you can obtains the list of diagnosis codes and the list of hf labels
        using: `sequences, labels = zip(*data)`
    """

    sequences, labels = zip(*data)

    y = torch.tensor(labels, dtype=torch.float)
    
    num_patients = len(sequences)
    num_interventions = [len(patient) for patient in sequences]
    num_codes = [len(intervention) for patient in sequences for intervention in patient]

    max_num_interventions = max(num_interventions)
    max_num_codes = max(num_codes)
    
    x = torch.zeros((num_patients, max_num_interventions, max_num_codes), dtype=torch.long)
    rev_x = torch.zeros((num_patients, max_num_interventions, max_num_codes), dtype=torch.long)
    masks = torch.zeros((num_patients, max_num_interventions, max_num_codes), dtype=torch.bool)
    rev_masks = torch.zeros((num_patients, max_num_interventions, max_num_codes), dtype=torch.bool)
    for i_patient, patient in enumerate(sequences):
        for j_intervention, intervention in enumerate(patient):
            """
            TODO: update `x`, `rev_x`, `masks`, and `rev_masks`
            """
            # your code here
            for idx, code in enumerate(intervention):
                x[i_patient,j_intervention,idx] = code
                masks[i_patient,j_intervention,idx] = 1
                rev_x[i_patient, len(patient)-j_intervention-1, idx] = code
                rev_masks[i_patient, len(patient)-j_intervention-1, idx] = 1
    
    return x, masks, rev_x, rev_masks, y

In [10]:
split = int(len(dataset)*0.8)

lengths = [split, len(dataset) - split]
train_dataset, val_dataset = random_split(dataset, lengths)

print("Length of train dataset:", len(train_dataset))
print("Length of val dataset:", len(val_dataset))

Length of train dataset: 27577
Length of val dataset: 6895


In [18]:
from torch.utils.data import DataLoader

def load_data(train_dataset, val_dataset, collate_fn):
    
    '''
    TODO: Implement this function to return the data loader for  train and validation dataset. 
    Set batchsize to 32. Set `shuffle=True` only for train dataloader.
    
    Arguments:
        train dataset: train dataset of type `CustomDataset`
        val dataset: validation dataset of type `CustomDataset`
        collate_fn: collate function
        
    Outputs:
        train_loader, val_loader: train and validation dataloaders
    
    Note that you need to pass the collate function to the data loader `collate_fn()`.
    '''
    
    # your code here
    train_loader = DataLoader(train_dataset, batch_size=16, collate_fn=collate_fn, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=16, collate_fn=collate_fn)
    
    return train_loader, val_loader

In [12]:
train_loader, val_loader = load_data(train_dataset, val_dataset, collate_fn)

In [27]:
def sum_embeddings_with_mask(x, masks):
    """
    TODO: mask select the embeddings for true visits (not padding visits) and then
        sum the embeddings for each visit up.

    Arguments:
        x: the embeddings of diagnosis sequence of shape (batch_size, # visits, # diagnosis codes, embedding_dim)
        masks: the padding masks of shape (batch_size, # visits, # diagnosis codes)

    Outputs:
        sum_embeddings: the sum of embeddings of shape (batch_size, # visits, embedding_dim)
        
    NOTE: Do NOT use for loop.

    """
    x_masked = x * masks.unsqueeze(dim=-1)
    # your code here
    return x_masked.sum(dim=2)

### BILSTM

In [28]:
def get_last_visit(hidden_states, masks):
    """
    TODO: obtain the hidden state for the last true visit (not padding visits)

    Arguments:
        hidden_states: the hidden states of each visit of shape (batch_size, # visits, embedding_dim)
        masks: the padding masks of shape (batch_size, # visits, # diagnosis codes)

    Outputs:
        last_hidden_state: the hidden state for the last true visit of shape (batch_size, embedding_dim)
        
    NOTE: DO NOT use for loop.
    
    HINT: Consider using `torch.gather()`.
    """
    
    # your code here
    first_zero_idx = torch.argmin(masks.sum(dim=2),axis=1)
    last_nonzero_idx = first_zero_idx - 1
    batch_size = hidden_states.shape[0]
    return hidden_states[list(range(batch_size)), last_nonzero_idx]

In [15]:
class NaiveRNN(nn.Module):
    
    """
    TODO: implement the naive RNN model above.
    """
    
    def __init__(self, num_codes):
        super().__init__()
        """
        TODO: 
            1. Define the embedding layer using `nn.Embedding`. Set `embDimSize` to 128.
            2. Define the RNN using `nn.GRU()`; Set `hidden_size` to 128. Set `batch_first` to True.
            2. Define the RNN for the reverse direction using `nn.GRU()`;
               Set `hidden_size` to 128. Set `batch_first` to True.
            3. Define the linear layers using `nn.Linear()`; Set `in_features` to 256, and `out_features` to 1.
            4. Define the final activation layer using `nn.Sigmoid().

        Arguments:
            num_codes: total number of diagnosis codes
        """
        # your code here
        self.embedding = nn.Embedding(num_codes+1,embedding_dim=128)
        self.rnn = nn.LSTM(128, 128, 1, batch_first=True, bidirectional=True)
        self.fc1 = nn.Linear(in_features=256, out_features=256)
        self.fc2 = nn.Linear(in_features=256, out_features=1)
        self.relu = nn.ReLU()
        # self.sigmoid = nn.Sigmoid()

    
    def forward(self, x, masks):
        """
        Arguments:
            x: the diagnosis sequence of shape (batch_size, # visits, # diagnosis codes)
            masks: the padding masks of shape (batch_size, # visits, # diagnosis codes)

        Outputs:
            probs: probabilities of shape (batch_size)
        """
        
        batch_size = x.shape[0]
        
        # 1. Pass the sequence through the embedding layer;
        x = self.embedding(x)

        
        # 2. Sum the embeddings for each diagnosis code up for a visit of a patient.
        x = sum_embeddings_with_mask(x, masks)
        
        # 3. Pass the embegginds through the RNN layer;
        output, (hn, cn) = self.rnn(x)
        # hn = hn.view(batch_size,-1)
        # 4. Obtain the hidden state at the last visit.
        true_h_n = get_last_visit(output, masks)
        
        """
        TODO:
            5. Do the step 1-4 again for the reverse order, and concatenate the hidden
               states for both directions;
        """
#         # 5.1 Pass the sequence through the embedding layer;
#         rev_x = self.embedding(rev_x)
#         # 5.2 Sum the embeddings for each diagnosis code up for a visit of a patient.
#         rev_x = sum_embeddings_with_mask(rev_x, rev_masks)
        
#         # 5.3 Pass the embegginds through the RNN layer;
#         output, _ = self.rnn(rev_x)
#         # 5.4 Obtain the hidden state at the last visit.
#         true_h_n_rev = get_last_visit(output, rev_masks)
        
        
        # 6. Pass the hidden state through the linear and activation layers.
        x = self.relu(self.fc1(true_h_n))
        out = self.fc2(x)
        return out.view(batch_size)

In [16]:
rnn = NaiveRNN(14)

In [17]:
criterion = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([50]))
optimizer = torch.optim.Adam(rnn.parameters(), lr=0.0001, weight_decay=0.001)

In [18]:
from sklearn.metrics import precision_recall_fscore_support, roc_auc_score, average_precision_score


def eval_model(model, val_loader):
    
    """
    TODO: evaluate the model.
    
    Arguments:
        model: the RNN model
        val_loader: validation dataloader
        
    Outputs:
        precision: overall precision score
        recall: overall recall score
        f1: overall f1 score
        roc_auc: overall roc_auc score
        
    Note that please pass all four arguments to the model so that we can use this function for both 
    models. (Use `model(x, masks, rev_x, rev_masks)`.)
        
    HINT: checkout https://scikit-learn.org/stable/modules/classes.html#module-sklearn.metrics
    """
    
    model.eval()
    y_pred = torch.LongTensor()
    y_score = torch.Tensor()
    y_true = torch.LongTensor()
    model.eval()
    for x, masks, _, _, y in val_loader:
        y_hat = model(x, masks)
        y_score = torch.cat((y_score,  y_hat.detach().to('cpu')), dim=0)
        y_hat = (y_hat > 0.5).int()
        y_pred = torch.cat((y_pred,  y_hat.detach().to('cpu')), dim=0)
        y_true = torch.cat((y_true, y.detach().to('cpu')), dim=0)
    """
    TODO:
        Calculate precision, recall, f1, and roc auc scores.
        Use `average='binary'` for calculating precision, recall, and fscore.
    """
    p, r, f,_ = precision_recall_fscore_support(y_true, y_pred, average='binary')
    roc_auc = roc_auc_score(y_true, y_score)
    auprc = average_precision_score(y_true, y_score, average='macro')

    return p, r, f, roc_auc, auprc

In [19]:
def train(model, train_loader, val_loader, n_epochs):
    """
    TODO: train the model.
    
    Arguments:
        model: the RNN model
        train_loader: training dataloder
        val_loader: validation dataloader
        n_epochs: total number of epochs
        
    You need to call `eval_model()` at the end of each training epoch to see how well the model performs 
    on validation data.
        
    Note that please pass all four arguments to the model so that we can use this function for both 
    models. (Use `model(x, masks, rev_x, rev_masks)`.)
    """
    
    for epoch in range(n_epochs):
        model.train()
        train_loss = 0
        for x, masks, rev_x, rev_masks, y in train_loader:
            """
            TODO:
                1. zero grad
                2. model forward
                3. calculate loss
                4. loss backward
                5. optimizer step
            """
            optimizer.zero_grad()
            y_pred = model(x, masks)
            loss = criterion(y_pred, y)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
        train_loss = train_loss / len(train_loader)
        print('Epoch: {} \t Training Loss: {:.6f}'.format(epoch+1, train_loss))
        p, r, f, roc_auc, auprc = eval_model(model, val_loader)
        print('Epoch: {} \t Validation p: {:.2f}, r:{:.2f}, f: {:.2f}, roc_auc: {:.2f}, auprc: {:.2f}'
              .format(epoch+1, p, r, f, roc_auc,auprc))

In [None]:
# number of epochs to train the model
n_epochs = 10
train(rnn, train_loader, val_loader, n_epochs)

In [None]:
p, r, f, roc_auc, auprc = eval_model(rnn, val_loader)
print(roc_auc)

### Demographics injected data

In [21]:
with open("los.pkl","rb") as f:
    los_7 = pickle.load(f)

with open("demograph_intervention_seq.pkl","rb") as f:
    demo_seqs = pickle.load(f)

In [22]:
demo_dataset = CustomDataset(demo_seqs, los_7)

In [23]:
split = int(len(demo_dataset)*0.8)

lengths = [split, len(demo_dataset) - split]
train_dataset, val_dataset = random_split(demo_dataset, lengths)

print("Length of train dataset:", len(train_dataset))
print("Length of val dataset:", len(val_dataset))

Length of train dataset: 27577
Length of val dataset: 6895


In [24]:
train_loader, val_loader = load_data(train_dataset, val_dataset, collate_fn)

In [25]:
rnn = NaiveRNN(14+49)

In [26]:
criterion = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([40]))
optimizer = torch.optim.Adam(rnn.parameters(), lr=0.0001, weight_decay=0.001)

In [None]:
# number of epochs to train the model
n_epochs = 10
train(rnn, train_loader, val_loader, n_epochs)

In [21]:
p, r, f, roc_auc,auprc = eval_model(rnn, val_loader)
print(roc_auc)

0.7622375128490214


In [22]:
print(auprc)

0.17354109567339437


### 2 modal data

In [12]:
with open("los.pkl","rb") as f:
    los_7 = pickle.load(f)

with open("mort.pkl","rb") as f:
    mort = pickle.load(f)

with open("demograph_intervention_seq.pkl","rb") as f:
    demo_iseqs = pickle.load(f)
    
with open("demograph_vitals_zero_seq.pkl","rb") as f:
    demo_vseqs = pickle.load(f)

In [13]:
class CustomMMDataset(Dataset):
    
    def __init__(self, seqs1, seqs2, labels):
        self.x1 = seqs1
        self.x2 = seqs2
        self.y = labels
    
    def __len__(self):
        
        """
        TODO: Return the number of samples (i.e. patients).
        """
        
        # your code here
        return len(self.y)
    
    def __getitem__(self, index):
        
        """
        TODO: Generates one sample of data.
        
        Note that you DO NOT need to covert them to tensor as we will do this later.
        """
        
        # your code here
        return self.x1[index], self.x2[index], self.y[index]
    

In [14]:
def collate_fn(data):
    sequences1, sequences2, labels = zip(*data)

    y = torch.tensor(labels, dtype=torch.float)
    
    num_patients = len(sequences1)
    num_interventions = [len(patient) for patient in sequences1]
    num_codes = [len(intervention) for patient in sequences1 for intervention in patient]

    max_num_interventions = max(num_interventions)
    max_num_codes = max(num_codes)
    
    x1 = torch.zeros((num_patients, max_num_interventions, max_num_codes), dtype=torch.long)
    masks1 = torch.zeros((num_patients, max_num_interventions, max_num_codes), dtype=torch.bool)

    for i_patient, patient in enumerate(sequences1):
        for j_intervention, intervention in enumerate(patient):
            # your code here
            for idx, code in enumerate(intervention):
                x1[i_patient,j_intervention,idx] = code
                masks1[i_patient,j_intervention,idx] = 1
    

    num_vitals = [len(patient) for patient in sequences2]
    max_num_vitals = max(num_interventions)
    vitals_dim = 107
    x2 = torch.zeros((num_patients, max_num_vitals, vitals_dim), dtype=torch.long)
    masks2 = torch.zeros((num_patients, max_num_vitals, vitals_dim), dtype=torch.bool)
    
    for i_patient, patient in enumerate(sequences2):
        for j_vitals, vitals in enumerate(patient):
            # your code here
            x2[i_patient, j_vitals] = torch.tensor(vitals, dtype=torch.long)
            masks2[i_patient, j_vitals] = 1
    
    return x1, masks1, x2, masks2, y

#### Late Fusion

In [35]:
class LF_RNN(nn.Module):
    
    """
    TODO: implement the naive RNN model above.
    """
    
    def __init__(self, num_codes_m1,num_codes_m2):
        super().__init__()

        # your code here
        self.m1_embedding = nn.Embedding(num_codes_m1+1,embedding_dim=128)
        self.m1_rnn = nn.LSTM(128, 128, 1, batch_first=True, bidirectional=True)
        
        self.m2_embedding_num = nn.Linear(in_features=104, out_features=96)
        self.m2_embedding_cat = nn.Embedding(num_codes_m2+1,embedding_dim=32)
        self.m2_rnn = nn.LSTM(128, 128, 1, batch_first=True, bidirectional=True)
        
        self.fc1 = nn.Linear(in_features=512, out_features=256)
        self.fc2 = nn.Linear(in_features=256, out_features=1)
        
        self.relu = nn.ReLU()

    
    def forward(self, x1, masks1, x2, masks2):
        batch_size = x1.shape[0]
        
        # FIRST MODAL
        # 1. Pass the sequence through the embedding layer;
        x1 = self.m1_embedding(x1)
        # 2. Sum the embeddings for each diagnosis code up for a visit of a patient.
        x1 = sum_embeddings_with_mask(x1, masks1)
        # 3. Pass the embegginds through the RNN layer;
        output1, (hn1, cn1) = self.m1_rnn(x1)
        # 4. Obtain the hidden state at the last visit.
        true_h_n1 = get_last_visit(output1, masks1)
        
        # SECOND MODAL
        num_x2 = x2[:,:,:-3]
        cat_x2 = x2[:,:,-3:]
        num_x2 = self.m2_embedding_num(num_x2.float())
        cat_x2 = self.m2_embedding_cat(cat_x2)
        cat_x2 = cat_x2.sum(dim=2)
        x2 = torch.concat([num_x2, cat_x2], dim=-1)
        output2, (hn2, cn2) = self.m2_rnn(x2)
        true_h_n2 = get_last_visit(output2, masks2)
        
        # LATE FUSION
        # concat hidden stats and pass through 
        true_h_n = torch.concat([true_h_n1, true_h_n2], dim=-1)
        x = self.relu(self.fc1(true_h_n))
        out = self.fc2(x)
        return out.view(batch_size)

In [30]:
from sklearn.metrics import precision_recall_fscore_support, roc_auc_score, average_precision_score
def eval_model(model, val_loader):
    model.eval()
    y_pred = torch.LongTensor()
    y_score = torch.Tensor()
    y_true = torch.LongTensor()
    model.eval()
    for x1, masks1, x2, masks2, y in val_loader:
        y_hat = model(x1, masks1, x2, masks2)
        y_score = torch.cat((y_score,  y_hat.detach().to('cpu')), dim=0)
        y_hat = (y_hat > 0.5).int()
        y_pred = torch.cat((y_pred,  y_hat.detach().to('cpu')), dim=0)
        y_true = torch.cat((y_true, y.detach().to('cpu')), dim=0)
    """
    TODO:
        Calculate precision, recall, f1, and roc auc scores.
        Use `average='binary'` for calculating precision, recall, and fscore.
    """
    p, r, f,_ = precision_recall_fscore_support(y_true, y_pred, average='binary')
    roc_auc = roc_auc_score(y_true, y_score)
    auprc = average_precision_score(y_true, y_score, average='macro')

    return p, r, f, roc_auc, auprc

In [25]:
def train(model, train_loader, val_loader, n_epochs):
    
    for epoch in range(n_epochs):
        model.train()
        train_loss = 0
        for x1, masks1, x2, masks2, y in train_loader:

            optimizer.zero_grad()
            y_pred = model(x1, masks1, x2, masks2)
            loss = criterion(y_pred, y)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
        train_loss = train_loss / len(train_loader)
        print('Epoch: {} \t Training Loss: {:.6f}'.format(epoch+1, train_loss))
        p, r, f, roc_auc, auprc = eval_model(model, val_loader)
        print('Epoch: {} \t Validation p: {:.2f}, r:{:.2f}, f: {:.2f}, roc_auc: {:.2f}, auprc: {:.2f}'
              .format(epoch+1, p, r, f, roc_auc,auprc))

LOS_7 Task

In [31]:
mm_dataset = CustomMMDataset(demo_iseqs, demo_vseqs, los_7)

In [32]:
split = int(len(mm_dataset)*0.8)

lengths = [split, len(mm_dataset) - split]
train_dataset, val_dataset = random_split(mm_dataset, lengths)

print("Length of train dataset:", len(train_dataset))
print("Length of val dataset:", len(val_dataset))


Length of train dataset: 27577
Length of val dataset: 6895


In [34]:
train_loader, val_loader = load_data(train_dataset, val_dataset, collate_fn)

In [38]:
lfrnn = LF_RNN(14+49, 49)

In [39]:
criterion = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([30]))
optimizer = torch.optim.Adam(lfrnn.parameters(), lr=0.0001, weight_decay=0.001)

In [40]:
# number of epochs to train the model
n_epochs = 2
train(lfrnn, train_loader, val_loader, n_epochs)

Epoch: 1 	 Training Loss: 1.378654
Epoch: 1 	 Validation p: 0.13, r:0.77, f: 0.22, roc_auc: 0.80, auprc: 0.19
Epoch: 2 	 Training Loss: 1.294997
Epoch: 2 	 Validation p: 0.18, r:0.61, f: 0.28, roc_auc: 0.80, auprc: 0.19


In [41]:
p, r, f, roc_auc,auprc = eval_model(lfrnn, val_loader)
print(roc_auc)
print(auprc)

0.8028271566130574
0.19366082258062162


ICU MORT Task

In [43]:
mm_dataset = CustomMMDataset(demo_iseqs, demo_vseqs, mort)

In [44]:
split = int(len(mm_dataset)*0.8)

lengths = [split, len(mm_dataset) - split]
train_dataset, val_dataset = random_split(mm_dataset, lengths)

print("Length of train dataset:", len(train_dataset))
print("Length of val dataset:", len(val_dataset))


Length of train dataset: 27577
Length of val dataset: 6895


In [45]:
train_loader, val_loader = load_data(train_dataset, val_dataset, collate_fn)

In [46]:
lfrnn = LF_RNN(14+49, 49)

In [47]:
criterion = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([30]))
optimizer = torch.optim.Adam(lfrnn.parameters(), lr=0.0001, weight_decay=0.001)

In [48]:
# number of epochs to train the model
n_epochs = 2
train(lfrnn, train_loader, val_loader, n_epochs)

Epoch: 1 	 Training Loss: 1.268148
Epoch: 1 	 Validation p: 0.20, r:0.85, f: 0.33, roc_auc: 0.89, auprc: 0.47
Epoch: 2 	 Training Loss: 1.000276
Epoch: 2 	 Validation p: 0.24, r:0.82, f: 0.37, roc_auc: 0.90, auprc: 0.50


In [49]:
p, r, f, roc_auc,auprc = eval_model(lfrnn, val_loader)
print(roc_auc)
print(auprc)

0.8975373213154003
0.5009417032007952


#### Early Fusion on embeddings

In [61]:
def get_last_visit(hidden_states, masks):
    
    # your code here
    first_zero_idx = torch.argmin(masks.sum(dim=-1),axis=0)
    last_nonzero_idx = first_zero_idx - 1
    batch_size = hidden_states.shape[0]
    return hidden_states[list(range(batch_size)), last_nonzero_idx]

In [51]:
class EF_RNN(nn.Module):
    
    """
    TODO: implement the naive RNN model above.
    """
    
    def __init__(self, num_codes_m1,num_codes_m2):
        super().__init__()

        # your code here
        self.m1_embedding = nn.Embedding(num_codes_m1+1,embedding_dim=128)
        
        self.m2_embedding_num = nn.Linear(in_features=104, out_features=96)
        self.m2_embedding_cat = nn.Embedding(num_codes_m2+1,embedding_dim=32)
        
        self.rnn = nn.LSTM(256, 128, 1, batch_first=True, bidirectional=True)
        
        self.fc1 = nn.Linear(in_features=256, out_features=256)
        self.fc2 = nn.Linear(in_features=256, out_features=1)
        
        self.relu = nn.ReLU()

    
    def forward(self, x1, masks1, x2, masks2):
        batch_size = x1.shape[0]

        # FIRST MODAL
        # 1. Pass the sequence through the embedding layer;
        x1 = self.m1_embedding(x1)
        # 2. Sum the embeddings for each diagnosis code up for a visit of a patient.
        x1 = sum_embeddings_with_mask(x1, masks1)
        
        # SECOND MODAL
        num_x2 = x2[:,:,:-3]
        cat_x2 = x2[:,:,-3:]
        num_x2 = self.m2_embedding_num(num_x2.float())
        cat_x2 = self.m2_embedding_cat(cat_x2)
        cat_x2 = cat_x2.sum(dim=2)
        x2 = torch.concat([num_x2, cat_x2], dim=-1)
        
        # 3. FUSE MODALS and Pass the embegginds through the RNN layer
        x3 = torch.concat([x1, x2], dim=-1)
        output, (hn, cn) = self.rnn(x3)
        # 4. Obtain the hidden state at the last visit.
        fused_mask = torch.logical_or(masks1.sum(dim=-1),masks2.sum(dim=-1))
        true_h_n = get_last_visit(output, fused_mask)
        
        x = self.relu(self.fc1(true_h_n))
        out = self.fc2(x)
        return out.view(batch_size)

LOS 7 task

In [52]:
mm_dataset = CustomMMDataset(demo_iseqs, demo_vseqs, los_7)

In [53]:
split = int(len(mm_dataset)*0.8)

lengths = [split, len(mm_dataset) - split]
train_dataset, val_dataset = random_split(mm_dataset, lengths)

print("Length of train dataset:", len(train_dataset))
print("Length of val dataset:", len(val_dataset))


Length of train dataset: 27577
Length of val dataset: 6895


In [54]:
train_loader, val_loader = load_data(train_dataset, val_dataset, collate_fn)

In [55]:
efrnn = EF_RNN(14+49, 49)

In [56]:
criterion = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([30]))
optimizer = torch.optim.Adam(efrnn.parameters(), lr=0.0001, weight_decay=0.001)

In [57]:
# number of epochs to train the model
n_epochs = 4
train(efrnn, train_loader, val_loader, n_epochs)

Epoch: 1 	 Training Loss: 1.529512
Epoch: 1 	 Validation p: 0.13, r:0.67, f: 0.22, roc_auc: 0.77, auprc: 0.17
Epoch: 2 	 Training Loss: 1.411297
Epoch: 2 	 Validation p: 0.11, r:0.82, f: 0.19, roc_auc: 0.78, auprc: 0.16
Epoch: 3 	 Training Loss: 1.356769
Epoch: 3 	 Validation p: 0.11, r:0.84, f: 0.20, roc_auc: 0.79, auprc: 0.17
Epoch: 4 	 Training Loss: 1.328977
Epoch: 4 	 Validation p: 0.16, r:0.67, f: 0.25, roc_auc: 0.80, auprc: 0.19


In [58]:
p, r, f, roc_auc,auprc = eval_model(efrnn, val_loader)
print(roc_auc)
print(auprc)

0.7969238598817623
0.1857725093636958


ICU MORT task

In [59]:
mm_dataset = CustomMMDataset(demo_iseqs, demo_vseqs, mort)

In [60]:
split = int(len(mm_dataset)*0.8)

lengths = [split, len(mm_dataset) - split]
train_dataset, val_dataset = random_split(mm_dataset, lengths)

print("Length of train dataset:", len(train_dataset))
print("Length of val dataset:", len(val_dataset))


Length of train dataset: 27577
Length of val dataset: 6895


In [61]:
train_loader, val_loader = load_data(train_dataset, val_dataset, collate_fn)

In [62]:
efrnn = EF_RNN(14+49, 49)

In [63]:
criterion = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([30]))
optimizer = torch.optim.Adam(efrnn.parameters(), lr=0.0001, weight_decay=0.001)

In [64]:
# number of epochs to train the model
n_epochs = 4
train(efrnn, train_loader, val_loader, n_epochs)

Epoch: 1 	 Training Loss: 1.440768
Epoch: 1 	 Validation p: 0.18, r:0.88, f: 0.29, roc_auc: 0.87, auprc: 0.36
Epoch: 2 	 Training Loss: 1.180359
Epoch: 2 	 Validation p: 0.17, r:0.86, f: 0.29, roc_auc: 0.87, auprc: 0.40
Epoch: 3 	 Training Loss: 1.134974
Epoch: 3 	 Validation p: 0.26, r:0.70, f: 0.38, roc_auc: 0.87, auprc: 0.39
Epoch: 4 	 Training Loss: 1.089461
Epoch: 4 	 Validation p: 0.25, r:0.74, f: 0.38, roc_auc: 0.89, auprc: 0.43


In [65]:
p, r, f, roc_auc,auprc = eval_model(efrnn, val_loader)
print(roc_auc)
print(auprc)

0.8850463748549182
0.4290843993476328


### Transformer

In [9]:
class LF_TF(nn.Module):
    
    """
    TODO: implement the naive RNN model above.
    """
    
    def __init__(self, num_codes_m1,num_codes_m2):
        super().__init__()

        # your code here
        self.m1_embedding = nn.Embedding(num_codes_m1+1,embedding_dim=128)
        encoder_layer1 = nn.TransformerEncoderLayer(d_model=128, nhead=2)
        self.transformer_encoder1 = nn.TransformerEncoder(encoder_layer1, num_layers=2)
        self.m2_embedding_num = nn.Linear(in_features=104, out_features=96)
        self.m2_embedding_cat = nn.Embedding(num_codes_m2+1,embedding_dim=32)
        encoder_layer2 = nn.TransformerEncoderLayer(d_model=128, nhead=2)
        self.transformer_encoder2 = nn.TransformerEncoder(encoder_layer2, num_layers=2)
        
        self.fc1 = nn.Linear(in_features=256, out_features=256)
        self.fc2 = nn.Linear(in_features=256, out_features=1)
        
        self.relu = nn.ReLU()

    
    def forward(self, x1, masks1, x2, masks2):
        batch_size = x1.shape[0]
        
        # FIRST MODAL
        # 1. Pass the sequence through the embedding layer;
        x1 = self.m1_embedding(x1)
        # 2. Sum the embeddings for each diagnosis code up for a visit of a patient.
        x1 = sum_embeddings_with_mask(x1, masks1)
        # 3. Pass the embegginds through the TF layer;
        output1 = self.transformer_encoder1(x1)
        # 4. Obtain the hidden state at the last visit.
        true_h_n1 = get_last_visit(output1, masks1)
        
        # SECOND MODAL
        num_x2 = x2[:,:,:-3]
        cat_x2 = x2[:,:,-3:]
        num_x2 = self.m2_embedding_num(num_x2.float())
        cat_x2 = self.m2_embedding_cat(cat_x2)
        cat_x2 = cat_x2.sum(dim=2)
        x2 = torch.concat([num_x2, cat_x2], dim=-1)
        output2 = self.transformer_encoder2(x2)
        true_h_n2 = get_last_visit(output2, masks2)
        
        # LATE FUSION
        # concat hidden stats and pass through 
        true_h_n = torch.concat([true_h_n1, true_h_n2], dim=-1)
        x = self.relu(self.fc1(true_h_n))
        out = self.fc2(x)
        return out.view(batch_size)

In [15]:
mm_dataset = CustomMMDataset(demo_iseqs, demo_vseqs, los_7)

In [16]:
split = int(len(mm_dataset)*0.8)

lengths = [split, len(mm_dataset) - split]
train_dataset, val_dataset = random_split(mm_dataset, lengths)

print("Length of train dataset:", len(train_dataset))
print("Length of val dataset:", len(val_dataset))


Length of train dataset: 27577
Length of val dataset: 6895


In [19]:
train_loader, val_loader = load_data(train_dataset, val_dataset, collate_fn)

In [36]:
lftf = LF_TF(14+49, 49)

In [37]:
criterion = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([30]))
optimizer = torch.optim.Adam(lftf.parameters(), lr=0.0001, weight_decay=0.001)

In [None]:
# number of epochs to train the model
n_epochs = 2
train(lftf, train_loader, val_loader, n_epochs)

In [39]:
p, r, f, roc_auc,auprc = eval_model(lftf, val_loader)
print(roc_auc)
print(auprc)

0.7480324915141734
0.17193860381483445


In [53]:
class EF_TF(nn.Module):
    
    """
    TODO: implement the naive RNN model above.
    """
    
    def __init__(self, num_codes_m1,num_codes_m2):
        super().__init__()

        # your code here
        self.m1_embedding = nn.Embedding(num_codes_m1+1,embedding_dim=128)
        
        self.m2_embedding_num = nn.Linear(in_features=104, out_features=96)
        self.m2_embedding_cat = nn.Embedding(num_codes_m2+1,embedding_dim=32)
        
        encoder_layer = nn.TransformerEncoderLayer(d_model=256, nhead=2)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=2)
        
        self.fc1 = nn.Linear(in_features=256, out_features=256)
        self.fc2 = nn.Linear(in_features=256, out_features=1)
        
        self.relu = nn.ReLU()

    
    def forward(self, x1, masks1, x2, masks2):
        batch_size = x1.shape[0]

        # FIRST MODAL
        # 1. Pass the sequence through the embedding layer;
        x1 = self.m1_embedding(x1)
        # 2. Sum the embeddings for each diagnosis code up for a visit of a patient.
        x1 = sum_embeddings_with_mask(x1, masks1)
        
        # SECOND MODAL
        num_x2 = x2[:,:,:-3]
        cat_x2 = x2[:,:,-3:]
        num_x2 = self.m2_embedding_num(num_x2.float())
        cat_x2 = self.m2_embedding_cat(cat_x2)
        cat_x2 = cat_x2.sum(dim=2)
        x2 = torch.concat([num_x2, cat_x2], dim=-1)
        
        # 3. FUSE MODALS and Pass the embegginds through the RNN layer
        x3 = torch.concat([x1, x2], dim=-1)
        output = self.transformer_encoder(x3)

        # 4. Obtain the hidden state at the last visit.
        fused_mask = torch.logical_or(masks1.sum(dim=-1),masks2.sum(dim=-1))
        true_h_n = get_last_visit(output, fused_mask)
        
        x = self.relu(self.fc1(true_h_n))
        out = self.fc2(x)
        return out.view(batch_size)

In [52]:
def get_last_visit(hidden_states, masks):
    
    # your code here
    first_zero_idx = torch.argmin(masks.sum(dim=-1),axis=0)
    last_nonzero_idx = first_zero_idx - 1
    batch_size = hidden_states.shape[0]
    return hidden_states[list(range(batch_size)), last_nonzero_idx]

In [54]:
eftf = EF_TF(14+49, 49)

In [55]:
criterion = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([30]))
optimizer = torch.optim.Adam(eftf.parameters(), lr=0.0001, weight_decay=0.001)

In [56]:
# number of epochs to train the model
n_epochs = 2
train(eftf, train_loader, val_loader, n_epochs)

Epoch: 1 	 Training Loss: 1.662546
Epoch: 1 	 Validation p: 0.10, r:0.40, f: 0.16, roc_auc: 0.60, auprc: 0.09
Epoch: 2 	 Training Loss: 1.648254
Epoch: 2 	 Validation p: 0.12, r:0.29, f: 0.17, roc_auc: 0.62, auprc: 0.10


In [57]:
p, r, f, roc_auc,auprc = eval_model(eftf, val_loader)
print(roc_auc)
print(auprc)

0.6222487912219276
0.1029306517332224


MORT ICU

In [65]:
def get_last_visit(hidden_states, masks):
    """
    TODO: obtain the hidden state for the last true visit (not padding visits)

    Arguments:
        hidden_states: the hidden states of each visit of shape (batch_size, # visits, embedding_dim)
        masks: the padding masks of shape (batch_size, # visits, # diagnosis codes)

    Outputs:
        last_hidden_state: the hidden state for the last true visit of shape (batch_size, embedding_dim)
        
    NOTE: DO NOT use for loop.
    
    HINT: Consider using `torch.gather()`.
    """
    
    # your code here
    first_zero_idx = torch.argmin(masks.sum(dim=2),axis=1)
    last_nonzero_idx = first_zero_idx - 1
    batch_size = hidden_states.shape[0]
    return hidden_states[list(range(batch_size)), last_nonzero_idx]

In [66]:
mm_dataset = CustomMMDataset(demo_iseqs, demo_vseqs, mort)

In [67]:
split = int(len(mm_dataset)*0.8)

lengths = [split, len(mm_dataset) - split]
train_dataset, val_dataset = random_split(mm_dataset, lengths)

print("Length of train dataset:", len(train_dataset))
print("Length of val dataset:", len(val_dataset))


Length of train dataset: 27577
Length of val dataset: 6895


In [68]:
train_loader, val_loader = load_data(train_dataset, val_dataset, collate_fn)

In [69]:
lftf = LF_TF(14+49, 49)

In [70]:
criterion = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([30]))
optimizer = torch.optim.Adam(lftf.parameters(), lr=0.0001, weight_decay=0.001)

In [71]:
# number of epochs to train the model
n_epochs = 2
train(lftf, train_loader, val_loader, n_epochs)

Epoch: 1 	 Training Loss: 1.557366
Epoch: 1 	 Validation p: 0.15, r:0.77, f: 0.25, roc_auc: 0.79, auprc: 0.26
Epoch: 2 	 Training Loss: 1.465113
Epoch: 2 	 Validation p: 0.16, r:0.71, f: 0.26, roc_auc: 0.80, auprc: 0.28


In [72]:
p, r, f, roc_auc,auprc = eval_model(lftf, val_loader)
print(roc_auc)
print(auprc)

0.797688079444194
0.2753349927884807


In [73]:
def get_last_visit(hidden_states, masks):
    
    # your code here
    first_zero_idx = torch.argmin(masks.sum(dim=-1),axis=0)
    last_nonzero_idx = first_zero_idx - 1
    batch_size = hidden_states.shape[0]
    return hidden_states[list(range(batch_size)), last_nonzero_idx]

In [74]:
eftf = EF_TF(14+49, 49)

In [75]:
criterion = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([30]))
optimizer = torch.optim.Adam(eftf.parameters(), lr=0.0001, weight_decay=0.001)

In [76]:
# number of epochs to train the model
n_epochs = 2
train(eftf, train_loader, val_loader, n_epochs)

Epoch: 1 	 Training Loss: 1.749289
Epoch: 1 	 Validation p: 0.12, r:0.66, f: 0.21, roc_auc: 0.73, auprc: 0.17
Epoch: 2 	 Training Loss: 1.638730
Epoch: 2 	 Validation p: 0.16, r:0.59, f: 0.25, roc_auc: 0.75, auprc: 0.20


In [77]:
p, r, f, roc_auc,auprc = eval_model(eftf, val_loader)
print(roc_auc)
print(auprc)

0.7518612962759859
0.19828850440438192
