In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import time
from sklearn.metrics import roc_curve, auc, confusion_matrix, accuracy_score

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import lib.config as cfg
import lib.model as ts_model

## Model
- Network
    - GRU
    - embedding_size = 256
    - hidden_size = 128
    - batch_size = 256
- Multi-head
    - Classification: head 0-3
    - Regression: head 4-7
- Loss
    - BCE: weight 0.25
    - MSE: weight 0.05
- Optim
    - Adam, LR = 0.001
- TBD:
    - the no-cumsum variant of the classification head is disabled (it is not compatible with label shift)
    
### Model log
- v1
    - no cumsum
    - fc1, fc2, 1 layer gru
- v2
    - cumsum
    - fc1, 2 layer gru
- v3
    - fix sampling problem
    - no cumsum
    - fc1, fc2, 2 layer rnn
- v4
    - add loss weights on top of v3
    

In [3]:
tensor_data_agg = torch.load('./tensor_data_agg.pt')
tensor_label_agg = torch.load('./tensor_label_agg.pt')
tensor_regres_agg = torch.load('./tensor_regres_agg.pt')

df_train_pid = pd.read_csv('./df_train_pid.csv')
pid_list = df_train_pid['patientunitstayid']

# def columns
col_all = df_train_pid.columns
col_meta = ['patientunitstayid', 'tsid']
col_label = list(col_all[list(map(lambda x: 'label_' in x, col_all))])
col_label_used = col_label[1:] # remove gt_column
col_regres = list(col_all[list(map(lambda x: 'regres_' in x, col_all))])
col_to_drop = col_meta + col_label + col_regres

# def max time series length
max_ts_length = 128

# build model cfg
model_config = {
    'col_to_drop': col_to_drop,
    'col_label': col_label,
    'col_regres': col_regres,
    'max_ts_length': max_ts_length,
}

In [4]:
num_layers = 2
feature_size = tensor_data_agg.shape[-1]
embedding_size = 256
hidden_size = 128
tagset_size = len(col_label_used) + len(col_regres)

model = ts_model.RNN_v4(num_layers, feature_size, embedding_size, hidden_size, tagset_size)
model.cuda()

loss_func_classi = nn.BCELoss(reduction='none') # NLLLoss requires 2 neural heads, BCELoss only requires 1
loss_func_regres = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001) # SGD is converge *very* slow

### From checkpoint

In [4]:
model = ts_model.RNN_v4(num_layers, feature_size, embedding_size, hidden_size, tagset_size)
model.load_state_dict(torch.load('./gru_v4_ep50.pth'))
model.cuda()
optimizer = optim.Adam(model.parameters(), lr=0.001)

RNN_v1(
  (fc1): Linear(in_features=476, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=256, bias=True)
  (rnn): GRU(256, 128, num_layers=2)
  (fc_out): Linear(in_features=128, out_features=8, bias=True)
)

In [6]:
batch_size = 256

for epoch in range(50):
    loss_epoch = 0
    batch_count = 0
    loop_count = int(np.ceil(len(pid_list)/batch_size))
    
    for i in range(loop_count):
        # construct batch
        batch_id_list = np.random.choice(range(0,len(pid_list)), batch_size)

        tensor_data_batch = tensor_data_agg[:,batch_id_list,:].cuda()
        tensor_label_batch = tensor_label_agg[:,batch_id_list,:].cuda()
        tensor_regres_batch = tensor_regres_agg[:,batch_id_list,:].cuda()
        
        model.zero_grad()
        model_out = model(tensor_data_batch)

        # Classification loss 
        loss_batch = 0
        for i, col in enumerate(col_label_used):
            pred_prob = model_out[:,:,i]
            gt_label = tensor_label_batch[:,:,i]

            assert(pred_prob.shape == gt_label.shape)
            loss = loss_func_classi(pred_prob.view(-1), gt_label.view(-1))
            
            loss_weight = (gt_label.view(-1)*9+1) # balance postive label # tbd: don't hard code weight factor
            loss = (loss*loss_weight).mean()
            
            loss_batch += loss*0.25

        # Regression loss / Auxiliary tasks
        for i, col in enumerate(col_regres):
            idx_head = len(col_label_used)
            pred_val = model_out[:,:,idx_head+i]
            gt_val = tensor_regres_batch[:,:,i]

            assert(pred_val.shape == gt_val.shape)
            loss = loss_func_regres(pred_val.view(-1), gt_val.view(-1))
            loss_batch += loss*0.05

        loss_batch.backward()
        optimizer.step()
        
        batch_count += 1
        loss_epoch += loss_batch
        
    if epoch%1 == 0:
        y_train_prob = pred_prob.view(-1).cpu().detach().numpy()
        y_train = gt_label.view(-1).cpu().detach().numpy()

        fpr, tpr, _ = roc_curve(y_train, y_train_prob)
        roc_auc = auc(fpr, tpr)
        
        print(
            'Epoch ', epoch, 
            ' | Loss: ', loss_epoch.cpu().detach().numpy()/batch_count,
            ' | Train roc auc: ', roc_auc.round(3)
        )

torch.save(model.state_dict(), './gru_v4_ep100.pth')

Epoch  0  | Loss:  0.5480213165283203  | Train roc auc:  0.98
Epoch  1  | Loss:  0.4090387961443733  | Train roc auc:  0.987
Epoch  2  | Loss:  0.400241683511173  | Train roc auc:  0.986
Epoch  3  | Loss:  0.39060042886173024  | Train roc auc:  0.982
Epoch  4  | Loss:  0.3810299424564137  | Train roc auc:  0.983
Epoch  5  | Loss:  0.392298137440401  | Train roc auc:  0.983
Epoch  6  | Loss:  0.3926832816180061  | Train roc auc:  0.982
Epoch  7  | Loss:  0.3996241232928108  | Train roc auc:  0.987
Epoch  8  | Loss:  0.3920060606563793  | Train roc auc:  0.984
Epoch  9  | Loss:  0.3756248249727137  | Train roc auc:  0.987
Epoch  10  | Loss:  0.3760535857256721  | Train roc auc:  0.987
Epoch  11  | Loss:  0.3684380755704992  | Train roc auc:  0.986
Epoch  12  | Loss:  0.3715713444878073  | Train roc auc:  0.986
Epoch  13  | Loss:  0.36142587661743164  | Train roc auc:  0.986
Epoch  14  | Loss:  0.36754734375897574  | Train roc auc:  0.986
Epoch  15  | Loss:  0.3625013127046473  | Train ro