In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F

from torch import Tensor
from torchvision import datasets, transforms, models
from torchvision.utils import make_grid
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from pycox.preprocessing.label_transforms import LabTransDiscreteTime
import tensorflow as tf


In [3]:
url = 'https://raw.githubusercontent.com/chl8856/DeepHit/master/sample%20data/SYNTHETIC/synthetic_comprisk.csv'
df_train = pd.read_csv(url)
df_train.drop(['true_time', 'true_label'], axis=1, inplace=True)
df_train.head()

df_test = df_train.sample(frac=0.2)
df_train = df_train.drop(df_test.index)
# df_val = df_train.sample(frac=0.2)   #? 5 fold CV
# df_train = df_train.drop(df_val.index)

#### Label transformation

In [4]:
get_x = lambda df: (df
                    .drop(columns=['time', 'label'])
                    .values.astype('float32'))

x_train = get_x(df_train)
# x_val = get_x(df_val)
x_test = get_x(df_test)

# Label transform
class LabTransform(LabTransDiscreteTime):
    def transform(self, durations, events):
        durations, is_event = super().transform(durations, events > 0)
        events[is_event == 0] = 0
        return durations, events.astype('int64')
    
num_durations = 10
labtrans = LabTransform(num_durations)
get_target = lambda df: (df['time'].values, df['label'].values)

y_train = labtrans.fit_transform(*get_target(df_train))
# y_val = labtrans.transform(*get_target(df_val))
y_test = labtrans.transform(*get_target(df_test))
durations_test, events_test = get_target(df_test)
len(y_train[1])


24000

In [5]:
# One-hot encoding 
label_train = np.zeros((len(y_train[1]), y_train[1].max()+1))
label_train[np.arange(len(y_train[1])), y_train[1]] = 1
label_train

# Test
label_test = np.zeros((len(y_test[1]), y_test[1].max()+1))
label_test[np.arange(len(y_test[1])), y_test[1]] = 1
label_test

#? Check

array([[1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       ...,
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 1., 0.]])

#### Hyperparameters

In [6]:
# resnet18 = models.resnet18(pretrained=True)
random_seed = 123
learning_rate = 0.001
num_epochs = 5
batch_size = 32
n_feature = 12
num_time_units = 10   # 10 months
time_bin = 30  # ?


#### Define loss function

In [8]:
# lis = [1, 3, 5, 6, 2, ]
 
# # using reduce to compute sum of list
# print("The sum of the list elements is : ", end="")
# print(reduce(lambda a, b: a+b, lis))
 
# # using reduce to compute maximum element from list
# print("The maximum element of the list is : ", end="")
# print(reduce(lambda a, b: a if a > b else b, lis))

In [9]:
from functools import reduce
def unique_set(lifetime):
    a = lifetime.numpy()   # lifetime.data.cpu().numpy()
    t, idx = np.unique(a, return_inverse=True)
    sort_idx = np.argsort(a)
    a_sorted = a[sort_idx]
    unq_first = np.concatenate(([True], a_sorted[1:] != a_sorted[:-1]))
    unq_count = np.diff(np.nonzero(unq_first)[0])
    unq_idx = np.split(sort_idx, np.cumsum(unq_count))
    return t, unq_idx
    
def log_parlik(lifetime, censor, score1):  
    t, H = unique_set(lifetime)
    keep_index = np.nonzero(censor.numpy())[0]  #censor = 1  #.data.cpu()
    H = [list(set(h)&set(keep_index)) for h in H]
    n = [len(h) for h in H]
    
    score1 = score1.detach().numpy()   # .data.cpu()  
    total = 0
    for j in range(len(t)):
        total_1 = np.sum(np.log(score1)[H[j]])
        m = n[j]
        total_2 = 0
        for i in range(m):
            subtotal = np.sum(score1[sum(H[j:],[])]) - (i*1.0/m)*(np.sum(score1[H[j]]))
            subtotal = np.log(subtotal)
            total_2 = total_2 + subtotal
        total = total + total_1 - total_2
        total = np.array([total])
    return torch.from_numpy(total).type(torch.FloatTensor).view(-1,1)
        

def acc_pairs(censor, lifetime):
    noncensor_index = np.nonzero(censor.numpy())[0]  #.data.cpu()
    lifetime = lifetime.numpy()  # .data.cpu()
    acc_pair = []
    for i in noncensor_index:
        all_j =  np.array(range(len(lifetime)))[lifetime > lifetime[i]]
        acc_pair.append([(i,j) for j in all_j])
    
    acc_pair = reduce(lambda x,y: x + y, acc_pair)
    return acc_pair


def rank_loss(lifetime, censor, score2, t, time_bin): 
    # score2 (n(samples)*24) at time unit t = 1,2,...,24
    acc_pair = acc_pairs(censor, lifetime)
    lifetime = lifetime.numpy()   #.data.cpu()
    total = 0
    for i,j in acc_pair:
        yi = (lifetime[i] >= (t-1) * time_bin) * 1
        yj = (lifetime[j] >= (t-1) * time_bin) * 1
        a = torch.ones(1).type(torch.FloatTensor)
        L2dist = torch.dist(score2[j, t-1] - score2[i, t-1], a, 2)
        total = total + L2dist* yi * (1-yj)
    return total


def C_index(censor, lifetime, score1):
    score1 = score1.detach().numpy()  #.data.cpu()  #?
    acc_pair = acc_pairs(censor, lifetime)
    prob = sum([score1[i] >= score1[j] for (i, j) in acc_pair])[0]*1.0/len(acc_pair)
    return prob
    


In [10]:
# Loss criterion for multi-task learning

# def criterion(score1, score2, lifetime, censor):
#     for task in range(len(score1)):
#         loss1 = log_parlik(lifetime, censor[task], score1[task])
#         loss2 = []
#         for t in range(num_time_units):
#             loss2.append(rank_loss(lifetime, censor[task], score2[task], t+1, time_bin))
#         loss2 = sum(loss2)
#         loss = 1.0 * loss1 + 0.5 * loss2
#     return torch.mean(loss)

def criterion(score1, score2, lifetime, censor):
    loss1 = log_parlik(lifetime, censor, score1)
    loss2 = []
    for t in range(num_time_units):
        loss2.append(rank_loss(lifetime, censor, score2, t+1, time_bin))
    loss2 = sum(loss2)
    loss = 1.0 * loss1 + 0.5 * loss2
    return loss

## Network defining

* DeepHit is a 4-layer network consisting of 1 fully-connected layer for the shared sub-network and 2 fully- connected layers for each cause-specific sub-network and a softmax layer as the output layer.

* For hidden layers, the number of nodes were set as 3, 5, and 3 times of the covariate dimension for the layer 1, 2, and 3, respectively, with ReLu activation function. 

* The network was trained by back-propagation via Adam optimizer with a batch size of 50 and a learning rate of 10−4. 

* Dropout probability of 0.6 and Xavier initialization was applied for all the layers (DeepHit was implemented in a Tensorflow environment).

* Ref of multitask learning in Pytorch:
    * https://github.com/Hui-Li/multi-task-learning-example-PyTorch/blob/master/multi-task-learning-example-PyTorch.ipynb
    * https://github.com/yaringal/multi-task-learning-example/blob/master/multi-task-learning-example-pytorch.ipynb

In [11]:
# Define neural network    
class SpecificNet(nn.Module):
    def __init__(self, in_features = n_feature, hidden_layers = [3, 3, 5], out_features=[1, 1], alpha=0.2, sigma=0.1, p_dropout=0.6, loss=None):
        super().__init__()
        self.sharedlayer = nn.Sequential(
            nn.Linear(in_features, hidden_layers[0]* n_feature),  # 3*n_feature
            nn.ReLU(inplace=True),
            nn.Dropout(p_dropout) 
            # one output layer?
        ) 
 
        self.task1 = nn.Sequential(
            nn.Linear(hidden_layers[0]* n_feature, hidden_layers[1]*n_feature),
            nn.ReLU(inplace=True),
            nn.Dropout(p_dropout),
        
            nn.Linear(hidden_layers[1]*n_feature, hidden_layers[2]*n_feature),
            nn.ReLU(inplace=True),
            nn.Dropout(p_dropout),
            
            nn.Linear(hidden_layers[2]*n_feature, out_features[0]),
        )
        self.fc_layer1 = nn.Linear(out_features[0], num_time_units)
        
        self.task2 = nn.Sequential(
            nn.Linear(hidden_layers[0]* n_feature, hidden_layers[1]*n_feature),
            nn.ReLU(inplace=True),
            nn.Dropout(p_dropout),
        
            nn.Linear(hidden_layers[1]*n_feature, hidden_layers[2]*n_feature),
            nn.ReLU(inplace=True),
            nn.Dropout(p_dropout),
            
            nn.Linear(hidden_layers[2]*n_feature, out_features[1]),
        )
        self.fc_layer2 = nn.Linear(out_features[1], num_time_units)
        
        # Xavier initialization
        for m in self.modules():
            if isinstance(m, nn.Linear):
                m.weight.data = nn.init.xavier_uniform(m.weight.data, gain = nn.init.calculate_gain('relu'))
        
    def forward(self, x):
        residual = x
        shared = self.sharedlayer(x)
        # shared = torch.concat(shared, residual, dim = 1) # size mapping?     
        # torch.concat(out1, out2, dim = 1) 
        out1 = self.task1(shared)
        score1_1 = torch.exp(out1)   # torch.exp(x.mm(out))
        score1_2 = torch.sigmoid(self.fc_layer1(score1_1)) 
        
        out2 = self.task2(shared)
        score2_1 = torch.exp(out2)
        score2_2 = torch.sigmoid(self.fc_layer2(score2_1)) 
        return [score1_1, score1_2], [score2_1, score2_2]  # dim = 0 for softmax # torch.concat(out1, out2, dim = 1)
  

In [12]:
# Weight initialization
# SpecificNet.apply(init_weights)

torch.manual_seed(random_seed)
model1 = SpecificNet()
a, b = model1(torch.Tensor(x_train))
print(a)
print(b[1].shape)
# score1[:,0]

[tensor([[8.8227e-01],
        [1.0642e-04],
        [6.4972e+00],
        ...,
        [4.2415e-01],
        [1.7161e+01],
        [2.9226e+08]], grad_fn=<ExpBackward0>), tensor([[0.3583, 0.4382, 0.3802,  ..., 0.3225, 0.6274, 0.5640],
        [0.3528, 0.4913, 0.3412,  ..., 0.3584, 0.4078, 0.4588],
        [0.3946, 0.1669, 0.6430,  ..., 0.1466, 0.9980, 0.9502],
        ...,
        [0.3555, 0.4656, 0.3597,  ..., 0.3409, 0.5142, 0.5095],
        [0.4664, 0.0149, 0.9330,  ..., 0.0242, 1.0000, 0.9997],
        [1.0000, 0.0000, 1.0000,  ..., 0.0000, 1.0000, 1.0000]],
       grad_fn=<SigmoidBackward0>)]
torch.Size([24000, 10])




In [13]:
print(a.shape)
print(b.shape)

AttributeError: 'list' object has no attribute 'shape'

In [14]:
# torch.tensor(y_train[0]).view(-1)
y_train[0].flatten()

array([0, 2, 0, ..., 0, 3, 0])

In [15]:
y_train2 = df_train.reset_index().to_numpy()
idx = np.random.permutation(x_train.shape[0])
df_train['time'].to_numpy()

array([ 1, 34,  9, ...,  2, 68,  4])

### Training and evaluating
- Ref:  https://github.com/jysonganan/DeepLearningSurvival/blob/master/DeepSurv.py

In [30]:
# train_data = TrainData(feature_num, X, Y1, Y2)
# train_data_loader = DataLoader(train_data, shuffle=True, batch_size=batch_size)

In [16]:
# Training
import time
start_time = time.time()
optimizer = torch.optim.Adam(model1.parameters(), lr=learning_rate)

time_train = df_train['time'].to_numpy()
time_test = df_test['time'].to_numpy()
def train(epoch):
    
    model1.train()
    train_loss = 0
    idx = np.random.permutation(x_train.shape[0])
    j = 0
    while j < x_train.shape[0]:
        if j < x_train.shape[0] - batch_size:
            data = torch.from_numpy(x_train[idx[j:(j + batch_size)]]).type(torch.FloatTensor)
            lifetime = torch.from_numpy(time_train[idx[j:(j + batch_size)]]).type(torch.FloatTensor)
            censor = torch.from_numpy(label_train[idx[j:(j + batch_size)],1:]).type(torch.FloatTensor)
        else:
            data = torch.from_numpy(x_train[idx[j:]]).type(torch.FloatTensor)
            lifetime = torch.from_numpy(time_train[idx[j:]]).type(torch.FloatTensor)
            censor = torch.from_numpy(label_train[idx[j:],1:]).type(torch.FloatTensor)
            
        optimizer.zero_grad()
        score_task1, score_task2 = model1(data)
        loss_1 = criterion(score_task1[0], score_task1[1], lifetime, censor)
        loss_2 = criterion(score_task2[0], score_task2[1], lifetime, censor)
        loss = (loss_1 + loss_2)/2
        train_loss += loss.item()
        loss.backward()      
        # train_loss = loss.data[0]
        optimizer.step()
        j += batch_size
    return train_loss*1.0 / x_train.shape[0]

durations_test, events_test 

def test(epoch):
    
    model1.eval()
    test_loss = 0
    j = 0
    while j < x_test.shape[0]:
        if j < x_test.shape[0] - batch_size:
            data = torch.from_numpy(x_test[j:(j + batch_size)]).type(torch.FloatTensor)
            lifetime = torch.from_numpy(time_test[j:(j + batch_size)]).type(torch.FloatTensor)
            censor = torch.from_numpy(label_test[j:(j + batch_size),1:]).type(torch.FloatTensor)
        else:
            data = torch.from_numpy(x_test[j:]).type(torch.FloatTensor)
            lifetime = torch.from_numpy(time_test[j:]).type(torch.FloatTensor)
            censor = torch.from_numpy(label_test[j:,1:]).type(torch.FloatTensor)
            
        score_task1, score_task2 = model1(data)
        loss_1 = criterion(score_task1[0], score_task1[1], lifetime, censor)
        loss_2 = criterion(score_task2[0], score_task2[1], lifetime, censor)
        loss = (loss_1 + loss_2)/2
        
        # loss = criterion(score1, score2, lifetime, censor)
        test_loss += loss.item()
        # test_loss += loss.data[0]
        j += batch_size
        
    return test_loss*1.0 / x_test.shape[0]
        
    
    
for epoch in range(1, num_epochs + 1):
    train_loss = train(epoch)
    test_loss = test(epoch)
    print('====> Epoch: %d training loss: %.4f'%(epoch, train_loss))
    print('====> Epoch: %d testing loss: %.4f'%(epoch, test_loss))
    
    



====> Epoch: 1 training loss: -3.4842
====> Epoch: 1 testing loss: -1.0980
====> Epoch: 2 training loss: -3.5079
====> Epoch: 2 testing loss: -1.0980
====> Epoch: 3 training loss: -3.4521
====> Epoch: 3 testing loss: -1.0980
====> Epoch: 4 training loss: -3.4347
====> Epoch: 4 testing loss: -1.0980
====> Epoch: 5 training loss: -3.4183
====> Epoch: 5 testing loss: -1.0980


In [20]:
# import time
# start_time = time.time()
# optimizer = torch.optim.Adam(model1.parameters(), lr=learning_rate)

# time_train = df_train['time'].to_numpy()
# time_test = df_test['time'].to_numpy()



# concordance - training
data_train = torch.from_numpy(x_train).type(torch.FloatTensor)
lifetime_train = torch.from_numpy(time_train).type(torch.FloatTensor)
censor_train = torch.from_numpy(label_train[:,1]).type(torch.FloatTensor)

score_task1_train, score_task2_train = model1(data_train)
C_index_train = C_index(censor_train, lifetime_train, score_task1_train[0])
print('Concordance index for training data: {:.4f}'.format(C_index_train))


# concordance - test
data_test = torch.from_numpy(x_test).type(torch.FloatTensor)
lifetime_test = torch.from_numpy(time_test).type(torch.FloatTensor)
censor_test = torch.from_numpy(label_test[:,1]).type(torch.FloatTensor)

score_task1_test, score_task2_test = model1(data_test)
C_index_test = C_index(censor_test, lifetime_test, score_task1_test[0])
print('Concordance index for test data: {:.4f}'.format(C_index_test))


Concordance index for training data: 0.4910
Concordance index for test data: 0.4859
