In [1]:
import os
os.environ['CUDA_VISIBLE_DEVICES']="0"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

In [55]:
import torch
import torch.utils.data
import torch.nn as nn 
import torch.optim as optim
from torchvision import datasets, transforms
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt 
from torch.utils.data import DataLoader, TensorDataset
from lifelines.utils import concordance_index


### Dataset

In [21]:
url = 'https://raw.githubusercontent.com/chl8856/DeepHit/master/sample%20data/SYNTHETIC/synthetic_comprisk.csv'
dataset = pd.read_csv(url)
dataset.drop(['true_time', 'true_label'], axis=1, inplace=True)
dataset.head()

Unnamed: 0,time,label,feature1,feature2,feature3,feature4,feature5,feature6,feature7,feature8,feature9,feature10,feature11,feature12
0,0,0,-0.4405,-0.035066,-0.025341,-0.029775,-0.55787,-0.15355,0.56819,-0.15432,-0.25023,0.33915,0.70388,0.28174
1,1,0,0.015579,-0.84608,0.48753,0.65193,0.20099,-0.11238,-1.3963,-0.18874,-0.30001,-0.24032,-0.38533,-1.0245
2,34,2,0.44649,1.641,-1.745,0.31795,-1.1406,0.3656,0.2811,-0.58253,-1.6907,1.2022,-0.5192,1.784
3,9,0,0.62946,-0.61575,-0.32345,-0.9002,0.4536,-0.61992,2.1624,0.19875,-1.1196,-2.7321,-0.25673,-0.81836
4,2,0,1.2498,-0.18561,-0.18378,-0.98108,-0.01499,-0.14437,-1.2529,-0.58432,-0.090523,0.93692,1.0749,0.79117


### Train - test split


In [22]:
get_x = lambda df: (df
                    .drop(columns=['time', 'label'])
                    .values.astype('float32'))

df_test = dataset.sample(frac=0.2)
df_train = dataset.drop(df_test.index)
# df_val = df_train.sample(frac=0.2)
# df_train = df_train.drop(df_val.index)

X_train = get_x(df_train)
X_test = get_x(df_test)

Y_train = df_train[['label', 'time']].to_numpy()
Y_test = df_test[['label', 'time']].to_numpy()

# One-hot encoding 
# Train
label_train = np.zeros((len(df_train['label']), df_train['label'].max()+1))
label_train[np.arange(len(df_train['label'])), df_train['label']] = 1
label_train = np.column_stack((np.array(df_train['time']),
                             label_train[:,1:]))
label_train

# Test
label_test = np.zeros((len(df_test['label']), df_test['label'].max()+1))
label_test[np.arange(len(df_test['label'])), df_test['label']] = 1
label_test = np.column_stack((np.array(df_test['time']),
                             label_test[:,1:]))
label_test

array([[ 0.,  0.,  0.],
       [26.,  0.,  0.],
       [33.,  0.,  1.],
       ...,
       [ 8.,  0.,  0.],
       [ 0.,  1.,  0.],
       [44.,  0.,  1.]])

### Hyperparameters

In [56]:
n_feature = X_train.shape[1]   
batch_size = 32
num_time_units = 10 
n_epochs = 3
learning_rate = 1e-3


### Defining Network
#### DeepHit (rewritten)

In [None]:

class deephit(nn.Module):
    """ Deep network for competing risks in survival analysis (based on DeepHit)

    Args:
        in_features (int): number of covariates
        hidden_layers (list): size of each hidden layer = list element * in_features
        out_features (list): number of outputs for each event
        p_dropout (float): probability of dropout
    """
    def __init__(self, in_features = n_feature, hidden_layers = [30, 30, 50], out_features=[1, 1], p_dropout=0.6):
        super().__init__()
        self.sharedlayer = nn.Sequential(
            nn.Linear(in_features, hidden_layers[0]* n_feature),  
            nn.BatchNorm1d(hidden_layers[0]* n_feature),
            nn.ReLU(), 
            # nn.Dropout(p_dropout) 
        ) 
 
        self.task1 = nn.Sequential(
            nn.Linear(hidden_layers[0]* n_feature + in_features, hidden_layers[1]*n_feature),
            nn.BatchNorm1d(hidden_layers[1]*n_feature),
            nn.ReLU(),
            # nn.Dropout(p_dropout),
        
            nn.Linear(hidden_layers[1]*n_feature, hidden_layers[2]*n_feature),
            nn.BatchNorm1d(hidden_layers[2]*n_feature),
            nn.ReLU(),
            nn.Dropout(p_dropout),
            
            nn.ReLU(),  #
            
            nn.Linear(hidden_layers[2]*n_feature, out_features[0]),
        )
        self.fc_layer1 = nn.Linear(out_features[0], num_time_units)
        
        self.task2 = nn.Sequential(
            nn.Linear(hidden_layers[0]* n_feature + in_features, hidden_layers[1]*n_feature),
            nn.BatchNorm1d(hidden_layers[1]*n_feature),
            nn.ReLU(),
            # nn.Dropout(p_dropout),
        
            nn.Linear(hidden_layers[1]*n_feature, hidden_layers[2]*n_feature),
            nn.BatchNorm1d(hidden_layers[2]*n_feature),
            nn.ReLU(),
            nn.Dropout(p_dropout),
            
            nn.ReLU(),  #
            
            nn.Linear(hidden_layers[2]*n_feature, out_features[1]),
        )
        self.fc_layer2 = nn.Linear(out_features[1], num_time_units)
        
        # Xavier initialization
        for m in self.modules():
            if isinstance(m, nn.Linear):
                m.weight.data = nn.init.xavier_uniform(m.weight.data, gain = nn.init.calculate_gain('relu'))
        
    def forward(self, x):
        residual = x
        shared = self.sharedlayer(x)
        
        # Residual concatenating
        shared = torch.concat((shared, residual), dim=1) 
       
        out1 = self.task1(shared)
        score1_1 = out1   # torch.exp(x.mm(out))
        # score1_2 = torch.sigmoid(self.fc_layer1(score1_1))   # For predicting survival
        
        out2 = self.task2(shared)
        score2_1 = out2
        # score2_2 = torch.sigmoid(self.fc_layer2(score2_1)) 
        return [score1_1, score2_1]  

#### Model with one more share block

In [64]:
class plusshare(nn.Module):
    """ One more block for share network

    Args:
        in_features (int): number of covariates
        hidden_layers (list): size of each hidden layer = list element * in_features
        out_features (list): number of outputs for each event
        p_dropout (float): probability of dropout
    """
    def __init__(self, in_features = n_feature, hidden_layers = [30, 30, 50], out_features=[1, 1], p_dropout=0.6):
        super().__init__()
        self.sharedlayer = nn.Sequential(
            nn.Linear(in_features, hidden_layers[0]* n_feature),  
            nn.BatchNorm1d(hidden_layers[0]* n_feature),
            nn.ReLU(), 
            # nn.Dropout(p_dropout) 
            
            nn.Linear(hidden_layers[0]* n_feature, hidden_layers[0]* n_feature),  
            nn.BatchNorm1d(hidden_layers[0]* n_feature),
            nn.ReLU(), 
        ) 
 
        self.task1 = nn.Sequential(
            nn.Linear(hidden_layers[0]* n_feature + in_features, hidden_layers[1]*n_feature),
            nn.BatchNorm1d(hidden_layers[1]*n_feature),
            nn.ReLU(),
            # nn.Dropout(p_dropout),
        
            nn.Linear(hidden_layers[1]*n_feature, hidden_layers[2]*n_feature),
            nn.BatchNorm1d(hidden_layers[2]*n_feature),
            nn.ReLU(),
            nn.Dropout(p_dropout),
            
            nn.ReLU(),  #
            
            nn.Linear(hidden_layers[2]*n_feature, out_features[0]),
        )
        self.fc_layer1 = nn.Linear(out_features[0], num_time_units)
        
        self.task2 = nn.Sequential(
            nn.Linear(hidden_layers[0]* n_feature + in_features, hidden_layers[1]*n_feature),
            nn.BatchNorm1d(hidden_layers[1]*n_feature),
            nn.ReLU(),
            # nn.Dropout(p_dropout),
        
            nn.Linear(hidden_layers[1]*n_feature, hidden_layers[2]*n_feature),
            nn.BatchNorm1d(hidden_layers[2]*n_feature),
            nn.ReLU(),
            nn.Dropout(p_dropout),
            
            nn.ReLU(),  #
            
            nn.Linear(hidden_layers[2]*n_feature, out_features[1]),
        )
        self.fc_layer2 = nn.Linear(out_features[1], num_time_units)
        
        # Xavier initialization
        for m in self.modules():
            if isinstance(m, nn.Linear):
                m.weight.data = nn.init.xavier_uniform(m.weight.data, gain = nn.init.calculate_gain('relu'))
        
    def forward(self, x):
        residual = x
        shared = self.sharedlayer(x)
        
        # Residual concatenating
        shared = torch.concat((shared, residual), dim=1) 
       
        out1 = self.task1(shared)
        score1_1 = out1   # torch.exp(x.mm(out))
        # score1_2 = torch.sigmoid(self.fc_layer1(score1_1))   # For predicting survival
        
        out2 = self.task2(shared)
        score2_1 = out2
        # score2_2 = torch.sigmoid(self.fc_layer2(score2_1)) 
        return [score1_1, score2_1]  

#### Change FC into Conv layers

In [None]:
# Example
class ConvNet(nn.Module):
    def __init__(self):
        super(ConvNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 6 ,5)  #(input color channel of 3, output channel of 6, kernel size)
        self.pool = nn.MaxPool2d(2, 2) #(kernel size=2, stride=2) -> reduce the image by a factor of 2
        self.conv2 = nn.Conv2d(6, 16, 5) #(= output channel size of last layer,_,_)
        self.fc1 = nn.Linear(16*5*5, 120) #(flattened size of the output tensor of last layer,_)
        self.fc2 = nn.Linear(120, 84) 
        self.fc3 = nn.Linear(84, 10)
        
    def forward(self, x):
        # -> n, 3, 32, 32  #batch size, color channel, image size, image size
        x = self.pool(F.relu(self.conv1(x))) # -> n, 6, 14, 14  # 1st activation
        x = self.pool(F.relu(self.conv2(x))) # -> n, 16, 5, 5
        x = x.view(-1, 16*5*5)              # -> n, 400  # flatten the tensor
        x = F.relu(self.fc1(x))             # -> n, 120  # fully connected layer from now
        x = F.relu(self.fc2(x))             # -> n, 84
        x = self.fc3(x)
        return x
    

In [76]:
class convo(nn.Module):
    """ Change fully connected layers into convolutional layers

    Args:
        in_features (int): number of covariates
        hidden_layers (list): size of each hidden layer = list element * in_features
        out_features (list): number of outputs for each event
        p_dropout (float): probability of dropout
    """
    def __init__(self, in_features=n_feature, hidden_layers = [30, 30, 50], out_features=[1, 1], p_dropout=0.6):
        super().__init__()
        self.sharedlayer = nn.Sequential(
            nn.Conv1d(in_channels=n_feature, out_channels=hidden_layers[0]* n_feature, kernel_size=3),  
            nn.BatchNorm1d(hidden_layers[0]* n_feature),
            nn.ReLU(), 
            # nn.Dropout(p_dropout) 
        ) 
 
        self.task1 = nn.Sequential(
            nn.Linear(hidden_layers[0]* n_feature + in_features, hidden_layers[1]*n_feature),
            nn.BatchNorm1d(hidden_layers[1]*n_feature),
            nn.ReLU(),
            # nn.Dropout(p_dropout),
        
            nn.Linear(hidden_layers[1]*n_feature, hidden_layers[2]*n_feature),
            nn.BatchNorm1d(hidden_layers[2]*n_feature),
            nn.ReLU(),
            nn.Dropout(p_dropout),
            
            nn.ReLU(),  #
            
            nn.Linear(hidden_layers[2]*n_feature, out_features[0]),
        )
        self.fc_layer1 = nn.Linear(out_features[0], num_time_units)
        
        self.task2 = nn.Sequential(
            nn.Linear(hidden_layers[0]* n_feature + in_features, hidden_layers[1]*n_feature),
            nn.BatchNorm1d(hidden_layers[1]*n_feature),
            nn.ReLU(),
            # nn.Dropout(p_dropout),
        
            nn.Linear(hidden_layers[1]*n_feature, hidden_layers[2]*n_feature),
            nn.BatchNorm1d(hidden_layers[2]*n_feature),
            nn.ReLU(),
            nn.Dropout(p_dropout),
            
            nn.ReLU(),  #
            
            nn.Linear(hidden_layers[2]*n_feature, out_features[1]),
        )
        self.fc_layer2 = nn.Linear(out_features[1], num_time_units)
        
        # Xavier initialization
        for m in self.modules():
            if isinstance(m, nn.Linear):
                m.weight.data = nn.init.xavier_uniform(m.weight.data, gain = nn.init.calculate_gain('relu'))
        
    def forward(self, x):
        residual = x
        shared = self.sharedlayer(x)
        
        # Residual concatenating
        shared = torch.concat((shared, residual), dim=1) 
       
        out1 = self.task1(shared)
        score1_1 = out1   # torch.exp(x.mm(out))
        # score1_2 = torch.sigmoid(self.fc_layer1(score1_1))   # For predicting survival
        
        out2 = self.task2(shared)
        score2_1 = out2
        # score2_2 = torch.sigmoid(self.fc_layer2(score2_1)) 
        return [score1_1, score2_1]  

### Defining loss function

In [25]:
def onePair(x0, x1):
    c = np.log(2.)
    m = nn.LogSigmoid() 
    return 1 + m(x1-x0) / c
  
def rank_loss(pred, obs, delta):
    N = pred.size(0)
    allPairs = onePair(pred.view(N,1), pred.view(1,N))

    temp0 = obs.view(1, N) - obs.view(N, 1)
    # indices based on obs time
    temp1 = temp0>0
    # indices of event-event or event-censor pair
    temp2 = delta.view(1, N) + delta.view(N, 1)
    temp3 = temp2>0
    # indices of events
    temp4 = delta.view(N, 1) * torch.ones(1, N, device = device)
    # selected indices
    final_ind = temp1 * temp3 * temp4
    out = allPairs * final_ind
    return out.sum() / final_ind.sum()

def mse_loss(pred,  obs, delta):
    mse = delta*((pred - obs) ** 2)

    ind = pred < obs
    delta0 = 1 - delta
    p = ind * delta0 * (obs - pred)**2 
    return mse.mean(), p.mean()

def loss_func(pred, lifetime, event, lambda1 = 1, lambda2 = 0.2):
    mseloss, penaltyloss = mse_loss(pred, lifetime.unsqueeze(1), event.unsqueeze(1))
    rankloss = rank_loss(pred, lifetime.unsqueeze(1), event.unsqueeze(1))
    loss = mseloss + lambda1*penaltyloss - lambda2*rankloss
    return loss

### Training and evaluating

In [59]:

def evaluation(model_instance):
    torch.manual_seed(1)

    train_dataset = TensorDataset(torch.from_numpy(X_train).float().to(device), 
                                torch.from_numpy(label_train[:,0]).float().to(device), # time
                                torch.from_numpy(label_train[:,1]).float().to(device), # event 1
                                torch.from_numpy(label_train[:,2]).float().to(device)) # event 2
    test_dataset = TensorDataset(torch.from_numpy(X_test).float().to(device), 
                                torch.from_numpy(label_test[:,0]).float().to(device), # time
                                torch.from_numpy(label_test[:,1]).float().to(device), # event 1
                                torch.from_numpy(label_test[:,2]).float().to(device)) # event 2

    train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)
    test_loader = DataLoader(dataset=test_dataset, batch_size=len(test_dataset))

    model = model_instance
    model.to(device)

    optimizer = optim.Adam(model.parameters(), lr=learning_rate,weight_decay=1e-8)

    # Training
    # epoch_loss_train = []
    for e in range(1, n_epochs+1):
        model.train()
        for X_train_batch, lifetime_batch, event1_batch, event2_batch, in train_loader:
            optimizer.zero_grad()

            yhat1, yhat2 = model(X_train_batch)
            loss1 = loss_func(pred = yhat1, lifetime=lifetime_batch, event=event1_batch)
            loss2 = loss_func(pred = yhat2, lifetime=lifetime_batch, event=event2_batch)
            train_loss = loss1 + loss2
            train_loss.backward()
            optimizer.step()

    # Predicting train
    train_loader1 = DataLoader(dataset=train_dataset, batch_size=len(train_dataset))
    y_pred_list0_1 = []
    y_pred_list0_2 = []
    with torch.no_grad():
        model.eval()
        for X_batch, lifetime_batch, event1_batch, event2_batch in train_loader1:
            X_batch = X_batch.to(device)
            y_test_pred1, y_test_pred2 = model(X_batch)
            y_pred_list0_1.append(y_test_pred1.cpu().numpy())
            y_pred_list0_2.append(y_test_pred2.cpu().numpy())
    y_pred_list0_1 = [a.squeeze().tolist() for a in y_pred_list0_1]
    y_pred_list0_1 = sum(y_pred_list0_1, [])
    y_pred_list0_2 = [a.squeeze().tolist() for a in y_pred_list0_2]
    y_pred_list0_2 = sum(y_pred_list0_2, [])

    # Predicting test
    with torch.no_grad():
        model.train() 
        result1 = []
        result2 = []
        for _ in range(100):   
            y_pred_list_1 = []
            y_pred_list_2 = [] 
            for X_batch, lifetime_batch, even1_batch, event2_batch in test_loader:
                y_test_pred1, y_test_pred2 = model(X_batch)
                y_pred_list_1.append(y_test_pred1.cpu().numpy())
                y_pred_list_1 = [a.squeeze().tolist() for a in y_pred_list_1]
                y_pred_list_1 = sum(y_pred_list_1, [])
                
                y_pred_list_2.append(y_test_pred2.cpu().numpy())
                y_pred_list_2 = [a.squeeze().tolist() for a in y_pred_list_2]
                y_pred_list_2 = sum(y_pred_list_2, [])
            result1.append(y_pred_list_1)
            result2.append(y_pred_list_2)

        # result = np.array(result)
        # y_test_pred_mean = result.mean(axis=0).reshape(-1,)
        # y_test_pred_sd = result.std(axis=0).reshape(-1,)
        # y_pred_list_upper = y_test_pred_mean + 1.96*y_test_pred_sd
        # y_pred_list_lower = y_test_pred_mean - 1.96*y_test_pred_sd
        

    print("Train C-index for event 1: ", concordance_index(label_train[:,0], 
                                                            np.exp(y_pred_list0_1),
                                                            label_train[:,1]))

    print("Train C-index for event 2: ", concordance_index(label_train[:,0], 
                                                            np.exp(y_pred_list0_2),
                                                            label_train[:,2]))

    print("Test C-index for event 1: ", concordance_index(label_test[:,0], 
                                                            np.exp(y_pred_list_1),
                                                            Y_test[:,0]))

    print("Test C-index for event 2: ", concordance_index(Y_test[:,1], 
                                                            np.exp(y_pred_list_2),
                                                            Y_test[:,0]))
    # print("")
    # print("Train MSE: ", mean_squared_error(np.log(train_df["FT"]), y_train_pred))
    # print("Test MSE: ", mean_squared_error(np.log(test_df["FT"]), y_test_pred))

#### DeepHit


In [60]:
evaluation(deephit())



Train C-index for event 1:  0.7328074905969261
Train C-index for event 2:  0.7360915498663
Test C-index for event 1:  0.7215800631836892
Test C-index for event 2:  0.7203477989394438


#### One more block for share network

In [65]:
evaluation(plusshare())



Train C-index for event 1:  0.7285313151385455
Train C-index for event 2:  0.7358306698973992
Test C-index for event 1:  0.7206152421218518
Test C-index for event 2:  0.7210531526392243


#### Change FC into Conv layers

In [77]:
evaluation(convo())





RuntimeError: Expected 3-dimensional input for 3-dimensional weight [360, 12, 3], but got 2-dimensional input of size [32, 12] instead

In [73]:
sample = convo()
sample(torch.tensor(X_test))

TypeError: __init__() missing 1 required positional argument: 'in_features'