In [1]:
import sys
import viz
import torch
from torch import nn
import survival_analysis
import numpy as np
import pandas as pd
import network
from torch.utils.data import TensorDataset, Dataset
import torch.utils.data.dataloader as dataloader
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline

# event_col is the header in the df that represents the 'Event / Status' indicator
# time_col is the header in the df that represents the event time
def dataframe_to_deepsurv_ds(df, event_col = 'Event', time_col = 'Time'):
    # Extract the event and time columns as numpy arrays
    e = df[event_col].values.astype(np.int32)
    t = df[time_col].values.astype(np.float32)

    # Extract the patient's covariates as a numpy array
    x_df = df.drop([event_col, time_col], axis = 1)
    x = x_df.values.astype(np.float32)
    
    # Return the deep surv dataframe
    return {
        'x' : x,
        'e' : e,
        't' : t
    }

class MyTrainDataset(Dataset):
    
    def __init__(self, train_file_path, standardize=False):
        self.train_df = pd.read_csv(train_file_path)
        print(self.train_df.head())
        # If the headers of the csv change, you can replace the values of 
        # 'event_col' and 'time_col' with the names of the new headers
        # You can also use this function on your training dataset, validation dataset, and testing dataset
        train_data = dataframe_to_deepsurv_ds(self.train_df, event_col = 'Event', time_col= 'Time')

        self.x, self.e, self.t = train_data['x'], train_data['e'], train_data['t']
        
        if standardize:
            offset = self.x.mean(axis = 0)
            scale = self.x.std(axis = 0)
            self.x = (self.x - offset) / scale
        
        # Sort Training Data for Accurate Likelihood
        sort_idx = np.argsort(self.t)[::-1]
        self.x = self.x[sort_idx]
        self.e = self.e[sort_idx]
        self.t = self.t[sort_idx]
        
        self.processed_count = 1
                
    def __len__(self):
        return len(self.train_df.index)
    
    def __getitem__(self, i):
        self.processed_count += 1
        return self.x[i], self.e[i], self.t[i]
#         return (torch.from_numpy(x), torch.from_numpy(e), torch.from_numpy(t))

train_ds = MyTrainDataset('example_data.csv', True)
train_loader = dataloader.DataLoader(train_ds, shuffle=False, batch_size=16,num_workers=8)

   Variable_1   Variable_2  Variable_3  Variable_4  Event  Time
0            0           3           2         4.6      1    43
1            0           2           0         1.6      0    52
2            0           3           0         3.5      1    73
3            0           3           1         5.1      0    51
4            0           2           0         1.7      0    51


# Transform the dataset to "DeepSurv" format
DeepSurv expects a dataset to be in the form:

    {
        'x': numpy array of float32
        'e': numpy array of int32
        't': numpy array of float32
        'hr': (optional) numpy array of float32
    }
    
You are providing me a csv, which I read in as a pandas dataframe. Then I convert the pandas dataframe into the DeepSurv dataset format above. 

In [2]:
def init_weights(m):
    if type(m) == nn.Linear:
        torch.nn.init.xavier_normal_(m.weight.data)

In [4]:
n_epochs = 1
L2_reg = 1e-05
batch_norm = True
dropout = 0.4
hidden_layers_sizes = [25, 25]
learning_rate = 1e-05
lr_decay = 0.001
momentum = 0.9
n_in = train_ds.x.shape[1]
standardize = True

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

my_network = network.DeepSurv(n_in, hidden_layers_sizes=hidden_layers_sizes, dropout=dropout, batch_norm=batch_norm, momentum=0.1)
my_network.apply(init_weights)
# network.load_state_dict(torch.load("model_99.pt"))

optimizer = optimizer = torch.optim.SGD(my_network.parameters(), lr=learning_rate, momentum=momentum, weight_decay=L2_reg, nesterov=True)
exp_lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,1,gamma=lr_decay,last_epoch=-1)
my_network.train()

my_network.to(device)

# If you have validation data, you can add it as the valid_dataloader parameter to the function
metrics = survival_analysis.train(my_network, train_loader, device=device, optimizer=optimizer, scheduler=exp_lr_scheduler, n_epochs=n_epochs)
print()
# print(my_network.layers[0].weight)
# print(my_network.layers[0].bias)
print("Done")

neg_likelihood:  tensor(5.1479, device='cuda:0', grad_fn=<NegBackward>)
neg_likelihood:  tensor(9.5214, device='cuda:0', grad_fn=<NegBackward>)
neg_likelihood:  tensor(6.6618, device='cuda:0', grad_fn=<NegBackward>)
neg_likelihood:  tensor(3.4682, device='cuda:0', grad_fn=<NegBackward>)
neg_likelihood:  tensor(-0., device='cuda:0', grad_fn=<NegBackward>)
neg_likelihood:  tensor(8.9888, device='cuda:0', grad_fn=<NegBackward>)
neg_likelihood:  tensor(9.1800, device='cuda:0', grad_fn=<NegBackward>)
neg_likelihood:  tensor(1.5888, device='cuda:0', grad_fn=<NegBackward>)
neg_likelihood:  tensor(3.9015, device='cuda:0', grad_fn=<NegBackward>)
neg_likelihood:  tensor(-0., device='cuda:0', grad_fn=<NegBackward>)
neg_likelihood:  tensor(3.9355, device='cuda:0', grad_fn=<NegBackward>)
neg_likelihood:  tensor(2.0350, device='cuda:0', grad_fn=<NegBackward>)
neg_likelihood:  tensor(2.4757, device='cuda:0', grad_fn=<NegBackward>)
neg_likelihood:  tensor(9.0517, device='cuda:0', grad_fn=<NegBackward>

In [4]:
# Print the final metrics
print('Train C-Index:', metrics['c-index'][-1])
# print('Valid C-Index: ',metrics['valid_c-index'][-1])

# Plot the training / validation curves
viz.plot_log(metrics)

KeyError: 'c-index'