# PyCox
## Libraries and Data
This notebook is meant to be run in the `surv_nn` environment.

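This notebook recreates the pycox introduction notebook, with additional background from two Towards Data Science articles on time-to-event models: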
 - https://nbviewer.org/github/havakv/pycox/blob/master/examples/01_introduction.ipynb
 - https://towardsdatascience.com/how-to-implement-random-forest-svm-and-gradient-boosted-models-for-time-to-event-analyses-5d79d8153bcd
 - https://towardsdatascience.com/how-to-implement-deep-neural-networks-for-time-to-event-analyses-9aa0aeac4717
 


In [1]:
import torch
import lifelines
import numpy as np
import torchtuples as tt 
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler

from pycox.datasets import metabric
from pycox.models import LogisticHazard
from pycox.evaluation import EvalSurv

In [2]:
# Fix the PyTorch and NumPy global seeds so a fresh Run All is repeatable.
_ = torch.manual_seed(123)  # bind the return value to `_` to keep the cell output empty
np.random.seed(1234)

In [3]:
# Load the METABRIC dataset from pycox.datasets into a DataFrame for exploration.
df = metabric.read_df()

In [None]:
# Quick look at the dataset's dimensions and column names.
df.shape, df.columns

In [None]:
# Histogram of the `duration` column.
df['duration'].hist()

In [None]:
# Histogram of the `event` column.
df['event'].hist()

In [None]:
# Split METABRIC into train / validation / test frames.
# 20% of all rows are held out for testing; 20% of the remaining rows are then
# held out for validation. Each sampled part is dropped from the training frame
# by index label, so the three frames do not overlap (assuming the index labels
# are unique).
# An explicit random_state makes the split identical every time this cell runs,
# instead of depending on how much of NumPy's global RNG was consumed earlier.
df_train = metabric.read_df()
df_test = df_train.sample(frac=0.2, random_state=1234)
df_train = df_train.drop(df_test.index)
df_val = df_train.sample(frac=0.2, random_state=1234)
df_train = df_train.drop(df_val.index)

In [None]:
# Sanity check: row/column counts of the three splits.
df_train.shape, df_test.shape, df_val.shape

In [None]:
# Covariates that are passed through StandardScaler in the cells below.
cols_standardize = [f'x{i}' for i in (0, 1, 2, 3, 8)]
# Covariates used as-is.
# NOTE(review): presumably these are binary/categorical indicators — confirm against the dataset.
cols_leave = [f'x{i}' for i in range(4, 8)]

In [None]:
# One (column-list, scaler) pair per standardized covariate, so every column
# gets its own StandardScaler instance.
standardize = [
    ([name], StandardScaler())
    for name in cols_standardize
]


In [None]:
# Fit each scaler on the training frame only, then apply that fitted scaler to
# the train, test and validation frames so all three share the same scaling.
for cols, scaler in standardize:
    scaler.fit(df_train[cols])
    for frame in (df_train, df_test, df_val):
        frame[cols] = scaler.transform(frame[cols])

In [None]:
# Build float32 NumPy feature matrices with a fixed column order:
# standardized covariates first, untouched covariates after.
feature_cols = cols_standardize + cols_leave
x_train, x_test, x_val = (
    frame[feature_cols].astype('float32').to_numpy()
    for frame in (df_train, df_test, df_val)
)

## Label Transforms

In [None]:
# Argument for the label transform.
# NOTE(review): presumably the size of the discrete time grid — confirm against pycox.
num_durations = 10

# Label transform belonging to the LogisticHazard model used in this notebook.
# (PMF.label_transform and DeepHitSingle.label_transform were noted as
# alternatives; those classes are not imported in this notebook.)
labtrans = LogisticHazard.label_transform(num_durations)


def get_target(df):
    """Return the ``duration`` and ``event`` columns of ``df`` as a pair of arrays."""
    return df['duration'].values, df['event'].values


# Fit the label transform on the training labels only, then reuse it for validation.
y_train = labtrans.fit_transform(*get_target(df_train))
y_val = labtrans.transform(*get_target(df_val))

train = (x_train, y_train)
val = (x_val, y_val)

# We don't need to transform the test labels: evaluation uses the raw
# durations and event indicators.
durations_test, events_test = get_target(df_test)

## Model

In [None]:
# Network hyperparameters. The input width follows the feature matrix and the
# output width follows the fitted label transform.
in_features = x_train.shape[1]
num_nodes = [32, 32]
out_features = labtrans.out_features
batch_norm = True
dropout = 0.1

# Hand-built MLP driven by the hyperparameters above. Previously the layer
# widths and dropout rate were hard-coded, so editing the variables had no
# effect on this network.
# Each hidden block is Linear -> ReLU -> [BatchNorm1d] -> [Dropout].
layers = []
width_in = in_features
for width_out in num_nodes:
    layers.append(torch.nn.Linear(width_in, width_out))
    layers.append(torch.nn.ReLU())
    if batch_norm:
        layers.append(torch.nn.BatchNorm1d(width_out))
    if dropout:
        layers.append(torch.nn.Dropout(dropout))
    width_in = width_out
# Final projection onto the outputs expected by the label transform.
layers.append(torch.nn.Linear(width_in, out_features))

net = torch.nn.Sequential(*layers)

In [None]:
# Build the network with torchtuples' MLPVanilla helper from the same hyperparameters.
# This rebinds `net`, replacing the torch.nn.Sequential model from the previous cell.
net = tt.practical.MLPVanilla(in_features, num_nodes, out_features, batch_norm, dropout)

In [None]:
# Combine the network, an optimizer and the time grid from the label transform.
# NOTE(review): 0.01 is presumably the Adam learning rate — confirm against torchtuples.
optimizer = tt.optim.Adam(0.01)
model = LogisticHazard(net, optimizer, duration_index=labtrans.cuts)


In [None]:
# Training settings passed to model.fit in the next cell.
epochs = 100
batch_size = 256
# Early-stopping callback constructed with no arguments (library defaults).
callbacks = [tt.cb.EarlyStopping()]

In [None]:
# Train the model; the (x_val, y_val) pair is supplied as validation data.
log = model.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    callbacks=callbacks,
    val_data=val,
)

In [None]:
# Plot the training log returned by model.fit.
log.plot()

In [None]:
# Inspect the (x_val, y_val) validation tuple that was passed to model.fit.
# NOTE(review): this displays the full arrays; consider showing shapes only.
val

In [None]:
# Shape of the training feature matrix (rows, features).
x_train.shape

## Prediction

In [None]:
# Predict survival estimates for the test features; the result is used as a DataFrame below.
surv = model.predict_surv_df(x_test)

In [None]:
# Step plot of the first five columns of the predicted survival frame.
ax = surv.iloc[:, :5].plot(drawstyle='steps-post')
ax.set_ylabel('S(t | x)')
_ = ax.set_xlabel('Time')

In [None]:
# Re-predict through model.interpolate(10); this rebinds `surv`.
# NOTE(review): presumably this interpolates between the discrete time points
# using 10 sub-steps — confirm against the pycox documentation.
surv = model.interpolate(10).predict_surv_df(x_test)

In [None]:
# Same step plot as before, now drawn from the interpolated predictions.
ax = surv.iloc[:, :5].plot(drawstyle='steps-post')
ax.set_ylabel('S(t | x)')
_ = ax.set_xlabel('Time')

In [None]:
# Evaluation helper built from the predicted survival frame and the raw test labels.
# NOTE(review): censor_surv='km' presumably selects a Kaplan-Meier estimate of the
# censoring distribution — confirm against the pycox documentation.
ev = EvalSurv(surv, durations_test, events_test, censor_surv='km')

In [None]:
# Time-dependent concordance of the network's predictions, using the 'antolini' method.
ev.concordance_td('antolini')

## Comparison to CPH (Cox proportional hazards)

In [None]:
# Baseline: fit a lifelines Cox proportional-hazards model on the training frame
# (the same frame whose covariates were standardized above) and print its summary.
cph = lifelines.CoxPHFitter()
cph.fit(df_train, duration_col='duration', event_col='event')
cph.print_summary()

In [None]:
# Concordance index of the Cox model on the held-out test frame.
cph.score(df_test, scoring_method="concordance_index")

## Next Steps

 - Add embedding layers
 - Train on GPU
 - Try other survival model types?