In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import MinMaxScaler
import torch
from sklearn.metrics import accuracy_score
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import math
from sklearn.model_selection import train_test_split,cross_val_score,KFold,StratifiedKFold
import torchtuples as tt

In [2]:
from sksurv.linear_model import CoxPHSurvivalAnalysis, CoxnetSurvivalAnalysis
from sksurv.preprocessing import OneHotEncoder, encode_categorical

from sklearn import set_config
from sklearn.model_selection import GridSearchCV, KFold
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

In [3]:
datas = pd.read_csv('Simulated_Linear.csv')
datas

Unnamed: 0,x.1,x.2,x.3,x.4,x.5,time,status
0,-1.174347,0.573573,-0.374858,1.708831,0.429901,0.045127,0
1,0.534466,-1.686468,-0.453762,0.506122,-0.401646,0.215132,0
2,0.411428,-1.942484,1.573165,-0.217526,0.442370,0.012064,0
3,0.552328,1.121415,-0.552002,1.358358,1.529469,0.000430,0
4,-0.501650,0.310209,0.648147,-1.036585,1.438970,0.006050,0
...,...,...,...,...,...,...,...
495,-1.174363,0.718894,-0.798380,-1.761006,-0.192303,9.568746,0
496,0.085166,0.031047,-0.126165,1.745919,0.362750,0.004023,0
497,0.449437,0.035172,0.162151,1.113984,0.849036,0.004372,0
498,0.773209,1.154781,0.646107,-0.220550,1.611070,0.010570,1


In [4]:
data_y = datas.loc[:, ['time', 'status']].copy()
data_y = data_y.rename(columns={'status': 'cens'})
data_x = datas.drop(['time', 'status'], axis=1)

X_train, X_test, y_train, y_test = train_test_split(data_x, data_y, test_size=0.3, random_state=1)
list_of_tuples = list(y_train.itertuples(index=False, name=None))
swapped_list = [(t[1], t[0]) for t in list_of_tuples]
y_train = np.array(swapped_list, dtype=[('cens', bool), ('time', float)])
list_of_tuples = list(y_test.itertuples(index=False, name=None))
swapped_list = [(t[1], t[0]) for t in list_of_tuples]
y_test = np.array(swapped_list, dtype=[('cens', bool), ('time', float)])

In [5]:
for df in [X_train, X_test]:
    numeric_columns = df.select_dtypes(include=['float64', 'int64']).columns
    df[numeric_columns] = (df[numeric_columns] - df[numeric_columns].min()) / (df[numeric_columns].max() - df[numeric_columns].min())

In [6]:
X_test

Unnamed: 0,x.1,x.2,x.3,x.4,x.5
304,0.577449,0.538967,0.177132,0.807942,0.561790
340,0.693532,0.690496,0.588189,0.478435,0.919364
47,0.000000,0.590699,0.725248,0.491762,0.600817
67,0.828707,0.366272,0.134227,0.737205,0.155820
479,0.568571,0.406520,0.413673,0.993383,0.261575
...,...,...,...,...,...
331,0.796787,0.350476,0.716229,0.750751,0.400029
197,0.927889,0.488966,0.359376,0.604546,0.525752
95,0.340278,0.516512,0.425743,1.000000,0.722189
122,0.426319,0.525442,0.479658,0.471904,0.584641


# MODELS

In [7]:
cox_lasso = CoxnetSurvivalAnalysis(l1_ratio=1, alpha_min_ratio=0.01)
cox_lasso.fit(X_train, y_train)
cox_lasso.score(X_test, y_test)

0.8458310527276045

In [8]:
cox_ridge = CoxnetSurvivalAnalysis(l1_ratio=0.00001, alpha_min_ratio=0.01)
cox_ridge.fit(X_train, y_train)
cox_ridge.score(X_test, y_test)

0.8281335522714833

In [9]:
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
import seaborn as sns

sns.set_style("white")

import torch # For building the networks
import torchtuples as tt # Some useful functions
from torch import nn
import torch.nn.functional as F

from pycox.datasets import support
from pycox.models import DeepHitSingle
from pycox.models import CoxPH
from pycox.evaluation import EvalSurv
from pycox.preprocessing.feature_transforms import OrderedCategoricalLong
from pycox.models.loss import rank_loss_deephit_single

In [10]:
data_y_transform_train = pd.DataFrame.from_records(y_train, columns=['event', 'time'])
data_y_transform_test = pd.DataFrame.from_records(y_test, columns=['event', 'time'])
data_y_transform_train['cens'] = data_y_transform_train['cens'].replace({True: 1, False: 0})
data_y_transform_test['cens'] = data_y_transform_test['cens'].replace({True: 1, False: 0})
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train_values= X_train.values
X_test_values= X_test.values

In [11]:
num_durations = 10

labtrans = DeepHitSingle.label_transform(num_durations)
get_target = lambda df: (df['time'].values, df['cens'].values)
y_train_deephit = labtrans.fit_transform(*get_target(data_y_transform_train))
durations_test, events_test = get_target(data_y_transform_test)

In [12]:
in_features = X_train.shape[1]
out_features = labtrans.out_features
num_nodes = [32]
batch_norm = True
dropout = 0.2

net = tt.practical.MLPVanilla(in_features, num_nodes, out_features, batch_norm, dropout)
optimizer = tt.optim.AdamWR(decoupled_weight_decay=0.01, cycle_eta_multiplier=0.8,
                            cycle_multiplier=2)
model_deep_hit = DeepHitSingle(net, optimizer, alpha = 0.2, sigma = 0.1, duration_index=labtrans.cuts)
epochs = 30
batch_size=8
model_deep_hit.fit(X_train_values, y_train_deephit, batch_size, epochs)
surv = model_deep_hit.predict_surv_df(X_test_values)
ev = EvalSurv(surv, durations_test, events_test, censor_surv='km')
ev.concordance_td('antolini')

	add(Number alpha, Tensor other)
Consider using one of the following signatures instead:
	add(Tensor other, *, Number alpha) (Triggered internally at  ..\torch\csrc\utils\python_arg_parser.cpp:1025.)
  p.data = p.data.add(-weight_decay * eta, p.data)


0:	[0s / 0s],		train_loss: 0.2804
1:	[0s / 0s],		train_loss: 0.2747
2:	[0s / 1s],		train_loss: 0.2566
3:	[0s / 1s],		train_loss: 0.2521
4:	[0s / 1s],		train_loss: 0.2407
5:	[0s / 1s],		train_loss: 0.2281
6:	[0s / 1s],		train_loss: 0.2172
7:	[0s / 1s],		train_loss: 0.2120
8:	[0s / 1s],		train_loss: 0.2054
9:	[0s / 2s],		train_loss: 0.1853
10:	[0s / 2s],		train_loss: 0.1728
11:	[0s / 2s],		train_loss: 0.1643
12:	[0s / 2s],		train_loss: 0.1598
13:	[0s / 2s],		train_loss: 0.1545
14:	[0s / 2s],		train_loss: 0.1517
15:	[0s / 2s],		train_loss: 0.1512
16:	[0s / 3s],		train_loss: 0.1317
17:	[0s / 3s],		train_loss: 0.1309
18:	[0s / 3s],		train_loss: 0.1113
19:	[0s / 3s],		train_loss: 0.1000
20:	[0s / 3s],		train_loss: 0.1035
21:	[0s / 3s],		train_loss: 0.0877
22:	[0s / 3s],		train_loss: 0.0964
23:	[0s / 4s],		train_loss: 0.0817
24:	[0s / 4s],		train_loss: 0.0734
25:	[0s / 4s],		train_loss: 0.0766
26:	[0s / 4s],		train_loss: 0.0819
27:	[0s / 4s],		train_loss: 0.0729
28:	[0s / 4s],		train_loss: 0.

0.45192483123517607

In [15]:
from sksurv.ensemble import RandomSurvivalForest
rsf = RandomSurvivalForest(n_estimators=1000,
                           min_samples_split=10,
                           min_samples_leaf=15,
                           n_jobs=-1,
                           random_state=42)
rsf.fit(X_train,y_train)
rsf.score(X_test, y_test)

0.7409231891990513

In [16]:
from sksurv.linear_model import CoxPHSurvivalAnalysis
estimator = CoxPHSurvivalAnalysis().fit(X_train, y_train)
estimator.score(X_test,y_test)

0.8465608465608465

In [17]:
from sksurv.linear_model import CoxnetSurvivalAnalysis
estimator = CoxnetSurvivalAnalysis(l1_ratio=0.99, fit_baseline_model=True)
estimator.fit(X_train, y_train)
estimator.score(X_test, y_test)

0.8461959496442255

In [18]:
from sksurv.ensemble import GradientBoostingSurvivalAnalysis
est_cph_tree = GradientBoostingSurvivalAnalysis(
    n_estimators=100, learning_rate=1.0, max_depth=1, random_state=0
)
est_cph_tree.fit(X_train, y_train)
cindex = est_cph_tree.score(X_test, y_test)
cindex

0.8202882685641306

In [31]:
import torch
import torchtuples as tt

from pycox.datasets import metabric
from pycox.models import CoxPH
from pycox.evaluation import EvalSurv

In [32]:
get_target = lambda df: (df['time'].values, df['cens'].values)
y_train_deepsurv = get_target(data_y_transform_train)
durations_test, events_test = get_target(data_y_transform_test)

In [33]:
in_features = X_train.shape[1]
num_nodes = [64, 64]
out_features = 1
batch_norm = True
dropout = 0.2
output_bias = False

net = tt.practical.MLPVanilla(in_features, num_nodes, out_features, batch_norm,
                              dropout, output_bias=output_bias)

In [36]:
optimizer = tt.optim.AdamWR(decoupled_weight_decay=0.01, cycle_eta_multiplier=0.8,
                            cycle_multiplier=2)
model_deepsurv = CoxPH(net, optimizer)
batch_size= 4
epochs= 100
verbose=1
callbacks = [tt.callbacks.EarlyStopping()]
log = model_deepsurv.fit(X_train_values, y_train_deepsurv, batch_size, epochs, callbacks, verbose)
model_deepsurv.compute_baseline_hazards()
surv = model_deepsurv.predict_surv_df(X_test_values)
ev = EvalSurv(surv, durations_test, events_test, censor_surv='km')
ev.concordance_td()

0:	[0s / 0s],	
1:	[0s / 0s],	
2:	[0s / 0s],	
3:	[0s / 1s],	
4:	[0s / 1s],	
5:	[0s / 1s],	
6:	[0s / 2s],	
7:	[0s / 2s],	
8:	[0s / 2s],	
9:	[0s / 2s],	


0.561758803138113

In [45]:
from pycox.models import LogisticHazard
# from pycox.models import PMF
# from pycox.models import DeepHitSingle
from pycox.evaluation import EvalSurv

In [46]:
num_durations = 10

labtrans = LogisticHazard.label_transform(num_durations)
# labtrans = PMF.label_transform(num_durations)
# labtrans = DeepHitSingle.label_transform(num_durations)

get_target = lambda df: (df['time'].values, df['cens'].values)
y_train_loghazard = labtrans.fit_transform(*get_target(data_y_transform_train))
y_test_loghazard = labtrans.transform(*get_target(data_y_transform_test))


# # We don't need to transform the test labels
durations_test, events_test = get_target(data_y_transform_test)

In [48]:
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train_values= X_train.values
X_test_values= X_test.values

In [49]:
in_features = X_train.shape[1]
num_nodes = [32, 32]
out_features = labtrans.out_features
batch_norm = True
dropout = 0.1
net = tt.practical.MLPVanilla(in_features, num_nodes, out_features, batch_norm, dropout)
model = LogisticHazard(net, tt.optim.Adam(0.01), duration_index=labtrans.cuts)
batch_size = 32
epochs = 10
log= model.fit(X_train_values, y_train_loghazard, batch_size, epochs)
surv = model.predict_surv_df(X_test_values)
ev = EvalSurv(surv, durations_test, events_test, censor_surv='km')
ev.concordance_td('antolini')

0:	[0s / 0s],		train_loss: 0.9839
1:	[0s / 0s],		train_loss: 0.6826
2:	[0s / 0s],		train_loss: 0.3399
3:	[0s / 0s],		train_loss: 0.1622
4:	[0s / 0s],		train_loss: 0.1052
5:	[0s / 0s],		train_loss: 0.0809
6:	[0s / 0s],		train_loss: 0.0671
7:	[0s / 0s],		train_loss: 0.0549
8:	[0s / 0s],		train_loss: 0.0620
9:	[0s / 0s],		train_loss: 0.0716


0.5088487502280605

In [50]:
cox= CoxnetSurvivalAnalysis()
rsf = RandomSurvivalForest(n_estimators=1000,
                           min_samples_split=10,
                           min_samples_leaf=15,
                           random_state=42)
cox.fit(X_train_values, y_train)
rsf.fit(X_train_values, y_train)
cox_pred = cox.predict(X_test_values)
rsf_pred = rsf.predict(X_test_values)
ensemble_pred= (cox_pred+rsf_pred)/2
from sksurv.metrics import concordance_index_censored
cindex = concordance_index_censored(y_test["cens"], y_test["time"], ensemble_pred)
print("Concordance Index:", cindex[0])

Concordance Index: 0.7456668491151249


In [51]:
cox= CoxPHSurvivalAnalysis()
rsf = RandomSurvivalForest(n_estimators=1000,
                           min_samples_split=10,
                           min_samples_leaf=15,
                           random_state=42)
cox_lasso = CoxnetSurvivalAnalysis(l1_ratio=1, alpha_min_ratio=0.01)
cox_lasso.fit(X_train, y_train)
cox.fit(X_train_values, y_train)
rsf.fit(X_train_values, y_train)
lasso_predict = cox_lasso.predict(X_test_values)
cox_pred = cox.predict(X_test_values)
rsf_pred = rsf.predict(X_test_values)
ensemble_pred = np.maximum(cox_pred, rsf_pred, lasso_predict)
from sksurv.metrics import concordance_index_censored
cindex = concordance_index_censored(y_test["cens"], y_test["time"], ensemble_pred)
print("Concordance Index:", cindex[0])

Concordance Index: 0.7409231891990513


