In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import MinMaxScaler
import torch
from sklearn.metrics import accuracy_score
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import math
from sklearn.model_selection import train_test_split,cross_val_score,KFold,StratifiedKFold
import torchtuples as tt

In [2]:
from sksurv.datasets import load_gbsg2
from sksurv.linear_model import CoxPHSurvivalAnalysis, CoxnetSurvivalAnalysis
from sksurv.preprocessing import OneHotEncoder, encode_categorical

from sklearn import set_config
from sklearn.model_selection import GridSearchCV, KFold
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

In [3]:
data_x, data_y = load_gbsg2()
data_y

array([( True, 1814.), ( True, 2018.), ( True,  712.), ( True, 1807.),
       ( True,  772.), ( True,  448.), (False, 2172.), (False, 2161.),
       ( True,  471.), (False, 2014.), ( True,  577.), ( True,  184.),
       (False, 1840.), (False, 1842.), (False, 1821.), ( True, 1371.),
       ( True,  707.), (False, 1743.), (False, 1781.), ( True,  865.),
       ( True, 1684.), (False, 1701.), (False, 1701.), (False, 1693.),
       ( True,  379.), ( True, 1105.), ( True,  548.), ( True, 1296.),
       (False, 1483.), (False, 1570.), (False, 1469.), (False, 1472.),
       (False, 1342.), (False, 1349.), ( True, 1162.), (False, 1342.),
       ( True,  797.), (False, 1232.), (False, 1230.), (False, 1205.),
       (False, 1090.), (False, 1095.), ( True,  449.), (False,  972.),
       (False,  825.), (False, 2438.), (False, 2233.), ( True,  286.),
       (False, 1861.), ( True, 1080.), ( True, 1521.), (False, 1693.),
       ( True, 1528.), ( True,  169.), ( True,  272.), ( True,  731.),
      

In [4]:
data_x, data_y = load_gbsg2()
data_x_transform = OneHotEncoder().fit_transform(data_x)
data_y_transform = pd.DataFrame.from_records(data_y, columns=['event', 'time'])
data_y_transform['cens'] = data_y_transform['cens'].replace({True: 1, False: 0})
X_train, X_test, y_train, y_test = train_test_split(data_x_transform, data_y, test_size=0.2, random_state=1)

In [5]:
cox_lasso = CoxnetSurvivalAnalysis(l1_ratio=1, alpha_min_ratio=0.01)
cox_lasso.fit(X_train, y_train)
cox_lasso.score(X_test, y_test)

0.6548471615720524

In [6]:
cox_ridge = CoxnetSurvivalAnalysis(l1_ratio=0.00001, alpha_min_ratio=0.01)
cox_ridge.fit(X_train, y_train)
cox_ridge.score(X_test, y_test)

0.6648908296943231

In [6]:
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
import seaborn as sns

sns.set_style("white")

import torch # For building the networks 
import torchtuples as tt # Some useful functions
from torch import nn
import torch.nn.functional as F

from pycox.datasets import support
from pycox.models import DeepHitSingle
from pycox.models import CoxPH
from pycox.evaluation import EvalSurv
from pycox.preprocessing.feature_transforms import OrderedCategoricalLong
from pycox.models.loss import rank_loss_deephit_single

In [7]:
data_y_transform_train = pd.DataFrame.from_records(y_train, columns=['event', 'time'])
data_y_transform_test = pd.DataFrame.from_records(y_test, columns=['event', 'time'])
data_y_transform_train['cens'] = data_y_transform_train['cens'].replace({True: 1, False: 0})
data_y_transform_test['cens'] = data_y_transform_test['cens'].replace({True: 1, False: 0})
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train_values= X_train.values
X_test_values= X_test.values

In [66]:
num_durations = 10

labtrans = DeepHitSingle.label_transform(num_durations)
get_target = lambda df: (df['time'].values, df['cens'].values)
y_train = labtrans.fit_transform(*get_target(data_y_transform_train))
durations_test, events_test = get_target(data_y_transform_test)

In [67]:
in_features = X_train.shape[1]
out_features = labtrans.out_features
num_nodes = [32]
batch_norm = True
dropout = 0.2

net = tt.practical.MLPVanilla(in_features, num_nodes, out_features, batch_norm, dropout)
optimizer = tt.optim.AdamWR(decoupled_weight_decay=0.01, cycle_eta_multiplier=0.8,
                            cycle_multiplier=2)
model_deep_hit = DeepHitSingle(net, optimizer, alpha = 0.2, sigma = 0.1, duration_index=labtrans.cuts)
epochs = 20
batch_size=8
model_deep_hit.fit(X_train_values, y_train, batch_size, epochs)
surv = model_deep_hit.predict_surv_df(X_test_values)
ev = EvalSurv(surv, durations_test, events_test, censor_surv='km')
ev.concordance_td('antolini')

0:	[0s / 0s],		train_loss: 0.5967
1:	[0s / 0s],		train_loss: 0.5058
2:	[0s / 0s],		train_loss: 0.5046
3:	[0s / 1s],		train_loss: 0.4776
4:	[0s / 1s],		train_loss: 0.4773
5:	[0s / 1s],		train_loss: 0.4706
6:	[0s / 1s],		train_loss: 0.4688
7:	[0s / 2s],		train_loss: 0.4623
8:	[0s / 2s],		train_loss: 0.4689
9:	[0s / 2s],		train_loss: 0.4642
10:	[0s / 2s],		train_loss: 0.4612
11:	[0s / 2s],		train_loss: 0.4606
12:	[0s / 3s],		train_loss: 0.4535
13:	[0s / 3s],		train_loss: 0.4507
14:	[0s / 3s],		train_loss: 0.4463
15:	[0s / 3s],		train_loss: 0.4423
16:	[0s / 3s],		train_loss: 0.4467
17:	[0s / 4s],		train_loss: 0.4412
18:	[0s / 4s],		train_loss: 0.4489
19:	[0s / 4s],		train_loss: 0.4555


0.6385709330558446

In [68]:
import torch
import torchtuples as tt

from pycox.datasets import metabric
from pycox.models import CoxPH
from pycox.evaluation import EvalSurv

In [69]:
get_target = lambda df: (df['time'].values, df['cens'].values)
y_train_deepsurv = get_target(data_y_transform_train)
durations_test, events_test = get_target(data_y_transform_test)

In [70]:
X_train

Unnamed: 0,age,estrec,horTh=yes,menostat=Post,pnodes,progrec,tgrade=II,tgrade=III,tsize
360,36.0,0.0,0.0,0.0,2.0,0.0,0.0,1.0,30.0
224,46.0,0.0,0.0,0.0,2.0,0.0,1.0,0.0,11.0
676,56.0,700.0,1.0,1.0,5.0,113.0,0.0,0.0,42.0
662,49.0,41.0,0.0,1.0,1.0,14.0,1.0,0.0,22.0
185,61.0,9.0,1.0,1.0,2.0,23.0,0.0,1.0,35.0
...,...,...,...,...,...,...,...,...,...
144,62.0,1060.0,1.0,1.0,1.0,35.0,1.0,0.0,19.0
645,43.0,43.0,0.0,0.0,1.0,171.0,1.0,0.0,32.0
72,54.0,81.0,1.0,1.0,6.0,15.0,1.0,0.0,30.0
235,63.0,30.0,1.0,1.0,1.0,26.0,1.0,0.0,21.0


In [71]:
in_features = X_train.shape[1]
num_nodes = [64, 64]
out_features = 1
batch_norm = True
dropout = 0.2
output_bias = False

net = tt.practical.MLPVanilla(in_features, num_nodes, out_features, batch_norm,
                              dropout, output_bias=output_bias)

In [72]:
optimizer = tt.optim.AdamWR(decoupled_weight_decay=0.01, cycle_eta_multiplier=0.8,
                            cycle_multiplier=2)
model_deepsurv = CoxPH(net, optimizer) 
batch_size= 4
epochs= 100
verbose=1
callbacks = [tt.callbacks.EarlyStopping()]
log = model_deepsurv.fit(X_train_values, y_train_deepsurv, batch_size, epochs, callbacks, verbose)

0:	[0s / 0s],	
1:	[0s / 0s],	
2:	[0s / 1s],	
3:	[0s / 1s],	
4:	[0s / 2s],	
5:	[0s / 2s],	
6:	[0s / 2s],	
7:	[0s / 3s],	
8:	[0s / 3s],	
9:	[0s / 3s],	


In [73]:
model_deepsurv.compute_baseline_hazards()
surv = model_deepsurv.predict_surv_df(X_test_values)
ev = EvalSurv(surv, durations_test, events_test, censor_surv='km')
ev.concordance_td()

  .assign(expg=np.exp(self.predict(input, batch_size, True, eval_, num_workers=num_workers)))


0.4933017669476845

In [74]:
data_x, data_y = load_gbsg2()
data_x_transform = OneHotEncoder().fit_transform(data_x)
data_y_transform = pd.DataFrame.from_records(data_y, columns=['event', 'time'])
data_y_transform['cens'] = data_y_transform['cens'].replace({True: 1, False: 0})
X_train, X_test, y_train, y_test = train_test_split(data_x_transform, data_y, random_state=1)

In [49]:
from sksurv.ensemble import RandomSurvivalForest
rsf = RandomSurvivalForest(n_estimators=1000,
                           min_samples_split=10,
                           min_samples_leaf=15,
                           n_jobs=-1,
                           random_state=42)
rsf.fit(X_train,y_train)
rsf.score(X_test, y_test)

0.6919875130072841

In [7]:
from sksurv.linear_model import CoxPHSurvivalAnalysis
estimator = CoxPHSurvivalAnalysis().fit(X_train, y_train)
estimator.score(X_test,y_test)

0.6733624454148471

In [76]:
from sksurv.linear_model import CoxnetSurvivalAnalysis
estimator = CoxnetSurvivalAnalysis(l1_ratio=0.99, fit_baseline_model=True)
estimator.fit(X_train, y_train)
estimator.score(X_test, y_test)

0.6754538096889814

In [77]:
from sksurv.ensemble import ComponentwiseGradientBoostingSurvivalAnalysis
from sksurv.ensemble import GradientBoostingSurvivalAnalysis
est_cph_tree = GradientBoostingSurvivalAnalysis(
    n_estimators=100, learning_rate=1.0, max_depth=1, random_state=0
)
est_cph_tree.fit(X_train, y_train)
cindex = est_cph_tree.score(X_test, y_test)
cindex

0.654873395768297

In [78]:
from pycox.models import LogisticHazard
# from pycox.models import PMF
# from pycox.models import DeepHitSingle
from pycox.evaluation import EvalSurv

In [81]:
num_durations = 10

labtrans = LogisticHazard.label_transform(num_durations)
# labtrans = PMF.label_transform(num_durations)
# labtrans = DeepHitSingle.label_transform(num_durations)

get_target = lambda df: (df['time'].values, df['cens'].values)
y_train_transform = labtrans.fit_transform(*get_target(data_y_transform_train))
y_test_transform = labtrans.transform(*get_target(data_y_transform_test))


# # We don't need to transform the test labels
durations_test, events_test = get_target(data_y_transform_test)

In [83]:
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train_values= X_train.values
X_test_values= X_test.values

In [86]:
in_features = X_train.shape[1]
num_nodes = [32, 32]
out_features = labtrans.out_features
batch_norm = True
dropout = 0.1
net = tt.practical.MLPVanilla(in_features, num_nodes, out_features, batch_norm, dropout)
model = LogisticHazard(net, tt.optim.Adam(0.01), duration_index=labtrans.cuts)
batch_size = 4
epochs = 60
log= model.fit(X_train_values, y_train_transform, batch_size, epochs)

0:	[0s / 0s],		train_loss: 1.8390
1:	[0s / 0s],		train_loss: 1.3571
2:	[0s / 0s],		train_loss: 1.3273
3:	[0s / 1s],		train_loss: 1.3146
4:	[0s / 1s],		train_loss: 1.3089
5:	[0s / 1s],		train_loss: 1.3299
6:	[0s / 2s],		train_loss: 1.3039
7:	[0s / 2s],		train_loss: 1.3079
8:	[0s / 2s],		train_loss: 1.2856
9:	[0s / 3s],		train_loss: 1.2957
10:	[0s / 3s],		train_loss: 1.2884
11:	[0s / 3s],		train_loss: 1.3082
12:	[0s / 4s],		train_loss: 1.2888
13:	[0s / 4s],		train_loss: 1.2980
14:	[0s / 4s],		train_loss: 1.2931
15:	[0s / 4s],		train_loss: 1.2981
16:	[0s / 5s],		train_loss: 1.2890
17:	[0s / 5s],		train_loss: 1.2910
18:	[0s / 5s],		train_loss: 1.2916
19:	[0s / 6s],		train_loss: 1.2857
20:	[0s / 6s],		train_loss: 1.3030
21:	[0s / 6s],		train_loss: 1.2840
22:	[0s / 7s],		train_loss: 1.2744
23:	[0s / 7s],		train_loss: 1.2889
24:	[0s / 7s],		train_loss: 1.2924
25:	[0s / 8s],		train_loss: 1.2694
26:	[0s / 8s],		train_loss: 1.2532
27:	[0s / 8s],		train_loss: 1.3001
28:	[0s / 9s],		train_loss: 1.

In [87]:
surv = model.predict_surv_df(X_test_values)
ev = EvalSurv(surv, durations_test, events_test, censor_surv='km')
ev.concordance_td('antolini')

0.608394033992369

In [88]:
cox= CoxnetSurvivalAnalysis()
rsf = RandomSurvivalForest(n_estimators=1000,
                           min_samples_split=10,
                           min_samples_leaf=15,
                           random_state=42)
cox.fit(X_train_values, y_train)
rsf.fit(X_train_values, y_train)
cox_pred = cox.predict(X_test_values)
rsf_pred = rsf.predict(X_test_values)
ensemble_pred= (cox_pred+rsf_pred)/2
from sksurv.metrics import concordance_index_censored
cindex = concordance_index_censored(y_test["cens"], y_test["time"], ensemble_pred)
print("Concordance Index:", cindex[0])

Concordance Index: 0.6924499942189849


In [5]:
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train_values= X_train.values
X_test_values= X_test.values

In [7]:
from sksurv.ensemble import RandomSurvivalForest
from sksurv.linear_model import CoxPHSurvivalAnalysis


In [12]:
from sksurv.linear_model import CoxPHSurvivalAnalysis
cox = CoxPHSurvivalAnalysis()
rsf = RandomSurvivalForest(n_estimators=1000,
                           min_samples_split=10,
                           min_samples_leaf=15,
                           random_state=42)
cox_lasso = CoxnetSurvivalAnalysis(l1_ratio=1, alpha_min_ratio=0.01)
cox_lasso.fit(X_train, y_train)
cox.fit(X_train_values, y_train)
rsf.fit(X_train_values, y_train)
lasso_predict = cox_lasso.predict(X_test_values)
cox_pred = cox.predict(X_test_values)
rsf_pred = rsf.predict(X_test_values)
ensemble_pred = np.maximum(cox_pred, rsf_pred, lasso_predict)
from sksurv.metrics import concordance_index_censored
cindex = concordance_index_censored(y_test["cens"], y_test["time"], ensemble_pred)
print("Concordance Index:", cindex[0])

Concordance Index: 0.6919875130072841


