In [2]:
import pandas as pd
import numpy as np
import time
from lifelines.utils import concordance_index
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import log_loss
from sklearn.metrics import brier_score_loss
from sksurv.ensemble import GradientBoostingSurvivalAnalysis

In [3]:
# Load the pre-split training and test sets (CSV files in the working directory).
df_train, df_test = (pd.read_csv(csv_path) for csv_path in ('df_train.csv', 'df_test.csv'))

In [4]:
# Features: every column except the two outcome columns.
outcome_cols = ['TIMETOEVENT', 'MORTSTAT']
X_surv_train = df_train.drop(columns=outcome_cols)
X_surv_test = df_test.drop(columns=outcome_cols)

# Targets: structured arrays with one (event indicator, time) record per row,
# event first and time second, as laid out by the dtype below.
surv_dtype = [('MORTSTAT', bool), ('TIMETOEVENT', float)]
y_surv_train = np.array(list(zip(df_train['MORTSTAT'], df_train['TIMETOEVENT'])), dtype=surv_dtype)
y_surv_test = np.array(list(zip(df_test['MORTSTAT'], df_test['TIMETOEVENT'])), dtype=surv_dtype)

In [5]:
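# GBS1 (baseline): n_estimators=20, learning_rate=0.1, subsample=1 (no subsampling), max_features=20, max_leaf_nodes=20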

In [6]:
# Fit GBS1 with the Cox partial-likelihood loss and report the wall time of the fit.
gbs1 = GradientBoostingSurvivalAnalysis(
    loss='coxph',
    n_estimators=20,
    learning_rate=0.1,
    subsample=1.0,        # every stage sees the full training set
    max_features=20,
    max_depth=None,       # no depth limit: tree size is capped by max_leaf_nodes
    max_leaf_nodes=20,
    random_state=42,
    verbose=2,            # prints the per-iteration training loss table
)

# perf_counter() is monotonic, so the measured duration cannot be distorted by
# system clock adjustments during the long fit (time.time() can be).
start_time = time.perf_counter()
gbs1.fit(X_surv_train, y_surv_train)
end_time = time.perf_counter()
print(f"Time for one model: {end_time - start_time:.2f} seconds")

      Iter       Train Loss   Remaining Time 
         1       51189.6063           22.04m
         2       51107.2734           20.90m
         3       51025.9465           19.62m
         4       50948.0000           18.41m
         5       50868.5316           17.24m
         6       50795.7521           16.14m
         7       50716.5617           15.09m
         8       50639.2478           13.95m
         9       50571.9470           12.84m
        10       50499.3035           11.69m
        11       50425.4078           10.51m
        12       50352.7674            9.37m
        13       50280.5776            8.17m
        14       50212.3643            6.99m
        15       50142.6095            5.80m
        16       50072.0705            4.63m
        17       50006.1976            3.47m
        18       49939.5909            2.31m
        19       49872.8214            1.15m
        20       49807.9166            0.00s
Time for one model: 1384.98 seconds


In [7]:
# predict survival function
surv_probs_train = gbs1.predict_survival_function(X_surv_train)
surv_probs_test = gbs1.predict_survival_function(X_surv_test)

# all time-to-event points
time_points_train = surv_probs_train[0].x
time_points_test = surv_probs_test[0].x 
 
# convert survival probabilities to df
df_surv_probs_train = pd.DataFrame(data=[fn(time_points_train) for fn in surv_probs_train], columns=time_points_train)
df_surv_probs_test = pd.DataFrame(data=[fn(time_points_train) for fn in surv_probs_test], columns=time_points_train)

# surv probabilites after 1 and 3 years
suvr_prob_3y_train = df_surv_probs_train.iloc[:,3]
suvr_prob_3y_test = df_surv_probs_test.iloc[:,3]

# extract status
# get 3-year event status based on TIMETOEVENT and MORTSTAT
status_3y_train_surv = np.where((df_train["MORTSTAT"] == 1) & (df_train["TIMETOEVENT"] <= 3), 0, 1)
status_3y_test_surv = np.where((df_test["MORTSTAT"] == 1) & (df_test["TIMETOEVENT"] <= 3), 0, 1)

# log-loss
log_loss_3y_train = log_loss(status_3y_train_surv, suvr_prob_3y_train)
log_loss_3y_test = log_loss(status_3y_test_surv, suvr_prob_3y_test)

# compute Brier
bs_train_3y = brier_score_loss(status_3y_train_surv, suvr_prob_3y_train)
bs_test_3y = brier_score_loss(status_3y_test_surv, suvr_prob_3y_test)

# c-index
c_is = concordance_index(status_3y_train_surv, suvr_prob_3y_train)

print(f'Log Loss is: {log_loss_3y_train:.4f}')
print(f'Log Loss os: {log_loss_3y_test:.4f}')
print(f'BS is: {bs_train_3y:.4f}')
print(f'BS os: {bs_test_3y:.4f}')
print(f'C is: {c_is:.4f}')
print(f'APLL is: {-gbs1.score(X_surv_train, y_surv_train):.4f}')
print(f'APLL os: {-gbs1.score(X_surv_test, y_surv_test):.4f}')

Log Loss is: 0.1456
Log Loss os: 0.1444
BS is: 0.0343
BS os: 0.0340
C is: 0.8700
APLL is: -0.8670
APLL os: -0.8672


In [8]:
# GBS2: same as GBS1 but with smaller trees (max_leaf_nodes=8 instead of 20)

In [9]:
# Fit GBS2 with the Cox partial-likelihood loss and report the wall time of the fit.
gbs2 = GradientBoostingSurvivalAnalysis(
    loss='coxph',
    n_estimators=20,
    learning_rate=0.1,
    subsample=1.0,        # every stage sees the full training set
    max_features=20,
    max_depth=None,       # no depth limit: tree size is capped by max_leaf_nodes
    max_leaf_nodes=8,
    random_state=42,
    verbose=2,            # prints the per-iteration training loss table
)

# perf_counter() is monotonic, so the measured duration cannot be distorted by
# system clock adjustments during the long fit (time.time() can be).
start_time = time.perf_counter()
gbs2.fit(X_surv_train, y_surv_train)
end_time = time.perf_counter()
print(f"Time for one model: {end_time - start_time:.2f} seconds")

      Iter       Train Loss   Remaining Time 
         1       51201.9415           21.58m
         2       51129.2189           20.74m
         3       51056.2456           19.51m
         4       50986.7604           18.31m
         5       50920.6225           17.06m
         6       50857.1684           15.93m
         7       50787.5830           14.76m
         8       50718.8475           13.65m
         9       50667.0110           12.55m
        10       50608.7452           11.41m
        11       50543.1131           10.27m
        12       50478.1304            9.13m
        13       50415.2397            7.98m
        14       50354.6448            6.82m
        15       50292.0554            5.68m
        16       50228.1579            4.54m
        17       50169.4058            3.40m
        18       50112.1336            2.27m
        19       50052.1193            1.13m
        20       49994.1354            0.00s
Time for one model: 1359.32 seconds


In [10]:
# predict survival function
surv_probs_train = gbs2.predict_survival_function(X_surv_train)
surv_probs_test = gbs2.predict_survival_function(X_surv_test)

# all time-to-event points
time_points_train = surv_probs_train[0].x
time_points_test = surv_probs_test[0].x 
 
# convert survival probabilities to df
df_surv_probs_train = pd.DataFrame(data=[fn(time_points_train) for fn in surv_probs_train], columns=time_points_train)
df_surv_probs_test = pd.DataFrame(data=[fn(time_points_train) for fn in surv_probs_test], columns=time_points_train)

# surv probabilites after 1 and 3 years
suvr_prob_3y_train = df_surv_probs_train.iloc[:,3]
suvr_prob_3y_test = df_surv_probs_test.iloc[:,3]

# extract status
# get 3-year event status based on TIMETOEVENT and MORTSTAT
status_3y_train_surv = np.where((df_train["MORTSTAT"] == 1) & (df_train["TIMETOEVENT"] <= 3), 0, 1)
status_3y_test_surv = np.where((df_test["MORTSTAT"] == 1) & (df_test["TIMETOEVENT"] <= 3), 0, 1)

# log-loss
log_loss_3y_train = log_loss(status_3y_train_surv, suvr_prob_3y_train)
log_loss_3y_test = log_loss(status_3y_test_surv, suvr_prob_3y_test)

# compute Brier
bs_train_3y = brier_score_loss(status_3y_train_surv, suvr_prob_3y_train)
bs_test_3y = brier_score_loss(status_3y_test_surv, suvr_prob_3y_test)

# c-index
c_is = concordance_index(status_3y_train_surv, suvr_prob_3y_train)

print(f'Log Loss is: {log_loss_3y_train:.4f}')
print(f'Log Loss os: {log_loss_3y_test:.4f}')
print(f'BS is: {bs_train_3y:.4f}')
print(f'BS os: {bs_test_3y:.4f}')
print(f'C is: {c_is:.4f}')
print(f'APLL is: {-gbs2.score(X_surv_train, y_surv_train):.4f}')
print(f'APLL os: {-gbs2.score(X_surv_test, y_surv_test):.4f}')

Log Loss is: 0.1472
Log Loss os: 0.1460
BS is: 0.0345
BS os: 0.0341
C is: 0.8600
APLL is: -0.8577
APLL os: -0.8578


In [11]:
# GBS3: same as GBS1 but with half the boosting stages (n_estimators=10 instead of 20)

In [12]:
# Fit GBS3 with the Cox partial-likelihood loss and report the wall time of the fit.
gbs3 = GradientBoostingSurvivalAnalysis(
    loss='coxph',
    n_estimators=10,
    learning_rate=0.1,
    subsample=1.0,        # every stage sees the full training set
    max_features=20,
    max_depth=None,       # no depth limit: tree size is capped by max_leaf_nodes
    max_leaf_nodes=20,
    random_state=42,
    verbose=2,            # prints the per-iteration training loss table
)

# perf_counter() is monotonic, so the measured duration cannot be distorted by
# system clock adjustments during the long fit (time.time() can be).
start_time = time.perf_counter()
gbs3.fit(X_surv_train, y_surv_train)
end_time = time.perf_counter()
print(f"Time for one model: {end_time - start_time:.2f} seconds")

      Iter       Train Loss   Remaining Time 
         1       51189.6063           10.13m
         2       51107.2734            9.01m
         3       51025.9465            7.89m
         4       50948.0000            6.78m
         5       50868.5316            5.66m
         6       50795.7521            4.53m
         7       50716.5617            3.41m
         8       50639.2478            2.31m
         9       50571.9470            1.16m
        10       50499.3035            0.00s
Time for one model: 696.82 seconds


In [13]:
# predict survival function
surv_probs_train = gbs3.predict_survival_function(X_surv_train)
surv_probs_test = gbs3.predict_survival_function(X_surv_test)

# all time-to-event points
time_points_train = surv_probs_train[0].x
time_points_test = surv_probs_test[0].x 
 
# convert survival probabilities to df
df_surv_probs_train = pd.DataFrame(data=[fn(time_points_train) for fn in surv_probs_train], columns=time_points_train)
df_surv_probs_test = pd.DataFrame(data=[fn(time_points_train) for fn in surv_probs_test], columns=time_points_train)

# surv probabilites after 1 and 3 years
suvr_prob_3y_train = df_surv_probs_train.iloc[:,3]
suvr_prob_3y_test = df_surv_probs_test.iloc[:,3]

# extract status
# get 3-year event status based on TIMETOEVENT and MORTSTAT
status_3y_train_surv = np.where((df_train["MORTSTAT"] == 1) & (df_train["TIMETOEVENT"] <= 3), 0, 1)
status_3y_test_surv = np.where((df_test["MORTSTAT"] == 1) & (df_test["TIMETOEVENT"] <= 3), 0, 1)

# log-loss
log_loss_3y_train = log_loss(status_3y_train_surv, suvr_prob_3y_train)
log_loss_3y_test = log_loss(status_3y_test_surv, suvr_prob_3y_test)

# compute Brier
bs_train_3y = brier_score_loss(status_3y_train_surv, suvr_prob_3y_train)
bs_test_3y = brier_score_loss(status_3y_test_surv, suvr_prob_3y_test)

# c-index
c_is = concordance_index(status_3y_train_surv, suvr_prob_3y_train)

print(f'Log Loss is: {log_loss_3y_train:.4f}')
print(f'Log Loss os: {log_loss_3y_test:.4f}')
print(f'BS is: {bs_train_3y:.4f}')
print(f'BS os: {bs_test_3y:.4f}')
print(f'C is: {c_is:.4f}')
print(f'APLL is: {-gbs3.score(X_surv_train, y_surv_train):.4f}')
print(f'APLL os: {-gbs3.score(X_surv_test, y_surv_test):.4f}')

Log Loss is: 0.1512
Log Loss os: 0.1500
BS is: 0.0349
BS os: 0.0345
C is: 0.8697
APLL is: -0.8666
APLL os: -0.8662


In [14]:
# GBS4: same as GBS1 but with stochastic boosting (subsample=0.5 instead of 1)

In [15]:
# Fit GBS4 with the Cox partial-likelihood loss and report the wall time of the fit.
gbs4 = GradientBoostingSurvivalAnalysis(
    loss='coxph',
    n_estimators=20,
    learning_rate=0.1,
    subsample=0.5,        # each stage is fitted on a random half of the training rows
    max_features=20,
    max_depth=None,       # no depth limit: tree size is capped by max_leaf_nodes
    max_leaf_nodes=20,
    random_state=42,
    verbose=2,            # prints the per-iteration loss table (with an OOB column when subsample < 1)
)

# perf_counter() is monotonic, so the measured duration cannot be distorted by
# system clock adjustments during the long fit (time.time() can be).
start_time = time.perf_counter()
gbs4.fit(X_surv_train, y_surv_train)
end_time = time.perf_counter()
print(f"Time for one model: {end_time - start_time:.2f} seconds")

      Iter       Train Loss      OOB Improve   Remaining Time 
         1       23581.8506          40.5421           18.33m
         2       24348.1463         848.5778           16.54m
         3       24106.4712        -161.2017           15.16m
         4       24596.6353         571.2544           14.02m
         5       23641.2846        -880.5423           13.04m
         6       23465.2976        -102.4496           12.06m
         7       23826.4619         437.2002           11.14m
         8       23771.4147          15.9157           10.25m
         9       23600.2120        -101.5789            9.38m
        10       23683.4900         157.2443            8.50m
        11       23331.7372        -276.9486            7.63m
        12       23697.6428         440.1851            6.79m
        13       23266.7117        -371.3434            5.93m
        14       23438.0558         238.7864            5.08m
        15       23815.4808         441.8190            4.23m
       

In [16]:
# predict survival function
surv_probs_train = gbs4.predict_survival_function(X_surv_train)
surv_probs_test = gbs4.predict_survival_function(X_surv_test)

# all time-to-event points
time_points_train = surv_probs_train[0].x
time_points_test = surv_probs_test[0].x 
 
# convert survival probabilities to df
df_surv_probs_train = pd.DataFrame(data=[fn(time_points_train) for fn in surv_probs_train], columns=time_points_train)
df_surv_probs_test = pd.DataFrame(data=[fn(time_points_train) for fn in surv_probs_test], columns=time_points_train)

# surv probabilites after 1 and 3 years
suvr_prob_3y_train = df_surv_probs_train.iloc[:,3]
suvr_prob_3y_test = df_surv_probs_test.iloc[:,3]

# extract status
# get 3-year event status based on TIMETOEVENT and MORTSTAT
status_3y_train_surv = np.where((df_train["MORTSTAT"] == 1) & (df_train["TIMETOEVENT"] <= 3), 0, 1)
status_3y_test_surv = np.where((df_test["MORTSTAT"] == 1) & (df_test["TIMETOEVENT"] <= 3), 0, 1)

# log-loss
log_loss_3y_train = log_loss(status_3y_train_surv, suvr_prob_3y_train)
log_loss_3y_test = log_loss(status_3y_test_surv, suvr_prob_3y_test)

# compute Brier
bs_train_3y = brier_score_loss(status_3y_train_surv, suvr_prob_3y_train)
bs_test_3y = brier_score_loss(status_3y_test_surv, suvr_prob_3y_test)

# c-index
c_is = concordance_index(status_3y_train_surv, suvr_prob_3y_train)

print(f'Log Loss is: {log_loss_3y_train:.4f}')
print(f'Log Loss os: {log_loss_3y_test:.4f}')
print(f'BS is: {bs_train_3y:.4f}')
print(f'BS os: {bs_test_3y:.4f}')
print(f'C is: {c_is:.4f}')
print(f'APLL is: {-gbs4.score(X_surv_train, y_surv_train):.4f}')
print(f'APLL os: {-gbs4.score(X_surv_test, y_surv_test):.4f}')

Log Loss is: 0.1457
Log Loss os: 0.1446
BS is: 0.0343
BS os: 0.0340
C is: 0.8691
APLL is: -0.8665
APLL os: -0.8665


In [17]:
# GBS5: n_estimators=20, learning_rate=0.2, subsample=0.5, max_features=20, max_leaf_nodes=20

In [18]:
# Fit GBS5 with the Cox partial-likelihood loss and report the wall time of the fit.
gbs5 = GradientBoostingSurvivalAnalysis(
    loss='coxph',
    n_estimators=20,
    learning_rate=0.2,
    subsample=0.5,        # each stage is fitted on a random half of the training rows
    max_features=20,
    max_depth=None,       # no depth limit: tree size is capped by max_leaf_nodes
    max_leaf_nodes=20,
    random_state=42,
    verbose=2,            # prints the per-iteration loss table (with an OOB column when subsample < 1)
)

# perf_counter() is monotonic, so the measured duration cannot be distorted by
# system clock adjustments during the long fit (time.time() can be).
start_time = time.perf_counter()
gbs5.fit(X_surv_train, y_surv_train)
end_time = time.perf_counter()
print(f"Time for one model: {end_time - start_time:.2f} seconds")

      Iter       Train Loss      OOB Improve   Remaining Time 
         1       23540.9341          80.8824           17.96m
         2       24265.5203         887.4475           16.01m
         3       23986.7467        -120.8497           14.81m
         4       24433.1391         603.2800           13.82m
         5       23451.4837        -837.5883           12.93m
         6       23241.2867         -70.6458           12.05m
         7       23564.3245         467.2411           11.14m
         8       23485.6083          52.2161           10.24m
         9       23280.3094         -75.0093            9.36m
        10       23330.8874         183.6560            8.49m
        11       22953.0118        -244.6377            7.64m
        12       23286.8931         463.1004            6.78m
        13       22853.3945        -326.9295            5.92m
        14       22991.3101         254.1182            5.07m
        15       23336.0324         451.1375            4.22m
       

In [19]:
# predict survival function
surv_probs_train = gbs5.predict_survival_function(X_surv_train)
surv_probs_test = gbs5.predict_survival_function(X_surv_test)

# all time-to-event points
time_points_train = surv_probs_train[0].x
time_points_test = surv_probs_test[0].x 
 
# convert survival probabilities to df
df_surv_probs_train = pd.DataFrame(data=[fn(time_points_train) for fn in surv_probs_train], columns=time_points_train)
df_surv_probs_test = pd.DataFrame(data=[fn(time_points_train) for fn in surv_probs_test], columns=time_points_train)

# surv probabilites after 1 and 3 years
suvr_prob_3y_train = df_surv_probs_train.iloc[:,3]
suvr_prob_3y_test = df_surv_probs_test.iloc[:,3]

# extract status
# get 3-year event status based on TIMETOEVENT and MORTSTAT
status_3y_train_surv = np.where((df_train["MORTSTAT"] == 1) & (df_train["TIMETOEVENT"] <= 3), 0, 1)
status_3y_test_surv = np.where((df_test["MORTSTAT"] == 1) & (df_test["TIMETOEVENT"] <= 3), 0, 1)

# log-loss
log_loss_3y_train = log_loss(status_3y_train_surv, suvr_prob_3y_train)
log_loss_3y_test = log_loss(status_3y_test_surv, suvr_prob_3y_test)

# compute Brier
bs_train_3y = brier_score_loss(status_3y_train_surv, suvr_prob_3y_train)
bs_test_3y = brier_score_loss(status_3y_test_surv, suvr_prob_3y_test)

# c-index
c_is = concordance_index(status_3y_train_surv, suvr_prob_3y_train)

print(f'Log Loss is: {log_loss_3y_train:.4f}')
print(f'Log Loss os: {log_loss_3y_test:.4f}')
print(f'BS is: {bs_train_3y:.4f}')
print(f'BS os: {bs_test_3y:.4f}')
print(f'C is: {c_is:.4f}')
print(f'APLL is: {-gbs5.score(X_surv_train, y_surv_train):.4f}')
print(f'APLL os: {-gbs5.score(X_surv_test, y_surv_test):.4f}')

Log Loss is: 0.1364
Log Loss os: 0.1356
BS is: 0.0330
BS os: 0.0327
C is: 0.8697
APLL is: -0.8671
APLL os: -0.8673


In [20]:
# GBS6: n_estimators=30, learning_rate=0.2, subsample=0.5, max_features=20, max_leaf_nodes=20

In [21]:
# Fit GBS6 with the Cox partial-likelihood loss and report the wall time of the fit.
gbs6 = GradientBoostingSurvivalAnalysis(
    loss='coxph',
    n_estimators=30,
    learning_rate=0.2,
    subsample=0.5,        # each stage is fitted on a random half of the training rows
    max_features=20,
    max_depth=None,       # no depth limit: tree size is capped by max_leaf_nodes
    max_leaf_nodes=20,
    random_state=42,
    verbose=2,            # prints the per-iteration loss table (with an OOB column when subsample < 1)
)

# perf_counter() is monotonic, so the measured duration cannot be distorted by
# system clock adjustments during the long fit (time.time() can be).
start_time = time.perf_counter()
gbs6.fit(X_surv_train, y_surv_train)
end_time = time.perf_counter()
print(f"Time for one model: {end_time - start_time:.2f} seconds")

      Iter       Train Loss      OOB Improve   Remaining Time 
         1       23540.9341          80.8824           28.36m
         2       24265.5203         887.4475           25.57m
         3       23986.7467        -120.8497           24.01m
         4       24433.1391         603.2800           22.81m
         5       23451.4837        -837.5883           21.77m
         6       23241.2867         -70.6458           20.78m
         7       23564.3245         467.2411           19.82m
         8       23485.6083          52.2161           18.88m
         9       23280.3094         -75.0093           17.96m
        10       23330.8874         183.6560           17.08m
        11       22953.0118        -244.6377           16.21m
        12       23286.8931         463.1004           15.34m
        13       22853.3945        -326.9295           14.48m
        14       22991.3101         254.1182           13.60m
        15       23336.0324         451.1375           12.73m
       

In [22]:
# predict survival function
surv_probs_train = gbs6.predict_survival_function(X_surv_train)
surv_probs_test = gbs6.predict_survival_function(X_surv_test)

# all time-to-event points
time_points_train = surv_probs_train[0].x
time_points_test = surv_probs_test[0].x 
 
# convert survival probabilities to df
df_surv_probs_train = pd.DataFrame(data=[fn(time_points_train) for fn in surv_probs_train], columns=time_points_train)
df_surv_probs_test = pd.DataFrame(data=[fn(time_points_train) for fn in surv_probs_test], columns=time_points_train)

# surv probabilites after 1 and 3 years
suvr_prob_3y_train = df_surv_probs_train.iloc[:,3]
suvr_prob_3y_test = df_surv_probs_test.iloc[:,3]

# extract status
# get 3-year event status based on TIMETOEVENT and MORTSTAT
status_3y_train_surv = np.where((df_train["MORTSTAT"] == 1) & (df_train["TIMETOEVENT"] <= 3), 0, 1)
status_3y_test_surv = np.where((df_test["MORTSTAT"] == 1) & (df_test["TIMETOEVENT"] <= 3), 0, 1)

# log-loss
log_loss_3y_train = log_loss(status_3y_train_surv, suvr_prob_3y_train)
log_loss_3y_test = log_loss(status_3y_test_surv, suvr_prob_3y_test)

# compute Brier
bs_train_3y = brier_score_loss(status_3y_train_surv, suvr_prob_3y_train)
bs_test_3y = brier_score_loss(status_3y_test_surv, suvr_prob_3y_test)

# c-index
c_is = concordance_index(status_3y_train_surv, suvr_prob_3y_train)

print(f'Log Loss is: {log_loss_3y_train:.4f}')
print(f'Log Loss os: {log_loss_3y_test:.4f}')
print(f'BS is: {bs_train_3y:.4f}')
print(f'BS os: {bs_test_3y:.4f}')
print(f'C is: {c_is:.4f}')
print(f'APLL is: {-gbs6.score(X_surv_train, y_surv_train):.4f}')
print(f'APLL os: {-gbs6.score(X_surv_test, y_surv_test):.4f}')

Log Loss is: 0.1301
Log Loss os: 0.1294
BS is: 0.0318
BS os: 0.0316
C is: 0.8702
APLL is: -0.8676
APLL os: -0.8673


In [23]:
# GBS7: n_estimators=20, learning_rate=0.2, subsample=0.5, max_features=30, max_leaf_nodes=20

In [24]:
# Fit GBS7 with the Cox partial-likelihood loss and report the wall time of the fit.
gbs7 = GradientBoostingSurvivalAnalysis(
    loss='coxph',
    n_estimators=20,
    learning_rate=0.2,
    subsample=0.5,        # each stage is fitted on a random half of the training rows
    max_features=30,
    max_depth=None,       # no depth limit: tree size is capped by max_leaf_nodes
    max_leaf_nodes=20,
    random_state=42,
    verbose=2,            # prints the per-iteration loss table (with an OOB column when subsample < 1)
)

# perf_counter() is monotonic, so the measured duration cannot be distorted by
# system clock adjustments during the long fit (time.time() can be).
start_time = time.perf_counter()
gbs7.fit(X_surv_train, y_surv_train)
end_time = time.perf_counter()
print(f"Time for one model: {end_time - start_time:.2f} seconds")

      Iter       Train Loss      OOB Improve   Remaining Time 
         1       23539.9144          80.4688           18.11m
         2       24267.2531         886.9015           16.07m
         3       23987.0292        -123.0795           14.95m
         4       24431.0310         607.6200           13.95m
         5       23447.6333        -837.3858           12.92m
         6       23233.0899         -63.3138           11.99m
         7       23551.0084         471.4685           11.11m
         8       23468.4218          55.2097           10.23m
         9       23266.6173         -69.8982            9.36m
        10       23311.7696         181.4880            8.50m
        11       22931.0004        -243.0287            7.65m
        12       23269.7829         468.4661            6.80m
        13       22826.2968        -325.5371            5.94m
        14       22961.3364         257.2978            5.09m
        15       23296.6354         453.5470            4.24m
       

In [25]:
# predict survival function
surv_probs_train = gbs7.predict_survival_function(X_surv_train)
surv_probs_test = gbs7.predict_survival_function(X_surv_test)

# all time-to-event points
time_points_train = surv_probs_train[0].x
time_points_test = surv_probs_test[0].x 
 
# convert survival probabilities to df
df_surv_probs_train = pd.DataFrame(data=[fn(time_points_train) for fn in surv_probs_train], columns=time_points_train)
df_surv_probs_test = pd.DataFrame(data=[fn(time_points_train) for fn in surv_probs_test], columns=time_points_train)

# surv probabilites after 1 and 3 years
suvr_prob_3y_train = df_surv_probs_train.iloc[:,3]
suvr_prob_3y_test = df_surv_probs_test.iloc[:,3]

# extract status
# get 3-year event status based on TIMETOEVENT and MORTSTAT
status_3y_train_surv = np.where((df_train["MORTSTAT"] == 1) & (df_train["TIMETOEVENT"] <= 3), 0, 1)
status_3y_test_surv = np.where((df_test["MORTSTAT"] == 1) & (df_test["TIMETOEVENT"] <= 3), 0, 1)

# log-loss
log_loss_3y_train = log_loss(status_3y_train_surv, suvr_prob_3y_train)
log_loss_3y_test = log_loss(status_3y_test_surv, suvr_prob_3y_test)

# compute Brier
bs_train_3y = brier_score_loss(status_3y_train_surv, suvr_prob_3y_train)
bs_test_3y = brier_score_loss(status_3y_test_surv, suvr_prob_3y_test)

# c-index
c_is = concordance_index(status_3y_train_surv, suvr_prob_3y_train)

print(f'Log Loss is: {log_loss_3y_train:.4f}')
print(f'Log Loss os: {log_loss_3y_test:.4f}')
print(f'BS is: {bs_train_3y:.4f}')
print(f'BS os: {bs_test_3y:.4f}')
print(f'C is: {c_is:.4f}')
print(f'APLL is: {-gbs7.score(X_surv_train, y_surv_train):.4f}')
print(f'APLL os: {-gbs7.score(X_surv_test, y_surv_test):.4f}')

Log Loss is: 0.1357
Log Loss os: 0.1346
BS is: 0.0329
BS os: 0.0325
C is: 0.8688
APLL is: -0.8660
APLL os: -0.8650


In [26]:
# GBS8: n_estimators=50, learning_rate=0.2, subsample=0.5, max_features=30, max_leaf_nodes=20

In [27]:
# Fit GBS8 with the Cox partial-likelihood loss and report the wall time of the fit.
gbs8 = GradientBoostingSurvivalAnalysis(
    loss='coxph',
    n_estimators=50,
    learning_rate=0.2,
    subsample=0.5,        # each stage is fitted on a random half of the training rows
    max_features=30,
    max_depth=None,       # no depth limit: tree size is capped by max_leaf_nodes
    max_leaf_nodes=20,
    random_state=42,
    verbose=2,            # prints the per-iteration loss table (with an OOB column when subsample < 1)
)

# perf_counter() is monotonic, so the measured duration cannot be distorted by
# system clock adjustments during the long fit (time.time() can be).
start_time = time.perf_counter()
gbs8.fit(X_surv_train, y_surv_train)
end_time = time.perf_counter()
print(f"Time for one model: {end_time - start_time:.2f} seconds")

      Iter       Train Loss      OOB Improve   Remaining Time 
         1       23539.9144          80.4688           48.21m
         2       24267.2531         886.9015           43.86m
         3       23987.0292        -123.0795           41.81m
         4       24431.0310         607.6200           40.45m
         5       23447.6333        -837.3858           39.19m
         6       23233.0899         -63.3138           38.12m
         7       23551.0084         471.4685           37.06m
         8       23468.4218          55.2097           36.11m
         9       23266.6173         -69.8982           35.14m
        10       23311.7696         181.4880           34.23m
        11       22931.0004        -243.0287           33.32m
        12       23269.7829         468.4661           32.44m
        13       22826.2968        -325.5371           31.57m
        14       22961.3364         257.2978           30.68m
        15       23296.6354         453.5470           29.82m
       

In [28]:
# predict survival function
surv_probs_train = gbs8.predict_survival_function(X_surv_train)
surv_probs_test = gbs8.predict_survival_function(X_surv_test)

# all time-to-event points
time_points_train = surv_probs_train[0].x
time_points_test = surv_probs_test[0].x 
 
# convert survival probabilities to df
df_surv_probs_train = pd.DataFrame(data=[fn(time_points_train) for fn in surv_probs_train], columns=time_points_train)
df_surv_probs_test = pd.DataFrame(data=[fn(time_points_train) for fn in surv_probs_test], columns=time_points_train)

# surv probabilites after 1 and 3 years
suvr_prob_3y_train = df_surv_probs_train.iloc[:,3]
suvr_prob_3y_test = df_surv_probs_test.iloc[:,3]

# extract status
# get 3-year event status based on TIMETOEVENT and MORTSTAT
status_3y_train_surv = np.where((df_train["MORTSTAT"] == 1) & (df_train["TIMETOEVENT"] <= 3), 0, 1)
status_3y_test_surv = np.where((df_test["MORTSTAT"] == 1) & (df_test["TIMETOEVENT"] <= 3), 0, 1)

# log-loss
log_loss_3y_train = log_loss(status_3y_train_surv, suvr_prob_3y_train)
log_loss_3y_test = log_loss(status_3y_test_surv, suvr_prob_3y_test)

# compute Brier
bs_train_3y = brier_score_loss(status_3y_train_surv, suvr_prob_3y_train)
bs_test_3y = brier_score_loss(status_3y_test_surv, suvr_prob_3y_test)

# c-index
c_is = concordance_index(status_3y_train_surv, suvr_prob_3y_train)

print(f'Log Loss is: {log_loss_3y_train:.4f}')
print(f'Log Loss os: {log_loss_3y_test:.4f}')
print(f'BS is: {bs_train_3y:.4f}')
print(f'BS os: {bs_test_3y:.4f}')
print(f'C is: {c_is:.4f}')
print(f'APLL is: {-gbs8.score(X_surv_train, y_surv_train):.4f}')
print(f'APLL os: {-gbs8.score(X_surv_test, y_surv_test):.4f}')

Log Loss is: 0.1220
Log Loss os: 0.1218
BS is: 0.0302
BS os: 0.0302
C is: 0.8747
APLL is: -0.8717
APLL os: -0.8706


In [29]:
# GBS9: n_estimators=100, learning_rate=0.15, subsample=0.5, max_features=30, max_leaf_nodes=20

In [30]:
# Fit GBS9 with the Cox partial-likelihood loss and report the wall time of the fit.
gbs9 = GradientBoostingSurvivalAnalysis(
    loss='coxph',
    n_estimators=100,
    learning_rate=0.15,
    subsample=0.5,        # each stage is fitted on a random half of the training rows
    max_features=30,
    max_depth=None,       # no depth limit: tree size is capped by max_leaf_nodes
    max_leaf_nodes=20,
    random_state=42,
    verbose=2,            # prints the per-iteration loss table (with an OOB column when subsample < 1)
)

# perf_counter() is monotonic, so the measured duration cannot be distorted by
# system clock adjustments during the long fit (time.time() can be).
start_time = time.perf_counter()
gbs9.fit(X_surv_train, y_surv_train)
end_time = time.perf_counter()
print(f"Time for one model: {end_time - start_time:.2f} seconds")

      Iter       Train Loss      OOB Improve   Remaining Time 
         1       23560.6016          60.4288           93.93m
         2       24307.9465         867.6903           88.07m
         3       24046.2125        -141.6124           84.86m
         4       24512.2155         591.1889           82.97m
         5       23541.3388        -858.9403           81.75m
         6       23345.2764         -79.4828           80.55m
         7       23681.0280         454.8693           79.44m
         8       23611.4388          38.0953           78.62m
         9       23424.1602         -82.6953           77.63m
        10       23486.2273         170.4384           76.57m
        11       23117.2396        -258.8763           75.67m
        12       23468.9862         456.0450           74.76m
        13       23026.9469        -343.6696           73.83m
        14       23177.2709         250.8890           72.82m
        15       23530.4940         452.0614           71.92m
       

In [31]:
# predict survival function
surv_probs_train = gbs9.predict_survival_function(X_surv_train)
surv_probs_test = gbs9.predict_survival_function(X_surv_test)

# all time-to-event points
time_points_train = surv_probs_train[0].x
time_points_test = surv_probs_test[0].x 
 
# convert survival probabilities to df
df_surv_probs_train = pd.DataFrame(data=[fn(time_points_train) for fn in surv_probs_train], columns=time_points_train)
df_surv_probs_test = pd.DataFrame(data=[fn(time_points_train) for fn in surv_probs_test], columns=time_points_train)

# surv probabilites after 1 and 3 years
suvr_prob_3y_train = df_surv_probs_train.iloc[:,3]
suvr_prob_3y_test = df_surv_probs_test.iloc[:,3]

# extract status
# get 3-year event status based on TIMETOEVENT and MORTSTAT
status_3y_train_surv = np.where((df_train["MORTSTAT"] == 1) & (df_train["TIMETOEVENT"] <= 3), 0, 1)
status_3y_test_surv = np.where((df_test["MORTSTAT"] == 1) & (df_test["TIMETOEVENT"] <= 3), 0, 1)

# log-loss
log_loss_3y_train = log_loss(status_3y_train_surv, suvr_prob_3y_train)
log_loss_3y_test = log_loss(status_3y_test_surv, suvr_prob_3y_test)

# compute Brier
bs_train_3y = brier_score_loss(status_3y_train_surv, suvr_prob_3y_train)
bs_test_3y = brier_score_loss(status_3y_test_surv, suvr_prob_3y_test)

# c-index
c_is = concordance_index(status_3y_train_surv, suvr_prob_3y_train)

print(f'Log Loss is: {log_loss_3y_train:.4f}')
print(f'Log Loss os: {log_loss_3y_test:.4f}')
print(f'BS is: {bs_train_3y:.4f}')
print(f'BS os: {bs_test_3y:.4f}')
print(f'C is: {c_is:.4f}')
print(f'APLL is: {-gbs9.score(X_surv_train, y_surv_train):.4f}')
print(f'APLL os: {-gbs9.score(X_surv_test, y_surv_test):.4f}')

Log Loss is: 0.1170
Log Loss os: 0.1181
BS is: 0.0293
BS os: 0.0297
C is: 0.8810
APLL is: -0.8780
APLL os: -0.8768


In [None]:
#______________________________________________________________________________________________________________________________