# Load packages

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.model_selection import StratifiedKFold, RepeatedStratifiedKFold
from sklearn.model_selection import ShuffleSplit, GridSearchCV
from sksurv.preprocessing import OneHotEncoder
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.utils import resample
from sksurv.util import Surv
from sksurv.kernels import clinical_kernel
from sklearn import set_config

from sklearn.metrics import make_scorer
from sksurv.metrics import concordance_index_censored
from sksurv.metrics import concordance_index_ipcw
from sksurv.metrics import cumulative_dynamic_auc
from sksurv.metrics import integrated_brier_score
import shap
from sksurv.linear_model import CoxPHSurvivalAnalysis, CoxnetSurvivalAnalysis
from sksurv.ensemble import GradientBoostingSurvivalAnalysis
from sksurv.svm import FastKernelSurvivalSVM, FastSurvivalSVM
from sksurv.ensemble import RandomSurvivalForest
from sksurv.tree import SurvivalTree
import shap
import pickle
import time
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from sklearn.ensemble import  RandomForestRegressor

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split

In [None]:
# Column layout of the imputed CSV files (positional, as used throughout
# this notebook): features start at position 10, and the columns in
# positions 10..28 are the categorical ones.
# NOTE(review): every column from position 10 onward becomes a feature;
# confirm that `event_14d` / `length_14d` sit before position 10 so the
# outcome cannot leak into X.
FEATURE_START_COL = 10
CATEGORICAL_END_COL = 29


def _load_split(csv_path):
    """Read one imputed split and derive its feature matrix and survival target.

    Steps (identical for train / validation / test):
      1. read the CSV,
      2. cast the categorical column range to pandas ``category`` dtype,
      3. drop rows that have ``event_14d == 1`` together with ``length_14d == 14``,
      4. take every column from ``FEATURE_START_COL`` onward as features,
      5. build the structured survival array from ``event_14d`` / ``length_14d``.

    Parameters
    ----------
    csv_path : str
        Path of the CSV file to read.

    Returns
    -------
    tuple
        ``(frame, features, target)`` where ``frame`` is the filtered
        DataFrame, ``features`` the feature DataFrame and ``target`` the
        array returned by ``Surv.from_arrays`` with fields ``event`` / ``time``.
    """
    frame = pd.read_csv(csv_path)

    cat_cols = frame.columns[FEATURE_START_COL:CATEGORICAL_END_COL]
    frame[cat_cols] = frame[cat_cols].astype('category')

    # Keep a row unless it has an event recorded exactly at day 14.
    keep_row = (frame['event_14d'] != 1) | (frame['length_14d'] != 14)
    frame = frame[keep_row]

    features = frame.iloc[:, FEATURE_START_COL:]
    target = Surv.from_arrays(frame['event_14d'], frame['length_14d'],
                              name_event='event', name_time='time')
    return frame, features, target


def _merge_encoded(features, encoded, categorical_cols):
    """Replace the raw categorical columns of ``features`` by their encoded form.

    The non-categorical columns are kept first, followed by the encoded
    columns; both frames are concatenated column-wise (aligned on the index).
    """
    return pd.concat([features.drop(columns=categorical_cols), encoded], axis=1)


dt_train_imputed, X_train, y_train = _load_split('dt_train_imputed.csv')
dt_val_imputed, X_val, y_val = _load_split('dt_val_imputed.csv')
dt_test_imputed, X_test, y_test = _load_split('dt_test_imputed.csv')

# The encoder is fitted on the training split only and then re-used for the
# validation and test splits.
categorical_columns = X_train.select_dtypes(include=['category']).columns
encoder = OneHotEncoder()
X_train_encoded = encoder.fit_transform(X_train[categorical_columns])
X_val_encoded = encoder.transform(X_val[categorical_columns])
X_test_encoded = encoder.transform(X_test[categorical_columns])

X_train_encoded = _merge_encoded(X_train, X_train_encoded, categorical_columns)
X_val_encoded = _merge_encoded(X_val, X_val_encoded, categorical_columns)
X_test_encoded = _merge_encoded(X_test, X_test_encoded, categorical_columns)


# Define several functions used in the project

In [None]:
def score_survival_model(model, X, y):
    """Score a fitted survival model with the censored concordance index.

    Parameters
    ----------
    model : object
        Fitted estimator exposing ``predict(X)``.
    X : array-like
        Feature matrix passed to ``model.predict``.
    y : structured array
        Survival outcome with an ``"event"`` field and a ``"time"`` field.

    Returns
    -------
    The first element of the tuple returned by
    ``concordance_index_censored`` (the concordance index itself, per the
    scikit-survival documentation).
    """
    risk_scores = model.predict(X)
    c_index, *_ = concordance_index_censored(y["event"], y["time"], risk_scores)
    return c_index

from sklearn.model_selection import KFold
from sklearn.utils import resample
from sksurv.metrics import concordance_index_censored, cumulative_dynamic_auc, integrated_brier_score

## SurvSHAP explanation

In [None]:
# survshap is only used in this section, so it is imported here.
from survshap import SurvivalModelExplainer, ModelSurvSHAP

# Random survival forest configuration (presumably the tuned "best"
# hyper-parameters, going by the variable name -- confirm against the
# tuning section of the project).
RVF_best = RandomSurvivalForest(
    min_samples_leaf=20,
    min_samples_split=2,
    max_features='log2',
    n_estimators=1000,
    random_state=42,
)
model = RVF_best

# Fit on the TRAINING split. The previous version fitted the forest on the
# held-out test split and then explained it on that very same data, so the
# explanations described a model that had already seen every explained
# observation instead of the model built from the training data.
model.fit(X_train_encoded, y_train)

# Create the explainer: wraps the fitted model together with the held-out
# test features and outcomes.
explainer = SurvivalModelExplainer(model=model, data=X_test_encoded, y=y_test)

# Compute SurvSHAP values for the group of test observations.
# `calculation_method`, `B` and `random_state` are passed to survshap unchanged.
model_survshap = ModelSurvSHAP(calculation_method="sampling", random_state=42, B=10)
model_survshap.fit(explainer=explainer, new_observations=X_test_encoded)

# Last expression of the cell: plot of the mean absolute SHAP values.
model_survshap.plot_mean_abs_shap_values()

In [None]:
# Show the `result` attribute of the fitted ModelSurvSHAP object as this cell's output.
model_survshap.result