In [None]:
import sys
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import math
#import scikitplot as skplt

from sklearn.metrics import accuracy_score, f1_score, roc_curve, roc_auc_score, balanced_accuracy_score
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score, cross_val_predict
from sklearn.metrics import classification_report,confusion_matrix

from sklearn.utils.class_weight import compute_sample_weight

In [None]:
from sklearn.model_selection import StratifiedKFold

from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import ExtraTreesClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.svm import SVC
from sklearn import metrics

from sklearn.model_selection import GridSearchCV



from sksurv.linear_model import CoxPHSurvivalAnalysis
from sksurv.linear_model import CoxnetSurvivalAnalysis
from sksurv.metrics import concordance_index_censored, cumulative_dynamic_auc, concordance_index_ipcw, integrated_brier_score
from sksurv.ensemble import RandomSurvivalForest, ExtraSurvivalTrees
from sksurv.ensemble import GradientBoostingSurvivalAnalysis
from sksurv.svm import FastKernelSurvivalSVM
from sksurv.nonparametric import kaplan_meier_estimator


In [None]:
# --- Classification task (DCR): features and labels read from the *_test.csv files ---
X_DCR = pd.read_csv('data/classification/DCR/X_test.csv', index_col=0)
y_DCR = pd.read_csv('data/classification/DCR/y_test.csv', index_col=0)

# --- Survival tasks (OS and PFS): feature matrices of the test files ---
X_OS = pd.read_csv('data/survival/OS/X_test.csv', index_col=0)
X_PFS = pd.read_csv('data/survival/PFS/X_test.csv', index_col=0)

# Survival targets, each built in one chain:
#   1. read the CSV,
#   2. cast the event-status column to bool (values of the event must be boolean),
#   3. convert to a record array (targets must be an array, not a dataframe).
y_OS = (
    pd.read_csv('data/survival/OS/y_test.csv', index_col=0)
    .astype({'STATUS OS': bool})
    .to_records(index=False)
)
y_tr_OS = (
    pd.read_csv('data/survival/OS/y_train.csv', index_col=0)
    .astype({'STATUS OS': bool})
    .to_records(index=False)
)
y_PFS = (
    pd.read_csv('data/survival/PFS/y_test.csv', index_col=0)
    .astype({'STATUS PD': bool})
    .to_records(index=False)
)
y_tr_PFS = (
    pd.read_csv('data/survival/PFS/y_train.csv', index_col=0)
    .astype({'STATUS PD': bool})
    .to_records(index=False)
)

# 10-fold stratified splitter; not referenced again in the visible cells.
from sklearn.model_selection import StratifiedKFold
cv = StratifiedKFold(n_splits=10)


In [None]:
# Short identifiers of the classification models; they are used to build
# model file names and plot titles further down.
# The expansions below are presumed from the imported estimator classes -- verify.
names_c = [
    'XGB',   # presumably XGBClassifier
    'LR',    # presumably LogisticRegression
    'RF',    # presumably RandomForestClassifier
    'MLP',   # presumably MLPClassifier
    'SVM',   # presumably SVC
    'AB',    # presumably AdaBoostClassifier
    'ET',    # presumably ExtraTreesClassifier
    'LGBM',  # presumably LGBMClassifier
]

# Short identifiers of the survival models (same caveat on the expansions).
names_s = [
    'CPH',   # presumably CoxPHSurvivalAnalysis
    'RSF',   # presumably RandomSurvivalForest
    'GB',    # presumably GradientBoostingSurvivalAnalysis
    'SSVM',  # presumably FastKernelSurvivalSVM
    'EST',   # presumably ExtraSurvivalTrees
]

In [None]:
# Name of the classification endpoint; it is substituted into the model path below.
outcome = "DCR"

# <br><br> CLASSIFICATION

In [None]:
# Feature-selection variant whose saved model is evaluated (part of the model path).
selection = 'RFE'

# Per-sample weights derived from the DCR labels with the 'balanced' class-weight scheme;
# they are passed as sample_weight to the metrics below.
sw = compute_sample_weight('balanced', y_DCR)

In [None]:
## LOAD THE SAVED CLASSIFIER
import pickle

# names_c[2] picks the third entry of the classifier name list.
name = names_c[2]
model_path = 'results/classification/{o}/{fs}/MODELS/{mn}.pkl'.format(o = outcome, fs = selection, mn = name)

# NOTE: pickle.load can execute arbitrary code -- only load model files you trust.
# The context manager closes the file handle as soon as the model is deserialised
# (the previous bare open() left it open until garbage collection).
with open(model_path, 'rb') as model_file:
    mod = pickle.load(model_file)
mod

In [None]:
# TEST ON EXTERNAL VALIDATION SET
# Predict using exactly the feature columns the fitted model recorded.
y_pred = mod.predict(X_DCR[mod.feature_names_in_])

# Both scores use the balanced sample weights `sw`.
F1 = f1_score(
    y_DCR,
    y_pred,
    average="weighted",
    sample_weight=sw,
)
ACC = balanced_accuracy_score(
    y_DCR,
    y_pred,
    sample_weight=sw,
)

# One value per line: weighted F1 first, then balanced accuracy.
print(F1, ACC, sep='\n')

In [None]:
# CLASSIFICATION REPORT
import os

# Text version for the notebook output.
print(classification_report(y_DCR, y_pred, sample_weight=sw))

# Dict version, reshaped so that each class / average becomes one row of the sheet.
rep = classification_report(y_DCR, y_pred, output_dict=True,sample_weight=sw)
rep = pd.DataFrame(rep).transpose()

# Build the output path with os.path.join instead of a backslash-laden literal:
# '\c', '\e' and '\R' are invalid escape sequences (warned about by newer Python
# versions), and hard-coded backslashes only act as separators on Windows.
# On Windows the resulting string is unchanged.
out_dir = os.path.join('results', 'classification', 'external')
os.makedirs(out_dir, exist_ok=True)  # make sure the target folder exists
path = os.path.join(out_dir, 'Report_{m}_{fs}.xlsx'.format(m=names_c[2], fs=selection))
rep.to_excel(path)

In [None]:
# CONFUSION MATRIX
import os

# Draw on a dedicated figure so the heatmap never lands on axes left over from
# an earlier plot (the ROC cell also creates its own figure).
fig, ax = plt.subplots()
sns_plot = sns.heatmap(
    confusion_matrix(y_DCR, y_pred),
    annot=True,    # write the count inside each cell
    fmt='d',       # integer formatting for the annotations
    cmap="Blues",
    ax=ax,
)
sns_plot.set_title("Confusion Matrix of "+names_c[2])

# Portable output path: the former literal relied on backslashes, which are
# invalid escape sequences ('\c', '\e', '\M') and only separate folders on Windows.
out_dir = os.path.join('results', 'classification', 'external')
os.makedirs(out_dir, exist_ok=True)  # make sure the target folder exists
path = os.path.join(out_dir, 'Matrix_{m}_{fs}.png'.format(m=names_c[2], fs=selection))
fig.savefig(path,format="png")

In [None]:
# ROC CURVE
import os

# Class probabilities on the external set; column 1 is used as the score
# for both the AUC and the ROC curve.
y_probs = mod.predict_proba(X_DCR[mod.feature_names_in_])
AUC = roc_auc_score(y_DCR, y_probs[:,1])

plt.figure()
fpr, tpr, thresholds=roc_curve(y_DCR,  y_probs[:,1])
plt.plot(fpr, tpr, label='AUC = %.2f '%AUC)
plt.plot([0, 1], [0, 1], color='darkblue', linestyle='--')  # diagonal reference line
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve of '+names_c[2])
plt.legend(loc="lower right")

# Portable output path: the former literal relied on backslashes, which are
# invalid escape sequences ('\c', '\e', '\R') and only separate folders on Windows.
out_dir = os.path.join('results', 'classification', 'external')
os.makedirs(out_dir, exist_ok=True)  # make sure the target folder exists
path = os.path.join(out_dir, 'ROC_{m}_{fs}.png'.format(m=names_c[2], fs=selection))
plt.savefig(path,format="png")

## <br><br> OS

In [None]:
import pickle
path = 'results/survival/OS/MODELS(ALL)/EST.pkl'   # EST-ALL

# NOTE: pickle.load can execute arbitrary code -- only load model files you trust.
# The context manager closes the file handle right after loading
# (the previous bare open() was never closed).
with open(path, 'rb') as model_file:
    m1 = pickle.load(model_file)

In [None]:
# C-index of the loaded OS model on the test set.
# The function returns a tuple; its first element is the concordance index.
ci = concordance_index_censored(
    event_indicator=y_OS["STATUS OS"],
    event_time=y_OS["OS"],
    estimate=m1.predict(X_OS[m1.feature_names_in_]),
)
print(ci[0])

In [None]:
# 30 evenly spaced evaluation times over the observed OS range of the test set.
# The upper end is pulled in by 0.001 so the last point stays strictly below the maximum
# (presumably because the time-dependent AUC needs times inside the follow-up range -- confirm).
time = np.linspace(
    start=min(y_OS['OS']),
    stop=max(y_OS['OS'])-0.001,
    num=30,
)

In [None]:
# Average A-AUC
# Risk scores of the OS model, restricted to the features it was fitted on.
risk_scores = m1.predict(X_OS[m1.feature_names_in_])

# Time-dependent AUC at every point of `time`, plus its mean over the grid.
auc, mean_auc = cumulative_dynamic_auc(
    survival_train=y_tr_OS,
    survival_test=y_OS,
    estimate=risk_scores,
    times=time,
)

print(mean_auc)

## PFS

In [None]:
import pickle
path = 'results/survival/PFS/MODELS(ALL)/GB.pkl'

# NOTE: pickle.load can execute arbitrary code -- only load model files you trust.
# The context manager closes the file handle right after loading
# (the previous bare open() was never closed).
with open(path, 'rb') as model_file:
    m1 = pickle.load(model_file)

In [None]:
# C-index of the loaded PFS model on the test set.
# The function returns a tuple; its first element is the concordance index.
ci = concordance_index_censored(
    event_indicator=y_PFS["STATUS PD"],
    event_time=y_PFS["PFS"],
    estimate=m1.predict(X_PFS[m1.feature_names_in_]),
)
print(ci[0])

In [None]:
# Rebuild the PFS training targets from disk in a single chain:
# read the CSV, cast the event-status column to bool, then convert to a
# record array (targets must be an array, not a dataframe).
y_tr_PFS = (
    pd.read_csv('data/survival/PFS/y_train.csv', index_col=0)
    .astype({'STATUS PD': bool})
    .to_records(index=False)
)


In [None]:
# 100 evenly spaced evaluation times over the observed PFS range of the test set.
# The upper end is pulled in by 0.001 so the last point stays strictly below the maximum
# (presumably because the time-dependent AUC needs times inside the follow-up range -- confirm).
time = np.linspace(
    start=min(y_PFS['PFS']),
    stop=max(y_PFS['PFS'])-0.001,
    num=100,
)

In [None]:
# Average A-AUC
# Risk scores of the PFS model, restricted to the features it was fitted on.
risk_scores = m1.predict(X_PFS[m1.feature_names_in_])

# Time-dependent AUC at every point of `time`, plus its mean over the grid.
auc, mean_auc = cumulative_dynamic_auc(
    survival_train=y_tr_PFS,
    survival_test=y_PFS,
    estimate=risk_scores,
    times=time,
)

print(mean_auc)