In [None]:
### import libraries #####

import random

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from sklearn.metrics import roc_auc_score, roc_curve
from sklearn.metrics import accuracy_score, classification_report,\
                            ConfusionMatrixDisplay, recall_score, \
                            f1_score, roc_auc_score, roc_curve, precision_score
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.utils.class_weight import compute_class_weight

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import backend as K
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint
from tensorflow.keras.layers import Input, TimeDistributed, Conv2D, MaxPooling2D, Flatten
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, LSTM
from tensorflow.keras.losses import Loss
from tensorflow.keras.models import Sequential
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.optimizers import AdamW
from tensorflow.keras.regularizers import l2
from keras.saving import register_keras_serializable

In [None]:
### Load Dataset ###

# Each workbook is read and its rows are shuffled straight away with a fixed
# seed (frac=1 keeps every row, only the order changes).

## Dataset for cross-validation
DF_internal = pd.read_excel("Input the path of Internal Dataset").sample(frac=1, random_state=42)
## Dataset for external validation
DF_external = pd.read_excel("Input the path of External Dataset").sample(frac=1, random_state=42)

# Every column except the last goes to X; the last column goes to Y.
# NOTE(review): presumably the last column holds the outcome label — confirm
# against the spreadsheet layout.
X_internal, Y_internal = DF_internal.iloc[:, :-1], DF_internal.iloc[:, -1]
X_external, Y_external = DF_external.iloc[:, :-1], DF_external.iloc[:, -1]

In [None]:
### Focal Loss Function ########

@register_keras_serializable()
def focal_loss(gamma=2., alpha=0.25):
    """Build a focal-loss function suitable for `model.compile(loss=...)`.

    Parameters
    ----------
    gamma : float
        Focusing exponent; larger values shrink the contribution of examples
        the model already predicts confidently.
    alpha : float
        Weight applied to the `y_true * log(y_pred)` term. When the prediction
        has a single output column, `(1 - alpha)` weights the complementary
        negative-class term.

    Returns
    -------
    callable
        `focal_loss_fixed(y_true, y_pred)` returning the loss summed over the
        last axis (one value per sample).
    """
    @register_keras_serializable()
    def focal_loss_fixed(y_true, y_pred):
        y_true = K.cast(y_true, K.floatx())
        # Keep probabilities strictly inside (0, 1) so neither log() blows up.
        y_pred = K.clip(y_pred, K.epsilon(), 1. - K.epsilon())

        # Term for the labelled class (the only term needed for one-hot targets).
        cross_entropy = -y_true * K.log(y_pred)
        loss = alpha * K.pow(1. - y_pred, gamma) * cross_entropy

        if y_pred.shape[-1] == 1:
            # Single sigmoid output: without this term every negative sample
            # (y_true == 0) yields zero loss and therefore zero gradient, so the
            # network is only ever pushed towards predicting 1.
            neg_cross_entropy = -(1. - y_true) * K.log(1. - y_pred)
            loss = loss + (1. - alpha) * K.pow(y_pred, gamma) * neg_cross_entropy

        return K.sum(loss, axis=-1)
    return focal_loss_fixed

In [None]:
### Architecture of ANN-model for Local Recurrence Prediction ###

def ANN_model(input_dimension):
    """Build and compile the fully connected binary classifier.

    The network is Dense(50) -> Dense(100) -> Dense(150) -> Dropout(0.5) ->
    Dense(50) -> Dense(1, sigmoid). Every hidden Dense layer uses ReLU with an
    L2(0.01) kernel penalty and is followed by BatchNormalization.

    Parameters
    ----------
    input_dimension : int
        Number of input features per sample.

    Returns
    -------
    Sequential
        Model compiled with the focal loss (gamma=2, alpha=0.25), the AdamW
        optimizer and the 'AUC' metric.
    """
    model = Sequential([
        # An explicit Input layer replaces the `input_dim=` keyword on the
        # first Dense layer; the resulting model is the same.
        Input(shape=(input_dimension,)),
        Dense(50, activation='relu', kernel_regularizer=l2(0.01)),
        BatchNormalization(),
        Dense(100, activation='relu', kernel_regularizer=l2(0.01)),
        BatchNormalization(),
        Dense(150, activation='relu', kernel_regularizer=l2(0.01)),
        BatchNormalization(),
        Dropout(0.5),
        Dense(50, activation='relu', kernel_regularizer=l2(0.01)),
        BatchNormalization(),
        Dense(1, activation='sigmoid'),
    ])
    # AdamW must be imported from tensorflow.keras.optimizers; the original
    # notebook imported only Adam, so this line raised NameError.
    opt = AdamW(learning_rate=1e-3, weight_decay=1e-4)
    model.compile(loss=focal_loss(gamma=2., alpha=0.25), optimizer=opt, metrics=['AUC'])
    return model

In [None]:
### Stratified Cross-validation

def kfold_cv(X, Y, n_splits=5):
    """Yield stratified (train_idx, test_idx) pairs, dropping one-class test folds.

    Parameters
    ----------
    X : array-like
        Feature matrix handed to `StratifiedKFold.split`.
    Y : pandas.Series
        Labels; indexed positionally with `.iloc`.
    n_splits : int
        Number of folds requested from `StratifiedKFold`.

    Yields
    ------
    tuple
        `(train_idx, test_idx)` for every fold whose test labels contain at
        least two distinct values. Other folds are reported on stdout and
        skipped.
    """
    splitter = StratifiedKFold(n_splits=n_splits)
    for train_idx, test_idx in splitter.split(X, Y):
        held_out_labels = Y.iloc[test_idx]
        has_both_classes = np.unique(held_out_labels).size >= 2
        if has_both_classes:
            yield train_idx, test_idx
        else:
            print("Skipping fold: only one class")

In [None]:
### Main execution ###

# Seed Python's `random`, NumPy and TensorFlow so weight initialisation and
# batch shuffling are repeatable across runs.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

best_score = -np.inf
best_model = None
best_scaler = None       # scaler fitted on the training fold of the best model
best_threshold = None    # decision threshold derived for the best model
auc_scores_train, auc_scores_test = [], []

Y = Y_internal
# External labels must come from the external cohort. This was previously
# assigned Y_internal, which paired external features with internal labels.
Y_external_v = Y_external

for fold, (train_index, test_index) in enumerate(kfold_cv(X_internal, Y, n_splits=5), 1):
    ### Normalization ###
    # Fit the scaler on the training fold only; fitting on all internal data
    # would leak the held-out fold's mean/variance into training.
    scaler = StandardScaler().fit(X_internal.iloc[train_index])
    X_train = scaler.transform(X_internal.iloc[train_index])
    X_test = scaler.transform(X_internal.iloc[test_index])
    y_train = np.asarray(Y.iloc[train_index]).ravel().astype(int)
    y_test = np.asarray(Y.iloc[test_index]).ravel().astype(int)

    model = ANN_model(X_train.shape[1])

    # All callbacks monitor the *training* loss because fit() receives no
    # validation data.
    # NOTE(review): ReduceLROnPlateau and EarlyStopping share patience=20, so
    # training stops at the same epoch the learning rate would first be
    # reduced — consider a shorter patience for the LR schedule.
    reduce_lr = ReduceLROnPlateau(monitor='loss', factor=0.2, patience=20, verbose=1, mode='min')
    early_stopping = EarlyStopping(monitor='loss', patience=20, restore_best_weights=True, mode='min')
    # One checkpoint file per fold. The former name "best_model.keras" differs
    # from "Best_Model.keras" only by case and would overwrite the selected
    # model on case-insensitive filesystems.
    model_checkpoint = ModelCheckpoint(f"checkpoint_fold_{fold}.keras", monitor="loss",
                                       save_best_only=True, mode="min")

    classes = np.array([0, 1])
    weights = compute_class_weight(class_weight='balanced', classes=classes, y=y_train)
    class_weight = dict(zip(classes, weights))
    model.fit(X_train, y_train, epochs=200, class_weight=class_weight,
              callbacks=[reduce_lr, early_stopping, model_checkpoint], verbose=0)

    # predict() returns shape (n, 1); flatten to 1-D probabilities.
    y_train_pred = model.predict(X_train, verbose=0).ravel()
    y_test_pred = model.predict(X_test, verbose=0).ravel()

    # Threshold maximising Youden's J (tpr - fpr) on the training fold.
    fpr, tpr, thresholds1 = roc_curve(y_train, y_train_pred)
    optimal_idx = np.argmax(tpr - fpr)
    optimal_threshold = thresholds1[optimal_idx]

    ### Output for training phase ###
    y_train_pred_optimal = (y_train_pred >= optimal_threshold).astype(int)
    ### Output for testing phase ###
    y_test_pred_optimal = (y_test_pred >= optimal_threshold).astype(int)

    ### Performance Evaluation ###
    print(classification_report(y_train, y_train_pred_optimal))
    print(classification_report(y_test, y_test_pred_optimal))

    ### ROC-AUC score ###
    # ROC-AUC is a ranking metric and needs the predicted probabilities;
    # scoring the thresholded 0/1 labels yields balanced accuracy instead.
    roc_auc_train = roc_auc_score(y_train, y_train_pred)
    roc_auc_test = roc_auc_score(y_test, y_test_pred)

    auc_scores_train.append(roc_auc_train)
    auc_scores_test.append(roc_auc_test)

    print(f"Fold {fold} train ROC-AUC: {roc_auc_train:.2f}")
    print(f"Fold {fold} test ROC-AUC: {roc_auc_test:.2f}")

    ### Save the Best Model ###
    if roc_auc_test > best_score:
        best_score = roc_auc_test
        best_model = model
        best_scaler = scaler
        best_threshold = optimal_threshold
        best_model.save("Best_Model.keras")

if best_model is None:
    raise RuntimeError("No valid cross-validation fold was produced; no model was trained.")

# Expose the preprocessing and threshold that belong to the saved best model,
# so later cells do not silently use the last fold's values.
optimal_threshold = best_threshold
scaler = best_scaler
X = scaler.transform(X_internal)
X_external_v = scaler.transform(X_external)

print(f"\nMean train ROC AUC over {len(auc_scores_train)} valid folds: {np.mean(auc_scores_train):.4f}")
print(f"\nMean test ROC AUC over {len(auc_scores_test)} valid folds: {np.mean(auc_scores_test):.4f}")


In [None]:
### External Validation ###

## roc-auc score
ROC_AUC = []
## sensitivity score
SEN = []
## specificity score
SPE = []
## Weighted F1-score
F1 = []
## Weighted Precision score
Pr = []
## Weighted Recall score
Rew = []

X_test2 = np.array(X_external_v)
# Take the labels straight from the external cohort so they always match
# X_external_v row for row; cast to int so the report keys are '0' and '1'.
Y_test2 = np.array(Y_external).ravel().astype(int)
assert X_test2.shape[0] == Y_test2.shape[0], "external features and labels differ in length"

samples = 1000
# File written by the cross-validation cell ("Save the Best Model").
MODEL_PATH = "Best_Model.keras"

## Load trained model
# compile=False: the model is only used for inference, so the custom focal
# loss does not need to be deserialised.
Model = load_model(MODEL_PATH, compile=False)

# Inference does not depend on which rows are drawn together, so predict once
# and resample the stored outputs instead of calling predict() 1000 times.
Y_prob_all = Model.predict(X_test2, verbose=0).ravel()
# `optimal_threshold` is the decision threshold left by the cross-validation cell.
Y_hard_all = (Y_prob_all >= optimal_threshold).astype(int)

rng = np.random.default_rng(42)  # seeded so the bootstrap is reproducible
n_external = X_test2.shape[0]

for i in range(samples):
    indices = rng.integers(0, n_external, size=n_external)  # sample with replacement
    Y_boot = Y_test2[indices]

    # ROC-AUC, sensitivity and specificity are undefined when a resample
    # contains a single class; skip it rather than crash.
    if np.unique(Y_boot).size < 2:
        continue

    Y_pred = Y_prob_all[indices]
    Y_pred_optimal = Y_hard_all[indices]

    ## Performance metrics evaluation
    report = classification_report(Y_boot, Y_pred_optimal, output_dict=True, zero_division=0)
    # ROC-AUC must be computed on probabilities, not on thresholded labels.
    auc = roc_auc_score(Y_boot, Y_pred)
    Sensitivity = report['1']['recall']
    Specificity = report['0']['recall']
    F1score = report['weighted avg']['f1-score']
    Recall_w = report['weighted avg']['recall']
    Precision = report['weighted avg']['precision']

    ROC_AUC.append(auc)
    SEN.append(Sensitivity)
    SPE.append(Specificity)
    F1.append(F1score)
    Pr.append(Precision)
    Rew.append(Recall_w)

if not ROC_AUC:
    raise RuntimeError("Every bootstrap resample contained a single class; nothing to summarise.")

confidence_interval = np.percentile(ROC_AUC, [2.5, 97.5])

print(np.mean(ROC_AUC), np.mean(SEN), np.mean(SPE), np.mean(F1), np.mean(Pr), np.mean(Rew))
print(np.std(ROC_AUC), np.std(SEN), np.std(SPE), np.std(F1), np.std(Pr), np.std(Rew))
print(f"Bootstrap 95% Confidence Interval: {confidence_interval}")