In [None]:
import datetime
import os
from collections import Counter

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pydicom as py

from scipy import stats
from scipy.stats import gmean
from scipy.stats import wilcoxon

from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from sklearn.metrics import (
    make_scorer, confusion_matrix, precision_score, recall_score, accuracy_score, f1_score,
    roc_auc_score, balanced_accuracy_score, matthews_corrcoef,
)

from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

from xgboost import XGBClassifier

from lightgbm import LGBMClassifier

In [None]:
# Base directory that contains the `classification/` folder read by the loading cells.
# The value below is a placeholder: replace it with a real path before running.
# If a loading cell joins it with plain `+` (e.g. root + "classification/"), the
# path must end with a path separator.
root= "the root/path of the folders"

In [None]:
# Load training, validation and testing data.
# Paths are built with os.path.join so `root` works with or without a trailing
# separator (plain string concatenation silently produced a wrong path otherwise).
classification_dir = os.path.join(root, "classification")

xtrain_df= pd.read_csv(os.path.join(classification_dir, "xtrain_df.csv"))
ytrain= np.load(os.path.join(classification_dir, "ytrain.npy"))
print("Done")


xval_df= pd.read_csv(os.path.join(classification_dir, "xval_df.csv"))
yval= np.load(os.path.join(classification_dir, "yval.npy"))
print("Done")


xtest_df= pd.read_csv(os.path.join(classification_dir, "xtest_df.csv"))
ytest= np.load(os.path.join(classification_dir, "ytest.npy"))
print("Done")

# Feature-matrix shapes for each split.
print(f"\nTraining size= {xtrain_df.shape}")
print(f"Validation size= {xval_df.shape}")
print(f"Testing size= {xtest_df.shape}")

# Label values and their counts for each split (class balance check).
print(f"\nytrain= {np.unique(ytrain, return_counts= True)}")
print(f"yval= {np.unique(yval, return_counts= True)}")
print(f"ytest= {np.unique(ytest, return_counts= True)}\n")

In [None]:
# Load the final (RFECV-reduced) feature tables for each split.
# os.path.join keeps the paths valid whether or not `root` ends with a separator.
reduced_dir = os.path.join(root, "classification")

xtrain_reduced_rfecv= pd.read_csv(os.path.join(reduced_dir, "xtrain_reduced_rfecv.csv"))
xval_reduced_rfecv= pd.read_csv(os.path.join(reduced_dir, "xval_reduced_rfecv.csv"))
xtest_reduced_rfecv= pd.read_csv(os.path.join(reduced_dir, "xtest_reduced_rfecv.csv"))

In [None]:
# Creating the custom scorers

# Custom scorer for True Negative Rate (TNR)
def tnr(y_true, y_pred):
    """Return the true negative rate (specificity), TN / (TN + FP).

    Labels are assumed to be binary and encoded as 0 (negative) / 1 (positive)
    -- NOTE(review): confirm against the label files.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted hard labels.

    Returns
    -------
    float
        TN / (TN + FP), or 0.0 when ``y_true`` contains no negative samples
        (the rate is undefined there; 0.0 mirrors scikit-learn's default
        zero-division convention for recall-style metrics).
    """
    # Pinning `labels` guarantees a 2x2 matrix even when a fold/prediction
    # contains a single class; without it the 4-value unpack below fails.
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred, labels=[0, 1]).ravel()
    negatives = tn + fp
    if negatives == 0:
        return 0.0
    return tn / negatives



# Scorer objects wrapping the metric functions via make_scorer (all with default
# make_scorer arguments). None of them is referenced elsewhere in the visible
# part of this notebook.
# NOTE(review): with default arguments make_scorer scores the estimator's hard
# predict() output, so `roc_auc_scorer` would not match the probability-based
# roc_auc_score calls used inside the model functions -- confirm this is intended.
mcc_scorer= make_scorer(matthews_corrcoef)
balanced_accuracy_scorer= make_scorer(balanced_accuracy_score)
precision_scorer = make_scorer(precision_score)
recall_scorer = make_scorer(recall_score)
f1_scorer= make_scorer(f1_score)
roc_auc_scorer= make_scorer(roc_auc_score)
tnr_scorer = make_scorer(tnr)


# CCS function
def compute_ccs(metrics_dict):
    """Combine four metrics into a single CCS value via their geometric mean.

    Reads the keys "Balanced Accuracy", "F1 Score", "ROC_AUC" and "MCC" from
    ``metrics_dict`` (a missing key raises ``KeyError``), floors each value at
    0, and returns ``scipy.stats.gmean`` of the four floored values.
    """
    # All four lookups happen before any clipping.
    raw_values = (
        metrics_dict["Balanced Accuracy"],
        metrics_dict["F1 Score"],
        metrics_dict["ROC_AUC"],
        metrics_dict["MCC"],
    )
    floored = []
    for value in raw_values:
        # Negative values (e.g. a negative MCC) are treated as 0.
        floored.append(max(0, value))
    return gmean(floored)

In [None]:
                                                                #Logistic Regression


def log_reg(train_x, train_y, val_x, val_y, test_x, test_y, solver):
    """Evaluate class-balanced logistic regression over 20 random states.

    For each ``random_state`` in 0..19 a ``LogisticRegression`` with the given
    ``solver`` is fitted on the training split; its CCS (``compute_ccs`` of
    balanced accuracy, F1, ROC AUC and MCC, times 100) is computed on the
    validation split and then on the test split. The per-seed scores and their
    means are printed.

    Returns
    -------
    tuple of np.ndarray
        ``(validation CCS per seed, test CCS per seed)``.
    """

    def _ccs_percent(clf, features, labels):
        # Hard labels feed the threshold metrics; column 1 of predict_proba feeds ROC AUC.
        hard_labels = clf.predict(features)
        pos_proba = clf.predict_proba(features)[:, 1]
        scores = {
            "Balanced Accuracy": balanced_accuracy_score(labels, hard_labels),
            "F1 Score": f1_score(labels, hard_labels),
            "ROC_AUC": roc_auc_score(labels, pos_proba),
            "MCC": matthews_corrcoef(labels, hard_labels),
        }
        return compute_ccs(scores) * 100

    val_ccs_scores, test_ccs_scores = [], []

    for seed in range(20):
        clf = LogisticRegression(
            solver=solver,
            max_iter=10000,
            random_state=seed,
            n_jobs=-1,
            class_weight="balanced",
        )
        clf.fit(train_x, train_y)

        # Validation first, then test, for every seed.
        val_ccs_scores.append(_ccs_percent(clf, val_x, val_y))
        test_ccs_scores.append(_ccs_percent(clf, test_x, test_y))

    # Report per-seed scores and their means.
    report_lines = (
        "\n*******************************************************",
        f"Validation CCS = {val_ccs_scores}",
        f"Mean validation CCS = {round(np.mean(val_ccs_scores), 3)}",
        f"\nTesting CCS = {test_ccs_scores}",
        f"Mean testing CCS = {round(np.mean(test_ccs_scores), 3)}\n",
    )
    for line in report_lines:
        print(line)

    return np.array(val_ccs_scores), np.array(test_ccs_scores)

In [None]:
 #Gaussian Naive Bayes

def gnb(train_x, train_y, val_x, val_y, test_x, test_y):
    """Evaluate Gaussian Naive Bayes and report CCS on validation and test data.

    ``GaussianNB()`` is constructed without any seed, so every one of the 20
    "runs" the original loop performed was the same fit. The model is therefore
    fitted and scored once, and the score is repeated 20 times so the returned
    arrays keep the same length as those of the seeded models.

    Returns
    -------
    tuple of np.ndarray
        ``(validation CCS x 20, test CCS x 20)``, each CCS being
        ``compute_ccs(...) * 100``.
    """
    n_runs = 20  # length expected by callers that pair scores across models

    model = GaussianNB()
    model.fit(train_x, train_y)

    # Validate
    val_predictions = model.predict(val_x)
    val_probabilities = model.predict_proba(val_x)[:, 1]  # For ROC AUC

    val_metrics = {
        "Balanced Accuracy": balanced_accuracy_score(val_y, val_predictions),
        "F1 Score": f1_score(val_y, val_predictions),
        "ROC_AUC": roc_auc_score(val_y, val_probabilities),
        "MCC": matthews_corrcoef(val_y, val_predictions)
    }
    val_ccs = compute_ccs(val_metrics) * 100

    # Testing
    test_predictions = model.predict(test_x)
    test_probabilities = model.predict_proba(test_x)[:, 1]  # For ROC AUC

    test_metrics = {
        "Balanced Accuracy": balanced_accuracy_score(test_y, test_predictions),
        "F1 Score": f1_score(test_y, test_predictions),
        "ROC_AUC": roc_auc_score(test_y, test_probabilities),
        "MCC": matthews_corrcoef(test_y, test_predictions)
    }
    test_ccs = compute_ccs(test_metrics) * 100

    # Replicate the single deterministic result to preserve the 20-entry output.
    val_ccs_scores = [val_ccs] * n_runs
    test_ccs_scores = [test_ccs] * n_runs

    # Print final CCS
    print("\n")
    print(f"Validation CCS = {val_ccs_scores}")
    print(f"Mean validation CCS = {round(np.mean(val_ccs_scores), 3)}")

    print(f"\nTesting CCS = {test_ccs_scores}")
    print(f"Mean testing CCS = {round(np.mean(test_ccs_scores), 3)}\n")

    return np.array(val_ccs_scores), np.array(test_ccs_scores)

In [None]:
 #Linear Discriminant Analysis

def lda(train_x, train_y, val_x, val_y, test_x, test_y, solver):
    """Evaluate Linear Discriminant Analysis and report CCS on validation/test data.

    ``LinearDiscriminantAnalysis(solver=solver)`` receives no seed, so the 20
    iterations of the original loop repeated the same fit. The model is fitted
    and scored once and the score is repeated 20 times, keeping the returned
    arrays the same length as those of the seeded models.

    Parameters
    ----------
    solver : str
        Passed through to ``LinearDiscriminantAnalysis``.

    Returns
    -------
    tuple of np.ndarray
        ``(validation CCS x 20, test CCS x 20)``, each CCS being
        ``compute_ccs(...) * 100``.
    """
    n_runs = 20  # length expected by callers that pair scores across models

    model = LinearDiscriminantAnalysis(solver=solver)
    model.fit(train_x, train_y)

    # Validate
    val_predictions = model.predict(val_x)
    val_probabilities = model.predict_proba(val_x)[:, 1]  # For ROC AUC

    val_metrics = {
        "Balanced Accuracy": balanced_accuracy_score(val_y, val_predictions),
        "F1 Score": f1_score(val_y, val_predictions),
        "ROC_AUC": roc_auc_score(val_y, val_probabilities),
        "MCC": matthews_corrcoef(val_y, val_predictions)
    }
    val_ccs = compute_ccs(val_metrics) * 100

    # Testing
    test_predictions = model.predict(test_x)
    test_probabilities = model.predict_proba(test_x)[:, 1]  # For ROC AUC

    test_metrics = {
        "Balanced Accuracy": balanced_accuracy_score(test_y, test_predictions),
        "F1 Score": f1_score(test_y, test_predictions),
        "ROC_AUC": roc_auc_score(test_y, test_probabilities),
        "MCC": matthews_corrcoef(test_y, test_predictions)
    }
    test_ccs = compute_ccs(test_metrics) * 100

    # Replicate the single deterministic result to preserve the 20-entry output.
    val_ccs_scores = [val_ccs] * n_runs
    test_ccs_scores = [test_ccs] * n_runs

    # Print final CCS
    print("\n*******************************************************\n")
    print(f"Validation CCS = {val_ccs_scores}")
    print(f"Mean validation CCS = {round(np.mean(val_ccs_scores), 3)}")

    print(f"\nTesting CCS = {test_ccs_scores}")
    print(f"Mean testing CCS = {round(np.mean(test_ccs_scores), 3)}")

    return np.array(val_ccs_scores), np.array(test_ccs_scores)

In [None]:
#Decision Tree Classifier

def dtc(train_x, train_y, val_x, val_y, test_x, test_y):
    """Evaluate a class-balanced decision tree over 20 random states.

    For each ``random_state`` in 0..19 a ``DecisionTreeClassifier`` is fitted on
    the training split and its CCS (``compute_ccs`` of balanced accuracy, F1,
    ROC AUC and MCC, times 100) is computed on the validation and test splits.
    Per-seed scores and their means are printed.

    Returns
    -------
    tuple of np.ndarray
        ``(validation CCS per seed, test CCS per seed)``.
    """

    def _evaluate(tree, features, labels):
        # predict() drives the label-based metrics, predict_proba()[:, 1] drives ROC AUC.
        predicted = tree.predict(features)
        class1_proba = tree.predict_proba(features)[:, 1]
        split_metrics = {
            "Balanced Accuracy": balanced_accuracy_score(labels, predicted),
            "F1 Score": f1_score(labels, predicted),
            "ROC_AUC": roc_auc_score(labels, class1_proba),
            "MCC": matthews_corrcoef(labels, predicted),
        }
        return compute_ccs(split_metrics) * 100

    val_ccs_scores = []
    test_ccs_scores = []

    seed = 0
    while seed < 20:
        tree = DecisionTreeClassifier(random_state=seed, class_weight="balanced")
        tree.fit(train_x, train_y)
        val_ccs_scores.append(_evaluate(tree, val_x, val_y))
        test_ccs_scores.append(_evaluate(tree, test_x, test_y))
        seed += 1

    # Summary of all runs
    mean_val = round(np.mean(val_ccs_scores), 3)
    mean_test = round(np.mean(test_ccs_scores), 3)
    print("\n")
    print(f"Validation CCS = {val_ccs_scores}")
    print(f"Mean validation CCS = {mean_val}")
    print(f"\nTesting CCS = {test_ccs_scores}")
    print(f"Mean testing CCS = {mean_test}\n")

    return np.array(val_ccs_scores), np.array(test_ccs_scores)

In [None]:
                                                        #Random Forest Classifier
                            #Hyperparameters chosen as per the highest testing CCS from the exhaustive search


def rfc(train_x, train_y, val_x, val_y, test_x, test_y):
    """Evaluate a class-balanced random forest (200 trees, max depth 25) over 20 seeds.

    For each ``random_state`` in 0..19 the forest is fitted on the training
    split, then its CCS (``compute_ccs`` of balanced accuracy, F1, ROC AUC and
    MCC, times 100) is computed on the validation split and on the test split.
    Per-seed scores and their means are printed.

    NOTE(review): the cell header says the hyperparameters were chosen by the
    highest testing CCS; if so, the test figures printed here are not an
    independent estimate -- confirm.

    Returns
    -------
    tuple of np.ndarray
        ``(validation CCS per seed, test CCS per seed)``.
    """
    # Evaluation order per seed: validation first, then test.
    splits = (("val", val_x, val_y), ("test", test_x, test_y))
    collected = {"val": [], "test": []}

    for seed in range(20):
        forest = RandomForestClassifier(
            n_estimators=200,
            max_depth=25,
            random_state=seed,
            n_jobs=-1,
            class_weight="balanced",
        )
        forest.fit(train_x, train_y)

        for split_name, features, labels in splits:
            predicted = forest.predict(features)
            class1_proba = forest.predict_proba(features)[:, 1]  # used for ROC AUC
            split_metrics = {
                "Balanced Accuracy": balanced_accuracy_score(labels, predicted),
                "F1 Score": f1_score(labels, predicted),
                "ROC_AUC": roc_auc_score(labels, class1_proba),
                "MCC": matthews_corrcoef(labels, predicted),
            }
            collected[split_name].append(compute_ccs(split_metrics) * 100)

    val_ccs_scores = collected["val"]
    test_ccs_scores = collected["test"]

    # Summary of all runs
    for line in (
        "\n",
        f"Validation CCS = {val_ccs_scores}",
        f"Mean validation CCS = {round(np.mean(val_ccs_scores), 3)}",
        f"\nTesting CCS = {test_ccs_scores}",
        f"Mean testing CCS = {round(np.mean(test_ccs_scores), 3)}\n",
    ):
        print(line)

    return np.array(val_ccs_scores), np.array(test_ccs_scores)

In [None]:
                                    #SVM on the training data and then evaluated on the validation data

def svm(train_x, train_y, val_x, val_y, test_x, test_y, k):
    """Evaluate a class-balanced SVC with kernel ``k`` over 20 random states.

    For each ``random_state`` in 0..19 an ``SVC(probability=True)`` is fitted on
    ``(train_x, train_y)`` and its CCS (``compute_ccs`` of balanced accuracy,
    F1, ROC AUC and MCC, times 100) is computed on the validation and test
    splits. Per-seed scores and their means are printed.

    Parameters
    ----------
    k : str
        Kernel name passed to ``SVC(kernel=k)``.

    Returns
    -------
    tuple of np.ndarray
        ``(validation CCS per seed, test CCS per seed)``.
    """

    def _ccs_percent(model, features, labels):
        predictions = model.predict(features)
        probabilities = model.predict_proba(features)[:, 1]  # For ROC AUC
        metrics = {
            "Balanced Accuracy": balanced_accuracy_score(labels, predictions),
            "F1 Score": f1_score(labels, predictions),
            "ROC_AUC": roc_auc_score(labels, probabilities),
            "MCC": matthews_corrcoef(labels, predictions)
        }
        return compute_ccs(metrics) * 100

    # Store CCS scores across multiple random runs
    val_ccs_scores = []
    test_ccs_scores = []

    for seed in range(20):  # Loop over 20 different random states
        model = SVC(kernel= k, class_weight= "balanced", random_state= seed, verbose= False, probability= True)
        # Fit on the labels passed in; the previous code used the notebook-global
        # `ytrain`, silently ignoring the `train_y` argument.
        model.fit(train_x, train_y)

        val_ccs_scores.append(_ccs_percent(model, val_x, val_y))
        test_ccs_scores.append(_ccs_percent(model, test_x, test_y))

    # Print final CCS
    print("\n")
    print(f"Validation CCS = {val_ccs_scores}")
    print(f"Mean validation CCS = {round(np.mean(val_ccs_scores), 3)}")

    print(f"\nTesting CCS = {test_ccs_scores}")
    print(f"Mean testing CCS = {round(np.mean(test_ccs_scores), 3)}\n")

    return np.array(val_ccs_scores), np.array(test_ccs_scores)

In [None]:
                                                #Light Gradient Boosting Machine
                        #Hyperparameters chosen as per the highest testing CCS from the exhaustive search

def lgbm(train_x, train_y, val_x, val_y, test_x, test_y):
    """Evaluate a class-balanced LightGBM classifier over 20 random states.

    For each ``random_state`` in 0..19 an ``LGBMClassifier`` (350 estimators,
    learning rate 0.2) is fitted on the training split and its CCS
    (``compute_ccs`` of balanced accuracy, F1, ROC AUC and MCC, times 100) is
    computed on the validation and test splits. Per-seed scores, their means,
    and the mean over both splits combined are printed.

    NOTE(review): the cell header says the hyperparameters were chosen by the
    highest testing CCS; if so, the test figures printed here are not an
    independent estimate -- confirm.

    Returns
    -------
    tuple of np.ndarray
        ``(validation CCS per seed, test CCS per seed)``.
    """

    def _score_split(booster, features, labels):
        # Hard predictions for the label metrics, class-1 probability for ROC AUC.
        predicted = booster.predict(features)
        class1_proba = booster.predict_proba(features)[:, 1]
        split_metrics = {
            "Balanced Accuracy": balanced_accuracy_score(labels, predicted),
            "F1 Score": f1_score(labels, predicted),
            "ROC_AUC": roc_auc_score(labels, class1_proba),
            "MCC": matthews_corrcoef(labels, predicted),
        }
        return compute_ccs(split_metrics) * 100

    val_ccs_scores, test_ccs_scores = [], []

    for seed in range(20):
        booster = LGBMClassifier(
            n_estimators=350,
            learning_rate=0.2,
            random_state=seed,
            n_jobs=-1,
            class_weight="balanced",
            verbose=-1,
        )
        booster.fit(train_x, train_y)
        val_ccs_scores.append(_score_split(booster, val_x, val_y))
        test_ccs_scores.append(_score_split(booster, test_x, test_y))

    # Summary of all runs
    print("\n")
    print(f"Validation CCS = {val_ccs_scores}")
    print(f"Mean validation CCS = {round(np.mean(val_ccs_scores), 3)}")
    print(f"\nTesting CCS = {test_ccs_scores}")
    print(f"Mean testing CCS = {round(np.mean(test_ccs_scores), 3)}\n")

    # Both score lists concatenated (plain list +), then averaged.
    both_ccs = val_ccs_scores + test_ccs_scores
    print(f"\nAverage of validation and testing CCS = {round(np.mean(both_ccs), 3)}")

    return np.array(val_ccs_scores), np.array(test_ccs_scores)

In [None]:
                                                                    #XGBoost
                                #Hyperparameters chosen as per the highest testing CCS from the exhaustive search


def xgb(train_x, train_y, val_x, val_y, test_x, test_y):
    """Evaluate an XGBoost classifier over 20 random states.

    For each ``random_state`` in 0..19 an ``XGBClassifier`` (275 estimators,
    learning rate 0.2) is fitted on the training split and its CCS
    (``compute_ccs`` of balanced accuracy, F1, ROC AUC and MCC, times 100) is
    computed on the validation and test splits. Per-seed scores, their means,
    and the mean over both splits combined are printed.

    NOTE(review): unlike the other models in this notebook, no class-weighting
    argument is passed here -- confirm that is intended. The cell header also
    says the hyperparameters were chosen by the highest testing CCS; if so, the
    test figures printed here are not an independent estimate.

    Returns
    -------
    tuple of np.ndarray
        ``(validation CCS per seed, test CCS per seed)``.
    """
    # Evaluation order per seed: validation first, then test.
    splits = (("val", val_x, val_y), ("test", test_x, test_y))
    collected = {"val": [], "test": []}

    for seed in range(20):
        booster = XGBClassifier(
            n_estimators=275,
            learning_rate=0.2,
            random_state=seed,
            n_jobs=-1,
            disable_default_eval_metric=True,
        )
        booster.fit(train_x, train_y)

        for split_name, features, labels in splits:
            predicted = booster.predict(features)
            class1_proba = booster.predict_proba(features)[:, 1]  # used for ROC AUC
            split_metrics = {
                "Balanced Accuracy": balanced_accuracy_score(labels, predicted),
                "F1 Score": f1_score(labels, predicted),
                "ROC_AUC": roc_auc_score(labels, class1_proba),
                "MCC": matthews_corrcoef(labels, predicted),
            }
            collected[split_name].append(compute_ccs(split_metrics) * 100)

    val_ccs_scores = collected["val"]
    test_ccs_scores = collected["test"]

    # Summary of all runs
    print("\n")
    print(f"Validation CCS = {val_ccs_scores}")
    print(f"Mean validation CCS = {round(np.mean(val_ccs_scores), 3)}")
    print(f"\nTesting CCS = {test_ccs_scores}")
    print(f"Mean testing CCS = {round(np.mean(test_ccs_scores), 3)}\n")

    # Both score lists concatenated (plain list +), then averaged.
    both_ccs = val_ccs_scores + test_ccs_scores
    print(f"\nAverage of validation and testing CCS = {round(np.mean(both_ccs), 3)}")

    return np.array(val_ccs_scores), np.array(test_ccs_scores)


In [None]:
                                        #BoostFusion: Soft voting classifier on XGB and LGBM

def soft(train_x, train_y, val_x, val_y, test_x, test_y):
    """Evaluate a soft-voting ensemble of XGBoost and LightGBM over 20 seeds.

    For each seed in 0..19 an ``XGBClassifier`` and an ``LGBMClassifier`` (both
    using that seed as ``random_state``) are combined in a
    ``VotingClassifier(voting='soft')`` and fitted on the training split. The
    ensemble's CCS (``compute_ccs`` of balanced accuracy, F1, ROC AUC and MCC,
    times 100) is computed on the validation and test splits. Per-seed scores,
    their means, and the mean over both splits combined are printed.

    Returns
    -------
    tuple of np.ndarray
        ``(validation CCS per seed, test CCS per seed)``.
    """

    def _build_ensemble(seed):
        # Same base-learner settings for every seed; only random_state varies.
        xgb_member = XGBClassifier(
            n_estimators=275,
            learning_rate=0.2,
            random_state=seed,
            n_jobs=-1,
            disable_default_eval_metric=True,
        )
        lgbm_member = LGBMClassifier(
            n_estimators=350,
            learning_rate=0.2,
            random_state=seed,
            n_jobs=-1,
            class_weight="balanced",
            verbose=-1,
        )
        members = [('xgb', xgb_member), ('lgbm', lgbm_member)]
        return VotingClassifier(estimators=members, voting='soft')

    def _score_split(ensemble, features, labels):
        predicted = ensemble.predict(features)
        class1_proba = ensemble.predict_proba(features)[:, 1]  # used for ROC AUC
        split_metrics = {
            "Balanced Accuracy": balanced_accuracy_score(labels, predicted),
            "F1 Score": f1_score(labels, predicted),
            "ROC_AUC": roc_auc_score(labels, class1_proba),
            "MCC": matthews_corrcoef(labels, predicted),
        }
        return compute_ccs(split_metrics) * 100

    val_ccs_scores, test_ccs_scores = [], []

    for seed in range(20):
        ensemble = _build_ensemble(seed)
        ensemble.fit(train_x, train_y)
        val_ccs_scores.append(_score_split(ensemble, val_x, val_y))
        test_ccs_scores.append(_score_split(ensemble, test_x, test_y))

    # Summary of all runs
    print("\n")
    print(f"Validation CCS = {val_ccs_scores}")
    print(f"Mean validation CCS = {round(np.mean(val_ccs_scores), 3)}")
    print(f"\nTesting CCS = {test_ccs_scores}")
    print(f"Mean testing CCS = {round(np.mean(test_ccs_scores), 3)}\n")

    # Both score lists concatenated (plain list +), then averaged.
    both_ccs = val_ccs_scores + test_ccs_scores
    print(f"\nAverage of validation and testing CCS = {round(np.mean(both_ccs), 3)}")

    return np.array(val_ccs_scores), np.array(test_ccs_scores)


In [None]:
                                                #Statistical testing on BoostFusion
                        #Hyperparameters chosen as per the highest testing CCS from the exhaustive search in XGB and LGBM


# BoostFusion (soft-voting XGB + LGBM) scores on the RFECV-reduced features.
val_group, test_group= soft(xtrain_reduced_rfecv, ytrain, xval_reduced_rfecv, yval, xtest_reduced_rfecv, ytest)

# Baseline scores for the two individual boosters, computed here on the same data
# so this cell does not depend on variables left over from an earlier session.
# soft(), lgbm() and xgb() all iterate seeds 0..19, so entry i of every array
# comes from the same seed and the samples are paired.
val_lgbm, test_lgbm = lgbm(xtrain_reduced_rfecv, ytrain, xval_reduced_rfecv, yval, xtest_reduced_rfecv, ytest)
val_xgb, test_xgb = xgb(xtrain_reduced_rfecv, ytrain, xval_reduced_rfecv, yval, xtest_reduced_rfecv, ytest)


def report_wilcoxon(label, baseline_scores, fusion_scores, alpha=0.05):
    """Run a paired Wilcoxon signed-rank test (baseline vs. BoostFusion) and print it.

    Parameters
    ----------
    label : str
        Heading printed above the result.
    baseline_scores, fusion_scores : array-like
        Paired CCS scores of equal length.
    alpha : float
        Significance threshold for the printed verdict.

    Returns
    -------
    tuple
        ``(statistic, p_value)``; ``(nan, nan)`` when every paired difference
        is zero, because the test is undefined in that case.
    """
    baseline = np.asarray(baseline_scores, dtype=float)
    fusion = np.asarray(fusion_scores, dtype=float)
    differences = fusion - baseline  # positive => BoostFusion scored higher

    if not np.any(differences):
        # wilcoxon() cannot rank all-zero differences; report instead of failing.
        print(f"\n{label}:\nAll paired differences are zero - Wilcoxon test not applicable.")
        print("Mean diff:", 0.0)
        return float("nan"), float("nan")

    stat, p_value = wilcoxon(baseline, fusion)
    print(f"\n{label}:\nStatistic = {stat:.2f}, p-value = {p_value:.4f}")
    print("Mean diff:", np.mean(differences))

    # The test is two-sided: it shows whether the scores differ, and the sign of
    # the mean difference above shows the direction.
    if p_value < alpha:
        print(f"Significant difference in CCS (p < {alpha}) between BoostFusion and the baseline.\n")
    else:
        print(f"No significant difference in CCS (p >= {alpha}) between BoostFusion and the baseline.\n")

    return stat, p_value


comparisons = [
    ("Validation wrt LGBM", val_lgbm, val_group),
    ("Testing wrt LGBM", test_lgbm, test_group),
    ("Validation wrt XGB", val_xgb, val_group),
    ("Testing wrt XGB", test_xgb, test_group),
]

for comparison_label, baseline_scores, fusion_scores in comparisons:
    stat, p_value = report_wilcoxon(comparison_label, baseline_scores, fusion_scores)
