#### ROC Curves

## Import

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
from sklearn.metrics import precision_recall_curve, auc, roc_curve
import matplotlib.patches as patches
import matplotlib.font_manager as fm
from scipy import stats
from itertools import combinations

# Global matplotlib font configuration: use the sans-serif family and make
# DejaVu Sans the first (and only) candidate for it.
plt.rcParams['font.family'] = 'sans-serif'
plt.rcParams['font.sans-serif'] = ['DejaVu Sans'] #change font to a known standard font

## Functions

In [None]:
# set up the env

# Workspace root and the output directories used for exported figures.
path = "/home/jupyter/workspaces/machinelearningforlivercancerriskprediction"
fig_path = f"{path}/HCC/visuals"
auroc_path = f"{fig_path}/AUROCs"

# exist_ok=True replaces the separate "exists?" check, so there is no window
# between the check and the creation in which the call could fail.
os.makedirs(auroc_path, exist_ok=True)


# Define all possible scenarios and colors (scenario label -> hex color).
all_scenarios = {
    'A': '#4995AD',
    'B': '#385579',
    'C': '#C13617',
    'D': '#F0903E',
    'E': '#F0C872',
    'Demographics': '#4995AD',
    'Diagnosis': '#385579',
    'Blood': '#C13617',
    'SNP': '#F0903E',
    'Metabolomics': '#F0C872',
    'Csmall': '#402155',
    'AMAP-RFC': '#c9c9c9',
    'TOP75' : '#cb6043',
    'TOP30' : '#d1846e',
    'TOP15' : '#d0a79a'
}


# Alternative palette for the separately trained scenarios (currently unused):
# 'Demographics': '#402155',
# 'Diagnosis': '#1E477C',
# 'Blood': '#21968C',
# 'SNP': '#74E980',
# 'Metabolomics': '#F8E61E'

# Define different scenario lists (combinations of scenarios plotted together).
# Every entry must be a key of `all_scenarios`, because get_colors() looks the
# color up there.
scenario_lists = {
    'incremental': ['A', 'B', 'C', 'D', 'E'],
    'separate': ['Demographics', 'Diagnosis', 'Blood', 'SNP', 'Metabolomics'],
    'small_prev': ['C', 'Csmall', 'AMAP-RFC'],
    'small': ['AMAP-RFC', 'TOP75', 'TOP30', 'TOP15'],
    'c': ['TOP75', 'TOP15'],
    'all': list(all_scenarios.keys())
}

def get_colors(scenario_list):
    """
    Look up the plot color of every scenario in ``scenario_list``.

    Parameters:
    - scenario_list (iterable): Scenario labels; each must be a key of ``all_scenarios``.

    Returns:
    - dict: scenario label -> color string, in the order the labels were given.

    Raises:
    - KeyError: if a label is not present in ``all_scenarios``.
    """
    palette = {}
    for name in scenario_list:
        palette[name] = all_scenarios[name]
    return palette


def plot_colorbar(scenarios):
    """
    Draw one colored square per scenario with its label underneath and show the figure.

    Parameters:
    - scenarios (list): Scenario labels; colors are resolved through ``get_colors``.
    """
    box = 55  # edge length of one swatch in data units
    palette = get_colors(scenarios)
    fig, ax = plt.subplots(figsize=(5.5, 1.1))

    for position, label in enumerate(palette):
        left_edge = position * box
        swatch = plt.Rectangle((left_edge, 0), box, box, linewidth=2, edgecolor='white', facecolor=palette[label])
        ax.add_patch(swatch)
        # Label is centered horizontally below the swatch.
        ax.text(left_edge + 27.5, -10, label, ha='center', va='top', fontsize=10, color='black')

    # The negative lower y-limit leaves room for the labels.
    ax.set_xlim(0, len(scenarios) * box)
    ax.set_ylim(-20, 55)
    ax.axis('off')
    plt.show()


def plot_roc_curve(test_scores, true_labels, ax=False, label=None, color='#c9c9c9', lw=2.5, linestyle="--"):
    """
    Plot a single ROC curve computed from raw scores and annotate it with its AUC.

    Parameters:
    - test_scores: Scores passed to ``roc_curve`` as the prediction scores.
    - true_labels: Ground-truth labels passed to ``roc_curve``.
    - ax: Axes to draw on. ``False`` (the default) or ``None`` means "use the
      current pyplot axes".
    - label (str or None): Legend name; ``None`` falls back to ``'aMAP'``.
    - color, lw, linestyle: Line appearance forwarded to ``plot``.

    Returns:
    - tuple ``(thresholds, fpr, tpr)``. ``thresholds`` and ``fpr`` are the raw
      outputs of ``roc_curve``; ``tpr`` is the TPR interpolated onto a
      100-point FPR grid, so ``fpr`` and ``tpr`` generally differ in length.
    """
    # Calculate ROC curve and AUC (the AUC uses the raw, non-interpolated curve).
    fpr, tpr, thresholds = roc_curve(true_labels, test_scores)
    roc_auc = round(auc(fpr, tpr), 2)

    # Resample onto a fixed FPR grid so the curve shares x-values with the
    # fold-averaged curves drawn by plot_rocs.
    base_fpr = np.linspace(0, 1, 100)
    tpr = np.interp(base_fpr, fpr, tpr)

    curve_name = 'aMAP' if label is None else label
    plot_label = '{} ({:.2f})'.format(curve_name, roc_auc)

    # Identity checks instead of `ax == False`: they also accept ax=None and
    # never trigger element-wise comparison if an array of axes is passed.
    target = plt.gca() if (ax is None or ax is False) else ax
    target.plot(base_fpr, tpr, color=color, lw=lw, label=plot_label, alpha=1, linestyle=linestyle)
    return thresholds, fpr, tpr




def plot_rocs(tprs, fig=None, ax=None, plot_all=True, y_amap=None, col_line='b', scenario='', fill_bet=True, title='', fig_type='', n_splits=5, line_style='-'):
    """
    Plot the fold-averaged ROC curve for one scenario, optionally with the
    individual folds and a +/- 1 standard deviation band.

    Parameters:
    - tprs: 2-D array-like, one row per fold; each row holds TPR values on an
      evenly spaced FPR grid from 0 to 1 (the grid length is taken from the
      number of columns).
    - fig, ax: Figure and axes to draw on. If omitted, the current pyplot
      axes and that axes' figure are used.
    - plot_all (bool): Also draw the per-fold curves (at most ``n_splits`` of them).
    - y_amap: Optional object with ``amap`` and ``status`` attributes; if given,
      its ROC curve is added through ``plot_roc_curve``.
    - col_line: Matplotlib format/color string for the mean curve.
    - scenario (str): Legend name of the mean curve.
    - fill_bet (bool): Shade the band between mean - std and mean + std.
    - title (str): Axes title; also used for the export file name.
    - fig_type (str): If non-empty, the figure is exported via ``save_figure``.
    - n_splits (int): Maximum number of fold curves drawn when ``plot_all`` is set.
    - line_style (str): Line style of the mean curve.

    Side effects:
    - Sets the global ``font.size`` rcParam to 16 (affects later figures).
    """
    if ax is None:
        ax = plt.gca() if fig is None else fig.gca()
    if fig is None:
        fig = ax.figure

    # Compute mean ROC curve and its spread across folds.
    tprs = np.atleast_2d(np.asarray(tprs, dtype=float))
    mean_tprs = tprs.mean(axis=0)
    std = tprs.std(axis=0)
    # Derive the grid from the data instead of assuming exactly 100 points.
    base_fpr = np.linspace(0, 1, tprs.shape[1])

    # TPR lives in [0, 1]; clip both ends of the band, not only the upper one.
    tprs_upper = np.minimum(mean_tprs + std, 1)
    tprs_lower = np.maximum(mean_tprs - std, 0)

    # Plot ROC curves for each fold and the mean ROC curve.
    if plot_all:
        # Slicing (rather than indexing range(n_splits)) cannot run past the
        # number of folds that are actually present.
        for fold_tpr in tprs[:n_splits]:
            ax.plot(base_fpr, fold_tpr, 'b', alpha=0.3, lw=3)
    # Two fixed decimals, matching the label format used by plot_roc_curve.
    mean_auc = auc(base_fpr, mean_tprs)
    ax.plot(base_fpr, mean_tprs, col_line, linestyle=line_style, label=f'{scenario} ({mean_auc:.2f})', lw=2.5)
    if fill_bet:
        ax.fill_between(base_fpr, tprs_lower, tprs_upper, color='grey', alpha=0.3)

    # Chance diagonal.
    ax.plot([0, 1], [0, 1], color="grey", linestyle="--", lw=2.5)
    if y_amap is not None:
        plot_roc_curve(test_scores=y_amap.amap, true_labels=y_amap.status, ax=ax)

    ax.set_xlim([0.0, 1.0])
    ax.set_ylim([0.0, 1.0])
    ax.set_xlabel('False Positive Rate', fontsize=14)
    ax.set_ylabel('True Positive Rate', fontsize=14)
    ax.tick_params(axis='both', which='major', labelsize=13)
    ax.set_title(title, fontsize=14, pad=10)
    # legend() ignores `fontsize` whenever `prop` is supplied, so the size has
    # to be carried by the FontProperties itself; otherwise it silently follows
    # the global font.size, which this function changes below.
    condensed_font = fm.FontProperties(family='sans-serif', style='normal', weight='normal', stretch='condensed', size=12)
    ax.legend(loc="lower right", bbox_to_anchor=(1.01, -0.02), frameon=False, prop=condensed_font)
    plt.rcParams.update({'font.size': 16})

    # Export
    if fig_type:
        save_figure(fig, title, fig_type, fig_path)


def plot_rocs_flexible(mapped_tprs, fig, ax, scenarios, cohort, plot_all=False, fill_bet=False, title='', fig_type='', n_splits=5):
    """
    Draw the mean ROC curve of several scenarios of one cohort on the same axes.

    Parameters:
    - mapped_tprs: DataFrame that is row-indexed by (cohort, scenario, ...);
      the rows selected for one (cohort, scenario) pair are handed to ``plot_rocs``.
    - fig, ax: Figure and axes shared by all curves.
    - scenarios (list): Scenario labels to plot; colors come from ``get_colors``.
    - cohort (str): Cohort label used for the row selection.
    - plot_all, fill_bet, title, fig_type, n_splits: Forwarded unchanged to ``plot_rocs``.
    """
    palette = get_colors(scenarios)
    # Options that are identical for every scenario.
    shared_options = dict(fig=fig, ax=ax, plot_all=plot_all, fill_bet=fill_bet,
                          title=title, fig_type=fig_type, n_splits=n_splits)

    for name in scenarios:
        line_color = palette[name]
        fold_tprs = mapped_tprs.loc[(cohort, name), :].values
        plot_rocs(tprs=fold_tprs, col_line=line_color, scenario=name, **shared_options)



def plot_combined_roc(tprs1, tprs2, label1='amap_cld', label2='amap_all'):
    """
    Plot the fold-averaged ROC curves of two TPR collections in one figure.

    Parameters:
    - tprs1: DataFrame or array-like, one row per fold, TPRs for the first curve.
    - tprs2: DataFrame or array-like, one row per fold, TPRs for the second curve.
    - label1 (str): Legend label of the first curve.
    - label2 (str): Legend label of the second curve.

    Each row is assumed to be sampled on an evenly spaced FPR grid from 0 to 1,
    as in ``plot_rocs``.
    """
    fig, ax = plt.subplots()

    for fold_tprs, curve_label in ((tprs1, label1), (tprs2, label2)):
        mean_tpr = np.asarray(np.mean(fold_tprs, axis=0))
        # Plot against the FPR grid; without explicit x-values matplotlib uses
        # the sample index (0..N-1), which contradicts the axis label.
        base_fpr = np.linspace(0, 1, mean_tpr.shape[0])
        ax.plot(base_fpr, mean_tpr, label=curve_label)

    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')
    ax.set_title('Combined ROC Curve')
    ax.legend(loc='best', frameon=False)

    plt.show()

# def plot_rocs(tprs,fig, ax, plot_all=True,y_amap=None,col_line='b',scenario='',fill_bet=True, title='', fig_type='', n_splits=5, line_style='-'):
#     # Compute mean ROC curve and AUC
#     tprs = np.array(tprs)
#     mean_tprs = tprs.mean(axis=0)
#     std = tprs.std(axis=0)
#     base_fpr=np.linspace(0, 1, 100)  # Create a range of fprs for the x

#     tprs_upper = np.minimum(mean_tprs + std, 1)
#     tprs_lower = mean_tprs - std

#     # Plot ROC curves for each fold and mean ROC curve
#     if plot_all==True:
#         for i in range(n_splits):
#             plt.plot(base_fpr, tprs[i], 'b', alpha=0.3, lw=3)
#     plt.plot(base_fpr, mean_tprs, col_line, linestyle=line_style, label=f'{scenario} (AUC = {round(auc(base_fpr,mean_tprs),ndigits=3)})', lw=2.5)
#     if fill_bet:
#         plt.fill_between(base_fpr, tprs_lower, tprs_upper, color='grey', alpha=0.3)


#     plt.plot([0, 1], [0, 1],color="grey", linestyle="--", lw=2.5)
#     if y_amap is not None:
#         plot_roc_curve(test_scores=y_amap.amap,true_labels=y_amap.status)

#     ax.set_xlim([0.0, 1.0])
#     ax.set_ylim([0.0, 1.0])
#     ax.set_xlabel('False Positive Rate', fontsize=14)
#     ax.set_ylabel('True Positive Rate', fontsize=14)
#     ax.tick_params(axis='both', which='major', labelsize=13)
#     ax.set_title(title, fontsize=16, pad=10)
#     ax.legend(loc="lower right", fontsize=12)
#     plt.rcParams.update({'font.size': 16})  # Set a default font size for all elements

#     #Export
#     name = "ROCs"
#     if fig_path:
#         save_figure(fig, title, fig_type, fig_path)


# def plot_combined_roc(tprs1, tprs2, label1='amap_cld', label2='amap_all'):
#     """
#     Plots two or more ROC curves in the same figure, currently used to compare the capacity of AMAP for different cohorts

#     Parameters:
#     - tprs1: DataFrame or array-like, TPRs for the first data.
#     - tprs2: DataFrame or array-like, TPRs for the second data.
#     - label1: str, label for the first data.
#     - label2: str, label for the second data.
#     """
#     fig, ax = plt.subplots()
#     mean_tpr1 = np.mean(tprs1, axis=0)
#     mean_tpr2 = np.mean(tprs2, axis=0)

#     ax.plot(mean_tpr1, label=label1)
#     ax.plot(mean_tpr2, label=label2)

#     ax.set_xlabel('False Positive Rate')
#     ax.set_ylabel('True Positive Rate')
#     ax.set_title('Combined ROC Curve')
#     ax.legend(loc='best')

#     plt.show()

# # Usage
# # plot_combined_roc(tprs_amap_cld, tprs_amap_all)

def save_figure(fig, title, fig_type, fig_path):
    """
    Export ``fig`` as a transparent SVG named ``{fig_type}_{title}_{model_type}.svg``.

    Parameters:
    - fig: Matplotlib figure to save.
    - title (str): Plot title; spaces and '/' are replaced by '_' for the file name.
    - fig_type (str): Prefix of the file name.
    - fig_path (str): Directory that is created if missing.

    Notes:
    - The file is written below the module-level ``auroc_path``, not below
      ``fig_path``, and the name uses the module-level ``model_type``; both must
      be defined before this function is called.
    - NOTE(review): confirm that writing to ``auroc_path`` rather than
      ``fig_path`` is intended.
    """
    # Create necessary directories
    os.makedirs(fig_path, exist_ok=True)

    # Replace spaces and special characters in title for filename
    file_name = title.replace(' ', '_').replace('/', '_')

    # Construct the SVG file path (PNG export is currently disabled).
    #png_path = os.path.join(auroc_path, f"{fig_type}_{file_name}_{model_type}.png")
    svg_path = os.path.join(auroc_path, f"{fig_type}_{file_name}_{model_type}.svg")

    # Make sure the directory that is actually written to exists as well.
    os.makedirs(os.path.dirname(svg_path), exist_ok=True)

    # Save the figure as SVG
    #fig.savefig(png_path, format='png', dpi=300)
    fig.savefig(svg_path, format='svg', transparent=True)


# Preview the palettes of the 'incremental' and the 'separate' scenario lists.
for palette_key in ('incremental', 'separate'):
    plot_colorbar(scenario_lists[palette_key])


## Import TPRs for a single estimator

In [None]:
model_type = "RFC"
# import the tprs
#tprs=pd.read_excel(path+'/Models/Pipelines/'+model_type+'/combined_output/val/TPRS_combined.xlsx')
tprs = pd.read_excel(path + '/combined_output/val/TPRS_combined.xlsx')

# import the amap data
#amap_cld=pd.read_excel(path+'/Models/amaps_cld_all_with_y.xlsx')
amap_all = pd.read_csv(path + '/HCC/df_amap.csv')
benchmarks = pd.read_csv(path + '/HCC/df_benchmark.csv')
# amap_cirrhosis=pd.read_csv(path+'/Models/df_amap_cirrhosis.csv')
# amap_nafld=pd.read_csv(path+'/Models/df_amap_nafld.csv')
# amap_par=pd.read_csv(path+'/Models/df_amap_par.csv')

# Each TPR column name encodes its origin: the text before the first '_' is
# the cohort, the third '_'-separated field is the scenario and everything
# after '_model' identifies the model.
columns = tprs.columns.tolist()
mapper = pd.DataFrame({'col_names': columns})
mapper["estimator"] = model_type
mapper['cohort'] = [name.split('_')[0] for name in columns]
mapper['scenario'] = [name.split('_')[2] for name in columns]
mapper['model'] = [name.split('_model')[1] for name in columns]
mapper.set_index('col_names', inplace=True)

# One row per TPR column, indexed by (cohort, scenario, model, estimator).
# (The bare `tprs.transpose()` and the discarded groupby expression that used
# to sit here had no effect and were dropped.)
mapped_tprs = pd.concat([mapper, tprs.transpose()], axis=1).set_index(['cohort', 'scenario', 'model', 'estimator'])
mapped_tprs


### Preprocess aMAP for AUROC

In [None]:
# Coerce to numeric first, so that non-numeric entries become NaN instead of
# raising in the range comparison or surviving the dropna below.
amap_all['aMAP'] = pd.to_numeric(amap_all['aMAP'], errors='coerce')
amap_all['status'] = pd.to_numeric(amap_all['status'], errors='coerce')

# Keep only aMAP values inside [0, 1] (both bounds inclusive); others become NaN.
amap_all['aMAP'] = amap_all['aMAP'].where(amap_all['aMAP'].between(0, 1))

# Drop every row that has a missing value in any column; the copy makes the
# result an independent frame for later assignments.
amap_all = amap_all.dropna().copy()



In [None]:
columns_to_impute = ['aMAP', 'NFS']

# Impute the specified columns with their respective means.
# Assign the result back instead of calling fillna(inplace=True) on the column
# selection: the in-place form works on an intermediate object and does not
# update `benchmarks` when pandas Copy-on-Write is active.
for column in columns_to_impute:
    benchmarks[column] = benchmarks[column].fillna(benchmarks[column].mean())

# Verify the imputation
print("NA counts after imputation:")
print(benchmarks[columns_to_impute].isnull().sum())

# Optional: Display summary statistics of imputed columns
print("\nSummary of imputed columns:")
print(benchmarks[columns_to_impute].describe())

In [None]:
fig, ax = plt.subplots(figsize=(6, 5))

# One entry per benchmark score:
# (column in `benchmarks`, legend label, line style, extra plot options)
benchmark_curves = [
    ("aMAP", "aMAP", "-", {}),
    ("APRI", "APRI", "--", {}),
    ("FIB4", "FIB4", "-.", {}),
    ("NFS", "NFS", ":", {}),
    ("cirrhosis", "Cirrhosis", "-", {"color": "#385579"}),
]
for score_column, curve_label, curve_style, extra_options in benchmark_curves:
    plot_roc_curve(test_scores=benchmarks[score_column], true_labels=benchmarks.status,
                   ax=ax, label=curve_label, linestyle=curve_style, **extra_options)

# Model curves for the 'c' scenario list in the 'all' cohort go on the same axes.
plot_rocs_flexible(mapped_tprs, fig, ax, scenario_lists['c'], 'all',
                   title='Literature Benchmark (All Of Us)', fig_type="AUROCS_combined")
plt.show()


## Combined AUROCs (Incremental) for one estimator class

##### PAR

In [None]:
# Incremental scenarios for the 'par' cohort, with the raw aMAP score as reference curve.
# NOTE(review): `amap_par` is not assigned anywhere in the visible notebook (its
# read_csv line in the import cell is commented out) — confirm it is defined
# before running this cell.
# NOTE(review): the title says 'Chronic Liver Disease' while the cohort key is
# 'par' — confirm the title is still correct; it also determines the file name.
fig, ax = plt.subplots(figsize=(6, 5))
plot_roc_curve(test_scores=amap_par["aMAP"], true_labels=amap_par.status, ax=ax)
plot_rocs_flexible(mapped_tprs, fig, ax, scenario_lists['incremental'], 'par',
                   title='Chronic Liver Disease', fig_type="AUROCS_combined")
plt.show()

##### All

In [None]:
# Incremental scenarios for the 'all' cohort, with the raw aMAP score as reference curve.
# plot_rocs_flexible exports the figure because fig_type is non-empty.
fig, ax = plt.subplots(figsize=(6, 5))
plot_roc_curve(test_scores=amap_all["aMAP"], true_labels=amap_all.status, ax=ax)
plot_rocs_flexible(mapped_tprs, fig, ax, scenario_lists['incremental'], 'all',
                   title="All", fig_type="AUROCS_combined")
plt.show()


## Separately trained Models

##### PAR

In [None]:
# Separately trained scenarios for the 'par' cohort, with the raw aMAP score as reference curve.
# NOTE(review): `amap_par` is not assigned anywhere in the visible notebook (its
# read_csv line in the import cell is commented out) — confirm it is defined
# before running this cell.
# NOTE(review): the title says 'Chronic Liver Disease' while the cohort key is
# 'par' — confirm the title is still correct; it also determines the file name.
fig, ax = plt.subplots(figsize=(6, 5))
plot_roc_curve(test_scores=amap_par["aMAP"], true_labels=amap_par.status, ax=ax)
plot_rocs_flexible(mapped_tprs, fig, ax, scenario_lists['separate'], 'par',
                   title='Chronic Liver Disease', fig_type="AUROCS_separately")
plt.show()

##### All

In [None]:
# Separately trained scenarios for the 'all' cohort, with the raw aMAP score as reference curve.
# plot_rocs_flexible exports the figure because fig_type is non-empty.
fig, ax = plt.subplots(figsize=(6, 5))
plot_roc_curve(test_scores=amap_all["aMAP"], true_labels=amap_all.status, ax=ax)
plot_rocs_flexible(mapped_tprs, fig, ax, scenario_lists['separate'], 'all',
                   title='All', fig_type="AUROCS_separately")
plt.show()


# fig, ax = plt.subplots(figsize=(6, 5))
# plot_roc_curve(test_scores=amap_all["aMAP"], true_labels=amap_all.status, ax=ax)
# plot_rocs_flexible(mapped_tprs, fig, ax, scenario_lists['small'], 'all',
#                    title="All Patients - Small Models", fig_type="AUROCS_small_models")
# plt.show()

## Small Models

##### Small Models All

In [None]:
# 'small' scenario list (AMAP-RFC, TOP75, TOP30, TOP15) for the 'all' cohort,
# with the raw aMAP score as reference curve.
fig, ax = plt.subplots(figsize=(6, 5))
plot_roc_curve(test_scores=amap_all["aMAP"], true_labels=amap_all["status"], ax=ax)
plot_rocs_flexible(mapped_tprs, fig, ax, scenario_lists['small'], 'all',
                   title="All Patients - Small Models", fig_type="AUROCS_small_models")


plt.show()

##### Small Models PAR

In [None]:
# 'small' scenario list (AMAP-RFC, TOP75, TOP30, TOP15) for the 'par' cohort.
# The raw aMAP reference curve is disabled here (the call below is commented out).
fig, ax = plt.subplots(figsize=(6, 5))
#plot_roc_curve(test_scores=amap_par["aMAP"], true_labels=amap_par.status, ax=ax)
plot_rocs_flexible(mapped_tprs, fig, ax, scenario_lists['small'], 'par',
                   title="Patients at Risk - Small Models", fig_type="AUROCS_small_models")


plt.show()

## Single Plots

##### For all 5 scenarios

In [None]:
n_splits = 5

# Scenario -> color mapping for the five incremental scenarios (A-E). The name
# `scenarios_colors` was used by the loops below without ever being defined.
scenarios_colors = get_colors(scenario_lists['incremental'])

# One figure per (cohort, scenario): first every scenario of 'cld', then every
# scenario of 'all'. plot_rocs needs the target figure and axes, so they are
# passed explicitly.
# NOTE(review): 'cld' is not used as a cohort key by any other cell of this
# notebook — confirm that mapped_tprs actually contains it.
for cohort, cohort_title in (('cld', 'Chronic liver disease'), ('all', 'All')):
    for scenario, color in scenarios_colors.items():
        fig, ax = plt.subplots()
        plot_rocs(tprs=mapped_tprs.loc[(cohort, scenario), :].values, fig=fig, ax=ax,
                  col_line=color, scenario=scenario, plot_all=True, fill_bet=True,
                  title=f'{cohort_title} - Scenario {scenario}', fig_type="AUROC_sep",
                  n_splits=n_splits)


##### Separately

In [None]:


n_splits = 5
scenario, color = 'A', '#36617B'

# Scenario A, 'par' cohort: mean curve plus individual folds and the std band.
# plot_rocs needs the target figure and axes, so they are passed explicitly.
fig, ax = plt.subplots()
plot_rocs(tprs=mapped_tprs.loc[('par', scenario), :].values, fig=fig, ax=ax,
          col_line=color, scenario=scenario, plot_all=True, fill_bet=True,
          title='Patients at Risk', fig_type="AUROC_sep", n_splits=n_splits)

# Scenario A, 'all' cohort: mean curve only.
# fig_type uses an underscore in both calls so the exported file name contains
# no space (save_figure only sanitizes the title).
fig_all, ax_all = plt.subplots()
plot_rocs(tprs=mapped_tprs.loc[('all', scenario), :].values, fig=fig_all, ax=ax_all,
          col_line=color, scenario=scenario, plot_all=False, fill_bet=False,
          title="All", fig_type="AUROC_sep", n_splits=n_splits)

## Comparing AMAP Subcohorts

In [None]:
# Comparing AMAP Subcohorts
# Comparing AMAP Subcohorts: raw aMAP ROC curves of several cohorts on one axes.
# NOTE(review): `amap_par` is not assigned anywhere in the visible notebook (its
# read_csv line in the import cell is commented out) — confirm it is defined
# before running this cell.
fig, ax = plt.subplots(figsize=(10, 8))
#plot_roc_curve(test_scores=amap_cld.amap, true_labels=amap_cld.status, ax=ax, label="Chronic Liver Disease", color="blue", lw=1.5)
plot_roc_curve(test_scores=amap_all["aMAP"], true_labels=amap_all.status, ax=ax, label="All", color="green", lw=1.5)
# Distinct color for the second curve: with both curves green (and the same
# default line style) they could not be told apart in the plot or the legend.
plot_roc_curve(test_scores=amap_par["aMAP"], true_labels=amap_par.status, ax=ax, label="PAR", color="blue", lw=1.5)
#plot_roc_curve(test_scores=amap_cirrhosis.aMAP, true_labels=amap_cirrhosis.status, ax=ax, label="Cirrhosis", color="red", lw=1.5)
#plot_roc_curve(test_scores=amap_nafld.aMAP, true_labels=amap_nafld.status, ax=ax, label="MASLD", color="purple", lw=1.5)

# Configure the axes through `ax` so the cell does not depend on which axes
# pyplot currently considers active.
ax.set_xlim([0.0, 1.0])
ax.set_ylim([0.0, 1.0])
ax.plot([0, 1], [0, 1], color="grey", linestyle="--", lw=1)  # chance diagonal
ax.set_xlabel('False Positive Rate', fontsize=14)
ax.set_ylabel('True Positive Rate', fontsize=14)
ax.tick_params(axis='both', which='major', labelsize=13)
ax.set_title("AMAP Score For Different Groups at Risk", fontsize=16)
ax.legend(loc="lower right", fontsize=12)
plt.rcParams.update({'font.size': 16})
plt.show()

In [None]:


n_splits = 5
scenario, color = 'A', '#36617B'

# Scenario A, 'par' cohort: mean curve plus individual folds and the std band.
# plot_rocs needs the target figure and axes, so they are passed explicitly.
fig, ax = plt.subplots()
plot_rocs(tprs=mapped_tprs.loc[('par', scenario), :].values, fig=fig, ax=ax,
          col_line=color, scenario=scenario, plot_all=True, fill_bet=True,
          title='Patients at Risk', fig_type="AUROC_sep", n_splits=n_splits)

# Scenario A, 'all' cohort: mean curve only.
# fig_type uses an underscore in both calls so the exported file name contains
# no space (save_figure only sanitizes the title).
fig_all, ax_all = plt.subplots()
plot_rocs(tprs=mapped_tprs.loc[('all', scenario), :].values, fig=fig_all, ax=ax_all,
          col_line=color, scenario=scenario, plot_all=False, fill_bet=False,
          title="All", fig_type="AUROC_sep", n_splits=n_splits)

# Multiple Estimators Comparison

### Import TPRs for multiple estimators

In [None]:
model_types = ["XGB", "RFC"]
base_path = path + '/Models/Pipelines/'
tprs_frames = []

# The loop rebinds the notebook-level names `model_type`, `tprs` and
# `mapped_tprs`; save_figure reads `model_type` and the DeLong cell passes
# `mapped_tprs`, so the names are kept as they are.
for model_type in model_types:
    # Construct path to the TPR file
    tprs_path = f'{base_path}{model_type}/combined_output/val/TPRS_combined.xlsx'

    # Report a missing file instead of skipping it silently.
    if not os.path.exists(tprs_path):
        print(f"TPR file not found, skipping {model_type}: {tprs_path}")
        continue

    # Load the TPRs
    tprs = pd.read_excel(tprs_path)
    print(tprs.head())  # call head(); printing `tprs.head` only shows the bound method

    # Each column name encodes cohort (first '_' field), scenario (third '_'
    # field) and model (text after '_model').
    columns = tprs.columns.tolist()
    mapper = pd.DataFrame({'col_names': columns})
    mapper["estimator"] = model_type
    mapper['cohort'] = [name.split('_')[0] for name in columns]
    mapper['scenario'] = [name.split('_')[2] for name in columns]
    mapper['model'] = [name.split('_model')[1] for name in columns]
    mapper.set_index('col_names', inplace=True)
    mapped_tprs = pd.concat([mapper, tprs.transpose()], axis=1).set_index(['cohort', 'scenario', 'model', 'estimator'])

    tprs_frames.append(mapped_tprs)

# Concatenate once at the end; an empty frame is kept as the "nothing loaded" result.
all_tprs = pd.concat(tprs_frames) if tprs_frames else pd.DataFrame()

### Benchmark distinct estimators

In [None]:
n_splits = 5
# Define cohorts to loop through
cohorts = ['par']
estimators = ['XGB', 'RFC']  # Add other estimators as needed

# Scenario -> color mapping for the incremental scenarios. The name
# `scenarios_colors_incremental` was used below without ever being defined.
scenarios_colors_incremental = get_colors(scenario_lists['incremental'])

# Index level values, extracted once for the row selection inside the loops.
cohort_values = all_tprs.index.get_level_values('cohort')
scenario_values = all_tprs.index.get_level_values('scenario')
estimator_values = all_tprs.index.get_level_values('estimator')

# Loop through each cohort
for cohort in cohorts:
    # Set up the plot for the current cohort
    fig, ax = plt.subplots(figsize=(10, 8))  # Adjust size as needed

    # Loop through each scenario and each estimator within it
    for scenario, color in scenarios_colors_incremental.items():
        for estimator in estimators:
            # A boolean mask yields an empty frame for a missing combination,
            # whereas a .loc lookup would raise KeyError and make the
            # emptiness check below unreachable.
            selected = (cohort_values == cohort) & (scenario_values == scenario) & (estimator_values == estimator)
            scenario_tprs = all_tprs[selected]

            line_style = "--" if estimator == 'XGB' else '-'

            # Skip combinations for which no TPRs were loaded
            if not scenario_tprs.empty:
                plot_rocs(tprs=scenario_tprs.values, fig=fig, ax=ax, plot_all=False, fill_bet=True,
                          col_line=color, scenario=f'{scenario} - {estimator}', line_style=line_style,
                          title=f'Estimator Benchmark - {cohort}', fig_type='AUROCS_combined',
                          n_splits=n_splits)

    # Finalize the plot settings (this replaces the title and legend set by plot_rocs)
    ax.set_title(f'AUROC Comparison by Scenario and Estimator for {cohort.upper()} Cohort')
    ax.legend(title='Scenarios')
    plt.show()


# Statistics

In [None]:
def check_data_structure(all_tprs, cohorts, scenarios, estimators):
    """
    Print a summary of ``all_tprs`` and report requested labels that are absent from its index.

    Parameters:
    - all_tprs: DataFrame whose index has the levels 'cohort', 'scenario' and 'estimator'.
    - cohorts, scenarios, estimators: Labels expected in the respective index level.

    Missing labels are only reported via ``print``; nothing is raised or returned.
    """
    print("Checking data structure...")
    print(f"all_tprs shape: {all_tprs.shape}")
    print(f"all_tprs index levels: {all_tprs.index.names}")
    print(f"all_tprs columns: {all_tprs.columns}")

    index = all_tprs.index

    def absent_from(level, wanted):
        # Labels of `wanted` that do not occur in the given index level.
        return [item for item in wanted if item not in index.get_level_values(level)]

    # Check if cohorts, scenarios, and estimators exist in all_tprs
    missing_cohorts = absent_from('cohort', cohorts)
    missing_scenarios = absent_from('scenario', scenarios)
    missing_estimators = absent_from('estimator', estimators)

    if missing_cohorts:
        print(f"Error: Missing cohorts in all_tprs: {missing_cohorts}")
    if missing_scenarios:
        print(f"Error: Missing scenarios in all_tprs: {missing_scenarios}")
    if missing_estimators:
        print(f"Error: Missing estimators in all_tprs: {missing_estimators}")

def delong_roc_variance(tpr1, tpr2):
    """
    Estimate the variance of the mean paired difference between two TPR vectors.

    Computes ``(var(tpr1) + var(tpr2) - 2 * cov(tpr1, tpr2)) / n``, which equals
    the sample variance of ``tpr1 - tpr2`` divided by ``n``. All terms use the
    same normalization (ddof=1); mixing ``np.var`` (ddof=0) with ``np.cov``
    (ddof=1) could produce a negative result.

    NOTE(review): despite the name, this is not the structural-component
    variance of DeLong et al.; it works on TPR values, not on raw scores.

    Parameters:
    - tpr1, tpr2: 1-D array-likes of equal length.

    Returns:
    - float: the variance estimate (NaN if fewer than two values are given).

    Raises:
    - ValueError: if the inputs differ in shape.
    """
    first = np.asarray(tpr1, dtype=float)
    second = np.asarray(tpr2, dtype=float)
    if first.shape != second.shape:
        raise ValueError("tpr1 and tpr2 must have the same shape")

    n = len(first)
    paired_diff = first - second
    return np.var(paired_diff, ddof=1) / n

def delong_roc_test(tpr1, tpr2):
    """
    Compare two models with a paired z-test on their per-fold AUC estimates.

    The AUC of each fold is approximated by the mean of its TPR row, the
    per-fold differences are averaged, and the mean is divided by its standard
    error; the two-sided p-value comes from the standard normal distribution.

    NOTE(review): this is a fold-level paired test, not the DeLong procedure on
    raw scores, and with few folds a normal reference distribution is
    optimistic — confirm this is the intended statistic.

    Parameters:
    - tpr1, tpr2: 2-D array-likes, one row per fold; row i of both inputs is
      treated as the same fold.

    Returns:
    - tuple ``(z, p, mean_diff, se_diff)``.
    """
    fold_tprs1 = np.asarray(tpr1, dtype=float)
    fold_tprs2 = np.asarray(tpr2, dtype=float)

    # Per-fold AUC approximation and paired differences
    auc_diffs = fold_tprs1.mean(axis=1) - fold_tprs2.mean(axis=1)

    # Compute mean and standard error of the differences
    mean_diff = np.mean(auc_diffs)
    se_diff = np.std(auc_diffs, ddof=1) / np.sqrt(len(auc_diffs))

    if se_diff == 0:
        # All folds show the same difference: avoid 0/0 and x/0. No difference
        # at all means no evidence (z=0, p=1); a constant non-zero difference
        # is the limiting case z=+/-inf, p=0.
        z = 0.0 if mean_diff == 0 else float(np.sign(mean_diff)) * np.inf
    else:
        z = mean_diff / se_diff

    # Survival function instead of 1 - cdf: keeps precision for large |z|.
    p = 2 * stats.norm.sf(abs(z))  # Two-sided test

    return z, p, mean_diff, se_diff

def perform_delong_test(all_tprs, cohorts, scenarios, estimators, compare_all=False, reference_scenario=None, reference_estimator=None):
    """
    Run ``delong_roc_test`` for pairs of scenarios and collect the results per cohort.

    Parameters:
    - all_tprs: DataFrame indexed by (cohort, scenario, model, estimator) with
      one row of TPR values per model.
    - cohorts: Cohort labels to evaluate; one result table is produced per cohort.
    - scenarios: Scenario labels to compare.
    - estimators: Estimator labels; scenarios are compared within each estimator.
    - compare_all (bool): If True, compare every pair of scenarios; otherwise
      compare ``reference_scenario`` against every other scenario.
    - reference_scenario: Reference scenario, required when ``compare_all`` is False.
    - reference_estimator: Must be provided when ``compare_all`` is False, but
      is not otherwise used by this function.

    Returns:
    - dict: cohort -> DataFrame with one row per comparison, including the
      Bonferroni-adjusted p-value (adjusted for the number of rows of that
      cohort) and a significance flag at 0.05.

    Raises:
    - ValueError: if ``compare_all`` is False and a reference argument is missing.
    """
    check_data_structure(all_tprs, cohorts, scenarios, estimators)

    # Fixing the columns up front keeps the table well-formed even when no
    # comparison is produced (fewer than two scenarios or no estimators);
    # otherwise the 'p-value' lookup below would raise KeyError.
    result_columns = ['Estimator', 'Model1', 'Model2', 'Z-statistic', 'p-value',
                      'Mean AUC Difference', 'SE of Difference']
    results = {}

    for cohort in cohorts:
        if compare_all:
            # Compare all scenarios with each other
            scenario_pairs = list(combinations(scenarios, 2))
        else:
            # Compare only with the reference scenario
            if reference_scenario is None or reference_estimator is None:
                raise ValueError("reference_scenario and reference_estimator must be provided when compare_all is False")
            scenario_pairs = [(reference_scenario, scenario) for scenario in scenarios if scenario != reference_scenario]

        cohort_results = []
        for scenario1, scenario2 in scenario_pairs:
            for estimator in estimators:
                tpr1 = all_tprs.loc[(cohort, scenario1, slice(None), estimator), :].values
                tpr2 = all_tprs.loc[(cohort, scenario2, slice(None), estimator), :].values

                z, p_value, mean_diff, se_diff = delong_roc_test(tpr1, tpr2)

                cohort_results.append({
                    'Estimator': str(estimator),
                    'Model1': str(scenario1),
                    'Model2': str(scenario2),
                    'Z-statistic': np.round(z, 4),
                    'p-value': p_value,
                    'Mean AUC Difference': round(mean_diff, 4),
                    'SE of Difference': round(se_diff, 4)
                })

        # Create DataFrame for the cohort
        cohort_table = pd.DataFrame(cohort_results, columns=result_columns)

        # Apply Bonferroni correction (capped at 1)
        n_tests = len(cohort_table)
        cohort_table['Bonferroni-adjusted p-value'] = np.minimum(cohort_table['p-value'].astype(float) * n_tests, 1.0)

        # Determine significance after Bonferroni correction
        cohort_table['Significant (α=0.05)'] = cohort_table['Bonferroni-adjusted p-value'] < 0.05

        results[cohort] = cohort_table

    return results

In [None]:
# All pairwise comparisons (compare_all=True) between the TOP75, TOP30, TOP15
# and AMAP-RFC scenarios of the RFC estimator in the 'all' cohort.
# NOTE(review): this passes `mapped_tprs` (the single-estimator frame bound
# most recently), not the combined `all_tprs` — confirm that is intended.
delong_results_all = perform_delong_test(
    all_tprs=mapped_tprs,
    cohorts=['all'],
    scenarios=['TOP75', 'TOP30', 'TOP15', 'AMAP-RFC'],
    estimators=['RFC'],
    compare_all=True
)


def save_results_to_excel(results, file_name):
    """
    Write each result table to its own sheet of one Excel workbook.

    Parameters:
    - results (dict): sheet name (here: cohort label) -> DataFrame.
    - file_name: Target path of the workbook. For string/path targets the
      parent directory is created if it does not exist yet.
    """
    # Only path-like targets have a directory to create.
    if isinstance(file_name, (str, os.PathLike)):
        target_dir = os.path.dirname(file_name)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)

    with pd.ExcelWriter(file_name) as writer:
        for cohort, df in results.items():
            df.to_excel(writer, sheet_name=cohort, index=False)

# Persist the comparison tables (one sheet per cohort).
save_results_to_excel(delong_results_all, f"{path}/HCC/tables/delong_test_results_all.xlsx")


# Print results: one headline per result set, then one table per cohort.
for result_type, delong_results in (("All Comparisons", delong_results_all),):
    print(f"\n--- {result_type} ---")
    for cohort, results in delong_results.items():
        # Header and table on separate lines, exactly like two print calls.
        print(f"\nDeLong Test Results for {cohort}:", results, sep="\n")
