In [2]:
import os
import pandas as pd
import numpy as np

# Functions to compute the numerical results


In [3]:
def load_metrics(result_folder, split):
    """Read the metrics CSV written for one split of a result folder.

    Folders whose path contains "LFLR" or "LFCO" are looked up under an
    extra "LR" directory level; for every other folder the per-split
    directory sits directly below ``result_folder``.
    NOTE(review): "LFCO" paths are resolved through "LR" as well -- confirm
    that this matches the on-disk layout.

    Args:
        result_folder: Path of the result folder (a string).
        split: Split identifier interpolated into the directory and file name.

    Returns:
        The contents of ``metrics_split{split}.csv`` as a pandas DataFrame.

    Raises:
        FileNotFoundError: If the expected CSV file does not exist.
    """
    path_parts = [result_folder]
    if "LFLR" in result_folder or "LFCO" in result_folder:
        path_parts.append("LR")
    path_parts += [f"split_{split}", f"metrics_split{split}.csv"]
    metrics_file = os.path.join(*path_parts)

    if os.path.exists(metrics_file):
        return pd.read_csv(metrics_file)

    raise FileNotFoundError(f"File not found: {metrics_file}")


# Function to calculate the mean and standard deviation of metrics
def calculate_statistics(result_folder, splits=(1, 2, 3)):
    """Compute the mean and standard deviation of the metrics over the splits.

    The metrics frame of every requested split is loaded with
    ``load_metrics`` and all rows are stacked into one frame; the statistics
    are then taken column-wise over that stacked frame.

    Args:
        result_folder: Path of the result folder handed to ``load_metrics``.
        splits: Iterable of split identifiers to include. The default is an
            immutable tuple so the default value cannot be shared and
            mutated between calls.

    Returns:
        A dict with two entries, each a pandas Series indexed by metric
        column name:
        ``"mean"``    -- column-wise mean,
        ``"std_dev"`` -- column-wise standard deviation as computed by
        pandas' default ``DataFrame.std`` (sample standard deviation).

    Raises:
        FileNotFoundError: Propagated from ``load_metrics`` when the CSV of
            any requested split is missing.
    """
    metrics_df = pd.concat([load_metrics(result_folder, split) for split in splits])

    return {
        "mean": metrics_df.mean(),
        "std_dev": metrics_df.std(),
    }

def get_valid_subfolders(directory):
    """Return the names of the non-hidden sub-directories of ``directory``.

    Entries whose name starts with a dot and entries that are not
    directories are left out. Names are returned in the order produced by
    ``os.listdir``.

    Args:
        directory: Path of the directory to scan.

    Returns:
        A list of sub-directory names (not full paths).
    """
    subfolders = []
    for entry in os.listdir(directory):
        # Dot-prefixed entries are skipped.
        if entry.startswith('.'):
            continue
        if os.path.isdir(os.path.join(directory, entry)):
            subfolders.append(entry)
    return subfolders

In [4]:
# Directories that are scanned for result sub-folders.
models = ["./MLP", "./GRU", "./JHF", "./FHSI", "./LFLR", "./LFCO", "./Transformer"]
results = []

# Output column label -> name of the metric it is read from.
metric_labels = {
    "Accuracy": "accuracy",
    "Specificity": "specificity",
    "Sensitivity": "recall",
    "ROC AUC": "roc_auc",
}

for model in models:
    for result_folder in get_valid_subfolders(model):
        result_path = os.path.join(model, result_folder)

        try:
            statistics = calculate_statistics(result_path)
        except FileNotFoundError as e:
            # A missing metrics file is reported and that folder is skipped.
            print(e)
            continue

        # One row per result folder; every metric is shown as
        # "mean ± std" in percent with two decimals.
        row = {"Model": result_folder}
        for label, metric in metric_labels.items():
            mean_pct = statistics['mean'][metric] * 100
            std_pct = statistics['std_dev'][metric] * 100
            row[label] = f"{mean_pct:.2f} ± {std_pct:.2f}"
        results.append(row)


results_df = pd.DataFrame(results)

In [5]:
# Display the per-model summary table (each metric as "mean ± std" in percent).
results_df

Unnamed: 0,Model,Accuracy,Specificity,Sensitivity,ROC AUC
0,Results_MLP-CMI,56.07 ± 1.35,54.81 ± 2.31,61.21 ± 3.08,62.28 ± 1.15
1,Results_MLP-Glasso,58.81 ± 2.19,59.17 ± 3.56,57.44 ± 3.47,61.51 ± 0.99
2,Results_MLP-CIB,57.28 ± 0.32,56.16 ± 1.61,61.36 ± 6.39,62.81 ± 1.43
3,Results_MLP-Majority,52.64 ± 5.17,49.09 ± 9.44,65.86 ± 11.24,62.34 ± 1.34
4,Results_GRU-Majority,65.51 ± 0.57,64.98 ± 1.03,67.75 ± 4.17,69.81 ± 1.84
5,Results_GRU-CIB,66.67 ± 3.53,66.58 ± 4.43,66.93 ± 5.31,69.03 ± 3.91
6,Results_GRU-Glasso,68.25 ± 1.76,69.71 ± 1.39,62.48 ± 6.53,69.52 ± 2.18
7,Results_GRU-CMI,51.53 ± 4.71,53.28 ± 5.96,44.39 ± 2.21,48.96 ± 3.94
8,Results_JHF-Glasso,66.24 ± 1.71,67.00 ± 2.12,63.18 ± 3.75,70.05 ± 1.54
9,Results_JHF-CIB,66.03 ± 2.25,65.74 ± 3.09,67.09 ± 1.13,70.49 ± 2.48


In [5]:
# The first number in each "mean ± std" ROC AUC string is the mean.
roc_auc_text = results_df['ROC AUC'].str.extract(r'([0-9.]+)')[0]
results_df['ROC_AUC_numeric'] = roc_auc_text.astype(float)

# The group label is the part of the model name after its first hyphen.
results_df['Group'] = results_df['Model'].str.extract(r'-(.*)$')[0]

# Average the ROC AUC means within each group.
grouped_results = (
    results_df
    .groupby('Group')
    .agg({'ROC_AUC_numeric': 'mean'})
    .reset_index()
    .rename(columns={'ROC_AUC_numeric': 'Mean_ROC_AUC'})
)
grouped_results

Unnamed: 0,Group,Mean_ROC_AUC
0,CIB,69.143333
1,CMI,58.503333
2,Glasso,68.583333
3,Majority,69.795


In [6]:
# Columns that hold "mean ± std" strings.
metric_columns = ['Accuracy', 'Specificity', 'Sensitivity', 'ROC AUC']

# Split every formatted string into two float columns: <col>_Mean and <col>_StdDev.
for col in metric_columns:
    split_parts = results_df[col].str.split(' ± ', expand=True)
    results_df[[f'{col}_Mean', f'{col}_StdDev']] = split_parts.astype(float)

mean_column_names = [f'{col}_Mean' for col in metric_columns]
stddev_column_names = [f'{col}_StdDev' for col in metric_columns]

# Average the per-model means and the per-model standard deviations over all rows.
mean_values = results_df[mean_column_names].mean()
mean_stddevs = results_df[stddev_column_names].mean()

print("Mean of the values:")
print(mean_values)
print("\nMean of the standard deviations:")
print(mean_stddevs)

Mean of the values:
Accuracy_Mean       62.443333
Specificity_Mean    62.121250
Sensitivity_Mean    63.639583
ROC AUC_Mean        66.506250
dtype: float64

Mean of the standard deviations:
Accuracy_StdDev       2.447500
Specificity_StdDev    3.687917
Sensitivity_StdDev    4.440000
ROC AUC_StdDev        1.708750
dtype: float64
