# Script Plan

1. Imports 
2. Parameters and variables 
3. Data imports 
4. Load Pre-trained model 
5. Model building functions 
6. Cross validation loop

## IMPORTS

In [None]:
import os

import tensorflow as tf
from tensorflow import keras 
from keras.optimizers import Adam
from keras import regularizers
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import PIL
import PIL.Image
from datetime import datetime

import sklearn.metrics as metrics
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, ConfusionMatrixDisplay, roc_curve, RocCurveDisplay, roc_auc_score
from sklearn.utils import class_weight
from sklearn.utils.class_weight import compute_class_weight

import csv
import time 
from pathlib import Path

# print (tf.version.VERSION)

## TRAINING PARAMETERS & VARIABLES

In [None]:
### Training Parameters ###
seed = 2806
batch_size = 8 #16
max_epochs = 100
image_size = 300
img_width, img_height = image_size, image_size

### Label Information ###
training_style = "Ensemble"
Dataset = "Binary-1"
Pretrained_model = "ENB3"

# === Class labels for the selected dataset ===
# Resolved before any directory is created so that a typo in `Dataset`
# fails immediately instead of leaving an empty run folder behind.
if Dataset == "Binary-1":
    target_labels = ['0-1R', "2R"]
    nClasses = 2
elif Dataset == "Binary-2":
    target_labels = ['0', "1-2R"]
    nClasses = 2
else:
    raise ValueError(f"Unknown Dataset {Dataset!r}; expected 'Binary-1' or 'Binary-2'.")

# === Root directory of the project ===
project_dir = 'D://Rui//'

# === Pre-trained Net Location ===
# "ENB3" needs no local directory: that backbone is built from
# keras.applications in the cross-validation loop.
if Pretrained_model == "AF":
    pretrained_net_dir = project_dir + 'AFIBvsNOTNet(080120254x3cols)//'
elif Pretrained_model == "MI":
    pretrained_net_dir = project_dir + 'MulticlassMIPTBXLTraining(4x3)//'
elif Pretrained_model != "ENB3":
    raise ValueError(f"Unknown Pretrained_model {Pretrained_model!r}; expected 'ENB3', 'AF' or 'MI'.")

# === Create the run directory for saving/loading models ===
# The timestamp is fully zero-padded (DDMMYYYY_HHMM) so that, for example,
# 01:15 and 11:05 can no longer produce the same directory suffix.
date = datetime.now()
net_dir = project_dir + 'Transplant_prediction_models//' + training_style + Pretrained_model + '_' + Dataset + '_' + "5_folds" + '_' + date.strftime("%d%m%Y_%H%M")
# makedirs also creates the missing 'Transplant_prediction_models' parent.
# Reusing an existing directory is allowed here; the per-fold os.mkdir in
# the cross-validation loop still refuses to overwrite an earlier run.
os.makedirs(net_dir, exist_ok=True)

print("target_labels =", target_labels)
print("nClasses = ", nClasses)
# print(pretrained_net_dir)

## DATA IMPORT

In [None]:
# Data Input Folders
# Both dataset variants sit under the same parent folder; only the leaf
# folder name depends on the selected `Dataset`.
_dataset_parent = "D://Rui//NIDACT2 Transplant ECGs//Multiclass_HTx_Rejection_Dataset_1(300_300)//"
if Dataset == "Binary-2":
    dataset_dir = _dataset_parent + "Binary2_0vs1-2R (5-Folds)"
elif Dataset == "Binary-1":
    dataset_dir = _dataset_parent + "Binary1_0-1Rvs2R (5-Folds)"

print("dataset_dir =", dataset_dir)

### Fold Directory Discovery

In [None]:
# 1. Convert the string to a Path object
dataset_path = Path(dataset_dir)
# 2. Collect the sub-directories (one per fold).
#    Path.iterdir() yields entries in arbitrary order, so the list is sorted
#    to make the fold index -> directory mapping reproducible between runs
#    and machines (the validation/test/train roles are assigned by index).
fold_dirs = sorted(d for d in dataset_path.iterdir() if d.is_dir())
# Get the number of folds
num_folds = len(fold_dirs)
print(f"Found {num_folds} fold directories.")


## Model building 

### Add classifier function

In [None]:
# USE THIS ONE script 
def add_classification_layers(cutModel, l1_reg=0.0001, l2_reg=0.0001,
                              dropout_rate_1=0.4, dropout_rate_2=0.5,
                              num_classes=None):
    """Attach a dense classification head to a truncated backbone.

    The head is: global average pooling -> Dense(128, relu) -> Dropout ->
    Dense(64, relu) -> Dropout -> Dense(num_classes, softmax). Both hidden
    dense layers use the same L1/L2 kernel regularizer.

    Args:
        cutModel: Keras model whose ``output`` feeds the head and whose
            ``input`` becomes the input of the returned model.
        l1_reg: L1 factor of the kernel regularizer on both hidden layers.
        l2_reg: L2 factor of the kernel regularizer on both hidden layers.
        dropout_rate_1: Dropout rate after the 128-unit layer.
        dropout_rate_2: Dropout rate after the 64-unit layer.
        num_classes: Number of softmax outputs. When None, the module-level
            ``nClasses`` is used, which is what the original hard-coded
            version did.

    Returns:
        A ``tf.keras.Model`` mapping ``cutModel.input`` to the softmax output.
    """
    if num_classes is None:
        num_classes = nClasses

    features = tf.keras.layers.GlobalAveragePooling2D()(cutModel.output)

    # --- First dense layer with customizable regularization and dropout ---
    hidden = tf.keras.layers.Dense(
        128,
        activation="relu",
        kernel_regularizer=regularizers.L1L2(l1=l1_reg, l2=l2_reg),
    )(features)
    hidden = tf.keras.layers.Dropout(dropout_rate_1)(hidden)

    # --- Second dense layer with customizable regularization and dropout ---
    hidden = tf.keras.layers.Dense(
        64,
        activation="relu",
        kernel_regularizer=regularizers.L1L2(l1=l1_reg, l2=l2_reg),
    )(hidden)
    hidden = tf.keras.layers.Dropout(dropout_rate_2)(hidden)

    probabilities = tf.keras.layers.Dense(num_classes, activation="softmax")(hidden)
    return tf.keras.Model(inputs=cutModel.input, outputs=probabilities)


### Training function

In [None]:
# USE THIS ONE script 
def training_function(full_model, filepath, train_ds, val_ds):
    """Compile and train ``full_model``, checkpointing the best epoch.

    The model is compiled with Adam (initial learning rate 1e-4) and binary
    cross-entropy, then fitted for at most ``max_epochs`` epochs with:
    learning-rate reduction on a validation-loss plateau, early stopping
    that restores the best weights, and a checkpoint that saves the full
    model to ``filepath`` whenever the validation loss improves.

    Args:
        full_model: Keras model to compile and train (modified in place).
        filepath: Destination passed to ``ModelCheckpoint``.
        train_ds: Iterable of ``(images, one_hot_labels)`` batches used for
            training; it is iterated once up front to derive class weights.
        val_ds: Validation data passed to ``fit``.

    Returns:
        The history object returned by ``full_model.fit``.
    """
    # Define optimizer with an initial learning rate
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)

    # Reduce the learning rate when the validation loss stops improving.
    reduce_lr_patience = 10
    lr_callback = tf.keras.callbacks.ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.2,  # Reduce LR by a factor of 5 (1/5 = 0.2)
        patience=reduce_lr_patience,
        min_lr=0.00001,  # Don't let the LR get ridiculously small
        verbose=1
    )

    # Early stopping. Taken from tf.keras.callbacks like the other two
    # callbacks so that all three come from the same Keras implementation.
    early_stopping_patience = 25 #50
    early_stopper = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',
        patience=early_stopping_patience,
        verbose=1,
        restore_best_weights=True  # Automatically restore the model from the best epoch
    )

    saving_callback = tf.keras.callbacks.ModelCheckpoint(
        filepath=filepath,
        monitor="val_loss",
        verbose=1,
        save_best_only=True,
        save_weights_only=False,
        save_freq="epoch",
        initial_value_threshold=None
    )

    full_model.compile(optimizer=optimizer,
                       loss=tf.keras.losses.BinaryCrossentropy(), #CategoricalFocalCrossentropy
                       metrics=['binary_accuracy'])

    # Class weights: n_samples / (n_classes * samples_in_class), computed from
    # the one-hot label matrix with a single vectorized column sum.
    labels_train = np.concatenate([np.array(label) for _, label in train_ds])
    n_samples, n_label_columns = labels_train.shape
    class_counts = labels_train.sum(axis=0)
    class_weights = {}
    for class_index in range(n_label_columns):
        count = float(class_counts[class_index])
        # A class without training samples never contributes to the loss, so
        # it gets weight 0.0 instead of a division by zero.
        class_weights[class_index] = (
            n_samples / (n_label_columns * count) if count > 0 else 0.0
        )
    # print(class_weights)

    # Training. `batch_size` is intentionally not passed: `train_ds` is already
    # batched, and Keras documents that `batch_size` must not be specified
    # for dataset inputs.
    results = full_model.fit(train_ds,
                             validation_data=val_ds,
                             epochs=max_epochs,
                             callbacks=[early_stopper, lr_callback, saving_callback],
                             class_weight=class_weights
                             )
    return results

### Write results function

In [None]:
def collect_fold_results(labels_test, ensemble_prediction, ensemble_scores, nClasses, t_start, t_end, fold_number=None, output_base_dir=None):
    """Compute the summary metrics of one fold and return them as a dict.

    Args:
        labels_test: True class indices, one per sample.
        ensemble_prediction: Predicted class indices, one per sample.
        ensemble_scores: 2-D score array; in the binary case column 1 is
            used as the positive-class score.
        nClasses: Number of classes. 2 selects the binary metrics, values
            above 2 select macro-averaged metrics, anything else yields an
            empty dict.
        t_start: Training start timestamp.
        t_end: Training end timestamp.
        fold_number: Fold identifier stored under 'Fold'; 'Overall' is
            stored when it is None.
        output_base_dir: When given (binary case), the ROC curve is saved
            there as ``ROC_Fold_<n>.png`` or ``ROC_Overall.png``.

    Returns:
        dict: Metric name -> value, or ``{}`` when ``nClasses`` < 2.
    """
    results = {}  # Stays empty when nClasses is neither 2 nor greater than 2
    fold_label = fold_number if fold_number is not None else 'Overall'

    # Binary Classifier (nClasses == 2)
    if nClasses == 2:
        positive_labels = int(np.sum(labels_test))
        negative_labels = len(labels_test) - positive_labels
        # labels=[0, 1] forces a 2x2 matrix even when only one class occurs in
        # this fold; otherwise the four-way unpacking below would fail.
        tn, fp, fn, tp = confusion_matrix(labels_test, ensemble_prediction, labels=[0, 1]).ravel()
        accuracy = accuracy_score(labels_test, ensemble_prediction)
        # zero_division=0 returns the same value as the default but without
        # emitting a warning when a class is never predicted.
        precision = precision_score(labels_test, ensemble_prediction, zero_division=0)
        recall = recall_score(labels_test, ensemble_prediction, zero_division=0)
        f1 = f1_score(labels_test, ensemble_prediction, zero_division=0)
        train_time = (t_end - t_start)
        positive_scores = ensemble_scores[:, 1]
        try:
            roc_auc = roc_auc_score(labels_test, positive_scores)
        except ValueError:
            # roc_auc_score rejects labels containing a single class; report
            # NaN like the multiclass path of _calculate_metrics does.
            roc_auc = np.nan

        # Store results in a dictionary
        results = {
            'Fold': fold_label,
            'Positive Labels': int(positive_labels),
            'Negative Labels': int(negative_labels),
            'TP': int(tp),
            'FP': int(fp),
            'TN': int(tn),
            'FN': int(fn),
            'Accuracy': float(accuracy),
            'Precision': float(precision),
            'Recall': float(recall),
            'F1-score': float(f1),
            'ROC AUC': float(roc_auc),
            'Training Time (s)': float(train_time)
        }

        if output_base_dir:
            os.makedirs(output_base_dir, exist_ok=True)
            fpr, tpr, _ = roc_curve(labels_test, positive_scores)
            roc_display = RocCurveDisplay(fpr=fpr, tpr=tpr).plot()
            if fold_number is not None:
                roc_display.figure_.savefig(f"{output_base_dir}/ROC_Fold_{fold_number}.png")
            else:
                roc_display.figure_.savefig(f"{output_base_dir}/ROC_Overall.png")
            plt.close(roc_display.figure_)  # Release the figure's memory

    # Multi-class Classifier (nClasses > 2)
    elif nClasses > 2:
        accuracy = accuracy_score(labels_test, ensemble_prediction)
        f1_macro = f1_score(labels_test, ensemble_prediction, average='macro')
        precision_macro = precision_score(labels_test, ensemble_prediction, average='macro')
        recall_macro = recall_score(labels_test, ensemble_prediction, average='macro')
        train_time = (t_end - t_start)

        results = {
            'Fold': fold_label,
            'Accuracy': float(accuracy),
            'Precision (Macro)': float(precision_macro),
            'Recall (Macro)': float(recall_macro),
            'F1-score (Macro)': float(f1_macro),
            'Training Time (s)': float(train_time)
        }
        if output_base_dir:
            # No multi-class ROC plot is produced here; only a notice is printed.
            print(f"Note: ROC curve generation for multi-class (nClasses={nClasses}) not fully implemented here.")

    return results

In [None]:
# --- Calculate Metrics ---
def _calculate_metrics(labels_test, prediction, scores, nClasses, t_start, t_end, model_name=""):
    """
    Calculates and returns a dictionary of performance metrics.
    Does NOT write any files or plot anything.

    Args:
        labels_test (np.array): True labels as class indices.
        prediction (np.array): Predicted labels as class indices.
        scores (np.array): Prediction scores (probabilities), one column per class.
        nClasses (int): Number of classes.
        t_start (float): Start time for training.
        t_end (float): End time for training.
        model_name (str, optional): Name of the model for identification in results. Defaults to "".

    Returns:
        dict: Calculated metrics. Only 'Model Name' is present when nClasses < 2.
    """
    # Named metrics_row so the sklearn `metrics` module alias imported at the
    # top of the file is not shadowed inside this function.
    metrics_row = {'Model Name': model_name}
    train_time = (t_end - t_start)

    if nClasses == 2:
        positive_labels = int(np.sum(labels_test))
        negative_labels = len(labels_test) - positive_labels
        # labels=[0, 1] keeps the matrix 2x2 even if only one class is present,
        # so the four-way unpacking cannot fail.
        tn, fp, fn, tp = confusion_matrix(labels_test, prediction, labels=[0, 1]).ravel()
        accuracy = accuracy_score(labels_test, prediction)
        precision = precision_score(labels_test, prediction, zero_division=0)
        recall = recall_score(labels_test, prediction, zero_division=0)
        f1 = f1_score(labels_test, prediction, zero_division=0)
        try:
            roc_auc = roc_auc_score(labels_test, scores[:, 1])
        except ValueError:
            # Same fallback as the multiclass branch below (e.g. when the
            # labels contain a single class).
            roc_auc = np.nan

        metrics_row.update({
            'Positive Labels': positive_labels,
            'Negative Labels': int(negative_labels),
            'TP': int(tp), 'FP': int(fp), 'TN': int(tn), 'FN': int(fn),
            'Accuracy': float(accuracy),
            'Precision': float(precision),
            'Recall': float(recall),
            'F1-score': float(f1),
            'ROC AUC': float(roc_auc),
            'Training Time': train_time
        })
    elif nClasses > 2:
        # For combined table, let's use overall averages
        accuracy = accuracy_score(labels_test, prediction)
        precision = precision_score(labels_test, prediction, average='macro', zero_division=0)
        recall = recall_score(labels_test, prediction, average='macro', zero_division=0)
        f1 = f1_score(labels_test, prediction, average='macro', zero_division=0)

        try:
            roc_auc = roc_auc_score(labels_test, scores, multi_class='ovr', average='macro')
        except ValueError:
            roc_auc = np.nan  # ROC AUC could not be computed for these inputs

        metrics_row.update({
            'Accuracy': accuracy,
            'Precision (Macro)': precision,
            'Recall (Macro)': recall,
            'F1-score (Macro)': f1,
            'ROC AUC (Macro OVR)': roc_auc,
            'Training Time': train_time
        })

    return metrics_row

# --- individual model performance: only saves ROC plots ---
def _save_individual_model_roc_plot(labels_test, scores, nClasses, writeDir, filename_identifier="", target_labels=None):
    """
    Saves individual model ROC plots to the specified directory.
    Does NOT save any CSV files.

    Args:
        labels_test (np.array): True labels. For the multiclass case either
            1-D class indices or a 2-D one-hot matrix is accepted.
        scores (np.array): Prediction scores (probabilities), one column per class.
        nClasses (int): Number of classes.
        writeDir (str): The base directory where files will be saved.
        filename_identifier (str, optional): An identifier to prepend to filenames.
                                            Defaults to "".
        target_labels (list, optional): List of class names for multiclass plotting.
    """
    # Ensure the base directory exists before writing any files
    os.makedirs(writeDir, exist_ok=True)

    # Binary Classifier: a single curve from the positive-class column
    if nClasses == 2:
        fpr, tpr, _ = roc_curve(labels_test, scores[:, 1])
        roc_display = RocCurveDisplay(fpr=fpr, tpr=tpr).plot()
        roc_display.figure_.savefig(os.path.join(writeDir, f"{filename_identifier}ROC.png"))
        plt.close(roc_display.figure_)  # Close the figure to free memory

    # Multiclass: one one-vs-rest curve per class
    elif nClasses > 2:
        # Ensure target_labels are provided for multiclass ROC plotting
        if target_labels is None:
            print("Warning: target_labels not provided for multiclass ROC plotting. Using default names.")
            target_labels = [f"Class_{i}" for i in range(nClasses)]

        label_array = np.asarray(labels_test)
        for labelID in range(nClasses):
            name = target_labels[labelID]
            # roc_curve needs binary truths for this class. The caller in this
            # file passes 1-D class indices, which cannot be column-indexed,
            # so they are binarized here; one-hot input is still supported.
            if label_array.ndim == 1:
                class_truth = (label_array == labelID).astype(int)
            else:
                class_truth = label_array[:, labelID]
            fpr, tpr, _ = roc_curve(class_truth, scores[:, labelID])
            roc_display = RocCurveDisplay(fpr=fpr, tpr=tpr).plot()
            roc_display.figure_.savefig(os.path.join(writeDir, f"{filename_identifier}ROC_{name}.png"))
            plt.close(roc_display.figure_)  # Close the figure to free memory

    print(f"ROC plots saved with identifier '{filename_identifier}' in '{writeDir}'")

# --- Write Combined Ensemble Results to a Single CSV ---
def write_combined_ensemble_results_csv(all_metrics_data, writeDir, nClasses):
    """
    Dump the metric dictionaries of several models (ensemble included) into
    one CSV file named "SWAFP_Combined_Ensemble_Results.csv".

    Args:
        all_metrics_data (list): One metrics dictionary per model/ensemble;
                                 each becomes one CSV row.
        writeDir (str): Output directory; created if it does not exist.
        nClasses (int): Number of classes; selects the binary or the
                        multiclass column layout. Values below 2 only
                        print a warning and write nothing.
    """
    binary_columns = [
        'Model Name', 'Positive Labels', 'Negative Labels', 'TP', 'FP', 'TN', 'FN',
        'Accuracy', 'Precision', 'Recall', 'F1-score', 'ROC AUC', 'Training Time',
    ]
    # Column names produced by _calculate_metrics for the multiclass case
    multiclass_columns = [
        'Model Name', 'Accuracy', 'Precision (Macro)', 'Recall (Macro)',
        'F1-score (Macro)', 'ROC AUC (Macro OVR)', 'Training Time',
    ]

    # The directory is created first, even if the class count turns out to
    # be unsupported.
    os.makedirs(writeDir, exist_ok=True)
    combined_csv_path = os.path.join(writeDir, "SWAFP_Combined_Ensemble_Results.csv")

    if not (nClasses == 2 or nClasses > 2):
        print(f"Warning: nClasses={nClasses} is not supported for combined CSV header.")
        return

    fieldnames = binary_columns if nClasses == 2 else multiclass_columns

    with open(combined_csv_path, 'w', newline='') as csv_file:
        table = csv.DictWriter(csv_file, fieldnames=fieldnames)
        table.writeheader()
        table.writerows(all_metrics_data)
    print(f"Combined ensemble results saved to: {combined_csv_path}")

## Cross-Validation Loop

In [None]:
fold_accuracies = []
fold_losses = []
all_fold_results = []


def _load_fold_images(directory, shuffle):
    """Load one fold directory as a batched image dataset with one-hot labels."""
    return tf.keras.utils.image_dataset_from_directory(
        directory,
        label_mode='categorical',  # one-hot labels, one column per entry of target_labels
        class_names=target_labels,
        batch_size=batch_size,
        image_size=(img_height, img_width),
        shuffle=shuffle,
        seed=seed
    )


for i in range(num_folds):
    print(f"\n===== FOLD {i+1}/{num_folds} =====")

    ### ===== DATA ORGANISATION ===== ###
    # Fold i is the validation fold, the following fold (wrapping around to
    # the first one) is the "test" fold, and all remaining folds are used
    # for training.
    val_dir = fold_dirs[i]
    test_dir = fold_dirs[(i + 1) % num_folds]
    train_dirs = [d for d in fold_dirs if d != val_dir and d != test_dir]

    print(f"Validation fold: {val_dir.name}")
    print(f"Test fold: {test_dir.name}")
    print(f"Training folds: {[d.name for d in train_dirs]}")

    # Validation and test data are loaded WITHOUT shuffling. The evaluation
    # below iterates this dataset several times (once for the true labels,
    # once per model for the scores); a shuffled dataset is reshuffled on
    # every iteration, which would pair each score with the wrong label.
    val_dataset = _load_fold_images(val_dir, shuffle=False)
    test_dataset = _load_fold_images(test_dir, shuffle=False)

    # Sixty Forty Split: the test fold is merged into the validation data,
    # so with five folds three are trained on and two are held out.
    # NOTE(review): the held-out data also drives early stopping and
    # checkpoint selection, so the reported metrics are not from an
    # untouched test set - confirm this is intended.
    val_dataset = val_dataset.concatenate(test_dataset)

    # Load and combine the training datasets (shuffled)
    train_datasets = [_load_fold_images(train_dir, shuffle=True) for train_dir in train_dirs]
    train_dataset = train_datasets[0]
    for ds in train_datasets[1:]:
        train_dataset = train_dataset.concatenate(ds)

    # Class weights, printed for information only: training_function derives
    # the weights it passes to fit() on its own.
    train_labels = np.concatenate([labels.numpy() for _, labels in train_dataset])
    train_labels = np.argmax(train_labels, axis=1)
    class_weights_array = compute_class_weight(
        class_weight='balanced',
        classes=np.unique(train_labels),
        y=train_labels
    )
    class_weights = dict(enumerate(class_weights_array))
    print(f"Class Weights for Fold {i+1}: {class_weights}")

    filepath = os.path.join(net_dir, f"Fold{i+1}")
    os.mkdir(filepath)

    # ----- TRAINING & TESTING: USING SINGLE MODEL OR ENSEMBLE ENB3/AF/MI --------
    split_layers = [125, 198, 272, 385]
    results = []
    graph_labels = []  # This will store labels for each plot line
    # Per-model (start, end) timestamps so that every ensemble member reports
    # its own training time instead of the time of the last trained member.
    model_train_times = {}

    ### ====== TRAINING ===== ###
    # The fold timer spans the training of all ensemble members.
    t_start_fold = time.time()
    if Pretrained_model == "ENB3":
        print("--- Starting Ensemble ENB3 Training ---")
        ENB3_model = keras.applications.EfficientNetB3(
            include_top=True)

        for layer in ENB3_model.layers[:385]:
            layer.trainable = False  # false: freezing, true: unfreeze

        for split_layer in split_layers:
            cutModel = tf.keras.Model(ENB3_model.input, ENB3_model.layers[split_layer-1].output)
            EnsembleENB3_model = add_classification_layers(cutModel)

            modelpath = filepath + "//" + f"{training_style}_{split_layer}" + "_" + "model.keras"
            print(f"--- Training model truncated at layer {split_layer} --- ")
            t_start_model = time.time()
            history = training_function(EnsembleENB3_model, modelpath, train_dataset, val_dataset)
            t_end_model = time.time()
            model_train_times[split_layer] = (t_start_model, t_end_model)
            results.append(history)

        combined_filepath = os.path.join(net_dir, training_style)

    else:
        print(f"--- Starting Ensemble {Pretrained_model} Training ---")
        for idx, split_layer in enumerate(split_layers):
            loaded_model = tf.keras.models.load_model(
                    filepath=pretrained_net_dir + "//" + str(split_layer) + ".h5")

            for layer in loaded_model.layers[:split_layer]:
                layer.trainable = False  # false: freezing, true: unfreeze

            cutModel = tf.keras.Model(loaded_model.input, loaded_model.layers[split_layer-1].output)
            full_model = add_classification_layers(cutModel)

            modelpath = filepath + "//" + f"{training_style}_{split_layer}" + "_" + "model.keras"
            # Run training and append the history object to the results list
            print(f"--- Training model truncated at layer {split_layer} --- ")
            t_start_model = time.time()
            results.append(training_function(full_model, modelpath, train_dataset, val_dataset))
            t_end_model = time.time()
            model_train_times[split_layer] = (t_start_model, t_end_model)
    t_end_fold = time.time()

    ### ===== TESTING ===== ###
    all_ensemble_metrics = []  # List to store metrics for all models and ensemble
    ensemble_scores_list = []  # To collect scores for ensemble averaging

    # True labels of the held-out data, in dataset order. The order is stable
    # across passes because the dataset was loaded with shuffle=False.
    true_labels_template = np.concatenate([np.array(l) for _, l in val_dataset])
    binary_true_labels = np.argmax(true_labels_template, axis=1)

    for idx, split_layer in enumerate(split_layers):
        # Reload the best checkpoint written by training_function
        modelpath = filepath + "//" + f"{training_style}_{split_layer}" + "_" + "model.keras"
        model = tf.keras.models.load_model(modelpath)
        temp_score = []
        for batch, label in val_dataset:  # Re-iterate val_ds to get scores for current model
            batch_score = model(batch)
            temp_score.append(np.array(batch_score))
        model_score = np.concatenate(temp_score)

        # model_prediction is derived from model_score for this specific model
        model_prediction = np.argmax(model_score, axis=1)

        # Save individual ROC plot for this model
        _save_individual_model_roc_plot(
            binary_true_labels, model_score, nClasses,
            writeDir=filepath, filename_identifier=f"Model_{split_layer}_",
            target_labels=target_labels
            )
        print(f"Ensemble Model {split_layer} ROC plot saved.")

        # Calculate metrics for this individual model and add to list
        t_start_model, t_end_model = model_train_times[split_layer]
        model_metrics = _calculate_metrics(
            binary_true_labels, model_prediction, model_score, nClasses,
            t_start=t_start_model,
            t_end=t_end_model,
            model_name=f"Ensemble Model {split_layer}"
        )
        all_ensemble_metrics.append(model_metrics)
        ensemble_scores_list.append(model_score)

    # Ensemble = mean of the per-model score arrays (axis 0 is the model axis)
    ensemble_scores_array = np.array(ensemble_scores_list)
    average_ensemble_score = np.mean(ensemble_scores_array, axis=0)
    ensemble_prediction = np.argmax(average_ensemble_score, axis=1)

    # Save individual ROC plot for the ensemble average
    _save_individual_model_roc_plot(
        binary_true_labels, average_ensemble_score, nClasses,
        writeDir=filepath, filename_identifier="Ensemble_Average_",
        target_labels=target_labels
    )
    print("Ensemble Average ROC plot saved.")

    # Calculate metrics for the ensemble average and add to list
    ensemble_metrics = _calculate_metrics(
        binary_true_labels, ensemble_prediction, average_ensemble_score, nClasses,
        t_start=0, t_end=0,  # Averaging involves no training, so the time is reported as 0
        model_name="Ensemble Average"
    )
    all_ensemble_metrics.append(ensemble_metrics)

    # Finally, write all collected metrics to a single combined CSV
    write_combined_ensemble_results_csv(all_ensemble_metrics, filepath, nClasses)
    print("Combined ensemble results CSV saved.")

    model_prediction = ensemble_prediction
    model_score = average_ensemble_score

    ### ===== Collect results for the current fold ===== ###
    fold_metrics = collect_fold_results(
        labels_test=binary_true_labels,
        ensemble_prediction=model_prediction,
        ensemble_scores=model_score,
        nClasses=nClasses,
        t_start=t_start_fold,
        t_end=t_end_fold,
        fold_number=i + 1,        # Pass the current fold number
        output_base_dir=net_dir   # Pass the directory to save ROC plots
    )
    all_fold_results.append(fold_metrics)
    print(f"Metrics for Fold {i+1}: {fold_metrics}")


## Print Results

In [None]:
# --- After the loop: Consolidate and save all results to a single CSV ---
print("\n===== Consolidating Results =====")

# One row per fold, built from the dictionaries returned by collect_fold_results
results_df = pd.DataFrame(all_fold_results)

# --- DEBUGGING STEP: Print the DataFrame and its columns immediately after creation ---
print("\n--- Debugging results_df content after creation ---")
print("Is results_df empty?", results_df.empty)
if not results_df.empty:
    print("results_df columns:", results_df.columns.tolist())
    print("First 3 rows of results_df (if available):\n", results_df.head(3))
else:
    print("results_df is empty. No results were collected from folds.")
    print("Please check your cross-validation loop and 'collect_fold_results' function.")
    # Nothing to average or save. SystemExit is raised directly because
    # exit() is the interactive-shell helper: it is not meant for programs
    # and shuts the whole kernel down when called inside a notebook.
    raise SystemExit

# Add an "Average" row for overall performance
if 'Fold' in results_df.columns:
    cols_to_average = [col for col in results_df.columns if col != 'Fold']
else:
    print("\nWARNING: 'Fold' column not found in DataFrame. Attempting to average all numeric columns.")
    cols_to_average = results_df.select_dtypes(include=np.number).columns.tolist()

if cols_to_average:
    average_row = results_df[cols_to_average].mean().to_dict()
    average_row['Fold'] = 'Average'
    # Count-like columns are reported as whole numbers in the average row
    for col in ['Positive Labels', 'Negative Labels', 'TP', 'FP', 'TN', 'FN']:
        # A NaN mean (column empty for every fold) cannot be rounded to an
        # int, so it is left as NaN instead of raising.
        if col in average_row and not pd.isna(average_row[col]):
            average_row[col] = int(round(average_row[col]))
    results_df = pd.concat([results_df, pd.DataFrame([average_row])], ignore_index=True)
else:
    print("\nWARNING: No numeric columns found in results_df to calculate an average.")

# Define the path for the final CSV file
final_output_csv_path = f"{net_dir}/5_Folds_Cross_Validation_Summary.csv"

# Save the DataFrame to a single CSV file
results_df.to_csv(final_output_csv_path, index=False)

print(f"\nAll {num_folds} fold results and overall average saved to: {final_output_csv_path}")
print("\nFinal Results DataFrame:")
print(results_df.to_string())

In [None]:
# if training_style == "SWOIP":
    #     SWOIP_model = keras.applications.EfficientNetB3(
    #         include_top = True)
    #     for layer in SWOIP_model.layers[:385]: #frozen to layer 125, unfreezed the rest 
    #         layer.trainable = False 
    #     ### ====== TRAINING ===== ###
    #     split_layer = 385
    #     cutModel = tf.keras.Model(SWOIP_model.input, SWOIP_model.layers[split_layer-1].output)
    #     SWOIP_model = add_classification_layers(cutModel)
    #     modelpath = filepath + "//" + training_style + "_" + "model.keras"
    #     results = training_function(SWOIP_model, modelpath, train_dataset, val_dataset)
    #     t_start_fold = time.time() # Start time for training
    #     results = training_function(full_model)
    #     t_end_fold = time.time() # End time for training
    #     ### ===== TESTING ===== ### 
    #     temp_score = []
    #     temp_labels = []
    #     for batch, label in val_dataset: # test_dataset 
    #         batch_score = full_model(batch)
    #         temp_score.append (np.array(batch_score))
    #         temp_labels.append (np.array (label))
    #     model_score = np.concatenate(temp_score)
    #     true_labels = np.concatenate(temp_labels)
    #     model_prediction = np.argmax(model_score, axis=1)
    #     binary_true_labels = np.argmax (true_labels, axis=1)

## Loss Curves

In [None]:
# Number of training histories held in `results`. The cross-validation loop
# resets the list at the start of every fold and appends one history per
# split layer, so after the loop it describes the last fold only.
len(results)

In [None]:
# Training vs. validation loss of the second trained model (index 1) in
# `results`, i.e. of the last fold processed by the cross-validation loop.
history_to_plot = results[1]
val_loss = history_to_plot.history['val_loss']
plt.figure(figsize=(10, 6))
plt.plot(history_to_plot.history['loss'], label='Training Loss')
plt.plot(val_loss, label='Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
# Without legend() the labels passed to plot() above are never displayed.
plt.legend()
plt.show()

In [None]:
# # Extract validation loss for easier access
# val_loss = history.history['val_loss']

# ### 1. Find the Best Epoch and Minimum Validation Loss
# # Use np.argmin to find the INDEX of the minimum loss
# best_epoch_idx = np.argmin(val_loss)
# min_val_loss = val_loss[best_epoch_idx]

# print(f"Minimum validation loss of {min_val_loss:.4f} was found at epoch {best_epoch_idx}.")

# ### 2. Calculate a Suggested Patience ###
# # Define how much worse the loss can get before we consider it overfitting
# tolerance_percentage = 0.05 # e.g., 5% worse than the minimum
# overfit_threshold = min_val_loss * (1 + tolerance_percentage)

# # Find the first epoch *after* the best epoch where loss exceeds the threshold
# patience_epoch = -1
# for i in range(best_epoch_idx + 1, len(val_loss)):
#     if val_loss[i] > overfit_threshold:
#         patience_epoch = i
#         break # Stop as soon as we find it

# # The suggested patience is the number of epochs between the best and the overfit point
# suggested_patience = -1
# if patience_epoch != -1:
#     suggested_patience = patience_epoch - best_epoch_idx

# ### 3. Plot the Results and Annotations ###
# plt.figure(figsize=(10, 6))
# plt.plot(history.history['loss'], label='Training Loss')
# plt.plot(val_loss, label='Validation Loss')

# # Annotate the minimum validation loss
# plt.axvline(x = best_epoch_idx, color = 'r', linestyle='--', label = f'Best Epoch: {best_epoch_idx}')
# plt.scatter(best_epoch_idx, min_val_loss, color = 'red', zorder = 5) # Mark the best point

# # Annotate the suggested patience point, if found
# if suggested_patience > 0:
#     plt.axvline(x=patience_epoch, color='g', linestyle='--', label=f'Recommended patience: {suggested_patience}')
#     plt.scatter(patience_epoch, val_loss[patience_epoch], color='green', zorder=5)
#     print(f"A good starting patience value could be around {suggested_patience}.")
#     print(f"This is how many epochs it took for the validation loss to increase by more than {tolerance_percentage:.0%} from its minimum (at {patience_epoch} epoch).")
# else:
#     print("\nThe model did not appear to overfit within the training run (based on the defined tolerance).")

# plt.title('Training and Validation Loss Analysis')
# plt.xlabel('Epochs')
# plt.ylabel('Loss (Categorical Cross Entropy)') 
# plt.legend()
# plt.grid(True)
# plt.show()