In [None]:
import numpy as np
import pandas as pd
import ast
from typing import List, Dict, Any

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

from sklearn.model_selection import GroupKFold
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc, precision_recall_curve, average_precision_score


def plot_model_comparisons(
    folder_path: str,
    prob_column: str = "soft_prob",
    true_column: str = "y_true",
    roc_output: str = "comparison_roc.png",
    pr_output: str = "comparison_pr.png"
):
    """
    Plot combined ROC and Precision–Recall curves for every prediction CSV in a folder.

    Each ``*.csv`` file found in ``folder_path`` is treated as one model; the
    file name without its extension becomes the legend label. Files that lack
    either required column are reported and skipped. Both figures are written
    into ``folder_path``.

    NOTE: a later cell in this notebook defines another function with the same
    name (it takes an explicit ``csv_files`` list). Once that cell has run, it
    replaces this definition.

    Args:
        folder_path: directory containing CSV files (one per model)
        prob_column: column name for predicted probabilities
        true_column: column name for true labels
        roc_output: filename for ROC comparison plot
        pr_output: filename for PR comparison plot

    Returns:
        None. The two PNG files are the result.

    Raises:
        ValueError: if the folder contains no CSV files, or if none of them
            has both ``true_column`` and ``prob_column``.
    """

    # os.listdir returns entries in arbitrary order; sort so the legend order
    # (and therefore the saved figure) is reproducible between runs.
    csv_files = sorted(f for f in os.listdir(folder_path) if f.endswith(".csv"))

    if len(csv_files) == 0:
        raise ValueError("No CSV files found in the folder.")

    print(f"📁 Found {len(csv_files)} CSV files to compare.")

    # --------------------------
    # 0. LOAD EVERY FILE ONCE
    # --------------------------
    # Both figures use the same data, so read and validate each CSV a single
    # time instead of once per figure.
    predictions = []  # (model_name, y_true, y_prob)
    for file in csv_files:
        df = pd.read_csv(os.path.join(folder_path, file))

        if true_column not in df or prob_column not in df:
            print(f"⚠ Skipping {file}: required columns missing")
            continue

        model_name = os.path.splitext(file)[0]
        predictions.append((model_name, df[true_column].values, df[prob_column].values))

    if not predictions:
        # Without this guard two empty figures would be saved silently.
        raise ValueError(
            f"None of the CSV files contain both '{true_column}' and '{prob_column}' columns."
        )

    # --------------------------
    # 1. ROC COMPARISON PLOT
    # --------------------------
    fig, ax = plt.subplots(figsize=(9, 7))

    for model_name, y_true, y_prob in predictions:
        fpr, tpr, _ = roc_curve(y_true, y_prob)
        auc_score = auc(fpr, tpr)
        ax.plot(fpr, tpr, label=f"{model_name} (AUC={auc_score:.3f})")

    # Dashed diagonal as the reference line.
    ax.plot([0, 1], [0, 1], "k--")

    ax.set_title("ROC Curve Comparison Across Models")
    ax.set_xlabel("False Positive Rate")
    ax.set_ylabel("True Positive Rate")
    ax.legend()
    ax.grid(True)

    save_roc = os.path.join(folder_path, roc_output)
    fig.savefig(save_roc, dpi=300)
    plt.close(fig)

    print(f" ROC comparison plot saved at: {save_roc}")

    # --------------------------
    # 2. PRECISION–RECALL COMPARISON
    # --------------------------
    fig, ax = plt.subplots(figsize=(9, 7))

    for model_name, y_true, y_prob in predictions:
        precision, recall, _ = precision_recall_curve(y_true, y_prob)
        avg_precision = average_precision_score(y_true, y_prob)
        ax.plot(recall, precision, label=f"{model_name} (AP={avg_precision:.3f})")

    ax.set_title("Precision–Recall Curve Comparison Across Models")
    ax.set_xlabel("Recall")
    ax.set_ylabel("Precision")
    ax.legend()
    ax.grid(True)

    save_pr = os.path.join(folder_path, pr_output)
    fig.savefig(save_pr, dpi=300)
    plt.close(fig)

    print(f" Precision–Recall comparison plot saved at: {save_pr}")


In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import (
    roc_curve, auc,
    precision_recall_curve, average_precision_score,
    accuracy_score, precision_score, recall_score, f1_score,
    log_loss
)


def plot_model_comparisons(
    folder_path: str,
    csv_files=None,
    prob_column: str = "soft_prob",
    true_column: str = "y_true",
    roc_output: str = "comparison_roc.png",
    pr_output: str = "comparison_pr.png",
    excel_output: str = "model_comparison_metrics.xlsx",
    threshold: float = 0.5
):
    """
    Compare several models from their prediction CSVs.

    For every usable CSV (one per model) this:
        - draws the model's curve on a combined ROC plot
        - draws the model's curve on a combined Precision–Recall plot
        - records AUC, average precision, accuracy, precision, recall,
          F1 and log loss in a table that is saved as an Excel file

    All three outputs are written into ``folder_path``. Files lacking either
    required column are reported and skipped. This definition reuses the name
    of the folder-scanning function defined in an earlier cell and replaces it
    once this cell has run.

    Args:
        folder_path : directory containing CSV files (one per model)
        csv_files   : file names (relative to ``folder_path``) to compare, in
                      legend order. A single name may be given as a string.
                      When omitted, every ``*.csv`` in ``folder_path`` is used,
                      sorted by name.
        prob_column : column name for predicted probabilities
        true_column : column name for true labels
        roc_output  : filename for the ROC comparison plot
        pr_output   : filename for the PR comparison plot
        excel_output: filename for the Excel metrics table
        threshold   : probabilities >= this value are counted as the positive
                      class for accuracy / precision / recall / F1

    Returns:
        pandas.DataFrame with one row per model and the metric columns above.

    Raises:
        ValueError: if there are no CSV files to compare, or if none of them
            has both ``true_column`` and ``prob_column``.
    """

    if csv_files is None:
        # Fall back to discovering the files; sorted for a reproducible order.
        csv_files = sorted(f for f in os.listdir(folder_path) if f.endswith(".csv"))
    elif isinstance(csv_files, str):
        # A bare string would otherwise be iterated character by character.
        csv_files = [csv_files]
    else:
        csv_files = list(csv_files)

    if len(csv_files) == 0:
        raise ValueError("No CSV files found in the folder.")

    print(f"📁 Found {len(csv_files)} CSV files to compare.\n")

    # --------------------------
    # 0. LOAD + EVALUATE EACH MODEL ONCE
    # --------------------------
    # Both figures and the Excel table use the same data, so each CSV is read
    # a single time and average precision is computed once per model.
    metrics_list = []  # one dict per model, becomes the Excel table
    curves = []        # per-model curve data for the two figures

    for file in csv_files:
        df = pd.read_csv(os.path.join(folder_path, file))

        if true_column not in df or prob_column not in df:
            print(f"⚠ Skipping {file}: missing required columns")
            continue

        model_name = os.path.splitext(file)[0]
        y_true = df[true_column].values
        y_prob = df[prob_column].values

        fpr, tpr, _ = roc_curve(y_true, y_prob)
        auc_score = auc(fpr, tpr)

        precision, recall, _ = precision_recall_curve(y_true, y_prob)
        avg_precision = average_precision_score(y_true, y_prob)

        # Hard predictions for the threshold-dependent metrics.
        y_pred = (y_prob >= threshold).astype(int)

        metrics_list.append({
            "model_name": model_name,
            "AUC": auc_score,
            "Average Precision": avg_precision,
            "Accuracy": accuracy_score(y_true, y_pred),
            "Precision": precision_score(y_true, y_pred, zero_division=0),
            "Recall": recall_score(y_true, y_pred, zero_division=0),
            "F1 Score": f1_score(y_true, y_pred, zero_division=0),
            "Log Loss": log_loss(y_true, y_prob, labels=[0, 1])
        })
        curves.append({
            "model_name": model_name,
            "fpr": fpr,
            "tpr": tpr,
            "auc": auc_score,
            "precision": precision,
            "recall": recall,
            "ap": avg_precision,
        })

    if not curves:
        # Without this guard empty figures and an empty Excel file would be
        # written silently.
        raise ValueError(
            f"None of the CSV files contain both '{true_column}' and '{prob_column}' columns."
        )

    # --------------------------
    # 1. ROC COMPARISON PLOT
    # --------------------------
    fig, ax = plt.subplots(figsize=(9, 7))

    for curve in curves:
        ax.plot(
            curve["fpr"], curve["tpr"],
            label=f"{curve['model_name']} (AUC={curve['auc']:.3f})"
        )

    # Dashed diagonal as the reference line.
    ax.plot([0, 1], [0, 1], "k--")
    ax.set_title("ROC Curve Comparison Across Models")
    ax.set_xlabel("False Positive Rate")
    ax.set_ylabel("True Positive Rate")
    ax.legend()
    ax.grid(True)

    roc_path = os.path.join(folder_path, roc_output)
    fig.savefig(roc_path, dpi=300)
    plt.close(fig)
    print(f"ROC comparison saved → {roc_path}")

    # --------------------------
    # 2. PR CURVE COMPARISON
    # --------------------------
    fig, ax = plt.subplots(figsize=(9, 7))

    for curve in curves:
        ax.plot(
            curve["recall"], curve["precision"],
            label=f"{curve['model_name']} (AP={curve['ap']:.3f})"
        )

    ax.set_title("Precision–Recall Curve Comparison")
    ax.set_xlabel("Recall")
    ax.set_ylabel("Precision")
    ax.legend()
    ax.grid(True)

    pr_path = os.path.join(folder_path, pr_output)
    fig.savefig(pr_path, dpi=300)
    plt.close(fig)
    print(f" PR comparison saved → {pr_path}")

    # --------------------------
    # 3. Excel File with Metrics
    # --------------------------
    metrics_df = pd.DataFrame(metrics_list)
    excel_path = os.path.join(folder_path, excel_output)
    metrics_df.to_excel(excel_path, index=False)

    print(f" Excel metrics saved → {excel_path}\n")
    print(metrics_df)

    return metrics_df


In [None]:
# Folder holding one prediction CSV per model; the comparison plots and the
# Excel summary are written into this same folder.
# NOTE(review): machine-specific absolute path — adjust before running elsewhere.
files_path = 'C:\\Users\\Student\\Desktop\\Abouhashem\\DeepLearningProject\\OrganizedWork\\Allmetrics\\'
csv_files = ['1DCNN.csv','LSTM.csv','TCN.csv','1DCNN_Transformer.csv','TCN_Transformer.csv','LSTM_Transformer.csv']

# plot_model_comparisons already prints the metrics table, so keep the returned
# frame in a variable instead of letting the cell echo it a second time.
comparison_metrics = plot_model_comparisons(files_path, csv_files)

📁 Found 6 CSV files to compare.

📈 ROC comparison saved → C:\Users\Student\Desktop\Abouhashem\DeepLearningProject\OrganizedWork\Allmetrics\comparison_roc.png
📉 PR comparison saved → C:\Users\Student\Desktop\Abouhashem\DeepLearningProject\OrganizedWork\Allmetrics\comparison_pr.png
📘 Excel metrics saved → C:\Users\Student\Desktop\Abouhashem\DeepLearningProject\OrganizedWork\Allmetrics\model_comparison_metrics.xlsx

          model_name       AUC  Average Precision  Accuracy  Precision  \
0              1DCNN  0.867521           0.552674  0.861982   0.515723   
1               LSTM  0.877647           0.589217  0.868049   0.530726   
2                TCN  0.903697           0.640726  0.881699   0.574924   
3  1DCNN_Transformer  0.903405           0.638901  0.856421   0.498851   
4    TCN_Transformer  0.910537           0.686390  0.894843   0.622150   
5   LSTM_Transformer  0.922662           0.716490  0.897877   0.611570   

     Recall  F1 Score  Log Loss  
0  0.579505 

Unnamed: 0,model_name,AUC,Average Precision,Accuracy,Precision,Recall,F1 Score,Log Loss
0,1DCNN,0.867521,0.552674,0.861982,0.515723,0.579505,0.545757,0.334784
1,LSTM,0.877647,0.589217,0.868049,0.530726,0.671378,0.592824,0.350333
2,TCN,0.903697,0.640726,0.881699,0.574924,0.664311,0.616393,0.287344
3,1DCNN_Transformer,0.903405,0.638901,0.856421,0.498851,0.766784,0.604457,0.344762
4,TCN_Transformer,0.910537,0.68639,0.894843,0.62215,0.674912,0.647458,0.260551
5,LSTM_Transformer,0.922662,0.71649,0.897877,0.61157,0.784452,0.687307,0.273261
