In [None]:
# Final results & analysis notebook

import os
import json

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

try:
    import seaborn as sns
    sns.set(style="whitegrid")
except ImportError:
    sns = None
    print("Seaborn non installé, les heatmaps seront moins jolies.")

# Main directories.
# The project root is resolved as the parent of the current working directory,
# which is correct when the notebook is run from a notebooks/ sub-folder.
PROJECT_ROOT = os.path.abspath(os.pardir)
# Folder in which the evaluation step is expected to have written its CSV files.
EVAL_DIR = os.path.join(PROJECT_ROOT, "experiments", "evaluation")

# Echo the resolved locations so a wrong working directory is spotted early.
print("PROJECT_ROOT =", PROJECT_ROOT)
print("EVAL_DIR     =", EVAL_DIR)


## Load Metric Files

In [None]:
# Paths of the evaluation artefacts, all expected directly under EVAL_DIR.
overall_path, per_class_path, conf_counts_path, conf_norm_path, preds_path = (
    os.path.join(EVAL_DIR, csv_name)
    for csv_name in (
        "test_overall_metrics.csv",
        "test_per_class_metrics.csv",
        "test_confusion_counts.csv",
        "test_confusion_normalized.csv",
        "test_predictions.csv",
    )
)

# Report which of the expected files are actually on disk.
print("Existence des fichiers :")
for candidate in (overall_path, per_class_path, conf_counts_path, conf_norm_path, preds_path):
    print(os.path.basename(candidate), "→", os.path.exists(candidate))


def _read_csv_if_present(csv_path, **read_kwargs):
    """Return ``pd.read_csv(csv_path, **read_kwargs)``, or None when the file does not exist."""
    if not os.path.exists(csv_path):
        return None
    return pd.read_csv(csv_path, **read_kwargs)


# A missing file yields None so that later cells can skip their section.
overall_df = _read_csv_if_present(overall_path)
per_class_df = _read_csv_if_present(per_class_path)
# Both confusion matrices use their first CSV column as the row index.
conf_counts_df = _read_csv_if_present(conf_counts_path, index_col=0)
conf_norm_df = _read_csv_if_present(conf_norm_path, index_col=0)
preds_df = _read_csv_if_present(preds_path)

# Last expression of the cell: shown as the cell output.
overall_df, per_class_df

## Display Overall Test Metrics

In [None]:
# Show the global test-set metrics as a table, then as a bar chart.
if overall_df is None:
    print("Pas de test_overall_metrics.csv trouvé.")
elif overall_df.empty:
    # A CSV holding only a header row would make `.iloc[0]` below raise
    # IndexError, so report it explicitly instead.
    print("test_overall_metrics.csv est vide.")
else:
    print("=== Métriques globales (test set) ===")
    display(overall_df)

    # Plot only the score columns that are actually present in the file.
    cols = [c for c in ["accuracy", "balanced_accuracy", "macro_f1", "weighted_f1"] if c in overall_df.columns]
    if cols:
        plt.figure()
        # The first row is the one plotted; any further rows are ignored.
        overall_df[cols].iloc[0].plot(kind="bar")
        plt.title("Métriques globales (test)")
        plt.ylabel("Score")
        # Fixed [0, 1] y-range for the score columns.
        plt.ylim(0, 1)
        plt.xticks(rotation=45)
        plt.tight_layout()
        plt.show()


## Per-Class Metrics Visualization

In [None]:
# Show the per-class metric table, then one bar chart per available metric.
if per_class_df is None:
    print("Pas de test_per_class_metrics.csv trouvé.")
else:
    print("=== Métriques par classe ===")
    display(per_class_df)

    # Labels for the x axis. Fall back to the row index when the CSV has no
    # "class" column instead of raising KeyError on the first plot.
    if "class" in per_class_df.columns:
        class_labels = per_class_df["class"]
    else:
        class_labels = per_class_df.index
    # Strings force a categorical axis: numeric class ids then get exactly one
    # bar and one tick each instead of being placed on a continuous scale.
    class_labels = class_labels.astype(str)

    # Metric column -> label used for both the title and the y axis.
    # A single loop replaces the previously duplicated F1 / precision / recall code.
    metric_labels = {"f1": "F1-score", "precision": "Precision", "recall": "Recall"}
    for metric, label in metric_labels.items():
        if metric not in per_class_df.columns:
            continue
        plt.figure(figsize=(8, 4))
        plt.bar(class_labels, per_class_df[metric])
        plt.title(f"{label} par classe")
        plt.xticks(rotation=45)
        # Fixed [0, 1] y-range so the three charts are directly comparable.
        plt.ylim(0, 1)
        plt.ylabel(label)
        plt.tight_layout()
        plt.show()


## Confusion Matrices (Counts & Normalized)

In [None]:
def plot_confusion_matrix(df_conf, title, cmap="Blues", fmt=None):
    """Draw a confusion matrix as an annotated heatmap.

    seaborn is used when it was imported successfully (module-level ``sns`` is
    not None); otherwise the matrix is drawn with ``plt.imshow`` and the cell
    annotations are written manually.

    Parameters
    ----------
    df_conf : pandas.DataFrame or None
        Matrix to draw. Rows are labelled "True" and columns "Predicted".
        When None, a message is printed and nothing is drawn.
    title : str
        Figure title.
    cmap : str
        Matplotlib colormap name.
    fmt : str or None
        Format spec for the cell annotations. When None it is inferred:
        ``"d"`` for integer-typed data whose title does not contain
        "normalized" (case-insensitive), ``".2f"`` otherwise.

    Returns
    -------
    None
    """
    if df_conf is None:
        print(f"{title}: matrice non disponible.")
        return

    values = df_conf.values
    if fmt is None:
        # "d" is only valid for integers; float-typed counts (e.g. a CSV that
        # stores 3.0) would make the annotation formatting raise.
        is_integer = np.issubdtype(values.dtype, np.integer)
        fmt = "d" if is_integer and "normalized" not in title.lower() else ".2f"

    plt.figure(figsize=(6, 5))
    if sns is not None:
        sns.heatmap(df_conf, annot=True, fmt=fmt, cmap=cmap, cbar=True)
        # seaborn cells span [k, k + 1], so their centres sit at k + 0.5.
        tick_offset = 0.5
    else:
        plt.imshow(values, cmap=cmap)
        n_rows, n_cols = values.shape
        for i in range(n_rows):
            for j in range(n_cols):
                plt.text(j, i, format(values[i, j], fmt), ha="center", va="center", color="black")
        plt.colorbar()
        # imshow centres each cell on integer coordinates: no offset needed.
        tick_offset = 0.0

    plt.title(title)
    plt.xlabel("Predicted")
    plt.ylabel("True")
    plt.xticks(np.arange(len(df_conf.columns)) + tick_offset, df_conf.columns, rotation=45)
    plt.yticks(np.arange(len(df_conf.index)) + tick_offset, df_conf.index, rotation=0)
    plt.tight_layout()
    plt.show()


# Draw both matrices: raw counts first, then the normalized version.
for _matrix, _title in (
    (conf_counts_df, "Confusion matrix (counts)"),
    (conf_norm_df, "Confusion matrix (normalized)"),
):
    plot_confusion_matrix(_matrix, _title)


## Error Analysis (Most Confused Pairs)

In [None]:
# Rank the off-diagonal cells of the normalized confusion matrix to surface the
# (true, predicted) pairs that are confused most often.
if conf_norm_df is not None:
    # Work on an explicit writable copy. Mutating `DataFrame.values` in place
    # is unreliable: it can be a temporary copy (mixed dtypes), in which case
    # the frame is left untouched, or a read-only view (pandas Copy-on-Write),
    # in which case np.fill_diagonal raises.
    rates = conf_norm_df.to_numpy(dtype=float, copy=True)
    # Zero the diagonal (by position) so correct predictions rank last.
    np.fill_diagonal(rates, 0.0)
    errors = pd.DataFrame(rates, index=conf_norm_df.index, columns=conf_norm_df.columns)

    # Flatten the matrix into one record per (true, pred) cell.
    error_list = [
        {"true": true_class, "pred": pred_class, "rate": rates[i, j]}
        for i, true_class in enumerate(errors.index)
        for j, pred_class in enumerate(errors.columns)
    ]
    # Explicit columns keep sort_values working even for an empty matrix.
    errors_df = pd.DataFrame(error_list, columns=["true", "pred", "rate"])
    errors_df = errors_df.sort_values("rate", ascending=False)

    print("Top 10 des paires les plus confondues (taux normalisé) :")
    display(errors_df.head(10))
else:
    print("Pas de matrice de confusion normalisée pour analyser les erreurs.")


## Prediction-Level Analysis (Correct vs Incorrect)

In [None]:
# Example-level view of the predictions: preview, recomputed accuracy, and a
# sample of misclassified rows.
if preds_df is None:
    print("Pas de test_predictions.csv pour analyser au niveau exemple.")
else:
    print("Aperçu des prédictions :")
    display(preds_df.head())

    # The correctness check needs both label columns; otherwise only the
    # preview above is shown.
    if {"true_label", "pred_label"}.issubset(preds_df.columns):
        # Adds a boolean "correct" column to preds_df (kept on the frame).
        preds_df["correct"] = preds_df["true_label"].eq(preds_df["pred_label"])
        # Mean of a boolean column = fraction of correct predictions.
        acc = preds_df["correct"].mean()
        print(f"Accuracy recalculée depuis predictions.csv : {acc:.4f}")

        print("\nExemples mal classés :")
        misclassified = preds_df.loc[~preds_df["correct"]]
        display(misclassified.head(10))