In [2]:
# evaluation.ipynb

from pathlib import Path

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix

# Data loading: read the fingerprint features and the raw labels, then
# align them row by row on the shared "ID" column (pandas merges with an
# inner join by default, so IDs present in only one file are dropped).
X = pd.read_csv('../data/processed/X_train_fingerprints.csv')
y = pd.read_csv('../data/raw/y_train.csv')
df = pd.merge(X, y, on="ID")

# Split the merged table: the three label columns are the targets, and
# every column other than them and the "ID" key is used as model input.
y_labels = df.loc[:, ["Y1", "Y2", "Y3"]]
X_features = df.drop(columns=["ID", "Y1", "Y2", "Y3"])

# Model loading. joblib artifacts are pickle-based and can execute code
# when loaded, so only load model files that come from a trusted source.
model = joblib.load("../models/admet_model.pkl")

# Predictions for every row of the feature table.
# NOTE(review): the features and labels are read from the *_train files;
# if the model was fitted on these same rows the scores below will be
# optimistic -- confirm which split this is meant to evaluate.
y_pred = model.predict(X_features)

# Classification report, using the label column names as target names.
print("📋 Rapport de classification :")
report = classification_report(
    y_labels,
    y_pred,
    target_names=["Y1", "Y2", "Y3"],
)
print(report)

# Confusion matrices: one small annotated heatmap per label column.
sns.set(style="whitegrid")
for i, label in enumerate(["Y1", "Y2", "Y3"]):
    # Column i of the true labels is compared with column i of the
    # predictions.
    y_true_col = y_labels.iloc[:, i]
    y_pred_col = y_pred[:, i]
    cm = confusion_matrix(y_true_col, y_pred_col)

    # confusion_matrix puts true classes on rows and predicted classes on
    # columns, hence "Réel" on the y axis and "Prédit" on the x axis.
    fig, ax = plt.subplots(figsize=(4, 3))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
    ax.set_title(f"📌 Matrice de confusion : {label}")
    ax.set_xlabel("Prédit")
    ax.set_ylabel("Réel")
    fig.tight_layout()
    plt.show()

# Detailed error analysis: put the true labels and the predictions side
# by side in a single table.
# The label index is reset so that it lines up positionally with the
# freshly built prediction frame (presumably y_pred is a plain array, so
# that frame gets a default 0..n-1 index -- confirm).
y_true_df = y_labels.reset_index(drop=True)
y_pred_df = pd.DataFrame(
    y_pred,
    columns=["Y1_pred", "Y2_pred", "Y3_pred"],
)
comparison = pd.concat((y_true_df, y_pred_df), axis="columns")

def analyse_erreurs(label, data=None):
    """Print and return the false positives and false negatives for one label.

    Args:
        label: Name of the true-label column (for example ``"Y1"``). The
            matching prediction column must be named ``f"{label}_pred"``.
        data: Optional DataFrame holding both columns. When omitted, the
            module-level ``comparison`` table is used, which is the
            original behaviour of this function.

    Returns:
        A ``(fp, fn)`` tuple of DataFrames: ``fp`` holds the rows where the
        true value is 0 and the prediction is 1, ``fn`` the rows where the
        true value is 1 and the prediction is 0.

    Raises:
        KeyError: If ``label`` or ``f"{label}_pred"`` is not a column of
            the table.
    """
    if data is None:
        # Backward-compatible default: fall back to the notebook-level table.
        data = comparison

    truth = data[label]
    predicted = data[f"{label}_pred"]

    print(f"\n🔬 Analyse des erreurs pour {label} :")
    fp = data[(truth == 0) & (predicted == 1)]
    fn = data[(truth == 1) & (predicted == 0)]
    print(f"❌ Faux positifs : {len(fp)}")
    print(f"❌ Faux négatifs : {len(fn)}")
    return fp, fn

# Run the error analysis for each label in order (Y1, Y2, Y3) and bind
# each returned (false positives, false negatives) pair to its own names.
(fp_y1, fn_y1), (fp_y2, fn_y2), (fp_y3, fn_y3) = map(
    analyse_erreurs, ("Y1", "Y2", "Y3")
)

# Error chart: one bar per (label, error type) pair, in insertion order.
errors = {
    "Y1_FP": len(fp_y1),
    "Y1_FN": len(fn_y1),
    "Y2_FP": len(fp_y2),
    "Y2_FN": len(fn_y2),
    "Y3_FP": len(fp_y3),
    "Y3_FN": len(fn_y3),
}
error_names = list(errors)
error_counts = list(errors.values())

fig, ax = plt.subplots(figsize=(8, 5))
sns.barplot(x=error_names, y=error_counts, palette="Set2", ax=ax)
ax.set_title("🚫 Nombre de faux positifs et faux négatifs par propriété")
ax.set_ylabel("Nombre d'erreurs")
fig.tight_layout()
plt.show()

# Save the misclassified rows for later analysis.
# `Path` comes from `pathlib` (imported at the top of the file); the
# original cell used it without importing it, which raised NameError
# before any file was written.
reports_dir = Path("../reports")
reports_dir.mkdir(parents=True, exist_ok=True)

# Output file name -> table of misclassified rows to write into it.
error_tables = {
    "errors_fp_y1.csv": fp_y1,
    "errors_fn_y1.csv": fn_y1,
    "errors_fp_y2.csv": fp_y2,
    "errors_fn_y2.csv": fn_y2,
    "errors_fp_y3.csv": fp_y3,
    "errors_fn_y3.csv": fn_y3,
}
for filename, table in error_tables.items():
    # index=False: the row index is not written to the CSV.
    table.to_csv(reports_dir / filename, index=False)
print("📝 Fichiers des erreurs exportés dans le dossier reports/")

ModuleNotFoundError: No module named 'pandas'