In [4]:
# 📦 Imports and Setup
import os
import sys
import joblib
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import shap
from sklearn.metrics import precision_recall_curve
from src.model_utils import evaluate_model

# 📁 Resolve project root
# The parent of this file's directory when run as a script; the parent of the
# current working directory when `__file__` is undefined (e.g. in a notebook).
project_root = os.path.abspath(
    os.path.join(os.path.dirname(__file__), os.pardir)
    if "__file__" in globals()
    else os.pardir
)
# Make the project root importable.
sys.path.append(project_root)

# 📥 Model Loading
def load_models(prefix):
    """Load the pickled models stored for one scope.

    Looks for ``Logistic Regression.pkl`` and ``XGBoost.pkl`` under
    ``<project_root>/artifacts/<prefix>``. A model that fails to load, or
    that lacks ``predict`` / ``predict_proba``, is reported and skipped
    rather than aborting the whole load.

    Args:
        prefix: Scope name; used as the sub-directory under ``artifacts``.

    Returns:
        Dict mapping model display name to the loaded estimator. It may be
        empty when nothing could be loaded.
    """
    artifact_dir = os.path.join(project_root, "artifacts", prefix)
    loaded = {}
    for name in ("Logistic Regression", "XGBoost"):
        path = os.path.join(artifact_dir, f"{name}.pkl")
        try:
            candidate = joblib.load(path)
            # Duck-type check: downstream code needs both methods.
            usable = hasattr(candidate, "predict") and hasattr(candidate, "predict_proba")
            if not usable:
                raise TypeError(f"{name} is not a valid estimator")
            loaded[name] = candidate
            print(f"✅ Loaded: {os.path.basename(path)} — {type(candidate).__name__}")
        except Exception as err:
            # Deliberately best-effort: one bad artifact must not block the rest.
            print(f"⚠️ Skipped '{name}': {err}")
    return loaded

# 📊 Model Evaluation
def evaluate_all(models, X_test, y_test):
    """Score every model on the test set via ``evaluate_model``.

    Args:
        models: Mapping of display name to an estimator exposing ``predict``
            and ``predict_proba``.
        X_test: Test features handed to each estimator.
        y_test: Ground-truth labels handed to ``evaluate_model``.

    Returns:
        Dict mapping model name to the metrics dict produced by
        ``evaluate_model``. A model whose evaluation raises, or whose result
        is not a dict, is reported and left out.
    """
    scored = {}
    for label, estimator in models.items():
        try:
            hard_preds = estimator.predict(X_test)
            # Column 1 of predict_proba is used as the positive-class score.
            pos_scores = estimator.predict_proba(X_test)[:, 1]
            metrics = evaluate_model(y_test, hard_preds, pos_scores)
            if isinstance(metrics, dict):
                scored[label] = metrics
            else:
                raise ValueError(f"Invalid result for {label}")
        except Exception as err:
            # Best-effort: keep evaluating the remaining models.
            print(f"⚠️ Evaluation failed for {label}: {err}")
    return scored

# 📈 Precision-Recall Curve Plot
def plot_pr_curve(models, X_test, y_test, prefix):
    """Draw one precision-recall curve per model on a shared figure and save it.

    Args:
        models: Mapping of display name to an estimator exposing
            ``predict_proba``.
        X_test: Test features handed to each estimator.
        y_test: Ground-truth labels handed to ``precision_recall_curve``.
        prefix: Scope name; upper-cased in the title and used in the file name.

    The figure is written to ``models/<prefix>_pr_curve.png`` relative to the
    current working directory. A model whose curve cannot be computed is
    reported and omitted from the figure.
    """
    plt.figure(figsize=(8, 6))
    for label, estimator in models.items():
        try:
            pos_scores = estimator.predict_proba(X_test)[:, 1]
            prec, rec, _thresholds = precision_recall_curve(y_test, pos_scores)
            plt.plot(rec, prec, label=label)
        except Exception as err:
            print(f"⚠️ Skipped PR curve for {label}: {err}")

    plt.xlabel("Recall")
    plt.ylabel("Precision")
    plt.title(f"{prefix.upper()} — Precision-Recall Curve")
    plt.legend()
    plt.grid(True)

    out_dir = "models"
    os.makedirs(out_dir, exist_ok=True)
    out_path = os.path.join(out_dir, f"{prefix}_pr_curve.png")
    plt.savefig(out_path)
    plt.close()

# 📉 Threshold Curve Visualization
def plot_threshold_curve(y_true, y_prob, prefix):
    """Plot precision, recall and F1 against the decision threshold and save it.

    Args:
        y_true: Ground-truth labels handed to ``precision_recall_curve``.
        y_prob: Positive-class scores handed to ``precision_recall_curve``.
        prefix: Scope name; upper-cased in the title and used in the file name.

    The figure is written to ``models/<prefix>_threshold_curve.png`` relative
    to the current working directory; the directory is created if missing.
    """
    precision, recall, thresholds = precision_recall_curve(y_true, y_prob)
    # The small epsilon avoids a 0/0 when precision and recall are both zero.
    f1_scores = 2 * (precision * recall) / (precision + recall + 1e-8)
    plt.figure(figsize=(8, 6))
    # precision/recall carry one more entry than thresholds, hence the [:-1].
    plt.plot(thresholds, precision[:-1], label="Precision")
    plt.plot(thresholds, recall[:-1], label="Recall")
    plt.plot(thresholds, f1_scores[:-1], label="F1 Score")
    plt.xlabel("Threshold")
    plt.ylabel("Score")
    plt.title(f"{prefix.upper()} — Threshold Curve")
    plt.legend()
    plt.grid(True)
    # Create the output directory here so the function also works when it is
    # called on its own, without any other plot having been saved first.
    os.makedirs("models", exist_ok=True)
    plt.savefig(os.path.join("models", f"{prefix}_threshold_curve.png"))
    plt.close()

# 🧠 SHAP Interpretation
def run_shap_analysis(model, X_test, prefix):
    """Save SHAP summary plots (bar and default) for a model.

    Args:
        model: Fitted model handed to ``shap.TreeExplainer``; presumably a
            tree-based estimator such as the XGBoost classifier — confirm
            against the caller.
        X_test: Features to explain; also passed to the summary plots.
        prefix: Scope name used in the output file names.

    Writes ``models/<prefix>_shap_summary_bar.png`` and
    ``models/<prefix>_shap_summary_beeswarm.png`` relative to the current
    working directory; the directory is created if missing.
    """
    # Create the output directory here so the function also works when it is
    # called on its own, without any other plot having been saved first.
    os.makedirs("models", exist_ok=True)
    explainer = shap.TreeExplainer(model)
    shap_values = explainer.shap_values(X_test)
    # show=False keeps the figure open so it can be saved to disk.
    shap.summary_plot(shap_values, X_test, plot_type="bar", show=False)
    plt.savefig(os.path.join("models", f"{prefix}_shap_summary_bar.png"))
    plt.close()
    shap.summary_plot(shap_values, X_test, show=False)
    plt.savefig(os.path.join("models", f"{prefix}_shap_summary_beeswarm.png"))
    plt.close()

# 📊 Confusion Matrix Plot
def plot_confusion_matrices(results, prefix):
    """Save one confusion-matrix heatmap per evaluated model.

    Args:
        results: Mapping of model name to a metrics dict; the matrix is read
            from the ``"Confusion Matrix"`` key. Entries without that key (or
            with a ``None`` value) are reported and skipped.
        prefix: Scope name; upper-cased in the title and used in the file name.

    Each heatmap is written to ``models/<prefix>_<model_name>_cm.png``
    (model name lower-cased, spaces replaced by underscores) relative to the
    current working directory; the directory is created if missing.
    """
    # Create the output directory here so the function also works when it is
    # called on its own, without any other plot having been saved first.
    os.makedirs("models", exist_ok=True)
    for name, metrics in results.items():
        cm = metrics.get("Confusion Matrix")
        if cm is None:
            print(f"⚠️ No Confusion Matrix for {name}")
            continue
        plt.figure(figsize=(6, 5))
        # fmt='d' assumes integer counts — TODO confirm the matrix is never normalised.
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
        plt.title(f"{prefix.upper()} — {name} Confusion Matrix")
        plt.xlabel("Predicted")
        plt.ylabel("Actual")
        fname = os.path.join("models", f"{prefix}_{name.lower().replace(' ', '_')}_cm.png")
        plt.savefig(fname)
        plt.close()

# ✅ Model Recommendation
def recommend_model(results, prefix):
    """Pick the model with the highest ``"AUC-PR"`` metric and announce it.

    Args:
        results: Mapping of model name to a metrics dict. A missing
            ``"AUC-PR"`` entry counts as 0.
        prefix: Scope name; upper-cased in the printed recommendation.

    Returns:
        The name of the best-scoring model (the first one wins a tie), or
        ``None`` when no recommendation can be made, e.g. for empty results.
    """
    def _auc_pr(entry):
        _name, metrics = entry
        return metrics.get("AUC-PR", 0)

    try:
        winner, _metrics = max(results.items(), key=_auc_pr)
        print(f"✅ Recommended Model for {prefix.upper()}: {winner}")
    except Exception as err:
        # Covers empty results (max() on an empty sequence) and malformed entries.
        print(f"⚠️ Recommendation failed for {prefix}: {err}")
        return None
    return winner

# 🚀 Run Evaluation
def run_evaluation(prefix):
    """Run the full evaluation pipeline for one scope.

    Steps, in order: read ``<prefix>_X_test.csv`` and ``<prefix>_y_test.csv``
    from ``<project_root>/data/processed``, load the scope's models, evaluate
    them, print F1 / AUC-PR / confusion matrix per model, save the PR-curve
    and confusion-matrix plots, and print a recommendation. For the ``ecom``
    scope with an XGBoost model available, threshold-curve and SHAP plots are
    generated as well.

    Args:
        prefix: Scope name (the script runs it with ``"ecom"`` and ``"bank"``).

    Returns:
        None. Any error is caught and printed instead of being raised.
    """
    print(f"\n🔍 Evaluating models for scope: {prefix}")
    try:
        processed_dir = os.path.join(project_root, "data", "processed")
        features_csv = os.path.join(processed_dir, f"{prefix}_X_test.csv")
        labels_csv = os.path.join(processed_dir, f"{prefix}_y_test.csv")
        X_test = pd.read_csv(features_csv)
        y_test = pd.read_csv(labels_csv)

        loaded = load_models(prefix)
        if not loaded:
            print(f"🚫 No valid models found for scope='{prefix}'")
            return

        results = evaluate_all(loaded, X_test, y_test)
        if results:
            for model_name, metrics in results.items():
                print(f"\n🔹 {model_name} — F1: {metrics['F1-Score']:.4f}, AUC-PR: {metrics['AUC-PR']:.4f}")
                print("Confusion Matrix:\n", metrics["Confusion Matrix"])
        else:
            print(f"⚠️ Evaluation returned no results for scope='{prefix}'")

        # The plots and the recommendation run even when `results` is empty.
        plot_pr_curve(loaded, X_test, y_test, prefix)
        plot_confusion_matrices(results, prefix)
        recommend_model(results, prefix)

        # Explainability is limited to the ecom scope's XGBoost model.
        wants_explainability = prefix == "ecom" and "XGBoost" in loaded
        if not wants_explainability:
            return

        try:
            xgb_model = loaded["XGBoost"]
            pos_scores = xgb_model.predict_proba(X_test)[:, 1]
            plot_threshold_curve(y_test, pos_scores, prefix)
            run_shap_analysis(xgb_model, X_test, prefix)
            print("📊 Threshold + SHAP plots generated for XGBoost (ecom)")
        except Exception as err:
            # An explainability failure must not mask the evaluation output.
            print(f"⚠️ Explainability failed for ecom/XGBoost: {err}")

    except Exception as err:
        print(f"💥 Error during evaluation for '{prefix}': {err}")

# 🧪 Execute for both scopes
if __name__ == "__main__":
    # Evaluate each scope in turn: e-commerce first, then bank.
    for scope in ("ecom", "bank"):
        run_evaluation(scope)



🔍 Evaluating models for scope: ecom
✅ Loaded: Logistic Regression.pkl — LogisticRegression
✅ Loaded: XGBoost.pkl — XGBClassifier

🔹 Logistic Regression — F1: 0.6584, AUC-PR: 0.6458
Confusion Matrix:
 [[26820   573]
 [ 1160  1670]]

🔹 XGBoost — F1: 0.5161, AUC-PR: 0.6774
Confusion Matrix:
 [[24916  2477]
 [  984  1846]]
✅ Recommended Model for ECOM: XGBoost
📊 Threshold + SHAP plots generated for XGBoost (ecom)

🔍 Evaluating models for scope: bank
✅ Loaded: Logistic Regression.pkl — LogisticRegression
✅ Loaded: XGBoost.pkl — XGBClassifier

🔹 Logistic Regression — F1: 0.1828, AUC-PR: 0.7311
Confusion Matrix:
 [[55941   710]
 [   14    81]]

🔹 XGBoost — F1: 0.8128, AUC-PR: 0.8073
Confusion Matrix:
 [[56635    16]
 [   19    76]]
✅ Recommended Model for BANK: XGBoost
