In [6]:
# ============================================================
# AUTDB — publication-ready pastel color plots (Engagement & Intensity)
# Matching tones: light blue (#AEC6CF), light green (#B5EAD7), light orange (#FFDAB9)
# ============================================================

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Directory that receives every figure written by savefig(); the path is
# relative to the current working directory.
OUT = "figs_best"
# exist_ok=True keeps a re-run from failing when the directory already exists.
os.makedirs(OUT, exist_ok=True)

# ---------- Load CSV files ----------
# One metrics table per task, read from the current working directory.
# The code below expects each table to carry a "model" column plus the metric
# columns named in the rename mappings and plots that follow.
eng = pd.read_csv("metrics_engagement_enriched.csv")
inte = pd.read_csv("metrics_intensity_enriched.csv")

# ---------- Normalize column names ----------
# Raw CSV header -> canonical name used by the plotting code below.
# Entries that map a name to itself are no-ops for rename(); they are kept so
# each mapping doubles as a list of the columns the plots rely on.
ENG_COLUMN_MAP = {
    "model": "model",
    "macro_f1": "macro_f1",
    "auc_ovr_macro": "macro_auroc",
    "bal_acc": "balanced_accuracy",
}
INTE_COLUMN_MAP = {
    "model": "model",
    "f1_pos": "f1",
    "auroc": "auroc",
    "bacc": "balanced_accuracy",
    "acc": "accuracy",
    "thr_f1": "thr_f1",
}

# rename() ignores keys that are absent from the frame, so a missing optional
# column (e.g. "thr_f1") does not raise here.
eng = eng.rename(columns=ENG_COLUMN_MAP)
inte = inte.rename(columns=INTE_COLUMN_MAP)

# ---------- Define pastel color palette ----------
# Hex codes shared by every figure in this script.

# Light blue: "logreg" markers in both scatters, the first bar series of the
# grouped bar charts, and non-negative Δ bars in the intensity difference plot.
COLOR_BLUE = "#AEC6CF"
# Light green: markers of every model other than "logreg" in the scatters, the
# second bar series, and non-negative Δ bars in the engagement difference plot.
COLOR_GREEN = "#B5EAD7"
# Light peach/orange: the balanced-accuracy bar series in the grouped bar charts.
COLOR_ORANGE = "#FFDAB9"
# Soft red: negative Δ bars in both difference plots.
COLOR_RED = "#F4A6A6"
# Neutral gray: the accuracy bar series in the intensity grouped bar chart.
COLOR_GRAY = "#D3D3D3"

# ---------- Helper functions ----------
def savefig(fig, name):
    """Write *fig* to ``name`` inside the ``OUT`` directory and release it.

    The image is saved at 300 dpi with a tight bounding box. Afterwards the
    figure is closed through pyplot and the destination path is printed.

    Args:
        fig: Matplotlib figure to export.
        name: File name (including extension) relative to ``OUT``.
    """
    destination = os.path.join(OUT, name)
    export_options = {"dpi": 300, "bbox_inches": "tight"}
    fig.savefig(destination, **export_options)
    # Close explicitly so figures do not accumulate in pyplot's registry.
    plt.close(fig)
    print(f"Saved: {destination}")

def annotate_points(ax, x, y, labels, fontsize=10, dx=8, dy=8):
    """Label each (x, y) point on *ax*, fanning labels out around the markers.

    Offsets cycle through four quadrants so neighbouring labels are less
    likely to collide: even positions go right (+dx) and odd ones left (-dx),
    while the vertical direction flips every two labels (+dy, +dy, -dy, -dy).

    The text is anchored on the side facing the point. Without that, a label
    pushed left or down keeps matplotlib's default left/baseline anchoring and
    runs back across the marker it is supposed to sit next to.

    Args:
        ax: Axes-like object exposing ``annotate``.
        x: Iterable of x coordinates (data units).
        y: Iterable of y coordinates (data units).
        labels: Iterable of label texts, paired positionally with *x* and *y*.
        fontsize: Font size passed to ``annotate``.
        dx: Horizontal offset magnitude in points.
        dy: Vertical offset magnitude in points.
    """
    for i, (xi, yi, lab) in enumerate(zip(x, y, labels)):
        ox = dx if i % 2 == 0 else -dx
        oy = dy if (i // 2) % 2 == 0 else -dy
        ax.annotate(
            lab,
            (xi, yi),
            xytext=(ox, oy),
            textcoords="offset points",
            fontsize=fontsize,
            # Anchor the text edge nearest the point so it grows away from it.
            ha="left" if ox >= 0 else "right",
            va="bottom" if oy >= 0 else "top",
        )

# =========================
# 1️⃣ ENGAGEMENT (multiclass)
# =========================

# A) Scatter: Macro-AUROC vs Macro-F1
# One marker per row of `eng`; x is macro-AUROC, y is macro-F1.
fig, ax = plt.subplots(figsize=(8, 6))
eng_auroc = eng["macro_auroc"]
eng_f1 = eng["macro_f1"]
# "logreg" rows (case-insensitive) are blue; every other model is green.
colors = [
    COLOR_BLUE if m.lower() == "logreg" else COLOR_GREEN
    for m in eng["model"]
]
ax.scatter(eng_auroc, eng_f1, s=220, alpha=0.9, color=colors, edgecolor="k")
# Label each marker with its model name.
annotate_points(ax, eng_auroc.values, eng_f1.values, eng["model"].tolist(), fontsize=11)
ax.set_title("Engagement — Macro-AUROC vs Macro-F1", fontsize=14, pad=10)
ax.set_xlabel("Macro-AUROC", fontsize=12)
ax.set_ylabel("Macro-F1", fontsize=12)
ax.grid(True, linestyle="--", linewidth=0.6, alpha=0.6)
savefig(fig, "engagement_scatter_macroAUC_vs_macroF1.png")

# B) Grouped bars
# One group per row of `eng`, three bars per group (one per metric).
fig, ax = plt.subplots(figsize=(9, 6))
idx = np.arange(len(eng))
width = 0.25
# (column, slot, color, legend label); `slot` is the bar's offset from the
# group tick, expressed in multiples of `width`.
eng_bar_specs = [
    ("macro_f1", -1, COLOR_BLUE, "Macro-F1"),
    ("macro_auroc", 0, COLOR_GREEN, "Macro-AUROC"),
    ("balanced_accuracy", 1, COLOR_ORANGE, "Balanced Accuracy"),
]
for bar_column, bar_slot, bar_color, bar_label in eng_bar_specs:
    ax.bar(idx + bar_slot * width, eng[bar_column], width, color=bar_color, label=bar_label)
# Ticks sit on the middle bar of each group and carry the model name.
ax.set_xticks(idx)
ax.set_xticklabels(eng["model"], fontsize=11)
ax.set_ylabel("Score", fontsize=12)
ax.set_title("Engagement — Comparison by Metric", fontsize=14, pad=10)
ax.legend(title="Metric")
ax.grid(axis="y", linestyle="--", linewidth=0.6, alpha=0.6)
savefig(fig, "engagement_grouped_bars_metrics.png")

# C) Δ(LogReg − XGB)
# Drawn only when both models are present (names compared case-insensitively).
eng_models = eng["model"].str.lower()
if {"logreg", "xgb"} <= set(eng_models):
    # Index rows by lowercase model name once and pull out the two rows.
    eng_by_model = eng.set_index(eng_models)
    e_log = eng_by_model.loc["logreg"]
    e_xgb = eng_by_model.loc["xgb"]
    # Axis label -> metric column; insertion order is the bar order.
    eng_delta_columns = {
        "Δ Macro-F1": "macro_f1",
        "Δ Macro-AUROC": "macro_auroc",
        "Δ Balanced Acc": "balanced_accuracy",
    }
    labels = list(eng_delta_columns)
    vals = [
        float(e_log[metric]) - float(e_xgb[metric])
        for metric in eng_delta_columns.values()
    ]
    fig, ax = plt.subplots(figsize=(8, 5))
    pos = np.arange(len(vals))
    # Non-negative differences (LogReg at least as good) are green, others red.
    colors = [COLOR_GREEN if v >= 0 else COLOR_RED for v in vals]
    ax.barh(pos, vals, color=colors)
    ax.set_yticks(pos)
    ax.set_yticklabels(labels, fontsize=11)
    # Dashed zero line separates "LogReg better" from "XGB better".
    ax.axvline(0, color="black", linestyle="--", lw=1)
    ax.set_xlabel("Difference (LogReg − XGB)")
    ax.set_title("Engagement — Model Differences", fontsize=14, pad=10)
    ax.grid(axis="x", linestyle="--", alpha=0.5)
    savefig(fig, "engagement_delta_logreg_minus_xgb.png")

# ======================
# 2️⃣ INTENSITY (binary)
# ======================

# A) Scatter: AUROC vs F1 (size ~ Balanced Accuracy)
fig, ax = plt.subplots(figsize=(8, 6))
# Min-max scale balanced accuracy into marker areas between 120 and 420; the
# 1e-9 term keeps the division finite when all values are equal.
bacc = inte["balanced_accuracy"]
bacc_span = bacc.max() - bacc.min() + 1e-9
sizes = 300 * (bacc - bacc.min()) / bacc_span + 120
# "logreg" rows (case-insensitive) are blue; every other model is green.
colors = [
    COLOR_BLUE if m.lower() == "logreg" else COLOR_GREEN
    for m in inte["model"]
]
ax.scatter(inte["auroc"], inte["f1"], s=sizes, alpha=0.9, color=colors, edgecolor="k")
# Labels show the model name, plus the "thr_f1" value when that column exists
# and the row's value is not missing.
has_thr = "thr_f1" in inte.columns
thresholds = inte.get("thr_f1", pd.Series([np.nan] * len(inte)))
lbls = [
    str(m) if (not has_thr or pd.isna(t)) else f"{m} (thr={t:.2f})"
    for m, t in zip(inte["model"], thresholds)
]
annotate_points(ax, inte["auroc"].values, inte["f1"].values, lbls, fontsize=11)
ax.set_title("Intensity — AUROC vs F1 (size ~ Balanced Accuracy)", fontsize=14, pad=10)
ax.set_xlabel("AUROC", fontsize=12)
ax.set_ylabel("F1-score", fontsize=12)
ax.grid(True, linestyle="--", linewidth=0.6, alpha=0.6)
savefig(fig, "intensity_scatter_auroc_vs_f1.png")

# B) Grouped bars
# One group per row of `inte`, four bars per group (one per metric).
fig, ax = plt.subplots(figsize=(10, 6))
idx = np.arange(len(inte))
width = 0.2
# (column, slot, color, legend label); `slot` is the bar's offset from the
# group tick in multiples of `width`, so the four bars straddle the tick.
inte_bar_specs = [
    ("f1", -1.5, COLOR_BLUE, "F1"),
    ("auroc", -0.5, COLOR_GREEN, "AUROC"),
    ("balanced_accuracy", 0.5, COLOR_ORANGE, "Balanced Acc."),
    ("accuracy", 1.5, COLOR_GRAY, "Accuracy"),
]
for bar_column, bar_slot, bar_color, bar_label in inte_bar_specs:
    ax.bar(idx + bar_slot * width, inte[bar_column], width, color=bar_color, label=bar_label)
ax.set_xticks(idx)
ax.set_xticklabels(inte["model"], fontsize=11)
ax.set_ylabel("Score", fontsize=12)
ax.set_title("Intensity — Comparison by Metric", fontsize=14, pad=10)
ax.legend(title="Metric")
ax.grid(axis="y", linestyle="--", linewidth=0.6, alpha=0.6)
savefig(fig, "intensity_grouped_bars_metrics.png")

# C) Δ(LogReg − XGB)
# Drawn only when both models are present (names compared case-insensitively).
inte_models = inte["model"].str.lower()
if {"logreg", "xgb"} <= set(inte_models):
    # Index rows by lowercase model name once and pull out the two rows.
    inte_by_model = inte.set_index(inte_models)
    i_log = inte_by_model.loc["logreg"]
    i_xgb = inte_by_model.loc["xgb"]
    # Axis label -> metric column; insertion order is the bar order.
    inte_delta_columns = {
        "Δ F1": "f1",
        "Δ AUROC": "auroc",
        "Δ Balanced Acc": "balanced_accuracy",
        "Δ Accuracy": "accuracy",
    }
    labels = list(inte_delta_columns)
    vals = [
        float(i_log[metric]) - float(i_xgb[metric])
        for metric in inte_delta_columns.values()
    ]
    fig, ax = plt.subplots(figsize=(8, 5))
    pos = np.arange(len(vals))
    # Non-negative differences (LogReg at least as good) are blue, others red.
    colors = [COLOR_BLUE if v >= 0 else COLOR_RED for v in vals]
    ax.barh(pos, vals, color=colors)
    ax.set_yticks(pos)
    ax.set_yticklabels(labels, fontsize=11)
    # Dashed zero line separates "LogReg better" from "XGB better".
    ax.axvline(0, color="black", linestyle="--", lw=1)
    ax.set_xlabel("Difference (LogReg − XGB)")
    ax.set_title("Intensity — Model Differences", fontsize=14, pad=10)
    ax.grid(axis="x", linestyle="--", alpha=0.5)
    savefig(fig, "intensity_delta_logreg_minus_xgb.png")

# Final summary line, printed whether or not the Δ plots were drawn.
print("\n✅ All pastel-tone figures saved in:", OUT)


Saved: figs_best\engagement_scatter_macroAUC_vs_macroF1.png
Saved: figs_best\engagement_grouped_bars_metrics.png
Saved: figs_best\engagement_delta_logreg_minus_xgb.png
Saved: figs_best\intensity_scatter_auroc_vs_f1.png
Saved: figs_best\intensity_grouped_bars_metrics.png
Saved: figs_best\intensity_delta_logreg_minus_xgb.png

✅ All pastel-tone figures saved in: figs_best
