# Model Evaluation - Plots

Generate publication-ready plots from CSV results in `outputs/eval-metrics/`.
Save plots to `outputs/eval-plots/`.

In [None]:
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Where the metric CSVs are read from and where figures are written.
# Both paths are relative, so they resolve against the current working directory.
INPUT_DIR = Path("../outputs/eval-metrics")
OUTPUT_DIR = Path("../outputs/eval-plots")
# Create the figure directory (and any missing parents); no error if it exists.
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# Start from matplotlib's built-in "ggplot" style, then override the
# individual rcParams below, grouped by what they affect.
plt.style.use("ggplot")

# Typeface and text sizes.
_text_rc = {
    "font.family": "sans-serif",
    "font.size": 9,
    "axes.labelsize": 10,
    "xtick.labelsize": 8,
    "ytick.labelsize": 8,
}

# Grey panel with white grid lines drawn underneath the plotted artists.
_panel_rc = {
    "axes.facecolor": "#EBEBEB",
    "axes.edgecolor": "white",
    "axes.grid": True,
    "grid.color": "white",
    "grid.linewidth": 1,
    "axes.axisbelow": True,
}

# All four axis spines are switched off.
_spine_rc = {
    "axes.spines.top": False,
    "axes.spines.right": False,
    "axes.spines.left": False,
    "axes.spines.bottom": False,
}

# Figure background, on-screen resolution and legend frame.
_figure_rc = {
    "figure.facecolor": "white",
    "figure.dpi": 150,
    "legend.frameon": False,
}

plt.rcParams.update({**_text_rc, **_panel_rc, **_spine_rc, **_figure_rc})

# Color palettes (ggplot2-like).
# Each palette maps an index label used in the result tables to a hex color.
# The augmentation and regional palettes reuse the same three hues.
_THREE_HUES = ("#F8766D", "#00BA38", "#619CFF")

MODEL_COLORS = dict(
    zip(
        ("UNet_512", "UNet_1024", "Seg_512", "Seg_1024"),
        ("#F8766D", "#7CAE00", "#00BFC4", "#C77CFF"),
    )
)

AUG_COLORS = dict(zip(("Default", "Scale", "Domain"), _THREE_HUES))

REGIONAL_COLORS = dict(zip(("North", "Central", "South"), _THREE_HUES))

## Load Data

In [None]:
def _read_results(filename):
    """Read one results CSV from INPUT_DIR, using its first column as the index."""
    return pd.read_csv(INPUT_DIR / filename, index_col=0)


# One table per experiment; the plotting cells below look rows up by index label.
arch_df = _read_results("architecture_experiment.csv")
aug_df = _read_results("augmentation_experiment.csv")
regional_df = _read_results("regional_cv.csv")
final_df = _read_results("final_model.csv")

print("Data loaded successfully")

## 1. Architecture Experiment

In [None]:
# Grouped bar chart: one cluster per metric, one bar per row of arch_df.
fig, ax = plt.subplots(figsize=(5, 3.5))

metrics = ["IoU", "Precision", "Recall"]
models = list(arch_df.index)
n_metrics, n_models = len(metrics), len(models)
# The bars of one cluster together span 0.8 of the unit gap between ticks.
width = 0.8 / n_models
x = np.arange(n_metrics)

for idx, model in enumerate(models):
    # Shift each series so the whole cluster is centred on its tick.
    shift = (idx - n_models / 2 + 0.5) * width
    scores = [arch_df.loc[model, metric] for metric in metrics]
    rects = ax.bar(
        x + shift,
        scores,
        width,
        label=model,
        color=MODEL_COLORS[model],
    )

    # Print the two-decimal score just above the top of each bar.
    for rect, score in zip(rects, scores):
        label_x = rect.get_x() + rect.get_width() / 2
        label_y = rect.get_height() + 0.01
        ax.text(label_x, label_y, f"{score:.2f}",
                ha="center", va="bottom", fontsize=6)

ax.set_ylabel("Score")
ax.set_ylim(0, 1.05)
ax.set_xticks(x)
ax.set_xticklabels(metrics)
ax.legend(loc="upper right", fontsize=7)

# Write both a 300-dpi PNG and a PDF, then show the figure.
plt.tight_layout()
plt.savefig(OUTPUT_DIR / "architecture_experiment.png", dpi=300, bbox_inches="tight")
plt.savefig(OUTPUT_DIR / "architecture_experiment.pdf", bbox_inches="tight")
plt.show()

## 2. Augmentation Experiment

In [None]:
# Grouped bar chart: one cluster per metric, one bar per row of aug_df.
fig, ax = plt.subplots(figsize=(4.5, 3.5))

metrics = ["IoU", "Precision", "Recall"]
tiers = list(aug_df.index)
n_metrics, n_tiers = len(metrics), len(tiers)
# The bars of one cluster together span 0.8 of the unit gap between ticks.
width = 0.8 / n_tiers
x = np.arange(n_metrics)

for idx, tier in enumerate(tiers):
    # Shift each series so the whole cluster is centred on its tick.
    shift = (idx - n_tiers / 2 + 0.5) * width
    scores = [aug_df.loc[tier, metric] for metric in metrics]
    rects = ax.bar(
        x + shift,
        scores,
        width,
        label=tier,
        color=AUG_COLORS[tier],
    )

    # Print the two-decimal score just above the top of each bar.
    for rect, score in zip(rects, scores):
        label_x = rect.get_x() + rect.get_width() / 2
        label_y = rect.get_height() + 0.01
        ax.text(label_x, label_y, f"{score:.2f}",
                ha="center", va="bottom", fontsize=6)

ax.set_ylabel("Score")
ax.set_ylim(0, 1.05)
ax.set_xticks(x)
ax.set_xticklabels(metrics)
ax.legend(loc="upper right", fontsize=7)

# Write both a 300-dpi PNG and a PDF, then show the figure.
plt.tight_layout()
plt.savefig(OUTPUT_DIR / "augmentation_experiment.png", dpi=300, bbox_inches="tight")
plt.savefig(OUTPUT_DIR / "augmentation_experiment.pdf", bbox_inches="tight")
plt.show()

## 3. Regional Cross-Validation

In [None]:
# Single-series bar chart of the "IoU" column, one bar per row of regional_df.
fig, ax = plt.subplots(figsize=(4, 3.5))

regions = list(regional_df.index)
positions = np.arange(len(regions))
iou_scores = regional_df["IoU"].values
# Each bar takes the color assigned to its index label.
bar_colors = [REGIONAL_COLORS[region] for region in regions]

rects = ax.bar(positions, iou_scores, color=bar_colors, width=0.6)

# Print the two-decimal score just above the top of each bar.
for rect, score in zip(rects, iou_scores):
    label_x = rect.get_x() + rect.get_width() / 2
    label_y = rect.get_height() + 0.01
    ax.text(label_x, label_y, f"{score:.2f}",
            ha="center", va="bottom", fontsize=7)

ax.set_xlabel("Hold-out Region")
ax.set_ylabel("IoU")
ax.set_ylim(0, 1.0)
ax.set_xticks(positions)
ax.set_xticklabels(regions)

# Write both a 300-dpi PNG and a PDF, then show the figure.
plt.tight_layout()
plt.savefig(OUTPUT_DIR / "regional_cv.png", dpi=300, bbox_inches="tight")
plt.savefig(OUTPUT_DIR / "regional_cv.pdf", bbox_inches="tight")
plt.show()

## 4. Summary Tables

In [None]:
# Show every results table rounded to four decimals, each under its heading.
# Headings after the first begin with a newline to separate the tables.
summary_tables = [
    ("Architecture Experiment:", arch_df),
    ("\nAugmentation Experiment:", aug_df),
    ("\nRegional CV:", regional_df),
    ("\nFinal Model:", final_df),
]

for heading, table in summary_tables:
    print(heading)
    display(table.round(4))