# 1. Load metrics.json

In [12]:
import json
from pathlib import Path
import matplotlib.pyplot as plt
import src.config as cfg
# metrics.json lives directly under the project's configured results directory.
metrics_path = cfg.RESULTS_DIR / "metrics.json"
# Decode explicitly as UTF-8 so loading does not depend on the platform's
# default locale encoding.
with open(metrics_path, encoding="utf-8") as f:
    results = json.load(f)

results.keys()  # top-level keys; expect "train" and/or "test", depending on which splits exist

dict_keys(['train', 'test'])

# 2. Summary metrics
- Display mean Dice/IoU for each split, both including and excluding images whose ground-truth mask is empty.

In [13]:
import pandas as pd

def summarize(split_data):
    """Return a split's aggregate metrics as a one-row DataFrame.

    The value stored under ``split_data["metrics"]`` is wrapped in a
    single-element list, so the resulting frame has exactly one row.
    """
    aggregate = split_data["metrics"]
    return pd.DataFrame(data=[aggregate])

# Stack the one-row summaries; the dict keys (split names) become the outer index level.
pd.concat(
    {name: summarize(payload) for name, payload in results.items()}
)

  pd.concat({split: summarize(data) for split, data in results.items()})


Unnamed: 0,Unnamed: 1,mean_dice_including_empty,mean_iou_including_empty,mean_dice_excluding_empty_gt,mean_iou_excluding_empty_gt
train,0,0.9602152,0.9290072,0.960215,0.929007
test,0,6.486092e-11,6.486092e-11,,


# 3. Per-image metrics analysis
- Load each split's per-image metrics into a DataFrame, preview the first rows, and plot the Dice distribution.

In [None]:
# For every split: preview the per-image table, then draw a histogram of its Dice scores.
for split, data in results.items():
    df = pd.DataFrame(data["per_image"])
    print(f"{split} – {len(df)} images")
    display(df.head())

    # One fresh figure per split, drawn through the explicit Axes interface.
    fig, ax = plt.subplots()
    ax.hist(df["dice"], bins=20)
    ax.set_title(f"{split} Dice distribution")
    ax.set_xlabel("Dice")
    ax.set_ylabel("Count")
    plt.show()

# 4. Find best/worst examples

In [None]:
def top_k(df, k=5, col="dice"):
    """Return the first ``k`` rows of ``df`` after sorting by ``col`` in descending order.

    If ``df`` has fewer than ``k`` rows, all of them are returned.
    """
    ranked = df.sort_values(by=col, ascending=False)
    return ranked.head(k)

def bottom_k(df, k=5, col="dice"):
    """Return the first ``k`` rows of ``df`` after sorting by ``col`` in ascending order.

    If ``df`` has fewer than ``k`` rows, all of them are returned.
    """
    ranked = df.sort_values(by=col, ascending=True)
    return ranked.head(k)

# 5. Visualize overlays for specific examples
- Because `evaluate.py` saves `_image.png`, `_gt_mask.png`, `_pred_mask.png`, and `_overlay.png` files under `results/sample_predictions/<split>`, we can map image names to overlays.

In [None]:
import cv2
import matplotlib.pyplot as plt

def show_example(split, image_name):
    """Show the saved overlay, ground-truth mask and predicted mask side by side.

    The three PNGs are looked up as ``<stem>_overlay.png``, ``<stem>_gt_mask.png``
    and ``<stem>_pred_mask.png`` inside
    ``cfg.RESULTS_DIR / "sample_predictions" / split``, where ``<stem>`` is
    ``image_name`` stripped of its directory and final extension.

    Parameters
    ----------
    split : str
        Sub-directory of ``sample_predictions`` to read from (e.g. ``"train"``).
    image_name : str or path-like
        Image name whose stem identifies the saved files.

    Raises
    ------
    FileNotFoundError
        If any of the three files is missing or cannot be decoded.
    """
    base = cfg.RESULTS_DIR / "sample_predictions" / split / Path(image_name).stem
    panel_paths = {
        "Overlay": str(base) + "_overlay.png",
        "GT mask": str(base) + "_gt_mask.png",
        "Pred mask": str(base) + "_pred_mask.png",
    }

    panels = []
    unreadable = []
    for title, path in panel_paths.items():
        # cv2.imread signals failure by returning None rather than raising, so
        # check here instead of letting cvtColor fail with an opaque error.
        bgr = cv2.imread(path)
        if bgr is None:
            unreadable.append(path)
        else:
            panels.append((title, cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)))

    if unreadable:
        raise FileNotFoundError(
            f"Could not read saved prediction file(s) for {image_name!r} "
            f"in split {split!r}: {', '.join(unreadable)}"
        )

    fig, axes = plt.subplots(1, 3, figsize=(12, 4))
    for ax, (title, im) in zip(axes, panels):
        ax.imshow(im, cmap="gray")
        ax.set_title(title)
        ax.axis("off")
    plt.show()
    
# Pick the training image with the lowest Dice and plot its saved overlay and masks.
df = pd.DataFrame(results["train"]["per_image"])
worst = bottom_k(df, k=1).iloc[0]
show_example("train", worst["image"])

# 6. Scatterplot of GT size vs. Dice (to see if tiny masks fail more often).

In [None]:
# One scatter plot per split: ground-truth mask size against Dice.
for split, data in results.items():
    df = pd.DataFrame(data["per_image"])

    fig, ax = plt.subplots()
    ax.scatter(df["gt_sum"], df["dice"], alpha=0.5)
    ax.set_title(f"{split}: Dice vs GT size")
    ax.set_xlabel("GT mask sum (pixels)")
    ax.set_ylabel("Dice")
    plt.show()