In [1]:
# cv_analysis.ipynb

import os
import json
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from ipywidgets import interact, Dropdown, IntSlider, VBox, HBox

plt.style.use("ggplot")

ROOT = Path("../experiments/oof_results")
FULL_TRAIN_ROOT = Path("../experiments/full_train_results")   # optional, see below


In [2]:
def available_runs(root=ROOT):
    # only directories that actually look like CV runs
    return [
        p.name
        for p in root.iterdir()
        if p.is_dir() and (p / "oof_predictions.csv").exists()
    ]

def load_run(run_name):
    run_dir = ROOT / run_name
    oof_csv = run_dir / "oof_predictions.csv"
    metrics_json = run_dir / "oof_metrics.json"

    oof_df = pd.read_csv(oof_csv)
    with metrics_json.open() as f:
        metrics = json.load(f)

    # Optionally load per-fold CSVs
    fold_files = sorted(run_dir.glob("fold_*_oof.csv"))
    fold_dfs = {f.stem: pd.read_csv(f) for f in fold_files}

    return oof_df, metrics, fold_dfs


def load_full_train_results(root=FULL_TRAIN_ROOT):
    """
    Expected structure:
        experiments/full_train_results/
            run_A/
                val_predictions.csv
                metrics.json
            run_B/
                ...
    """
    if not root.exists():
        return {}
    runs = {}
    for sub in root.iterdir():
        if not sub.is_dir():
            continue
        preds = sub / "val_predictions.csv"
        met = sub / "metrics.json"
        if preds.exists() and met.exists():
            runs[sub.name] = {
                "predictions": pd.read_csv(preds),
                "metrics": json.load(open(met)),
            }
    return runs


In [3]:
cv_runs = available_runs()
full_train_runs = load_full_train_results()

print("CV Runs:", cv_runs)
print("Full-train Runs:", list(full_train_runs.keys()))


CV Runs: ['mini_smoke']
Full-train Runs: []


In [14]:
run_dropdown = Dropdown(options=cv_runs, description="CV Run:")
display(run_dropdown)


Dropdown(description='CV Run:', options=('mini_smoke',), value='mini_smoke')

In [15]:
def show_run_metrics(run_name):
    oof_df, metrics, fold_dfs = load_run(run_name)
    print(f"=== {run_name} ===")
    print("Mean CV:", metrics["mean_cv"])
    print("OOF score:", metrics["oof_score"])
    print("\nPer-fold scores:", metrics["fold_scores"])

    plt.figure(figsize=(6,4))
    plt.bar(range(1, len(metrics["fold_scores"])+1), metrics["fold_scores"])
    plt.xlabel("Fold")
    plt.ylabel("Score")
    plt.title(f"Per-fold Scores: {run_name}")
    plt.show()

    return oof_df, metrics, fold_dfs

interact(show_run_metrics, run_name=run_dropdown)


interactive(children=(Dropdown(description='CV Run:', options=('mini_smoke',), value='mini_smoke'), Output()),…

<function __main__.show_run_metrics(run_name)>

In [16]:
GT_PATH = "data/train_solution.csv"  # or generate same as build_solution_df
gt_df = pd.read_csv(GT_PATH)

def compute_errors(pred_df, gt_df):
    merged = gt_df.merge(pred_df, on="row_id", suffixes=("_gt", "_pred"))
    merged["is_correct"] = merged["annotation_gt"] == merged["annotation_pred"]
    return merged

def show_error_stats(run_name):
    pred_df, metrics, _ = load_run(run_name)
    merged = compute_errors(pred_df, gt_df)

    acc = merged["is_correct"].mean()
    print(f"Accuracy (strict equality): {acc:.4f}")

    forged_gt = merged[merged["annotation_gt"]!="authentic"]
    authentic_gt = merged[merged["annotation_gt"]=="authentic"]

    forged_acc = (forged_gt["annotation_gt"] == forged_gt["annotation_pred"]).mean()
    authentic_acc = (authentic_gt["annotation_gt"] == authentic_gt["annotation_pred"]).mean()

    print("Forged accuracy:", forged_acc)
    print("Authentic accuracy:", authentic_acc)

    # display bar chart
    plt.figure(figsize=(6,4))
    plt.bar(["All","Forged","Authentic"], [acc, forged_acc, authentic_acc])
    plt.title(f"Error breakdown: {run_name}")
    plt.ylim(0, 1)
    plt.show()

interact(show_error_stats, run_name=run_dropdown)


FileNotFoundError: [Errno 2] No such file or directory: 'data/train_solution.csv'

In [None]:
def misclassified_viewer(run_name):
    pred_df, metrics, _ = load_run(run_name)
    merged = compute_errors(pred_df, gt_df)
    mis = merged[~merged["is_correct"]]

    ids = mis["row_id"].tolist()
    if not ids:
        print("No misclassifications—nice!")
        return
    
    def show_sample(row_id):
        row = mis[mis["row_id"] == row_id].iloc[0]
        print("GT:", row["annotation_gt"])
        print("Pred:", row["annotation_pred"])
        # optionally show the image and masks
        # display(Image.open(path_to_image(row_id)))
    
    interact(show_sample, row_id=Dropdown(options=ids, description="row_id"))

interact(misclassified_viewer, run_name=run_dropdown)


In [None]:
def compare_runs(run_a, run_b):
    a_df, a_met, _ = load_run(run_a)
    b_df, b_met, _ = load_run(run_b)

    print(f"{run_a} – OOF: {a_met['oof_score']}")
    print(f"{run_b} – OOF: {b_met['oof_score']}")

    plt.figure(figsize=(6,4))
    plt.bar(["A","B"], [a_met["oof_score"], b_met["oof_score"]])
    plt.title("OOF Comparison")
    plt.show()

interact(compare_runs,
         run_a=Dropdown(options=cv_runs),
         run_b=Dropdown(options=cv_runs))


In [None]:
def compare_cv_full(cv_run, full_run):
    pred_df, cv_metrics, _ = load_run(cv_run)
    f = full_train_runs[full_run]

    print("CV OOF:", cv_metrics["oof_score"])
    print("Full-train validation:", f["metrics"].get("val_score", "N/A"))

    plt.figure(figsize=(6,4))
    plt.bar(["CV OOF", "Full Train"], [cv_metrics["oof_score"], f["metrics"]["val_score"]])
    plt.title(f"{cv_run} vs {full_run}")
    plt.show()

interact(compare_cv_full,
         cv_run=Dropdown(options=cv_runs),
         full_run=Dropdown(options=list(full_train_runs.keys())))


In [None]:
def fold_drill(run_name, fold_number):
    _, _, fold_dfs = load_run(run_name)
    key = f"fold_{fold_number}_oof"
    if key not in fold_dfs:
        print("No per-fold file found.")
        return

    fold_df = fold_dfs[key]
    print("Rows:", len(fold_df))
    display(fold_df.head())

interact(
    fold_drill,
    run_name=run_dropdown,
    fold_number=IntSlider(min=1, max=5, step=1, value=1)
)
