In [None]:
import json
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
from IPython.display import display

def find_eval_dir():
    """Locate the directory that holds the EA run logs.

    Starting at the current working directory and moving up through each of
    its parents, every level is probed for ``backend/data/ea-logs/json`` and
    then ``data/ea-logs/json``. The nearest level wins, and within a level the
    ``backend/`` layout is preferred.

    Returns:
        The first candidate path that is an existing directory.

    Raises:
        FileNotFoundError: If no candidate exists at any level.
    """
    layouts = ("backend/data/ea-logs/json", "data/ea-logs/json")
    start = Path.cwd()
    for ancestor in (start, *start.parents):
        for layout in layouts:
            log_dir = ancestor / layout
            if log_dir.is_dir():
                return log_dir
    raise FileNotFoundError("Could not locate backend/data/ea-logs/json")

# Resolve the log directory once; find_eval_dir() raises FileNotFoundError
# when none of the known layouts exists.
EVAL_DIR = find_eval_dir()
print("Using EVAL_DIR:", EVAL_DIR)

# Build one flat row per log file that contains BOTH variants, so every row is
# a paired comparison of "ea_rl" against "ea_only".
rows = []
for path in sorted(EVAL_DIR.glob("ea_run_*.json")):
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with path.open(encoding="utf-8") as f:
        data = json.load(f)

    # Index the runs by variant name. A null/missing "runs" list or a run
    # entry without a "variant" key must not abort the whole load: such files
    # simply fail the completeness check below and are skipped.
    runs_by_variant = {
        r.get("variant"): r for r in (data.get("runs") or [])
    }
    if not {"ea_rl", "ea_only"} <= runs_by_variant.keys():
        print(f"Skipping {path.name}: missing expected variants")
        continue

    # Read the config as tolerantly as every other field: a log without a
    # config section yields empty "gens"/"population" cells instead of a
    # KeyError that stops the loop.
    cfg = data.get("config") or {}
    rl_run = runs_by_variant["ea_rl"]
    ea_run = runs_by_variant["ea_only"]

    rows.append({
        # Run-level metadata.
        "file": path.name,
        "run_id": data.get("run_id"),
        "floor_id": data.get("floor_id"),
        "grid_size": data.get("grid_size"),
        "rotate_k": data.get("rotate_k"),
        "gens": cfg.get("generations"),
        "population": cfg.get("population_size"),

        # Results of the "ea_rl" variant.
        "rl_best": rl_run.get("best_fitness_final"),
        "rl_best_initial": rl_run.get("best_fitness_initial"),
        "rl_gen_at_best": rl_run.get("gen_at_best"),
        "rl_duration_s": rl_run.get("duration_s"),
        "rl_is_real": rl_run.get("is_real"),
        "rl_realism_score": rl_run.get("realism_score"),

        # Results of the "ea_only" variant.
        "ea_best": ea_run.get("best_fitness_final"),
        "ea_best_initial": ea_run.get("best_fitness_initial"),
        "ea_gen_at_best": ea_run.get("gen_at_best"),
        "ea_duration_s": ea_run.get("duration_s"),
        "ea_is_real": ea_run.get("is_real"),
        "ea_realism_score": ea_run.get("realism_score"),
    })

# Keys missing from a log were read as None and show up as empty cells here.
df = pd.DataFrame(rows)

# Summarise and plot the paired EA+RL vs EA-only results held in `df`.
# Fitness is minimised throughout (see the boxplot axis label), so a positive
# `ea_best - rl_best` means the RL variant reached the better solution.
if df.empty:
    print(f"No runs found in {EVAL_DIR}")
else:
    display(df.head())

    # Fields absent from a log were loaded as None. Coerce the columns used in
    # arithmetic to numeric so missing values become NaN even when a whole
    # column is empty (object dtype would otherwise raise on subtraction).
    metric_cols = [
        "rl_best", "ea_best",
        "rl_gen_at_best", "ea_gen_at_best",
        "rl_duration_s", "ea_duration_s",
    ]
    df[metric_cols] = df[metric_cols].apply(pd.to_numeric, errors="coerce")

    df["improvement_abs"] = df["ea_best"] - df["rl_best"]
    # A zero EA baseline would turn the ratio into +/-inf and poison the mean;
    # mask it to NaN so pandas reductions skip those rows instead.
    ea_baseline = df["ea_best"].where(df["ea_best"] != 0)
    df["improvement_rel"] = df["improvement_abs"] / ea_baseline
    df["rl_better"] = df["rl_best"] < df["ea_best"]
    # Seconds saved by the RL variant (a difference, not a ratio).
    df["time_speedup_s"] = df["ea_duration_s"] - df["rl_duration_s"]

    # Only rows with both best-fitness values are a real pair. Comparing NaN
    # is always False, which would otherwise be counted as "RL not better".
    paired = df.dropna(subset=["rl_best", "ea_best"])
    n_incomplete = len(df) - len(paired)
    if n_incomplete:
        print(f"Ignoring {n_incomplete} run(s) with a missing best fitness")

    n_runs = len(paired)
    print("Total paired runs:", n_runs)
    print("RL better in:", paired["rl_better"].sum(), "runs")
    print("RL better (%):", 100 * paired["rl_better"].mean())

    print("Mean best fitness:")
    print("  RL:", paired["rl_best"].mean())
    print("  EA:", paired["ea_best"].mean())

    print("Median best fitness:")
    print("  RL:", paired["rl_best"].median())
    print("  EA:", paired["ea_best"].median())

    print("Mean relative improvement (%):", 100 * paired["improvement_rel"].mean())
    print("Median relative improvement (%):", 100 * paired["improvement_rel"].median())

    print("Mean gen_at_best:")
    print("  RL:", paired["rl_gen_at_best"].mean())
    print("  EA:", paired["ea_gen_at_best"].mean())

    print("Mean duration (s):")
    print("  RL:", paired["rl_duration_s"].mean())
    print("  EA:", paired["ea_duration_s"].mean())

    if paired.empty:
        print("No complete pairs to plot")
    else:
        # Histogram of the per-run advantage; the dashed line marks a tie.
        plt.figure(figsize=(6, 4))
        plt.hist(paired["improvement_abs"], bins=40)
        plt.axvline(0, linestyle="--")
        plt.xlabel("ea_best - rl_best (positive = RL better)")
        plt.ylabel("Count")
        plt.title("Distribution of RL advantage over pure EA")
        plt.show()

        # Side-by-side distributions. NaNs were dropped above because boxplot
        # draws an empty box for data containing NaN.
        # NOTE: `tick_labels` needs Matplotlib >= 3.9 (older releases: `labels`).
        plt.figure(figsize=(4, 4))
        plt.boxplot(
            [paired["rl_best"], paired["ea_best"]],
            tick_labels=["EA+RL", "EA only"],
        )
        plt.ylabel("Best fitness (lower is better)")
        plt.title("Best fitness across paired runs")
        plt.show()

        # Per-run scatter with the y = x reference line. The line spans the
        # range of BOTH axes so it never stops short of the plotted points.
        plt.figure(figsize=(5, 5))
        plt.scatter(paired["ea_best"], paired["rl_best"], alpha=0.5)
        diag_lo = min(paired["ea_best"].min(), paired["rl_best"].min())
        diag_hi = max(paired["ea_best"].max(), paired["rl_best"].max())
        plt.plot([diag_lo, diag_hi], [diag_lo, diag_hi], linestyle="--")
        plt.xlabel("EA only best fitness")
        plt.ylabel("EA+RL best fitness")
        plt.title("Per-run comparison of best fitness")
        plt.show()