# EA Evaluation: RL seeding vs manual (single notebook)

Initialize once, run both EA variants on the same floor and configuration, then plot them together. The plotting cell can be re-run on its own: it reuses the stored histories instead of re-running the EA.

In [None]:
import random
import json
from datetime import datetime
from collections import defaultdict
from pathlib import Path
import sys
import matplotlib.pyplot as plt
import numpy as np

# Walk from the working directory up through its parents until a directory
# containing backend/src is found; that directory becomes PROJECT_ROOT and
# backend/src is put at the front of sys.path so the project imports resolve.
PROJECT_ROOT = None
for candidate in (Path.cwd().resolve(), *Path.cwd().resolve().parents):
    module_root = candidate / "backend" / "src"
    if not module_root.exists():
        continue
    PROJECT_ROOT = candidate
    if str(module_root) not in sys.path:
        sys.path.insert(0, str(module_root))
    break
else:
    # The loop ran to completion without finding backend/src anywhere.
    raise RuntimeError("Run this notebook from inside the repo")

from ver0.evolver import EAConfig, init_population, make_next_generation, mutate, evaluate_population
from ver0.fitness import Weights
from ver0.grid_encoder import encode_floorplan_to_grid
from ver0.rl_bandit import make_seed_bandit
from ver0.seeders import SEEDING_REGISTRY
from ver0.vars import (
    DEFAULT_GRID_SIZE,
    POPULATION_SIZE,
    GENERATIONS,
    CROSSOVER_RATE,
    MUTATION_RATE,
    TOURNAMENT_K,
    ELITE_FRACTION,
    RANDOM_SEED,
    QUADRANT_WEIGHT,
    OVERLAP_WEIGHT,
    AREA_WEIGHT,
    COMPACTNESS_WEIGHT,
    ADJACENCY_WEIGHT,
    LOCATION_WEIGHT,
    SECTION_WEIGHT,
    DISPERSION_WEIGHT,
    ROOM_USAGE_WEIGHT,
    MASK_WEIGHT,
    BUDGET_WEIGHT,
    SECTION_BBOX_WEIGHT,
    RELATIONSHIP_WEIGHT,
    REALISM_WEIGHT,
    REALISM_THRESHOLD,
    NO_CHANGE_PENALTY,
    ROTATE_IMAGE_K,
)

# Directory that receives one JSON log per notebook run; created if missing.
EA_LOG_DIR = PROJECT_ROOT / "backend" / "data" / "ea-logs" / "json"
EA_LOG_DIR.mkdir(parents=True, exist_ok=True)

# Fixed floors aligned with RL training
FIXED_FLOOR_IDS = [
    15, 35, 55, 75, 95,
    101, 110, 120, 135, 150,
    160, 175, 185, 190, 205,
    210, 230, 235, 245, 260,
    270, 285, 295, 309, 320,
    340, 345, 365, 370, 390,
    395, 412, 420, 440, 445,
    465, 470, 490, 495, 515,
    523, 540, 550, 565, 575,
    590, 600, 615, 634, 640,
    660, 665, 685, 690, 710,
    715, 740, 745, 765, 770,
    790, 795, 815, 820, 840,
    845, 856, 865, 880, 890,
    905, 915, 930, 940, 955,
    960, 967
]

# One floor is drawn each time this cell executes. The module-level RNG is not
# seeded in this cell, so the chosen floor can differ between executions.
FLOOR_ID = random.choice(FIXED_FLOOR_IDS)
GRID_SIZE = DEFAULT_GRID_SIZE
ROTATE_K = ROTATE_IMAGE_K
# Notebook-local overrides: these assignments rebind the names imported from
# ver0.vars above, so the values below are the ones passed to EAConfig.
POPULATION_SIZE = 52
GENERATIONS = 100
CROSSOVER_RATE = 0.7
MUTATION_RATE = 0.25
TOURNAMENT_K = 3
ELITE_FRACTION = 0.08
RANDOM_SEED = 123456

# Single EAConfig shared by both runs (RL-seeded and manual); run_ea reads it
# as a global, so the two variants differ only in the seeding function and the
# RNG seed offset they are called with.
EA_CONFIG = EAConfig(
    population_size=POPULATION_SIZE,
    generations=GENERATIONS,
    crossover_rate=CROSSOVER_RATE,
    mutation_rate=MUTATION_RATE,
    tournament_k=TOURNAMENT_K,
    elite_fraction=ELITE_FRACTION,
    random_seed=RANDOM_SEED,
    # Fitness-term weights are taken unchanged from the ver0.vars constants.
    weights=Weights(
        quadrant=QUADRANT_WEIGHT,
        overlap=OVERLAP_WEIGHT,
        area=AREA_WEIGHT,
        compactness=COMPACTNESS_WEIGHT,
        adjacency=ADJACENCY_WEIGHT,
        location=LOCATION_WEIGHT,
        section=SECTION_WEIGHT,
        dispersion=DISPERSION_WEIGHT,
        room_usage=ROOM_USAGE_WEIGHT,
        budget=BUDGET_WEIGHT,
        section_bbox=SECTION_BBOX_WEIGHT,
        mask=MASK_WEIGHT,
        relationships=RELATIONSHIP_WEIGHT,
        realism=REALISM_WEIGHT,
    ),
    # NOTE(review): the knobs below are hard-coded in this notebook; their
    # exact semantics are defined by ver0.evolver — confirm there before tuning.
    stagnation_threshold=20,
    restart_fraction=0.30,
    mutation_boost=1.5,
    mutation_floor=0.05,
    mutation_ceiling=0.65,
    no_change_penalty=NO_CHANGE_PENALTY,
)

# Encode the chosen floor plan into the grid representation the EA works on.
floor_dir = PROJECT_ROOT.joinpath("backend", "data", "processed", "floor_plans", f"floor{FLOOR_ID:03d}")
sample = encode_floorplan_to_grid(floor_dir, grid_size=GRID_SIZE, rotate_k=ROTATE_K)

# Pick the two seeding functions under comparison: one selected by the bandit,
# one taken as the first entry of the seeding registry.
bandit = make_seed_bandit(
    PROJECT_ROOT.joinpath("backend", "data", "rl", "seed_bandit.json"),
    epsilon=0.05,
    rng=random.Random(RANDOM_SEED),
)
seed_name_rl, seed_fn_rl = bandit.select()
manual_seed_name = list(SEEDING_REGISTRY)[0]
seed_fn_manual = SEEDING_REGISTRY[manual_seed_name]
print(f"Floor {FLOOR_ID}, RL seeder: {seed_name_rl}, manual seeder: {manual_seed_name}")

def run_ea(seed_fn, cfg_seed_offset: int = 0):
    """Run one EA pass on the global ``sample`` with the global ``EA_CONFIG``.

    Args:
        seed_fn: Seeding function handed to ``init_population`` and
            ``make_next_generation``.
        cfg_seed_offset: Added to ``EA_CONFIG.random_seed`` to derive this
            run's private ``random.Random`` instance.

    Returns:
        ``(history, best_fitness)`` where ``history`` holds the lowest fitness
        in the population for every generation (generation 0 is the initial
        population) and ``best_fitness`` is the lowest fitness in the final
        population.
    """
    def _fitness_or_inf(genome):
        # Individuals without a fitness value sort last when taking the minimum.
        return float('inf') if genome.fitness is None else genome.fitness

    rng = random.Random(EA_CONFIG.random_seed + cfg_seed_offset)
    population = init_population(sample, EA_CONFIG, rng, seed_fn)
    evaluate_population(sample, population, EA_CONFIG)

    history = []
    for generation in range(EA_CONFIG.generations):
        # Generation 0 records the freshly seeded population as-is.
        if generation > 0:
            population = make_next_generation(sample, population, EA_CONFIG, rng, seed_fn, mutate)
            evaluate_population(sample, population, EA_CONFIG)
        history.append(min(population, key=_fitness_or_inf).fitness)

    champion = min(population, key=_fitness_or_inf)
    return history, champion.fitness


def config_summary(cfg: EAConfig):
    """Return a plain-dict snapshot of the EA settings for the run log.

    Every attribute is read with ``getattr(..., None)``, so a missing attribute
    (or a missing ``weights`` object) is recorded as ``None`` instead of
    raising.

    Args:
        cfg: Configuration object to summarise.

    Returns:
        A dict with one entry per scalar setting plus a nested ``"weights"``
        dict holding the individual fitness-term weights.
    """
    scalar_names = (
        "population_size",
        "generations",
        "crossover_rate",
        "mutation_rate",
        "tournament_k",
        "elite_fraction",
        "random_seed",
        "stagnation_threshold",
        "restart_fraction",
        "mutation_boost",
        "mutation_floor",
        "mutation_ceiling",
        "no_change_penalty",
    )
    weight_names = (
        "quadrant",
        "overlap",
        "area",
        "compactness",
        "adjacency",
        "location",
        "section",
        "dispersion",
        "room_usage",
        "budget",
        "section_bbox",
        "mask",
        "relationships",
        "realism",
    )

    summary = {}
    for name in scalar_names:
        summary[name] = getattr(cfg, name, None)

    weights_obj = getattr(cfg, "weights", None)
    weight_values = {}
    for name in weight_names:
        weight_values[name] = getattr(weights_obj, name, None)
    summary["weights"] = weight_values
    return summary


def save_run_log(
    log_dir: Path,
    floor_id: int,
    seed_name_rl: str,
    manual_seed_name: str,
    hist_rl,
    hist_manual,
    best_rl: float,
    best_manual: float,
    cfg: EAConfig,
):
    """Write one JSON log describing a paired RL/manual EA run.

    Args:
        log_dir: Directory that receives the log file; created if missing.
        floor_id: Floor the run was executed on (also part of the file name).
        seed_name_rl: Name of the seeder used for the RL variant.
        manual_seed_name: Name of the seeder used for the manual variant.
        hist_rl: Per-generation best fitness of the RL variant.
        hist_manual: Per-generation best fitness of the manual variant.
        best_rl: Final best fitness of the RL variant.
        best_manual: Final best fitness of the manual variant.
        cfg: EA configuration, stored via ``config_summary``.

    Returns:
        Path of the file that was written.
    """
    # Local import keeps this block self-contained; the file only imports
    # ``datetime`` from the datetime module.
    from datetime import timezone

    # ``datetime.utcnow()`` is deprecated since Python 3.12. Take an aware UTC
    # timestamp and drop the tzinfo so both formatted strings below keep the
    # exact shape earlier logs used (naive ISO text followed by "Z").
    now = datetime.now(timezone.utc).replace(tzinfo=None)
    run_id = now.strftime("%Y%m%dT%H%M%SZ")
    payload = {
        "run_id": run_id,
        "timestamp_utc": now.isoformat() + "Z",
        "floor_id": floor_id,
        "grid_size": GRID_SIZE,
        "rotate_k": ROTATE_K,
        "seeders": {"rl": seed_name_rl, "manual": manual_seed_name},
        "history": {"rl": hist_rl, "manual": hist_manual},
        "best_fitness": {"rl": best_rl, "manual": best_manual},
        "config": config_summary(cfg),
    }
    # Do not rely on the caller having created the directory beforehand.
    log_dir.mkdir(parents=True, exist_ok=True)
    log_path = log_dir / f"ea_run_{run_id}_floor{floor_id:03d}.json"
    log_path.write_text(json.dumps(payload, indent=2), encoding="utf-8")
    return log_path


def load_logged_runs(log_dir: Path):
    """Load every ``*.json`` run log in *log_dir*, in sorted file-name order.

    Loading is best-effort: a file that cannot be read or parsed, or whose
    top-level JSON value is not an object, is reported on stdout and skipped so
    a single stray file cannot break the aggregation that consumes the result.

    Args:
        log_dir: Directory to scan. A missing directory yields an empty list.

    Returns:
        List of decoded run payloads (dicts).
    """
    if not log_dir.is_dir():
        return []

    runs = []
    for fpath in sorted(log_dir.glob("*.json")):
        try:
            payload = json.loads(fpath.read_text(encoding="utf-8"))
        except (OSError, ValueError) as exc:
            # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
            print(f"Skipping {fpath.name}: {exc}")
            continue
        if not isinstance(payload, dict):
            # Consumers call ``run.get(...)`` on every entry, so anything other
            # than a JSON object would crash them later.
            print(f"Skipping {fpath.name}: expected a JSON object, got {type(payload).__name__}")
            continue
        runs.append(payload)
    return runs


def generation_stats(runs, key: str):
    """Aggregate per-generation fitness across logged runs for one variant.

    Args:
        runs: Iterable of run payloads; each is queried as
            ``run["history"][key]`` and skipped when that history is missing
            or empty.
        key: History key to aggregate (for example ``"rl"`` or ``"manual"``).

    Returns:
        Dict with parallel lists ``"mean"``, ``"lower"`` (minimum), ``"upper"``
        (maximum) and ``"count"``, indexed by generation. A generation with no
        samples has ``None`` for the three statistics and ``0`` for the count.
        All four lists are empty when no run contributed any value.
    """
    samples = defaultdict(list)
    for record in runs:
        series = record.get("history", {}).get(key)
        if series:
            for generation, fitness in enumerate(series):
                if fitness is None:
                    continue
                samples[generation].append(fitness)

    stats = {"mean": [], "lower": [], "upper": [], "count": []}
    if not samples:
        return stats

    for generation in range(max(samples) + 1):
        # ``.get`` avoids creating empty buckets in the defaultdict.
        bucket = samples.get(generation, [])
        stats["count"].append(len(bucket))
        if bucket:
            low, high, avg = min(bucket), max(bucket), sum(bucket) / len(bucket)
        else:
            low = high = avg = None
        stats["lower"].append(low)
        stats["upper"].append(high)
        stats["mean"].append(avg)
    return stats


# Execute both variants. The manual run uses a seed offset, so it draws from a
# different RNG stream than the RL-seeded run.
hist_rl, best_rl = run_ea(seed_fn_rl)
hist_manual, best_manual = run_ea(seed_fn_manual, cfg_seed_offset=1234)

# Persist this pair of runs so later executions can aggregate over all of them.
run_log_path = save_run_log(
    EA_LOG_DIR,
    FLOOR_ID,
    seed_name_rl,
    manual_seed_name,
    hist_rl,
    hist_manual,
    best_rl,
    best_manual,
    EA_CONFIG,
)
print(f"Logged run to {run_log_path}")

# Re-read every stored log (including the one just written) and aggregate.
logged_runs = load_logged_runs(EA_LOG_DIR)
rl_stats, manual_stats = [generation_stats(logged_runs, variant) for variant in ("rl", "manual")]
print(f"Loaded {len(logged_runs)} logged runs for averaging.")


In [None]:
import matplotlib.pyplot as plt
import numpy as np
import json
from pathlib import Path

# Upper y-axis bound shared by all panels, and one colour per EA variant.
LIMIT = 5000
RL_COLOR = '#1f77b4'
MANUAL_COLOR = '#ff7f0e'

# Fallback: load persisted logs if prior cells (run generation/stats) were not
# executed, or left an empty list. The first directory found while walking up
# from the working directory that contains backend/data/ea-logs/json is used.
if not locals().get('logged_runs'):
    logged_runs = []
    for candidate in [Path.cwd().resolve(), *Path.cwd().resolve().parents]:
        log_dir = candidate / 'backend' / 'data' / 'ea-logs' / 'json'
        if not log_dir.exists():
            continue
        for fpath in sorted(log_dir.glob('*.json')):
            try:
                logged_runs.append(json.loads(fpath.read_text()))
            except (OSError, ValueError) as exc:
                # Report unreadable logs instead of dropping them silently, so
                # a lower-than-expected run count can be explained.
                print(f"Skipping {fpath.name}: {exc}")
        break

# Reuse stats computed by an earlier cell if present, otherwise start empty.
rl_stats = locals().get('rl_stats', {})
manual_stats = locals().get('manual_stats', {})

# Stats can be absent when only this cell was executed; rebuild whichever
# aggregate is missing from the loaded logs.
if logged_runs and not (rl_stats and manual_stats):
    def _generation_stats(runs, key: str):
        """Return per-generation mean/min/max/count of ``history[key]`` over *runs*."""
        buckets = {}
        for run in runs:
            series = run.get('history', {}).get(key)
            # A missing or empty history contributes nothing.
            for gen, value in enumerate(series or ()):
                if value is not None:
                    buckets.setdefault(gen, []).append(value)

        result = {'mean': [], 'lower': [], 'upper': [], 'count': []}
        if buckets:
            for gen in range(max(buckets) + 1):
                values = buckets.get(gen, [])
                result['count'].append(len(values))
                if values:
                    result['lower'].append(min(values))
                    result['upper'].append(max(values))
                    result['mean'].append(sum(values) / len(values))
                else:
                    # Generation index with no samples at all.
                    result['lower'].append(None)
                    result['upper'].append(None)
                    result['mean'].append(None)
        return result

    if not rl_stats:
        rl_stats = _generation_stats(logged_runs, 'rl')
    if not manual_stats:
        manual_stats = _generation_stats(logged_runs, 'manual')


def nan_if_missing(values):
    """Return a new list copying *values* with every ``None`` replaced by ``np.nan``."""
    cleaned = []
    for item in values:
        cleaned.append(item if item is not None else np.nan)
    return cleaned


def error_bounds(stats):
    """Convert aggregate stats into plot values and asymmetric error extents.

    Args:
        stats: Dict with ``'mean'``, ``'lower'`` and ``'upper'`` lists, or a
            falsy value (treated as having no data).

    Returns:
        ``(ys, [below, above])``. ``ys`` holds each mean, with ``np.nan`` where
        the mean is ``None``. ``below``/``above`` hold the distance from the
        mean down to ``lower`` and up to ``upper``; the distance is ``0`` where
        the mean or the respective bound is ``None``.
    """
    source = stats or {}
    centers, below, above = [], [], []
    triples = zip(source.get('mean', []), source.get('lower', []), source.get('upper', []))
    for mean, low, up in triples:
        if mean is not None:
            centers.append(mean)
            below.append(0 if low is None else mean - low)
            above.append(0 if up is None else up - mean)
            continue
        # No mean for this generation: plot a gap with zero-length error bars.
        centers.append(np.nan)
        below.append(0)
        above.append(0)
    return centers, [below, above]


def _axis_basics(ax, title: str, length: int, limit: float = LIMIT):
    """Apply the shared title, labels, limits, ticks and grid to one panel.

    Args:
        ax: Axes object to configure.
        title: Panel title.
        length: Number of generations shown; x ticks are placed every 5
            generations below this value (at least the tick at 0).
        limit: Upper bound of the y axis; the lower bound is 0.
    """
    def _with_thousands_separator(value, _position):
        return f"{int(value):,}"

    tick_step = 5
    tick_stop = max(1, length)

    ax.set_title(title)
    ax.set_xlabel('Generation')
    ax.set_ylabel('Fitness (lower is better)')
    ax.set_ylim([0, limit])
    ax.set_xticks(range(0, tick_stop, tick_step))
    ax.yaxis.set_major_formatter(plt.FuncFormatter(_with_thousands_separator))
    ax.grid(True, which='both', linestyle='--', alpha=0.4)


def _plot_aggregate(ax, stats, color: str, label: str, *, connect_start: bool = False, include_start: bool = False):
    """Draw one variant's aggregate curve with min/max error bars on *ax*.

    Args:
        ax: Axes object to draw on.
        stats: Aggregate dict with ``'mean'``, ``'lower'`` and ``'upper'``
            lists; nothing is drawn when ``'mean'`` is missing or empty.
        color: Colour used for every artist of this variant.
        label: Base legend label.
        connect_start: When true, draw the whole series as a connected line
            with circle markers and attach the legend label to that line.
            Otherwise mark generation 0 with a single square and attach the
            label to the error bars.
        include_start: When true and the generation-0 mean is available,
            append it to the legend label.
    """
    if not stats.get('mean'):
        return
    ys, yerr = error_bounds(stats)
    if not ys:
        return

    first = ys[0]
    suffix = f" (start {first:.2f})" if include_start and not np.isnan(first) else ""
    label_final = f"{label}{suffix}"

    if connect_start:
        ax.plot(
            range(len(ys)),
            nan_if_missing(ys),
            color=color,
            alpha=0.85,
            linewidth=1.5,
            marker='o',
            markersize=3,
            label=label_final,
        )
    elif not np.isnan(first):
        ax.plot(0, first, marker='s', color=color, alpha=0.9)

    if len(ys) > 1:
        # Error bars cover generation 1 onward; generation 0 is drawn above.
        lower_err, upper_err = yerr
        ax.errorbar(
            list(range(1, len(ys))),
            nan_if_missing(ys[1:]),
            yerr=[lower_err[1:], upper_err[1:]],
            fmt='s',
            color=color,
            ecolor=color,
            elinewidth=1.2,
            capsize=3,
            alpha=0.6,
            label=None if connect_start else label_final,
        )
    elif not connect_start:
        # Single-generation series: add an empty artist so the legend still
        # shows this variant.
        ax.plot([], [], color=color, label=label_final)


# Number of generations available per variant; the combined panel uses the
# longer of the two (and at least 1).
rl_len = len(rl_stats.get('mean') or [])
manual_len = len(manual_stats.get('mean') or [])
max_len = max(rl_len, manual_len, 1)

fig, axes = plt.subplots(3, 1, figsize=(10, 15), sharey=True)
ax, ax_rl, ax_manual = axes

_rl_series = (rl_stats, RL_COLOR, f"RL avg ± range ({len(logged_runs)} runs)")
_manual_series = (manual_stats, MANUAL_COLOR, f"Manual avg ± range ({len(logged_runs)} runs)")

# One entry per panel: axes, title, x-axis length, and the series to draw as
# (series, connect_start) pairs. In the combined panel only the RL series is
# drawn as a connected line.
_panels = [
    (ax, 'Combined', max_len, [(_rl_series, True), (_manual_series, False)]),
    (ax_rl, 'EA with RL seeding', rl_len, [(_rl_series, True)]),
    (ax_manual, 'EA without RL (manual seeding)', manual_len, [(_manual_series, True)]),
]
for _panel_ax, _title, _length, _series_specs in _panels:
    for (_stats, _color, _label), _connect in _series_specs:
        _plot_aggregate(_panel_ax, _stats, _color, _label, connect_start=_connect, include_start=True)
    _axis_basics(_panel_ax, _title, _length)
    # Only add a legend when something with a label was actually drawn.
    handles, labels = _panel_ax.get_legend_handles_labels()
    if handles:
        _panel_ax.legend(loc='upper right')

plt.tight_layout()
plt.show()

print(f"Runs included in aggregates: {len(logged_runs)}")
# The reported sample count is the one for generation 0.
if rl_stats.get('count'):
    print(f"RL samples per generation: {rl_stats['count'][0]}")
if manual_stats.get('count'):
    print(f"Manual samples per generation: {manual_stats['count'][0]}")
