# EA Evaluation: RL seeding vs manual (single notebook)

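Initialize once, run both EA variants on the same floor and configuration, then plot the results together. The plotting cell can be re-run on its own without repeating the EA runs: it reuses the histories still held in memory or, if there are none, the JSON logs saved by earlier runs.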

In [None]:
import random
import json
from datetime import datetime
from collections import defaultdict
from pathlib import Path
import sys
import matplotlib.pyplot as plt
import numpy as np

# Walk upward from the working directory until a folder containing
# backend/src is found. That folder becomes PROJECT_ROOT, and backend/src is
# put at the front of sys.path so the `ver0` imports below can resolve.
PROJECT_ROOT = None
for candidate in [Path.cwd().resolve(), *Path.cwd().resolve().parents]:
    module_root = candidate / "backend" / "src"
    if not module_root.exists():
        continue
    PROJECT_ROOT = candidate
    if str(module_root) not in sys.path:
        sys.path.insert(0, str(module_root))
    break
else:
    # Every ancestor was checked without hitting `break`.
    raise RuntimeError("Run this notebook from inside the repo")

from ver0.evolver import EAConfig, init_population, make_next_generation, mutate, evaluate_population
from ver0.fitness import Weights
from ver0.grid_encoder import encode_floorplan_to_grid
from ver0.rl_bandit import make_seed_bandit
from ver0.seeders import SEEDING_REGISTRY
from ver0.vars import (
    DEFAULT_GRID_SIZE,
    POPULATION_SIZE,
    GENERATIONS,
    CROSSOVER_RATE,
    MUTATION_RATE,
    TOURNAMENT_K,
    ELITE_FRACTION,
    RANDOM_SEED,
    QUADRANT_WEIGHT,
    OVERLAP_WEIGHT,
    AREA_WEIGHT,
    COMPACTNESS_WEIGHT,
    ADJACENCY_WEIGHT,
    LOCATION_WEIGHT,
    SECTION_WEIGHT,
    DISPERSION_WEIGHT,
    ROOM_USAGE_WEIGHT,
    MASK_WEIGHT,
    BUDGET_WEIGHT,
    SECTION_BBOX_WEIGHT,
    RELATIONSHIP_WEIGHT,
    REALISM_WEIGHT,
    REALISM_THRESHOLD,
    NO_CHANGE_PENALTY,
    ROTATE_IMAGE_K,
)

# Directory that receives one JSON log per notebook run; created if missing.
EA_LOG_DIR = PROJECT_ROOT / "backend" / "data" / "ea-logs" / "json"
EA_LOG_DIR.mkdir(parents=True, exist_ok=True)

# Fixed floors aligned with RL training
FIXED_FLOOR_IDS = [
    15, 35, 55, 75, 95,
    101, 110, 120, 135, 150,
    160, 175, 185, 190, 205,
    210, 230, 235, 245, 260,
    270, 285, 295, 309, 320,
    340, 345, 365, 370, 390,
    395, 412, 420, 440, 445,
    465, 470, 490, 495, 515,
    523, 540, 550, 565, 575,
    590, 600, 615, 634, 640,
    660, 665, 685, 690, 710,
    715, 740, 745, 765, 770,
    790, 795, 815, 820, 840,
    845, 856, 865, 880, 890,
    905, 915, 930, 940, 955,
    960, 967
]

# One floor is drawn per execution from the module-level `random` generator.
# No seeding of that generator is visible in this cell, so the pick can differ
# from run to run.
FLOOR_ID = random.choice(FIXED_FLOOR_IDS)
GRID_SIZE = DEFAULT_GRID_SIZE
ROTATE_K = ROTATE_IMAGE_K
# Notebook-local overrides: these assignments rebind names that were imported
# from ver0.vars, so the values below are the ones passed to EAConfig.
POPULATION_SIZE = 52
GENERATIONS = 100
CROSSOVER_RATE = 0.7
MUTATION_RATE = 0.25
TOURNAMENT_K = 3
ELITE_FRACTION = 0.08
RANDOM_SEED = 123456

# Single configuration object read by run_ea for both EA variants.
# The first seven settings are the notebook-level values assigned earlier in
# this cell; the fitness weights are passed through from ver0.vars unchanged.
EA_CONFIG = EAConfig(
    population_size=POPULATION_SIZE,
    generations=GENERATIONS,
    crossover_rate=CROSSOVER_RATE,
    mutation_rate=MUTATION_RATE,
    tournament_k=TOURNAMENT_K,
    elite_fraction=ELITE_FRACTION,
    random_seed=RANDOM_SEED,
    weights=Weights(
        quadrant=QUADRANT_WEIGHT,
        overlap=OVERLAP_WEIGHT,
        area=AREA_WEIGHT,
        compactness=COMPACTNESS_WEIGHT,
        adjacency=ADJACENCY_WEIGHT,
        location=LOCATION_WEIGHT,
        section=SECTION_WEIGHT,
        dispersion=DISPERSION_WEIGHT,
        room_usage=ROOM_USAGE_WEIGHT,
        budget=BUDGET_WEIGHT,
        section_bbox=SECTION_BBOX_WEIGHT,
        mask=MASK_WEIGHT,
        relationships=RELATIONSHIP_WEIGHT,
        realism=REALISM_WEIGHT,
    ),
    # The next five values are literals set in this notebook.
    # NOTE(review): the names suggest stagnation handling and mutation-rate
    # bounds; confirm their exact meaning in ver0.evolver.
    stagnation_threshold=20,
    restart_fraction=0.30,
    mutation_boost=1.5,
    mutation_floor=0.05,
    mutation_ceiling=0.65,
    no_change_penalty=NO_CHANGE_PENALTY,
)

# Encode the floor plan selected via FLOOR_ID; both EA runs use this sample
floor_dir = (
    PROJECT_ROOT
    / "backend"
    / "data"
    / "processed"
    / "floor_plans"
    / f"floor{FLOOR_ID:03d}"
)
sample = encode_floorplan_to_grid(
    floor_dir,
    grid_size=GRID_SIZE,
    rotate_k=ROTATE_K,
)

# Seeders under comparison: one chosen by the bandit, one taken from the registry
bandit = make_seed_bandit(
    PROJECT_ROOT / "backend" / "data" / "rl" / "seed_bandit.json",
    epsilon=0.05,
    rng=random.Random(RANDOM_SEED),
)
seed_name_rl, seed_fn_rl = bandit.select()
# The "manual" seeder is simply the first key of SEEDING_REGISTRY.
manual_seed_name = list(SEEDING_REGISTRY.keys())[0]
seed_fn_manual = SEEDING_REGISTRY[manual_seed_name]
print(f"Floor {FLOOR_ID}, RL seeder: {seed_name_rl}, manual seeder: {manual_seed_name}")

def run_ea(seed_fn, cfg_seed_offset: int = 0):
    """Run one EA with the given seeder and record the best fitness per generation.

    Uses the module-level ``sample`` and ``EA_CONFIG``. The RNG is seeded with
    ``EA_CONFIG.random_seed + cfg_seed_offset``.

    Args:
        seed_fn: Seeding callable handed to ``init_population`` and
            ``make_next_generation``.
        cfg_seed_offset: Integer added to the configured random seed.

    Returns:
        ``(history, best_fitness)``: ``history`` holds the lowest fitness of
        the population at each generation (generation 0 is the initial
        population); ``best_fitness`` is the lowest fitness in the final
        population.
    """

    def _score(genome):
        # Unevaluated genomes (fitness None) must never win the minimum.
        return float('inf') if genome.fitness is None else genome.fitness

    rng = random.Random(EA_CONFIG.random_seed + cfg_seed_offset)
    population = init_population(sample, EA_CONFIG, rng, seed_fn)
    evaluate_population(sample, population, EA_CONFIG)

    history = []
    for generation in range(EA_CONFIG.generations):
        # Generation 0 is the freshly seeded population; breed only afterwards.
        if generation:
            population = make_next_generation(sample, population, EA_CONFIG, rng, seed_fn, mutate)
            evaluate_population(sample, population, EA_CONFIG)
        history.append(min(population, key=_score).fitness)

    return history, min(population, key=_score).fitness


def config_summary(cfg: EAConfig):
    """Collect the reportable settings of an EA configuration into plain dicts.

    Each listed attribute is read with ``getattr`` and falls back to ``None``
    when absent, so a partially populated (or foreign) object still yields a
    complete summary.

    Args:
        cfg: Configuration object; its ``weights`` attribute, if any, supplies
            the per-term fitness weights.

    Returns:
        Dict of the scalar settings, plus a nested ``"weights"`` dict.
    """
    scalar_names = (
        "population_size",
        "generations",
        "crossover_rate",
        "mutation_rate",
        "tournament_k",
        "elite_fraction",
        "random_seed",
        "stagnation_threshold",
        "restart_fraction",
        "mutation_boost",
        "mutation_floor",
        "mutation_ceiling",
        "no_change_penalty",
    )
    weight_names = (
        "quadrant",
        "overlap",
        "area",
        "compactness",
        "adjacency",
        "location",
        "section",
        "dispersion",
        "room_usage",
        "budget",
        "section_bbox",
        "mask",
        "relationships",
        "realism",
    )

    report = {}
    for name in scalar_names:
        report[name] = getattr(cfg, name, None)

    weights_obj = getattr(cfg, "weights", None)
    weight_report = {}
    for name in weight_names:
        weight_report[name] = getattr(weights_obj, name, None)
    report["weights"] = weight_report
    return report


def save_run_log(
    log_dir: Path,
    floor_id: int,
    seed_name_rl: str,
    manual_seed_name: str,
    hist_rl,
    hist_manual,
    best_rl: float,
    best_manual: float,
    cfg: EAConfig,
):
    """Write one RL-vs-manual comparison run to a JSON file.

    The file is named ``ea_run_<run_id>_floor<floor_id>.json`` where ``run_id``
    is the current UTC time at one-second resolution, so two runs for the same
    floor within the same second write to the same path.

    Args:
        log_dir: Existing directory that receives the log file.
        floor_id: Floor identifier, stored in the payload and in the file name.
        seed_name_rl: Name of the seeder used for the RL variant.
        manual_seed_name: Name of the seeder used for the manual variant.
        hist_rl: Per-generation best fitness of the RL variant.
        hist_manual: Per-generation best fitness of the manual variant.
        best_rl: Final best fitness of the RL variant.
        best_manual: Final best fitness of the manual variant.
        cfg: EA configuration; summarised through ``config_summary``.

    Returns:
        Path of the JSON file that was written.
    """
    # Imported locally: the file-level import only brings in `datetime`.
    from datetime import timezone

    # datetime.utcnow() is deprecated since Python 3.12; use an aware UTC time.
    now = datetime.now(timezone.utc)
    run_id = now.strftime("%Y%m%dT%H%M%SZ")
    payload = {
        "run_id": run_id,
        # Strip tzinfo before formatting so the text ends in a single "Z"
        # rather than "+00:00Z".
        "timestamp_utc": now.replace(tzinfo=None).isoformat() + "Z",
        "floor_id": floor_id,
        "grid_size": GRID_SIZE,
        "rotate_k": ROTATE_K,
        "seeders": {"rl": seed_name_rl, "manual": manual_seed_name},
        "history": {"rl": hist_rl, "manual": hist_manual},
        "best_fitness": {"rl": best_rl, "manual": best_manual},
        "config": config_summary(cfg),
    }
    log_path = log_dir / f"ea_run_{run_id}_floor{floor_id:03d}.json"
    # Explicit encoding keeps the file independent of the platform locale.
    log_path.write_text(json.dumps(payload, indent=2), encoding="utf-8")
    return log_path


def load_logged_runs(log_dir: Path):
    """Load every run log (``*.json``) found directly inside ``log_dir``.

    Files are visited in sorted name order. A file that cannot be read or
    parsed, or whose top-level JSON value is not an object, is reported on
    stdout and skipped, so one bad file never aborts the load.

    Args:
        log_dir: Directory containing the JSON run logs.

    Returns:
        List of decoded run payloads (dicts), in file-name order.
    """
    runs = []
    for fpath in sorted(log_dir.glob("*.json")):
        try:
            payload = json.loads(fpath.read_text(encoding="utf-8"))
        except (OSError, ValueError) as exc:
            # ValueError covers both JSONDecodeError and UnicodeDecodeError.
            print(f"Skipping {fpath.name}: {exc}")
            continue
        if not isinstance(payload, dict):
            # Consumers call .get() on each run, so anything else would crash later.
            print(f"Skipping {fpath.name}: expected a JSON object, got {type(payload).__name__}")
            continue
        runs.append(payload)
    return runs


def generation_stats(runs, key: str):
    """Aggregate one variant's fitness histories across runs, per generation.

    Args:
        runs: Iterable of run payloads; each may hold ``run["history"][key]``,
            a list of per-generation fitness values (``None`` entries allowed).
        key: History to aggregate, e.g. ``"rl"`` or ``"manual"``.

    Returns:
        Dict with parallel lists ``mean``, ``lower`` (min), ``upper`` (max) and
        ``count``, indexed by generation. Generations without any sample carry
        ``None`` in the first three lists and ``0`` in ``count``. All four
        lists are empty when no run contributes a value.
    """
    samples_by_gen = defaultdict(list)
    for record in runs:
        series = record.get("history", {}).get(key)
        if series:
            for gen_idx, fitness in enumerate(series):
                if fitness is None:
                    continue
                samples_by_gen[gen_idx].append(fitness)

    stats = {"mean": [], "lower": [], "upper": [], "count": []}
    if not samples_by_gen:
        return stats

    last_gen = max(samples_by_gen.keys())
    for gen_idx in range(last_gen + 1):
        samples = samples_by_gen.get(gen_idx, [])
        stats["count"].append(len(samples))
        if not samples:
            # Keep the lists aligned even where no run reported this generation.
            stats["lower"].append(None)
            stats["upper"].append(None)
            stats["mean"].append(None)
            continue
        stats["lower"].append(min(samples))
        stats["upper"].append(max(samples))
        stats["mean"].append(sum(samples) / len(samples))
    return stats


# Run both
# The two variants share EA_CONFIG and the encoded sample, but run_ea adds the
# offset to EA_CONFIG.random_seed, so they use different RNG seeds (0 vs 1234).
hist_rl, best_rl = run_ea(seed_fn_rl, cfg_seed_offset=0)
hist_manual, best_manual = run_ea(seed_fn_manual, cfg_seed_offset=1234)

# Persist this run before reloading the logs, so the aggregates computed
# below include it.
run_log_path = save_run_log(
    log_dir=EA_LOG_DIR,
    floor_id=FLOOR_ID,
    seed_name_rl=seed_name_rl,
    manual_seed_name=manual_seed_name,
    hist_rl=hist_rl,
    hist_manual=hist_manual,
    best_rl=best_rl,
    best_manual=best_manual,
    cfg=EA_CONFIG,
)
print(f"Logged run to {run_log_path}")

# Per-generation aggregates over every JSON log found in EA_LOG_DIR.
logged_runs = load_logged_runs(EA_LOG_DIR)
rl_stats = generation_stats(logged_runs, "rl")
manual_stats = generation_stats(logged_runs, "manual")
print(f"Loaded {len(logged_runs)} logged runs for averaging.")


In [None]:
import matplotlib.pyplot as plt
import numpy as np
import json
from pathlib import Path

# Upper bound of the plotted y-axis; fitness values above it fall outside the view.
LIMIT = 5000

# Fallback: load persisted logs if prior cells (run generation/stats) were not
# executed, or if they left `logged_runs` empty.
if not locals().get('logged_runs'):
    candidate_root = None
    for candidate in [Path.cwd().resolve(), *Path.cwd().resolve().parents]:
        log_dir = candidate / 'backend' / 'data' / 'ea-logs' / 'json'
        if log_dir.exists():
            candidate_root = candidate
            break
    if candidate_root:
        # `log_dir` still refers to the directory that matched, because the
        # search loop breaks immediately after the hit.
        runs = []
        for fpath in sorted(log_dir.glob('*.json')):
            try:
                payload = json.loads(fpath.read_text(encoding='utf-8'))
            except (OSError, ValueError) as exc:
                # Report unreadable logs instead of dropping them silently.
                print(f"Skipping {fpath.name}: {exc}")
                continue
            if not isinstance(payload, dict):
                # Later code calls .get() on every run, so skip anything else.
                print(f"Skipping {fpath.name}: expected a JSON object, got {type(payload).__name__}")
                continue
            runs.append(payload)
        logged_runs = runs

# Reuse globals if present, otherwise initialize
# (at the top level of a module or notebook cell, locals() is the global
# namespace, so these lookups see names left behind by earlier cells).
logged_runs = locals().get('logged_runs', [])
rl_stats = locals().get('rl_stats', {})
manual_stats = locals().get('manual_stats', {})
hist_rl = locals().get('hist_rl', [])
hist_manual = locals().get('hist_manual', [])
# NaN is the "no value yet" placeholder for the single-run best fitness.
best_rl = locals().get('best_rl', float('nan'))
best_manual = locals().get('best_manual', float('nan'))

# If stats are missing but we have logs, compute a lightweight aggregate
if not rl_stats or not manual_stats:
    def _generation_stats(runs, key: str):
        """Return per-generation mean/min/max/count of ``history[key]`` across runs.

        Defined inside this cell so the aggregation does not depend on any
        function from another cell. ``None`` history entries are ignored;
        generations with no sample get ``None`` statistics and a count of 0.
        """
        by_gen = {}
        for run in runs:
            hist = run.get('history', {}).get(key)
            if not hist:
                continue
            for idx, value in enumerate(hist):
                if value is None:
                    continue
                by_gen.setdefault(idx, []).append(value)
        if not by_gen:
            return {'mean': [], 'lower': [], 'upper': [], 'count': []}
        max_gen = max(by_gen.keys())
        means, lowers, uppers, counts = [], [], [], []
        for gen in range(max_gen + 1):
            values = by_gen.get(gen, [])
            counts.append(len(values))
            if values:
                lowers.append(min(values))
                uppers.append(max(values))
                means.append(sum(values) / len(values))
            else:
                lowers.append(None)
                uppers.append(None)
                means.append(None)
        return {'mean': means, 'lower': lowers, 'upper': uppers, 'count': counts}

    # Only fill in the aggregate that is actually missing.
    if logged_runs and not rl_stats:
        rl_stats = _generation_stats(logged_runs, 'rl')
    if logged_runs and not manual_stats:
        manual_stats = _generation_stats(logged_runs, 'manual')

# If single-run histories are missing, reuse the most recent run for the per-run lines
# ("most recent" is the last element of logged_runs).
if logged_runs and (not hist_rl or not hist_manual):
    latest = logged_runs[-1]
    hist_rl = hist_rl or latest.get('history', {}).get('rl', []) or []
    hist_manual = hist_manual or latest.get('history', {}).get('manual', []) or []
    # `x == x` is False only for NaN, i.e. when the placeholder is still in place.
    best_rl = best_rl if best_rl == best_rl else latest.get('best_fitness', {}).get('rl', float('nan'))
    best_manual = best_manual if best_manual == best_manual else latest.get('best_fitness', {}).get('manual', float('nan'))


def nan_if_missing(values):
    """Return a list copy of ``values`` in which every ``None`` becomes NaN.

    Other entries, including falsy ones such as ``0``, are kept as they are.
    """
    cleaned = []
    for item in values:
        cleaned.append(item if item is not None else np.nan)
    return cleaned


def error_bounds(stats):
    """Turn aggregate stats into y values and asymmetric error-bar sizes.

    Args:
        stats: Mapping with parallel ``'mean'``, ``'lower'`` and ``'upper'``
            lists (entries may be ``None``), or a falsy value for "no data".

    Returns:
        ``(ys, [lower_sizes, upper_sizes])``. ``ys`` holds the means, with NaN
        where the mean is missing. The two size lists hold the non-negative
        distances from each mean down to ``lower`` and up to ``upper``, and 0
        where the mean or the bound is missing.
    """
    if not stats:
        return [], [[], []]

    ys, below, above = [], [], []
    triples = zip(stats.get('mean', []), stats.get('lower', []), stats.get('upper', []))
    for mean, low, up in triples:
        if mean is None:
            ys.append(np.nan)
            below.append(0)
            above.append(0)
            continue
        ys.append(mean)
        # A mean computed as sum/len can land an ulp outside [low, up] (e.g.
        # three samples of 0.1). Clamp at 0 because error-bar sizes must not
        # be negative.
        below.append(max(mean - low, 0) if low is not None else 0)
        above.append(max(up - mean, 0) if up is not None else 0)
    return ys, [below, above]


# X positions for the single-run curves: one point per recorded generation.
gens_rl = list(range(len(hist_rl)))
gens_manual = list(range(len(hist_manual)))

plt.figure(figsize=(10, 5))

# Single-run curves. Their colors are reused for the matching aggregate
# markers drawn further down.
line_rl, = plt.plot(gens_rl, hist_rl, marker='o', label=f"RL (this run {best_rl:.2f})")
line_rl.set_clip_on(True)

line_manual, = plt.plot(gens_manual, hist_manual, marker='o', linestyle='--',
                        label=f"Manual (this run {best_manual:.2f})")
line_manual.set_clip_on(True)

avg_rl_hist = rl_stats.get('mean', []) if rl_stats else []
avg_manual_hist = manual_stats.get('mean', []) if manual_stats else []
rl_counts = rl_stats.get('count', []) if rl_stats else []
manual_counts = manual_stats.get('count', []) if manual_stats else []

# Aggregates across logged runs: square marker at the mean, bars spanning min..max.
if avg_rl_hist:
    ys, yerr = error_bounds(rl_stats)
    plt.errorbar(
        range(len(avg_rl_hist)),
        nan_if_missing(ys),
        yerr=yerr,
        fmt='s',
        color=line_rl.get_color(),
        ecolor=line_rl.get_color(),
        elinewidth=1.2,
        capsize=3,
        alpha=0.6,
        label=f"RL avg ± range ({len(logged_runs)} runs)",
    )
if avg_manual_hist:
    ys, yerr = error_bounds(manual_stats)
    plt.errorbar(
        range(len(avg_manual_hist)),
        nan_if_missing(ys),
        yerr=yerr,
        fmt='s',
        color=line_manual.get_color(),
        ecolor=line_manual.get_color(),
        elinewidth=1.2,
        capsize=3,
        alpha=0.6,
        label=f"Manual avg ± range ({len(logged_runs)} runs)",
    )

plt.title('Best fitness per generation')
plt.xlabel('Generation')
plt.ylabel('Fitness (lower is better)')

# Clips anything above 5k
plt.ylim([0, LIMIT])

ax = plt.gca()
step = 5
# Size the ticks from the longest plotted series rather than the RL single-run
# history alone; otherwise they vanish or stop short whenever that history is
# empty or shorter than the other curves.
longest_series = max(len(gens_rl), len(gens_manual), len(avg_rl_hist), len(avg_manual_hist))
plt.xticks(range(0, longest_series, step))
ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, _: f"{int(x):,}"))

plt.grid(True, which='both', linestyle='--', alpha=0.4)
plt.legend()
plt.tight_layout()
plt.show()

print(f'RL best fitness (this run): {best_rl:.3f}')
print(f'Manual best fitness (this run): {best_manual:.3f}')
print(f'Runs included in aggregates: {len(logged_runs)}')
# Only the generation-0 sample count is reported for each variant.
if rl_counts:
    print(f'RL samples per generation: {rl_counts[0]}')
if manual_counts:
    print(f'Manual samples per generation: {manual_counts[0]}')
