# Engine performance analysis notebook

This notebook ingests the `engine-perf` harness artifact, extracts scheduler/GC telemetry, and derives updated AutoParry
parameters for the heavy-load manifests. Run it after `python tests/run_harness.py --suite engine-perf` to compare commits
and feed tuned values back into `tests/engine/perf.manifest.lua`.

In [None]:
from __future__ import annotations

import json
from pathlib import Path
from statistics import mean

# Metrics file this notebook reads; the error below names the harness command
# that is expected to generate it.
ARTIFACT_PATH = Path("tests/artifacts/engine-perf/engine_perf_metrics.json")
if not ARTIFACT_PATH.exists():
    raise FileNotFoundError("Run `python tests/run_harness.py --suite engine-perf` to generate engine perf artifacts.")

# Decode explicitly as UTF-8 (the JSON interchange encoding) instead of relying
# on the platform's locale default, which differs between machines.
payload = json.loads(ARTIFACT_PATH.read_text(encoding="utf-8"))
# Bare expression: shown as the cell's output when run in a notebook.
payload

In [None]:
def collect_scheduler_snapshots(metrics_payload: dict[str, object]) -> dict[str, dict[str, float]]:
    """Extract per-scenario scheduler telemetry from a metrics payload.

    Walks ``metrics_payload["scenarios"]`` and, for every scenario that has a
    non-empty ``metrics.performance.scheduler`` object, records four values
    keyed by the scenario's ``id`` (``"unknown"`` when the id is absent):

    * ``utilisation``      <- ``scheduler.utilisation``
    * ``avg_step``         <- ``scheduler.averageStep``
    * ``events_per_step``  <- ``scheduler.events.perStep``
    * ``queue_depth``      <- ``scheduler.queue.averageDepth``

    Missing or null (``None``) values fall back to ``0.0``. Missing or null
    intermediate objects (``scenarios``, ``metrics``, ``performance``,
    ``events``, ``queue``) are treated as empty rather than raising.

    Args:
        metrics_payload: Decoded JSON payload with an optional ``scenarios``
            list of scenario dicts.

    Returns:
        Mapping of scenario id to its scheduler snapshot. Scenarios without
        scheduler data are omitted. Scenarios that share an id (including
        several falling back to ``"unknown"``) overwrite one another; the
        last one wins.
    """
    snapshots: dict[str, dict[str, float]] = {}
    # `or` (rather than a .get default) also covers keys that are present but
    # hold JSON null, which a default argument would not replace.
    for scenario in metrics_payload.get("scenarios") or []:
        entry_metrics = scenario.get("metrics") or {}
        perf = entry_metrics.get("performance") or {}
        scheduler = perf.get("scheduler")
        if not scheduler:
            continue
        events = scheduler.get("events") or {}
        queue = scheduler.get("queue") or {}
        snapshots[scenario.get("id", "unknown")] = {
            "utilisation": scheduler.get("utilisation") or 0.0,
            "avg_step": scheduler.get("averageStep") or 0.0,
            "events_per_step": events.get("perStep") or 0.0,
            "queue_depth": queue.get("averageDepth") or 0.0,
        }
    return snapshots

# Build the per-scenario scheduler snapshot table from the loaded payload.
scheduler_snapshots = collect_scheduler_snapshots(payload)
# Bare expression: shown as the cell's output when run in a notebook.
scheduler_snapshots

In [None]:
def recommend_config(snapshots: dict[str, dict[str, float]]) -> dict[str, dict[str, float]]:
    """Derive clamped tuning parameters for each scenario snapshot.

    For every scenario the following values are computed and rounded to five
    decimal places:

    * ``pressScheduleSlack``: ``2 * avg_step`` clamped to ``[0.006, 0.014]``.
    * ``cooldown``: ``0.12`` scaled by the scenario's utilisation relative to
      the mean utilisation of all snapshots, clamped to ``[0.08, 0.14]``.
    * ``pressMaxLookahead``: ``0.9 + 0.01 * queue_depth`` clamped to
      ``[0.82, 0.98]``.

    Args:
        snapshots: Mapping of scenario id to a stats dict with optional
            ``utilisation``, ``avg_step`` and ``queue_depth`` entries.

    Returns:
        Mapping of scenario id to the three recommended parameters. Empty
        input yields an empty mapping.
    """
    recommendations: dict[str, dict[str, float]] = {}

    # Average only over snapshots that actually carry a utilisation value, so
    # empty or partial snapshots neither raise nor skew the mean. With nothing
    # to average, 1.0 makes the cooldown ratio equal the scenario's own
    # utilisation.
    utilisations = [
        stats["utilisation"]
        for stats in snapshots.values()
        if stats.get("utilisation") is not None
    ]
    global_avg_util = mean(utilisations) if utilisations else 1.0

    for scenario_id, stats in snapshots.items():
        # A missing or zero utilisation is treated as 1.0 for this scenario.
        util = stats.get("utilisation", 1.0) or 1.0
        avg_step = stats.get("avg_step", 0.0) or 0.0
        queue_depth = stats.get("queue_depth", 0.0) or 0.0

        slack = max(0.006, min(0.014, avg_step * 2.0))
        # Floor the divisor at 0.01 so a near-zero global mean cannot blow up
        # (or divide by zero in) the ratio before it is clamped.
        cooldown = max(0.08, min(0.14, 0.12 * util / max(0.01, global_avg_util)))
        lookahead = max(0.82, min(0.98, 0.9 + (queue_depth * 0.01)))

        recommendations[scenario_id] = {
            "pressScheduleSlack": round(slack, 5),
            "cooldown": round(cooldown, 5),
            "pressMaxLookahead": round(lookahead, 5),
        }
    return recommendations

# Turn the scheduler snapshots into per-scenario parameter recommendations.
config_updates = recommend_config(scheduler_snapshots)
# Bare expression: shown as the cell's output when run in a notebook.
config_updates

In [None]:
import re

# Lua manifest whose numeric parameters get rewritten; the original text is
# kept so the rewritten text can be compared against it afterwards.
MANIFEST_PATH = Path("tests/engine/perf.manifest.lua")
manifest_text = MANIFEST_PATH.read_text()

def apply_updates(text: str, scenario_id: str, updates: dict[str, float]) -> str:
    """Rewrite numeric ``key = value`` assignments for one scenario in ``text``.

    For each ``key`` in ``updates``, finds the first ``key = <number>``
    assignment that follows ``id = "<scenario_id>"`` and replaces the number
    with the new value. The rest of the text is left untouched. A warning is
    printed for every key that could not be located.

    Args:
        text: Manifest source text.
        scenario_id: Scenario identifier; matched literally inside
            ``id = "..."``.
        updates: Mapping of parameter name to its new numeric value.

    Returns:
        The text with every successfully located assignment rewritten.

    NOTE(review): the search between the id and the key is a lazy ``.*?`` with
    DOTALL, so when the key is absent from the target scenario the match can
    land on the same key in a later scenario entry. Confirm the manifest
    always lists every tuned key per scenario.
    """
    # Whole numeric literal: optional sign, digits/decimal point, optional
    # exponent. Matching it in full keeps `1e-3` from being half-replaced and
    # keeps a negative value from pushing the search on to a later assignment.
    number = r'-?[0-9.]+(?:[eE][-+]?[0-9]+)?'
    for key, value in updates.items():
        # The key is escaped and must not be preceded by a word character, so
        # `cooldown` cannot match inside a longer name such as `maxcooldown`.
        pattern = (
            rf'(id\s*=\s*"{re.escape(scenario_id)}".*?'
            rf'(?<!\w){re.escape(key)}\s*=\s*)({number})'
        )
        # A callable replacement sidesteps backslash/group-reference parsing
        # of the substituted text.
        text, count = re.subn(
            pattern,
            lambda match, value=value: f"{match.group(1)}{value}",
            text,
            count=1,
            flags=re.S,
        )
        if count == 0:
            print(f"[warn] {scenario_id}: unable to update {key}")
    return text

# Fold each scenario's recommendations into a working copy of the manifest,
# one scenario at a time.
updated_text = manifest_text
for scenario_id, updates in config_updates.items():
    updated_text = apply_updates(updated_text, scenario_id, updates)

# Only write the file back when at least one substitution changed the text.
if updated_text == manifest_text:
    print("No manifest changes were applied (keys may already match).")
else:
    MANIFEST_PATH.write_text(updated_text)
    print("Updated perf manifest with tuned parameters.")
