diff --git a/bench/src/drivers/flat-harness.ts b/bench/src/drivers/flat-harness.ts
new file mode 100644
index 0000000..4cfc0ec
--- /dev/null
+++ b/bench/src/drivers/flat-harness.ts
@@ -0,0 +1,354 @@
+/**
+ * Plane-A flat harness — the GATE's control, recovered as the simplest possible
+ * `Agent.act` over the recursive execution atom (docs/research/recursive-execution-atom.md,
+ * "Plane A as the simplest act"; "Decision: Plane B contains Plane A").
+ *
+ * The driver spawns ONE child per profile at a FIXED, equal budget, joins them via
+ * `scope.next()` as each settles, adapts each settled child to the kernel's `Iteration`,
+ * and selects the best with `defaultSelectWinner` — the single-sourced argmax the loop
+ * kernel itself uses, so the control's selection is not a forked copy of the kernel's.
+ * No steering, no widening, no spawn-on-completion: a flat fan-out at equal compute.
+ *
+ * The equal-k assertion (critique B3) is the gate's validity guard, exported alongside:
+ * a treatment cell is admitted only when `Σ iterations(treatment) ≡ Σ iterations(blind)`
+ * per task (excluding `budgetExempt` runtimes, which are out of the conserved Σk by
+ * construction). A mismatch FAILS LOUD — the cell is excluded, never silently scored 0
+ * (mirroring `experiment.ts`'s infra-error exclusion: a confounded cell is dropped, not
+ * counted). Without this guard a "diverse@k beat blind@k" claim could be confounded by
+ * the treatment having spent more compute than the control.
+ */
+
+import {
+  type AgentProfile,
+  type AgentSpec,
+  type Budget,
+  type DefaultVerdict,
+  type ExecutorContext,
+  type ExecutorRegistry,
+  type Iteration,
+  type LeafExecutorFactory,
+  type ResultBlobStore,
+  type RootHandle,
+  type Settled,
+  type SpawnJournal,
+  type Supervisor,
+  type SupervisedResult,
+  type TreeView,
+  type UsageEvent,
+  type Agent as Atom,
+  type Scope as AtomScope,
+  createExecutorRegistry,
+  createRootHandle,
+  createSupervisor,
+  defaultSelectWinner,
+  InMemoryResultBlobStore,
+  InMemorySpawnJournal,
+  settledToIteration,
+} from '@tangle-network/agent-runtime/loops'
+import type { BackendType } from '@tangle-network/sandbox'
+
+/**
+ * One arm's agent profile + how it maps to a leaf runtime. `harness === null` resolves to
+ * the router/inline executor (a direct Router call, no box); a `BackendType` resolves to
+ * the sandbox executor (composes `runLoop` as a leaf). These two fields are the only
+ * executor knobs the flat harness exposes: the `AgentSpec` it builds for a leaf sets
+ * `profile` and `harness` and nothing else (no BYO `AgentSpec.executor`).
+ */
+export interface FlatProfile {
+  /** Stable arm label — becomes the child node's `label` and the selected winner's name. */
+  readonly label: string
+  /** The portable agent profile handed to the resolved executor. */
+  readonly profile: AgentProfile
+  /** Executor mapping: `null` → router/inline, a `BackendType` → sandbox. */
+  readonly harness: BackendType | null
+}
+
+/** The flat-harness task: a shared prompt fanned out across one child per profile. */
+export interface FlatTask {
+  /** The task statement every spawned child receives verbatim. */
+  readonly prompt: string
+  /** One arm per profile, each spawned once at the fixed equal budget; must be non-empty. */
+  readonly profiles: ReadonlyArray<FlatProfile>
+}
+
+/**
+ * The flat-harness result: the selected winner (or `undefined` when no `done` child was
+ * selectable) plus every settlement the join observed, so the caller can run the equal-k
+ * assertion and report the realized tree shape per cell (residual risk R1: equal-k is
+ * enforceable, equal-topology is not, so the realized shape is reported, not assumed).
+ */
+export interface FlatResult {
+  /** `defaultSelectWinner`'s pick over the `done` children; `undefined` when it picks none. */
+  readonly winner:
+    | {
+        readonly output: unknown
+        readonly verdict?: DefaultVerdict
+        readonly label: string
+        readonly seq: number
+      }
+    | undefined
+  /** Every settlement `next()` delivered (`done` and `down`), in the order it delivered them. */
+  readonly settled: ReadonlyArray<Settled<unknown>>
+}
+
+/** A leaf coder agent carries its executor mapping as `executorSpec` (the field
+ *  `scope.spawn` reads to resolve a `LeafExecutor`). Its `act` is never invoked — the
+ *  scope drives the resolved executor, not the leaf's `act` — so reaching it is a wiring
+ *  bug (fail loud). */
+interface LeafAtom extends Atom<unknown, unknown> {
+  readonly executorSpec: AgentSpec
+}
+
+/** Wraps one profile as a spawnable leaf: named after the arm's label, carrying the
+ *  `{ profile, harness }` spec, with an `act` that only ever throws (synchronously). */
+function leafCoder(p: FlatProfile): LeafAtom {
+  const { label, profile, harness } = p
+  const executorSpec: AgentSpec = { profile, harness }
+  const refuseAct = (): Promise<unknown> => {
+    throw new Error(
+      `flat-harness leaf "${label}": act() was invoked — a leaf is executed via its LeafExecutor, never its act (wiring bug)`,
+    )
+  }
+  return { name: label, executorSpec, act: refuseAct }
+}
+
+/**
+ * Builds the flat-harness driver `Agent`. Its `act` fans the shared prompt out to one leaf
+ * child per profile — each reserved the same fixed `childBudget` — drains `scope.next()`
+ * until it yields `null`, and picks a winner among the children that settled `done`.
+ *
+ * Replay-safe by construction: it reads only what `Settled` delivers, in the order `next()`
+ * delivers it — never `Date.now`, `Math.random`, or an unordered collection.
+ *
+ * Selection is `defaultSelectWinner` over each `done` child's `settledToIteration`
+ * adaptation (M4 / build step 8); `down` children are kept in `settled` but never ranked.
+ * `act` throws when `task.profiles` is empty or when any spawn is rejected.
+ */
+export function flatHarness(childBudget: Budget): Atom<FlatTask, FlatResult> {
+  return {
+    name: 'flat-harness',
+    async act(task: FlatTask, scope: AtomScope<FlatResult>): Promise<FlatResult> {
+      // The driver spawns `unknown`-typed leaves, so it works through an `unknown`-typed view.
+      const fanOut = scope as unknown as AtomScope<unknown>
+      if (task.profiles.length === 0) {
+        throw new Error('flat-harness: task.profiles is empty — nothing to fan out')
+      }
+
+      // Phase 1 — fan out. A rejected admission aborts the cell: a pool that cannot cover
+      // one equal-budget child per profile is a misconfigured budget, not a partial run.
+      for (const arm of task.profiles) {
+        const admission = fanOut.spawn(leafCoder(arm), task.prompt, {
+          budget: childBudget,
+          label: arm.label,
+        })
+        if (admission.ok) continue
+        throw new Error(
+          `flat-harness: spawn of "${arm.label}" rejected (${admission.reason}) — the root budget cannot cover one equal-budget child per profile`,
+        )
+      }
+
+      // Phase 2 — join. Every settlement is recorded; only a `done` child becomes an iteration
+      // (a `down` child is out of selection and the equal-k Σ, like an infra error in experiment.ts).
+      const settled: Settled<unknown>[] = []
+      const iterations: Iteration<unknown, unknown>[] = []
+      let delivered = await fanOut.next()
+      while (delivered !== null) {
+        settled.push(delivered)
+        if (delivered.kind === 'done') iterations.push(settledToIteration(delivered))
+        delivered = await fanOut.next()
+      }
+
+      // Phase 3 — select. `verdict` is omitted (never set to `undefined`) when absent.
+      const best = defaultSelectWinner(iterations)
+      if (!best) return { winner: undefined, settled }
+      const winner = {
+        output: best.output,
+        ...(best.verdict ? { verdict: best.verdict } : {}),
+        label: best.agentRunName,
+        seq: best.iterationIndex,
+      }
+      return { winner, settled }
+    },
+  }
+}
+
+// ── The equal-k assertion (critique B3) ──────────────────────────────────────────
+
+/** Per-arm realized iteration count, tagged so a mismatch names the offending arm. */
+export interface ArmRealizedK {
+  /** Arm label (the control is named explicitly by the caller, not inferred here). */
+  readonly label: string
+  /** Σ of `spent.iterations` over this arm's non-`down` children. `budgetExempt` runtimes are
+   *  not filtered in code — they are expected to report zero conserved spend by contract. */
+  readonly k: number
+  /** Count of `down` (failed/infra) children — reported so a caller can see why an arm's
+   *  realized k differs even when both arms were spawned identically (residual risk R1). */
+  readonly downCount: number
+}
+
+/**
+ * The equal-k outcome. `ok: true` — every arm's k equals the first (control) arm's, carried
+ * as `k`. `ok: false` — `'no-arms'` for an empty arm list, `'unequal-k'` when any arm's k
+ * differs; the caller inspects `ok` and excludes the cell, never scores a silent zero (B3).
+ */
+export type EqualKOutcome =
+  | { readonly ok: true; readonly k: number; readonly arms: ReadonlyArray<ArmRealizedK> }
+  | {
+      readonly ok: false
+      readonly reason: 'unequal-k' | 'no-arms'
+      readonly arms: ReadonlyArray<ArmRealizedK>
+    }
+
+/** Tallies one arm's settled children: `k` sums `spent.iterations` over every child that
+ *  is not `down`, and `downCount` counts the `down` ones. The spend is read straight off
+ *  each settlement (the same evidence replay reads) — never a wall-clock or re-derived
+ *  count. A `budgetExempt` runtime is expected to report zero conserved spend by contract,
+ *  so it falls out of the Σ without a per-runtime special case. */
+export function realizedK(label: string, settled: ReadonlyArray<Settled<unknown>>): ArmRealizedK {
+  const tally = { k: 0, downCount: 0 }
+  for (const child of settled) {
+    if (child.kind === 'down') {
+      tally.downCount += 1
+    } else {
+      // Any settlement that is not `down` contributes its conserved iteration spend.
+      tally.k += child.spent.iterations
+    }
+  }
+  return { label, ...tally }
+}
+
+/**
+ * The equal-k assertion (critique B3): a treatment cell is admissible only when every arm
+ * realized the SAME conserved compute as the blind control — `Σ iterations(treatment) ≡
+ * Σ iterations(blind)` per task. Returns a typed `EqualKOutcome`; the caller EXCLUDES the
+ * cell on `!ok` (never scores it 0), so a compute-confounded cell is dropped, exactly as an
+ * infra-errored cell is in `experiment.ts`.
+ *
+ * `arms` is `[control, ...treatments]` — the blind control FIRST. Each arm is reduced with
+ * `realizedK` and compared to the first arm's k; a single differing arm fails the whole
+ * cell (`unequal-k`), and an empty list fails as `no-arms`.
+ */
+export function assertEqualK(
+  arms: ReadonlyArray<{ label: string; settled: ReadonlyArray<Settled<unknown>> }>,
+): EqualKOutcome {
+  const realized = arms.map(({ label, settled }) => realizedK(label, settled))
+  const control = realized[0]
+  if (control === undefined) return { ok: false, reason: 'no-arms', arms: [] }
+  const diverged = realized.some((arm) => arm.k !== control.k)
+  return diverged
+    ? { ok: false, reason: 'unequal-k', arms: realized }
+    : { ok: true, k: control.k, arms: realized }
+}
+
+/** Strict equal-k guard for an in-driver invariant (a unit test, a same-budget
+ *  self-comparison): returns `{ k, arms }` when `assertEqualK` admits the cell, otherwise
+ *  throws an `Error` whose message matches the failure (`no-arms` vs `unequal-k`). The
+ *  experiment harness prefers `assertEqualK`, which lets it EXCLUDE one confounded cell
+ *  instead of aborting the whole run. */
+export function assertEqualKOrThrow(
+  arms: ReadonlyArray<{ label: string; settled: ReadonlyArray<Settled<unknown>> }>,
+): { k: number; arms: ReadonlyArray<ArmRealizedK> } {
+  const outcome = assertEqualK(arms)
+  if (outcome.ok) return { k: outcome.k, arms: outcome.arms }
+  if (outcome.reason === 'no-arms') {
+    throw new Error('equal-k assertion failed (no-arms): no arms were supplied — nothing to compare')
+  }
+  const detail = outcome.arms.map((a) => `${a.label}=${a.k}`).join(' ')
+  throw new Error(
+    `equal-k assertion failed (${outcome.reason}): arms spent unequal conserved compute [${detail}] — the cell is confounded and must be excluded`,
+  )
+}
+
+// ── Running the flat harness through the Supervisor (the gate's control runner) ───
+
+/** Seams every spawned executor reads off `ExecutorContext.seams`, keyed by the seam name a
+ *  built-in narrows (`router` → router/inline base+key+model, `sandbox` → the loop sandbox
+ *  client). Opaque here — `bindSeams` hands the whole object to each resolved factory
+ *  unchanged — so a cell that names a sandbox/router arm MUST supply that seam itself. */
+export interface FlatHarnessSeams {
+  readonly router?: { routerBaseUrl: string; routerKey: string; model?: string }
+  readonly sandbox?: { sandboxClient: unknown; maxIterations?: number; loopCtx?: unknown }
+  readonly [seam: string]: unknown
+}
+
+/**
+ * Wraps an `ExecutorRegistry` so that every factory it resolves builds its executor with
+ * THIS cell's seams. The supervisor constructs the root scope with an empty `seams` map (it
+ * is task-agnostic), so the seams ride on the registry instead.
+ *
+ * `register` forwards to `base` untouched. `resolve` forwards too; a failed resolution is
+ * returned as-is, and a successful one has its factory wrapped so the context it receives
+ * is `{ signal: ctx.signal, seams }` — the caller's `signal` is kept, its `seams` replaced.
+ * A factory that ignores its context (e.g. a BYO executor's) is unaffected by the wrap.
+ */
+function bindSeams(base: ExecutorRegistry, seams: Readonly<Record<string, unknown>>): ExecutorRegistry {
+  const withSeams = <Out>(factory: LeafExecutorFactory<Out>): LeafExecutorFactory<Out> =>
+    (spec, ctx: ExecutorContext) => factory(spec, { signal: ctx.signal, seams })
+  return {
+    register<Out>(runtime: string, factory: LeafExecutorFactory<Out>): void {
+      base.register(runtime, factory)
+    },
+    resolve<Out>(spec: AgentSpec) {
+      const found = base.resolve<Out>(spec)
+      return found.succeeded ? { succeeded: true as const, value: withSeams(found.value) } : found
+    },
+  }
+}
+
+/** Everything one `runFlatHarness` cell needs: task, budgets, run key, seams, optional stores. */
+export interface RunFlatHarnessConfig {
+  /** The fan-out task: the shared prompt + one profile per arm. */
+  readonly task: FlatTask
+  /** The FIXED per-child budget, identical for every arm (the equal-k precondition: equal
+   *  reservations equalize `Σk` when no child is `budgetExempt` and none goes `down`). */
+  readonly childBudget: Budget
+  /** The root conserved-pool ceiling. Must cover one `childBudget` per profile or the
+   *  flat-harness `act` fails loud on the first un-affordable spawn. */
+  readonly rootBudget: Budget
+  /** Trace-correlation + journal/blob root key. */
+  readonly runId: string
+  /** Per-runtime executor seams, bound onto the registry so every resolved factory gets them. */
+  readonly seams: FlatHarnessSeams
+  /** Open executor registry; defaults to a fresh `createExecutorRegistry()` (presumably the
+   *  built-ins: router/inline · sandbox · cli). Inject to register additional runtimes. */
+  readonly executors?: ExecutorRegistry
+  /** Event source; defaults to the in-memory journal (durable JSONL/FS is injectable). */
+  readonly journal?: SpawnJournal
+  /** Result-blob store backing `outRef` rehydration; defaults to in-memory. */
+  readonly blobs?: ResultBlobStore
+  /** Optional live root handle (the Q2 chat/pi-viz substrate) attached before `run`. */
+  readonly rootHandle?: RootHandle<FlatResult>
+  /** Caller abort signal — cascades into every live child's executor. */
+  readonly signal?: AbortSignal
+  /** Injected clock for deterministic journal timestamps (tests). */
+  readonly now?: () => number
+}
+
+/**
+ * Runs one flat-harness cell through a fresh supervisor: one child per profile at
+ * `childBudget` under the `rootBudget` pool, join, select. Resolves to the typed
+ * `SupervisedResult` (a no-winner is never coerced to a best-effort output, M2). Seams ride
+ * on the registry via `bindSeams`; journal and blob store default to the in-memory ones.
+ */
+export async function runFlatHarness(cfg: RunFlatHarnessConfig): Promise<SupervisedResult<FlatResult>> {
+  const { rootHandle, signal, now } = cfg
+  const supervisor: Supervisor<FlatTask, FlatResult> = createSupervisor<FlatTask, FlatResult>()
+  if (rootHandle) supervisor.attach(rootHandle)
+  const executors = bindSeams(cfg.executors ?? createExecutorRegistry(), cfg.seams)
+  const driver = flatHarness(cfg.childBudget)
+  const journal = cfg.journal ?? new InMemorySpawnJournal()
+  const blobs = cfg.blobs ?? new InMemoryResultBlobStore()
+  return supervisor.run(driver, cfg.task, {
+    budget: cfg.rootBudget,
+    runId: cfg.runId,
+    journal,
+    blobs,
+    executors,
+    ...(signal ? { signal } : {}),
+    ...(now ? { now } : {}),
+  })
+}
+
+/** Re-exported so a caller building the live root substrate gets it from one place. */
+export { createRootHandle }
+export type { TreeView, UsageEvent }
diff --git a/bench/src/drivers/llm-meta-driver.ts b/bench/src/drivers/llm-meta-driver.ts
new file mode 100644
index 0000000..bb1ee73
--- /dev/null
+++ b/bench/src/drivers/llm-meta-driver.ts
@@ -0,0 +1,374 @@
+/**
+ * @experimental
+ *
+ * LLM meta-driver — the TREATMENT variant of the recursive execution atom's two driver-act
+ * bodies (the coded progressive-widening control is in `./progressive-widening.ts`; both
+ * share the flat-by-default `WidenGate`). Operator's call: build it now, on top of the
+ * budget-reservation invariant that keeps an equal-k result valid.
+ *
+ * The policy: `act` asks the Router (an LLM) for an initial spawn plan — which child agents
+ * to spawn and their per-child budgets ("driver A for n shots, B for k shots" =
+ * heterogeneous per-child `maxIterations`) — then reacts to each `scope.next()` completion.
+ * On a promising settlement it asks the Router again for a widen plan: spawn one more child
+ * toward a lineage, under the conserved pool. Children resolve their `LeafExecutor` through
+ * the open registry off their `AgentSpec` (`harness: null` → a direct Router call, no box;
+ * a `BackendType` → sandboxed; or a BYO `executor`) — the meta-driver never switches on the
+ * runtime itself.
+ *
+ * The same two firewall invariants the control upholds (critique R2):
+ *  - `WidenGate` DEFAULTS TO FLAT (`defaultWidenGate`), so a gate run never asks for a
+ *    widen and the selector≠judge conflict stays dormant.
+ *  - The LLM is shown ONLY trace-derived findings (the `analyze` hook → `AnalystFinding[]`)
+ *    when deciding to widen — NEVER the raw `verdict.score`. Letting the meta-controller
+ *    read the judge verdict for a spawn decision requires the gate's explicit
+ *    `judgeExempt: true` (off by default), the documented hatch that re-couples steering to
+ *    the judge.
+ *
+ * Selection stays single-sourced (`settledToIteration` + `defaultSelectWinner`).
+ *
+ * The Router is an external boundary: `routerChatWithUsage` (reused from `../router-client`,
+ * not re-copied) reports REAL token usage and throws on a non-OK response. The driver
+ * inspects the parse outcome before acting on a plan — a malformed plan fails loud, never a
+ * silent empty fan-out.
+ */
+
+import type { AnalystFinding } from '@tangle-network/agent-eval'
+import { defaultSelectWinner } from '../../../src/loops/run-loop.ts'
+import { settledToIteration } from '../../../src/loops/supervise/scope.ts'
+import type {
+  Agent,
+  AgentSpec,
+  Budget,
+  Scope,
+  Settled,
+  WidenGate,
+} from '../../../src/loops/supervise/types.ts'
+import { routerChatWithUsage, type RouterConfig } from '../router-client.ts'
+import { defaultWidenGate } from './progressive-widening.ts'
+
+/** A child the meta-driver can spawn. The `agent` carries its `AgentSpec` as `executorSpec`
+ *  — the field `scope.spawn` reads to resolve the runtime (`harness: null` → router/inline;
+ *  `BackendType` → sandbox; BYO `executor`). `task` is handed to `scope.spawn` as-is. */
+export interface MetaChild<Out> {
+  /** Stable name the Router references in its plan; also used as the spawn `label`. */
+  readonly key: string
+  readonly agent: Agent<unknown, Out>
+  readonly task: unknown
+  /** One-line capability summary the Router sees when choosing this child. */
+  readonly description: string
+}
+
+/** One Router-requested spawn: a catalog key + budget (`shots` becomes `maxIterations`). */
+export interface SpawnPlanEntry {
+  readonly childKey: string
+  readonly shots: number
+  readonly maxTokens: number
+  readonly maxUsd?: number
+}
+
+/** The Router's decision, parsed and validated. On a widen plan `done: true` skips its
+ *  `spawns`; the initial plan's `spawns` are attempted whatever its `done` says. */
+export interface SpawnPlan {
+  readonly spawns: ReadonlyArray<SpawnPlanEntry>
+  readonly done: boolean
+}
+
+/** Trace-analyst hook: maps one `done` settlement to the findings that can warrant a widen. */
+export type AnalyzeSettled<Out> = (
+  settled: Extract<Settled<Out>, { kind: 'done' }>,
+) => Promise<ReadonlyArray<AnalystFinding>>
+
+/** Configuration for `createLlmMetaDriver`; only router, catalog and objective are required. */
+export interface LlmMetaDriverOptions<Out> {
+  readonly name?: string
+  /** Router seam (base url + key + model). Reused for every meta-decision call. */
+  readonly router: RouterConfig
+  /** The catalog of spawnable children the Router plans over, keyed by `key`. */
+  readonly catalog: ReadonlyArray<MetaChild<Out>>
+  /** One-line statement of the goal the Router optimizes the spawn plan toward. */
+  readonly objective: string
+  /** Trace-analyst hook — the only child signal shown to the Router; omitted → no findings. */
+  readonly analyze?: AnalyzeSettled<Out>
+  /** The widening governor. Defaults to `defaultWidenGate` (flat — never widens). */
+  readonly gate?: WidenGate<Out>
+  /** Optional `deadlineMs` put on every planned child's budget (plans carry no deadline). */
+  readonly perChildDeadlineMs?: number
+}
+
+/**
+ * Build the LLM meta-driver `Agent`. Its `act` body: ask the Router for an initial spawn
+ * plan → spawn the planned children at their heterogeneous per-child budgets → react to
+ * each `next()` → on a promising (trace-derived) settlement, ask the Router for a widen plan
+ * and spawn at most ONE more under budget → select the winner (throws if no child is `done`).
+ */
+export function createLlmMetaDriver<Out>(opts: LlmMetaDriverOptions<Out>): Agent<unknown, Out> {
+  const gate = opts.gate ?? defaultWidenGate<Out>()
+  const analyze = opts.analyze
+  const byKey = new Map(opts.catalog.map((c) => [c.key, c]))
+
+  return {
+    name: opts.name ?? 'llm-meta-driver',
+    async act(task: unknown, scope: Scope<Out>): Promise<Out> {
+      // Ask the Router for the initial spawn plan. The prompt shows the catalog + budget
+      // readout; the LLM decides which children and their per-child shots/tokens.
+      const initial = await requestPlan(opts, scope, task, undefined, [])
+      spawnPlanned(initial, byKey, opts, scope)
+
+      const done: Array<Extract<Settled<Out>, { kind: 'done' }>> = []
+      for (let settled = await scope.next(); settled !== null; settled = await scope.next()) {
+        if (settled.kind === 'down') continue // infra/bad child: excluded from merge n + equal-k
+        done.push(settled)
+
+        // Flat gate (the default) short-circuits before any Router call — a gate run never
+        // pays for a widen decision and the firewall conflict stays dormant.
+        if (!gate.shouldWiden(settled, scope.budget)) continue
+        const findings = analyze ? await analyze(settled) : []
+        if (!widenIsWarranted(findings, gate, settled)) continue
+
+        // Ask the Router for a widen plan from ONLY the trace-derived findings (never the
+        // verdict). The "at most one more spawn" rule is enforced here, not just prompted:
+        const widen = await requestPlan(opts, scope, task, settled, findings)
+        if (widen.done) continue
+        spawnPlanned({ ...widen, spawns: widen.spawns.slice(0, 1) }, byKey, opts, scope)
+      }
+
+      const iterations = done.map((s) => settledToIteration(s))
+      const winner = defaultSelectWinner(iterations)
+      if (!winner) {
+        throw new Error(
+          'llm-meta-driver: no done child to select a winner from (all children were down)',
+        )
+      }
+      return winner.output as Out
+    },
+  }
+}
+
+/** Spawns each planned entry in plan order: catalog lookup by `childKey`, `executorSpec`
+ *  check, then `scope.spawn` under the entry's budget with the key as label. An unknown key
+ *  throws (a hallucinated plan is a diagnostic, not a dropped spawn). The spawn result is
+ *  not inspected: a spawn the pool rejects is dropped — fail closed, never overcommit. */
+function spawnPlanned<Out>(
+  plan: SpawnPlan,
+  byKey: Map<string, MetaChild<Out>>,
+  opts: LlmMetaDriverOptions<Out>,
+  scope: Scope<Out>,
+): void {
+  const catalogKeys = (): string => [...byKey.keys()].join(', ')
+  plan.spawns.forEach((entry) => {
+    const child = byKey.get(entry.childKey)
+    if (child === undefined) {
+      throw new Error(
+        `llm-meta-driver: Router planned a spawn for unknown child key "${entry.childKey}" (catalog: ${catalogKeys()})`,
+      )
+    }
+    assertSpawnable(child)
+    scope.spawn(child.agent, child.task, { budget: entryBudget(entry, opts), label: child.key })
+  })
+}
+
+/** Maps one plan entry onto a conserved `Budget` (heterogeneous per child — the "driver A n
+ *  shots, B k shots" dial): `shots` → `maxIterations`, `maxTokens` as-is, and `maxUsd` /
+ *  `deadlineMs` only when the entry / `opts.perChildDeadlineMs` define them. */
+function entryBudget<Out>(entry: SpawnPlanEntry, opts: LlmMetaDriverOptions<Out>): Budget {
+  const { shots, maxTokens, maxUsd } = entry
+  const { perChildDeadlineMs: deadlineMs } = opts
+  const usdCap = maxUsd === undefined ? {} : { maxUsd }
+  const deadline = deadlineMs === undefined ? {} : { deadlineMs }
+  return { maxIterations: shots, maxTokens, ...usdCap, ...deadline }
+}
+
+/**
+ * Asks the Router for a spawn plan and returns it parsed. One system turn
+ * (`metaSystemPrompt`) plus one user turn is sent through `routerChatWithUsage`; the reply
+ * text goes through `parsePlan`, and a failed parse throws (fail loud) rather than
+ * degrading to an empty fan-out. With `settled` present this is a widen decision and the
+ * user turn is `widenPrompt` (findings only — selector ≠ judge); otherwise `initialPrompt`.
+ */
+async function requestPlan<Out>(
+  opts: LlmMetaDriverOptions<Out>,
+  scope: Scope<Out>,
+  task: unknown,
+  settled: Extract<Settled<Out>, { kind: 'done' }> | undefined,
+  findings: ReadonlyArray<AnalystFinding>,
+): Promise<SpawnPlan> {
+  // No settlement → the initial plan; a settlement → a widen decision about that child.
+  const userTurn =
+    settled === undefined
+      ? initialPrompt(opts, scope, task)
+      : widenPrompt(opts, scope, settled, findings)
+  const { content } = await routerChatWithUsage(opts.router, [
+    { role: 'system', content: metaSystemPrompt },
+    { role: 'user', content: userTurn },
+  ])
+  const outcome = parsePlan(content)
+  if (outcome.ok) return outcome.plan
+  throw new Error(`llm-meta-driver: Router returned an unparseable spawn plan — ${outcome.error}`)
+}
+
+/** System turn for every meta-decision call: the driver's role plus the JSON reply contract. */
+const metaSystemPrompt = [
+  'You are a spawn meta-driver over a budget-conserving execution scope.',
+  'You decide which child agents to spawn and their per-child budgets (shots, tokens).',
+  'Spawning is asynchronous: a child runs, settles, and you may then widen toward a promising lineage under the remaining conserved budget. Do NOT fan out eagerly.',
+  'Reply with ONLY a JSON object: {"spawns":[{"childKey":string,"shots":number,"maxTokens":number,"maxUsd"?:number}],"done":boolean}.',
+  'When you have enough settled children to synthesize a winner, reply {"spawns":[],"done":true}.',
+].join(' ')
+
+/** First-turn user prompt, newline-joined: objective, task (via `stringifyForPrompt`),
+ *  the conserved-budget readout, the catalog listing, then the ask for an initial plan.
+ *  Nothing about any child is included — no child has settled yet. */
+function initialPrompt<Out>(
+  opts: LlmMetaDriverOptions<Out>,
+  scope: Scope<Out>,
+  task: unknown,
+): string {
+  const header = `Objective: ${opts.objective}\nTask: ${stringifyForPrompt(task)}`
+  const budget = `Conserved budget: ${budgetLine(scope)}`
+  const catalog = `Catalog of spawnable children:\n${catalogLines(opts.catalog)}`
+  const ask = 'Choose the initial spawn plan: which children, with which per-child shots/tokens.'
+  return [header, budget, catalog, ask].join('\n')
+}
+
+/** Widen-turn user prompt. Carries the settled child's label and its trace-analyst findings
+ *  only — the verdict is never rendered — which is the selector ≠ judge firewall: the
+ *  meta-controller steers from the diagnosis, not the judge. */
+function widenPrompt<Out>(
+  opts: LlmMetaDriverOptions<Out>,
+  scope: Scope<Out>,
+  settled: Extract<Settled<Out>, { kind: 'done' }>,
+  findings: ReadonlyArray<AnalystFinding>,
+): string {
+  const settledNote = `A child "${settled.handle.label}" just settled. Trace-analyst findings (steer from these, NOT any score):`
+  const sections = [
+    `Objective: ${opts.objective}`,
+    `${settledNote}\n${renderFindings(findings)}`,
+    `Remaining conserved budget: ${budgetLine(scope)}`,
+    `Catalog of spawnable children:\n${catalogLines(opts.catalog)}`,
+    'Widen toward the promising lineage with at most one more spawn, or reply done if there is nothing worth widening.',
+  ]
+  return sections.join('\n')
+}
+
+/** One indented `- key: description` bullet per catalog child, newline-joined. */
+function catalogLines<Out>(catalog: ReadonlyArray<MetaChild<Out>>): string {
+  return catalog.map(({ key, description }) => `  - ${key}: ${description}`).join('\n')
+}
+
+/** The budget readout shown to the Router: tokens left, USD left, tokens reserved. */
+function budgetLine<Out>(scope: Scope<Out>): string {
+  const { tokensLeft, usdLeft, reservedTokens } = scope.budget
+  return `tokensLeft=${tokensLeft} usdLeft=${usdLeft} reservedTokens=${reservedTokens}`
+}
+
+/** One bullet per finding (`[severity/area] claim`, plus ` → action` when recommended), or
+ *  a `(no findings)` placeholder for an empty list. */
+function renderFindings(findings: ReadonlyArray<AnalystFinding>): string {
+  const bullet = (f: AnalystFinding): string =>
+    `  - [${f.severity}/${f.area}] ${f.claim}${f.recommended_action ? ` → ${f.recommended_action}` : ''}`
+  return findings.length === 0 ? '  (no findings)' : findings.map(bullet).join('\n')
+}
+
+/**
+ * Decides whether a settlement warrants a widen. Default (trace-derived) path: true when
+ * some finding is `high`/`critical` AND carries a non-empty string `recommended_action`;
+ * empty findings are never warranted (flat). With the gate's explicit `judgeExempt: true`
+ * hatch the findings are ignored and the answer is "the verdict has a numeric score > 0".
+ */
+function widenIsWarranted<Out>(
+  findings: ReadonlyArray<AnalystFinding>,
+  gate: WidenGate<Out>,
+  settled: Extract<Settled<Out>, { kind: 'done' }>,
+): boolean {
+  if (gate.judgeExempt === true) {
+    const verdict = settled.verdict as { score?: unknown } | undefined
+    const score = verdict?.score
+    return typeof score === 'number' && score > 0
+  }
+  for (const { severity, recommended_action: action } of findings) {
+    const severe = severity === 'high' || severity === 'critical'
+    if (severe && typeof action === 'string' && action.length > 0) return true
+  }
+  return false
+}
+
+/** Parse + validate the Router's JSON plan. A non-object, a non-array `spawns`, a non-boolean
+ *  `done`, or a malformed entry (`shots` not a positive integer, `maxTokens` not positive and
+ *  finite, a present `maxUsd` not finite ≥ 0) is a typed failure — never a silent empty plan. */
+function parsePlan(content: string): { ok: true; plan: SpawnPlan } | { ok: false; error: string } {
+  const fail = (error: string) => ({ ok: false as const, error })
+  const positive = (v: unknown): v is number => typeof v === 'number' && Number.isFinite(v) && v > 0
+  const json = extractJsonObject(content)
+  if (json === undefined) return fail('no JSON object in response')
+  let raw: unknown
+  try {
+    raw = JSON.parse(json)
+  } catch (err) {
+    return fail(`JSON.parse failed: ${err instanceof Error ? err.message : String(err)}`)
+  }
+  if (typeof raw !== 'object' || raw === null) return fail('plan is not an object')
+  const obj = raw as Record<string, unknown>
+  if (!Array.isArray(obj.spawns)) return fail('`spawns` is not an array')
+  if (typeof obj.done !== 'boolean') return fail('`done` is not a boolean')
+  const spawns: SpawnPlanEntry[] = []
+  for (const e of obj.spawns as unknown[]) {
+    if (typeof e !== 'object' || e === null) return fail('a spawn entry is not an object')
+    const { childKey, shots, maxTokens, maxUsd } = e as Record<string, unknown>
+    if (typeof childKey !== 'string') return fail('a spawn entry has no string `childKey`')
+    // JSON's `1e999` parses to Infinity and 2.5 shots is no iteration cap: `> 0` is not enough.
+    if (!positive(shots) || !Number.isInteger(shots)) {
+      return fail(`spawn "${childKey}" has a non-positive or non-integer \`shots\``)
+    }
+    if (!positive(maxTokens)) return fail(`spawn "${childKey}" has a non-positive or non-finite \`maxTokens\``)
+    // A present-but-invalid `maxUsd` would otherwise silently drop the USD cap that was asked for.
+    if (maxUsd != null && !(typeof maxUsd === 'number' && Number.isFinite(maxUsd) && maxUsd >= 0)) {
+      return fail(`spawn "${childKey}" has an invalid \`maxUsd\` (expected a finite number ≥ 0)`)
+    }
+    spawns.push({ childKey, shots, maxTokens, ...(typeof maxUsd === 'number' ? { maxUsd } : {}) })
+  }
+  return { ok: true, plan: { spawns, done: obj.done } }
+}
+
+/** Slice the first balanced `{...}` object out of a model response (tolerates prose around
+ *  the JSON; braces inside JSON strings are ignored). Undefined when none is balanced. */
+function extractJsonObject(content: string): string | undefined {
+  const start = content.indexOf('{')
+  if (start === -1) return undefined
+  for (let i = start, depth = 0, inString = false; i < content.length; i++) {
+    const ch = content[i]
+    if (inString) {
+      if (ch === '\\') i++ // skip the escaped character (e.g. `\"`)
+      else if (ch === '"') inString = false
+    } else if (ch === '"') inString = true
+    else if (ch === '{') depth++
+    else if (ch === '}' && --depth === 0) return content.slice(start, i + 1)
+  }
+  return undefined
+}
+
+/** Prompt text for a task: string verbatim, else JSON, else `String()` (no JSON form / throws). */
+function stringifyForPrompt(task: unknown): string {
+  try {
+    return typeof task === 'string' ? task : (JSON.stringify(task) ?? String(task))
+  } catch {
+    return String(task)
+  }
+}
+
+/** Guard: a `MetaChild`'s agent must carry an `executorSpec` that passes `isAgentSpec` —
+ *  the field `scope.spawn` resolves the runtime from. Throws otherwise (only the agent
+ *  author knows its profile/harness); returns silently when the spec is present. */
+function assertSpawnable<Out>(child: MetaChild<Out>): void {
+  const { executorSpec } = child.agent as { executorSpec?: unknown }
+  if (isAgentSpec(executorSpec)) return
+  const who = `child "${child.key}" agent`
+  throw new Error(
+    `llm-meta-driver: ${who} carries no executorSpec (AgentSpec); cannot resolve its LeafExecutor`,
+  )
+}
+
+/** Structural `AgentSpec` check: a non-null object exposing both `profile` and `harness`
+ *  keys (presence only — the values are not inspected). */
+function isAgentSpec(value: unknown): value is AgentSpec {
+  return typeof value === 'object' && value !== null && 'profile' in value && 'harness' in value
+}
diff --git a/bench/src/drivers/progressive-widening.ts b/bench/src/drivers/progressive-widening.ts
new file mode 100644
index 0000000..af0d5d1
--- /dev/null
+++ b/bench/src/drivers/progressive-widening.ts
@@ -0,0 +1,223 @@
+/**
+ * @experimental
+ *
+ * Coded progressive-widening driver — the CONTROL variant of the recursive execution
+ * atom's two driver-act bodies (the LLM meta-driver in `./llm-meta-driver.ts` is the
+ * treatment). Both share the `WidenGate` below.
+ *
+ * The policy (MCTS progressive widening, the governor that keeps "full generality" from
+ * becoming "boil the ocean"): seed a NARROW frontier (one child per seed), then react to
+ * each `scope.next()` completion. A node widens — spawns ONE more child toward the same
+ * promising lineage under the conserved pool — only when the `WidenGate` says so. No
+ * eager fan-out: the frontier grows by at most one per settlement, bounded by the
+ * conserved budget reservation (`scope.spawn` fails closed when the pool can't cover it).
+ *
+ * Two firewall invariants this driver upholds by construction (critique R2):
+ *  - `WidenGate` DEFAULTS TO FLAT: `defaultWidenGate.shouldWiden` returns false for every
+ *    settlement, so a gate run never widens and the selector≠judge conflict stays dormant.
+ *  - When widening IS enabled, `promising` is derived from TRACE findings (the `analyze`
+ *    hook → `AnalystFinding[]`), NEVER from a raw `verdict.score`. Reading the judge
+ *    verdict for a spawn decision requires the gate's explicit `judgeExempt: true` (off by
+ *    default) — the documented escape hatch that re-couples steering to the judge.
+ *
+ * Selection stays single-sourced: settled children adapt to `Iteration` via
+ * `settledToIteration` and `defaultSelectWinner` picks the winner — the driver never
+ * forks the kernel's argmax (selector ≠ judge).
+ */
+
+import type { AnalystFinding } from '@tangle-network/agent-eval'
+import { defaultSelectWinner } from '../../../src/loops/run-loop.ts'
+import { settledToIteration } from '../../../src/loops/supervise/scope.ts'
+import type {
+  Agent,
+  AgentSpec,
+  Budget,
+  DefaultVerdict,
+  Scope,
+  Settled,
+  WidenGate,
+} from '../../../src/loops/supervise/types.ts'
+
+/** A child the driver can spawn: a leaf `Agent`, the task to hand it, and a label. The
+ *  `AgentSpec` the open registry resolves its `LeafExecutor` from (`harness: null` →
+ *  router/inline; `BackendType` → sandbox; or a BYO `executor`) is NOT a field here — it
+ *  rides on the agent as `executorSpec`, which `asSpawnable` checks (fail loud if absent). */
+export interface ChildAgent<Out> {
+  readonly agent: Agent<unknown, Out>
+  readonly task: unknown
+  readonly label: string
+}
+
+/** A seed of the narrow initial frontier: the child to spawn and its per-child budget. */
+export interface WideningSeed<Out> {
+  readonly child: ChildAgent<Out> // agent + task + label handed to `scope.spawn`
+  readonly budget: Budget // passed as the spawn's `budget` option
+}
+
+/**
+ * Trace-analyst hook: read a settled child's TRACE (rehydrated `out` + lineage) into
+ * `AnalystFinding[]`. This is the analyst→driver wire (mirrors `PlannerContext.analyses`)
+ * and the ONLY signal `promising` may read unless the gate is `judgeExempt`. It is called
+ * only for `done` settlements that already passed `gate.shouldWiden`. The hook MUST return
+ * trace-derived findings; the driver never inspects `settled.verdict` otherwise.
+ */
+export type AnalyzeSettled<Out> = (
+  settled: Extract<Settled<Out>, { kind: 'done' }>,
+) => Promise<ReadonlyArray<AnalystFinding>>
+
+export interface ProgressiveWideningOptions<Out> {
+  readonly name?: string // agent name; defaults to 'progressive-widening'
+  /** The narrow initial frontier — one child per seed, no eager fan-out. */
+  readonly seed: (task: unknown) => ReadonlyArray<WideningSeed<Out>>
+  /** Build the next child to widen toward a promising lineage. Returns `null` to stop
+   *  widening this lineage (e.g. the lineage has converged). */
+  readonly widen: (
+    settled: Extract<Settled<Out>, { kind: 'done' }>,
+    findings: ReadonlyArray<AnalystFinding>,
+  ) => WideningSeed<Out> | null
+  /** Trace-analyst wire feeding `promising`. Omit to run flat: with no findings, only a
+   *  `judgeExempt` gate can ever widen. */
+  readonly analyze?: AnalyzeSettled<Out>
+  /** The widening governor. Defaults to `defaultWidenGate` (flat — never widens). */
+  readonly gate?: WidenGate<Out>
+}
+
+/**
+ * Build the coded progressive-widening `Agent`. Its `act` body is the control policy:
+ * seed narrow → react to each `next()` → widen toward a promising lineage under budget →
+ * synthesize the winner with the single-sourced selector. `WidenGate` defaults flat, so
+ * with no `gate`/`analyze` supplied this is exactly the "spawn the seeds, pick the best"
+ * flat harness.
+ */
+export function createProgressiveWideningDriver<Out>(
+  opts: ProgressiveWideningOptions<Out>,
+): Agent<unknown, Out> {
+  const gate = opts.gate ?? defaultWidenGate<Out>()
+  const analyze = opts.analyze
+
+  return {
+    name: opts.name ?? 'progressive-widening',
+    async act(task: unknown, scope: Scope<Out>): Promise<Out> {
+      // Seed the NARROW frontier: one child per seed, spawned in seed order.
+      // NOTE(review): the spawn result is not inspected here — presumably `scope.spawn` itself
+      // fails closed when the pool can't cover a seed (never overcommits); confirm in `Scope`.
+      for (const s of opts.seed(task)) {
+        scope.spawn(asSpawnable(s.child), s.child.task, { budget: s.budget, label: s.child.label })
+      }
+
+      const done: Array<Extract<Settled<Out>, { kind: 'done' }>> = []
+      // React to settlements one at a time (ray.wait n=1). `next()` is null only when the
+      // live set is empty — every spawned child eventually settles done or down.
+      for (let settled = await scope.next(); settled !== null; settled = await scope.next()) {
+        if (settled.kind === 'down') continue // infra/bad child: excluded from merge n + equal-k
+        done.push(settled)
+
+        // Progressive widening: spawn AT MOST one more child toward this lineage, and only
+        // when the gate says promising AND the pool can still cover a widen. The findings
+        // are TRACE-derived (`analyze`); the gate reads them, never the raw verdict.
+        if (!gate.shouldWiden(settled, scope.budget)) continue
+        const findings = analyze ? await analyze(settled) : []
+        if (!isPromising(findings, gate, settled)) continue
+        const next = opts.widen(settled, findings)
+        if (next === null) continue
+        scope.spawn(asSpawnable(next.child), next.child.task, {
+          budget: next.budget,
+          label: next.child.label,
+        })
+      }
+
+      // Single-sourced selection: adapt the done children to the kernel's Iteration shape
+      // and let `defaultSelectWinner` pick (best-valid-score, ties → earliest). The driver
+      // does NOT fork the argmax (selector ≠ judge).
+      const iterations = done.map((s) => settledToIteration(s))
+      const winner = defaultSelectWinner(iterations)
+      if (!winner) {
+        throw new Error(
+          'progressive-widening: no done child to select a winner from (all children were down)',
+        )
+      }
+      return winner.output as Out
+    },
+  }
+}
+
+/**
+ * The shared `WidenGate` in its flat, default form. Its `shouldWiden` answers false no
+ * matter which settlement or budget it is asked about, so a driver using it never widens
+ * and the firewall conflict (R2) stays dormant by construction. To enable widening, supply
+ * a findings-driven gate instead (severity/area thresholds over trace findings); reading
+ * `verdict.score` is reserved for a gate that explicitly sets `judgeExempt: true`.
+ */
+export function defaultWidenGate<Out>(): WidenGate<Out> {
+  const neverWiden = (): boolean => false
+  const flatGate: WidenGate<Out> = {
+    shouldWiden: neverWiden,
+  }
+  return flatGate
+}
+
+/**
+ * A findings-driven widening gate (opt-in, never the default). The gate itself only checks
+ * the pool: `shouldWiden` is true while `budget.tokensLeft >= minTokensLeft`, which keeps a
+ * widen from starving the pool below a usable per-child floor. The findings test — a
+ * high/critical TRACE finding that carries a `recommended_action` ("this is fixable, do X")
+ * — is applied by the driver's `isPromising`; the verdict is never read on this path.
+ */
+export function findingsWidenGate<Out>(opts: { minTokensLeft: number }): WidenGate<Out> {
+  return {
+    shouldWiden(_settled: Settled<Out>, budget: Scope<Out>['budget']): boolean {
+      return budget.tokensLeft >= opts.minTokensLeft
+    },
+  }
+}
+
+/**
+ * Decide whether a settled lineage deserves one more child. By default the answer comes
+ * from TRACE findings alone: at least one `high`/`critical` finding with a non-empty string
+ * `recommended_action` (a correctable middle band). No findings means not promising (flat).
+ *
+ * A gate with `judgeExempt: true` bypasses the findings and asks whether the verdict score
+ * is positive — the explicit, off-by-default hatch that re-couples steering to the judge.
+ */
+function isPromising<Out>(
+  findings: ReadonlyArray<AnalystFinding>,
+  gate: WidenGate<Out>,
+  settled: Extract<Settled<Out>, { kind: 'done' }>,
+): boolean {
+  if (gate.judgeExempt === true) return judgeScore(settled.verdict) > 0
+  for (const finding of findings) {
+    const severe = finding.severity === 'high' || finding.severity === 'critical'
+    if (!severe) continue
+    const action = finding.recommended_action
+    if (typeof action === 'string' && action.length > 0) return true
+  }
+  return false
+}
+
+/** Pull the scalar score out of a verdict, or 0 when the verdict is missing or its `score`
+ *  is not a number. Called ONLY behind the explicit `judgeExempt` hatch — the
+ *  steering-from-the-judge path the firewall otherwise forbids. */
+function judgeScore(verdict: DefaultVerdict | undefined): number {
+  const score = (verdict as { score?: unknown } | undefined)?.score
+  return typeof score === 'number' ? score : 0
+}
+
+/** Check that the child's agent already carries its `AgentSpec` as `executorSpec` — the
+ *  field `scope.spawn` resolves the runtime from — and hand the agent back untouched.
+ *  Nothing is attached here: a missing or malformed spec throws (fail loud), because only
+ *  the agent author knows its profile/harness. */
+function asSpawnable<Out>(child: ChildAgent<Out>): Agent<unknown, Out> {
+  const { agent } = child
+  const { executorSpec } = agent as { executorSpec?: unknown }
+  if (isAgentSpec(executorSpec)) return agent
+  // Name the offending child so the author can find the agent missing its spec.
+  throw new Error(
+    `progressive-widening: child "${child.label}" agent carries no executorSpec (AgentSpec); cannot resolve its LeafExecutor`,
+  )
+}
+
+function isAgentSpec(value: unknown): value is AgentSpec {
+  // Structural check only: any non-null object exposing both keys counts as a spec.
+  if (value === null || typeof value !== 'object') return false
+  return 'profile' in value && 'harness' in value
+}
diff --git a/bench/src/rsi.ts b/bench/src/rsi.ts
new file mode 100644
index 0000000..10d22b8
--- /dev/null
+++ b/bench/src/rsi.ts
@@ -0,0 +1,86 @@
+/**
+ * The RSI driver experiment, instantiated. The whole thing in one file: pick a
+ * benchmark adapter, pick the steer POLICIES (the arms), run them through the one
+ * flow at equal compute, read the result. Everything else is the library
+ * (src/experiment.ts). Adding a benchmark is one import; adding a policy is one
+ * steer function.
+ *
+ *   BENCH=swe-bench N=20 ROUNDS=3 tsx src/rsi.ts
+ *
+ * Caveat: `blind`/`random` are independent fresh attempts (the compute control).
+ * A `continue` / "build on your prior work" policy is only meaningful with
+ * CONTINUED-SESSION execution (the kernel reusing one box across turns); the loop
+ * is fresh-box-per-attempt today, so it would degrade to a re-attempt. The
+ * prompt-steering policies below (critical-audit, aggressive-push) are live now.
+ */
+import { Sandbox } from '@tangle-network/sandbox'
+import { createFinsearchcompAdapter } from './benchmarks/finsearchcomp'
+import { createSweBenchAdapter } from './benchmarks/swe-bench'
+import type { BenchmarkAdapter } from './benchmarks/types'
+import { type Arm, analystArm, arm, llmAnalyst, randomArm, runExperiment, sandboxAgentRun } from './experiment'
+
+const must = (k: string): string => {
+  const value = process.env[k]
+  if (value) return value // unset and empty string are both rejected
+  throw new Error(`env ${k} is required`)
+}
+
+// The benchmark roster, keyed by the `BENCH` env value. Long-horizon adapters (commit0, swe-lancer,
+// tau2, appworld, blueprint) slot in here as one entry each; the loop below never changes.
+const ADAPTERS: Record<string, () => BenchmarkAdapter> = {
+  'swe-bench': createSweBenchAdapter,
+  finsearchcomp: createFinsearchcompAdapter,
+}
+
+async function main() {
+  const make = ADAPTERS[process.env.BENCH ?? 'swe-bench'] // BENCH unset → swe-bench
+  if (!make) throw new Error(`unknown BENCH=${process.env.BENCH} (have: ${Object.keys(ADAPTERS).join(', ')})`)
+  const adapter = make()
+  const model = process.env.WORKER_MODEL ?? 'gpt-5'
+  const routerBaseUrl = process.env.ROUTER_BASE ?? 'https://router.tangle.tools/v1'
+  const routerKey = must('TANGLE_API_KEY') // throws when unset or empty
+  const rounds = Number(process.env.ROUNDS ?? 3) // NOTE(review): a non-numeric ROUNDS becomes NaN — not validated here
+  const router = { routerBaseUrl, routerKey, model }
+  const client = new Sandbox({
+    baseUrl: process.env.SANDBOX_BASE_URL ?? 'https://sandbox.tangle.tools',
+    apiKey: routerKey, // the sandbox client reuses TANGLE_API_KEY
+    timeoutMs: 1_200_000,
+  } as never) // NOTE(review): `as never` bypasses the constructor's option typing — confirm why it is needed
+
+  // The steer policies under test. Each is an arm = a steer f(rootPrompt, history).
+  const policies: [Arm, ...Arm[]] = [
+    randomArm('blind'), // compute control: independent retries, no steer
+    analystArm('critical-audit', llmAnalyst(router)), // audit the prior attempt, steer on the findings
+    arm('aggressive-push', (root, _h, r) =>
+      r === 0 ? root : `${root}\n\nShip the most complete working end-to-end result NOW. Prefer done over polish; finish it.`),
+  ]
+
+  const corpus = process.env.CORPUS ?? `${process.cwd()}/corpus/rsi-${adapter.name}.jsonl`
+  const r = await runExperiment({
+    adapter,
+    sandboxClient: client,
+    agentRun: sandboxAgentRun({ model, routerBaseUrl, routerKey }),
+    arms: policies,
+    model,
+    rounds,
+    n: Number(process.env.N ?? 10),
+    ids: process.env.IDS ? process.env.IDS.split(',') : undefined,
+    concurrency: Number(process.env.CONCURRENCY ?? 3),
+    ...(adapter.output ? { output: adapter.output } : {}),
+    corpusPath: corpus,
+  })
+
+  const pct = (x: number) => (r.n > 0 ? `${((x / r.n) * 100).toFixed(1)}%` : 'n/a') // share of the clean n; 'n/a' when n is 0
+  console.log(`\n=== ${adapter.name}: ${r.arms.length} policies x rounds=${rounds} (clean n=${r.n}, excluded ${r.errored}) ===`)
+  console.log(`  blind (1 attempt): ${pct(r.blind)}`)
+  for (const a of r.arms) {
+    const tag = a.label === r.arms[0]?.label ? '  <- compute control' : `  delta vs control ${((a.deltaVsControl / Math.max(r.n, 1)) * 100).toFixed(1)}pp`
+    console.log(`  ${a.label}@${rounds}: ${pct(a.resolved)}${tag}`)
+  }
+  console.log(`corpus: ${corpus}  ->  paired CI + BH via: tsx src/corpus-report.mts ${corpus}`)
+}
+
+main().catch((e) => {
+  console.error(e instanceof Error ? (e.stack ?? e.message) : String(e)) // prefer the stack when present
+  process.exit(1) // any failure exits non-zero
+})
diff --git a/docs/README.md b/docs/README.md
index c71dd41..d060d99 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -14,6 +14,17 @@ Read top-to-bottom for the full picture.
 | 4 | [learning-flywheel.md](./learning-flywheel.md) | theory deep-dive | The moat thesis — the `(π, τ, J, D, O)` recursion and cross-run flywheel. Points to `architecture.md` as the canonical entry. |
 | 5 | [../bench/README.md](../bench/README.md) | empirical harness | The benchmark surface and current empirical status (what's been run, what wins, what's untested). |
 
+## Research track
+
+Forward-looking design research — surveys, multi-agent design passes, decision logs. Not the canonical spine; promotions into `architecture.md` happen explicitly once a design ships.
+
+| Doc | Role | Purpose |
+|---|---|---|
+| [research/README.md](./research/README.md) | research index | The active design thread + decision log + source-artifact pointers. |
+| [research/recursive-execution-atom.md](./research/recursive-execution-atom.md) | design (in progress) | The next generation: one recursive `Agent` atom run as a durable, observable supervision tree (drivers-of-drivers, analyst-as-agent-with-runtime, async dynamic spawning). Plane B — contains the flat harness. |
+| [research/flat-harness-design.md](./research/flat-harness-design.md) | design synthesis | Plane A — the assumption-free experiment harness (profiles × steer × executionMode × allocation). Recovered as the simplest `act` body on Plane B. |
+| [research/long-horizon-benchmark-survey.md](./research/long-horizon-benchmark-survey.md) | survey | Adversarially-verified long-horizon + multi-turn benchmark survey. Top picks: Commit0, τ²-bench. |
+
 ## Reference track
 
 The package API and subsystems.
diff --git a/docs/research/README.md b/docs/research/README.md
new file mode 100644
index 0000000..629248d
--- /dev/null
+++ b/docs/research/README.md
@@ -0,0 +1,61 @@
+> **Track:** Architecture (research) · **Role:** design-research log · **Status:** open — keystone design in flight
+
+# Research log — RSI driver architecture
+
+Design research for the next architecture generation: turning the flat experiment harness
+into a **recursive execution atom** (agents that drive agents, recursively; analysts as
+agents; an async, observable, dynamically-spawning supervision tree). This dir tracks the
+inputs (surveys, design passes), the decisions, and the open forks so the thread is
+resumable and the expensive multi-agent passes are not re-run.
+
+On any *architecture* conflict, [`../architecture.md`](../architecture.md) still wins. These
+docs are forward-looking design research, not the canonical spine — promotions into the
+spine happen explicitly, with `file:line` anchors, once a design ships.
+
+## Documents
+
+| Doc | What it holds |
+|-----|---------------|
+| [recursive-execution-atom.md](./recursive-execution-atom.md) | **The main thread.** The vision (verbatim intent), the Plane-A-vs-B framing, the proposed surface (one atom + `Scope` + `Supervisor`), analyst-as-agent-with-runtime, what exists vs the gap (file-grounded), the open questions, and the decision log. |
+| [flat-harness-design.md](./flat-harness-design.md) | **Plane A.** The assumption-free experiment-harness synthesis (profiles × steer × executionMode × allocation; rip-out list; durability argument; migration phases). Recovered as the simplest `act` body on Plane B. |
+| [long-horizon-benchmark-survey.md](./long-horizon-benchmark-survey.md) | Adversarially-verified survey of long-horizon + multi-turn benchmarks. Top picks: **Commit0** (graded + natively multi-turn software build), **τ²-bench** (multi-turn agent↔user with tools). |
+
+## Source artifacts (multi-agent passes)
+
+| Run | Pass | Result lands in |
+|-----|------|-----------------|
+| `w9ntld2vt` | deep-research benchmark survey (102 agents, 20 sources, 25 claims adversarially verified) | long-horizon-benchmark-survey.md |
+| `wuh46e5zp` | durable-architecture design — 3 proposals → adversarial synthesis | flat-harness-design.md |
+| `wnrxtvdta` | recursive-atom-surface — 6 prior-art lenses + 4 codebase mappers → synthesis → adversarial critique → reconcile | recursive-execution-atom.md (appended on completion) |
+
+## Decision log
+
+- **Full tensor now**, not "not-foreclose / flat-v1." The architecture must *be* the recursive
+  execution atom now, built as durable mechanism (so it survives even a negative gate), not a
+  flat harness with seams. _(interview, 2026-06-04)_
+- **Plane B contains Plane A.** We do not pick "experiment harness" or "recursive atom" — the
+  flat harness is the simplest `act` body over the atom. The `wuh46e5zp` design becomes the
+  canonical example, not a competing v1.
+- **Analyst = Agent + harness.** Halo-CLI / our inline trace-analyst / a sandboxed agent are
+  one type. The runtime is **derived from the agent's `AgentProfile.harness`**: `harness: null` =
+  direct Router inference call; `harness: <sandbox>` = sandboxed; future `mastra`/`agno`/`ai-sdk`
+  harnesses register their own `LeafExecutor`. _(operator, 2026-06-04)_
+- **Leaves are opaque, self-parallelizing coding harnesses.** The recursion is in the *drivers*;
+  the bottom is a coding agent that fans out internally on its own.
+- **The 4 forks resolved (operator, 2026-06-04):** event-sourced **yes**; observability **substrate
+  now**; LLM meta-driver **built now** (operator override of the pass's "make it wait"), as the
+  *treatment* on top of the budget-reservation invariant, with coded progressive-widening +
+  flat-harness as controls; hard ceiling **yes — sharpened to a conserved reservation pool**
+  (`Σk(treatment) ≡ Σk(blind)` by construction, fail-closed).
+- **The keystone is the budget-conserving reactive `Scope` + `Supervisor`** (not the LLM driver).
+  The critique proved a *ceiling* budget + data-dependent spawning is a confound generator; the
+  conserved *reservation* pool is the one invariant that makes any meta-driver result valid.
+  `WidenGate` defaults to flat so the selector≠judge firewall conflict (R2) stays dormant until
+  widening is argued. See [recursive-execution-atom.md](./recursive-execution-atom.md) for the
+  frozen surface + build order.
+
+## Open engineering forks (not blocking the v1 keystone)
+
+- **F1** — does `Scope` supersede `runProgram`'s loop-layer `parallel`, or coexist? (deletion deferred until `Scope` is proven)
+- **F2** — adopt a Temporal/DBOS durable backend now, or type-shape-only until days-long resumable runs are a near-term product?
+- **F3** — is `cli`/Halo a first-class equal-k participant (needs external-process token accounting first) or observability-only (`budgetExempt`, permanent)?
diff --git a/docs/research/flat-harness-design.md b/docs/research/flat-harness-design.md
new file mode 100644
index 0000000..6bfe853
--- /dev/null
+++ b/docs/research/flat-harness-design.md
@@ -0,0 +1,99 @@
+> **Track:** Architecture (research) · **Role:** design synthesis · **Status:** subsumed — this is Plane A, recovered as the simplest `act` body on [recursive-execution-atom.md](./recursive-execution-atom.md)
+
+# Flat experiment harness (Plane A)
+
+Synthesis of the `wuh46e5zp` design pass (3 independent proposals → adversarial synthesis):
+the durable, assumption-free **experiment harness** for comparing steer policies at equal
+compute. All three proposals converged tightly and identically on the same surface.
+
+This is **not** a competing v1. It is the flat plane — and the recursive atom *contains* it:
+the harness below is the simplest possible `act` (spawn one child per profile, fixed budget,
+select the best). Captured here because its mechanism/content split, its rip-out list, and its
+`executionMode` primitive are directly reused by Plane B.
+
+## The converged surface
+
+```ts
+const result = await runRsiExperiment({
+  benchmark: adapter,                                  // researcher's task + deterministic judge
+  profiles: AgentProfile[],                            // the arms — FULL profiles, not keyword strings
+  steerPolicies: ((root, history, round) => prompt)[], // pure fns; read trace/events, never the verdict
+  executionMode: { kind: 'fresh-box' | 'continued-session' | 'fork', maxTurns },
+  allocation: { kind: 'round-robin' | 'adaptive-thompson' | 'variance-based', k },
+  sandboxClient, n, concurrency, corpusPath,
+})
+```
+
+- **Arms are full `AgentProfile`s** (model, tools, MCP, persona, capabilities) composed with
+  `mergeAgentProfiles` — never keyword strings like `critical-audit`.
+- **Steer is a pure function** `(rootPrompt, history, round) => nextPrompt`, fully visible to the
+  researcher. No hidden directives.
+- **The researcher's experiment is ~50 lines**; the framework is <500 LOC.
+
+## Framework owns (mechanism) vs researcher supplies (content)
+
+| Framework (once) | Researcher (per experiment) |
+|---|---|
+| `ExecutionMode` mechanics (box lifecycle per mode) | full `AgentProfile`s (the arms) |
+| loop kernel (`runLoop`, `createDynamicDriver`) | steer policies (pure fns; their hypotheses) |
+| measurement (`BenchmarkAdapter`, `OutputAdapter`, `Validator`) | the task adapter + deterministic judge |
+| allocation scheduling (`thompson`/`variance` from agent-eval) | execution-mode + allocation choice (explicit) |
+| corpus (`RunRecord`, paired bootstrap + BH) | optional `OutputAdapter`/`Validator` overrides |
+| **steer firewall** (selector ≠ judge, type-level) | — |
+| **compute-control enforcement** (control arm required to compile) | — |
+
+## `executionMode` — the one new runtime primitive
+
+A required field on the kernel; default `fresh-box` (today's behavior). This is the
+"continued-session execution dial," and it plugs into the existing `collectBox` seam in
+`src/loops/run-loop.ts`.
+
+- **`fresh-box`** — new sandbox per iteration; stateless; the **compute control** (bandit-like; k independent samples).
+- **`continued-session`** — one sandbox reused across turns; filesystem/shell state persists; steering compounds (MDP-like). The kernel creates the box once and reuses it; the driver rewrites the prompt per turn via the steer policy.
+- **`fork`** — checkpoint + branch (what-if / counterfactual); deferred (needs sandbox checkpoint/restore).
+
+Allocation composes orthogonally: `round-robin` (fair, the baseline), `adaptive-thompson`,
+`variance-based`. The corpus `condition` field logs mode + allocation so offline analysis can
+reject mismatched comparisons (a policy is only comparable within the same `executionMode`).
+
+## Rip out (hardcoded content → researcher config)
+
+- `bench/src/directives.ts` — **delete** all `DEFAULT_*` directive constants + `DIVERSE_STRATEGY_LENSES`. Keep only `composeStrategies()` as a helper. Directives are researcher hypotheses, not framework policy.
+- `bench/src/run.ts` — **delete** the `batch-blind` / `batch-oracle` / `batch-compare` presets and the env-driven dispatch (`BACKEND`, `WORKER_MODEL`, `ANALYST`). One entry point loads a researcher config.
+- `bench/src/experiment.ts` — **move** `randomArm`/`refineArm`/`diverseArm`/`llmAnalyst`/`loopAnalyst`/`analystArm` to examples; they are templates, not framework.
+- `WorkerBackendType` enum — **delete**. Backend is part of the `AgentProfile` (the cost dial is a backend type, not a separate knob).
+- `ADAPTERS[key]` lookup — **delete**. The config imports the adapter directly.
+
+## Baked assumptions explicitly rejected
+
+Arms-are-keywords; directives-are-framework-policy; one-box-per-iteration-is-the-only-model;
+diverse-lenses-are-fixed; allocation-is-always-fixed-k; the-task-is-always-a-string;
+backend-is-a-separate-knob; the-firewall-is-a-soft-rule (→ make `PlannerContext` carry only
+`output`+`events`, never `verdict`, at the type level); control-is-optional (→ `runRsiExperiment`
+requires a control arm; omitting it is a compile error).
+
+## Durability argument (why it survives 2 years)
+
+Content/mechanism split isolates the framework from trend-chasing (new domains need adapters,
+not rewrites); substrate-maximal leverage (`AgentProfile` from the sandbox SDK, `runLoop` from
+runtime) tracks upstream not internal drift; profiles-as-versioning (a config file in git
+reproduces a run 18 months later); `RunRecord` decouples sweeps from analysis (replay the
+corpus under new hypotheses without re-running); `executionMode` as an axis (if
+continued-session is a dead end, no framework bloat); only two contracts (`BenchmarkAdapter`,
+`AgentProfile`); no hardcoded strings.
+
+## Migration phases (from the synthesis)
+
+Dependency-ordered, each small and verifiable: (1) add `ExecutionMode` to `agent-runtime`
+types, default `fresh-box`, behavior unchanged; (2) implement `continued-session` on the
+`collectBox` seam; (3) extract `SteerPolicy`, move arm factories to examples; (4) rip out
+directives; (5) flow `executionMode` into the corpus; (6) `RsiExperimentConfig` +
+`runRsiExperiment`; (7) allocation strategies as plugins; (8) firewall type-enforcement;
+(9) delete `batch-*`; (10) docs + examples + migration guide.
+
+## Top risks flagged
+
+Session leaks if `executionMode` unset (→ default `fresh-box`, required field); continued-session
+state explosion (→ SDK memory cap + cleanup flag); adaptive allocation overfits at low n (→ loud
+docs, fixed-k for n<20); "arm beats control" ≠ "steering beats compute" without paired CI (→
+control required by the type signature; corpus-report pairs the delta).
diff --git a/docs/research/long-horizon-benchmark-survey.md b/docs/research/long-horizon-benchmark-survey.md
new file mode 100644
index 0000000..5d7224c
--- /dev/null
+++ b/docs/research/long-horizon-benchmark-survey.md
@@ -0,0 +1,71 @@
+> **Track:** Architecture (research) · **Role:** survey (adversarially verified) · **Status:** reference · **Run:** `w9ntld2vt` (102 agents, 20 sources, 100 claims → 25 verified, 23 confirmed / 2 killed)
+
+# Long-horizon & multi-turn benchmark survey
+
+For the RSI driver experiment: run an agent over multiple turns on a hard task, compare
+**steer policies** (continue / critical-audit / aggressive-ship / personas) against blind
+independent retries, and measure whether steering gets farther per added turn. The experiment
+wants a benchmark that is **natively multi-turn** (context carries across turns) and whose
+completion signal is **GRADED** (fraction of tests passing), not binary, so the adaptation
+curve is smooth.
+
+## Top recommendations
+
+- **Long-horizon software build, steer a continued conversation, compare policies → Commit0.**
+  The only surveyed benchmark that is simultaneously **graded** (pass-rate of unit tests, a
+  continuous 0–100%), **natively multi-turn/interactive** (multi-stage unit-test + static-analysis
+  + coverage feedback the agent adapts to across turns — the curve measurably moves with feedback,
+  e.g. iterating on test errors lifts pass-rate to ~26%), and genuinely **long-horizon** (implement
+  entire real Python libraries from scratch against long-form specs; 54–57 libraries).
+  Sources: arXiv 2412.01769, commit-0.github.io. NeurIPS 2024 D&B.
+
+- **Multi-turn agent↔user conversation with tools → τ²-bench (tau2-bench).** A natively multi-turn
+  **dual-control** Tool-Agent-User benchmark: a simulated user and the agent converse turn-by-turn
+  and **both** can call tools (a Dec-POMDP). Sources: github.com/sierra-research/tau2-bench,
+  arXiv 2506.07982. **Caveat:** rewards are effectively **binary** per task (gated by required
+  actions + `reward_basis`) — it is the *conversation* pick, **not** a graded-curve pick (a
+  verifier vote killed the "graded" claim 0–3).
+
+## Verified verdicts
+
+| Benchmark | Graded? | Natively multi-turn / continued-session? | Fit for "steer a continued build conversation" | Vote |
+|---|---|---|---|---|
+| **Commit0** | **Yes** — unit-test pass-rate % | **Yes** — interactive multi-stage feedback the agent adapts to | **Best** | 3-0 |
+| **FeatureBench** | **Yes** — Passed-Rate (frac. of fail→pass tests) + binary Resolved-Rate | **Yes** — agentic scaffolds, ≤500 steps, diminishing returns ~100 | Strong runner-up; *feature-level*, not greenfield whole-project | 3-0 |
+| **DevBench** | **Yes** — test pass-rate, coverage %, env-setup success | **No** — 5 waterfall stages graded independently with *reference* inputs; only a review-role refine loop | Graded + from-scratch, but **not** one continuous build conversation | 3-0 / 2-1 |
+| **ProgramBench** (Meta/FAIR, arXiv 2605.03546) | Headline **binary** (% Resolved = all tests pass); a secondary "% Tests Passed" partial-progress metric exists | **Yes** — write-compile-debug, 1,000-step / 6-hr cap, median ~868 cmds/task (model-dependent) | **Single-agent-only by design**; multi-agent + human-guided modes are *future work* | graded headline REFUTED 1-2 |
+| **SlopCodeBench** (arXiv 2603.24755) | **Yes** — 4 solve-rate variants + continuous [0,1] erosion/verbosity | Iterative **on the artifact only** — *deliberately wipes prior conversation*; fresh Docker per checkpoint, only the workdir persists | Disqualified for *conversational* steer (no carried context). NB: it already ran a steer comparison — quality prompts cut initial erosion but did **not** slow per-checkpoint degradation (~1.3pp/ckpt), at +12.1% cost | 3-0 |
+| **SWE-Lancer** | **No** — payout only if *all* applicable tests pass; graded only by summed $ of whole tasks | **No** — independent single-deliverable tasks + managerial choices | Poor (no smooth curve) | 3-0 |
+| **MLE-bench** | Medal/percentile (effectively binary per task) | **No** — one final CSV; the agent's own internal ~24h loop, graded only on the submission | Moderate at best | 2-1 |
+
+## What ProgramBench / "program bench" is
+
+The Meta/FAIR **rebuild-from-scratch** benchmark (arXiv 2605.03546, github.com/facebookresearch/programbench,
+May 2026): a single SWE-agent rebuilds programs via a human-like write-compile-debug cycle in a
+persistent Docker session (1,000 steps / 6 hours). Single-agent-only by design; **not** built for
+steer-policy comparison (that is invited as future work). It is usable as a graded substrate via its
+"% Tests Passed per instance" secondary metric, even though the headline "% Resolved" is binary.
+
+## Caveats (carried verbatim from the verifier)
+
+- **Scope gap — not adversarially verified this round:** SWE-Gym, SWE-bench Verified, SWE-bench
+  Multimodal, MLAgentBench, RepoBench, the original single-control τ-bench, AppWorld,
+  TerminalBench, OSWorld, GAIA, WebArena, VisualWebArena, Cybench. Most are predominantly
+  binary/single-deliverable or web/OS/security-domain (likely poor for a graded software-build
+  curve), but confirm before relying on it.
+- **Name collisions:** the graded software-dev **DevBench** is arXiv **2403.08604** (not 2601.11895);
+  **FeatureBench** (2602.10975) ≠ the 2025 "FeatBench" (2509.22237); **ProgramBench** resolves only
+  to the Meta/FAIR 2605.03546.
+- **Dating:** ProgramBench / FeatureBench / SlopCodeBench carry 2026 arXiv IDs; their leaderboard
+  numbers will move, but the *design* properties cited (graded vs binary, step caps, context-carry
+  semantics) are structural and stable.
+- **Interpretive hedge:** "smooth curve" depends on per-task test count. SlopCodeBench's existing
+  steer result (steering does not slow degradation) is the closest direct evidence for the
+  hypothesis, but it is artifact-iterative, not conversation-continued, so it may not generalize.
+
+## Implication for the harness
+
+For a graded, multi-turn, long-horizon software-build adapter, **Commit0 is the slot-in**
+(graded + natively interactive). It plugs into the `BenchmarkAdapter` contract as one entry; the
+`executionMode: 'continued-session'` dial is what makes "steer a continued build conversation"
+meaningful (without it, steering degrades to a re-attempt).
diff --git a/docs/research/recursive-execution-atom.md b/docs/research/recursive-execution-atom.md
new file mode 100644
index 0000000..912d5b5
--- /dev/null
+++ b/docs/research/recursive-execution-atom.md
@@ -0,0 +1,295 @@
+> **Track:** Architecture (research) · **Role:** design research (in progress) · **Status:** surface frozen by the `wnrxtvdta` design pass (see "Design pass `wnrxtvdta` — reconciled"); the 4 operator forks are resolved (see "Decisions resolved")
+
+# Recursive execution atom
+
+The next architecture generation. Today the loop is one level deep: a driver drives one
+agent over rounds. The target is **full generality**: an agent that *is* a driver, fanning
+out sub-loops of drivers-driving-agents, recursively — with analysts watching at every
+level, dynamic asynchronous spawning, and a conversational, observable root.
+
+This doc holds the vision, the proposed surface, the honest gap vs the current code, and the
+open forks. It supersedes nothing in [`../architecture.md`](../architecture.md) until a design ships.
+
+## The vision (the intent, distilled from the operator)
+
+- **Agents run tasks. Drivers drive agents. Analysts watch.** Traces from the agents flow to
+  the driver; analysts turn traces into findings the driver steers on.
+- **Analysts come in three runtimes.** An external CLI/RLM (e.g. Halo), our inline trace-analyst
+  (a bare LLM call, not a sandboxed agent), or a full agent in a sandbox tasked with "analyze
+  these traces and metadata, emit an output." These are *not* three types.
+- **Nested: an agent is a driver of drivers.** An agent can fan out multiple loops of
+  drivers-driving-agents; that agent is then itself a driver. Recursive, self-similar.
+- **The "tensor" is dynamic and asynchronous, not eager fan-out.** We do **not** want an agent
+  exploding into 20 sub-drivers up front. We want: when one branch completes, the agent can
+  spawn a *new* branch (possibly a different flow); the agent can say "run driver A for n
+  shots and driver B for k shots" (heterogeneous per-child budgets); branches run async.
+- **Leaves are opaque, self-parallelizing coding harnesses.** The coding agents sit at the
+  bottom. They are full harnesses that parallelize *inside themselves* (their own sub-agents).
+  The recursion we build is the *driver/policy* layer above them.
+- **The root is eventually conversational + observable.** You hook the root agent to a chatbot
+  (a pi extension with a live visualization of the spawning tree). You ask it "what's currently
+  in flow?" while branches run asynchronously.
+- **Test 100% of the problem space, disciplined.** Build the general mechanism now — not one that
+  confines us to testing 5% of the space today and again tomorrow — but keep it focused, not crazy.
+
+## Two planes — and B contains A
+
+| | Plane A — experiment harness | Plane B — recursive execution atom |
+|---|---|---|
+| Shape | flat: compare N arms at equal compute | recursive: agent → drivers → agents, async |
+| Surface | `profiles × steer × executionMode × allocation` | one `Agent` atom + a `Scope` + a `Supervisor` |
+| Built by | `wuh46e5zp` (see [flat-harness-design.md](./flat-harness-design.md)) | this doc |
+| Answers | the gate (diverse@k vs blind@k) | the full vision |
+
+**Decision: Plane B contains Plane A.** The flat harness is recovered as *the simplest possible
+`act` body* — a root driver that spawns one child per profile at a fixed budget and selects the
+best. So the `wuh46e5zp` design is not a competing v1; it becomes the canonical example program
+over the atom, and its `executionMode`/`allocation` axes become spawn options.
+
+## The thesis: one recursive atom, run as a durable, observable supervision tree
+
+Not three subsystems — **one atom + one executor**, plus two things this repo already has
+(the durable journal in `src/durable/`, the conversation engine in `src/conversation/`) wired
+in as the observability skin. The shape is the intersection of three mature systems:
+
+- **Structured concurrency** (Trio nursery / Swift TaskGroup / Ray dynamic task graph): `act`
+  runs inside a *scope* that can `spawn` children dynamically and react to them **as each
+  finishes**. This is "spawn-on-completion" and "driver A for n shots, B for k shots."
+- **Durable execution** (Temporal): the tree is **event-sourced** — every spawn/complete is
+  journaled, so it is resumable, queryable ("what's in flow?"), and a chat/signal handle can
+  attach to the live root. Observability falls out of the event log; you don't build it twice.
+- **MCTS progressive widening**: the reason you do *not* fan out to 20 at once — a node widens
+  (spawns more children) only as a branch proves promising, under a global budget. This is the
+  governor that keeps "full generality" from becoming "boil the ocean."
+
+### The atom (one self-similar type)
+
+```ts
+interface Agent<Task, Out> {
+  act(task: Task, scope: Scope): Promise<Out>
+}
+```
+
+- **Coder** = an `Agent` that does not spawn (a leaf). The coding harness self-parallelizes; opaque to us.
+- **Driver** = an `Agent` whose `act` spawns child agents and runs a policy over their streaming
+  results. "An agent is a driver" = a driver is just an `Agent` that spawns.
+- **Analyst** = an `Agent` whose task is "read these traces → findings." The CLI/inline/sandbox
+  question collapses to a `runtime` on the spawn (below). Same type, three backends.
+
+### The `Scope` — the only new mechanism
+
+```ts
+scope.spawn(agent, task, { budget, runtime, label }) // -> Handle ; dynamic, async
+scope.next()  // resolves as each child finishes -> react, spawn more   (ray.wait)
+scope.view()  // the live tree: every node's id / parent / status / budget / partial result
+```
+
+```ts
+type Runtime = 'sandbox' | 'cli' | 'inline'
+// 'cli'    = Halo / an external RLM invoked as a subprocess
+// 'inline' = a bare LLM call (today's trace-analyst), no box
+// 'sandbox'= a full coding/analysis agent in a box
+```
+
+The **analyst answer**: an analyst is an `Agent`; *where it runs* is the `runtime`. Halo is
+`runtime: 'cli'`, our trace-analyst is `runtime: 'inline'`, a sandboxed analysis agent is
+`runtime: 'sandbox'`. One type, three handlers — no `Analyst` subsystem.
+
+### Plane A as the simplest `act` (sketch)
+
+```ts
+// The flat harness, recovered: spawn one child per profile, fixed budget, pick the best.
+const flatHarness: Agent<Bench, Result> = {
+  async act(bench, scope) {
+    for (const p of bench.profiles) scope.spawn(coder(p), bench.task, { budget: bench.k, runtime: 'sandbox', label: p.name })
+    const results = []
+    while (results.length < bench.profiles.length) results.push(await scope.next())
+    return selectBest(results)
+  },
+}
+```
+
+### Spawn-on-completion + progressive widening (the dynamic shape)
+
+```ts
+// A driver that widens toward promising branches under a global budget, async.
+async act(task, scope) {
+  let live = seedChildren(task).map((c) => scope.spawn(c.agent, c.task, { budget: c.shots, runtime: 'sandbox' }))
+  const done = []
+  while (scope.budget.remaining() > 0 && live.length) {
+    const ev = await scope.next()            // a child finished
+    done.push(ev)
+    if (promising(ev) && scope.budget.remaining() > THRESH)
+      live.push(scope.spawn(widen(ev), nextTask(ev), { budget: ev.shots, runtime: 'sandbox' }))  // widen, don't pre-fan
+  }
+  return synthesize(done)
+}
+```
+
+## What exists vs the gap (file-grounded; verify before building)
+
+| Component | File | Status | Gap |
+|---|---|---|---|
+| The atom signature | `src/loops/program.ts` (`Agent.act → Output \| Program`, op-set, `runProgram`, `maxDepth=4`) | **right shape** | `act` returns a *static `Program`*; need `act(task, scope)` with **dynamic** `spawn`/`next` (not a pre-authored tree). |
+| Leaf execution | `src/loops/run-loop.ts` (box create / `streamPrompt` / teardown; the `collectBox` same-sandbox seam) | **keep** | The leaf already runs a coding harness; `runtime: 'sandbox'` maps here. |
+| Round-synchronous planner | `src/loops/drivers/dynamic.ts` (`createDynamicDriver`, `PlannerContext.analyses`, selector≠judge firewall) | **evolve** | Planner is round-synchronous (plan → run a batch → observe all → plan). Need async-streaming reaction (`scope.next()` on *individual* completions). |
+| Durable journal | `src/durable/` (`handleChatTurn`, journal/resume) | **wire-in** | Candidate **event source** for the Supervisor (every spawn/complete journaled → replay + query). Needs node-level events. |
+| Conversation engine | `src/conversation/` (turn loop, `selectSpeaker`, `ConversationJournal`) | **wire-in** | Candidate **chat handle** over a live Supervisor ("talk to the root / what's in flow"). |
+| Supervisor executor | — | **net-new** | The keystone: a live node registry running `act`, async, on the journal. Replaces the batch `runProgram` tree-walk. |
+| `Scope` | — | **net-new** | The keystone capability: `spawn` / `next` / `view` + budget. |
+
+**The keystone is `Scope` + `Supervisor`.** Leaves, the analyst hook, Plane A, observability,
+and the chat handle all fall out of it (or already exist).
+
+## Open forks (recommended answers; awaiting the operator)
+
+1. **Event-sourced supervisor?** _Recommended: yes, from day one._ This repo's science needs a
+   reproducible corpus (paired bootstrap + BH), but a free-running async supervisor is
+   nondeterministic. Build the Supervisor on `src/durable/`'s journal as the source of truth →
+   replayable (science) *and* queryable/resumable (the chat handle). Temporal proves you get
+   observability for free from the event log; don't build two executors. **Most load-bearing.**
+2. **Conversation now, or substrate-now / client-later?** _Recommended: substrate now._ Build
+   `scope.view()` + a node-event channel in v1; defer the chatbot/pi-viz to a thin client.
+   The root is only "eventually" conversational, so the goal is to make a later rewrite unnecessary, not to pay for the UI now.
+3. **Spawn policy: code, LLM, or both — default?** _Recommended: `act` is code; LLM-decided
+   spawning is the researcher's choice._ v1 ships coded policies (fixed / round-robin /
+   progressive-widening); the **LLM meta-driver** is opt-in, not default — a learned/LLM
+   meta-controller is exactly the "mechanism ahead of the gate" the repo warns against, and it
+   is nondeterministic.
+4. **Global budget as a hard ceiling?** _Recommended: yes, fail-closed at the root._ One root
+   budget (tokens / $ / wall); the Supervisor enforces it; policies widen within it.
+
+## Decision log
+
+- **Full tensor now** (the recursive atom is v1, built as durable mechanism). _(2026-06-04)_
+- **B contains A** (flat harness = simplest `act`). _(2026-06-04)_
+- **Analyst = Agent + `runtime`** (`cli`/`inline`/`sandbox`). _(2026-06-04)_
+- **Leaves = opaque self-parallelizing coding harnesses.** _(2026-06-04)_
+
+## Design pass `wnrxtvdta` — reconciled (the frozen contract)
+
+6 prior-art lenses + 4 codebase mappers → synthesis → adversarial critique → reconcile.
+
+**BLUF.** The mechanism is agreed: `scope.next()` = a ray.wait cursor over a structured-concurrency
+nursery. The critique then landed **3 blockers + 3 majors**, all on one fault line: *the headline
+property (durable + queryable + reproducible replay) and the reason-to-exist (a clean equal-k gate)
+both break for the same root cause — budget was a **ceiling** not a **reservation**, and the journal
+recorded **decisions** but not the **evidence** those decisions consumed.* Two invariants make the
+keystone survive: (1) **budget is an atomically-reserved conserved pool**, so `Σk(treatment) ≡ Σk(blind)`
+by construction; (2) **the journal records a content-addressed `outRef`** per child result, so replay
+rehydrates the exact `Settled` the driver branched on. The keystone is the **budget-conserving reactive
+`Scope`** — not the LLM meta-driver.
+
+### The frozen surface (build against this)
+
+```ts
+// One self-similar atom. A leaf is an Agent that never calls scope.spawn.
+interface Agent<Task, Out> { readonly name: string; act(task: Task, scope: Scope<Out>): Promise<Out> }
+
+// The runtime is ONE OPEN INTERFACE, not a closed union (operator's refinement). A LeafExecutor
+// is anything with an `execute` that returns a Promise OR an async stream of normalized usage.
+// Our built-ins are just the initial IMPLEMENTATIONS; a user's own agent (mastra, agno, a raw
+// HTTP call, anything) is first-class the moment it implements the interface. NO per-vendor
+// adapters, no "future adapter" code — the interface IS the extension point.
+//   - router/inline : a direct Router/HTTP inference call, no box   (an agent with harness: null)
+//   - sandbox       : COMPOSES the existing runLoop kernel as a leaf (+ PR #150's `lineage`
+//                     passthrough for leaf-level continue/fork — does NOT reinvent checkpoint/fork)
+//   - cli           : Halo/RLM subprocess; budgetExempt, excluded from equal-k by construction
+// An agent selects its executor via its AgentProfile (harness: null => router/inline; harness:
+// <sandbox> => sandbox), OR carries a custom LeafExecutor / executor-factory directly (BYO).
+interface LeafExecutor<Out> {
+  // returns a Promise<LeafResult> for one-shot executors, OR an async stream of UsageEvents for
+  // streaming ones; the architect picks the minimal shape that supports both with normalized usage.
+  execute(task: unknown, signal: AbortSignal): Promise<LeafResult<Out>> | AsyncIterable<UsageEvent>
+  teardown(grace: number | 'brutalKill' | 'infinity'): Promise<{ destroyed: boolean }>
+  resultArtifact(): { outRef: string; out: Out; verdict?: DefaultVerdict; spent: Spend }  // B1: replay source
+}
+type UsageEvent = { kind: 'tokens'; input: number; output: number } | { kind: 'cost'; usd: number } | { kind: 'iteration' }
+//   M3/B3: LoopTokenUsage is {input,output} ONLY — usd is a SEPARATE channel.
+
+interface Budget { readonly maxIterations: number; readonly maxTokens: number; readonly maxUsd?: number; readonly deadlineMs?: number }
+interface Spend  { iterations: number; tokens: LoopTokenUsage; usd: number; ms: number }
+
+type Restart = 'temporary' | 'transient' | 'permanent'                          // OTP child_spec
+type NodeStatus = 'pending' | 'acquiring' | 'running' | 'done' | 'failed' | 'cancelled'  // M1: 'acquiring' first-class
+interface SpawnOpts { readonly budget: Budget; readonly label: string; readonly restart?: Restart; readonly shutdown?: number | 'brutalKill' | 'infinity' }
+interface Handle<Out> { readonly id: NodeId; readonly label: string; readonly status: NodeStatus; abort(reason?: string): void }
+//   M1: abort() is defined over the ACQUIRE lifecycle (chains into acquireSandbox signal + reaps find-by-name orphan box).
+
+type Settled<Out> =
+  | { kind: 'done'; handle: Handle<Out>; out: Out; outRef: string; verdict?: DefaultVerdict; spent: Spend; seq: number }
+  | { kind: 'down'; handle: Handle<Out>; reason: string; infra: boolean; restartCount: number; seq: number }
+//   B2: seq = monotonic cursor order next() yielded (NOT wall-clock); replay delivers strictly in seq order.
+
+interface Scope<Out> {
+  // M5: reserves budget atomically from the shared pool; FAILS CLOSED when the pool can't cover it; refunds unspent on settle.
+  spawn<C extends Out>(agent: Agent<unknown, C>, task: unknown, opts: SpawnOpts):
+    { ok: true; handle: Handle<C> } | { ok: false; reason: 'budget-exhausted' | 'depth-exceeded' }
+  next(): Promise<Settled<Out> | null>          // ray.wait n=1 over THIS scope's IN-MEMORY live set; null when empty
+  readonly view: TreeView                        // reads the in-memory nursery (NOT the log); O(live)
+  readonly budget: Readonly<{ tokensLeft: number; usdLeft: number; deadlineMs: number; reservedTokens: number }>
+}
+
+// Event source — the decision/payload split the replay argument rests on (B1/B2):
+type SpawnEvent =
+  | { kind: 'spawned'; id: NodeId; parent?: NodeId; label: string; budget: Budget; runtime: Runtime; seq: number; at: string }
+  | { kind: 'settled'; id: NodeId; status: 'done' | 'down'; outRef?: string; verdict?: DefaultVerdict; spent: Spend; infra?: boolean; seq: number; at: string }
+  | { kind: 'cancelled'; id: NodeId; reason: string; seq: number; at: string }
+interface SpawnJournal { loadTree(root: NodeId): Promise<SpawnEvent[] | undefined>; beginTree(root: NodeId, at: string): Promise<void>; appendEvent(root: NodeId, ev: SpawnEvent): Promise<void> }
+interface ResultBlobStore { put(outRef: string, artifact: unknown): Promise<void>; get(outRef: string): Promise<unknown | undefined> }
+
+// Supervisor — owns the conserved pool, the spawn log, the abort cascade, the OTP intensity breaker, the root handle.
+interface Supervisor<Task, Out> { run(root: Agent<Task, Out>, task: Task, opts: SupervisorOpts): Promise<SupervisedResult<Out>>; attach(h: RootHandle<Out>): void }
+type SupervisedResult<Out> =
+  | { kind: 'winner'; out: Out; outRef: string; verdict?: DefaultVerdict; tree: TreeView; spentTotal: Spend }
+  | { kind: 'no-winner'; reason: 'all-children-down' | 'budget-exhausted' | 'aborted'; tree: TreeView; downCount: number }  // M2: typed, never best!
+interface RootHandle<Out> { view(): TreeView; signal(msg: RootSignal): void; abort(reason?: string): void }  // Q2 substrate
+```
+
+**Replay invariant (now enforceable):** a driver's `act()` may read `verdict`, `spent`, and `out`
+(rehydrated by `outRef`); it MUST NOT read anything not delivered through `Settled` — no `Date.now`,
+no `Math.random`, no unordered collections. `next()` delivers strictly in recorded `seq` order.
+
+### Build order (v1 = the instrument)
+
+| # | Step | Net-new/Evolve | File | Fixes |
+|---|------|---|---|---|
+| 1 | `mapPool` one-for-all → one-for-one: a thrown child becomes a `down` record, excluded from merge `n`; survivors still reach `concatRuns`. | Evolve | `program.ts:408-433` | infra-exclusion |
+| 2 | **Conserved budget pool**: `Spend` from a normalized `UsageEvent` stream (tokens + usd separate); atomic reserve-on-spawn / reconcile-on-settle; fail-closed admission. | Evolve | `types.ts`, `drivers/report-usage.ts` | **M5,B3** |
+| 3 | `SpawnJournal` + `ResultBlobStore` (in-mem + JSONL/FS); sink over the existing `LoopTraceEvent` lineage. | Net-new/Evolve | `src/durable/spawn-journal.ts` (new); wire `run-loop.ts:183` | **B1** |
+| 4 | **`Scope` impl** (KEYSTONE): ray.wait cursor over in-memory nursery; `spawn` reserves from step-2 pool; deterministic `${parent}:s${seq}` ids; `view`/`inFlight` read memory. | Net-new | `src/loops/scope.ts` (new) | **B2,m1,m2** |
+| 5 | **`Supervisor` impl** (KEYSTONE): nursery join barrier (generalize run-loop's `finally{allSettled(destroy)}`); abort cascade; abort-chains-into-`acquireSandbox` + find-by-name reap; OTP intensity breaker; typed `SupervisedResult`. | Net-new | `src/loops/supervisor.ts` (new) | **M1,M2** |
+| 6 | `LeafExecutor` + per-harness impls (`inline`/`sandbox`/`cli`), each emitting normalized `UsageEvent`; `sandbox` = existing `runLoop` as a leaf; `cli`-without-accounting = `budgetExempt` + excluded from equal-k. | Evolve | `types.ts`, `src/loops/runtime.ts` (new) | **M3** |
+| 7 | Replay executor: re-feed `SpawnJournal` + rehydrate `out` from `ResultBlobStore` in `seq` order; `view()` materializer for resume. | Net-new | `src/durable/spawn-journal.ts` | **B1,B2** |
+| 8 | `Settled.done → Iteration` adapter at the merge boundary so `defaultSelectWinner` stays single-sourced. | Net-new (small) | `src/loops/scope.ts` | **M4** |
+| — | `flatHarness` driver (Plane-A control) + **equal-k assertion** `Σiterations(treatment) ≡ Σiterations(blind)` per task or the cell is excluded. | Net-new | `bench/` | **B3** |
+| — | **LLM meta-driver** (treatment) + coded progressive-widening — `WidenGate` **defaults to flat** (never widens) so the firewall conflict stays dormant; widening, when on, derives "promising" from **trace findings, not raw `verdict`**, or carries an explicit argued `judgeExempt`. | Net-new | `bench/` | **R2** |
+
+**Deferred** (gated on a *positive* diverse-strategy result): a tuned MCTS-PW algorithm, learned
+widening, per-branch adaptive sub-agents, a Temporal/DBOS durable backend, the OTP strategy matrix,
+deleting `runProgram`'s loop-layer `parallel` op (supersede-vs-coexist is fork F1).
+
+### Resolved / risks / verdict
+
+- **Resolved by the surface:** B1 (outRef + replay invariant), B2 (in-memory live set + seq cursor), M1 (`acquiring` + acquire-aware abort), M2 (typed `SupervisedResult`), M3 (`LeafExecutor` + normalized usage), M5 (atomic reservation, fail-closed).
+- **Residual risks (measure, don't hide):** R1 — the recorded interleaving is *one* sample; equal-*k* is enforceable, equal-*topology* is not → report realized tree shape per cell. R2 — widening-from-`verdict` *is* steering-from-the-judge (collides with `assertTraceDerivedFindings`, dynamic.ts:344); dormant while `WidenGate` is flat. R3 — runtime `maxDepth` is weaker than the static guard; pair it with the conserved pool so runaway recursion hits budget-exhaustion first.
+- **Pass verdict (advisory):** "ship the keystone, make the LLM meta-driver wait." **Operator override (2026-06-04): build the LLM meta-driver now, as the treatment, on top of the budget-reservation invariant** — the invariant is what keeps the result valid; the coded progressive-widening + flat-harness are the controls; `WidenGate` defaults to flat for gate runs.
+
+## Decisions resolved (the 4 forks)
+
+- **Q1 — yes, event-sourced** (SpawnJournal + ResultBlobStore + replay; budget-pool conserved).
+- **Q2 — substrate now** (`TreeView` + `RootHandle.view`/`signal` + the event stream; chatbot/pi-viz is a later thin client).
+- **Q3 — LLM meta-driver built now** (operator call), as the treatment, with coded progressive-widening + flat-harness as controls. The runtime is **one open `LeafExecutor` interface** (`execute` → promise or async stream), not a closed union — built-ins (router/inline, sandbox, cli) are implementations, and any user agent (mastra/agno/HTTP/custom) is first-class by implementing it. An agent selects its executor via `AgentProfile` (`harness: null` = direct Router call; `harness: <sandbox>` = sandboxed) or carries a custom executor directly.
+- **Q4 — hard ceiling, yes — sharpened to a conserved *reservation* pool** (atomic reserve/refund, fail-closed), tokens + usd, enforced at the root.
+
+## Relationship to PR #150 (leaf-level continued-session + fork)
+
+PR #150 (`feat/runloop-session-continuation-and-fork`) adds `RunLoopOptions.lineage` — opt-in,
+default-OFF, backend-blind — so a *single* `runLoop` can continue a session across its iterations
+(`sessionContinuity`) or fork a parent checkpoint across a fanout (`forkFanout`, gated on
+`criuStatus().canFork`). That is the **leaf-level** depth/breadth dial. The recursive atom sits
+**on top**: the `sandbox` `LeafExecutor` *composes* `runLoop` and forwards this `lineage`
+passthrough — it does **not** reinvent checkpoint/fork. (Reviewed 2026-06-04: approve-to-land;
+before enabling, verify the platform honors a client-minted `sessionId` (else `continue` is a
+silent no-op), bound fork box-creation by `maxConcurrency`, and document that `forkFanout`
+inherits the parent image so heterogeneous-profile branches must not use it.)
diff --git a/src/durable/spawn-journal.ts b/src/durable/spawn-journal.ts
new file mode 100644
index 0000000..e1ff440
--- /dev/null
+++ b/src/durable/spawn-journal.ts
@@ -0,0 +1,464 @@
+/**
+ * @experimental
+ *
+ * Event-sourced spawn journal for the recursive execution atom (build steps 3 + 7).
+ *
+ * The supervision tree is journaled as an append-only event log: every `spawned`,
+ * `settled`, and `cancelled` is recorded AFTER it is observed-committed (never
+ * speculative), mirroring `ConversationJournal`'s begin/append/load shape. The log
+ * holds only the THIN decision record — ids, parentage, budget, the spend a decision
+ * consumed, and a content-addressed `outRef`. The payloads the driver branched on
+ * (the `out` artifacts) live in a separate `ResultBlobStore`, keyed by `outRef`, so
+ * the journal stays small (decisions) and replay rehydrates the exact `Settled` from
+ * the blob store (evidence). This is the decision/payload split the replay argument
+ * rests on (B1/B2).
+ *
+ * Replay determinism (B2): `seq` is the monotonic cursor order `scope.next()` yielded
+ * each settlement — NOT wall-clock. `replaySpawnTree` sorts strictly by `seq` before
+ * touching the blob store, so the order in which rehydration `get`s resolve can never
+ * reorder the replayed `Settled[]`; the result is identical regardless of blob latency.
+ */
+
+import { createHash } from 'node:crypto'
+import type {
+  NodeId,
+  NodeSnapshot,
+  NodeStatus,
+  ResultBlobStore,
+  Runtime,
+  Settled,
+  SpawnEvent,
+  SpawnJournal,
+  Spend,
+  TreeView,
+} from '../loops/supervise/types'
+import { zeroTokenUsage } from '../loops/util'
+
+// ── Content addressing ──────────────────────────────────────────────────────
+
+/**
+ * Mint the content-addressed `outRef` for a result artifact: `sha256:<hex>` over a stable
+ * JSON encoding, so structurally-equal artifacts hash identically. Producers derive the ref
+ * they journal and `put` from this; the stores re-derive it on `put` and reject a mismatch.
+ */
+export function contentAddress(artifact: unknown): string {
+  return `sha256:${createHash('sha256').update(stableStringify(artifact), 'utf-8').digest('hex')}`
+}
+
+/** Canonical JSON: object keys sorted recursively, `undefined` properties dropped. A value
+ *  with a `toJSON` (e.g. `Date`) is encoded through it, as `JSON.stringify` does, so distinct
+ *  dates never collapse to `{}` and the hash matches what the file store persists. */
+function stableStringify(value: unknown): string {
+  if (value === null || typeof value !== 'object') return JSON.stringify(value) ?? 'null'
+  const toJSON = (value as { toJSON?: unknown }).toJSON
+  const json = typeof toJSON === 'function' ? toJSON.call(value) : value
+  if (json !== value) return stableStringify(json) // a `toJSON` returning `this` falls through
+  if (Array.isArray(value)) return `[${value.map(stableStringify).join(',')}]`
+  const record = value as Record<string, unknown>
+  const keys = Object.keys(record).filter((k) => record[k] !== undefined).sort()
+  return `{${keys.map((k) => `${JSON.stringify(k)}:${stableStringify(record[k])}`).join(',')}}`
+}
+
+// ── Result blob store ─────────────────────────────────────────────────────────
+
+/**
+ * `ResultBlobStore` backed by a process-local `Map`, keyed by `outRef`. `put` first checks
+ * that `outRef` is the artifact's own content hash and throws otherwise; re-putting the same
+ * pair simply overwrites the entry with an equal value. `get` yields `undefined` for a miss.
+ */
+export class InMemoryResultBlobStore implements ResultBlobStore {
+  private readonly byRef = new Map<string, unknown>()
+
+  async put(outRef: string, artifact: unknown): Promise<void> {
+    assertContentAddress(outRef, artifact)
+    this.byRef.set(outRef, artifact)
+  }
+
+  async get(outRef: string): Promise<unknown | undefined> {
+    return this.byRef.get(outRef)
+  }
+}
+
+/**
+ * Directory-backed `ResultBlobStore`: one JSON file per artifact inside `dir`, named after the
+ * `outRef` with each `:` swapped for `-` (`sha256:<hex>` → `sha256-<hex>.json`). `put` checks
+ * the ref, creates `dir` on demand and fsyncs before returning; a missing file reads as `undefined`.
+ */
+export class FileResultBlobStore implements ResultBlobStore {
+  constructor(private readonly dir: string) {}
+
+  async put(outRef: string, artifact: unknown): Promise<void> {
+    assertContentAddress(outRef, artifact)
+    const { mkdir, open } = await import('node:fs/promises')
+    await mkdir(this.dir, { recursive: true })
+    const handle = await open(this.blobPath(outRef), 'w')
+    try {
+      await handle.write(JSON.stringify(artifact))
+      await handle.sync() // flush to disk before the put is acknowledged
+    } finally {
+      await handle.close()
+    }
+  }
+
+  async get(outRef: string): Promise<unknown | undefined> {
+    const { readFile } = await import('node:fs/promises')
+    try {
+      // A parse failure is not ENOENT, so the catch rethrows it like any other I/O error.
+      return JSON.parse(await readFile(this.blobPath(outRef), 'utf8'))
+    } catch (err) {
+      if (!isNoEntError(err)) throw err
+      return undefined
+    }
+  }
+
+  private blobPath(outRef: string): string {
+    const fileName = `${outRef.split(':').join('-')}.json`
+    return `${this.dir}/${fileName}`
+  }
+}
+
+/** Throws unless `outRef` equals `contentAddress(artifact)`; used by both stores' `put`. */
+function assertContentAddress(outRef: string, artifact: unknown): void {
+  const actual = contentAddress(artifact)
+  if (outRef === actual) return
+  throw new Error(
+    `blob outRef '${outRef}' does not match the artifact content hash '${actual}'; ` +
+      'a content-addressed store refuses a mismatched ref (breaks the replay invariant)',
+  )
+}
+
+// ── Spawn journal ──────────────────────────────────────────────────────────────
+
+/**
+ * Map-backed `SpawnJournal` holding one event list per root. Callers get and give copies:
+ * `loadTree` returns shallow clones and `appendEvent` stores a shallow clone. Guards:
+ *  - `appendEvent` for a root that was never begun throws,
+ *  - a settled/cancelled event reusing a recorded cursor `seq` throws (see `assertSeqUnique`),
+ *  - `beginTree` is idempotent for an identical `at` and throws for a different one.
+ */
+export class InMemorySpawnJournal implements SpawnJournal {
+  private readonly trees = new Map<NodeId, { begunAt: string; events: SpawnEvent[] }>()
+
+  async loadTree(root: NodeId): Promise<SpawnEvent[] | undefined> {
+    const events = this.trees.get(root)?.events
+    // Shallow-clone each event so callers cannot mutate the journaled records.
+    return events?.map((ev) => ({ ...ev }))
+  }
+
+  async beginTree(root: NodeId, at: string): Promise<void> {
+    const prior = this.trees.get(root)
+    if (prior === undefined) {
+      this.trees.set(root, { begunAt: at, events: [] })
+      return
+    }
+    if (prior.begunAt !== at) {
+      throw new Error(
+        `spawn tree '${root}' already begun at ${prior.begunAt}; refusing to overwrite with ${at}`,
+      )
+    }
+  }
+
+  async appendEvent(root: NodeId, ev: SpawnEvent): Promise<void> {
+    const events = this.trees.get(root)?.events
+    if (events === undefined) {
+      throw new Error(`appendEvent called for unknown spawn tree '${root}'; call beginTree first`)
+    }
+    assertSeqUnique(root, events, ev)
+    events.push({ ...ev })
+  }
+}
+
+/**
+ * `SpawnJournal` persisted as one JSONL file shared by all trees: a `begin` record opens a
+ * tree and each later `event` record wraps one `SpawnEvent`; every record carries its `root`.
+ * `loadTree` re-reads and parses the whole file, keeps only records for `root`, and throws if
+ * an event precedes the tree's `begin` or a cursor `seq` repeats. Every append opens the file
+ * in append mode, writes one line and fsyncs before closing.
+ */
+export class FileSpawnJournal implements SpawnJournal {
+  constructor(private readonly path: string) {}
+
+  async loadTree(root: NodeId): Promise<SpawnEvent[] | undefined> {
+    const fs = await import('node:fs/promises')
+    let text: string
+    try {
+      text = await fs.readFile(this.path, 'utf8')
+    } catch (err) {
+      if (isNoEntError(err)) return undefined
+      throw err
+    }
+    const lines = text.split('\n').filter((line) => line.length > 0) // skip blank lines
+    let begun = false
+    const events: SpawnEvent[] = []
+    for (const line of lines) {
+      const record = JSON.parse(line) as SpawnJournalRecord // NOTE(review): cast, not validated
+      if (record.root !== root) continue // other trees share this file
+      if (record.kind === 'begin') {
+        begun = true
+      } else {
+        if (!begun) {
+          throw new Error(
+            `spawn journal corrupted: event for tree '${root}' precedes its begin record`,
+          )
+        }
+        assertSeqUnique(root, events, record.event)
+        events.push(record.event)
+      }
+    }
+    return begun ? events : undefined // no begin record → the tree is unknown
+  }
+
+  async beginTree(root: NodeId, at: string): Promise<void> {
+    const existing = await this.loadTreeBegin(root)
+    if (existing) {
+      if (existing !== at) {
+        throw new Error(
+          `spawn tree '${root}' already begun in ${this.path} at ${existing}; refusing to overwrite with ${at}`,
+        )
+      }
+      return // same `at`: idempotent re-begin, nothing is written
+    }
+    await this.appendRecord({ kind: 'begin', root, at })
+  }
+
+  async appendEvent(root: NodeId, ev: SpawnEvent): Promise<void> {
+    const events = await this.loadTree(root) // full re-read; also re-validates the stored log
+    if (events === undefined) {
+      throw new Error(`appendEvent called for unknown spawn tree '${root}'; call beginTree first`)
+    }
+    assertSeqUnique(root, events, ev)
+    await this.appendRecord({ kind: 'event', root, event: ev })
+  }
+
+  private async loadTreeBegin(root: NodeId): Promise<string | undefined> {
+    const fs = await import('node:fs/promises')
+    let text: string
+    try {
+      text = await fs.readFile(this.path, 'utf8')
+    } catch (err) {
+      if (isNoEntError(err)) return undefined
+      throw err
+    }
+    const lines = text.split('\n').filter((line) => line.length > 0)
+    for (const line of lines) {
+      const record = JSON.parse(line) as SpawnJournalRecord
+      if (record.root === root && record.kind === 'begin') return record.at // first begin wins
+    }
+    return undefined
+  }
+
+  private async appendRecord(record: SpawnJournalRecord): Promise<void> {
+    const fs = await import('node:fs/promises')
+    const path = await import('node:path')
+    await fs.mkdir(path.dirname(this.path), { recursive: true })
+    const fh = await fs.open(this.path, 'a')
+    try {
+      await fh.write(`${JSON.stringify(record)}\n`)
+      await fh.sync() // fsync before the append is acknowledged
+    } finally {
+      await fh.close()
+    }
+  }
+}
+
+type SpawnJournalRecord =
+  | { kind: 'begin'; root: NodeId; at: string } // opens tree `root`; `at` as given to beginTree
+  | { kind: 'event'; root: NodeId; event: SpawnEvent } // one journaled event of tree `root`
+
+/**
+ * Reject a settled/cancelled event whose `seq` is already used by another settled/cancelled
+ * event of the same tree. Two `seq` namespaces share the journal: a `spawned` event's `seq` is
+ * the spawn ordinal, while a `settled`/`cancelled` `seq` is the cursor order `scope.next()`
+ * yielded it (B2) — the order replay sorts by. `spawned` events are therefore neither checked
+ * nor compared against, so an ordinal equal to a cursor `seq` is not a collision.
+ */
+function assertSeqUnique(root: NodeId, events: SpawnEvent[], ev: SpawnEvent): void {
+  if (ev.kind === 'spawned') return
+  for (const recorded of events) {
+    if (recorded.kind === 'spawned' || recorded.seq !== ev.seq) continue
+    throw new Error(
+      `spawn journal corrupted: duplicate cursor seq ${ev.seq} in tree '${root}'; ` +
+        'the cursor order replay relies on is not unique',
+    )
+  }
+}
+
+// ── Replay executor (build step 7) ───────────────────────────────────────────────
+
+/**
+ * Rebuild the `Settled[]` a journaled tree produced: load the tree's events, order them by
+ * `seq`, and emit one entry per `settled`/`cancelled` event, rehydrating each done child's
+ * `out` from the blob store by `outRef`. `spawned` events only supply handle labels.
+ *
+ * Determinism (B2): the events are sorted by `seq` BEFORE any blob `get`, and blobs are then
+ * fetched one at a time in that order, so rehydration latency cannot reorder the result. `at`
+ * (wall-clock) is never a replay input. Throws when the tree was never begun, a settled-done
+ * event has no `outRef`, or the blob store returns `undefined` for it. Replayed `down` entries
+ * always carry `restartCount: 0`, and a cancelled entry always carries `infra: false`.
+ */
+export async function replaySpawnTree(
+  journal: SpawnJournal,
+  blobs: ResultBlobStore,
+  root: NodeId,
+): Promise<Settled<unknown>[]> {
+  const events = await journal.loadTree(root)
+  if (events === undefined) {
+    throw new Error(`replaySpawnTree: no journaled tree for root '${root}'`)
+  }
+  const ordered = [...events].sort((a, b) => a.seq - b.seq) // sort a copy, never the input
+  const labels = new Map<NodeId, string>()
+  for (const ev of ordered) {
+    if (ev.kind === 'spawned') labels.set(ev.id, ev.label)
+  }
+  const settled: Settled<unknown>[] = []
+  for (const ev of ordered) {
+    if (ev.kind === 'spawned') continue // spawns only contributed labels above
+    if (ev.kind === 'cancelled') {
+      settled.push({
+        kind: 'down',
+        handle: replayHandle(ev.id, labels.get(ev.id) ?? ev.id, 'cancelled'),
+        reason: ev.reason,
+        infra: false,
+        restartCount: 0,
+        seq: ev.seq,
+      })
+      continue
+    }
+    if (ev.status === 'down') {
+      settled.push({
+        kind: 'down',
+        handle: replayHandle(ev.id, labels.get(ev.id) ?? ev.id, 'failed'),
+        reason: ev.verdict?.notes ?? 'child down', // fixed fallback when no verdict notes exist
+        infra: ev.infra === true,
+        restartCount: 0,
+        seq: ev.seq,
+      })
+      continue
+    }
+    if (ev.outRef === undefined) {
+      throw new Error(
+        `replaySpawnTree: settled-done event for '${ev.id}' (seq ${ev.seq}) has no outRef; ` +
+          'cannot rehydrate the result the driver branched on',
+      )
+    }
+    const out = await blobs.get(ev.outRef) // awaited in seq order, one blob at a time
+    if (out === undefined) {
+      throw new Error(
+        `replaySpawnTree: blob store has no artifact for outRef '${ev.outRef}' (node '${ev.id}', seq ${ev.seq})`,
+      )
+    }
+    settled.push({
+      kind: 'done',
+      handle: replayHandle(ev.id, labels.get(ev.id) ?? ev.id, 'done'),
+      out,
+      outRef: ev.outRef,
+      verdict: ev.verdict,
+      spent: ev.spent,
+      seq: ev.seq,
+    })
+  }
+  return settled
+}
+
+/**
+ * Build the inert handle attached to a replayed settlement: it only carries `id`, `label` and
+ * the given `status`. Its `abort` throws unconditionally — a replayed handle is not live.
+ */
+function replayHandle(id: NodeId, label: string, status: NodeStatus) {
+  const abort = () => {
+    throw new Error(`cannot abort node '${id}': replayed handles are terminal, not live`)
+  }
+  return { id, label, status, abort }
+}
+
+/**
+ * Fold a journaled event list into a `TreeView` for resume: every `spawned` event creates a
+ * `pending` node, then settlements/cancellations (in cursor `seq` order) set its final status.
+ * NOTE(review): no status set here is 'running'/'acquiring', so `inFlight` is always 0 — confirm.
+ */
+export function materializeTreeView(events: SpawnEvent[]): TreeView {
+  const nodes = new Map<NodeId, MutableSnapshot>()
+  let root: NodeId | undefined
+  // `spawned` (ordinal namespace) and `settled`/`cancelled` (cursor namespace) carry
+  // overlapping `seq` values, so create every node before any update — process spawns in
+  // ordinal order, then settlements/cancellations in cursor order. A settle/cancel for an
+  // un-spawned node is a corrupted log (fail loud via requireNode).
+  const spawns = events
+    .filter((ev): ev is Extract<SpawnEvent, { kind: 'spawned' }> => ev.kind === 'spawned')
+    .sort((a, b) => a.seq - b.seq)
+  const settlements = events.filter((ev) => ev.kind !== 'spawned').sort((a, b) => a.seq - b.seq)
+  for (const ev of spawns) {
+    if (ev.parent === undefined && root === undefined) root = ev.id // first parentless spawn
+    nodes.set(ev.id, {
+      id: ev.id,
+      parent: ev.parent,
+      label: ev.label,
+      status: 'pending',
+      runtime: ev.runtime,
+      budget: ev.budget,
+      spent: zeroSpend(),
+    })
+  }
+  for (const ev of settlements) {
+    if (ev.kind === 'settled') {
+      const node = requireNode(nodes, ev.id)
+      node.status = ev.status === 'done' ? 'done' : 'failed' // any non-'done' status → 'failed'
+      node.spent = ev.spent
+      node.outRef = ev.outRef
+    } else {
+      const node = requireNode(nodes, ev.id)
+      node.status = 'cancelled' // spend and outRef are left as they were
+    }
+  }
+  const snapshots = [...nodes.values()].map(freezeSnapshot)
+  return {
+    root: root ?? snapshots[0]?.id ?? '', // fall back to the first node, then to ''
+    nodes: snapshots,
+    inFlight: snapshots.filter((n) => n.status === 'running' || n.status === 'acquiring').length,
+  }
+}
+
+interface MutableSnapshot { // working per-node record that materializeTreeView mutates in place
+  id: NodeId
+  parent?: NodeId // copied from the `spawned` event; may be undefined
+  label: string
+  status: NodeStatus // 'pending' at spawn; 'done' | 'failed' | 'cancelled' after a settlement
+  runtime: Runtime
+  budget: NodeSnapshot['budget']
+  spent: Spend // all-zero until a `settled` event supplies the recorded spend
+  outRef?: string // copied from the `settled` event; may be undefined
+}
+
+function zeroSpend(): Spend { // fresh all-zero spend given to each node when it is spawned
+  return { iterations: 0, tokens: zeroTokenUsage(), usd: 0, ms: 0 }
+}
+
+/** Fetch the snapshot a prior `spawned` event created for `id`. A settle/cancel that names an
+ *  unknown node means the log is corrupted, so this throws instead of inventing a node. */
+function requireNode(nodes: Map<NodeId, MutableSnapshot>, id: NodeId): MutableSnapshot {
+  const found = nodes.get(id)
+  if (found !== undefined) return found
+  throw new Error(`spawn journal corrupted: settle/cancel for node '${id}' with no prior spawn`)
+}
+
+/**
+ * Produce the `NodeSnapshot` exposed in `TreeView.nodes` from a working snapshot. This is a
+ * shallow field-by-field copy (no `Object.freeze`): `parent` and `outRef` are carried over even
+ * when `undefined`, and nested values such as `budget` and `spent` are shared, not cloned.
+ */
+function freezeSnapshot(node: MutableSnapshot): NodeSnapshot {
+  const { id, parent, label, status } = node
+  const { runtime, budget, spent, outRef } = node
+  // Identity fields first, then runtime/budget/spend, then the optional `outRef`.
+  const snapshot: NodeSnapshot = { id, parent, label, status, runtime, budget, spent, outRef }
+  return snapshot
+}
+
+/**
+ * Report whether `err` is a non-null object carrying `code === 'ENOENT'`, the code this file
+ * treats as "no such file". Any other value (including primitives and `null`) yields `false`.
+ */
+function isNoEntError(err: unknown): boolean {
+  if (typeof err !== 'object' || err === null) return false
+  return (err as { code?: unknown }).code === 'ENOENT'
+}
diff --git a/src/loops/index.ts b/src/loops/index.ts
index 9d1bf13..e6025a6 100644
--- a/src/loops/index.ts
+++ b/src/loops/index.ts
@@ -17,6 +17,19 @@ export type {
   SandboxEvent,
   SandboxInstance,
 } from '@tangle-network/sandbox'
+// Recursive execution atom (the keystone): the open `LeafExecutor` runtime, the
+// budget-conserving reactive `Scope`, the event-sourced `Supervisor`, and the spawn
+// journal. Substrate types come from `./supervise/types`; the durable journal +
+// replay live in `../durable/spawn-journal`.
+export {
+  contentAddress,
+  FileResultBlobStore,
+  FileSpawnJournal,
+  InMemoryResultBlobStore,
+  InMemorySpawnJournal,
+  materializeTreeView,
+  replaySpawnTree,
+} from '../durable/spawn-journal'
 export {
   type CompletionAnalyst,
   type CompletionEvidence,
@@ -57,7 +70,15 @@ export {
   type LoopOptionsForDispatch,
   loopDispatch,
 } from './loop-dispatch'
-export type { Agent, Program, ProgramResult, RunProgramOptions } from './program'
+// The recursive execution atom owns the headline `Agent` (re-exported from
+// `./supervise/types` below). The program op-set's static-tree atom is a distinct
+// concept (`act` returns a `Program`), surfaced as `ProgramAgent`.
+export type {
+  Agent as ProgramAgent,
+  Program,
+  ProgramResult,
+  RunProgramOptions,
+} from './program'
 export {
   agentProgramPlanner,
   compileProgram,
@@ -68,7 +89,7 @@ export {
 } from './program'
 export { reportLoopUsage, type UsageSink } from './report-usage'
 export type { RunLoopOptions } from './run-loop'
-export { createSandboxForSpec, runLoop } from './run-loop'
+export { createSandboxForSpec, defaultSelectWinner, runLoop } from './run-loop'
 export { type AcquireOptions, acquireSandbox } from './sandbox-acquire'
 export {
   type CriuCapableClient,
@@ -84,6 +105,58 @@ export {
   type SandboxLineageHandle,
   type SessionCapableBox,
 } from './sandbox-lineage'
+export {
+  type BudgetPool,
+  type BudgetReadout,
+  createBudgetPool,
+  type ReservationTicket,
+  spendFromUsageEvents,
+} from './supervise/budget'
+export {
+  type CliSeam,
+  cliExecutor,
+  createExecutorRegistry,
+  type RouterSeam,
+  routerInlineExecutor,
+  type SandboxSeam,
+  sandboxExecutor,
+} from './supervise/runtime'
+export { createScope, type ScopeArgs, settledToIteration } from './supervise/scope'
+export {
+  createRootHandle,
+  createSupervisor,
+} from './supervise/supervisor'
+export type {
+  Agent,
+  AgentSpec,
+  Budget,
+  ExecutorContext,
+  ExecutorRegistry,
+  Handle,
+  LeafExecutor,
+  LeafExecutorFactory,
+  LeafResult,
+  NodeId,
+  NodeSnapshot,
+  NodeStatus,
+  Restart,
+  ResultBlobStore,
+  RootHandle,
+  RootSignal,
+  Runtime,
+  Scope,
+  Settled,
+  SpawnEvent,
+  SpawnJournal,
+  SpawnOpts,
+  Spend,
+  SupervisedResult,
+  Supervisor,
+  SupervisorOpts,
+  TreeView,
+  UsageEvent,
+  WidenGate,
+} from './supervise/types'
 export type {
   AgentRunSpec,
   DefaultVerdict,
diff --git a/src/loops/program.ts b/src/loops/program.ts
index 4787a65..2c58129 100644
--- a/src/loops/program.ts
+++ b/src/loops/program.ts
@@ -345,10 +345,35 @@ async function runParallel<Task, Output>(
     throw new PlannerError('Program parallel{} must carry a non-empty branches[]')
   }
   const limit = opts.maxParallel ?? branches.length
-  const runs = await mapPool(branches, limit, (branch, i) =>
+  const settled = await mapPool(branches, limit, (branch, i) =>
     runProgram(branch, opts, `${idSuffix}/p${i}`, depth + 1),
   )
-  return concatRuns(runs, 'max', opts)
+  // One-for-one: a branch that threw is a `down` record EXCLUDED from the merge `n`;
+  // survivors still merge. A real cancel (abort signal fired) is NOT a branch failure —
+  // it propagates so the abort cascade stays loud.
+  if (opts.ctx.signal?.aborted) {
+    const aborted = settled.find((r) => !r.ok)
+    if (aborted && !aborted.ok) throw aborted.error
+  }
+  const survivors = settled.filter(
+    (r): r is { ok: true; value: ProgramResult<Task, Output> } => r.ok,
+  )
+  if (survivors.length === 0) {
+    // Every branch went down: there is nothing to merge, so the program genuinely
+    // failed. Surface the FIRST branch's original error (its real type + message — e.g.
+    // a maxDepth guard) rather than a lossy summary; a structural guard must not be
+    // swallowed as an excluded infra `down`.
+    const firstDown = settled.find((r) => !r.ok)
+    if (firstDown && !firstDown.ok) throw firstDown.error
+    throw new PlannerError(
+      `Program parallel{} merged 0 branches — all ${branches.length} sub-loops went down`,
+    )
+  }
+  return concatRuns(
+    survivors.map((r) => r.value),
+    'max',
+    opts,
+  )
 }
 
 /** A `seq` containing a `parallel` → run maximal straight-line runs as single loops
@@ -404,32 +429,33 @@ async function runSeq<Task, Output>(
   return acc
 }
 
-/** Bounded-concurrency map preserving order. Drains all in-flight before throwing the
- *  first error, and stops scheduling NEW work once any branch fails (mirrors the
- *  kernel's `runBatch` discipline so a failure can't orphan running sub-loops). */
+type MapPoolOutcome<R> = { ok: true; value: R } | { ok: false; error: unknown }
+
+/** Bounded-concurrency map preserving order. One-for-one isolation: a thrown item is
+ *  CAPTURED as a per-item `{ ok: false }` outcome — it does NOT abort siblings or stop
+ *  scheduling, so survivors all run to completion. The caller decides whether a failed
+ *  outcome is an excluded branch (infra `down`) or a propagated cancel. */
 async function mapPool<T, R>(
   items: T[],
   limit: number,
   fn: (item: T, index: number) => Promise<R>,
-): Promise<R[]> {
-  const results = new Array<R>(items.length)
-  let firstError: unknown
+): Promise<MapPoolOutcome<R>[]> {
+  const results = new Array<MapPoolOutcome<R>>(items.length)
   let next = 0
   const workers = Math.max(1, Math.min(limit, items.length))
   const worker = async (): Promise<void> => {
-    while (firstError === undefined) {
+    while (true) {
       const i = next
       next += 1
       if (i >= items.length) return
       try {
-        results[i] = await fn(items[i] as T, i)
+        results[i] = { ok: true, value: await fn(items[i] as T, i) }
       } catch (err) {
-        if (firstError === undefined) firstError = err
+        results[i] = { ok: false, error: err }
       }
     }
   }
   await Promise.all(Array.from({ length: workers }, () => worker()))
-  if (firstError !== undefined) throw firstError
   return results
 }
 
diff --git a/src/loops/supervise/budget.ts b/src/loops/supervise/budget.ts
new file mode 100644
index 0000000..73b9bc2
--- /dev/null
+++ b/src/loops/supervise/budget.ts
@@ -0,0 +1,225 @@
+/**
+ * @experimental
+ *
+ * The conserved budget reservation pool — the invariant the whole instrument
+ * rests on (critique M5/B3). One root `Budget` becomes a conserved pool of three
+ * quantities (tokens, usd, iterations) plus an absolute deadline. Children RESERVE
+ * atomically at spawn and RECONCILE at settle:
+ *
+ *   total ≡ free + reserved + committed          (invariant, always)
+ *
+ * `reserve` moves a child's whole ceiling from `free` → `reserved` and FAILS CLOSED
+ * when `free` can't cover it (never read-then-spawn overcommit, so `Σk(treatment) ≡
+ * Σk(blind)` by construction). `reconcile` releases the reservation, commits ACTUAL
+ * spend, and refunds the unspent remainder to `free`. Tokens and usd are SEPARATE
+ * channels (`LoopTokenUsage` has no `usd`); iterations are conserved alongside them.
+ *
+ * Pure and deterministic: `now()` is injected, there is no I/O, and no wall-clock or
+ * RNG read. A `reserve`/`reconcile` ticket is single-use (fail-loud on double or
+ * unknown reconcile) so a child can never refund twice.
+ */
+
+import { addTokenUsage, zeroTokenUsage } from '../util'
+import type { Budget, LoopTokenUsage, Spend, UsageEvent } from './types'
+
+export type { Budget, Spend, UsageEvent }
+
+/** Opaque, single-use reservation handle returned by `reserve` and consumed by
+ *  `reconcile`. Carries the reserved ceilings so reconciliation needs no lookup. */
+export interface ReservationTicket {
+  readonly id: number // pool-local ticket number; `reconcile` rejects an id that is not open
+  readonly reserved: {
+    readonly tokens: number // the child's `maxTokens`
+    readonly usd: number // the child's `maxUsd`, or 0 when it set none
+    readonly iterations: number // the child's `maxIterations`
+  }
+}
+
+/** Post-reservation pool readout — the shape `Scope.budget` exposes. `tokensLeft` and
+ *  `usdLeft` are the FREE balances (already net of every outstanding reservation);
+ *  `deadlineMs` is the ABSOLUTE wall-clock deadline (0 when the root set none). */
+export type BudgetReadout = Readonly<{
+  tokensLeft: number // free token balance (neither reserved nor committed)
+  usdLeft: number // free usd balance; 0 when the root declared no `maxUsd`
+  deadlineMs: number // `now() + root.deadlineMs`, captured at pool construction
+  reservedTokens: number // tokens held by reservations not yet reconciled
+}>
+
+export interface BudgetPool { // the contract `createBudgetPool` returns
+  /**
+   * Atomically reserve a child's full ceiling from the free balance. Fails closed
+   * ({ ok: false }) when the pool can't cover tokens, usd, or iterations — the
+   * caller inspects `ok` before `ticket`.
+   */
+  reserve(
+    b: Budget,
+  ): { ok: true; ticket: ReservationTicket } | { ok: false; reason: 'budget-exhausted' }
+  /**
+   * Release a reservation: commit the actual `spent` and refund the unspent remainder to the
+   * free pool. Throws on an unknown or already-reconciled ticket (a double refund would break
+   * conservation); `createBudgetPool` also throws when `spent` exceeds the ticket's ceilings.
+   */
+  reconcile(ticket: ReservationTicket, spent: Spend): void
+  /** Fold a normalized `UsageEvent` stream (or array) into a `Spend`. Tokens via
+   *  `addTokenUsage`, usd on its own channel, iterations from `'iteration'` events.
+   *  `ms` is left zero — wall-clock duration is the caller's to record, not the pool's. */
+  spendFrom(events: AsyncIterable<UsageEvent> | UsageEvent[]): Promise<Spend>
+  /** The current readout, reflecting all outstanding reservations. */
+  readout(): BudgetReadout
+}
+
+/** Reduce a normalized `UsageEvent` array to a `Spend`. `'cost'` events add to `usd`,
+ *  `'tokens'` events accumulate through `addTokenUsage`, and every other event counts as one
+ *  iteration. No wall-clock is read here: `ms` is always returned as 0. */
+export function spendFromUsageEvents(events: UsageEvent[]): Spend {
+  const tokenTotals = zeroTokenUsage()
+  let dollars = 0
+  let iterationCount = 0
+  for (const event of events) {
+    if (event.kind === 'cost') {
+      dollars += event.usd
+    } else if (event.kind === 'tokens') {
+      addTokenUsage(tokenTotals, { input: event.input, output: event.output })
+    } else {
+      iterationCount += 1
+    }
+  }
+  return { iterations: iterationCount, tokens: tokenTotals, usd: dollars, ms: 0 }
+}
+
+/** Fold a `UsageEvent` array or async stream into a `Spend`; backs `BudgetPool.spendFrom`. */
+async function foldUsage(events: AsyncIterable<UsageEvent> | UsageEvent[]): Promise<Spend> {
+  if (Array.isArray(events)) return spendFromUsageEvents(events)
+  const tokenTotals = zeroTokenUsage()
+  const totals = { usd: 0, iterations: 0 }
+  for await (const event of events) {
+    if (event.kind === 'cost') {
+      totals.usd += event.usd
+    } else if (event.kind === 'tokens') {
+      addTokenUsage(tokenTotals, { input: event.input, output: event.output })
+    } else {
+      totals.iterations += 1
+    }
+  }
+  return { iterations: totals.iterations, tokens: tokenTotals, usd: totals.usd, ms: 0 }
+}
+
+function totalTokens(usage: LoopTokenUsage): number { // input + output as one metered figure
+  return usage.input + usage.output
+}
+
+/**
+ * Create a conserved reservation pool from a root `Budget`. `now` is injected (default
+ * `Date.now`) and read at most once, at construction: the absolute deadline is fixed as
+ * `now() + root.deadlineMs`, or 0 when the root sets none. A root without `maxUsd` funds no
+ * usd channel, so every child usd request is refused.
+ */
+export function createBudgetPool(root: Budget, now: () => number = Date.now): BudgetPool {
+  // free + reserved + committed ≡ root totals, per channel, always.
+  let freeTokens = root.maxTokens
+  let reservedTokens = 0
+  let committedTokens = 0
+
+  const usdCapped = root.maxUsd !== undefined
+  let freeUsd = root.maxUsd ?? 0
+  let reservedUsd = 0
+  let committedUsd = 0
+
+  let freeIterations = root.maxIterations
+  let reservedIterations = 0
+  let committedIterations = 0
+
+  const absoluteDeadlineMs = root.deadlineMs !== undefined ? now() + root.deadlineMs : 0
+
+  let nextTicketId = 0
+  const open = new Set<number>()
+
+  // NaN compares false against every `>` bound and a negative amount would mint budget, so a
+  // single malformed number would make the pool fail OPEN. `n >= 0` is false for both.
+  const isAmount = (n: number): boolean => n >= 0
+
+  function reserve(b: Budget): ReturnType<BudgetPool['reserve']> {
+    const wantTokens = b.maxTokens
+    const wantUsd = b.maxUsd ?? 0
+    const wantIterations = b.maxIterations
+    // Fail-closed admission: every requested channel must be well-formed and fit the free
+    // balance. A usd request against an uncapped root is unsatisfiable (the root declared no $).
+    if (![wantTokens, wantUsd, wantIterations].every(isAmount)) {
+      return { ok: false, reason: 'budget-exhausted' }
+    }
+    if (wantTokens > freeTokens) return { ok: false, reason: 'budget-exhausted' }
+    if (wantIterations > freeIterations) return { ok: false, reason: 'budget-exhausted' }
+    if (wantUsd > 0 && (!usdCapped || wantUsd > freeUsd)) {
+      return { ok: false, reason: 'budget-exhausted' }
+    }
+
+    freeTokens -= wantTokens
+    reservedTokens += wantTokens
+    freeIterations -= wantIterations
+    reservedIterations += wantIterations
+    if (wantUsd > 0) {
+      freeUsd -= wantUsd
+      reservedUsd += wantUsd
+    }
+
+    const id = nextTicketId++
+    open.add(id)
+    const reserved = { tokens: wantTokens, usd: wantUsd, iterations: wantIterations }
+    return { ok: true, ticket: { id, reserved } }
+  }
+
+  function reconcile(ticket: ReservationTicket, spent: Spend): void {
+    if (!open.has(ticket.id)) {
+      throw new Error(`budget pool: reconcile of unknown or already-settled ticket ${ticket.id}`)
+    }
+    open.delete(ticket.id)
+
+    const { tokens: rTokens, usd: rUsd, iterations: rIterations } = ticket.reserved
+    const spentTokens = totalTokens(spent.tokens)
+    const who = `budget pool: ticket ${ticket.id}`
+
+    // A NaN spend would pass the over-spend checks below and turn the balances into NaN; a
+    // negative spend would refund more than was reserved. Both are fail-loud bugs.
+    if (![spentTokens, spent.iterations, spent.usd].every(isAmount)) {
+      throw new Error(`${who} reported a negative or NaN spend`)
+    }
+    // A child must never commit more than it reserved (that would overdraw the conserved
+    // pool). Over-spend is a fail-loud bug, not something to clamp silently.
+    if (spentTokens > rTokens) {
+      throw new Error(`${who} spent ${spentTokens} tokens > reserved ${rTokens}`)
+    }
+    if (spent.iterations > rIterations) {
+      throw new Error(`${who} spent ${spent.iterations} iterations > reserved ${rIterations}`)
+    }
+    if (spent.usd > rUsd) {
+      throw new Error(`${who} spent $${spent.usd} > reserved $${rUsd}`)
+    }
+
+    // Release the whole reservation, then commit actual spend; the difference is the
+    // refund that flows back to `free`.
+    reservedTokens -= rTokens
+    committedTokens += spentTokens
+    freeTokens += rTokens - spentTokens
+
+    reservedIterations -= rIterations
+    committedIterations += spent.iterations
+    freeIterations += rIterations - spent.iterations
+
+    if (rUsd > 0) {
+      reservedUsd -= rUsd
+      committedUsd += spent.usd
+      freeUsd += rUsd - spent.usd
+    }
+  }
+
+  function readout(): BudgetReadout {
+    return {
+      tokensLeft: freeTokens,
+      usdLeft: usdCapped ? freeUsd : 0,
+      deadlineMs: absoluteDeadlineMs,
+      reservedTokens,
+    }
+  }
+
+  return { reserve, reconcile, spendFrom: foldUsage, readout }
+}
diff --git a/src/loops/supervise/runtime.ts b/src/loops/supervise/runtime.ts
new file mode 100644
index 0000000..2b07e6c
--- /dev/null
+++ b/src/loops/supervise/runtime.ts
@@ -0,0 +1,628 @@
+/**
+ * @experimental
+ *
+ * The leaf runtime — the built-in `LeafExecutor` IMPLEMENTATIONS behind the ONE
+ * open interface frozen in `./types`, plus the open resolver/registry that maps
+ * an `AgentSpec` to one of them OR accepts a bring-your-own executor verbatim.
+ *
+ * The interface is the extension point, not a closed `inline|sandbox|cli` union:
+ *   - router/inline : a direct OpenAI-compatible Router call, no box (one-shot).
+ *   - sandbox       : COMPOSES the existing `runLoop` kernel as a single-task
+ *                     leaf and surfaces its token/cost usage as `UsageEvent`s;
+ *                     forwards PR #150's optional `lineage` passthrough WITHOUT
+ *                     reinventing checkpoint/fork (streaming).
+ *   - cli           : a Halo/RLM subprocess; `budgetExempt` (no token accounting),
+ *                     excluded from the equal-k arms by construction (streaming).
+ * Every metered runtime reports through the SAME normalized `UsageEvent` channel
+ * so the conserved budget pool meters them identically. A user's own agent is
+ * first-class the moment it implements `LeafExecutor` — register it by name or
+ * pass it as `AgentSpec.executor`.
+ *
+ * Layering: `estimateCost`/`isModelPriced` are substrate primitives from
+ * `@tangle-network/agent-eval`; `runLoop`/`acquireSandbox` are runtime kernels
+ * from this package. No per-vendor adapters live here.
+ */
+
+import { spawn } from 'node:child_process'
+import { estimateCost, isModelPriced } from '@tangle-network/agent-eval'
+import type { BackendType, SandboxEvent } from '@tangle-network/sandbox'
+import { ValidationError } from '../../errors'
+import type { RunLoopOptions } from '../run-loop'
+import { runLoop } from '../run-loop'
+import type {
+  AgentRunSpec,
+  Driver,
+  ExecCtx,
+  Iteration,
+  LoopSandboxClient,
+  OutputAdapter,
+} from '../types'
+import { zeroTokenUsage } from '../util'
+import type {
+  AgentSpec,
+  DefaultVerdict,
+  ExecutorContext,
+  ExecutorRegistry,
+  LeafExecutor,
+  LeafExecutorFactory,
+  LeafResult,
+  Runtime,
+  Spend,
+  UsageEvent,
+} from './types'
+
+// ── Seam contracts (read off ExecutorContext.seams, narrowed per built-in) ─────
+
+/**
+ * Router/inline connection seam. A direct OpenAI-compatible Router endpoint —
+ * the cheapest leaf, no box, no tools. `model` overrides the profile's model
+ * hint when present; otherwise the profile's `model.default` is required.
+ * `routerInlineExecutor` rejects the seam with a `ValidationError` when
+ * `routerBaseUrl` or `routerKey` is empty.
+ */
+export interface RouterSeam {
+  routerBaseUrl: string // base URL; one trailing `/` is stripped before `/chat/completions` is appended
+  routerKey: string // sent as the `authorization: Bearer …` credential
+  model?: string // takes precedence over `AgentProfile.model.default`
+}
+
+/**
+ * Sandbox executor seam. The `sandboxClient` the composed `runLoop` creates
+ * boxes through, plus the optional trace/run/lineage wiring forwarded into the
+ * loop. `lineage` is opaque here (PR #150's `RunLoopOptions.lineage`): forwarded
+ * forward-compatibly, never inspected — this executor does NOT reinvent
+ * checkpoint/fork.
+ */
+export interface SandboxSeam {
+  sandboxClient: LoopSandboxClient // must expose a `create` function, else `sandboxExecutor` throws
+  /** Forwarded into the composed `runLoop`'s `ctx` (trace emitter, run handle, etc.);
+   *  `sandboxClient` and `signal` are always set by the executor itself. */
+  loopCtx?: Partial<Omit<ExecCtx, 'sandboxClient' | 'signal'>>
+  /** PR #150 `RunLoopOptions.lineage` passthrough — opaque; forwarded, not parsed.
+   *  Only added to the loop options when it is not `undefined`. */
+  lineage?: unknown
+  /** Hard cap on the composed loop's iterations. The budget pool reserves against
+   *  the spawn `Budget.maxIterations`; this is the leaf's own ceiling. Default 1.
+   *  `sandboxExecutor` rejects a value that is not finite or not > 0. */
+  maxIterations?: number
+}
+
+/** CLI subprocess seam. `bin` + `args` describe the Halo/RLM process to spawn. */
+export interface CliSeam {
+  bin: string // executable to spawn; an empty value fails `cliExecutor` construction
+  args?: string[] // defaults to no arguments
+  /** Extra environment for the subprocess (merged over `process.env`). */
+  env?: Record<string, string>
+  /** Working directory for the subprocess; only passed to `spawn` when set. */
+  cwd?: string
+}
+
+const routerSeamKey = 'router' // `ExecutorContext.seams` key that `routerInlineExecutor` reads (a `RouterSeam`)
+const sandboxSeamKey = 'sandbox' // `ExecutorContext.seams` key that `sandboxExecutor` reads (a `SandboxSeam`)
+const cliSeamKey = 'cli' // `ExecutorContext.seams` key that `cliExecutor` reads (a `CliSeam`)
+
+// ── Content-addressed result pointers (the B1 replay source) ───────────────────
+
+/**
+ * Derive a deterministic `outRef` pointer of the form `<prefix>:<8 hex digits>`.
+ *
+ * The value is serialized with `JSON.stringify`, falling back to `String(value)`
+ * when that yields `undefined` or throws, and the UTF-16 code units of the text
+ * are folded with 32-bit FNV-1a. The hash is not cryptographic; it only makes two
+ * identical outputs collapse to the same pointer.
+ */
+function contentRef(prefix: string, value: unknown): string {
+  const FNV_OFFSET_BASIS = 0x811c9dc5
+  const FNV_PRIME = 0x01000193
+
+  const serialize = (): string => {
+    try {
+      return JSON.stringify(value) ?? String(value)
+    } catch {
+      return String(value)
+    }
+  }
+  const text = serialize()
+
+  let hash = FNV_OFFSET_BASIS
+  for (let idx = 0; idx < text.length; idx += 1) {
+    hash = Math.imul(hash ^ text.charCodeAt(idx), FNV_PRIME)
+  }
+  const hex = (hash >>> 0).toString(16).padStart(8, '0')
+  return `${prefix}:${hex}`
+}
+
+/** A fresh all-zero `Spend`: no iterations, zero token usage, no USD, no elapsed ms. */
+function zeroSpend(): Spend {
+  const none: Spend = { iterations: 0, tokens: zeroTokenUsage(), usd: 0, ms: 0 }
+  return none
+}
+
+// ── router/inline executor (harness === null) ──────────────────────────────────
+
+/**
+ * Router/inline leaf: one OpenAI-compatible chat-completion per `execute`, no box.
+ *
+ * Construction validates the seam (model, base URL, key) and mirrors `ctx.signal`
+ * onto a private `AbortController` that `teardown` can also trip. `execute` POSTs
+ * `{ model, messages, temperature: 0.2 }` to `<routerBaseUrl>/chat/completions`
+ * and resolves a `LeafResult` whose `spent` always counts one iteration. Tokens
+ * are recorded only when the response carries numeric `prompt_tokens` AND
+ * `completion_tokens`; otherwise the spend holds zero tokens and zero USD (a
+ * fabricated priced cost is never emitted). USD is estimated only for a model
+ * `isModelPriced` recognises.
+ *
+ * NOTE for the Integrate phase: this duplicates the minimal body of
+ * `bench/src/router-client.ts#routerChatWithUsage`. `bench/` is a sub-package
+ * outside this package's `rootDir: "src"`, so it cannot be imported here without
+ * breaking the build. Integrate should lift that helper into `src/loops/` and
+ * have both call sites share it (do not re-copy a third time).
+ *
+ * @throws ValidationError at construction when the model, base URL or key is
+ *   missing; from `execute` on a non-OK router response; from `resultArtifact`
+ *   when read before `execute` has produced a result.
+ */
+export const routerInlineExecutor: LeafExecutorFactory<unknown> = (spec, ctx) => {
+  const seam = readSeam<RouterSeam>(ctx, routerSeamKey, 'router/inline')
+  const model = seam.model ?? spec.profile.model?.default
+  if (!model) {
+    throw new ValidationError(
+      'routerInlineExecutor: no model — set RouterSeam.model or AgentProfile.model.default',
+    )
+  }
+  if (!(seam.routerBaseUrl && seam.routerKey)) {
+    throw new ValidationError('routerInlineExecutor: RouterSeam.routerBaseUrl + routerKey required')
+  }
+
+  // Mirror the context signal onto a private controller: trip it right away when the
+  // context is already aborted, otherwise on the first abort event.
+  const controller = new AbortController()
+  if (ctx.signal.aborted) {
+    controller.abort()
+  } else {
+    ctx.signal.addEventListener(
+      'abort',
+      () => {
+        if (ctx.signal.aborted) controller.abort()
+      },
+      { once: true },
+    )
+  }
+
+  let artifact: LeafResult<unknown> | undefined
+
+  return {
+    runtime: 'router' as Runtime,
+    async execute(task, signal): Promise<LeafResult<unknown>> {
+      const messages = taskToMessages(task, spec)
+      const started = Date.now()
+      // Abort the request when either the per-call signal or the executor's own fires.
+      const abortSignal = linkSignals(signal, controller.signal)
+      const endpoint = `${seam.routerBaseUrl.replace(/\/$/, '')}/chat/completions`
+      const res = await fetch(endpoint, {
+        method: 'POST',
+        headers: { 'content-type': 'application/json', authorization: `Bearer ${seam.routerKey}` },
+        body: JSON.stringify({ model, messages, temperature: 0.2 }),
+        ...(abortSignal ? { signal: abortSignal } : {}),
+      })
+      if (!res.ok) {
+        const detail = (await res.text()).slice(0, 200)
+        throw new ValidationError(`routerInlineExecutor: router ${res.status}: ${detail}`)
+      }
+      const payload = (await res.json()) as {
+        choices?: Array<{ message?: { content?: string } }>
+        usage?: { prompt_tokens?: number; completion_tokens?: number }
+      }
+
+      // Usage counts only when BOTH token fields are real numbers.
+      const reported = payload.usage
+      let tokens: { input: number; output: number } | undefined
+      if (
+        reported &&
+        typeof reported.prompt_tokens === 'number' &&
+        typeof reported.completion_tokens === 'number'
+      ) {
+        tokens = { input: reported.prompt_tokens, output: reported.completion_tokens }
+      }
+      const usd = tokens && isModelPriced(model) ? estimateCost(tokens.input, tokens.output, model) : 0
+
+      const content = payload.choices?.[0]?.message?.content ?? ''
+      const spent: Spend = {
+        iterations: 1,
+        tokens: tokens ? { input: tokens.input, output: tokens.output } : zeroTokenUsage(),
+        usd,
+        ms: Date.now() - started,
+      }
+      const out = { content } as unknown
+      artifact = { outRef: contentRef('router', { model, content }), out, spent }
+      return artifact
+    },
+    teardown(_grace): Promise<{ destroyed: boolean }> {
+      // Nothing to destroy beyond cancelling an in-flight request.
+      controller.abort()
+      return Promise.resolve({ destroyed: true })
+    },
+    resultArtifact() {
+      if (!artifact) {
+        throw new ValidationError('routerInlineExecutor: resultArtifact() read before execute()')
+      }
+      // Hand back a shallow copy of the recorded result.
+      return { ...artifact }
+    },
+  }
+}
+
+// ── sandbox executor (harness is a BackendType) ────────────────────────────────
+
+/**
+ * Sandbox leaf: COMPOSES `runLoop` as a single-task leaf — one box, a refine
+ * driver bounded to the seam's `maxIterations` (default 1), the spec's profile
+ * as the agent run. The loop's aggregated `tokenUsage` + `costUsd` surface as
+ * `UsageEvent`s after it drains, with one `iteration` event per loop iteration.
+ * The optional `lineage` passthrough is forwarded WITHOUT importing
+ * sandbox-lineage / reinventing checkpoint/fork.
+ *
+ * Streaming shape: the loop runs to completion inside the first `next()`, then
+ * the recorded usage events are yielded; the terminal artifact is read from
+ * `resultArtifact()` after the stream drains.
+ *
+ * @throws ValidationError at construction when `spec.harness` is `null`, when the
+ *   seam has no `sandboxClient.create` function, or when `maxIterations` is not a
+ *   finite number > 0; from `resultArtifact` when no artifact has been recorded.
+ */
+export const sandboxExecutor: LeafExecutorFactory<unknown> = (spec, ctx) => {
+  if (spec.harness === null) {
+    throw new ValidationError('sandboxExecutor: harness is null (router/inline) — wrong executor')
+  }
+  const harness = spec.harness as BackendType
+
+  const seam = readSeam<SandboxSeam>(ctx, sandboxSeamKey, 'sandbox')
+  const canCreateBoxes =
+    Boolean(seam.sandboxClient) && typeof seam.sandboxClient.create === 'function'
+  if (!canCreateBoxes) {
+    throw new ValidationError('sandboxExecutor: SandboxSeam.sandboxClient.create required')
+  }
+
+  const maxIterations = seam.maxIterations ?? 1
+  const iterationCapValid = Number.isFinite(maxIterations) && maxIterations > 0
+  if (!iterationCapValid) {
+    throw new ValidationError('sandboxExecutor: maxIterations must be > 0')
+  }
+
+  // Mirror the context signal onto a private controller that `teardown` also trips.
+  const controller = new AbortController()
+  if (ctx.signal.aborted) {
+    controller.abort()
+  } else {
+    ctx.signal.addEventListener(
+      'abort',
+      () => {
+        if (ctx.signal.aborted) controller.abort()
+      },
+      { once: true },
+    )
+  }
+
+  let artifact: LeafResult<unknown> | undefined
+  const recordArtifact = (produced: LeafResult<unknown>): void => {
+    artifact = produced
+  }
+
+  // The leaf runs an opaque coding harness; the adapter keeps the raw event list
+  // as the leaf output without interpreting it.
+  const output: OutputAdapter<SandboxLeafOut> = {
+    parse: (events: SandboxEvent[]): SandboxLeafOut => ({ events }),
+  }
+  const driver = singleShotDriver<SandboxLeafOut>(maxIterations)
+
+  return {
+    runtime: 'sandbox' as Runtime,
+    execute(task, signal): AsyncIterable<UsageEvent> {
+      return streamSandboxLeaf({
+        task,
+        signal,
+        harness,
+        spec,
+        seam,
+        output,
+        driver,
+        maxIterations,
+        controller,
+        loopCtx: seam.loopCtx,
+        onArtifact: recordArtifact,
+      })
+    },
+    teardown(_grace): Promise<{ destroyed: boolean }> {
+      // The composed runLoop owns its box teardown (finally{allSettled(destroy)});
+      // aborting the loop's signal cascades into that barrier.
+      controller.abort()
+      return Promise.resolve({ destroyed: true })
+    },
+    resultArtifact() {
+      if (artifact) return artifact
+      throw new ValidationError('sandboxExecutor: resultArtifact() read before stream drained')
+    },
+  }
+}
+
+interface SandboxLeafOut {
+  events: SandboxEvent[] // the event list handed to the output adapter's `parse`, kept as-is
+}
+
+interface StreamSandboxArgs {
+  task: unknown // opaque leaf task, passed to `runLoop` unchanged
+  signal: AbortSignal // per-`execute` abort signal
+  harness: BackendType // used as the box backend type and as the fallback run name
+  spec: AgentSpec // supplies the agent profile
+  seam: SandboxSeam // supplies `sandboxClient` and the optional `lineage`
+  output: OutputAdapter<SandboxLeafOut>
+  driver: Driver<unknown, SandboxLeafOut, string>
+  maxIterations: number // forwarded as the loop's `maxIterations`
+  controller: AbortController // executor-owned; its abort also cancels the loop
+  loopCtx?: Partial<Omit<ExecCtx, 'sandboxClient' | 'signal'>> // spread into the loop `ctx` first
+  onArtifact: (a: LeafResult<unknown>) => void // called once the loop resolves, before any event is yielded
+}
+
+/**
+ * Run the composed `runLoop` for one sandbox leaf and replay its usage as events.
+ *
+ * The loop is awaited to completion first; only then is the terminal artifact
+ * handed to `onArtifact` and the usage yielded: one `iteration` event per recorded
+ * loop iteration, then one `tokens` event (skipped when both counts are zero) and
+ * one `cost` event (skipped when the cost is zero). The loop runs under a signal
+ * that aborts when either the caller's `signal` or the executor's `controller`
+ * does; both listeners are detached when the generator finishes or throws.
+ */
+async function* streamSandboxLeaf(args: StreamSandboxArgs): AsyncIterable<UsageEvent> {
+  const { signal, controller, harness, seam } = args
+
+  // One signal for the loop, tripped by whichever upstream signal aborts first.
+  const linked = new AbortController()
+  const cascade = () => linked.abort()
+  if (signal.aborted || controller.signal.aborted) {
+    linked.abort()
+  } else {
+    signal.addEventListener('abort', cascade, { once: true })
+    controller.signal.addEventListener('abort', cascade, { once: true })
+  }
+
+  const agentRun: AgentRunSpec<unknown> = {
+    profile: args.spec.profile,
+    taskToPrompt: (leafTask) => taskToPrompt(leafTask),
+    name: args.spec.profile.name ?? harness,
+    sandboxOverrides: { backend: { type: harness } },
+  }
+  const started = Date.now()
+
+  // `lineage` is a PR #150 RunLoopOptions field absent on this branch — forwarded
+  // forward-compatibly without coupling to its (not-yet-present) static type.
+  const lineagePassthrough = seam.lineage === undefined ? {} : { lineage: seam.lineage }
+  const execCtx = {
+    ...(args.loopCtx ?? {}),
+    sandboxClient: seam.sandboxClient,
+    signal: linked.signal,
+  } as ExecCtx
+  const loopOptions = {
+    driver: args.driver,
+    agentRun,
+    output: args.output,
+    task: args.task,
+    maxIterations: args.maxIterations,
+    maxConcurrency: 1,
+    ctx: execCtx,
+    ...lineagePassthrough,
+  } as RunLoopOptions<unknown, SandboxLeafOut, string>
+
+  try {
+    const result = await runLoop(loopOptions)
+    const { winner, tokenUsage, costUsd } = result
+    const out = winner?.output ?? { events: [] }
+    const verdict = winner?.verdict
+    const spent: Spend = {
+      iterations: result.iterations.length,
+      tokens: { input: tokenUsage.input, output: tokenUsage.output },
+      usd: costUsd,
+      ms: Date.now() - started,
+    }
+    // Record the artifact before yielding so it is readable once the stream drains.
+    args.onArtifact({
+      outRef: contentRef('sandbox', { harness, out }),
+      out,
+      ...(verdict ? { verdict } : {}),
+      spent,
+    })
+
+    for (let emitted = 0; emitted < result.iterations.length; emitted += 1) {
+      yield { kind: 'iteration' }
+    }
+    if (tokenUsage.input || tokenUsage.output) {
+      yield { kind: 'tokens', input: tokenUsage.input, output: tokenUsage.output }
+    }
+    if (costUsd) yield { kind: 'cost', usd: costUsd }
+  } finally {
+    signal.removeEventListener('abort', cascade)
+    controller.signal.removeEventListener('abort', cascade)
+  }
+}
+
+// ── cli executor (Halo / external RLM subprocess) ──────────────────────────────
+
+/**
+ * Spawns a subprocess (`bin` + `args`). It cannot account tokens, so it is
+ * `budgetExempt: true`: its spend is NOT metered against the conserved pool and
+ * its iterations are EXCLUDED from the equal-k arms by construction (the
+ * resolver/equal-k path checks `budgetExempt`). Streaming: yields one
+ * `iteration` event on clean exit.
+ *
+ * `teardown(grace)` aborts the executor's controller and, while the child is
+ * still running, delegates to `killWithGrace`. "Still running" is decided from
+ * `exitCode`/`signalCode`: `proc.killed` only records that a signal was SENT, so
+ * using it would report `destroyed: true` for a child that has not exited yet.
+ *
+ * @throws ValidationError at construction when the `cli` seam or `CliSeam.bin`
+ *   is missing; from `resultArtifact` when no artifact has been recorded.
+ */
+export const cliExecutor: LeafExecutorFactory<unknown> = (_spec, ctx) => {
+  const seam = readSeam<CliSeam>(ctx, cliSeamKey, 'cli')
+  if (!seam.bin) throw new ValidationError('cliExecutor: CliSeam.bin required')
+
+  // Mirror the context signal onto a private controller that `teardown` also trips.
+  const controller = new AbortController()
+  const abortIfSignalled = () => {
+    if (ctx.signal.aborted) controller.abort()
+  }
+  abortIfSignalled()
+  if (!ctx.signal.aborted) ctx.signal.addEventListener('abort', abortIfSignalled, { once: true })
+
+  let proc: ReturnType<typeof spawn> | undefined
+  let artifact: LeafResult<unknown> | undefined
+
+  return {
+    runtime: 'cli' as Runtime,
+    budgetExempt: true,
+    execute(task, signal): AsyncIterable<UsageEvent> {
+      return streamCliLeaf({
+        task,
+        signal,
+        seam,
+        controller,
+        onProc: (p) => {
+          proc = p
+        },
+        onArtifact: (a) => {
+          artifact = a
+        },
+      })
+    },
+    async teardown(grace): Promise<{ destroyed: boolean }> {
+      // NOTE(review): this abort also fires the stream's own abort handler, which
+      // SIGKILLs a live child immediately — so `grace` does not currently delay the
+      // kill of a running stream. Confirm whether that is intended.
+      controller.abort()
+      // Nothing to reap when no child was spawned or it has already exited.
+      if (!proc || proc.exitCode !== null || proc.signalCode !== null) return { destroyed: true }
+      return killWithGrace(proc, grace)
+    },
+    resultArtifact() {
+      if (!artifact) {
+        throw new ValidationError('cliExecutor: resultArtifact() read before stream drained')
+      }
+      return artifact
+    },
+  }
+}
+
+interface StreamCliArgs {
+  task: unknown // opaque leaf task; rendered to a prompt and written to the child's stdin
+  signal: AbortSignal // per-`execute` abort signal; an abort kills the child
+  seam: CliSeam // `bin`, `args`, `env`, `cwd` for the spawn
+  controller: AbortController // executor-owned; an abort kills the child
+  onProc: (p: ReturnType<typeof spawn>) => void // called with the child right after it is spawned
+  onArtifact: (a: LeafResult<unknown>) => void // called after a zero exit code, before the event is yielded
+}
+
+/**
+ * Spawn the CLI subprocess, feed it the task prompt on stdin, collect its
+ * stdout/stderr, and yield a single `iteration` event once it exits with code 0.
+ *
+ * The artifact handed to `onArtifact` carries the collected stdout as
+ * `{ content }` and a zero `Spend` (the runtime is budget-exempt). An abort of
+ * either `signal` or `controller` SIGKILLs the child.
+ *
+ * @throws ValidationError when the process cannot be spawned or exits with a
+ *   non-zero (or null) code; the message includes the first 200 characters of
+ *   stderr.
+ */
+async function* streamCliLeaf(args: StreamCliArgs): AsyncIterable<UsageEvent> {
+  const prompt = taskToPrompt(args.task)
+  const proc = spawn(args.seam.bin, args.seam.args ?? [], {
+    ...(args.seam.cwd ? { cwd: args.seam.cwd } : {}),
+    env: { ...process.env, ...(args.seam.env ?? {}) },
+    stdio: ['pipe', 'pipe', 'pipe'],
+  })
+  args.onProc(proc)
+
+  const onAbort = () => killWithGrace(proc, 'brutalKill')
+  if (args.signal.aborted || args.controller.signal.aborted) onAbort()
+  else {
+    args.signal.addEventListener('abort', onAbort, { once: true })
+    args.controller.signal.addEventListener('abort', onAbort, { once: true })
+  }
+
+  // Feed the task on stdin; the subprocess owns its own tool/agent loop.
+  if (proc.stdin) {
+    // A child that exits (or closes stdin) before reading the prompt makes the
+    // write fail with EPIPE. Without a listener that is an unhandled 'error' event
+    // that crashes the host process; the child's exit code is what we act on.
+    proc.stdin.on('error', () => {})
+    proc.stdin.write(prompt)
+    proc.stdin.end()
+  }
+
+  // Decode at the stream level so a multi-byte UTF-8 character split across two
+  // chunks is reassembled instead of turning into U+FFFD on both sides.
+  const chunks: string[] = []
+  const errChunks: string[] = []
+  if (proc.stdout) {
+    proc.stdout.setEncoding('utf8')
+    proc.stdout.on('data', (d: string) => chunks.push(d))
+  }
+  if (proc.stderr) {
+    proc.stderr.setEncoding('utf8')
+    proc.stderr.on('data', (d: string) => errChunks.push(d))
+  }
+
+  // Whichever of 'error' (spawn failure) or 'close' arrives first settles the wait.
+  const exit = await new Promise<{ code: number | null; error?: Error }>((resolve) => {
+    proc.once('error', (err) => resolve({ code: null, error: err }))
+    proc.once('close', (code) => resolve({ code }))
+  })
+  args.signal.removeEventListener('abort', onAbort)
+  args.controller.signal.removeEventListener('abort', onAbort)
+
+  if (exit.error) {
+    throw new ValidationError(`cliExecutor: spawn failed: ${exit.error.message}`, {
+      cause: exit.error,
+    })
+  }
+  if (exit.code !== 0) {
+    throw new ValidationError(
+      `cliExecutor: ${args.seam.bin} exited ${exit.code}: ${errChunks.join('').slice(0, 200)}`,
+    )
+  }
+  const out = { content: chunks.join('') } as unknown
+  // budgetExempt: spend is recorded zero (not metered) — never a fabricated cost.
+  args.onArtifact({ outRef: contentRef('cli', out), out, spent: zeroSpend() })
+  yield { kind: 'iteration' }
+}
+
+/**
+ * Terminate a child process and resolve once it has closed.
+ *
+ * Sends SIGTERM, then SIGKILL after `grace` ms if the child is still running
+ * (`'brutalKill'` = immediate SIGKILL, `'infinity'` = await clean exit, never
+ * escalate). Resolves immediately when the child has already exited.
+ *
+ * "Still running" is decided from `exitCode`/`signalCode`. `proc.killed` is NOT
+ * used: it flips to `true` as soon as a signal has been sent, so gating the
+ * escalation on it would suppress the SIGKILL right after the SIGTERM.
+ *
+ * @param proc - The child process to terminate.
+ * @param grace - Milliseconds to wait before escalating, or one of the two modes.
+ * @returns `{ destroyed: true }` once the child has exited/closed.
+ */
+function killWithGrace(
+  proc: ReturnType<typeof spawn>,
+  grace: number | 'brutalKill' | 'infinity',
+): Promise<{ destroyed: boolean }> {
+  const hasExited = (): boolean => proc.exitCode !== null || proc.signalCode !== null
+  if (hasExited()) return Promise.resolve({ destroyed: true })
+  return new Promise((resolve) => {
+    let timer: ReturnType<typeof setTimeout> | undefined
+    proc.once('close', () => {
+      if (timer) clearTimeout(timer)
+      resolve({ destroyed: true })
+    })
+    if (grace === 'brutalKill') {
+      proc.kill('SIGKILL')
+      return
+    }
+    proc.kill('SIGTERM')
+    if (grace === 'infinity') return
+    // Escalate only when the SIGTERM did not make the child exit in time.
+    timer = setTimeout(() => {
+      if (!hasExited()) proc.kill('SIGKILL')
+    }, grace)
+  })
+}
+
+// ── The open registry ──────────────────────────────────────────────────────────
+
+/**
+ * Build the executor resolver/registry.
+ *
+ * The registry starts with the built-ins under the tags `'router'`, `'inline'`
+ * (both the router/inline executor), `'sandbox'` and `'cli'`. `register(name,
+ * factory)` adds a further runtime and throws when the tag is already taken.
+ *
+ * `resolve` precedence: a BYO `spec.executor` wins (wrapped in a trivial factory,
+ * the registry is not consulted); `harness === null` resolves the `'router'`
+ * factory; any other harness resolves the `'sandbox'` factory. As written,
+ * `resolve` only ever looks up those two tags. A missing factory is reported as
+ * `{ succeeded: false, error }` rather than thrown.
+ */
+export function createExecutorRegistry(): ExecutorRegistry {
+  const factories = new Map<Runtime, LeafExecutorFactory<unknown>>([
+    ['router', routerInlineExecutor],
+    ['inline', routerInlineExecutor],
+    ['sandbox', sandboxExecutor],
+    ['cli', cliExecutor],
+  ])
+
+  return {
+    register<Out>(runtime: Runtime, factory: LeafExecutorFactory<Out>): void {
+      const taken = factories.has(runtime)
+      if (taken) {
+        throw new ValidationError(`executor registry: runtime "${runtime}" already registered`)
+      }
+      factories.set(runtime, factory as LeafExecutorFactory<unknown>)
+    },
+    resolve<Out>(
+      spec: AgentSpec,
+    ): { succeeded: true; value: LeafExecutorFactory<Out> } | { succeeded: false; error: string } {
+      // BYO: a caller-supplied executor wins, wrapped in a trivial per-spawn factory.
+      if (spec.executor) {
+        const byo = spec.executor
+        return { succeeded: true, value: (() => byo) as LeafExecutorFactory<Out> }
+      }
+
+      // No harness → direct Router call; any BackendType → the runLoop-composing sandbox.
+      const isInline = spec.harness === null
+      const runtimeTag: Runtime = isInline ? 'router' : 'sandbox'
+      const factory = factories.get(runtimeTag)
+      if (factory) return { succeeded: true, value: factory as LeafExecutorFactory<Out> }
+
+      if (isInline) return { succeeded: false, error: 'executor registry: no "router" factory' }
+      return {
+        succeeded: false,
+        error: `executor registry: no factory for runtime "${runtimeTag}" (harness "${spec.harness}") and no BYO executor`,
+      }
+    },
+  }
+}
+
+// ── Shared helpers ──────────────────────────────────────────────────────────────
+
+/**
+ * Look up the seam stored under `key` on `ctx.seams` and return it typed as `T`.
+ * A `null`/`undefined` seam is a hard failure — a required external-boundary seam
+ * never gets a silent default. `who` names the executor in the error message.
+ */
+function readSeam<T>(ctx: ExecutorContext, key: string, who: string): T {
+  const found = ctx.seams[key]
+  if (found == null) {
+    throw new ValidationError(`${who} executor: missing required seam "${key}" on ExecutorContext`)
+  }
+  return found as T
+}
+
+/**
+ * Render an opaque leaf task as prompt text.
+ *
+ * A string is the prompt verbatim. An object contributes the first of its
+ * `prompt` / `content` / `task` / `message` fields that holds a string. Anything
+ * else is serialized with `JSON.stringify`, falling back to `String(task)` when
+ * that yields `undefined` (e.g. `undefined`, a function, a symbol) or throws
+ * (e.g. a circular structure, a bigint) — so the result is always a string.
+ *
+ * @param task - The opaque task value.
+ * @returns The prompt text; never `undefined`.
+ */
+function taskToPrompt(task: unknown): string {
+  if (typeof task === 'string') return task
+  if (task !== null && typeof task === 'object') {
+    const fields = task as Record<string, unknown>
+    for (const key of ['prompt', 'content', 'task', 'message']) {
+      const candidate = fields[key]
+      if (typeof candidate === 'string') return candidate
+    }
+  }
+  try {
+    return JSON.stringify(task) ?? String(task)
+  } catch {
+    return String(task)
+  }
+}
+
+/**
+ * Build the Router chat messages for a task: the profile's system prompt first
+ * (only when it is a non-empty string), then the task rendered as the user message.
+ */
+function taskToMessages(task: unknown, spec: AgentSpec): Array<{ role: string; content: string }> {
+  const system = spec.profile.prompt?.systemPrompt
+  const lead: Array<{ role: string; content: string }> =
+    typeof system === 'string' && system.length > 0 ? [{ role: 'system', content: system }] : []
+  return [...lead, { role: 'user', content: taskToPrompt(task) }]
+}
+
+/**
+ * The minimal driver that lets the sandbox executor run `runLoop` as one leaf:
+ * it keeps planning the same single task until the history holds `maxIterations`
+ * entries, then plans nothing and decides `'stop'`.
+ */
+function singleShotDriver<Out>(maxIterations: number): Driver<unknown, Out, string> {
+  const exhausted = (history: { readonly length: number }): boolean =>
+    history.length >= maxIterations
+  return {
+    name: 'leaf',
+    plan(task, history): Promise<unknown[]> {
+      const batch = exhausted(history) ? [] : [task]
+      return Promise.resolve(batch)
+    },
+    decide(history: ReadonlyArray<Iteration<unknown, Out>>): string {
+      if (exhausted(history)) return 'stop'
+      return 'continue'
+    },
+  }
+}
+
+/**
+ * Link two abort signals into one that fires when either does. When one of them
+ * is already aborted the returned signal is aborted from the start. The declared
+ * return type allows `undefined`, but this implementation always returns a signal.
+ */
+function linkSignals(a: AbortSignal, b: AbortSignal): AbortSignal | undefined {
+  const joined = new AbortController()
+  if (a.aborted || b.aborted) {
+    joined.abort()
+    return joined.signal
+  }
+  const trip = () => joined.abort()
+  for (const source of [a, b]) {
+    source.addEventListener('abort', trip, { once: true })
+  }
+  return joined.signal
+}
+
+// Re-export the verdict + spend surface so a consumer importing the runtime
+// built-ins gets the budget vocabulary from one place.
+export type { DefaultVerdict, LeafExecutor, LeafResult, Spend, UsageEvent }
diff --git a/src/loops/supervise/scope.ts b/src/loops/supervise/scope.ts
new file mode 100644
index 0000000..a5e775f
--- /dev/null
+++ b/src/loops/supervise/scope.ts
@@ -0,0 +1,560 @@
+/**
+ * @experimental
+ *
+ * The reactive `Scope` impl (KEYSTONE, build step 4 + the step-8 adapter).
+ *
+ * An `Agent.act` runs inside a `Scope`. It `spawn`s children dynamically and reacts to
+ * them via `next()`. The scope owns ONE in-memory nursery — the authoritative live set —
+ * and is the single place that drives a child's lifecycle: reserve budget atomically,
+ * resolve a `LeafExecutor` through the open registry, run it (one-shot OR streaming),
+ * fold its normalized `UsageEvent`s into a conserved `Spend`, reconcile the reservation
+ * (refunding the unspent remainder), persist the result blob + journal records, and
+ * deliver the `Settled` through the `next()` cursor.
+ *
+ * Three invariants this impl enforces by construction:
+ *  - `next()` is a ray.wait n=1 cursor over THIS scope's live set; it assigns the
+ *    monotonic `seq` (the recorded cursor order) at the moment it yields a settlement, so
+ *    replay re-delivers in the identical order — `seq` is never wall-clock.
+ *  - Budget is reserved at spawn and reconciled at settle through the shared `BudgetPool`,
+ *    so `spawn` fails CLOSED on an exhausted pool and total ≡ free + reserved + committed.
+ *  - `view` reads the in-memory nursery, never the journal — O(live), synchronous.
+ *
+ * The settle path is the only writer of journal `settled` events; the spawn path the only
+ * writer of `spawned` events. The result blob is `put` BEFORE the journal `settled` record
+ * references its `outRef`, so a crash can never leave a journaled ref with no blob.
+ */
+
+import { contentAddress } from '../../durable/spawn-journal'
+import { ValidationError } from '../../errors'
+import type { Iteration } from '../types'
+import type { BudgetPool, ReservationTicket } from './budget'
+import type {
+  Agent,
+  AgentSpec,
+  Budget,
+  DefaultVerdict,
+  ExecutorContext,
+  ExecutorRegistry,
+  Handle,
+  LeafExecutor,
+  LeafResult,
+  NodeId,
+  NodeSnapshot,
+  NodeStatus,
+  ResultBlobStore,
+  Scope,
+  Settled,
+  SpawnJournal,
+  SpawnOpts,
+  Spend,
+  TreeView,
+  UsageEvent,
+} from './types'
+
+/** Construction args for `createScope`. The supervisor threads the shared pool, journal,
+ *  blob store, and executor registry through; `depth`/`maxDepth` pair the runtime
+ *  recursion ceiling with the conserved pool (R3). */
+export interface ScopeArgs {
+  /** This scope's owning node id — children get `${parentId}:s${ordinal}` ids, where
+   *  `ordinal` is this scope's spawn counter (0, 1, 2, …), not the cursor `seq`. */
+  readonly parentId: NodeId
+  /** Journal/blob root key the supervisor `beginTree`'d. */
+  readonly root: NodeId
+  /** The shared conserved reservation pool (one per supervised run). */
+  readonly pool: BudgetPool
+  /** Append-only spawn journal; this scope writes `spawned` + `settled` records. */
+  readonly journal: SpawnJournal
+  /** Content-addressed result store backing `outRef` rehydration. */
+  readonly blobs: ResultBlobStore
+  /** The open executor resolver (BYO → router/inline → registered harness factory). */
+  readonly executors: ExecutorRegistry
+  /** Per-spawn executor-construction seams (sandbox client, router config, cli bin);
+   *  passed unchanged to every child's `ExecutorContext`. */
+  readonly seams: Readonly<Record<string, unknown>>
+  /** This scope's recursion depth (root = 0). */
+  readonly depth: number
+  /** Runtime recursion-depth ceiling — a spawn at `depth >= maxDepth` fails closed
+   *  `depth-exceeded`; no ceiling applies when omitted. */
+  readonly maxDepth?: number
+  /** Abort signal for this scope; an abort cascades into every live child's executor. */
+  readonly signal: AbortSignal
+  /** Injected clock — keeps the journal `at` timestamp deterministic in tests.
+   *  Defaults to `Date.now`. */
+  readonly now?: () => number
+}
+
+/**
+ * Internal live-set entry. `settled` resolves once the child's executor has fully drained,
+ * its reservation reconciled, and its result blob persisted; `next()` awaits these to drive
+ * the cursor. `resolved` mirrors that terminal value synchronously so a concurrent `next()`
+ * can pick the next undelivered settlement without re-racing. `delivered` guards exactly-once
+ * delivery; `seq` is stamped by `next()`, never here.
+ */
+interface LiveChild {
+  readonly id: NodeId // `${parentId}:s${ordinal}`, minted at spawn
+  status: NodeStatus // created as 'acquiring'
+  runtime: NodeSnapshot['runtime'] // copied from the resolved executor's `runtime`
+  readonly budget: Budget // the `SpawnOpts.budget` the pool reservation was made for
+  readonly label: string // the `SpawnOpts.label`
+  spent: Spend // starts as a zero spend
+  outRef?: string
+  /** Resolves with the terminal settlement WITHOUT a `seq` — `next()` stamps the seq. */
+  readonly settled: Promise<PreSeqSettled>
+  /** Synchronous mirror of `settled`'s value once it has resolved (else `undefined`). */
+  resolved?: PreSeqSettled
+  /** True once `next()` has yielded this child's settlement. */
+  delivered: boolean
+}
+
+/** A child's terminal settlement — `done` with its output, or `down` with a failure
+ *  reason — before the cursor stamps the monotonic `seq`. */
+type PreSeqSettled =
+  | { kind: 'done'; out: unknown; outRef: string; verdict?: DefaultVerdict; spent: Spend }
+  | { kind: 'down'; reason: string; infra: boolean; restartCount: number }
+
+export function createScope<Out>(args: ScopeArgs): Scope<Out> {
+  const children = new Map<NodeId, LiveChild>()
+  // Two distinct monotonic counters in two namespaces:
+  //  - `spawnOrdinal` is the spawn order (0,1,2,…); it mints the deterministic node id
+  //    `${parent}:s${ordinal}` and stamps the `spawned` event's `seq`. Known at spawn.
+  //  - `cursorSeq` is the order `next()` yields settlements (B2); it stamps the
+  //    `settled`/`cancelled` event's `seq` and the `Settled.seq` the driver branches on.
+  // They are separate so a `spawned` event never collides with a `settled` event in the
+  // journal's per-tree uniqueness guard (which is scoped to the cursor namespace).
+  let spawnOrdinal = 0
+  let cursorSeq = 0
+  const now = args.now ?? Date.now
+
+  /**
+   * Admit one child: depth check, executor resolution, atomic budget reservation, launch.
+   * Depth/budget refusals return `{ ok: false }`; a missing or unresolvable `executorSpec`
+   * throws `ValidationError`; a factory throw is re-thrown after its reservation unwinds.
+   */
+  function spawn<C extends Out>(
+    agent: Agent<unknown, C>,
+    task: unknown,
+    opts: SpawnOpts,
+  ):
+    | { ok: true; handle: Handle<C> }
+    | { ok: false; reason: 'budget-exhausted' | 'depth-exceeded' } {
+    if (args.maxDepth !== undefined && args.depth >= args.maxDepth) {
+      return { ok: false, reason: 'depth-exceeded' }
+    }
+
+    // Resolve the executor FIRST so a misconfigured agent leaves no reservation to unwind.
+    const spec = (agent as unknown as { executorSpec?: unknown }).executorSpec
+    if (!isAgentSpec(spec)) {
+      throw new ValidationError(
+        `scope.spawn: agent "${agent.name}" exposes no \`executorSpec\` (AgentSpec) to resolve a LeafExecutor`,
+      )
+    }
+    const resolved = args.executors.resolve<C>(spec)
+    if (!resolved.succeeded) throw new ValidationError(`scope.spawn: ${resolved.error}`)
+
+    // Reserve the child's whole ceiling atomically; fail CLOSED when the pool can't cover it.
+    const reservation = args.pool.reserve(opts.budget)
+    if (!reservation.ok) return { ok: false, reason: reservation.reason }
+    const ordinal = spawnOrdinal++
+    const id: NodeId = `${args.parentId}:s${ordinal}`
+
+    // The child aborts when this scope's signal aborts OR when its own handle.abort() fires.
+    const childAbort = new AbortController()
+    const cascadeAbort = () => childAbort.abort()
+    if (args.signal.aborted) childAbort.abort()
+    else args.signal.addEventListener('abort', cascadeAbort, { once: true })
+    const ctx: ExecutorContext = { signal: childAbort.signal, seams: args.seams }
+    let executor: LeafExecutor<C>
+    try {
+      executor = resolved.value(spec, ctx) as LeafExecutor<C>
+    } catch (err) {
+      // A throwing factory must not strand the reservation or the scope-signal listener.
+      args.signal.removeEventListener('abort', cascadeAbort)
+      args.pool.reconcile(reservation.ticket, zeroSpend())
+      throw err
+    }
+
+    const handle: Handle<C> = {
+      id,
+      label: opts.label,
+      get status(): NodeStatus {
+        return children.get(id)?.status ?? 'cancelled'
+      },
+      abort(reason?: string): void {
+        childAbort.abort(reason)
+      },
+    }
+    const live: LiveChild = {
+      id,
+      status: 'acquiring',
+      runtime: executor.runtime,
+      budget: opts.budget,
+      label: opts.label,
+      spent: zeroSpend(),
+      settled: undefined as unknown as Promise<PreSeqSettled>,
+      delivered: false,
+    }
+    children.set(id, live)
+    void args.journal.appendEvent(args.root, {
+      kind: 'spawned',
+      id,
+      parent: args.parentId,
+      label: opts.label,
+      budget: opts.budget,
+      runtime: executor.runtime,
+      seq: ordinal,
+      at: new Date(now()).toISOString(),
+    })
+
+    // `runChild` types every failure into a `down` record, so this promise feeds `next()`.
+    const settled = runChild(
+      live,
+      executor,
+      childAbort,
+      task,
+      opts,
+      args.pool,
+      reservation.ticket,
+      args.blobs,
+    )
+      .then((s) => {
+        live.resolved = s
+        return s
+      })
+      .finally(() => {
+        args.signal.removeEventListener('abort', cascadeAbort)
+      })
+    ;(live as { settled: Promise<PreSeqSettled> }).settled = settled
+    return { ok: true, handle }
+  }
+
+  /** Yield the next undelivered settlement in cursor order, or `null` once none remain. */
+  async function next(): Promise<Settled<Out> | null> {
+    // Re-scan on every pass: a concurrent `next()` may deliver a child while this call is
+    // parked on the race, so the awaited winner can already be taken when it resumes.
+    for (;;) {
+      const waiting: LiveChild[] = []
+      for (const child of children.values()) {
+        if (!child.delivered) waiting.push(child)
+      }
+      if (waiting.length === 0) return null
+      // An already-resolved child is handed out without awaiting; otherwise park on the race.
+      let winner = waiting.find((child) => child.resolved !== undefined)
+      if (winner === undefined) winner = await raceFirstSettled(waiting)
+      if (winner.delivered) continue
+      winner.delivered = true
+
+      const seq = cursorSeq++
+      if (!winner.resolved) {
+        throw new ValidationError(
+          `scope.next: child '${winner.id}' won the settle race without a resolved value`,
+        )
+      }
+      return finalizeSettlement<Out>(winner, winner.resolved, seq, args, now)
+    }
+  }
+
+  return {
+    spawn,
+    next,
+    get view(): TreeView {
+      return makeTreeView(args.parentId, children)
+    },
+    get budget() {
+      return args.pool.readout()
+    },
+  }
+}
+
+/** Resolve to whichever `pending` child's `settled` promise fulfils first. `settled` is
+ *  chained after the `.then` that stores `resolved`, so the winner's value is already set. */
+async function raceFirstSettled(pending: LiveChild[]): Promise<LiveChild> {
+  const contenders = pending.map((child) => child.settled.then(() => child))
+  return Promise.race(contenders)
+}
+
+/** Stamp the cursor `seq`, mark the child terminal (`failed`/`done`), journal its `settled`
+ *  record, and project the `PreSeqSettled` into the frozen `Settled` the driver branches on. */
+async function finalizeSettlement<Out>(
+  child: LiveChild,
+  settlement: PreSeqSettled,
+  seq: number,
+  args: ScopeArgs,
+  now: () => number,
+): Promise<Settled<Out>> {
+  // Set the terminal status BEFORE snapshotting: `frozenHandle` copies `child.status` by value.
+  child.status = settlement.kind === 'down' ? 'failed' : 'done'
+  const handle = frozenHandle<Out>(child)
+  if (settlement.kind === 'down') {
+    await args.journal.appendEvent(args.root, {
+      kind: 'settled',
+      id: child.id,
+      status: 'down',
+      spent: child.spent,
+      infra: settlement.infra,
+      seq,
+      at: new Date(now()).toISOString(),
+    })
+    return {
+      kind: 'down',
+      handle,
+      reason: settlement.reason,
+      infra: settlement.infra,
+      restartCount: settlement.restartCount,
+      seq,
+    }
+  }
+
+  child.outRef = settlement.outRef
+  child.spent = settlement.spent
+  await args.journal.appendEvent(args.root, {
+    kind: 'settled',
+    id: child.id,
+    status: 'done',
+    outRef: settlement.outRef,
+    ...(settlement.verdict ? { verdict: settlement.verdict } : {}),
+    spent: settlement.spent,
+    seq,
+    at: new Date(now()).toISOString(),
+  })
+  return {
+    kind: 'done',
+    handle,
+    out: settlement.out as Out,
+    outRef: settlement.outRef,
+    ...(settlement.verdict ? { verdict: settlement.verdict } : {}),
+    spent: settlement.spent,
+    seq,
+  }
+}
+
+/**
+ * Drive one child's `LeafExecutor` to a terminal `PreSeqSettled`: run `execute`, record the
+ * spend on `live`, reconcile the reservation, persist the result blob and tear the executor
+ * down. Two executor shapes are handled: a one-shot `Promise<LeafResult>` and a streaming
+ * `AsyncIterable<UsageEvent>` whose artifact is read from `resultArtifact()` after the drain.
+ *
+ * A thrown executor (or a real abort) becomes a TYPED `down` — never re-thrown — so a
+ * single failing child cannot reject the `next()` cursor (the M2 typed-result discipline,
+ * applied per child). `reconcileOnce` runs on the success, abort and throw paths, and its
+ * guard ensures the ticket is reconciled at most once.
+ */
+async function runChild<C>(
+  live: LiveChild,
+  executor: LeafExecutor<C>,
+  childAbort: AbortController,
+  task: unknown,
+  opts: SpawnOpts,
+  pool: BudgetPool,
+  ticket: ReservationTicket,
+  blobs: ResultBlobStore,
+): Promise<PreSeqSettled> {
+  let reconciled = false
+  const reconcileOnce = (spend: Spend) => {
+    if (reconciled) return
+    reconciled = true
+    // Clamp the reported spend to `opts.budget` before reconciling. A zero spend (what a
+    // budgetExempt executor reports by contract) hands the whole reservation back.
+    pool.reconcile(ticket, clampSpend(spend, opts.budget))
+  }
+  try {
+    live.status = 'running'
+    const ran = executor.execute(task, childAbort.signal)
+    let artifact: LeafResult<C>
+    if (isAsyncIterable(ran)) {
+      // Streaming: fold the usage events into one `Spend` as they arrive, then read the
+      // terminal artifact only after the stream has drained.
+      const spend = await foldStream(ran)
+      live.spent = spend
+      artifact = executor.resultArtifact() as LeafResult<C>
+      reconcileOnce(spend)
+    } else {
+      const terminal = await ran
+      live.spent = terminal.spent
+      artifact = terminal
+      reconcileOnce(terminal.spent)
+    }
+
+    if (childAbort.signal.aborted) {
+      await teardownSafe(executor, opts.shutdown ?? 'brutalKill')
+      return downRecord('aborted before settle', true)
+    }
+
+    // The durable record is keyed by the canonical content address of the output, derived
+    // here with `contentAddress` rather than taken from the executor's own artifact, so the
+    // journal and the blob store share one addressing scheme. The blob is persisted BEFORE
+    // this function returns the `outRef` the journal will reference, so a crash cannot
+    // leave a journaled ref pointing at a missing blob.
+    const outRef = contentAddress(artifact.out)
+    await blobs.put(outRef, artifact.out)
+    await teardownSafe(executor, opts.shutdown ?? 'infinity')
+    return {
+      kind: 'done',
+      out: artifact.out,
+      outRef,
+      ...(artifact.verdict ? { verdict: artifact.verdict } : {}),
+      spent: live.spent,
+    }
+  } catch (err) {
+    // Reconcile whatever spend `live` holds so the reservation is released even on a throw.
+    // NOTE(review): a stream that throws mid-way never updates `live.spent` — confirm intended.
+    reconcileOnce(live.spent)
+    await teardownSafe(executor, 'brutalKill')
+    const aborted = childAbort.signal.aborted || isAbortError(err)
+    return downRecord(errMessage(err), aborted || isInfraError(err))
+  }
+}
+
+/**
+ * Merge-boundary adapter (M4): project a `done` settlement onto the kernel's `Iteration`
+ * shape so settled children can be ranked by the single-sourced `defaultSelectWinner`
+ * instead of a forked copy of the argmax.
+ *
+ * Mapping: `index` ← cursor `seq`; `agentRunName` ← the handle's label; `output` and (when
+ * present) `verdict` ← the settlement; `endedAt`/`costUsd`/`tokenUsage` ← `spent.ms`/
+ * `spent.usd`/`spent.tokens`. `task` is `undefined`, `events` is empty, `startedAt` is 0.
+ * Throws `ValidationError` on a `down` settlement: only a `done` child is an iteration.
+ */
+export function settledToIteration<Out>(settled: Settled<Out>): Iteration<unknown, Out> {
+  if (settled.kind === 'down') {
+    throw new ValidationError(
+      `settledToIteration: cannot adapt a 'down' settlement (node '${settled.handle.id}', seq ${settled.seq}) to an Iteration`,
+    )
+  }
+  const { handle, out, seq, spent, verdict } = settled
+  return {
+    index: seq,
+    task: undefined,
+    agentRunName: handle.label,
+    output: out,
+    ...(verdict ? { verdict } : {}),
+    events: [],
+    startedAt: 0,
+    endedAt: spent.ms,
+    costUsd: spent.usd,
+    tokenUsage: { input: spent.tokens.input, output: spent.tokens.output },
+  }
+}
+
+// ── Helpers ─────────────────────────────────────────────────────────────────────
+
+/**
+ * Snapshot every child of `root` into a `TreeView`. A node carries `outRef` only when its
+ * child has one; `inFlight` counts the nodes whose status is `running` or `acquiring`.
+ */
+function makeTreeView(root: NodeId, children: Map<NodeId, LiveChild>): TreeView {
+  const nodes: NodeSnapshot[] = []
+  let inFlight = 0
+  for (const child of children.values()) {
+    const { id, label, status, runtime, budget, spent, outRef } = child
+    const snapshot = { id, parent: root, label, status, runtime, budget, spent }
+    nodes.push(outRef ? { ...snapshot, outRef } : snapshot)
+    // Tally while walking, so the child list is traversed only once.
+    if (status === 'running' || status === 'acquiring') inFlight += 1
+  }
+
+  return { root, nodes, inFlight }
+}
+
+/** Snapshot a child into a detached `Handle`: `id`, `label` and `status` are copied by
+ *  value at call time, so later changes to the child do not show on the returned handle.
+ *  `abort` does nothing. */
+function frozenHandle<C>(child: LiveChild): Handle<C> {
+  const { id, label, status } = child
+  const abort = (): void => {
+    // A settled child is terminal; abort is a no-op (its executor already tore down).
+  }
+  return { id, label, status, abort }
+}
+
+/** Fold a usage stream into a `Spend`: `tokens` events add to input/output, `cost` events
+ *  add to `usd`, and any other event kind counts one iteration. `ms` is always 0 here. */
+async function foldStream(stream: AsyncIterable<UsageEvent>): Promise<Spend> {
+  const total: Spend = { iterations: 0, tokens: { input: 0, output: 0 }, usd: 0, ms: 0 }
+  for await (const event of stream) {
+    if (event.kind === 'cost') {
+      total.usd += event.usd
+    } else if (event.kind === 'tokens') {
+      total.tokens.input += event.input
+      total.tokens.output += event.output
+    } else {
+      total.iterations += 1
+    }
+  }
+  return total
+}
+
+/** Cap a reported spend at the child's reservation so a benign overshoot from an external
+ *  usage report cannot trip the pool's over-spend guard. A spend already within budget is
+ *  returned as-is; otherwise tokens are scaled down proportionally (floored), iterations
+ *  and usd are capped at their maxima, and `ms` passes through untouched. */
+function clampSpend(spend: Spend, budget: Budget): Spend {
+  const { iterations, tokens, usd, ms } = spend
+  const tokenSum = tokens.input + tokens.output
+  const overTokens = !(tokenSum <= budget.maxTokens)
+  const overIterations = !(iterations <= budget.maxIterations)
+  const overUsd = budget.maxUsd !== undefined && !(usd <= budget.maxUsd)
+  if (!overTokens && !overIterations && !overUsd) return spend
+  const scale = overTokens && tokenSum > 0 ? budget.maxTokens / tokenSum : 1
+  const scaledTokens =
+    scale < 1
+      ? { input: Math.floor(tokens.input * scale), output: Math.floor(tokens.output * scale) }
+      : tokens
+  return {
+    iterations: Math.min(iterations, budget.maxIterations),
+    tokens: scaledTokens,
+    usd: budget.maxUsd === undefined ? usd : Math.min(usd, budget.maxUsd),
+    ms,
+  }
+}
+
+async function teardownSafe<C>(
+  executor: LeafExecutor<C>,
+  grace: number | 'brutalKill' | 'infinity', // forwarded unchanged to `executor.teardown`
+): Promise<void> {
+  try {
+    await executor.teardown(grace)
+  } catch {
+    // Best-effort by design: a teardown failure (sync throw or rejection) is swallowed so it
+    // never masks the settlement the caller is about to return. Nothing is logged here.
+  }
+}
+
+function downRecord(reason: string, infra: boolean): PreSeqSettled {
+  return { kind: 'down', reason, infra, restartCount: 0 } // records built here always report 0 restarts
+}
+
+function zeroSpend(): Spend {
+  return { iterations: 0, tokens: { input: 0, output: 0 }, usd: 0, ms: 0 } // fresh object on every call
+}
+
+function isAsyncIterable(value: unknown): value is AsyncIterable<UsageEvent> {
+  // Structural probe: only a non-null object with a callable `Symbol.asyncIterator`
+  // qualifies (a bare function value is rejected by the `typeof` check).
+  if (typeof value !== 'object' || value === null) return false
+  const makeIterator = (value as AsyncIterable<UsageEvent>)[Symbol.asyncIterator]
+  return typeof makeIterator === 'function'
+}
+
+/** Structural `AgentSpec` probe: a non-null object that has both a `profile` and a
+ *  `harness` key. Only key presence is checked — the values are not validated here. */
+function isAgentSpec(value: unknown): value is AgentSpec {
+  if (value === null || typeof value !== 'object') return false
+  const candidate = value as Record<string, unknown>
+  return ['profile', 'harness'].every((key) => key in candidate)
+}
+
+/** True when `err` is a non-null object whose `name` property is exactly `'AbortError'`
+ *  (the check is structural; `err` need not be an `Error` instance). */
+function isAbortError(err: unknown): boolean {
+  if (typeof err !== 'object' || err === null) return false
+  if (!('name' in err)) return false
+  const { name } = err as { name: unknown }
+  return name === 'AbortError'
+}
+
+/** Classify a thrown value as infra: true only for `ValidationError` instances; every other
+ *  throw is treated as a real bad result. NOTE(review): presumes built-in executors wrap
+ *  their config/transport failures in `ValidationError` — confirm against the executors. */
+function isInfraError(err: unknown): boolean {
+  return err instanceof ValidationError
+}
+
+function errMessage(err: unknown): string {
+  // `Error` instances yield their `message`; anything else is stringified via `String()`.
+  return err instanceof Error ? err.message : String(err)
+}
diff --git a/src/loops/supervise/supervisor.ts b/src/loops/supervise/supervisor.ts
new file mode 100644
index 0000000..e95c7b5
--- /dev/null
+++ b/src/loops/supervise/supervisor.ts
@@ -0,0 +1,395 @@
+/**
+ * @experimental
+ *
+ * The `Supervisor` impl (KEYSTONE, build step 5).
+ *
+ * Owns the four things a free-running recursive `act` cannot own itself: the GLOBAL
+ * conserved budget pool, the event-sourced spawn log, the abort cascade over the whole
+ * live tree, and the OTP intensity breaker. `run` builds the root `Scope` over those,
+ * runs the root `Agent.act`, and returns a TYPED `SupervisedResult` — a no-winner is
+ * never coerced into a best-effort `Out`.
+ *
+ * Three lifecycle invariants this impl enforces by construction:
+ *  - Join barrier: when `act()` settles (resolve OR reject), every still-live child is
+ *    torn down before `run` returns — the generalization of the kernel's
+ *    `finally{ Promise.allSettled(destroy) }` barrier (run-loop.ts) from boxes to the
+ *    whole sub-tree. A teardown failure is `allSettled`'d and journaled as a
+ *    `cancelled` event; it NEVER masks act()'s own outcome. act()'s rejection is the
+ *    PRIMARY error (the kernel's firstError precedence), so a teardown throw during the
+ *    barrier can never overwrite the real failure.
+ *  - Abort cascade: a root abort (caller signal, `RootHandle.abort`, a tripped breaker,
+ *    or pool exhaustion) aborts ONE internal controller whose signal is the root scope's
+ *    signal. The scope cascades that into every live child's executor abort — which, for
+ *    an `acquiring` child, chains into the `acquireSandbox` signal and reaps the
+ *    find-by-name orphan box (M1). The supervisor never reaps children directly.
+ *  - The supervisor NEVER re-enters a child (m3): the kernel/`acquireSandbox` already
+ *    retried at the leaf, and a driver re-spawns through `scope.spawn`. The breaker only
+ *    COUNTS `down` settlements within the intensity window and trips to a typed
+ *    no-winner; it does not restart anything.
+ *
+ * Selection lives in the driver, not here (selector≠judge): `act` returns the synthesized
+ * winner `Out`. The supervisor content-addresses that `Out` for its replay `outRef`,
+ * sums `spentTotal` over the journaled `settled` events, and wraps it as a typed `winner`
+ * — it does not re-rank children behind the driver's back.
+ */
+
+import { contentAddress } from '../../durable/spawn-journal'
+import { RuntimeRunStateError } from '../../errors'
+import { type BudgetPool, createBudgetPool } from './budget'
+import { createScope } from './scope'
+import type {
+  Agent,
+  RootHandle,
+  RootSignal,
+  Scope,
+  SpawnEvent,
+  SpawnJournal,
+  Spend,
+  SupervisedResult,
+  Supervisor,
+  SupervisorOpts,
+  TreeView,
+} from './types'
+
+/** The recursion-depth ceiling handed to the root scope when `SupervisorOpts.maxDepth` is
+ *  unset (`opts.maxDepth ?? defaultMaxDepth`); it backs up the conserved pool (R3). */
+const defaultMaxDepth = 4
+
+/** A no-winner reason the supervisor can prove from its OWN lifecycle state. The type is
+ *  extracted from the `no-winner` member of `SupervisedResult`, so it stays pinned to that
+ *  frozen reason union. A driver rejecting for a domain reason (not budget/abort) is
+ *  classed `all-children-down`, the bucket for "the tree produced no usable result". */
+type NoWinnerReason = (SupervisedResult<unknown> & { kind: 'no-winner' })['reason']
+
+/**
+ * Build a `Supervisor`. `run` drives one root `Agent.act` over a fresh root scope and
+ * returns a typed `SupervisedResult`; `attach` registers a `RootHandle` that `run` binds
+ * to the live scope for the duration of each run.
+ */
+export function createSupervisor<Task, Out>(): Supervisor<Task, Out> {
+  let attached: RootControl | undefined
+
+  /** Run `root.act` over a fresh scope; live children are always joined before returning. */
+  async function run(
+    root: Agent<Task, Out>,
+    task: Task,
+    opts: SupervisorOpts,
+  ): Promise<SupervisedResult<Out>> {
+    const now = opts.now ?? Date.now
+    const pool = createBudgetPool(opts.budget, now)
+    await opts.journal.beginTree(opts.runId, new Date(now()).toISOString())
+
+    // ONE internal controller is the root scope's abort source: the caller signal, the
+    // RootHandle and the breaker all abort it, and the scope fans it out to each child.
+    const controller = new AbortController()
+    const cascadeAbort = (reason?: string) => {
+      if (controller.signal.aborted) return
+      // The reason rides on the signal so downstream abort listeners can read it.
+      controller.abort(reason)
+    }
+
+    const onCallerAbort = () => cascadeAbort('caller signal aborted')
+    if (opts.signal) {
+      if (opts.signal.aborted) cascadeAbort('caller signal aborted')
+      else opts.signal.addEventListener('abort', onCallerAbort, { once: true })
+    }
+
+    // The breaker observes `down` settlements through a journal decorator rather than by
+    // intercepting `scope.next()`; tripping aborts the same controller.
+    const breaker = createIntensityBreaker(opts, () => cascadeAbort('intensity breaker tripped'))
+    const journal = wrapJournalForBreaker(opts.journal, breaker)
+
+    const scope = createScope<Out>({
+      parentId: opts.runId,
+      root: opts.runId,
+      pool,
+      journal,
+      blobs: opts.blobs,
+      executors: opts.executors,
+      seams: {},
+      depth: 0,
+      maxDepth: opts.maxDepth ?? defaultMaxDepth,
+      signal: controller.signal,
+      now,
+    })
+
+    // The supervisor never spawns on the scope, so it reads it with `Out` erased.
+    const openScope = scope as unknown as Scope<unknown>
+
+    // Bind any attached RootHandle to THIS run; it is unbound again in the finally barrier.
+    if (attached) {
+      attached.bind({ scope: openScope, cascadeAbort, signal: pushRootSignal(cascadeAbort) })
+    }
+
+    let actOutcome: { ok: true; out: Out } | { ok: false; error: unknown }
+    let abortedBeforeBarrier = false
+    try {
+      const out = await root.act(task, scope)
+      actOutcome = { ok: true, out }
+    } catch (error) {
+      // Capture act()'s rejection before the barrier so a teardown failure cannot replace it.
+      actOutcome = { ok: false, error }
+    } finally {
+      // Snapshot the abort state FIRST: the join barrier aborts the controller itself to
+      // reap live children, and that housekeeping abort must not read as the cause.
+      abortedBeforeBarrier = controller.signal.aborted
+      await drainLiveChildren(openScope, controller)
+      if (opts.signal) opts.signal.removeEventListener('abort', onCallerAbort)
+      if (attached) attached.unbind()
+    }
+
+    const tree = scope.view
+    if (actOutcome.ok) {
+      // The driver already selected: content-address its `Out`, store it once, and sum the
+      // spend over the journaled settlements. No re-ranking happens here.
+      const out = actOutcome.out
+      const outRef = contentAddress(out)
+      await opts.blobs.put(outRef, out)
+      return {
+        kind: 'winner',
+        out,
+        outRef,
+        tree,
+        spentTotal: await spentTotalFromJournal(journal, opts.runId),
+      }
+    }
+
+    // act() rejected: return a TYPED no-winner, never a coerced partial child (M2).
+    // `classifyNoWinner` only reads `signal.aborted`, so a fresh, un-aborted controller
+    // stands in whenever the abort came from the join barrier rather than a real cause.
+    const cause = abortedBeforeBarrier ? controller : new AbortController()
+    return {
+      kind: 'no-winner',
+      reason: classifyNoWinner(cause, pool, opts, breaker),
+      tree,
+      downCount: breaker.downCount(),
+    }
+  }
+
+  /** Remember a handle minted by `createRootHandle`; throws if it has no control channel. */
+  function attach(h: RootHandle<Out>): void {
+    const control = rootControls.get(h as RootHandle<unknown>)
+    if (!control) {
+      throw new RuntimeRunStateError(
+        'supervisor.attach: handle was not minted by createRootHandle (no control channel)',
+      )
+    }
+    attached = control
+  }
+
+  return { run, attach }
+}
+
+// ── Root handle ───────────────────────────────────────────────────────────────
+
+/** The live binding the supervisor populates while a run is in flight. `view` reads the
+ *  live scope; `cascadeAbort`/`signal` reach the one cascade controller. */
+interface RunBinding {
+  readonly scope: Scope<unknown> // the run's root scope, held with `Out` erased
+  readonly cascadeAbort: (reason?: string) => void // aborts the run's cascade controller
+  readonly signal: (msg: RootSignal) => void // sink for out-of-band `RootSignal` messages
+}
+
+/** The supervisor-private control behind a `RootHandle`. `createRootHandle` mints it and
+ *  registers it in `rootControls`; `attach` looks it up and `bind`s it to the live run. */
+interface RootControl {
+  bind(binding: RunBinding): void // point the handle at a live run's binding
+  unbind(): void // clear the binding; the handle's methods throw again afterwards
+}
+
+/** Module-private side table from a minted `RootHandle` to its `RootControl`. Entries are
+ *  written by `createRootHandle` and looked up by `attach`, which lets `attach` reject a
+ *  foreign handle without exposing the control on the frozen `RootHandle` shape. */
+const rootControls = new WeakMap<RootHandle<unknown>, RootControl>()
+
+/**
+ * Mint a `RootHandle` and register its supervisor-private control in `rootControls`.
+ * `view()` reads the bound run's live tree, `signal()` forwards an out-of-band message and
+ * `abort()` cascades an abort (default reason `'root handle aborted'`). While no run is
+ * bound — before `bind`, or after `unbind` — each method throws `RuntimeRunStateError`
+ * rather than silently doing nothing.
+ */
+export function createRootHandle<Out>(): RootHandle<Out> {
+  let live: RunBinding | undefined
+  // Hand back the current binding, or fail loud with the calling method's own message.
+  const bound = (notBoundMessage: string): RunBinding => {
+    if (!live) throw new RuntimeRunStateError(notBoundMessage)
+    return live
+  }
+
+  const handle: RootHandle<Out> = {
+    view(): TreeView {
+      return bound(
+        'RootHandle.view: handle is not bound to a live run (attach it before run, read after run starts)',
+      ).scope.view
+    },
+    signal(msg: RootSignal): void {
+      bound('RootHandle.signal: handle is not bound to a live run').signal(msg)
+    },
+    abort(reason?: string): void {
+      bound('RootHandle.abort: handle is not bound to a live run').cascadeAbort(
+        reason ?? 'root handle aborted',
+      )
+    },
+  }
+  const control: RootControl = {
+    bind(next: RunBinding): void {
+      live = next
+    },
+    unbind(): void {
+      live = undefined
+    },
+  }
+  rootControls.set(handle as RootHandle<unknown>, control)
+  return handle
+}
+
+/** Build a run's `RootSignal` sink. Only `cancel` acts: it cascades an abort with the
+ *  message's reason (default `'root signal: cancel'`); every other kind is ignored here. */
+function pushRootSignal(cascadeAbort: (reason?: string) => void): (msg: RootSignal) => void {
+  return function deliver(msg: RootSignal): void {
+    if (msg.kind !== 'cancel') return
+    cascadeAbort(msg.reason ?? 'root signal: cancel')
+  }
+}
+
+// ── OTP intensity breaker ───────────────────────────────────────────────────────
+
+/**
+ * Counts `down` settlements inside a sliding window. More than `maxRestarts` of them
+ * within `withinMs` trips the supervisor (aborting the cascade) rather than letting a
+ * driver re-spawn a doomed child forever. With either bound unset the breaker is inert
+ * (it still counts `down`s for `downCount`). The breaker NEVER restarts a child — it is a
+ * circuit breaker over the driver's own re-spawn decisions (m3).
+ */
+interface IntensityBreaker {
+  recordDown(at: number): void // `at` is the down's timestamp in epoch milliseconds
+  tripped(): boolean // true once the breaker has tripped; it never resets
+  downCount(): number // every recorded down, armed or not
+}
+
+/** Build the breaker behind `run`. It is armed only when both `maxRestarts` and `withinMs`
+ *  are set; armed or not, every recorded down is counted for `downCount`. */
+function createIntensityBreaker(opts: SupervisorOpts, trip: () => void): IntensityBreaker {
+  const { maxRestarts, withinMs } = opts
+  const recentDowns: number[] = []
+  let downs = 0
+  let hasTripped = false
+
+  function recordDown(at: number): void {
+    downs += 1
+    if (hasTripped || maxRestarts === undefined || withinMs === undefined) return
+    recentDowns.push(at)
+    // Evict leading timestamps older than the window that ends at `at`.
+    const oldestAllowed = at - withinMs
+    while (recentDowns.length > 0 && recentDowns[0]! < oldestAllowed) recentDowns.shift()
+    if (!(recentDowns.length > maxRestarts)) return
+    // Strictly more than `maxRestarts` downs in the window: latch, then notify once.
+    hasTripped = true
+    trip()
+  }
+
+  return {
+    recordDown,
+    tripped: () => hasTripped,
+    downCount: () => downs,
+  }
+}
+
+/** Journal decorator: reports each `settled`/`down` event to `breaker`, then forwards it. */
+function wrapJournalForBreaker(journal: SpawnJournal, breaker: IntensityBreaker): SpawnJournal {
+  return {
+    loadTree: (root) => journal.loadTree(root),
+    beginTree: (root, at) => journal.beginTree(root, at),
+    appendEvent(root, ev: SpawnEvent) {
+      if (ev.kind === 'settled' && ev.status === 'down') {
+        breaker.recordDown(Date.parse(ev.at))
+      }
+      return journal.appendEvent(root, ev)
+    },
+  }
+}
+
+// ── Join barrier + result classification ─────────────────────────────────────────
+
+/**
+ * Join barrier. When the root scope still reports in-flight children, abort the cascade
+ * controller (unless already aborted) and pull `scope.next()` until it returns `null`, so
+ * every remaining child is delivered by the scope before `run` returns. A rejection from
+ * the drain is swallowed: the barrier never throws. With nothing in flight this is a no-op.
+ */
+async function drainLiveChildren(
+  scope: Scope<unknown>,
+  controller: AbortController,
+): Promise<void> {
+  if (!(scope.view.inFlight > 0)) return
+  // Abort first so each live child's executor is signalled before the cursor is drained.
+  if (!controller.signal.aborted) controller.abort()
+  try {
+    await drainCursor(scope)
+  } catch {
+    // Swallowed on purpose: act()'s own outcome stays the primary result.
+  }
+}
+
+/** Pull `scope.next()` until it reports `null`, discarding each settlement it yields. */
+async function drainCursor(scope: Scope<unknown>): Promise<void> {
+  let settled = await scope.next()
+  while (settled !== null) {
+    settled = await scope.next()
+  }
+}
+
+/** Name why a run ended without a winner, checking the causes in precedence order. */
+function classifyNoWinner(
+  controller: AbortController,
+  pool: BudgetPool,
+  opts: SupervisorOpts,
+  breaker: IntensityBreaker,
+): NoWinnerReason {
+  // A tripped breaker outranks the abort it raised and the pool check, and it shares the
+  // residual bucket's name; otherwise an abort outranks an exhausted pool.
+  const tripped = breaker.tripped()
+  if (!tripped && controller.signal.aborted) return 'aborted'
+  if (!tripped && poolExhausted(pool, opts)) return 'budget-exhausted'
+  return 'all-children-down'
+}
+
+/** True when the pool readout shows the run is out of budget on any channel. */
+function poolExhausted(pool: BudgetPool, opts: SupervisorOpts): boolean {
+  const readout = pool.readout()
+  const { maxUsd, deadlineMs: deadlineCap } = opts.budget
+  // Tokens are always checked; usd only counts when a usd cap was configured.
+  if (readout.tokensLeft <= 0) return true
+  if (maxUsd !== undefined && readout.usdLeft <= 0) return true
+  // The deadline counts only when one was configured and the readout carries a positive
+  // `deadlineMs`; the clock is read last, and only if the earlier checks did not fire.
+  if (deadlineCap === undefined || !(readout.deadlineMs > 0)) return false
+  const clock = opts.now ?? Date.now
+  return clock() >= readout.deadlineMs
+}
+
+/**
+ * Total the `spent` of every `settled` event journaled for `root`, channel by channel
+ * (iterations, input/output tokens, usd, ms). `run` calls it after the join barrier.
+ * Throws `RuntimeRunStateError` when `loadTree` returns `undefined` for `root`.
+ */
+async function spentTotalFromJournal(journal: SpawnJournal, root: string): Promise<Spend> {
+  const events = await journal.loadTree(root)
+  if (events === undefined) {
+    throw new RuntimeRunStateError(
+      `supervisor: spawn tree '${root}' is missing from the journal after run (corrupted log)`,
+    )
+  }
+  let [iterations, input, output, usd, ms] = [0, 0, 0, 0, 0]
+  for (const ev of events) {
+    if (ev.kind === 'settled') {
+      const { spent } = ev
+      iterations += spent.iterations
+      input += spent.tokens.input
+      output += spent.tokens.output
+      usd += spent.usd
+      ms += spent.ms
+    }
+  }
+  return { iterations, tokens: { input, output }, usd, ms }
+}
diff --git a/src/loops/supervise/types.ts b/src/loops/supervise/types.ts
new file mode 100644
index 0000000..fd06232
--- /dev/null
+++ b/src/loops/supervise/types.ts
@@ -0,0 +1,443 @@
+/**
+ * @experimental
+ *
+ * Recursive execution atom — the FROZEN type surface (the keystone contract).
+ *
+ * One self-similar `Agent` atom runs inside a budget-conserving reactive `Scope`,
+ * orchestrated by a `Supervisor` over an event-sourced `SpawnJournal`. A leaf is an
+ * `Agent` that never calls `scope.spawn`; a driver is an `Agent` that spawns and runs
+ * a policy over its children's streaming results.
+ *
+ * Two invariants the surface exists to make enforceable:
+ *  - Budget is an atomically-reserved CONSERVED pool, so `Σk(treatment) ≡ Σk(blind)` by
+ *    construction (reserve-on-spawn, refund-unspent-on-settle, fail-closed admission).
+ *  - The journal records a content-addressed `outRef` per child result, so replay
+ *    rehydrates the exact `Settled` the driver branched on (the replay invariant below).
+ *
+ * The leaf RUNTIME is one OPEN `LeafExecutor` interface, not a closed `inline|sandbox|cli`
+ * union the call site switches on. The built-ins (router/inline, sandbox, cli) are the
+ * initial IMPLEMENTATIONS; any user agent is first-class the moment it implements the
+ * interface. The interface IS the extension point — no per-vendor adapters live here.
+ *
+ * Layering: substrate types (`DefaultVerdict`) come from `@tangle-network/agent-eval`;
+ * runtime-shaped types (everything else) live here. Pure types/interfaces only — this
+ * module typechecks standalone and is imported by every keystone impl.
+ */
+
+import type { DefaultVerdict } from '@tangle-network/agent-eval'
+import type { AgentProfile, BackendType } from '@tangle-network/sandbox'
+import type { LoopTokenUsage } from '../types'
+
+// `LoopTokenUsage = { input, output }` ONLY (../types). Re-exported so keystone impls
+// import the budget surface from one place. `usd` is a SEPARATE channel (see `UsageEvent`).
+export type { DefaultVerdict, LoopTokenUsage }
+
+// ── The atom ────────────────────────────────────────────────────────────────
+
+/**
+ * One self-similar atom. A leaf is an `Agent` that never calls `scope.spawn`; a driver
+ * is an `Agent` whose `act` spawns children and reacts to them via `scope.next()`. An
+ * analyst is an `Agent` whose task is "read these traces → findings" — where it runs
+ * is decided by its executor, not by a separate type.
+ *
+ * `act` MUST be replay-safe: it may read `verdict`, `spent`, and `out` (rehydrated by
+ * `outRef`) off each `done` `Settled`; it MUST NOT read `Date.now`, `Math.random`, or
+ * any unordered collection. `scope.next()` delivers strictly in recorded `seq` order.
+ */
+export interface Agent<Task, Out> {
+  readonly name: string
+  act(task: Task, scope: Scope<Out>): Promise<Out>
+}
+
+// ── The open leaf runtime ─────────────────────────────────────────────────────
+
+/**
+ * The leaf runtime — ONE open interface, not a closed union. `execute` returns a
+ * `Promise<LeafResult>` for one-shot executors OR an `AsyncIterable<UsageEvent>` for
+ * streaming ones; a streaming executor reports incremental normalized usage as it runs
+ * (the budget pool reconciles against it) and exposes its terminal artifact via
+ * `resultArtifact()`. Both shapes normalize usage to `UsageEvent` so the conserved pool
+ * meters every runtime identically.
+ *
+ * Built-in implementations (in `runtime.ts`, NOT variants here): router/inline (a direct
+ * Router/HTTP inference call, no box), sandbox (COMPOSES `runLoop` as a leaf, forwarding
+ * PR #150's optional `lineage` passthrough — does NOT reinvent checkpoint/fork), cli
+ * (Halo/RLM subprocess; `budgetExempt`, excluded from equal-k by construction). A user's
+ * own agent (mastra/agno/raw HTTP/anything) is first-class by implementing this interface.
+ */
+export interface LeafExecutor<Out> {
+  /** Stable runtime tag for traces + the equal-k exemption check. */
+  readonly runtime: Runtime
+  /**
+   * When true, this executor's spend is NOT metered against the conserved pool and its
+   * iterations are excluded from the equal-k assertion (a `cli` subprocess without
+   * token accounting). Fail-loud everywhere else: a metered executor MUST report usage.
+   */
+  readonly budgetExempt?: boolean
+  /**
+   * One-shot → resolves a `LeafResult`; streaming → yields incremental `UsageEvent`s and
+   * the terminal artifact is read from `resultArtifact()` after the stream drains.
+   * `signal` is the spawn-scoped abort (chains the acquire lifecycle for sandbox).
+   */
+  execute(task: unknown, signal: AbortSignal): Promise<LeafResult<Out>> | AsyncIterable<UsageEvent>
+  /**
+   * Tear the executor's resources down. `grace` mirrors the OTP shutdown spec
+   * (`'brutalKill'` = immediate, a number = ms grace, `'infinity'` = await clean exit).
+   */
+  teardown(grace: number | 'brutalKill' | 'infinity'): Promise<{ destroyed: boolean }>
+  /**
+   * The replay source (B1), read once after settle: the content-addressed `outRef`, the
+   * output the driver branched on, its verdict, and the conserved spend (a `LeafResult`).
+   */
+  resultArtifact(): LeafResult<Out>
+}
+
+/** Terminal artifact of a one-shot `LeafExecutor.execute` (same fields as `resultArtifact()`). */
+export interface LeafResult<Out> {
+  outRef: string // content-addressed pointer to `out`
+  out: Out // the materialized output
+  verdict?: DefaultVerdict // optional; may be absent
+  spent: Spend // spend attributed to this leaf
+}
+
+/**
+ * Normalized usage event — the single channel every executor reports through, so the
+ * conserved pool meters all runtimes identically. `tokens` carries `LoopTokenUsage`'s
+ * `{ input, output }`; `usd` is a SEPARATE channel (never folded into tokens).
+ */
+export type UsageEvent =
+  | { kind: 'tokens'; input: number; output: number } // increments; summed per channel
+  | { kind: 'cost'; usd: number } // usd increment, summed on its own channel
+  | { kind: 'iteration' } // one iteration tick
+
+/** The runtime tag of a `LeafExecutor` impl. Open by intent: the built-ins use these literals,
+ *  and `(string & {})` admits any BYO name without collapsing the union to plain `string`. */
+export type Runtime = 'router' | 'inline' | 'sandbox' | 'cli' | (string & {})
+
+// ── Executor resolution (OPEN registry, not a switch) ─────────────────────────
+
+/**
+ * `AgentProfile` does NOT carry a `harness`/backend field — `harness` lives on the
+ * sandbox SDK's `BackendConfig`, not the portable profile. So an agent is mapped to its
+ * executor through this MINIMAL wrapper, never by fabricating a field onto `AgentProfile`.
+ *
+ * Resolution (in `runtime.ts`), first match wins:
+ *  - `executor` present        → BYO: use it verbatim (a user's own `LeafExecutor`).
+ *  - `harness === null`        → router/inline: a direct Router call, no box.
+ *  - `harness` is a `BackendType` → sandbox: compose `runLoop` against `profile` on that backend.
+ * Fail loud on an unresolvable spec (no executor and an unknown harness).
+ */
+export interface AgentSpec {
+  readonly profile: AgentProfile // the portable profile; carries no harness/backend field
+  /** `null` selects router/inline; a `BackendType` selects the sandboxed harness. */
+  readonly harness: BackendType | null
+  /** Bring-your-own executor: when set, overrides harness-based resolution entirely. */
+  readonly executor?: LeafExecutor<unknown>
+}
+
+/**
+ * Builds a fresh `LeafExecutor` for one spawn from the resolved `spec`, given the `ctx`
+ * seams. Per-spawn (not shared) so each child owns its own box/abort/teardown lifecycle.
+ * A BYO factory lets a user supply construction args without pre-instantiating.
+ */
+export type LeafExecutorFactory<Out> = (spec: AgentSpec, ctx: ExecutorContext) => LeafExecutor<Out>
+
+/** Construction context handed to a `LeafExecutorFactory` — the seams a built-in needs
+ *  (sandbox client for the sandbox executor, router config for router/inline) without
+ *  the factory reaching into module globals. */
+export interface ExecutorContext {
+  readonly signal: AbortSignal // NOTE(review): presumably the spawn-scoped abort — confirm in runtime.ts
+  /** Opaque seams the registry threads through; values are `unknown`, so a built-in narrows what it needs. */
+  readonly seams: Readonly<Record<string, unknown>>
+}
+
+/**
+ * The OPEN resolver: maps an `AgentSpec` to a `LeafExecutorFactory`. The default
+ * registry resolves the three built-ins AND accepts a BYO `executor`/factory; callers
+ * register more runtimes by name. NOT a closed switch — registration is the extension
+ * point, mirroring the open `LeafExecutor` interface.
+ */
+export interface ExecutorRegistry {
+  /** Register a factory for a named runtime. Throws on a duplicate name (fail loud). */
+  register<Out>(runtime: Runtime, factory: LeafExecutorFactory<Out>): void
+  /**
+   * Resolve a spec to a factory. Precedence: a BYO `spec.executor` → a trivial factory
+   * returning it; else `harness === null` → the `'router'` factory; else a registered
+   * factory for the harness-derived runtime. Typed outcome: inspect `succeeded` before
+   * `value` (no silent fallback). `Out` is caller-asserted — the signature cannot check it.
+   */
+  resolve<Out>(
+    spec: AgentSpec,
+  ): { succeeded: true; value: LeafExecutorFactory<Out> } | { succeeded: false; error: string }
+}
+
+// ── Budget — the conserved reservation pool ───────────────────────────────────
+
+/** A budget envelope on a spawn or the root. All ceilings; the pool reserves against them. */
+export interface Budget {
+  readonly maxIterations: number // required iteration ceiling
+  readonly maxTokens: number // required token ceiling
+  readonly maxUsd?: number // optional usd ceiling
+  readonly deadlineMs?: number // optional deadline in ms — NOTE(review): absolute vs relative is not stated here
+}
+
+/** Conserved spend, reconciled from the normalized `UsageEvent` stream. Tokens and usd
+ *  are separate channels (never folded). */
+export interface Spend {
+  iterations: number // count of `iteration` events
+  tokens: LoopTokenUsage // `{ input, output }`, kept as two counters
+  usd: number // sum of `cost` events
+  ms: number // NOTE(review): no `UsageEvent` variant carries time — presumably elapsed ms set elsewhere
+}
+
+// ── Node lifecycle ────────────────────────────────────────────────────────────
+
+/** OTP child-spec restart class (in OTP: never / only after abnormal exit / always restarted). */
+export type Restart = 'temporary' | 'transient' | 'permanent'
+
+/** `'acquiring'` is a first-class status (M1): sandbox acquire takes real time BEFORE a
+ *  node is `running`, and an abort there must reap the orphan box, so abort covers it. */
+export type NodeStatus = 'pending' | 'acquiring' | 'running' | 'done' | 'failed' | 'cancelled'
+
+/** Deterministic id `${parent}:s${n}`, never wall-clock. NOTE(review): minted at spawn, so `n` looks like the spawn ordinal, not the cursor `seq` — confirm. */
+export type NodeId = string
+
+export interface SpawnOpts {
+  readonly budget: Budget // the envelope `spawn` reserves from the pool for this child
+  readonly label: string // human-readable name; presumably surfaced as `Handle.label` — confirm
+  readonly restart?: Restart // OTP restart class; NOTE(review): the default when omitted is not stated here
+  /** Teardown grace handed to the executor when this node is reaped. */
+  readonly shutdown?: number | 'brutalKill' | 'infinity'
+}
+
+/**
+ * A live child handle, returned by a successful `scope.spawn`. `abort()` is defined over
+ * the ACQUIRE lifecycle too: it chains into the `acquireSandbox` signal and reaps a
+ * find-by-name orphan box, so a node aborted mid-acquire never leaks (M1).
+ */
+export interface Handle<Out> {
+  readonly id: NodeId
+  readonly label: string
+  readonly status: NodeStatus
+  abort(reason?: string): void // `reason` is optional free text
+  /** Phantom: binds the handle to the child's output type so `spawn<C>` returns a
+   *  `Handle<C>` distinct from a `Handle<other>`. Type-only — never present at runtime. */
+  readonly __out?: Out
+}
+
+/**
+ * A settled child, delivered by `scope.next()`. `seq` is the monotonic cursor position at
+ * which `next()` yielded this settlement (B2) — NOT wall-clock — and replay delivers
+ * strictly in `seq` order. `outRef` rehydrates `out` from the `ResultBlobStore` on replay.
+ */
+export type Settled<Out> =
+  | {
+      kind: 'done'
+      handle: Handle<Out>
+      out: Out
+      outRef: string
+      verdict?: DefaultVerdict
+      spent: Spend
+      seq: number
+    }
+  | {
+      kind: 'down' // this variant carries no `out` / `outRef` / `verdict` / `spent`
+      handle: Handle<Out>
+      reason: string
+      /** True = infrastructure failure (excluded from merge `n` / equal-k), not a bad result. */
+      infra: boolean
+      restartCount: number
+      seq: number
+    }
+// ── The reactive Scope ─────────────────────────────────────────────────────────
+
+/**
+ * The budget-conserving reactive scope an `Agent.act` runs inside. `spawn` reserves
+ * budget atomically from the shared pool and FAILS CLOSED when the pool can't cover it;
+ * `next()` is a ray.wait cursor (n=1) over THIS scope's IN-MEMORY live set; `view` reads
+ * the in-memory nursery (NOT the log), O(live).
+ */
+export interface Scope<Out> {
+  /**
+   * Spawn a child (synchronous — the outcome is returned directly, not as a promise).
+   * Reserves `opts.budget` from the conserved pool atomically; refunds the unspent rest on
+   * settle. Fail-closed on an exhausted pool or depth ceiling: inspect `ok` before `handle`.
+   */
+  spawn<C extends Out>(
+    agent: Agent<unknown, C>,
+    task: unknown,
+    opts: SpawnOpts,
+  ): { ok: true; handle: Handle<C> } | { ok: false; reason: 'budget-exhausted' | 'depth-exceeded' }
+  /** ray.wait n=1 over this scope's in-memory live set; resolves as each child settles
+   *  (a failed child arrives as a `down` settlement); `null` when the live set is empty. */
+  next(): Promise<Settled<Out> | null>
+  /** The live tree — reads the in-memory nursery, not the journal. */
+  readonly view: TreeView
+  /** Conserved-pool readouts (post-reservation). NOTE(review): no iterations channel is exposed. */
+  readonly budget: Readonly<{
+    tokensLeft: number // free tokens after reservations
+    usdLeft: number
+    deadlineMs: number // NOTE(review): a value <= 0 appears to mean "no deadline" — confirm
+    reservedTokens: number // tokens currently held by reservations
+  }>
+}
+
+// ── Observability view (read off the in-memory nursery) ────────────────────────
+
+export interface NodeSnapshot {
+  readonly id: NodeId
+  readonly parent?: NodeId // optional — presumably omitted for the root node; confirm
+  readonly label: string
+  readonly status: NodeStatus
+  readonly runtime: Runtime // runtime tag of the node's executor
+  readonly budget: Budget // the envelope this node was given
+  /** Conserved spend so far for this node. */
+  readonly spent: Spend
+  /** `outRef` once the node is `done` (the replay/result pointer); absent before that. */
+  readonly outRef?: string
+}
+
+/** The live tree — what `scope.view` / `RootHandle.view()` materialize for a viewer. */
+export interface TreeView {
+  readonly root: NodeId // id of the tree's root node
+  readonly nodes: ReadonlyArray<NodeSnapshot> // read-only list of per-node snapshots
+  /** Count of nodes in `running` or `acquiring` — the "what's in flow?" answer. */
+  readonly inFlight: number
+}
+
+// ── Event source — the decision/payload split the replay argument rests on ─────
+
+/** Journaled spawn-tree events (B1/B2), discriminated by `kind`. `seq` is the cursor order;
+ *  `at` is an ISO timestamp for human inspection only (NOT a replay input). */
+export type SpawnEvent =
+  | {
+      kind: 'spawned'
+      id: NodeId
+      parent?: NodeId
+      label: string
+      budget: Budget
+      runtime: Runtime
+      seq: number
+      at: string
+    }
+  | {
+      kind: 'settled'
+      id: NodeId
+      status: 'done' | 'down'
+      /** Content-addressed result pointer; rehydrates `out` from `ResultBlobStore`. */
+      outRef?: string
+      verdict?: DefaultVerdict
+      spent: Spend // required for BOTH statuses, unlike `Settled`'s `down` variant
+      infra?: boolean
+      seq: number
+      at: string
+    }
+  | { kind: 'cancelled'; id: NodeId; reason: string; seq: number; at: string }
+
+/**
+ * The spawn-tree event source (mirrors `ConversationJournal`'s begin/append/load shape).
+ * `loadTree` replays the full ordered event list for resume/replay; `appendEvent` is
+ * called only AFTER the event is observed-committed (never speculative).
+ */
+export interface SpawnJournal {
+  loadTree(root: NodeId): Promise<SpawnEvent[] | undefined> // `undefined` = no tree recorded under `root`
+  beginTree(root: NodeId, at: string): Promise<void> // `at` is a timestamp string
+  appendEvent(root: NodeId, ev: SpawnEvent): Promise<void>
+}
+
+/** Content-addressed result blobs (the `outRef` → artifact map) backing the replay
+ *  invariant. Split from the journal so the journal stays small (decisions) and the
+ *  payloads (evidence) live where a viewer/replayer rehydrates them. */
+export interface ResultBlobStore {
+  put(outRef: string, artifact: unknown): Promise<void>
+  get(outRef: string): Promise<unknown | undefined> // presumably `undefined` when nothing is stored — confirm
+}
+
+// ── The Supervisor ─────────────────────────────────────────────────────────────
+
+/**
+ * Owns the conserved pool, the spawn log, the abort cascade, the OTP intensity breaker,
+ * and the root handle. `run` executes the root `Agent` to completion and resolves a typed
+ * `SupervisedResult`; `attach` wires a live `RootHandle` (the Q2 substrate a chat/pi-viz client consumes).
+ */
+export interface Supervisor<Task, Out> {
+  run(root: Agent<Task, Out>, task: Task, opts: SupervisorOpts): Promise<SupervisedResult<Out>>
+  attach(h: RootHandle<Out>): void
+}
+
+export interface SupervisorOpts {
+  /** The root conserved-pool ceiling (tokens + usd + iterations + deadline). */
+  readonly budget: Budget
+  /** Trace-correlation root + the journal/blob root key. */
+  readonly runId: NodeId
+  /** Event source — REQUIRED by this type (no default here); in-memory for tests, JSONL/FS for durability. */
+  readonly journal: SpawnJournal
+  /** Result payload store backing `outRef` rehydration. */
+  readonly blobs: ResultBlobStore
+  /** Executor resolution — the open registry mapping `AgentSpec` → `LeafExecutor`. */
+  readonly executors: ExecutorRegistry
+  /** Runtime recursion-depth ceiling (paired with the conserved pool per R3). */
+  readonly maxDepth?: number
+  /**
+   * OTP intensity breaker: more than `maxRestarts` child restarts within `withinMs`
+   * trips the supervisor to `no-winner` rather than restarting forever.
+   */
+  readonly maxRestarts?: number
+  readonly withinMs?: number
+  readonly now?: () => number // clock override; readers fall back to `Date.now` when omitted
+  readonly signal?: AbortSignal // NOTE(review): presumably the caller's abort — confirm in supervisor.ts
+}
+
+/** Typed terminal result (M2) — a no-winner is NEVER coerced to a best-effort output. */
+export type SupervisedResult<Out> =
+  | {
+      kind: 'winner'
+      out: Out
+      outRef: string
+      verdict?: DefaultVerdict
+      tree: TreeView
+      spentTotal: Spend
+    }
+  | {
+      kind: 'no-winner' // NOTE(review): carries no `spentTotal`, so a failed run's spend is not reported here
+      reason: 'all-children-down' | 'budget-exhausted' | 'aborted'
+      tree: TreeView
+      downCount: number
+    }
+
+/** Live root handle — the substrate a chat/pi-viz client attaches to (Q2). `signal`
+ *  delivers an out-of-band `RootSignal` to the running root; `view()` materializes the tree. */
+export interface RootHandle<Out> {
+  view(): TreeView
+  signal(msg: RootSignal): void
+  abort(reason?: string): void // `reason` is optional free text
+  /** Phantom: binds the handle to the supervised run's output type. Type-only — never
+   *  present at runtime; lets `attach(h: RootHandle<Out>)` stay output-typed. */
+  readonly __out?: Out
+}
+
+/** Out-of-band message to a running root. Open by intent — NOTE(review): as declared this is a closed union, so a client extends it by widening the type. */
+export type RootSignal =
+  | { kind: 'pause' }
+  | { kind: 'resume' }
+  | { kind: 'cancel'; reason?: string }
+  | { kind: 'ask'; question: string }
+
+// ── Widening governor ────────────────────────────────────────────────────────
+
+/**
+ * The progressive-widening gate (MCTS-PW). Decides whether a settled child is
+ * `promising` enough to spawn another under the remaining pool. DEFAULTS TO FLAT
+ * (`shouldWiden` always false) so a gate run never widens and the selector≠judge
+ * firewall conflict (R2) stays dormant. When widening IS enabled, `promising` MUST be
+ * derived from TRACE findings (`analyses`), never raw `verdict` — or the gate carries
+ * an explicit, argued `judgeExempt: true` (the documented escape hatch, off by default).
+ */
+export interface WidenGate<Out> {
+  /** Default impl returns false for every settlement (flat); `budget` has the shape of `Scope.budget`. */
+  shouldWiden(settled: Settled<Out>, budget: Scope<Out>['budget']): boolean
+  /** When true, widening may read `verdict` directly (collides with the steer firewall —
+   *  must be explicitly argued per cell, never defaulted on). */
+  readonly judgeExempt?: boolean
+}
diff --git a/tests/loops/supervise.test.ts b/tests/loops/supervise.test.ts
new file mode 100644
index 0000000..a1de89e
--- /dev/null
+++ b/tests/loops/supervise.test.ts
@@ -0,0 +1,766 @@
+import type { AgentProfile } from '@tangle-network/sandbox'
+import { describe, expect, it } from 'vitest'
+import {
+  InMemoryResultBlobStore,
+  InMemorySpawnJournal,
+  materializeTreeView,
+  replaySpawnTree,
+} from '../../src/durable/spawn-journal'
+import { ValidationError } from '../../src/errors'
+import { defaultSelectWinner } from '../../src/loops/run-loop'
+import { createBudgetPool, spendFromUsageEvents } from '../../src/loops/supervise/budget'
+import { createExecutorRegistry } from '../../src/loops/supervise/runtime'
+import { createScope, settledToIteration } from '../../src/loops/supervise/scope'
+import { createRootHandle, createSupervisor } from '../../src/loops/supervise/supervisor'
+import type {
+  Agent,
+  AgentSpec,
+  Budget,
+  DefaultVerdict,
+  LeafExecutor,
+  LeafResult,
+  Scope,
+  Settled,
+  SpawnEvent,
+  Spend,
+  SupervisorOpts,
+  UsageEvent,
+  WidenGate,
+} from '../../src/loops/supervise/types'
+
+// ── The mock LeafExecutor — the whole keystone runs offline against this ─────────
+//
+// A scripted leaf: a fixed `UsageEvent` program drives the conserved-pool fold, a
+// scripted `out` (+ optional verdict) is the artifact the driver branches on, and a
+// `failWith` knob lets a child go `down` (typed, never re-thrown by the scope) so the
+// supervisor join barrier can be exercised. No network, no sandbox, no subprocess.
+interface MockScript {
+  readonly out: unknown // the artifact `resultArtifact()` reports; also keys the mock `outRef`
+  readonly events: UsageEvent[] // yielded in order by `execute`; also folded into `spent`
+  readonly verdict?: DefaultVerdict
+  /** When set, `execute` throws before yielding anything — the scope types it into a `down` settlement. */
+  readonly failWith?: string
+  /** When set, `execute` parks until this promise resolves OR the spawn signal aborts. */
+  readonly block?: Promise<void>
+}
+
+function mockExecutor(script: MockScript): LeafExecutor<unknown> {
+  const spent = spendFromUsageEvents(script.events) // folded once, up front
+  const outRef = `mock:${stableKey(script.out)}` // derived from the scripted output only
+  const executor: LeafExecutor<unknown> = {
+    runtime: 'router',
+    execute(_task: unknown, signal: AbortSignal): AsyncIterable<UsageEvent> {
+      // Streaming shape: yield the scripted usage; the artifact is read separately from
+      // resultArtifact(). A `block` script parks until `script.block` resolves or the
+      // spawn-scoped signal aborts, whichever comes first; the events are yielded after.
+      return (async function* () {
+        if (script.failWith !== undefined) throw new ValidationError(script.failWith)
+        if (script.block) {
+          await Promise.race([
+            script.block,
+            new Promise<void>((resolve) => {
+              if (signal.aborted) return resolve()
+              // NOTE(review): stays registered if `block` wins the race — harmless in tests.
+              signal.addEventListener('abort', () => resolve(), { once: true })
+            }),
+          ])
+        }
+        for (const ev of script.events) yield ev
+      })()
+    },
+    teardown(): Promise<{ destroyed: boolean }> {
+      return Promise.resolve({ destroyed: true })
+    },
+    resultArtifact(): LeafResult<unknown> {
+      return {
+        outRef,
+        out: script.out,
+        ...(script.verdict ? { verdict: script.verdict } : {}),
+        spent,
+      }
+    },
+  }
+  return executor
+}
+
+function stableKey(value: unknown): string { // JSON text of `value`; object keys are NOT order-normalized
+  return JSON.stringify(value) ?? String(value) // stringify yields `undefined` for e.g. `undefined` itself
+}
+
+/** Build a leaf `Agent` whose `executorSpec.executor` is a BYO mock executor for `script`.
+ *  Per the BYO-precedence rule the registry uses that executor verbatim, so no built-in
+ *  router/sandbox/cli factory ever fires and the test stays fully offline. */
+function leafAgent(name: string, script: MockScript): Agent<unknown, unknown> {
+  type SpecCarrier = Agent<unknown, unknown> & { executorSpec: AgentSpec }
+  const executorSpec: AgentSpec = {
+    profile: { name } as AgentProfile,
+    harness: null,
+    executor: mockExecutor(script),
+  }
+  const act = async (): Promise<unknown> => script.out
+  return { name, act, executorSpec } as SpecCarrier
+}
+
+// Scripted usage: `iterations` iteration ticks, then one tokens event with both channels.
+const tokensOnly = (input: number, output: number, iterations = 1): UsageEvent[] => {
+  const ticks: UsageEvent[] = []
+  while (ticks.length < iterations) ticks.push({ kind: 'iteration' })
+  return [...ticks, { kind: 'tokens', input, output }]
+}
+
+function scopeArgs(over: Partial<Parameters<typeof createScope>[0]> = {}) {
+  // Offline defaults, each overridable through `over`: root id 'run', a fresh in-memory
+  // journal/blob store/registry, a 100-iteration / 100k-token pool, depth 0, a signal
+  // that is never aborted, and a `now` pinned to 0.
+  const root = over.root ?? 'run'
+  const journal = over.journal ?? new InMemorySpawnJournal()
+  const pool = over.pool ?? createBudgetPool({ maxIterations: 100, maxTokens: 100_000 }, () => 0)
+  const args = {
+    parentId: over.parentId ?? root,
+    root,
+    pool,
+    journal,
+    blobs: over.blobs ?? new InMemoryResultBlobStore(),
+    executors: over.executors ?? createExecutorRegistry(),
+    seams: over.seams ?? {},
+    depth: over.depth ?? 0,
+    maxDepth: over.maxDepth,
+    signal: over.signal ?? new AbortController().signal,
+    now: over.now ?? (() => 0),
+  }
+  return { args, pool, journal }
+}
+
+async function beginScope(over: Partial<Parameters<typeof createScope>[0]> = {}) {
+  const parts = scopeArgs(over) // { args, pool, journal }
+  await parts.journal.beginTree(parts.args.root, new Date(0).toISOString()) // epoch-0 stamp
+  return { scope: createScope<unknown>(parts.args), ...parts }
+}
+
+// ── 1. Conserved budget pool ─────────────────────────────────────────────────────
+
+describe('conserved budget pool', () => {
+  it('reserve fails closed when the pool cannot cover the child', () => {
+    const pool = createBudgetPool({ maxIterations: 4, maxTokens: 1000 }, () => 0)
+    const a = pool.reserve({ maxIterations: 2, maxTokens: 600, label: '' } as Budget) // NOTE(review): `label` is not a `Budget` field; the cast hides the excess property
+    expect(a.ok).toBe(true)
+    // 600 reserved, 400 free; a 500-token child must fail closed (never overcommit).
+    const b = pool.reserve({ maxIterations: 2, maxTokens: 500, label: '' } as Budget)
+    expect(b).toEqual({ ok: false, reason: 'budget-exhausted' })
+    expect(pool.readout().tokensLeft).toBe(400) // the refused reserve left the pool untouched
+    expect(pool.readout().reservedTokens).toBe(600)
+  })
+
+  it('refunds the unspent remainder on reconcile (Σ conservation)', () => {
+    const pool = createBudgetPool({ maxIterations: 10, maxTokens: 1000 }, () => 0)
+    const r = pool.reserve({ maxIterations: 5, maxTokens: 800, label: '' } as Budget)
+    if (!r.ok) throw new Error('reserve should have succeeded')
+    expect(pool.readout().tokensLeft).toBe(200)
+    expect(pool.readout().reservedTokens).toBe(800)
+    // Spent 300 of the 800 reserved → 500 refunds to free; reserved drops to 0.
+    pool.reconcile(r.ticket, {
+      iterations: 2,
+      tokens: { input: 100, output: 200 }, // input + output = 300 against the 800 reserved
+      usd: 0,
+      ms: 0,
+    })
+    expect(pool.readout().tokensLeft).toBe(700)
+    expect(pool.readout().reservedTokens).toBe(0)
+  })
+
+  it('fails loud on a double reconcile (no silent double refund)', () => {
+    const pool = createBudgetPool({ maxIterations: 10, maxTokens: 1000 }, () => 0)
+    const r = pool.reserve({ maxIterations: 5, maxTokens: 800, label: '' } as Budget)
+    if (!r.ok) throw new Error('reserve should have succeeded')
+    const spend: Spend = { iterations: 1, tokens: { input: 0, output: 0 }, usd: 0, ms: 0 }
+    pool.reconcile(r.ticket, spend) // first reconcile settles the ticket
+    expect(() => pool.reconcile(r.ticket, spend)).toThrow(/unknown or already-settled/)
+  })
+
+  it('a usd request against an uncapped root is unsatisfiable (fail closed)', () => {
+    const pool = createBudgetPool({ maxIterations: 10, maxTokens: 1000 }, () => 0) // root sets no `maxUsd`
+    const r = pool.reserve({ maxIterations: 1, maxTokens: 10, maxUsd: 0.5, label: '' } as Budget)
+    expect(r).toEqual({ ok: false, reason: 'budget-exhausted' })
+  })
+
+  it('spendFromUsageEvents folds tokens + usd on separate channels', () => {
+    const spend = spendFromUsageEvents([
+      { kind: 'iteration' },
+      { kind: 'tokens', input: 10, output: 5 },
+      { kind: 'tokens', input: 2, output: 3 },
+      { kind: 'cost', usd: 0.01 },
+    ])
+    expect(spend).toEqual({ iterations: 1, tokens: { input: 12, output: 8 }, usd: 0.01, ms: 0 }) // no event carries time, so `ms` stays 0
+  })
+})
+
+// ── 2. equal-k by construction ──────────────────────────────────────────────────
+
+describe('equal-k by construction', () => {
+  it('two arms at equal per-child budget spend equal total iterations', async () => {
+    // Each arm spawns 3 children at a fixed 1-iteration budget. NOTE(review): each arm gets
+    // its OWN default pool (100 iterations via `scopeArgs`), not a shared pool sized for 6, so
+    // this pins equal scripted spend per arm — it does not exercise cross-arm conservation.
+    const runArm = async (label: string) => {
+      const { scope } = await beginScope({ root: `arm-${label}`, parentId: `arm-${label}` })
+      let spawned = 0
+      for (let i = 0; i < 3; i += 1) {
+        const res = scope.spawn(
+          leafAgent(`${label}-${i}`, { out: { label, i }, events: tokensOnly(10, 10, 1) }),
+          'task',
+          { budget: { maxIterations: 1, maxTokens: 100 }, label: `${label}-${i}` },
+        )
+        if (res.ok) spawned += 1
+      }
+      let total = 0
+      for (let settled = await scope.next(); settled !== null; settled = await scope.next()) {
+        if (settled.kind === 'done') total += settled.spent.iterations // `down` settlements carry no `spent`
+      }
+      return { spawned, total }
+    }
+    const treatment = await runArm('t')
+    const blind = await runArm('b')
+    expect(treatment.spawned).toBe(3)
+    expect(blind.spawned).toBe(3)
+    expect(treatment.total).toBe(blind.total)
+    expect(treatment.total).toBe(3)
+  })
+})
+
+// ── 3. The reactive Scope: seq order, view, inFlight ────────────────────────────
+
+describe('reactive scope', () => {
+  // Regression pin (scope↔journal seam): a `spawned` event must not reuse the cursor
+  // `seq` that `next()` later stamps on the `settled` event, or the journal's per-tree
+  // unique-seq guard rejects the settle write and the cursor rejects. The scope's OWN
+  // doc says "`seq` is stamped by `next()`, never here" — so the spawn event needs a
+  // separate spawn-ordinal. One spawn → one drain must not corrupt the journal.
+  it('one spawn → one drain does not collide the journal seq namespace', async () => {
+    const journal = new InMemorySpawnJournal()
+    const { scope } = await beginScope({ journal })
+    scope.spawn(leafAgent('only', { out: 1, events: tokensOnly(1, 1, 1) }), 'task', {
+      budget: { maxIterations: 1, maxTokens: 10 },
+      label: 'only',
+    })
+    const settled = await scope.next()
+    expect(settled?.kind).toBe('done')
+    expect(settled?.seq).toBe(0)
+    const events = (await journal.loadTree('run')) as SpawnEvent[]
+    const settledSeqs = events.filter((e) => e.kind === 'settled').map((e) => e.seq)
+    expect(new Set(settledSeqs).size).toBe(settledSeqs.length)
+  })
+
+  it('next() yields in monotonic seq order and view reflects the in-memory tree', async () => {
+    const { scope } = await beginScope()
+    for (let i = 0; i < 4; i += 1) {
+      const res = scope.spawn(
+        leafAgent(`c${i}`, { out: { i }, events: tokensOnly(5, 5, 1) }),
+        'task',
+        { budget: { maxIterations: 1, maxTokens: 50 }, label: `c${i}` },
+      )
+      expect(res.ok).toBe(true)
+    }
+    expect(scope.view.nodes).toHaveLength(4)
+
+    const seqs: number[] = []
+    const ids: string[] = []
+    for (let settled = await scope.next(); settled !== null; settled = await scope.next()) {
+      seqs.push(settled.seq)
+      ids.push(settled.handle.id)
+    }
+    // seq is the monotonic cursor order, contiguous from 0.
+    expect(seqs).toEqual([0, 1, 2, 3])
+    // ids are the deterministic `${parent}:s${seq}` form minted at spawn order.
+    expect(ids.every((id) => /^run:s\d+$/.test(id))).toBe(true)
+    expect(scope.view.inFlight).toBe(0)
+  })
+
+  it('inFlight shrinks as children settle (live set is the nursery, not the log)', async () => {
+    // Both children park on their own gate so neither settles before the assertion —
+    // inFlight is read off the in-memory nursery, deterministically, with no race.
+    const gateA = deferred()
+    const gateB = deferred()
+    const { scope } = await beginScope()
+    scope.spawn(
+      leafAgent('a', { out: 'a', events: tokensOnly(1, 1, 1), block: gateA.promise }),
+      'task',
+      { budget: { maxIterations: 1, maxTokens: 10 }, label: 'a' },
+    )
+    scope.spawn(
+      leafAgent('b', { out: 'b', events: tokensOnly(1, 1, 1), block: gateB.promise }),
+      'task',
+      { budget: { maxIterations: 1, maxTokens: 10 }, label: 'b' },
+    )
+    expect(scope.view.inFlight).toBe(2)
+    gateA.resolve()
+    const first = await scope.next()
+    expect(first?.kind).toBe('done')
+    expect(scope.view.inFlight).toBe(1)
+    gateB.resolve()
+    const second = await scope.next()
+    expect(second?.kind).toBe('done')
+    expect(scope.view.inFlight).toBe(0)
+    expect(await scope.next()).toBeNull()
+  })
+
+  it('a thrown executor becomes a typed `down` (infra), never rejects the cursor', async () => {
+    const { scope } = await beginScope()
+    // First child is scripted to fail with 'leaf exploded'; the second completes normally.
+    const exploding = leafAgent('boom', { out: null, events: [], failWith: 'leaf exploded' })
+    scope.spawn(exploding, 'task', {
+      budget: { maxIterations: 1, maxTokens: 10 },
+      label: 'boom',
+    })
+    const healthy = leafAgent('ok', { out: 'ok', events: tokensOnly(1, 1, 1) })
+    scope.spawn(healthy, 'task', {
+      budget: { maxIterations: 1, maxTokens: 10 },
+      label: 'ok',
+    })
+
+    // Drain the cursor until it reports null; a rejected next() would fail the test here.
+    const outcomes: Settled<unknown>[] = []
+    let cursor = await scope.next()
+    while (cursor !== null) {
+      outcomes.push(cursor)
+      cursor = await scope.next()
+    }
+
+    const failed = outcomes.find((o) => o.kind === 'down')
+    const finished = outcomes.find((o) => o.kind === 'done')
+    expect(failed).toBeDefined()
+    if (failed?.kind === 'down') {
+      // The failure is classified as infra and carries the executor's message.
+      expect(failed.infra).toBe(true)
+      expect(failed.reason).toContain('leaf exploded')
+    }
+    // The sibling is unaffected and still settles as done.
+    expect(finished?.kind).toBe('done')
+  })
+
+  it('spawn fails closed on depth-exceeded', async () => {
+    // The scope is opened with depth equal to maxDepth; the spawn below must be refused
+    // with a typed result rather than a throw.
+    const { scope } = await beginScope({ depth: 2, maxDepth: 2 })
+    const tooDeep = leafAgent('deep', { out: 1, events: tokensOnly(1, 1) })
+    const refusal = scope.spawn(tooDeep, 'task', {
+      budget: { maxIterations: 1, maxTokens: 10 },
+      label: 'deep',
+    })
+    expect(refusal).toEqual({ ok: false, reason: 'depth-exceeded' })
+  })
+
+  it('spawn fails closed on budget-exhausted', async () => {
+    // The pool is sized to exactly what a single child below asks for.
+    const pool = createBudgetPool({ maxIterations: 1, maxTokens: 10 }, () => 0)
+    const { scope } = await beginScope({ pool })
+
+    // First spawn is admitted.
+    const firstChild = leafAgent('a', { out: 1, events: tokensOnly(1, 1) })
+    const admitted = scope.spawn(firstChild, 'task', {
+      budget: { maxIterations: 1, maxTokens: 10 },
+      label: 'a',
+    })
+    expect(admitted.ok).toBe(true)
+
+    // Second spawn asks for the same budget again and is refused with a typed reason.
+    const secondChild = leafAgent('b', { out: 2, events: tokensOnly(1, 1) })
+    const refused = scope.spawn(secondChild, 'task', {
+      budget: { maxIterations: 1, maxTokens: 10 },
+      label: 'b',
+    })
+    expect(refused).toEqual({ ok: false, reason: 'budget-exhausted' })
+  })
+
+  it('abort mid-flight reaps the live child (down, no throw)', async () => {
+    // The scope is bound to this controller's signal, so aborting it is the only way
+    // the parked child below can end.
+    const controller = new AbortController()
+    const gate = deferred() // never resolves — the child only ends via abort.
+    const { scope } = await beginScope({ signal: controller.signal })
+    scope.spawn(
+      leafAgent('parked', { out: 'p', events: tokensOnly(1, 1, 1), block: gate.promise }),
+      'task',
+      { budget: { maxIterations: 1, maxTokens: 10 }, label: 'parked' },
+    )
+    // The child is live and blocked before the abort is issued.
+    expect(scope.view.inFlight).toBe(1)
+    controller.abort('test reap')
+    // The cursor must yield a `down` settlement (not reject) and the live set must empty.
+    const settled = await scope.next()
+    expect(settled?.kind).toBe('down')
+    expect(scope.view.inFlight).toBe(0)
+  })
+})
+
+// ── 4. settledToIteration adapter (single-sourced selection) ─────────────────────
+
+describe('settledToIteration adapter', () => {
+  it('projects a done settlement into the kernel Iteration so defaultSelectWinner is shared', async () => {
+    const { scope } = await beginScope()
+
+    // Two valid children that differ only in output and verdict score (0.2 vs 0.9).
+    const lowScorer = leafAgent('lo', {
+      out: 'lo',
+      events: tokensOnly(1, 1, 1),
+      verdict: { valid: true, score: 0.2 },
+    })
+    scope.spawn(lowScorer, 'task', {
+      budget: { maxIterations: 1, maxTokens: 10 },
+      label: 'lo',
+    })
+    const highScorer = leafAgent('hi', {
+      out: 'hi',
+      events: tokensOnly(1, 1, 1),
+      verdict: { valid: true, score: 0.9 },
+    })
+    scope.spawn(highScorer, 'task', {
+      budget: { maxIterations: 1, maxTokens: 10 },
+      label: 'hi',
+    })
+
+    // Adapt every `done` settlement; anything else is skipped.
+    const iterations = []
+    let settled = await scope.next()
+    while (settled !== null) {
+      if (settled.kind === 'done') iterations.push(settledToIteration(settled))
+      settled = await scope.next()
+    }
+
+    // Selection over the adapted iterations picks the higher-scored child.
+    const best = defaultSelectWinner(iterations)
+    expect(best?.output).toBe('hi')
+    expect(best?.verdict?.score).toBe(0.9)
+  })
+
+  it('fails loud when handed a `down` settlement (only a done child is an iteration)', () => {
+    // A hand-built `down` settlement; the adapter is expected to throw on it.
+    const downSettlement: Settled<unknown> = {
+      kind: 'down',
+      handle: { id: 'run:s0', label: 'x', status: 'failed', abort() {} },
+      reason: 'boom',
+      infra: false,
+      restartCount: 0,
+      seq: 0,
+    }
+    const adaptDown = () => settledToIteration(downSettlement)
+    expect(adaptDown).toThrow(/cannot adapt a 'down'/)
+  })
+})
+
+// ── 5. Open executor registry resolution ─────────────────────────────────────────
+
+describe('open executor registry', () => {
+  it('resolves a BYO executor verbatim (highest precedence)', () => {
+    const registry = createExecutorRegistry()
+    const byo = mockExecutor({ out: 'x', events: [] })
+    const spec: AgentSpec = {
+      profile: { name: 'byo' } as AgentProfile,
+      harness: null,
+      executor: byo,
+    }
+    const resolution = registry.resolve(spec)
+    expect(resolution.succeeded).toBe(true)
+    if (resolution.succeeded) {
+      const { signal } = new AbortController()
+      const instance = resolution.value(spec, { signal, seams: {} })
+      // The resolved factory must hand back the caller's own executor object,
+      // not a newly constructed one.
+      expect(instance).toBe(byo)
+    }
+  })
+
+  it('harness:null resolves the router factory; a BackendType resolves the sandbox factory', () => {
+    const registry = createExecutorRegistry()
+    const routerResolution = registry.resolve({
+      profile: { name: 'r' } as AgentProfile,
+      harness: null,
+    })
+    const sandboxResolution = registry.resolve({
+      profile: { name: 's' } as AgentProfile,
+      harness: 'claude-code',
+    })
+    expect(routerResolution.succeeded).toBe(true)
+    expect(sandboxResolution.succeeded).toBe(true)
+    // The two harness settings must resolve to different factory functions.
+    if (routerResolution.succeeded && sandboxResolution.succeeded) {
+      expect(routerResolution.value).not.toBe(sandboxResolution.value)
+    }
+  })
+
+  it('register is fail-loud on a duplicate runtime tag', () => {
+    const registry = createExecutorRegistry()
+    // 'router' is expected to be taken already on a fresh registry.
+    const registerRouterAgain = () => registry.register('router', mockRouterFactory())
+    expect(registerRouterAgain).toThrow(/already registered/)
+  })
+
+  it('register accepts a brand-new runtime tag (the open extension point)', () => {
+    const registry = createExecutorRegistry()
+    const registerVendor = () => registry.register('vendorx', mockRouterFactory())
+    expect(registerVendor).not.toThrow()
+  })
+
+  it('scope.spawn fails loud when an agent carries no executorSpec (AgentSpec)', async () => {
+    const { scope } = await beginScope()
+    // A bare agent object with only `name` and `act` — nothing describing its executor.
+    const orphan: Agent<unknown, unknown> = { name: 'orphan', act: async () => 1 }
+    const spawnOrphan = () =>
+      scope.spawn(orphan, 'task', { budget: { maxIterations: 1, maxTokens: 10 }, label: 'orphan' })
+    expect(spawnOrphan).toThrow(/exposes no .*executorSpec/)
+  })
+})
+
+/** Returns a zero-argument factory that calls `mockExecutor` with a fixed script
+ *  (`out: 'x'`, no events) each time it is invoked. */
+function mockRouterFactory() {
+  const factory = () => mockExecutor({ out: 'x', events: [] })
+  return factory
+}
+
+// ── WidenGate defaults flat (the R2 firewall stays dormant by construction) ──────
+
+describe('WidenGate default', () => {
+  it('a flat gate never widens for any settlement', () => {
+    // Contract under test: a flat WidenGate answers false for every settlement, so a gate
+    // run never widens and the widening-from-verdict (selector≠judge) conflict stays
+    // dormant. The gate is built without a `judgeExempt` escape hatch.
+    const flatGate: WidenGate<unknown> = { shouldWiden: () => false }
+    expect(flatGate.judgeExempt).toBeUndefined()
+
+    const budget = { tokensLeft: 1000, usdLeft: 0, deadlineMs: 0, reservedTokens: 0 }
+    const doneSettlement: Settled<unknown> = {
+      kind: 'done',
+      handle: { id: 'run:s0', label: 'a', status: 'done', abort() {} },
+      out: 'a',
+      outRef: 'mock:"a"',
+      verdict: { valid: true, score: 0.99 },
+      spent: { iterations: 1, tokens: { input: 1, output: 1 }, usd: 0, ms: 0 },
+      seq: 0,
+    }
+    const downSettlement: Settled<unknown> = {
+      kind: 'down',
+      handle: { id: 'run:s1', label: 'b', status: 'failed', abort() {} },
+      reason: 'x',
+      infra: false,
+      restartCount: 0,
+      seq: 1,
+    }
+
+    // Neither a near-perfect `done` (score 0.99) nor a `down` triggers widening.
+    for (const settlement of [doneSettlement, downSettlement]) {
+      expect(flatGate.shouldWiden(settlement, budget)).toBe(false)
+    }
+  })
+})
+
+// ── 6. Supervisor: join barrier, abort cascade, typed result ────────────────────
+
+/**
+ * Builds a complete `SupervisorOpts` for the supervisor tests.
+ *
+ * Fields with a test default (`budget`, `runId`, `journal`, `blobs`, `executors`, `now`)
+ * fall back to it when the override is null/undefined; `maxDepth`, `maxRestarts`,
+ * `withinMs` and `signal` are passed through from `over` unchanged.
+ *
+ * @param over Partial overrides; omitted entirely means "all defaults".
+ * @returns The merged options object.
+ */
+function supervisorOpts(over: Partial<SupervisorOpts> = {}): SupervisorOpts {
+  // Defaults are only constructed when the corresponding override is absent.
+  const budget = over.budget ?? { maxIterations: 100, maxTokens: 100_000 }
+  const runId = over.runId ?? 'sup'
+  const journal = over.journal ?? new InMemorySpawnJournal()
+  const blobs = over.blobs ?? new InMemoryResultBlobStore()
+  const executors = over.executors ?? createExecutorRegistry()
+  const now = over.now ?? (() => 0)
+  const { maxDepth, maxRestarts, withinMs, signal } = over
+  return {
+    budget,
+    runId,
+    journal,
+    blobs,
+    executors,
+    maxDepth,
+    maxRestarts,
+    withinMs,
+    now,
+    signal,
+  }
+}
+
+/**
+ * Test driver for the flat harness: spawns one single-iteration child per arm, drains
+ * the scope until the cursor returns null, and picks the winner with
+ * `defaultSelectWinner` over the adapted `done` settlements. Selection lives in the
+ * driver, not the supervisor (selector≠judge).
+ *
+ * @param arms One entry per child: its name (also used as the spawn label) and mock script.
+ * @returns An agent whose `act` resolves to the winner's `output`.
+ * @throws ValidationError from `act` when selection yields no winner.
+ */
+function flatHarness(arms: Array<{ name: string; script: MockScript }>): Agent<unknown, unknown> {
+  return {
+    name: 'flat-harness',
+    async act(task, scope: Scope<unknown>): Promise<unknown> {
+      // Fan out: every arm is spawned before the first join.
+      for (const { name, script } of arms) {
+        scope.spawn(leafAgent(name, script), task, {
+          budget: { maxIterations: 1, maxTokens: 1000 },
+          label: name,
+        })
+      }
+
+      // Join: keep only `done` settlements, adapted to kernel iterations.
+      const candidates = []
+      let settled = await scope.next()
+      while (settled !== null) {
+        if (settled.kind === 'done') candidates.push(settledToIteration(settled))
+        settled = await scope.next()
+      }
+
+      const best = defaultSelectWinner(candidates)
+      if (best) return best.output
+      throw new ValidationError('flat-harness: no valid child')
+    },
+  }
+}
+
+// Exercises `supervisor.run` end to end: typed winner / no-winner results, caller-abort
+// handling, and RootHandle attach/bind behaviour.
+describe('supervisor', () => {
+  it('returns a typed `winner` and a `down` child does not crash the join', async () => {
+    const supervisor = createSupervisor<unknown, unknown>()
+    // Three arms: two valid (scores 0.8 and 0.3) and one scripted to fail.
+    const result = await supervisor.run(
+      flatHarness([
+        {
+          name: 'good',
+          script: {
+            out: 'good',
+            events: tokensOnly(10, 10, 1),
+            verdict: { valid: true, score: 0.8 },
+          },
+        },
+        { name: 'dead', script: { out: null, events: [], failWith: 'arm down' } },
+        {
+          name: 'meh',
+          script: {
+            out: 'meh',
+            events: tokensOnly(10, 10, 1),
+            verdict: { valid: true, score: 0.3 },
+          },
+        },
+      ]),
+      'solve it',
+      supervisorOpts(),
+    )
+    expect(result.kind).toBe('winner')
+    if (result.kind === 'winner') {
+      // The higher-scored valid arm wins.
+      expect(result.out).toBe('good')
+      // spentTotal sums the conserved spend off every journaled settlement (2 done arms).
+      expect(result.spentTotal.iterations).toBe(2)
+      // 20 matches 10 per done arm (first tokensOnly argument — presumably input tokens).
+      expect(result.spentTotal.tokens.input).toBe(20)
+      // The failed arm still appears in the tree: three nodes for three spawned arms.
+      expect(result.tree.nodes.length).toBe(3)
+    }
+  })
+
+  it('returns a typed `no-winner` (never best!) when every child is down', async () => {
+    const supervisor = createSupervisor<unknown, unknown>()
+    // Both arms are scripted to fail, so the driver has nothing to select from.
+    const result = await supervisor.run(
+      flatHarness([
+        { name: 'd1', script: { out: null, events: [], failWith: 'down 1' } },
+        { name: 'd2', script: { out: null, events: [], failWith: 'down 2' } },
+      ]),
+      'task',
+      supervisorOpts(),
+    )
+    expect(result.kind).toBe('no-winner')
+    if (result.kind === 'no-winner') {
+      expect(result.reason).toBe('all-children-down')
+      expect(result.downCount).toBe(2)
+    }
+  })
+
+  it('a caller abort cascades teardown over live children (allSettled, no throw)', async () => {
+    const controller = new AbortController()
+    const gate = deferred() // children never settle on their own.
+    const supervisor = createSupervisor<unknown, unknown>()
+    // Driver that parks two children on the same never-resolved gate, then aborts the
+    // caller's signal from inside `act`.
+    const driver: Agent<unknown, unknown> = {
+      name: 'parker',
+      async act(_t, scope: Scope<unknown>): Promise<unknown> {
+        scope.spawn(
+          leafAgent('p1', { out: 1, events: tokensOnly(1, 1, 1), block: gate.promise }),
+          't',
+          { budget: { maxIterations: 1, maxTokens: 10 }, label: 'p1' },
+        )
+        scope.spawn(
+          leafAgent('p2', { out: 2, events: tokensOnly(1, 1, 1), block: gate.promise }),
+          't',
+          { budget: { maxIterations: 1, maxTokens: 10 }, label: 'p2' },
+        )
+        // Abort arrives while both children are parked; the first next() must see the reap.
+        controller.abort('caller cancel')
+        const settled = await scope.next()
+        if (settled?.kind === 'down') throw new ValidationError('aborted')
+        return 'unreachable'
+      },
+    }
+    // The run must resolve (not reject) with a typed `no-winner` whose reason is 'aborted'.
+    const result = await supervisor.run(driver, 't', supervisorOpts({ signal: controller.signal }))
+    expect(result.kind).toBe('no-winner')
+    if (result.kind === 'no-winner') expect(result.reason).toBe('aborted')
+  })
+
+  it('a bound RootHandle reads the live tree and is fail-loud when detached', async () => {
+    const handle = createRootHandle<unknown>()
+    // Detached: every method is a typed throw, never a silent no-op.
+    expect(() => handle.view()).toThrow()
+    const supervisor = createSupervisor<unknown, unknown>()
+    supervisor.attach(handle)
+    // Sentinel value; overwritten with the node count seen through the handle mid-run.
+    let observed = -1
+    const driver: Agent<unknown, unknown> = {
+      name: 'observe',
+      async act(_t, scope: Scope<unknown>): Promise<unknown> {
+        scope.spawn(leafAgent('c', { out: 'c', events: tokensOnly(1, 1, 1) }), 't', {
+          budget: { maxIterations: 1, maxTokens: 10 },
+          label: 'c',
+        })
+        // Read the tree through the attached handle while the run is in progress.
+        observed = handle.view().nodes.length
+        await scope.next()
+        return 'c'
+      },
+    }
+    const result = await supervisor.run(driver, 't', supervisorOpts())
+    expect(result.kind).toBe('winner')
+    // Exactly the one spawned child was visible from inside the driver.
+    expect(observed).toBe(1)
+    // Unbound again after the run completes.
+    expect(() => handle.view()).toThrow()
+  })
+
+  it('attach rejects a foreign handle not minted by createRootHandle', () => {
+    const supervisor = createSupervisor<unknown, unknown>()
+    // An object literal with view/signal/abort methods that did not come from
+    // createRootHandle.
+    const foreign = {
+      view() {
+        return { root: '', nodes: [], inFlight: 0 }
+      },
+      signal() {},
+      abort() {},
+    }
+    expect(() => supervisor.attach(foreign)).toThrow(/createRootHandle/)
+  })
+})
+
+// ── 7. Replay determinism ────────────────────────────────────────────────────────
+
+// Checks that a recorded spawn journal plus blob store is enough to reproduce the
+// settlements, the selected winner and the tree view, and that a missing blob is an error.
+describe('replay determinism', () => {
+  it('replaying a recorded journal yields the same tree + winner in the same seq order', async () => {
+    // The journal and blob store are created here so the same instances can be handed
+    // to both the live run and the replay.
+    const journal = new InMemorySpawnJournal()
+    const blobs = new InMemoryResultBlobStore()
+    const supervisor = createSupervisor<unknown, unknown>()
+    // Three valid arms; `b` carries the highest score (0.9).
+    const arms = [
+      {
+        name: 'a',
+        script: {
+          out: { ans: 'a' },
+          events: tokensOnly(10, 5, 1),
+          verdict: { valid: true, score: 0.4 },
+        },
+      },
+      {
+        name: 'b',
+        script: {
+          out: { ans: 'b' },
+          events: tokensOnly(8, 4, 1),
+          verdict: { valid: true, score: 0.9 },
+        },
+      },
+      {
+        name: 'c',
+        script: {
+          out: { ans: 'c' },
+          events: tokensOnly(6, 3, 1),
+          verdict: { valid: true, score: 0.6 },
+        },
+      },
+    ]
+    // Live run: records into `journal` / `blobs` under run id 'replay-run'.
+    const live = await supervisor.run(
+      flatHarness(arms),
+      'task',
+      supervisorOpts({ runId: 'replay-run', journal, blobs }),
+    )
+    expect(live.kind).toBe('winner')
+    const liveWinner = live.kind === 'winner' ? live.out : undefined
+
+    // Replay the recorded journal: rehydrate each `out` from the blob store in seq order.
+    const replayed = await replaySpawnTree(journal, blobs, 'replay-run')
+    const replaySeqs = replayed.map((s) => s.seq)
+    // The replayed seqs must already be in ascending order (compared against a sorted copy).
+    expect(replaySeqs).toEqual([...replaySeqs].sort((x, y) => x - y))
+
+    // Re-run the SAME driver selection over the replayed settlements — same winner.
+    const iterations = replayed
+      .filter((s): s is Extract<Settled<unknown>, { kind: 'done' }> => s.kind === 'done')
+      .map(settledToIteration)
+    const replayWinner = defaultSelectWinner(iterations)?.output
+    expect(replayWinner).toEqual(liveWinner)
+    expect((replayWinner as { ans: string }).ans).toBe('b')
+
+    // materializeTreeView re-derives the recorded tree (same node ids + statuses).
+    const events = (await journal.loadTree('replay-run')) as SpawnEvent[]
+    const view = materializeTreeView(events)
+    // Only nodes parented directly on the run id are counted: the three arms.
+    const leafNodes = view.nodes.filter((n) => n.parent === 'replay-run')
+    expect(leafNodes).toHaveLength(3)
+    expect(leafNodes.every((n) => n.status === 'done')).toBe(true)
+    expect(view.inFlight).toBe(0)
+  })
+
+  it('replay fails loud on a journaled outRef missing from the blob store', async () => {
+    // Hand-write a journal with one spawned + one settled event whose outRef is never
+    // stored: the replay below is given a brand-new, empty blob store.
+    const journal = new InMemorySpawnJournal()
+    await journal.beginTree('gap', new Date(0).toISOString())
+    await journal.appendEvent('gap', {
+      kind: 'spawned',
+      id: 'gap:s0',
+      parent: 'gap',
+      label: 'x',
+      budget: { maxIterations: 1, maxTokens: 10 },
+      runtime: 'router',
+      seq: 0,
+      at: new Date(0).toISOString(),
+    })
+    await journal.appendEvent('gap', {
+      kind: 'settled',
+      id: 'gap:s0',
+      status: 'done',
+      outRef: 'mock:"orphan"',
+      spent: { iterations: 1, tokens: { input: 1, output: 1 }, usd: 0, ms: 0 },
+      seq: 1,
+      at: new Date(0).toISOString(),
+    })
+    // The replay promise must reject with the missing-artifact message.
+    await expect(replaySpawnTree(journal, new InMemoryResultBlobStore(), 'gap')).rejects.toThrow(
+      /no artifact for outRef/,
+    )
+  })
+})
+
+// ── helpers ────────────────────────────────────────────────────────────────────
+
+/**
+ * Creates a promise together with the function that resolves it, so a test can hold a
+ * child parked on `promise` and release it later by calling `resolve()`.
+ *
+ * @returns The pending promise and its resolver.
+ */
+function deferred(): { promise: Promise<void>; resolve: () => void } {
+  // Assigned synchronously by the Promise executor below, hence the definite-assignment `!`.
+  let release!: () => void
+  const promise = new Promise<void>((settle) => {
+    release = settle
+  })
+  return { promise, resolve: release }
+}