From 4234c94396ac15bccda04ff0ce047aa037c39414 Mon Sep 17 00:00:00 2001
From: Drew Stone <drewstone329@gmail.com>
Date: Sun, 31 May 2026 03:18:11 -0600
Subject: [PATCH 1/9] feat(profiles/coder): default-on no-op + secret-path
 floor on the coder validator
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

First increment of the canonical MCP delegate hardening (the techniques the
ai-trading-blueprint delegation fork proved, folded back into agent-runtime so
delegate_code is reliable for the whole fleet — not re-forked per product):

- No-op rejection: an empty patch can trivially pass tests/typecheck (nothing
  changed) yet does no work — now valid=false (scores.nonEmpty=0).
- Secret-path floor: always-on, independent of task.forbiddenPaths — rejects a
  patch touching credential-shaped paths (.env, *.pem/*.key/*.p12/*.pfx,
  keystore, wallet, id_rsa/id_ed25519, secrets/credentials.json). valid=false.

Both are hard gates (flip valid), additive to the existing forbidden-path /
diff-size / tests / typecheck checks; the weighted composite is unchanged so
clean patches don't regress.

Tests: empty patch → invalid; secret path → invalid even when not in
forbiddenPaths; normal patch still valid. Full suite 407 green, tsc + biome clean.

Remaining hardening increments (this branch): reviewer/audit gate + winner-
selection strategy on delegate_code; physim's valid-only KB-growth (passage-
present storage guard, fail-closed judge registry, correct-on-veto/escalate,
circular-citation detection) on delegate_research. Umbrella: #828 (loop-runner).
---
 src/profiles/coder.ts        | 30 ++++++++++++++++++++++
 tests/profiles/coder.test.ts | 48 ++++++++++++++++++++++++++++++++++++
 2 files changed, 78 insertions(+)
diff --git a/src/profiles/coder.ts b/src/profiles/coder.ts
index 80149d3..51fe3f4 100644
--- a/src/profiles/coder.ts
+++ b/src/profiles/coder.ts
@@ -250,6 +250,15 @@ function parseCoderEvents(events: SandboxEvent[]): CoderOutput {
  *
  * @experimental
  */
+/**
+ * Default-on safety floor (folded from the ai-trading-blueprint delegation MCP):
+ * rejects credential-shaped paths regardless of `forbiddenPaths` — `.env` and
+ * `.env.<suffix>` (except `.example`/`.sample`/`.template` placeholders), private
+ * keys, keystores, wallets, and secret(s)/credential(s) JSON files.
+ */
+const SECRET_PATH_RE =
+  /(^|\/)(\.env(\.(?!(example|sample|template)$)[^/]*)?|.*\.(pem|key|p12|pfx|keystore|wallet)|id_rsa|id_ed25519|secrets?\.json|credentials?\.json)$/i
+
 export function createCoderValidator(task: CoderTask): Validator<CoderOutput> {
   const maxDiff = task.maxDiffLines ?? DEFAULT_MAX_DIFF_LINES
   const forbidden = task.forbiddenPaths ?? []
@@ -260,6 +269,27 @@ export function createCoderValidator(task: CoderTask): Validator<CoderOutput> {
       let pass = true
 
       const touched = touchedPathsFromPatch(output.patch)
+
+      // No-op rejection: an empty patch can trivially "pass" tests/typecheck
+      // (nothing changed) yet does no work — never a valid coder result.
+      if (touched.length === 0 || output.patch.trim().length === 0) {
+        pass = false
+        scores.nonEmpty = 0
+        notes.push('empty patch — no files changed')
+      } else {
+        scores.nonEmpty = 1
+      }
+
+      // Secret-path floor: always-on, independent of `forbiddenPaths`.
+      const touchedSecrets = touched.filter((p) => SECRET_PATH_RE.test(p))
+      if (touchedSecrets.length > 0) {
+        pass = false
+        scores.noSecrets = 0
+        notes.push(`touched secret-shaped paths: ${touchedSecrets.join(', ')}`)
+      } else {
+        scores.noSecrets = 1
+      }
+
       const touchedForbidden = forbidden.filter((path) => {
         const prefix = path.endsWith('/') ? path : `${path}/`
         const exact = prefix.slice(0, -1)
diff --git a/tests/profiles/coder.test.ts b/tests/profiles/coder.test.ts
index 4b356c7..fc2d725 100644
--- a/tests/profiles/coder.test.ts
+++ b/tests/profiles/coder.test.ts
@@ -184,3 +184,51 @@ describe('multiHarnessCoderFanout — heterogeneous fanout bundle', () => {
     expect(bundle.agentRuns.every((s) => s.profile.tools?.git === true)).toBe(true)
   })
 })
+
+describe('createCoderValidator — default-on safety floor (no-op + secrets)', () => {
+  it('rejects an empty patch (no-op) even when tests + typecheck pass', async () => {
+    const validator = createCoderValidator(baseTask)
+    const output: CoderOutput = {
+      branch: 'feat/x',
+      patch: '',
+      testResult: { passed: true, output: 'ok' },
+      typecheckResult: { passed: true, output: 'ok' },
+      diffStats: { filesChanged: 0, insertions: 0, deletions: 0 },
+    }
+    const verdict = await validator.validate(output, ctx)
+    expect(verdict.valid).toBe(false)
+    expect(verdict.scores?.nonEmpty).toBe(0)
+    expect(verdict.notes).toMatch(/empty patch/i)
+  })
+
+  it('rejects a patch touching a secret-shaped path regardless of forbiddenPaths', async () => {
+    // `.env` is NOT in baseTask.forbiddenPaths — the secret floor is always-on.
+    const validator = createCoderValidator(baseTask)
+    const output: CoderOutput = {
+      branch: 'feat/x',
+      patch: diff(['config/.env', 'src/ok.ts'], 2, 0),
+      testResult: { passed: true, output: 'ok' },
+      typecheckResult: { passed: true, output: 'ok' },
+      diffStats: { filesChanged: 2, insertions: 2, deletions: 0 },
+    }
+    const verdict = await validator.validate(output, ctx)
+    expect(verdict.valid).toBe(false)
+    expect(verdict.scores?.noSecrets).toBe(0)
+    expect(verdict.notes).toMatch(/secret-shaped/i)
+  })
+
+  it('passes a normal non-empty, non-secret patch (floor does not regress clean work)', async () => {
+    const validator = createCoderValidator(baseTask)
+    const output: CoderOutput = {
+      branch: 'feat/x',
+      patch: diff(['src/foo.ts'], 3, 1),
+      testResult: { passed: true, output: 'ok' },
+      typecheckResult: { passed: true, output: 'ok' },
+      diffStats: { filesChanged: 1, insertions: 3, deletions: 1 },
+    }
+    const verdict = await validator.validate(output, ctx)
+    expect(verdict.valid).toBe(true)
+    expect(verdict.scores?.nonEmpty).toBe(1)
+    expect(verdict.scores?.noSecrets).toBe(1)
+  })
+})

From 688d701b318cd8d59a89e62e45ef31751549629a Mon Sep 17 00:00:00 2001
From: Drew Stone <drewstone329@gmail.com>
Date: Sun, 31 May 2026 03:26:48 -0600
Subject: [PATCH 2/9] feat(mcp): reviewer gate + winner-selection on
 delegate_code; createKbGate for valid-only research
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Increments 2 + 3 of the canonical-MCP delegate hardening (folding the proven
techniques from the ai-trading-blueprint fork + physim's KB subsystem back into
agent-runtime, so every product's delegated loops are reliable without re-forking).

delegate_code (createDefaultCoderDelegate):
- Optional `reviewer` (CoderReviewer): a candidate that passes mechanical
  validation must ALSO be approved by an adversarial reviewer to win — catches the
  "compiles + tests pass but wrong/unsafe" class. No reviewer → unchanged behavior.
- `winnerSelection`: highest-score (default, = kernel) | smallest-diff |
  highest-readiness | first-approved, over ALL valid candidates not just the
  kernel's single winner. Fails loud when nothing survives validation (+ review).

delegate_research (createKbGate):
- Reusable, dependency-free valid-only KB-growth gate distilled from physim:
  fail-closed judge registry, first-veto-wins. Always-on floor — passage-non-empty,
  passage-present anti-hallucination guard (verbatim passage MUST appear in source),
  value-in-passage (literal / comma-grouped / billion-million shorthand),
  no-circular-citation (laundering catch; only active when `selfArtifactKinds` is
  set). Consumer judges append after the floor.
  Operates on fact candidates, not a store — composes with agent-knowledge without
  importing it. Verdict only; remediation is the caller's (never drops silently).

Tests: delegate selection + reviewer fail-loud + backward-compat; kb-gate floor +
shorthand + circular + consumer-judge. Full suite 420 green, tsc + biome clean.
Engine for the loop-runner (#828). Increment 1 (no-op + secret floor) = 4234c94.
---
 src/mcp/delegates.ts                       | 155 +++++++++++++++++++--
 src/mcp/index.ts                           |  11 ++
 src/mcp/kb-gate.ts                         | 153 ++++++++++++++++++++
 tests/mcp/coder-delegate-selection.test.ts | 110 +++++++++++++++
 tests/mcp/kb-gate.test.ts                  |  94 +++++++++++++
 5 files changed, 511 insertions(+), 12 deletions(-)
 create mode 100644 src/mcp/kb-gate.ts
 create mode 100644 tests/mcp/coder-delegate-selection.test.ts
 create mode 100644 tests/mcp/kb-gate.test.ts

diff --git a/src/mcp/delegates.ts b/src/mcp/delegates.ts
index 59b3040..41375a9 100644
--- a/src/mcp/delegates.ts
+++ b/src/mcp/delegates.ts
@@ -16,9 +16,9 @@
  * pass `researcherDelegate` explicitly when constructing the server.
  */
 
-import type { LoopSandboxClient } from '../loops'
+import type { Iteration, LoopSandboxClient } from '../loops'
 import { runLoop } from '../loops'
-import { coderProfile, multiHarnessCoderFanout } from '../profiles/coder'
+import { type CoderOutput, coderProfile, multiHarnessCoderFanout } from '../profiles/coder'
 import { createSiblingSandboxExecutor, type DelegationExecutor } from './executor'
 import type {
   CoderTask,
@@ -46,6 +46,43 @@ export type ResearcherDelegate = (
   ctx: DelegateRunCtx,
 ) => Promise<ResearchOutputShape>
 
+/** @experimental Structured review verdict over a coder candidate. */
+export interface CoderReview {
+  /** Gate: only approved candidates are eligible to win. */
+  approved: boolean
+  /** Reviewer's recommendation — surfaced in traces. */
+  recommendation: 'ship' | 'approve-with-nits' | 'changes-requested' | 'reject'
+  /** Readiness 0..1, used by the `highest-readiness` winner-selection strategy. */
+  readiness: number
+  notes?: string
+}
+
+/**
+ * @experimental
+ *
+ * Optional adversarial reviewer over a coder candidate that already passed
+ * mechanical validation (tests/typecheck/forbidden/diff/no-op/secrets). Folded
+ * from the ai-trading-blueprint delegation MCP: a candidate is only eligible to
+ * win if the reviewer approves it. The reviewer is the consumer's seam — an LLM
+ * judge, a `pnpm review` command, anything returning a `CoderReview`.
+ */
+export type CoderReviewer = (
+  output: CoderOutput,
+  task: CoderTask,
+  ctx: { signal: AbortSignal },
+) => Promise<CoderReview> | CoderReview
+
+/**
+ * @experimental Winner-selection strategy among validated (+ reviewed) candidates.
+ * `highest-readiness` needs a `reviewer`; without one it ranks by validator score.
+ * Default `highest-score` (the kernel's behavior — preserves backward compatibility).
+ */
+export type CoderWinnerSelection =
+  | 'highest-score'
+  | 'smallest-diff'
+  | 'highest-readiness'
+  | 'first-approved'
+
 /** @experimental */
 export interface CreateDefaultCoderDelegateOptions {
   /**
@@ -64,6 +101,15 @@ export interface CreateDefaultCoderDelegateOptions {
   fanoutHarnesses?: string[]
   /** Hard cap on the kernel's per-batch concurrency. Default 4. */
   maxConcurrency?: number
+  /**
+   * Optional adversarial reviewer. When set, a candidate must pass mechanical
+   * validation AND `reviewer.approved` to be eligible to win — empty/secret/
+   * test-failing patches are already gone; this catches the "compiles + passes
+   * but wrong/unsafe" class the deterministic validator can't see.
+   */
+  reviewer?: CoderReviewer
+  /** Winner-selection strategy among eligible candidates. Default `highest-score`. */
+  winnerSelection?: CoderWinnerSelection
 }
 
 /**
@@ -103,12 +149,16 @@ export function createDefaultCoderDelegate(
         maxIterations: 1,
         maxConcurrency,
       })
-      const winner = result.winner
-      if (!winner) {
-        throw new Error('coder delegate produced no winner')
-      }
+      const chosen = await pickCoderWinner({
+        iterations: result.iterations,
+        reviewer: options.reviewer,
+        selection: options.winnerSelection ?? 'highest-score',
+        task,
+        signal: ctx.signal,
+      })
+      if (!chosen) throw new Error(noWinnerMessage(options.reviewer))
       ctx.report({ iteration: 1, phase: 'completed' })
-      return winner.output
+      return chosen
     }
     const fanout = multiHarnessCoderFanout(
       fanoutHarnesses && fanoutHarnesses.length > 0
@@ -126,15 +176,96 @@ export function createDefaultCoderDelegate(
       maxIterations: variants,
       maxConcurrency: Math.min(maxConcurrency, variants),
     })
-    const winner = result.winner
-    if (!winner) {
-      throw new Error('coder delegate fanout produced no winner')
-    }
+    const chosen = await pickCoderWinner({
+      iterations: result.iterations,
+      reviewer: options.reviewer,
+      selection: options.winnerSelection ?? 'highest-score',
+      task,
+      signal: ctx.signal,
+    })
+    if (!chosen) throw new Error(noWinnerMessage(options.reviewer))
     ctx.report({ iteration: agentRuns.length, phase: 'completed' })
-    return winner.output
+    return chosen
   }
 }
 
+interface PickCoderWinnerArgs {
+  iterations: ReadonlyArray<Iteration<CoderTask, CoderOutput>>
+  reviewer: CoderReviewer | undefined
+  selection: CoderWinnerSelection
+  task: CoderTask
+  signal: AbortSignal
+}
+
+interface CoderCandidate {
+  index: number
+  output: CoderOutput
+  score: number
+  readiness: number
+}
+
+/**
+ * Pick the winning coder candidate from a finished loop's iterations:
+ *   1. keep iterations with an output, no error, and `verdict.valid === true`;
+ *      score and readiness both start from `verdict.score` (0 when absent),
+ *   2. with a `reviewer`, await it per survivor in order and keep only approved
+ *      ones, replacing readiness with the review's,
+ *   3. select among survivors by strategy; `undefined` when nothing survives.
+ */
+async function pickCoderWinner(args: PickCoderWinnerArgs): Promise<CoderOutput | undefined> {
+  const valid: CoderCandidate[] = []
+  for (const iter of args.iterations) {
+    if (iter.output === undefined || iter.error || iter.verdict?.valid !== true) continue
+    valid.push({
+      index: iter.index,
+      output: iter.output,
+      score: iter.verdict.score ?? 0,
+      readiness: iter.verdict.score ?? 0,
+    })
+  }
+  if (valid.length === 0) return undefined
+
+  let eligible = valid
+  if (args.reviewer) {
+    eligible = []
+    for (const c of valid) {
+      const review = await args.reviewer(c.output, args.task, { signal: args.signal })
+      if (review.approved) eligible.push({ ...c, readiness: review.readiness })
+    }
+    if (eligible.length === 0) return undefined
+  }
+
+  return selectCoderCandidate(eligible, args.selection).output
+}
+
+/** Apply the winner-selection strategy; ties broken by earliest iteration. */
+function selectCoderCandidate(
+  candidates: CoderCandidate[],
+  selection: CoderWinnerSelection,
+): CoderCandidate {
+  const diffLines = (c: CoderCandidate) =>
+    c.output.diffStats.insertions + c.output.diffStats.deletions
+  const sorted = [...candidates].sort((a, b) => {
+    switch (selection) {
+      case 'smallest-diff':
+        return diffLines(a) - diffLines(b) || a.index - b.index
+      case 'highest-readiness':
+        return b.readiness - a.readiness || a.index - b.index
+      case 'first-approved':
+        return a.index - b.index
+      default:
+        return b.score - a.score || a.index - b.index
+    }
+  })
+  return sorted[0]!
+}
+
+function noWinnerMessage(reviewer: CoderReviewer | undefined): string {
+  return reviewer
+    ? 'coder delegate: no candidate passed validation + review'
+    : 'coder delegate: no candidate passed validation'
+}
+
 function buildCoderGoal(args: DelegateCodeArgs): string {
   if (!args.contextHint) return args.goal
   return [args.goal, '', '## Context', args.contextHint].join('\n')
diff --git a/src/mcp/index.ts b/src/mcp/index.ts
index 6ab9b2b..24b5fc7 100644
--- a/src/mcp/index.ts
+++ b/src/mcp/index.ts
@@ -17,6 +17,9 @@ export type { DetectExecutorArgs } from './bin-helpers'
 export { detectExecutor } from './bin-helpers'
 export type {
   CoderDelegate,
+  CoderReview,
+  CoderReviewer,
+  CoderWinnerSelection,
   CreateDefaultCoderDelegateOptions,
   DelegateRunCtx,
   ResearcherDelegate,
@@ -36,6 +39,14 @@ export type {
   InProcessExecutorOptions,
 } from './in-process-executor'
 export { createInProcessExecutor } from './in-process-executor'
+export {
+  type CreateKbGateOptions,
+  createKbGate,
+  type FactCandidate,
+  type FactJudge,
+  type FactJudgeVerdict,
+  type KbGateResult,
+} from './kb-gate'
 export type { LocalHarness, LocalHarnessResult, RunLocalHarnessOptions } from './local-harness'
 export { runLocalHarness } from './local-harness'
 export { mcpToolsForRuntimeMcp, mcpToolsForRuntimeMcpSubset } from './openai-tools'
diff --git a/src/mcp/kb-gate.ts b/src/mcp/kb-gate.ts
new file mode 100644
index 0000000..2f2b340
--- /dev/null
+++ b/src/mcp/kb-gate.ts
@@ -0,0 +1,153 @@
+/**
+ * @experimental
+ *
+ * `createKbGate` — the valid-only knowledge-base growth gate, distilled from
+ * physim's KB-research subsystem. A research-in-a-loop delegate (or any KB
+ * writer) runs candidate facts through this before persisting, so the KB grows
+ * with ONLY grounded facts — hallucinated, unsourced, or laundered claims are
+ * vetoed at the gate.
+ *
+ * Fail-closed by construction: every judge must `accept`; the FIRST veto wins
+ * and the fact is rejected. The non-negotiable floor (always on, can't be
+ * disabled) is the **passage-present guard** — a fact's `verbatimPassage` MUST
+ * literally appear in its `sourceText`. That single check kills the dominant
+ * failure mode (a confident claim decoupled from any real source).
+ *
+ * Pure + dependency-free: it operates on fact candidates, not on a store, so it
+ * composes with `@tangle-network/agent-knowledge` or any persistence layer
+ * without importing it. The remediation policy (correct-on-veto vs
+ * escalate-as-unverified) is the caller's — this returns the verdict; it never
+ * drops a fact silently.
+ */
+
+/** @experimental A fact proposed for the KB, with its grounding. */
+export interface FactCandidate {
+  /** The atomic claim text. */
+  claim: string
+  /** Optional extracted value (number or string) the claim asserts. */
+  value?: string | number
+  /** Verbatim span lifted from the source that backs the claim. */
+  verbatimPassage: string
+  /** The raw source text the passage must be grounded in. */
+  sourceText: string
+  /** Where the fact claims to come from — checked for circular/self citations. */
+  citation?: string
+}
+
+/** @experimental */
+export interface FactJudgeVerdict {
+  accept: boolean
+  reason?: string
+}
+
+/** @experimental A pluggable fact validator. Return a verdict instead of throwing:
+ *  the gate does not catch, so a thrown judge rejects the whole gate call. */
+export interface FactJudge {
+  name: string
+  judge(candidate: FactCandidate): FactJudgeVerdict | Promise<FactJudgeVerdict>
+}
+
+/** @experimental */
+export interface KbGateResult {
+  accepted: boolean
+  /** Name of the judge that vetoed; undefined when accepted. */
+  vetoedBy?: string
+  reason?: string
+}
+
+/** @experimental */
+export interface CreateKbGateOptions {
+  /** Extra judges appended after the built-in floor (e.g. an LLM judge). */
+  judges?: FactJudge[]
+  /** Minimum verbatim-passage length. Default 12 — kills empty/stub passages. */
+  minPassageChars?: number
+  /**
+   * Citation tokens that denote a SELF-generated artifact (e.g. `'spec'`,
+   * `'cad_params'`, `'requirements'`). A citation naming one is circular
+   * (laundering) — the fact cites a derived artifact, not a real source.
+   * Default `[]` (no circular check unless the consumer declares its kinds).
+   */
+  selfArtifactKinds?: string[]
+}
+
+const norm = (s: string): string => s.toLowerCase().replace(/\s+/g, ' ').trim()
+
+/** Is `value` in the normalized passage? Numbers must match as whole numeric tokens. */
+function valueAppears(value: string | number, passageNorm: string): boolean {
+  const literal = norm(String(value))
+  if (typeof value !== 'number' || !Number.isFinite(value)) return passageNorm.includes(literal)
+  const forms = [literal, value.toLocaleString('en-US')]
+  if (Math.abs(value) >= 1e9) forms.push(`${trimZero(value / 1e9)} billion`)
+  if (Math.abs(value) >= 1e6) forms.push(`${trimZero(value / 1e6)} million`)
+  const alts = forms.map((f) => norm(f).replace(/[.*+?^${}()|[\]\\]/g, '\\$&')).join('|')
+  return new RegExp(`(?<![\\d.,])(?:${alts})(?:\\.0+)?(?!\\d|[.,]\\d)`).test(passageNorm)
+}
+
+function trimZero(n: number): string {
+  return Number.isInteger(n) ? String(n) : String(Number(n.toFixed(2)))
+}
+
+/** The always-on floor judges. Order matters: cheapest / most-fundamental first. */
+function builtinJudges(minPassageChars: number, selfArtifactKinds: string[]): FactJudge[] {
+  const kinds = selfArtifactKinds.map((k) => k.toLowerCase())
+  return [
+    {
+      name: 'passage-non-empty',
+      judge: (c) =>
+        c.verbatimPassage.trim().length >= minPassageChars
+          ? { accept: true }
+          : { accept: false, reason: `passage shorter than ${minPassageChars} chars` },
+    },
+    {
+      // THE anti-hallucination floor — the passage must literally be in the source.
+      name: 'passage-present',
+      judge: (c) =>
+        norm(c.sourceText).includes(norm(c.verbatimPassage))
+          ? { accept: true }
+          : { accept: false, reason: 'verbatim passage not found in source (unbacked fact)' },
+    },
+    {
+      name: 'value-in-passage',
+      judge: (c) =>
+        c.value === undefined || valueAppears(c.value, norm(c.verbatimPassage))
+          ? { accept: true }
+          : { accept: false, reason: `value ${JSON.stringify(c.value)} not present in passage` },
+    },
+    {
+      name: 'no-circular-citation',
+      judge: (c) => {
+        if (!c.citation || kinds.length === 0) return { accept: true }
+        const cite = c.citation.toLowerCase()
+        const hit = kinds.find((k) => cite.includes(k))
+        return hit
+          ? { accept: false, reason: `circular citation to self-generated artifact "${hit}"` }
+          : { accept: true }
+      },
+    },
+  ]
+}
+
+/**
+ * @experimental
+ *
+ * Build a fail-closed KB gate. The returned function runs the built-in floor
+ * (passage-non-empty → passage-present → value-in-passage → no-circular-citation)
+ * then any consumer judges, returning on the first veto. `minPassageChars` < 1 → 1.
+ */
+export function createKbGate(
+  options: CreateKbGateOptions = {},
+): (candidate: FactCandidate) => Promise<KbGateResult> {
+  const judges = [
+    ...builtinJudges(Math.max(1, options.minPassageChars ?? 12), options.selfArtifactKinds ?? []),
+    ...(options.judges ?? []),
+  ]
+  return async (candidate) => {
+    for (const j of judges) {
+      const verdict = await j.judge(candidate)
+      if (!verdict.accept) {
+        return { accepted: false, vetoedBy: j.name, reason: verdict.reason }
+      }
+    }
+    return { accepted: true }
+  }
+}
diff --git a/tests/mcp/coder-delegate-selection.test.ts b/tests/mcp/coder-delegate-selection.test.ts
new file mode 100644
index 0000000..92da502
--- /dev/null
+++ b/tests/mcp/coder-delegate-selection.test.ts
@@ -0,0 +1,110 @@
+import type { CreateSandboxOptions, SandboxEvent, SandboxInstance } from '@tangle-network/sandbox'
+import { describe, expect, it } from 'vitest'
+import {
+  type CoderReview,
+  type CoderReviewer,
+  type CoderWinnerSelection,
+  createDefaultCoderDelegate,
+} from '../../src/mcp/delegates'
+import type { CoderOutput } from '../../src/profiles/coder'
+
+function diff(path: string, plus: number, minus: number): string {
+  const out = [`diff --git a/${path} b/${path}`, `--- a/${path}`, `+++ b/${path}`]
+  for (let i = 0; i < plus; i += 1) out.push(`+line ${i}`)
+  for (let i = 0; i < minus; i += 1) out.push(`-line ${i}`)
+  return out.join('\n')
+}
+
+// Two distinct, mechanically-VALID candidates that DIVERGE on diff-size vs
+// readiness, so the selection strategy is observable:
+//   - candidate "small": tiny diff (2 lines), low reviewer readiness
+//   - candidate "big":   larger diff (10 lines), high reviewer readiness
+const CANDIDATES: CoderOutput[] = [
+  {
+    branch: 'small',
+    patch: diff('src/small.ts', 1, 1),
+    testResult: { passed: true, output: 'ok' },
+    typecheckResult: { passed: true, output: 'ok' },
+    diffStats: { filesChanged: 1, insertions: 1, deletions: 1 },
+  },
+  {
+    branch: 'big',
+    patch: diff('src/big.ts', 5, 5),
+    testResult: { passed: true, output: 'ok' },
+    typecheckResult: { passed: true, output: 'ok' },
+    diffStats: { filesChanged: 1, insertions: 5, deletions: 5 },
+  },
+]
+
+// Stub sandbox client: each create() serves the next candidate (by call order)
+// as a parseable `result` event. Two harnesses → two branches → two candidates.
+function candidateClient() {
+  let i = 0
+  return {
+    async create(_opts?: CreateSandboxOptions): Promise<SandboxInstance> {
+      const out = CANDIDATES[i++ % CANDIDATES.length]!
+      return {
+        async *streamPrompt() {
+          yield { type: 'result', data: { result: out } } satisfies SandboxEvent
+        },
+      } as unknown as SandboxInstance
+    },
+  }
+}
+
+const ctx = { signal: new AbortController().signal, report() {} }
+const args = { goal: 'fix it', repoRoot: '/repo', variants: 2 }
+
+// Reviewer that approves both but rates the BIG candidate more ready.
+const readinessReviewer: CoderReviewer = (output) => ({
+  approved: true,
+  recommendation: 'ship',
+  readiness: output.branch === 'big' ? 0.9 : 0.4,
+})
+
+describe('createDefaultCoderDelegate — reviewer gate + winner selection', () => {
+  it('smallest-diff selects the smaller valid patch', async () => {
+    const delegate = createDefaultCoderDelegate({
+      sandboxClient: candidateClient(),
+      fanoutHarnesses: ['claude-code', 'codex'],
+      winnerSelection: 'smallest-diff' satisfies CoderWinnerSelection,
+    })
+    const out = await delegate(args, ctx)
+    expect(out.branch).toBe('small')
+  })
+
+  it('highest-readiness selects by the reviewer score, diverging from diff size', async () => {
+    const delegate = createDefaultCoderDelegate({
+      sandboxClient: candidateClient(),
+      fanoutHarnesses: ['claude-code', 'codex'],
+      reviewer: readinessReviewer,
+      winnerSelection: 'highest-readiness',
+    })
+    const out = await delegate(args, ctx)
+    expect(out.branch).toBe('big')
+  })
+
+  it('rejects when the reviewer approves nothing (fails loud, no winner)', async () => {
+    const rejectAll: CoderReviewer = (): CoderReview => ({
+      approved: false,
+      recommendation: 'changes-requested',
+      readiness: 0,
+    })
+    const delegate = createDefaultCoderDelegate({
+      sandboxClient: candidateClient(),
+      fanoutHarnesses: ['claude-code', 'codex'],
+      reviewer: rejectAll,
+    })
+    await expect(delegate(args, ctx)).rejects.toThrow(/validation \+ review/)
+  })
+
+  it('default highest-score (no reviewer) still returns a valid winner', async () => {
+    const delegate = createDefaultCoderDelegate({
+      sandboxClient: candidateClient(),
+      fanoutHarnesses: ['claude-code', 'codex'],
+    })
+    const out = await delegate(args, ctx)
+    // smaller diff → higher diffSize score → highest-score favors it; either way a valid winner.
+    expect(['small', 'big']).toContain(out.branch)
+  })
+})
diff --git a/tests/mcp/kb-gate.test.ts b/tests/mcp/kb-gate.test.ts
new file mode 100644
index 0000000..4899bd6
--- /dev/null
+++ b/tests/mcp/kb-gate.test.ts
@@ -0,0 +1,94 @@
+import { describe, expect, it } from 'vitest'
+import { createKbGate, type FactCandidate, type FactJudge } from '../../src/mcp/kb-gate'
+
+const SOURCE =
+  'The 2025 annual report states total revenue was $1,200,000,000 for the fiscal year, up 12% year over year.'
+
+function fact(overrides: Partial<FactCandidate>): FactCandidate {
+  return {
+    claim: 'revenue was 1.2B',
+    verbatimPassage: 'total revenue was $1,200,000,000 for the fiscal year',
+    sourceText: SOURCE,
+    ...overrides,
+  }
+}
+
+describe('createKbGate — valid-only KB growth', () => {
+  it('accepts a grounded fact whose passage is present in the source', async () => {
+    const gate = createKbGate()
+    const r = await gate(fact({}))
+    expect(r.accepted).toBe(true)
+    expect(r.vetoedBy).toBeUndefined()
+  })
+
+  it('vetoes a fact whose passage is NOT in the source (the anti-hallucination floor)', async () => {
+    const gate = createKbGate()
+    const r = await gate(fact({ verbatimPassage: 'revenue tripled to nine billion dollars' }))
+    expect(r.accepted).toBe(false)
+    expect(r.vetoedBy).toBe('passage-present')
+  })
+
+  it('vetoes a too-short passage', async () => {
+    const gate = createKbGate({ minPassageChars: 12 })
+    const r = await gate(fact({ verbatimPassage: 'revenue' }))
+    expect(r.accepted).toBe(false)
+    expect(r.vetoedBy).toBe('passage-non-empty')
+  })
+
+  it('vetoes a value not present in the passage', async () => {
+    const gate = createKbGate()
+    const r = await gate(fact({ value: 999 }))
+    expect(r.accepted).toBe(false)
+    expect(r.vetoedBy).toBe('value-in-passage')
+  })
+
+  it('accepts a numeric value via comma-grouped form', async () => {
+    const gate = createKbGate()
+    const r = await gate(fact({ value: 1_200_000_000 }))
+    expect(r.accepted).toBe(true)
+  })
+
+  it('accepts a numeric value via billion shorthand when the source uses it', async () => {
+    const gate = createKbGate()
+    const r = await gate(
+      fact({
+        verbatimPassage: 'revenue reached 1.2 billion in 2025',
+        sourceText: 'Per the filing, revenue reached 1.2 billion in 2025.',
+        value: 1_200_000_000,
+      }),
+    )
+    expect(r.accepted).toBe(true)
+  })
+
+  it('vetoes a circular citation to a self-generated artifact (laundering)', async () => {
+    const gate = createKbGate({ selfArtifactKinds: ['spec', 'cad_params'] })
+    const r = await gate(fact({ citation: '[cad_params.v2]' }))
+    expect(r.accepted).toBe(false)
+    expect(r.vetoedBy).toBe('no-circular-citation')
+  })
+
+  it('runs consumer judges after the floor, fail-closed on first veto', async () => {
+    const domainJudge: FactJudge = {
+      name: 'requires-year',
+      judge: (c) =>
+        /\b20\d{2}\b/.test(c.verbatimPassage)
+          ? { accept: true }
+          : { accept: false, reason: 'no year' },
+    }
+    const gate = createKbGate({ judges: [domainJudge] })
+    // passage is grounded but has no year → the consumer judge vetoes
+    const r = await gate(
+      fact({
+        verbatimPassage: 'total revenue was $1,200,000,000 for the fiscal year',
+      }),
+    )
+    expect(r.accepted).toBe(false)
+    expect(r.vetoedBy).toBe('requires-year')
+  })
+
+  it('accepts when no value is asserted (value check is conditional)', async () => {
+    const gate = createKbGate()
+    const r = await gate(fact({ value: undefined }))
+    expect(r.accepted).toBe(true)
+  })
+})

From 97382c2e0e0e96a7254e9b43997bb0212869c2bc Mon Sep 17 00:00:00 2001
From: Drew Stone <drewstone329@gmail.com>
Date: Sun, 31 May 2026 03:27:02 -0600
Subject: [PATCH 3/9] =?UTF-8?q?chore(release):=200.36.0=20=E2=80=94=20MCP?=
 =?UTF-8?q?=20delegate=20hardening=20(reviewer=20gate,=20winner-selection,?=
 =?UTF-8?q?=20no-op+secret=20floor,=20createKbGate)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package.json b/package.json
index ad7c33b..556408a 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@tangle-network/agent-runtime",
-  "version": "0.35.0",
+  "version": "0.36.0",
   "description": "Reusable runtime lifecycle for domain-specific agents.",
   "homepage": "https://github.com/tangle-network/agent-runtime#readme",
   "repository": {

From 5f5fbbff19e63bc0a68469a9360ebd1e313a2b71 Mon Sep 17 00:00:00 2001
From: Drew Stone <drewstone329@gmail.com>
Date: Sun, 31 May 2026 03:35:00 -0600
Subject: [PATCH 4/9] =?UTF-8?q?feat(loop-runner):=20runDelegatedLoop=20?=
 =?UTF-8?q?=E2=80=94=20configured=20mode=20dispatcher=20over=20the=20harde?=
 =?UTF-8?q?ned=20engines?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The thin façade that makes the hardened delegation engines (this branch) usable as
ONE configured, schedulable entrypoint — the "configured delegated loop runner" (#828).

- runDelegatedLoop(mode, registry): dispatches code | review | research | audit |
  self-improve | dynamic to a pre-configured runner. Owns mode routing, timing,
  fail-loud on an unregistered mode (ConfigError), and a uniform DelegatedLoopResult
  (a thrown engine becomes { ok:false, error } so unattended/scheduled runs record
  and move on rather than crash).
- coderLoopRunner / reviewLoopRunner: default code/review runners over the hardened
  coder delegate (no-op + secret floor, reviewer gate, winner-selection). review
  mode TYPE-requires a reviewer — a review loop with no reviewer is just a code loop.
- Registry is partial + injectable: products/routines register only the modes they
  use; tests inject stubs; the engines stay the canonical agent-runtime ones (no fork).

This is the layer a scheduled routine targets (research/audit/self-improve on a
cadence; code/review/dynamic on demand). Tests: dispatch routing, fail-loud
unregistered mode, thrown-engine → ok:false, coderLoopRunner real wiring via stub.
Full suite green, tsc + biome clean. Engine = 4234c94 + 688d701.
---
 src/index.ts              |  13 +++-
 src/loop-runner.ts        | 143 ++++++++++++++++++++++++++++++++++++++
 tests/loop-runner.test.ts |  71 +++++++++++++++++++
 3 files changed, 226 insertions(+), 1 deletion(-)
 create mode 100644 src/loop-runner.ts
 create mode 100644 tests/loop-runner.test.ts

diff --git a/src/index.ts b/src/index.ts
index dc2e491..3cda443 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -105,13 +105,24 @@ export {
   RuntimeRunStateError,
   ValidationError,
 } from './errors'
+// ── Delegated loop-runner (configured code/research/review/audit/self-improve/dynamic) ──
+export {
+  type CoderLoopRunnerOptions,
+  coderLoopRunner,
+  type DelegatedLoopMode,
+  type DelegatedLoopRegistry,
+  type DelegatedLoopResult,
+  type DelegatedLoopRunner,
+  type RunDelegatedLoopOptions,
+  reviewLoopRunner,
+  runDelegatedLoop,
+} from './loop-runner'
 // ── MCP → OpenAI tools projection ────────────────────────────────────
 // Helper for eval / orchestrator code that routes through the
 // OpenAI-compat backend and needs the 5 delegation tools surfaced to
 // the model. Sandbox-SDK callers discover tools via the runtime's MCP
 // mount and don't need this projection.
 export { mcpToolsForRuntimeMcp, mcpToolsForRuntimeMcpSubset } from './mcp/openai-tools'
-
 // ── Chat-model resolution ────────────────────────────────────────────
 // Router catalog fetch + fail-closed id validation + precedence resolver.
 export type { ModelInfo, ResolvedChatModel, RouterEnv } from './model-resolution'
diff --git a/src/loop-runner.ts b/src/loop-runner.ts
new file mode 100644
index 0000000..058aba4
--- /dev/null
+++ b/src/loop-runner.ts
@@ -0,0 +1,143 @@
+/**
+ * @experimental
+ *
+ * `runDelegatedLoop` — the configured delegated loop-runner.
+ *
+ * One typed entrypoint a worker agent (or a scheduled routine) calls to run a
+ * disciplined loop in a chosen MODE, over agent-runtime's hardened engines:
+ *
+ *   code         → build-in-a-loop via the coder delegate (no-op + secret floor,
+ *                  optional reviewer gate, winner-selection)
+ *   review       → code mode with a REQUIRED reviewer (the gate is the point)
+ *   research     → research-in-a-loop with valid-only KB growth (createKbGate)
+ *   audit        → analyze trace/run data → findings (runAnalystLoop, caller-wired)
+ *   self-improve → identity-gated prompt optimization (optimizePrompt, caller-wired)
+ *   dynamic      → agent-authored topology (runLoop + createDynamicDriver)
+ *
+ * It is intentionally a thin façade: the value is that EVERY product reuses the
+ * one hardened engine instead of forking delegation logic. The dispatcher owns
+ * mode routing, timing, fail-loud on an unregistered mode, and a uniform result
+ * shape; each mode's engine is a pre-configured runner in the registry (build it
+ * with the factories below, or inject your own / a stub).
+ */
+
+import { ConfigError } from './errors'
+import type { LoopSandboxClient } from './loops'
+import {
+  type CoderReviewer,
+  type CoderWinnerSelection,
+  createDefaultCoderDelegate,
+  type DelegateRunCtx,
+} from './mcp/delegates'
+import type { DelegateCodeArgs } from './mcp/types'
+import type { CoderOutput } from './profiles/coder'
+
+/** @experimental */
+export type DelegatedLoopMode =
+  | 'code'
+  | 'review'
+  | 'research'
+  | 'audit'
+  | 'self-improve'
+  | 'dynamic'
+
+/** @experimental A pre-configured loop for one mode. Returns the mode's raw
+ *  output; the dispatcher wraps it in a {@link DelegatedLoopResult}. */
+export type DelegatedLoopRunner<T = unknown> = (signal: AbortSignal) => Promise<T>
+
+/** @experimental Mode → configured runner. Partial: only register the modes a
+ *  given product/routine actually uses. */
+export type DelegatedLoopRegistry = Partial<Record<DelegatedLoopMode, DelegatedLoopRunner>>
+
+/** @experimental Uniform result — never throws from a registered runner; a
+ *  thrown engine becomes `{ ok: false, error }` so a routine can record + move on. */
+export interface DelegatedLoopResult<T = unknown> {
+  mode: DelegatedLoopMode
+  ok: boolean
+  output?: T
+  error?: string
+  durationMs: number
+}
+
+/** @experimental */
+export interface RunDelegatedLoopOptions {
+  signal?: AbortSignal
+  /** Clock override for deterministic tests. */
+  now?: () => number
+}
+
+/**
+ * @experimental
+ *
+ * Dispatch a configured loop by mode. Fails loud (throws `ConfigError`) when no
+ * runner is registered for the mode — a routine pointed at an unwired mode is a
+ * config bug, not a silent no-op. A runner that throws is captured as
+ * `{ ok: false }` so unattended runs record the failure rather than crash.
+ */
+export async function runDelegatedLoop<T = unknown>(
+  mode: DelegatedLoopMode,
+  registry: DelegatedLoopRegistry,
+  options: RunDelegatedLoopOptions = {},
+): Promise<DelegatedLoopResult<T>> {
+  const runner = registry[mode] as DelegatedLoopRunner<T> | undefined
+  if (!runner) {
+    throw new ConfigError(
+      `runDelegatedLoop: no runner registered for mode '${mode}' (registered: ${
+        Object.keys(registry).join(', ') || 'none'
+      })`,
+    )
+  }
+  const now = options.now ?? Date.now
+  const signal = options.signal ?? new AbortController().signal
+  const start = now()
+  try {
+    const output = await runner(signal)
+    return { mode, ok: true, output, durationMs: now() - start }
+  } catch (err) {
+    return {
+      mode,
+      ok: false,
+      error: err instanceof Error ? err.message : String(err),
+      durationMs: now() - start,
+    }
+  }
+}
+
+/** @experimental Options for the default `code`/`review` runner. */
+export interface CoderLoopRunnerOptions {
+  sandboxClient: LoopSandboxClient
+  /** What to build — the delegate args (goal, repoRoot, variants, config, …). */
+  args: DelegateCodeArgs
+  /** Adversarial reviewer. REQUIRED for `review` mode (see `reviewLoopRunner`). */
+  reviewer?: CoderReviewer
+  /** Winner-selection strategy. Default `highest-score`. */
+  winnerSelection?: CoderWinnerSelection
+  /** Harnesses for `variants > 1` fanout. */
+  fanoutHarnesses?: string[]
+}
+
+/** @experimental Build a `code`-mode runner over the hardened coder delegate. */
+export function coderLoopRunner(options: CoderLoopRunnerOptions): DelegatedLoopRunner<CoderOutput> {
+  const delegate = createDefaultCoderDelegate({
+    sandboxClient: options.sandboxClient,
+    ...(options.reviewer ? { reviewer: options.reviewer } : {}),
+    ...(options.winnerSelection ? { winnerSelection: options.winnerSelection } : {}),
+    ...(options.fanoutHarnesses ? { fanoutHarnesses: options.fanoutHarnesses } : {}),
+  })
+  return async (signal) => {
+    const ctx: DelegateRunCtx = { signal, report: () => {} }
+    return delegate(options.args, ctx)
+  }
+}
+
+/**
+ * @experimental
+ *
+ * `review` mode = `code` with a REQUIRED reviewer. The gate is the whole point,
+ * so the type forces a reviewer (a "review loop" with no reviewer is a code loop).
+ */
+export function reviewLoopRunner(
+  options: CoderLoopRunnerOptions & { reviewer: CoderReviewer },
+): DelegatedLoopRunner<CoderOutput> {
+  return coderLoopRunner(options)
+}
diff --git a/tests/loop-runner.test.ts b/tests/loop-runner.test.ts
new file mode 100644
index 0000000..2e67dc3
--- /dev/null
+++ b/tests/loop-runner.test.ts
@@ -0,0 +1,71 @@
+import type { CreateSandboxOptions, SandboxEvent, SandboxInstance } from '@tangle-network/sandbox'
+import { describe, expect, it } from 'vitest'
+import { ConfigError } from '../src/errors'
+import { coderLoopRunner, type DelegatedLoopRegistry, runDelegatedLoop } from '../src/loop-runner'
+import type { CoderOutput } from '../src/profiles/coder'
+
+const clock = () => {
+  let t = 0
+  return () => (t += 100)
+}
+
+describe('runDelegatedLoop — mode dispatch', () => {
+  it('routes to the registered runner and returns a uniform ok result', async () => {
+    const registry: DelegatedLoopRegistry = {
+      research: async () => ({ grounded: 3 }),
+    }
+    const r = await runDelegatedLoop('research', registry, { now: clock() })
+    expect(r.mode).toBe('research')
+    expect(r.ok).toBe(true)
+    expect(r.output).toEqual({ grounded: 3 })
+    expect(r.durationMs).toBeGreaterThan(0)
+  })
+
+  it('fails loud (ConfigError) on a mode with no registered runner', async () => {
+    await expect(runDelegatedLoop('audit', {})).rejects.toThrow(ConfigError)
+    await expect(runDelegatedLoop('audit', {})).rejects.toThrow(
+      /no runner registered for mode 'audit'/,
+    )
+  })
+
+  it('captures a thrown engine as ok:false (unattended runs record, not crash)', async () => {
+    const registry: DelegatedLoopRegistry = {
+      'self-improve': async () => {
+        throw new Error('reflection model 502')
+      },
+    }
+    const r = await runDelegatedLoop('self-improve', registry, { now: clock() })
+    expect(r.ok).toBe(false)
+    expect(r.error).toBe('reflection model 502')
+    expect(r.durationMs).toBeGreaterThan(0)
+  })
+})
+
+describe('coderLoopRunner — code mode over the hardened delegate', () => {
+  it('runs the coder delegate and returns its winning CoderOutput', async () => {
+    const out: CoderOutput = {
+      branch: 'feat/fix',
+      patch: 'diff --git a/src/x.ts b/src/x.ts\n--- a/src/x.ts\n+++ b/src/x.ts\n+ok\n',
+      testResult: { passed: true, output: 'ok' },
+      typecheckResult: { passed: true, output: 'ok' },
+      diffStats: { filesChanged: 1, insertions: 1, deletions: 0 },
+    }
+    const sandboxClient = {
+      async create(_o?: CreateSandboxOptions): Promise<SandboxInstance> {
+        return {
+          async *streamPrompt() {
+            yield { type: 'result', data: { result: out } } satisfies SandboxEvent
+          },
+        } as unknown as SandboxInstance
+      },
+    }
+    const runner = coderLoopRunner({
+      sandboxClient,
+      args: { goal: 'fix x', repoRoot: '/repo' },
+    })
+    const registry: DelegatedLoopRegistry = { code: runner }
+    const r = await runDelegatedLoop<CoderOutput>('code', registry)
+    expect(r.ok).toBe(true)
+    expect(r.output?.branch).toBe('feat/fix')
+  })
+})

From eea49c9549a603b7b393457fb0371e52b338e8bd Mon Sep 17 00:00:00 2001
From: Drew Stone <drewstone329@gmail.com>
Date: Sun, 31 May 2026 03:35:18 -0600
Subject: [PATCH 5/9] =?UTF-8?q?chore(release):=200.37.0=20=E2=80=94=20runD?=
 =?UTF-8?q?elegatedLoop=20configured=20loop-runner=20over=20the=20hardened?=
 =?UTF-8?q?=20engines=20(#828)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package.json b/package.json
index 556408a..3081efb 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@tangle-network/agent-runtime",
-  "version": "0.36.0",
+  "version": "0.37.0",
   "description": "Reusable runtime lifecycle for domain-specific agents.",
   "homepage": "https://github.com/tangle-network/agent-runtime#readme",
   "repository": {

From dc589965ec8d351ad6d66fff26d22582d87f3c99 Mon Sep 17 00:00:00 2001
From: Drew Stone <drewstone329@gmail.com>
Date: Sun, 31 May 2026 03:41:26 -0600
Subject: [PATCH 6/9] feat(loop-runner): default runner factories for all six
 modes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Rounds out the configured loop-runner (#828) — every mode now has a default
factory wiring a shipped engine, so a routine can run any of them with config
only (still registry-injectable for stubs/custom engines):

- dynamicLoopRunner   — runLoop + createDynamicDriver (agent-authored topology)
- researchLoopRunner  — research-in-a-loop with valid-only KB growth: each round
  research → createKbGate (fail-closed) → accept clean facts, re-research vetoed
  ones up to maxRounds (correct-on-veto), and RETURN final vetoes (escalate,
  never silently drop). VetoedFact carries the gate reason.
- selfImproveLoopRunner — optimizePrompt (identity-gated)
- auditLoopRunner     — runAnalystLoop over captured trace/run data
(code/review shipped previously.)

Tests: research single-round accept/veto + escalation, research correct-on-veto
across rounds, dynamic real runLoop via stub. Full suite 427 green, tsc + biome
clean. Completes the engine (#827 target) + runner; the thin scheduled-routine
wrapper is the only remaining layer.
---
 src/index.ts              |   8 +++
 src/loop-runner.ts        | 137 +++++++++++++++++++++++++++++++++++++-
 tests/loop-runner.test.ts |  92 +++++++++++++++++++++++++
 3 files changed, 236 insertions(+), 1 deletion(-)

diff --git a/src/index.ts b/src/index.ts
index 3cda443..e86335e 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -107,15 +107,23 @@ export {
 } from './errors'
 // ── Delegated loop-runner (configured code/research/review/audit/self-improve/dynamic) ──
 export {
+  auditLoopRunner,
   type CoderLoopRunnerOptions,
   coderLoopRunner,
   type DelegatedLoopMode,
   type DelegatedLoopRegistry,
   type DelegatedLoopResult,
   type DelegatedLoopRunner,
+  type DynamicLoopRunnerOptions,
+  dynamicLoopRunner,
+  type ResearchLoopResult,
+  type ResearchLoopRunnerOptions,
   type RunDelegatedLoopOptions,
+  researchLoopRunner,
   reviewLoopRunner,
   runDelegatedLoop,
+  selfImproveLoopRunner,
+  type VetoedFact,
 } from './loop-runner'
 // ── MCP → OpenAI tools projection ────────────────────────────────────
 // Helper for eval / orchestrator code that routes through the
diff --git a/src/loop-runner.ts b/src/loop-runner.ts
index 058aba4..d93876b 100644
--- a/src/loop-runner.ts
+++ b/src/loop-runner.ts
@@ -21,14 +21,33 @@
  * with the factories below, or inject your own / a stub).
  */
 
+import type { Scenario } from '@tangle-network/agent-eval/campaign'
+import { runAnalystLoop } from './analyst-loop'
+import type { RunAnalystLoopOpts, RunAnalystLoopResult } from './analyst-loop/types'
 import { ConfigError } from './errors'
-import type { LoopSandboxClient } from './loops'
+import {
+  type OptimizePromptOptions,
+  type OptimizePromptResult,
+  optimizePrompt,
+} from './improvement/optimize-prompt'
+import {
+  type AgentRunSpec,
+  createDynamicDriver,
+  type DynamicDecision,
+  type LoopResult,
+  type LoopSandboxClient,
+  type OutputAdapter,
+  runLoop,
+  type TopologyPlanner,
+  type Validator,
+} from './loops'
 import {
   type CoderReviewer,
   type CoderWinnerSelection,
   createDefaultCoderDelegate,
   type DelegateRunCtx,
 } from './mcp/delegates'
+import { type CreateKbGateOptions, createKbGate, type FactCandidate } from './mcp/kb-gate'
 import type { DelegateCodeArgs } from './mcp/types'
 import type { CoderOutput } from './profiles/coder'
 
@@ -141,3 +160,119 @@ export function reviewLoopRunner(
 ): DelegatedLoopRunner<CoderOutput> {
   return coderLoopRunner(options)
 }
+
+/** @experimental Options for the default `dynamic` runner. */
+export interface DynamicLoopRunnerOptions<Task, Output> {
+  sandboxClient: LoopSandboxClient
+  /** The agent-authored topology planner (e.g. `createSandboxPlanner(...)`). */
+  planner: TopologyPlanner<Task, Output>
+  task: Task
+  output: OutputAdapter<Output>
+  validator?: Validator<Output>
+  /** Exactly one of `agentRun` / `agentRuns` (runLoop validates). */
+  agentRun?: AgentRunSpec<Task>
+  agentRuns?: AgentRunSpec<Task>[]
+  maxIterations?: number
+  maxFanout?: number
+}
+
+/** @experimental `dynamic` mode — agent-authored topology over `runLoop`. */
+export function dynamicLoopRunner<Task, Output>(
+  o: DynamicLoopRunnerOptions<Task, Output>,
+): DelegatedLoopRunner<LoopResult<Task, Output, DynamicDecision>> {
+  return async (signal) =>
+    runLoop<Task, Output, DynamicDecision>({
+      driver: createDynamicDriver<Task, Output>({
+        planner: o.planner,
+        ...(o.maxIterations !== undefined ? { maxIterations: o.maxIterations } : {}),
+        ...(o.maxFanout !== undefined ? { maxFanout: o.maxFanout } : {}),
+      }),
+      ...(o.agentRun ? { agentRun: o.agentRun } : {}),
+      ...(o.agentRuns ? { agentRuns: o.agentRuns } : {}),
+      output: o.output,
+      ...(o.validator ? { validator: o.validator } : {}),
+      task: o.task,
+      ctx: { sandboxClient: o.sandboxClient, signal },
+      ...(o.maxIterations !== undefined ? { maxIterations: o.maxIterations } : {}),
+    })
+}
+
+/** @experimental A fact rejected at the KB gate — surfaced, never dropped. */
+export interface VetoedFact {
+  candidate: FactCandidate
+  vetoedBy?: string
+  reason?: string
+}
+
+/** @experimental Outcome of one `research`-mode run (see `researchLoopRunner`). */
+export interface ResearchLoopResult {
+  /** Facts that passed the fail-closed gate (all rounds) — safe to write to the KB. */
+  accepted: FactCandidate[]
+  /** Facts vetoed in the last round that gated candidates — escalate, do not silently drop. */
+  vetoed: VetoedFact[]
+  /** `research` calls made (a call that returned no candidates still counts). */
+  rounds: number
+}
+
+/** @experimental Options for the default `research` runner. */
+export interface ResearchLoopRunnerOptions {
+  /**
+   * The research engine (the consumer's web/doc searcher + extractor). Called
+   * each round with the prior round's vetoes so it can re-research the gaps.
+   * Returns fact candidates carrying their grounding (`verbatimPassage` +
+   * `sourceText`).
+   */
+  research: (round: number, vetoed: VetoedFact[]) => Promise<FactCandidate[]>
+  /** Gate config (extra judges, self-artifact kinds, …). The floor is always on. */
+  gate?: CreateKbGateOptions
+  /** Max research rounds (correct-on-veto remediation). Default 1. */
+  maxRounds?: number
+}
+
+/**
+ * @experimental `research` mode — research-in-a-loop with valid-only KB growth.
+ *
+ * Each round: research → gate every candidate (fail-closed; passage MUST be in
+ * the source) → accept the clean ones → re-research the vetoed ones next round,
+ * up to `maxRounds` (clamped ≥ 1; NaN → 1). Vetoes from the last gated round are
+ * RETURNED (escalate, never silently dropped) so the caller audits vs retries.
+ */
+export function researchLoopRunner(
+  o: ResearchLoopRunnerOptions,
+): DelegatedLoopRunner<ResearchLoopResult> {
+  const gate = createKbGate(o.gate)
+  const maxRounds = Math.max(1, Math.trunc(o.maxRounds ?? 1) || 1) // NaN → 1, not zero rounds
+  return async (signal) => {
+    const accepted: FactCandidate[] = []
+    let vetoed: VetoedFact[] = []
+    let rounds = 0
+    for (let round = 0; round < maxRounds; round += 1) {
+      if (signal.aborted) break
+      rounds += 1
+      const candidates = await o.research(round, vetoed)
+      if (candidates.length === 0) break
+      vetoed = []
+      for (const c of candidates) {
+        const v = await gate(c)
+        if (v.accepted) accepted.push(c)
+        else vetoed.push({ candidate: c, vetoedBy: v.vetoedBy, reason: v.reason })
+      }
+      if (vetoed.length === 0) break
+    }
+    return { accepted, vetoed, rounds }
+  }
+}
+
+/** @experimental `self-improve` mode — identity-gated prompt optimization. */
+export function selfImproveLoopRunner<TScenario extends Scenario, TArtifact>(
+  options: OptimizePromptOptions<TScenario, TArtifact>,
+): DelegatedLoopRunner<OptimizePromptResult<TArtifact, TScenario>> {
+  return async () => optimizePrompt<TScenario, TArtifact>(options)
+}
+
+/** @experimental `audit` mode — analyst loop over captured trace/run data. */
+export function auditLoopRunner<TProposal = unknown, TEdit = unknown>(
+  options: RunAnalystLoopOpts,
+): DelegatedLoopRunner<RunAnalystLoopResult<TProposal, TEdit>> {
+  return async () => runAnalystLoop<TProposal, TEdit>(options)
+}
diff --git a/tests/loop-runner.test.ts b/tests/loop-runner.test.ts
index 2e67dc3..4499c10 100644
--- a/tests/loop-runner.test.ts
+++ b/tests/loop-runner.test.ts
@@ -69,3 +69,95 @@ describe('coderLoopRunner — code mode over the hardened delegate', () => {
     expect(r.output?.branch).toBe('feat/fix')
   })
 })
+
+import { dynamicLoopRunner, researchLoopRunner, type VetoedFact } from '../src/loop-runner'
+import type { AgentRunSpec, OutputAdapter, TopologyPlanner, Validator } from '../src/loops'
+import type { FactCandidate } from '../src/mcp/kb-gate'
+
+const neverAbort = new AbortController().signal
+
+describe('researchLoopRunner — valid-only KB growth with remediation', () => {
+  const grounded: FactCandidate = {
+    claim: 'revenue was 100',
+    verbatimPassage: 'revenue was 100 in 2025',
+    sourceText: 'The filing notes revenue was 100 in 2025.',
+  }
+  const ungrounded: FactCandidate = {
+    claim: 'profit was 50',
+    verbatimPassage: 'profit was 50',
+    sourceText: 'this source says nothing of the sort',
+  }
+
+  it('accepts grounded facts, vetoes ungrounded ones (single round, escalates the veto)', async () => {
+    const runner = researchLoopRunner({ research: async () => [grounded, ungrounded] })
+    const res = await runner(neverAbort)
+    expect(res.rounds).toBe(1)
+    expect(res.accepted).toHaveLength(1)
+    expect(res.accepted[0]?.claim).toBe('revenue was 100')
+    expect(res.vetoed).toHaveLength(1)
+    expect(res.vetoed[0]?.vetoedBy).toBe('passage-present')
+  })
+
+  it('re-researches vetoed facts next round and accepts once grounded (correct-on-veto)', async () => {
+    const research = async (round: number, vetoed: VetoedFact[]): Promise<FactCandidate[]> => {
+      if (round === 0) return [grounded, ungrounded]
+      // round 1: re-ground the previously-vetoed candidate with a real source
+      return vetoed.map((v) => ({
+        ...v.candidate,
+        sourceText: 'a better source: profit was 50 last year',
+      }))
+    }
+    const runner = researchLoopRunner({ research, maxRounds: 2 })
+    const res = await runner(neverAbort)
+    expect(res.rounds).toBe(2)
+    expect(res.accepted.map((f) => f.claim).sort()).toEqual(['profit was 50', 'revenue was 100'])
+    expect(res.vetoed).toHaveLength(0)
+  })
+})
+
+describe('dynamicLoopRunner — agent-authored topology over runLoop', () => {
+  interface T {
+    goal: string
+  }
+  interface O {
+    score: number
+  }
+  it('runs the planner-driven loop and returns a finished LoopResult', async () => {
+    const moves = [{ kind: 'refine' as const, task: { goal: 'g' } }, { kind: 'stop' as const }]
+    let i = 0
+    const planner: TopologyPlanner<T, O> = () => moves[i++]!
+    const output: OutputAdapter<O> = {
+      parse: (events) => ({ score: (events.at(-1)?.data as { score?: number })?.score ?? 0 }),
+    }
+    const validator: Validator<O> = {
+      async validate(o) {
+        return { valid: o.score >= 0.5, score: o.score }
+      },
+    }
+    const spec: AgentRunSpec<T> = {
+      profile: { name: 'w' },
+      name: 'w',
+      taskToPrompt: (t) => t.goal,
+    }
+    const client = {
+      async create() {
+        return {
+          async *streamPrompt() {
+            yield { type: 'result', data: { score: 0.9 } }
+          },
+        } as unknown as import('@tangle-network/sandbox').SandboxInstance
+      },
+    }
+    const runner = dynamicLoopRunner<T, O>({
+      sandboxClient: client,
+      planner,
+      task: { goal: 'g' },
+      output,
+      validator,
+      agentRun: spec,
+    })
+    const res = await runner(neverAbort)
+    expect(res.decision).toBe('done')
+    expect(res.winner?.output.score).toBeCloseTo(0.9, 6)
+  })
+})

From 7f1f96e05a318aec889d185da1174f1fdc8e6cca Mon Sep 17 00:00:00 2001
From: Drew Stone <drewstone329@gmail.com>
Date: Sun, 31 May 2026 03:41:31 -0600
Subject: [PATCH 7/9] =?UTF-8?q?chore(release):=200.38.0=20=E2=80=94=20loop?=
 =?UTF-8?q?-runner=20default=20factories=20for=20all=20six=20modes=20(#828?=
 =?UTF-8?q?)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package.json b/package.json
index 3081efb..8afd346 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@tangle-network/agent-runtime",
-  "version": "0.37.0",
+  "version": "0.38.0",
   "description": "Reusable runtime lifecycle for domain-specific agents.",
   "homepage": "https://github.com/tangle-network/agent-runtime#readme",
   "repository": {

From 3193fb7ffdebe2b580a21809f52e4b1218a57f6e Mon Sep 17 00:00:00 2001
From: Drew Stone <drewstone329@gmail.com>
Date: Sun, 31 May 2026 03:47:19 -0600
Subject: [PATCH 8/9] =?UTF-8?q?feat(loop-runner):=20agent-runtime-loop=20b?=
 =?UTF-8?q?in=20=E2=80=94=20the=20schedulable=20entrypoint?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes the loop-runner (#828): a cron/routine/Makefile invokes
`agent-runtime-loop --mode <mode> --config <module>`. The config module wires
the DelegatedLoopRegistry (with full env/creds access — deps live there, not in
the generic bin), the bin runs the mode, prints the DelegatedLoopResult as JSON,
exits 0 ok / 1 recorded-failure / 2 usage-or-config-error.

- runLoopRunnerCli: pure, IO-free CLI core (mode validation → load registry →
  dispatch → exit code) — exported + unit-tested.
- parseLoopRunnerArgv, DELEGATED_LOOP_MODES, isDelegatedLoopMode exported.
- New bin `agent-runtime-loop` → dist/loop-runner-bin.js (tsup entry + package bin).

Tests: argv parsing (space + = forms), exit 0/1/2 paths (success, recorded
failure, unknown mode, no-runner-for-mode, config load failure). Full suite green,
tsc + biome clean.
---
 package.json                  |   3 +-
 src/index.ts                  |   8 +++
 src/loop-runner-bin.ts        | 126 ++++++++++++++++++++++++++++++++++
 src/loop-runner.ts            |  23 +++++--
 tests/loop-runner-bin.test.ts |  64 +++++++++++++++++
 tsup.config.ts                |   1 +
 6 files changed, 217 insertions(+), 8 deletions(-)
 create mode 100644 src/loop-runner-bin.ts
 create mode 100644 tests/loop-runner-bin.test.ts

diff --git a/package.json b/package.json
index 8afd346..f2d1fed 100644
--- a/package.json
+++ b/package.json
@@ -56,7 +56,8 @@
     }
   },
   "bin": {
-    "agent-runtime-mcp": "./dist/mcp/bin.js"
+    "agent-runtime-mcp": "./dist/mcp/bin.js",
+    "agent-runtime-loop": "./dist/loop-runner-bin.js"
   },
   "files": [
     "dist",
diff --git a/src/index.ts b/src/index.ts
index e86335e..ae1ded6 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -110,12 +110,14 @@ export {
   auditLoopRunner,
   type CoderLoopRunnerOptions,
   coderLoopRunner,
+  DELEGATED_LOOP_MODES,
   type DelegatedLoopMode,
   type DelegatedLoopRegistry,
   type DelegatedLoopResult,
   type DelegatedLoopRunner,
   type DynamicLoopRunnerOptions,
   dynamicLoopRunner,
+  isDelegatedLoopMode,
   type ResearchLoopResult,
   type ResearchLoopRunnerOptions,
   type RunDelegatedLoopOptions,
@@ -125,6 +127,12 @@ export {
   selfImproveLoopRunner,
   type VetoedFact,
 } from './loop-runner'
+export {
+  type LoopRunnerCliArgs,
+  type LoopRunnerCliResult,
+  parseLoopRunnerArgv,
+  runLoopRunnerCli,
+} from './loop-runner-bin'
 // ── MCP → OpenAI tools projection ────────────────────────────────────
 // Helper for eval / orchestrator code that routes through the
 // OpenAI-compat backend and needs the 5 delegation tools surfaced to
diff --git a/src/loop-runner-bin.ts b/src/loop-runner-bin.ts
new file mode 100644
index 0000000..de9981d
--- /dev/null
+++ b/src/loop-runner-bin.ts
@@ -0,0 +1,126 @@
+#!/usr/bin/env node
+/**
+ * @experimental
+ *
+ * `agent-runtime-loop` — the schedulable entrypoint for the configured
+ * delegated loop-runner. A cron job / routine / Makefile target invokes:
+ *
+ *   agent-runtime-loop --mode research --config ./loops.config.js
+ *
+ * The config module wires the registry (with full access to env / creds —
+ * which is why the deps live there, not in this generic bin). It must default-
+ * export a `DelegatedLoopRegistry`, or a `() => DelegatedLoopRegistry | Promise<…>`.
+ * The bin runs the selected mode, prints the `DelegatedLoopResult` as JSON, and
+ * exits 0 on `ok`, 1 on a recorded failure, 2 on a usage/config error.
+ */
+
+import {
+  DELEGATED_LOOP_MODES,
+  type DelegatedLoopMode,
+  type DelegatedLoopRegistry,
+  type DelegatedLoopResult,
+  isDelegatedLoopMode,
+  runDelegatedLoop,
+} from './loop-runner'
+
+/** @experimental Parsed CLI invocation. */
+export interface LoopRunnerCliArgs {
+  mode: string
+  /** Loads the registry — the bin wires this from `--config`; tests inject a stub. */
+  loadRegistry: () => Promise<DelegatedLoopRegistry> | DelegatedLoopRegistry
+  now?: () => number
+}
+
+/** @experimental */
+export interface LoopRunnerCliResult {
+  exitCode: number
+  result?: DelegatedLoopResult
+  error?: string
+}
+
+/**
+ * @experimental
+ *
+ * Pure CLI core (no process / argv / IO) so it's unit-testable: validate the
+ * mode, load the registry, dispatch, map to an exit code (0 ok / 1 failed /
+ * 2 usage/config, incl. a loader yielding nothing). For embedding in custom runners + tests.
+ */
+export async function runLoopRunnerCli(args: LoopRunnerCliArgs): Promise<LoopRunnerCliResult> {
+  if (!isDelegatedLoopMode(args.mode)) {
+    return {
+      exitCode: 2,
+      error: `unknown mode '${args.mode}' (expected one of: ${DELEGATED_LOOP_MODES.join(', ')})`,
+    }
+  }
+  let registry: DelegatedLoopRegistry
+  try {
+    registry = await args.loadRegistry()
+  } catch (err) {
+    return { exitCode: 2, error: `failed to load registry: ${errMsg(err)}` }
+  }
+  if (!registry?.[args.mode]) {
+    return {
+      exitCode: 2,
+      error: `config registers no runner for mode '${args.mode}' (registered: ${
+        Object.keys(registry ?? {}).join(', ') || 'none'
+      })`,
+    }
+  }
+  // runDelegatedLoop throws only on a missing runner (guarded above); a failing
+  // engine is captured as { ok: false } → exit 1, not a crash.
+  const result = await runDelegatedLoop(args.mode as DelegatedLoopMode, registry, {
+    ...(args.now ? { now: args.now } : {}),
+  })
+  return { exitCode: result.ok ? 0 : 1, result }
+}
+
+/** Parse `--mode X --config Y` from an argv tail (`process.argv.slice(2)`). */
+export function parseLoopRunnerArgv(argv: string[]): { mode?: string; config?: string } {
+  const out: { mode?: string; config?: string } = {}
+  for (let i = 0; i < argv.length; i += 1) {
+    const a = argv[i]
+    if (a === '--mode') out.mode = argv[++i]
+    else if (a === '--config') out.config = argv[++i]
+    else if (a?.startsWith('--mode=')) out.mode = a.slice('--mode='.length)
+    else if (a?.startsWith('--config=')) out.config = a.slice('--config='.length)
+  }
+  return out
+}
+
+/** Normalize a config module's default export → a registry. */
+function resolveRegistry(mod: unknown): DelegatedLoopRegistry {
+  const def = (mod as { default?: unknown })?.default ?? mod
+  const value = typeof def === 'function' ? (def as () => unknown)() : def
+  return value as DelegatedLoopRegistry
+}
+
+function errMsg(err: unknown): string {
+  return err instanceof Error ? err.message : String(err)
+}
+
+/** The argv → IO → exit shell. Kept thin; logic lives in `runLoopRunnerCli`. */
+async function main(): Promise<void> {
+  const { mode, config } = parseLoopRunnerArgv(process.argv.slice(2))
+  if (!mode || !config) {
+    process.stderr.write(
+      'usage: agent-runtime-loop --mode <mode> --config <module>\n' +
+        `  modes: ${DELEGATED_LOOP_MODES.join(' | ')}\n` +
+        '  config: a JS/TS module default-exporting a DelegatedLoopRegistry (or a factory)\n',
+    )
+    process.exit(2)
+  }
+  const { pathToFileURL } = await import('node:url')
+  const { resolve } = await import('node:path')
+  const cli = await runLoopRunnerCli({
+    mode,
+    loadRegistry: async () => resolveRegistry(await import(pathToFileURL(resolve(config)).href)),
+  })
+  process.stdout.write(`${JSON.stringify(cli.result ?? { error: cli.error }, null, 2)}\n`)
+  if (cli.error) process.stderr.write(`${cli.error}\n`)
+  process.exit(cli.exitCode)
+}
+
+// Run only as the bin: by file path, or via npm's `.bin` symlink (argv[1] is not realpath'd).
+if (/(loop-runner-bin\.(js|ts|mjs)|agent-runtime-loop)$/.test(process.argv[1] ?? '')) {
+  void main()
+}
diff --git a/src/loop-runner.ts b/src/loop-runner.ts
index d93876b..ede4bb8 100644
--- a/src/loop-runner.ts
+++ b/src/loop-runner.ts
@@ -51,14 +51,23 @@ import { type CreateKbGateOptions, createKbGate, type FactCandidate } from './mc
 import type { DelegateCodeArgs } from './mcp/types'
 import type { CoderOutput } from './profiles/coder'
 
+/** @experimental Every delegated-loop mode as a runtime tuple (the `DelegatedLoopMode` union is derived from it), for validation + CLI surfaces. */
+export const DELEGATED_LOOP_MODES = [
+  'code',
+  'review',
+  'research',
+  'audit',
+  'self-improve',
+  'dynamic',
+] as const // `as const` keeps the literal member types instead of widening to string[]
+
 /** @experimental */
-export type DelegatedLoopMode =
-  | 'code'
-  | 'review'
-  | 'research'
-  | 'audit'
-  | 'self-improve'
-  | 'dynamic'
+export type DelegatedLoopMode = (typeof DELEGATED_LOOP_MODES)[number] // union of the tuple's string literals
+
+/** @experimental Type guard for an untrusted mode string (CLI / config input): true only for a string listed in `DELEGATED_LOOP_MODES`. */
+export function isDelegatedLoopMode(value: unknown): value is DelegatedLoopMode {
+  return typeof value === 'string' && (DELEGATED_LOOP_MODES as readonly string[]).includes(value) // widened so `includes` accepts an arbitrary string
+}
 
 /** @experimental A pre-configured loop for one mode. Returns the mode's raw
  *  output; the dispatcher wraps it in a {@link DelegatedLoopResult}. */
diff --git a/tests/loop-runner-bin.test.ts b/tests/loop-runner-bin.test.ts
new file mode 100644
index 0000000..27420a6
--- /dev/null
+++ b/tests/loop-runner-bin.test.ts
@@ -0,0 +1,64 @@
+import { describe, expect, it } from 'vitest'
+import type { DelegatedLoopRegistry } from '../src/loop-runner'
+import { parseLoopRunnerArgv, runLoopRunnerCli } from '../src/loop-runner-bin'
+
+describe('parseLoopRunnerArgv', () => {
+  it('parses --mode/--config in both space and = forms', () => {
+    expect(parseLoopRunnerArgv(['--mode', 'research', '--config', './c.js'])).toEqual({ // flag and value as separate argv entries
+      mode: 'research',
+      config: './c.js',
+    })
+    expect(parseLoopRunnerArgv(['--mode=code', '--config=./c.js'])).toEqual({ // flag and value joined by `=`
+      mode: 'code',
+      config: './c.js',
+    })
+  })
+})
+
+describe('runLoopRunnerCli', () => { // exit codes pinned here: 0 success · 1 runner threw · 2 unknown mode / no runner / registry load failure
+  it('exit 0 when the selected runner succeeds', async () => {
+    const registry: DelegatedLoopRegistry = { research: async () => ({ grounded: 2 }) }
+    const r = await runLoopRunnerCli({ mode: 'research', loadRegistry: () => registry }) // loadRegistry returns the registry synchronously here
+    expect(r.exitCode).toBe(0)
+    expect(r.result?.ok).toBe(true)
+    expect(r.result?.output).toEqual({ grounded: 2 }) // the runner's raw return value is exposed as `output`
+  })
+
+  it('exit 1 when the runner fails (recorded, not crashed)', async () => {
+    const registry: DelegatedLoopRegistry = {
+      'self-improve': async () => {
+        throw new Error('router 502')
+      },
+    }
+    const r = await runLoopRunnerCli({ mode: 'self-improve', loadRegistry: () => registry })
+    expect(r.exitCode).toBe(1)
+    expect(r.result?.ok).toBe(false)
+    expect(r.result?.error).toBe('router 502') // the thrown Error's message surfaces verbatim
+  })
+
+  it('exit 2 on an unknown mode', async () => {
+    const r = await runLoopRunnerCli({ mode: 'nonsense', loadRegistry: () => ({}) }) // 'nonsense' is not a delegated-loop mode
+    expect(r.exitCode).toBe(2)
+    expect(r.error).toMatch(/unknown mode/)
+  })
+
+  it('exit 2 when the config registers no runner for the mode', async () => {
+    const r = await runLoopRunnerCli({
+      mode: 'audit',
+      loadRegistry: () => ({ code: async () => 1 }), // valid mode requested, but only `code` is registered
+    })
+    expect(r.exitCode).toBe(2)
+    expect(r.error).toMatch(/registers no runner for mode 'audit'/)
+  })
+
+  it('exit 2 when the config module fails to load', async () => {
+    const r = await runLoopRunnerCli({
+      mode: 'code',
+      loadRegistry: () => {
+        throw new Error('module not found') // a throwing loader must be reported via exit 2, not propagate
+      },
+    })
+    expect(r.exitCode).toBe(2)
+    expect(r.error).toMatch(/failed to load registry/)
+  })
+})
diff --git a/tsup.config.ts b/tsup.config.ts
index 67381f8..8c5a06a 100644
--- a/tsup.config.ts
+++ b/tsup.config.ts
@@ -11,6 +11,7 @@ export default defineConfig({
     profiles: 'src/profiles/index.ts',
     'mcp/index': 'src/mcp/index.ts',
     'mcp/bin': 'src/mcp/bin.ts',
+    'loop-runner-bin': 'src/loop-runner-bin.ts',
   },
   format: ['esm'],
   dts: true,

From 210c43c0acd2f2a7649e22c6025b31bd76006961 Mon Sep 17 00:00:00 2001
From: Drew Stone <drewstone329@gmail.com>
Date: Sun, 31 May 2026 03:47:25 -0600
Subject: [PATCH 9/9] =?UTF-8?q?chore(release):=200.39.0=20=E2=80=94=20agen?=
 =?UTF-8?q?t-runtime-loop=20schedulable=20bin=20(#828=20complete)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package.json b/package.json
index f2d1fed..e3d39c1 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@tangle-network/agent-runtime",
-  "version": "0.38.0",
+  "version": "0.39.0",
   "description": "Reusable runtime lifecycle for domain-specific agents.",
   "homepage": "https://github.com/tangle-network/agent-runtime#readme",
   "repository": {