tangle-network · drewstone · May 25, 2026 · May 25, 2026
diff --git a/examples/self-improving-loop/self-improving-loop.ts b/examples/self-improving-loop/self-improving-loop.ts
@@ -14,13 +14,13 @@
 // See README.md for the conceptual map.
 
 import {
-  runJudge,
-  runMultishot,
   type JudgeConfig,
   type MultishotMessage,
   type MultishotPersona,
   type MultishotResult,
   type MultishotShape,
+  runJudge,
+  runMultishot,
 } from '@tangle-network/agent-eval/multishot'
 import type { AgentProfile } from '@tangle-network/sandbox'
 
@@ -48,11 +48,16 @@ function installMockRouter(replies: ScriptedReply[]): () => void {
     return {
       ok: true,
       status: 200,
-      json: async () => ({ choices: [{ message }], usage: { prompt_tokens: 100, completion_tokens: 200 } }),
+      json: async () => ({
+        choices: [{ message }],
+        usage: { prompt_tokens: 100, completion_tokens: 200 },
+      }),
       text: async () => 'ok',
     } as Response
   }) as typeof fetch
-  return () => { global.fetch = original }
+  return () => {
+    global.fetch = original
+  }
 }
 
 // ── 2. A tiny domain — viral content scoring ─────────────────────────────────
@@ -70,8 +75,10 @@ const PERSONAS: FounderPersona[] = [
 ]
 
 const shape: MultishotShape<FounderPersona> = {
-  buildOpener: (p) => `I'm ${p.name}, ${p.domain}. Help me write content that actually gets engagement.`,
-  buildDriverSystemPrompt: (p) => `You are ${p.name} working in ${p.domain}. Push back on vague advice; demand concrete posts.`,
+  buildOpener: (p) =>
+    `I'm ${p.name}, ${p.domain}. Help me write content that actually gets engagement.`,
+  buildDriverSystemPrompt: (p) =>
+    `You are ${p.name} working in ${p.domain}. Push back on vague advice; demand concrete posts.`,
 }
 
 // ── 3. Baseline AgentProfile (v0) — intentionally weak ──────────────────────
@@ -84,20 +91,24 @@ const baseline: AgentProfile = {
 // ── 4. Judge — scores how concrete + audience-fit the agent's output is ────
 
 const dims = [
-  { key: 'concreteness', description: 'Real posts vs vague descriptions (0=descriptions, 10=ready-to-post)' },
-  { key: 'audience_fit', description: 'Tailored to the persona\'s domain (0=generic, 10=spot-on)' },
+  {
+    key: 'concreteness',
+    description: 'Real posts vs vague descriptions (0=descriptions, 10=ready-to-post)',
+  },
+  { key: 'audience_fit', description: "Tailored to the persona's domain (0=generic, 10=spot-on)" },
 ] as const
 
-const conversationJudge: JudgeConfig<{ transcript: MultishotMessage[]; persona: FounderPersona }> = {
-  name: 'content-quality',
-  systemPrompt: 'You are a strict judge. Output ONLY valid JSON.',
-  dimensions: [...dims],
-  buildPrompt: ({ transcript, persona }) =>
-    `Score this agent's output for ${persona.name} (${persona.domain}). 0-10 each.\n\n${transcript
-      .filter((m) => m.role !== 'tool')
-      .map((m) => `${m.role}: ${m.content}`)
-      .join('\n\n')}\n\nRespond with ONLY: {"concreteness":N,"audience_fit":N,"notes":"..."}`,
-}
+const conversationJudge: JudgeConfig<{ transcript: MultishotMessage[]; persona: FounderPersona }> =
+  {
+    name: 'content-quality',
+    systemPrompt: 'You are a strict judge. Output ONLY valid JSON.',
+    dimensions: [...dims],
+    buildPrompt: ({ transcript, persona }) =>
+      `Score this agent's output for ${persona.name} (${persona.domain}). 0-10 each.\n\n${transcript
+        .filter((m) => m.role !== 'tool')
+        .map((m) => `${m.role}: ${m.content}`)
+        .join('\n\n')}\n\nRespond with ONLY: {"concreteness":N,"audience_fit":N,"notes":"..."}`,
+  }
 
 // ── 5. Analyst — reads v0 transcripts + scores, proposes a mutation ────────
 
@@ -106,13 +117,16 @@ interface AnalystFinding {
   proposedMutation: string
 }
 
-async function runAnalyst(v0Runs: Array<{ persona: FounderPersona; result: MultishotResult; score: { composite: number } }>): Promise<AnalystFinding> {
+async function runAnalyst(
+  v0Runs: Array<{ persona: FounderPersona; result: MultishotResult; score: { composite: number } }>,
+): Promise<AnalystFinding> {
   // In a real product the analyst would be an LLM call (@tangle-network/agent-runtime/analyst-loop).
   // Here we synthesise the finding deterministically so the demo is reproducible.
   const worst = [...v0Runs].sort((a, b) => a.score.composite - b.score.composite)[0]
   return {
     rootCause: `${worst.persona.name} run scored ${worst.score.composite.toFixed(1)} — output was too generic, no concrete posts.`,
-    proposedMutation: 'Always include 2 ready-to-post examples tailored to the persona\'s exact domain (use specific verbs, numbers, and audience language).',
+    proposedMutation:
+      "Always include 2 ready-to-post examples tailored to the persona's exact domain (use specific verbs, numbers, and audience language).",
   }
 }
 
@@ -128,10 +142,19 @@ function applyMutation(base: AgentProfile, mutation: string): AgentProfile {
 
 // ── 6. Gate — promote v1 only if it beats v0 by >= delta ───────────────────
 
-function gate(v0Mean: number, v1Mean: number, requiredDelta = 0.5): { ship: boolean; delta: number; reason: string } {
+function gate(
+  v0Mean: number,
+  v1Mean: number,
+  requiredDelta = 0.5,
+): { ship: boolean; delta: number; reason: string } {
   const delta = v1Mean - v0Mean
-  if (delta >= requiredDelta) return { ship: true, delta, reason: `v1 beat v0 by ${delta.toFixed(2)} (>= ${requiredDelta})` }
-  return { ship: false, delta, reason: `v1 only beat v0 by ${delta.toFixed(2)} (< ${requiredDelta})` }
+  if (delta >= requiredDelta)
+    return { ship: true, delta, reason: `v1 beat v0 by ${delta.toFixed(2)} (>= ${requiredDelta})` }
+  return {
+    ship: false,
+    delta,
+    reason: `v1 only beat v0 by ${delta.toFixed(2)} (< ${requiredDelta})`,
+  }
 }
 
 // ── 7. Wire it together ─────────────────────────────────────────────────────
@@ -140,7 +163,11 @@ async function runVariant(profile: AgentProfile, scriptedReplies: ScriptedReply[
   const restore = installMockRouter(scriptedReplies)
   process.env.TANGLE_API_KEY ??= 'test-key'
   try {
-    const runs: Array<{ persona: FounderPersona; result: MultishotResult; score: { composite: number } }> = []
+    const runs: Array<{
+      persona: FounderPersona
+      result: MultishotResult
+      score: { composite: number }
+    }> = []
     for (const persona of PERSONAS) {
       const result = await runMultishot({ profile, persona, shape, maxTurns: 1 })
       const score = await runJudge(conversationJudge, { transcript: result.transcript, persona })
@@ -168,7 +195,8 @@ async function main(): Promise<void> {
   console.log('— Phase 1: v0 baseline run')
   const v0 = await runVariant(baseline, v0Replies)
   console.log(`  v0 mean: ${v0.mean.toFixed(2)} (over ${v0.runs.length} personas)`)
-  for (const r of v0.runs) console.log(`    ${r.persona.id.padEnd(14)} composite=${r.score.composite.toFixed(2)}`)
+  for (const r of v0.runs)
+    console.log(`    ${r.persona.id.padEnd(14)} composite=${r.score.composite.toFixed(2)}`)
 
   console.log('\n— Phase 2: analyst proposes mutation')
   const finding = await runAnalyst(v0.runs)
@@ -180,26 +208,37 @@ async function main(): Promise<void> {
 
   // v1 replies: now concrete + audience-fit
   const v1Replies: ScriptedReply[] = [
-    { text: 'Here are 2 tweets for Maya: "Just opened our 50th retailer in TX — onboarding playbook is up on Notion." / "Why we said no to Kroger: margin math + ops bandwidth."' },
+    {
+      text: 'Here are 2 tweets for Maya: "Just opened our 50th retailer in TX — onboarding playbook is up on Notion." / "Why we said no to Kroger: margin math + ops bandwidth."',
+    },
     { text: '{"concreteness":8,"audience_fit":9,"notes":"concrete + retail-specific"}' },
-    { text: 'Here are 2 LinkedIn posts for Theo: "We cut MRR churn 32% by routing every renewal through a forecasted-risk score." / "Why your B2B PLG playbook stalls at $5M ARR (and what to do)."' },
+    {
+      text: 'Here are 2 LinkedIn posts for Theo: "We cut MRR churn 32% by routing every renewal through a forecasted-risk score." / "Why your B2B PLG playbook stalls at $5M ARR (and what to do)."',
+    },
     { text: '{"concreteness":9,"audience_fit":8,"notes":"B2B-specific metrics"}' },
-    { text: 'Two TikTok hooks for Aurora: "POV: you finally found the foundation that matches NC15 + has SPF" / "What I wish I knew before booking my first brand deal at 50k followers."' },
+    {
+      text: 'Two TikTok hooks for Aurora: "POV: you finally found the foundation that matches NC15 + has SPF" / "What I wish I knew before booking my first brand deal at 50k followers."',
+    },
     { text: '{"concreteness":8,"audience_fit":9,"notes":"creator-economy-specific"}' },
   ]
 
   console.log('\n— Phase 4: v1 re-run')
   const v1Result = await runVariant(v1, v1Replies)
   console.log(`  v1 mean: ${v1Result.mean.toFixed(2)} (over ${v1Result.runs.length} personas)`)
-  for (const r of v1Result.runs) console.log(`    ${r.persona.id.padEnd(14)} composite=${r.score.composite.toFixed(2)}`)
+  for (const r of v1Result.runs)
+    console.log(`    ${r.persona.id.padEnd(14)} composite=${r.score.composite.toFixed(2)}`)
 
   console.log('\n— Phase 5: gate decision')
   const verdict = gate(v0.mean, v1Result.mean)
-  console.log(`  ship: ${verdict.ship} | delta: ${verdict.delta >= 0 ? '+' : ''}${verdict.delta.toFixed(2)} | ${verdict.reason}`)
+  console.log(
+    `  ship: ${verdict.ship} | delta: ${verdict.delta >= 0 ? '+' : ''}${verdict.delta.toFixed(2)} | ${verdict.reason}`,
+  )
 
   if (verdict.ship) {
     console.log('\n═══ PROMOTED v1 → production ═══')
-    console.log('In a real product the new systemPrompt would land in the production composer\nand subsequent chat turns would use it. See agent-eval-adoption skill Phase 3.')
+    console.log(
+      'In a real product the new systemPrompt would land in the production composer\nand subsequent chat turns would use it. See agent-eval-adoption skill Phase 3.',
+    )
   } else {
     console.log('\n═══ HELD — keep v0 ═══')
   }

diff --git a/src/mcp/bin-helpers.ts b/src/mcp/bin-helpers.ts
@@ -82,7 +82,10 @@ const KNOWN_HARNESSES: ReadonlyArray<LocalHarness> = ['claude', 'codex', 'openco
 
 function parseHarnesses(raw: string | undefined): ReadonlyArray<LocalHarness> | undefined {
   if (!raw) return undefined
-  const parts = raw.split(',').map((s) => s.trim()).filter(Boolean)
+  const parts = raw
+    .split(',')
+    .map((s) => s.trim())
+    .filter(Boolean)
   if (parts.length === 0) return undefined
   for (const part of parts) {
     if (!KNOWN_HARNESSES.includes(part as LocalHarness)) {

diff --git a/src/mcp/in-process-executor.ts b/src/mcp/in-process-executor.ts
@@ -37,12 +37,12 @@ import { randomUUID } from 'node:crypto'
 import type { CreateSandboxOptions, SandboxEvent, SandboxInstance } from '@tangle-network/sandbox'
 import type { LoopSandboxClient, LoopSandboxPlacement } from '../loops'
 import type { DelegationExecutor } from './executor'
-import { runLocalHarness, type LocalHarness } from './local-harness'
+import { type LocalHarness, runLocalHarness } from './local-harness'
 import {
   captureWorktreeDiff,
   createWorktree,
-  removeWorktree,
   type GitRunner,
+  removeWorktree,
   type WorktreeHandle,
 } from './worktree'
 
@@ -85,7 +85,11 @@ export interface InProcessExecutorOptions {
    * Test seam — override the post-check runner. Defaults to spawning the
    * configured `testCmd` / `typecheckCmd` via `child_process.spawn`.
    */
-  runPostCheck?: (cmd: string, cwd: string, signal?: AbortSignal) => Promise<{ exitCode: number; stdout: string; stderr: string }>
+  runPostCheck?: (
+    cmd: string,
+    cwd: string,
+    signal?: AbortSignal,
+  ) => Promise<{ exitCode: number; stdout: string; stderr: string }>
 }
 
 /** @experimental */
@@ -125,7 +129,10 @@ const DEFAULT_POSTCHECK_TIMEOUT_MS = 2 * 60 * 1000
  * @experimental
  */
 export function createInProcessExecutor(options: InProcessExecutorOptions): DelegationExecutor {
-  const harnesses = options.harnesses && options.harnesses.length > 0 ? [...options.harnesses] : (['claude'] as const)
+  const harnesses =
+    options.harnesses && options.harnesses.length > 0
+      ? [...options.harnesses]
+      : (['claude'] as const)
   const runHarness = options.runHarness ?? runLocalHarness
   const runPostCheck = options.runPostCheck ?? defaultRunPostCheck
 
@@ -145,10 +152,21 @@ export function createInProcessExecutor(options: InProcessExecutorOptions): Dele
         id: `in-process-${runId}`,
         __inProcess: { runId, harness },
         // eslint-disable-next-line require-yield
-        async *streamPrompt(this: VirtualSandbox, message: string | unknown[], promptOpts?: { signal?: AbortSignal }): AsyncGenerator<SandboxEvent> {
-          const taskPrompt = typeof message === 'string'
-            ? message
-            : message.map((p) => (typeof p === 'object' && p && 'text' in p ? String((p as { text: unknown }).text) : '')).join('\n')
+        async *streamPrompt(
+          this: VirtualSandbox,
+          message: string | unknown[],
+          promptOpts?: { signal?: AbortSignal },
+        ): AsyncGenerator<SandboxEvent> {
+          const taskPrompt =
+            typeof message === 'string'
+              ? message
+              : message
+                  .map((p) =>
+                    typeof p === 'object' && p && 'text' in p
+                      ? String((p as { text: unknown }).text)
+                      : '',
+                  )
+                  .join('\n')
 
           let worktree: WorktreeHandle | undefined
           try {
@@ -198,18 +216,22 @@ export function createInProcessExecutor(options: InProcessExecutorOptions): Dele
             // Optional post-checks. Each runs in the WORKTREE so it sees the
             // harness's edits.
             const testCheck = options.testCmd
-              ? await runPostCheck(options.testCmd, worktree.path, promptOpts?.signal).catch((err) => ({
-                  exitCode: -1,
-                  stdout: '',
-                  stderr: err instanceof Error ? err.message : String(err),
-                }))
+              ? await runPostCheck(options.testCmd, worktree.path, promptOpts?.signal).catch(
+                  (err) => ({
+                    exitCode: -1,
+                    stdout: '',
+                    stderr: err instanceof Error ? err.message : String(err),
+                  }),
+                )
               : { exitCode: 0, stdout: '', stderr: '' }
             const typecheckCheck = options.typecheckCmd
-              ? await runPostCheck(options.typecheckCmd, worktree.path, promptOpts?.signal).catch((err) => ({
-                  exitCode: -1,
-                  stdout: '',
-                  stderr: err instanceof Error ? err.message : String(err),
-                }))
+              ? await runPostCheck(options.typecheckCmd, worktree.path, promptOpts?.signal).catch(
+                  (err) => ({
+                    exitCode: -1,
+                    stdout: '',
+                    stderr: err instanceof Error ? err.message : String(err),
+                  }),
+                )
               : { exitCode: 0, stdout: '', stderr: '' }
 
             const coderOutput = {
@@ -224,9 +246,10 @@ export function createInProcessExecutor(options: InProcessExecutorOptions): Dele
                 output: tail(typecheckCheck.stderr || typecheckCheck.stdout, 4000),
               },
               diffStats: diff.stats,
-              reviewerNotes: harnessResult.exitCode === 0
-                ? undefined
-                : `harness ${harness} exited ${harnessResult.exitCode}${harnessResult.timedOut ? ' (timed out)' : ''}`,
+              reviewerNotes:
+                harnessResult.exitCode === 0
+                  ? undefined
+                  : `harness ${harness} exited ${harnessResult.exitCode}${harnessResult.timedOut ? ' (timed out)' : ''}`,
             }
 
             // The terminal event the coderProfile parser looks for.
@@ -286,10 +309,16 @@ async function defaultRunPostCheck(
     const child = spawn('sh', ['-c', cmd], { cwd, stdio: 'pipe' })
     let stdout = ''
     let stderr = ''
-    child.stdout?.on('data', (c) => { stdout += String(c) })
-    child.stderr?.on('data', (c) => { stderr += String(c) })
+    child.stdout?.on('data', (c) => {
+      stdout += String(c)
+    })
+    child.stderr?.on('data', (c) => {
+      stderr += String(c)
+    })
     if (signal) {
-      const onAbort = () => { if (!child.killed) child.kill('SIGTERM') }
+      const onAbort = () => {
+        if (!child.killed) child.kill('SIGTERM')
+      }
       if (signal.aborted) onAbort()
       else signal.addEventListener('abort', onAbort, { once: true })
     }
@@ -299,7 +328,10 @@ async function defaultRunPostCheck(
     if (typeof (killTimer as { unref?: () => void }).unref === 'function') {
       ;(killTimer as { unref: () => void }).unref()
     }
-    child.on('error', (err) => { clearTimeout(killTimer); reject(err) })
+    child.on('error', (err) => {
+      clearTimeout(killTimer)
+      reject(err)
+    })
     child.on('close', (code) => {
       clearTimeout(killTimer)
       resolve({ exitCode: code ?? -1, stdout, stderr })

diff --git a/src/mcp/index.ts b/src/mcp/index.ts
@@ -29,24 +29,15 @@ export type {
   SiblingSandboxExecutorOptions,
 } from './executor'
 export { createFleetWorkspaceExecutor, createSiblingSandboxExecutor } from './executor'
+export type { FeedbackEvent, FeedbackStore } from './feedback-store'
+export { eventToSnapshot, InMemoryFeedbackStore } from './feedback-store'
 export type {
   InProcessExecutorDescribePlacement,
   InProcessExecutorOptions,
 } from './in-process-executor'
 export { createInProcessExecutor } from './in-process-executor'
 export type { LocalHarness, LocalHarnessResult, RunLocalHarnessOptions } from './local-harness'
 export { runLocalHarness } from './local-harness'
-export type {
-  CreateWorktreeOptions,
-  DiffOptions,
-  DiffResult,
-  GitRunner,
-  RemoveWorktreeOptions,
-  WorktreeHandle,
-} from './worktree'
-export { captureWorktreeDiff, createWorktree, removeWorktree } from './worktree'
-export type { FeedbackEvent, FeedbackStore } from './feedback-store'
-export { eventToSnapshot, InMemoryFeedbackStore } from './feedback-store'
 export { mcpToolsForRuntimeMcp, mcpToolsForRuntimeMcpSubset } from './openai-tools'
 export type {
   JsonRpcMessage,
@@ -130,3 +121,12 @@ export type {
   ResearchOutputShape,
   ResearchSource,
 } from './types'
+export type {
+  CreateWorktreeOptions,
+  DiffOptions,
+  DiffResult,
+  GitRunner,
+  RemoveWorktreeOptions,
+  WorktreeHandle,
+} from './worktree'
+export { captureWorktreeDiff, createWorktree, removeWorktree } from './worktree'