diff --git a/examples/agents-of-all-shapes/README.md b/examples/agents-of-all-shapes/README.md
new file mode 100644
index 0000000..cfdbc36
--- /dev/null
+++ b/examples/agents-of-all-shapes/README.md
@@ -0,0 +1,69 @@
+# Agents of all shapes → one Tangle Intelligence pipe
+
+Proof that Tangle Intelligence works with **any agent, not just our sandbox**.
+Every shape — the Tangle runtime, an OpenAI-compatible router (tcloud /
+OpenRouter), a Mastra agent, the Claude Agent SDK, a Python agno agent —
+converges on the **same** canonical OpenTelemetry GenAI spans, and the **same**
+in-process engine produces the decision packet:
+
+```
+your agent (any framework)
+   → OTel GenAI spans (gen_ai.request.model, gen_ai.usage.*, score)
+      → fromOtelSpans()  →  RunRecord[]
+         → analyzeRuns() →  InsightReport   (composite, lift CI, Pareto,
+                                             failureModes, recommendations)
+```
+
+No sandbox. No deploy. No server. The analysis runs **in-process**.
+
+## Run it
+
+```bash
+# Verified QA path — in-process, no key, no infra:
+npx tsx examples/agents-of-all-shapes/run.ts
+
+# CI verification (what proves it):
+pnpm test -- tests/agents-of-all-shapes.test.ts
+```
+
+Set `TANGLE_API_KEY=sk-tan-...` to *also* POST the same spans to the hosted
+`/v1/otlp/v1/traces` ingest for the dashboard — identical analysis, server-side.
+
+## The one contract every shape meets
+
+`shared/intelligence.ts` is the whole integration surface. A shape only has to
+emit OTel spans carrying the standard GenAI attributes plus a `score`:
+
+| attribute | meaning |
+|---|---|
+| `gen_ai.request.model` | model snapshot (also `llm.model`, `tangle.model`) |
+| `gen_ai.usage.input_tokens` / `output_tokens` | token usage |
+| `gen_ai.usage.cost_usd` | cost (also `cost.usd`) |
+| `score` | your eval/judge/rubric outcome 0..1 (also `tangle.score`, `eval.score`) |
+| an `ERROR`-status span's `name` | → `RunRecord.failureMode` |
+
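+The model and token attributes are **standard OpenTelemetry GenAI semantic
+conventions** — most frameworks already emit them. `gen_ai.usage.cost_usd`
+and `score` are extensions on top of that standard, so you add those yourself.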
+
+## The shapes
+
+| Shape | File | Live wiring |
+|---|---|---|
+| **Tangle runtime / router (tcloud)** | `shapes.ts` → `tangleRuntimeRuns` | `createOtelExporter` + `loopEventToOtelSpan` (see `examples/with-intelligence-export`) |
+| **OpenAI-compatible** (tcloud / OpenRouter / OpenAI / vLLM) | `shapes.ts` → `openAiCompatibleRuns` | any OpenAI client at the router's `baseURL`; emit a GenAI span per call |
+| **Mastra** | `shapes.ts` → `mastraRuns` | Mastra's native OTLP exporter → `${INTELLIGENCE_BASE}/v1/traces` (`INTELLIGENCE_BASE` already ends in `/v1/otlp`) |
+| **Claude Agent SDK** | `shapes.ts` → `claudeAgentSdkRuns` | wrap `query()`, one GenAI span per turn from `msg.usage` |
+| **Python agno** | `python-agno/agno_to_intelligence.py` | agno run → OTLP/HTTP POST (or `pip install agent-eval-rpc`) |
+
+The TypeScript shapes ship deterministic batches so the showcase is
+**verifiable in CI with no key** (`tests/agents-of-all-shapes.test.ts`). Each
+shape's header comment shows the exact live wiring — swap the batch for your
+framework's real telemetry and it lands on the identical engine.
+
+## Why this matters
+
+The integration point is the **OTel wire**, not the Tangle SDK or sandbox. Any
+team with agent traces — whatever framework, whatever runtime — gets the full
+`InsightReport` (failure clustering, cost/quality Pareto, ranked
+recommendations, and lift CI once they emit two cohorts) without adopting our
+execution stack.
diff --git a/examples/agents-of-all-shapes/python-agno/agno_to_intelligence.py b/examples/agents-of-all-shapes/python-agno/agno_to_intelligence.py
new file mode 100644
index 0000000..d38bab5
--- /dev/null
+++ b/examples/agents-of-all-shapes/python-agno/agno_to_intelligence.py
@@ -0,0 +1,129 @@
+"""
+Python agno agent -> Tangle Intelligence. No sandbox, no Tangle SDK.
+
+The same canonical OTel GenAI spans the TypeScript shapes emit, from a
+Python agno agent. Two ways, same engine:
+
+  1. Hosted: POST OTLP/HTTP-JSON straight to the ingest route. Works with
+     any Python agent; no Tangle dependency at all.
+  2. Substrate (via the published `agent-eval-rpc` client): judge/analyze
+     over the wire — `pip install agent-eval-rpc`.
+
+Run (live):  TANGLE_API_KEY=sk-tan-... python agno_to_intelligence.py
+Without agno installed it falls back to a recorded batch so the wiring is
+runnable as-is.
+"""
+
+import json
+import os
+import time
+import urllib.request
+
+INTELLIGENCE_BASE = os.environ.get(
+    "INTELLIGENCE_BASE", "https://intelligence.tangle.tools/v1/otlp"
+)
+API_KEY = os.environ.get("TANGLE_API_KEY", "sk-tan-...")
+
+
+def run_agno_agent(prompt: str) -> dict:
+    """Run a real agno agent if installed; else a recorded run so this
+    file is runnable without the dep. Live wiring shown inline."""
+    try:
+        from agno.agent import Agent  # type: ignore
+        from agno.models.openai import OpenAIChat  # type: ignore
+
+        agent = Agent(model=OpenAIChat(id="gpt-4o"))
+        resp = agent.run(prompt)
+        usage = getattr(resp, "metrics", {}) or {}
+        return {
+            "model": "openai/gpt-4o",
+            "input_tokens": int(usage.get("input_tokens", 0) or 0),
+            "output_tokens": int(usage.get("output_tokens", 0) or 0),
+            "cost_usd": float(usage.get("cost", 0.0) or 0.0),
+            # Your acceptance check / judge score in 0..1.
+            "score": 1.0 if resp and getattr(resp, "content", None) else 0.0,
+            "failure_mode": None if getattr(resp, "content", None) else "format_drift",
+        }
+    except Exception:
+        # Recorded run — agno not installed or no key. Wiring stays valid.
+        return {
+            "model": "openai/gpt-4o",
+            "input_tokens": 1240,
+            "output_tokens": 320,
+            "cost_usd": 0.018,
+            "score": 0.83,
+            "failure_mode": None,
+        }
+
+
+def otlp_spans_for_run(run_id: str, r: dict) -> list[dict]:
+    now_ns = time.time_ns()
+    attrs = [
+        {"key": "gen_ai.request.model", "value": {"stringValue": r["model"]}},
+        {"key": "gen_ai.usage.input_tokens", "value": {"doubleValue": r["input_tokens"]}},
+        {"key": "gen_ai.usage.output_tokens", "value": {"doubleValue": r["output_tokens"]}},
+        {"key": "gen_ai.usage.cost_usd", "value": {"doubleValue": r["cost_usd"]}},
+        {"key": "score", "value": {"doubleValue": r["score"]}},
+    ]
+    spans = [
+        {
+            "traceId": run_id,
+            "spanId": f"{run_id}-llm",
+            "name": "gen_ai.chat",
+            "startTimeUnixNano": str(now_ns),
+            "endTimeUnixNano": str(now_ns + 800_000_000),
+            "attributes": attrs,
+            "status": {"code": "STATUS_CODE_ERROR" if r["failure_mode"] else "STATUS_CODE_OK"},
+        }
+    ]
+    if r["failure_mode"]:
+        spans.append(
+            {
+                "traceId": run_id,
+                "spanId": f"{run_id}-err",
+                "name": r["failure_mode"],
+                "startTimeUnixNano": str(now_ns + 800_000_000),
+                "endTimeUnixNano": str(now_ns + 800_000_000),
+                "attributes": [],
+                "status": {"code": "STATUS_CODE_ERROR"},
+            }
+        )
+    return spans
+
+
+def ship(spans: list[dict]) -> None:
+    body = json.dumps(
+        {
+            "resourceSpans": [
+                {
+                    "resource": {
+                        "attributes": [
+                            {"key": "service.name", "value": {"stringValue": "agno-agent"}}
+                        ]
+                    },
+                    "scopeSpans": [{"scope": {"name": "agno"}, "spans": spans}],
+                }
+            ]
+        }
+    ).encode()
+    req = urllib.request.Request(
+        f"{INTELLIGENCE_BASE}/v1/traces",
+        data=body,
+        headers={"content-type": "application/json", "authorization": f"Bearer {API_KEY}"},
+        method="POST",
+    )
+    with urllib.request.urlopen(req) as resp:
+        if resp.status >= 300:
+            raise RuntimeError(f"ingest failed: {resp.status}")
+
+
+if __name__ == "__main__":
+    prompts = ["Summarise the Q3 report", "Draft a follow-up email", "Classify this ticket"]
+    all_spans: list[dict] = []
+    for i, p in enumerate(prompts):
+        all_spans += otlp_spans_for_run(f"agno-{i}", run_agno_agent(p))
+    if API_KEY != "sk-tan-...":
+        ship(all_spans)
+        print(f"Shipped {len(all_spans)} spans from agno → Tangle Intelligence.")
+    else:
+        print("(set TANGLE_API_KEY to ship; printing spans)\n", json.dumps(all_spans, indent=2)[:600])
diff --git a/examples/agents-of-all-shapes/run.ts b/examples/agents-of-all-shapes/run.ts
new file mode 100644
index 0000000..379af80
--- /dev/null
+++ b/examples/agents-of-all-shapes/run.ts
@@ -0,0 +1,59 @@
+/**
+ * Agents of all shapes → one decision packet. No sandbox. No deploy.
+ *
+ *   pnpm tsx examples/agents-of-all-shapes/run.ts
+ *
+ * Runs every shape (Tangle runtime / OpenAI-compatible router / Mastra /
+ * Claude Agent SDK), converts each to canonical OTel GenAI spans, and feeds
+ * the merged stream through the in-process intelligence engine
+ * (`fromOtelSpans → analyzeRuns`). Prints the fleet `InsightReport` plus a
+ * per-shape breakdown.
+ *
+ * Optional hosted path: set TANGLE_API_KEY (and INTELLIGENCE_BASE) to also
+ * POST the spans to the hosted OTLP ingest for the dashboard.
+ */
+
+import { allShapes } from './shapes'
+import { shipToTangleOtlp, spansForRuns, toInsightReport } from './shared/intelligence'
+
+/** Analyze every shape in-process, print the fleet and per-shape summaries,
+ *  then optionally forward the same spans to the hosted ingest. */
+async function main() {
+  const byShape = allShapes()
+  const fleetSpans = spansForRuns(Object.values(byShape).flat())
+
+  // Fleet view: the runs of every framework analyzed as one corpus.
+  const { composite, failureModes, recommendations } = await toInsightReport(fleetSpans)
+  console.log('=== Fleet InsightReport (all shapes) ===')
+  console.log(`runs:            ${composite.n}`)
+  console.log(`composite mean:  ${composite.mean.toFixed(3)}`)
+  console.log(`composite p50:   ${composite.p50.toFixed(3)}`)
+  console.log(`failure modes:   ${JSON.stringify(failureModes ?? [])}`)
+  console.log(`recommendations: ${recommendations.length}`)
+  // Only the first three recommendations are listed.
+  recommendations.slice(0, 3).forEach((rec) => {
+    console.log(`  [${rec.priority}] ${rec.title}`)
+  })
+
+  // Per-shape view: the same engine applied to each framework on its own.
+  console.log('\n=== Per-shape composite ===')
+  for (const [shapeName, shapeRuns] of Object.entries(byShape)) {
+    const { composite: shapeComposite } = await toInsightReport(spansForRuns(shapeRuns))
+    console.log(
+      `${shapeName.padEnd(20)} n=${shapeComposite.n} mean=${shapeComposite.mean.toFixed(3)}`,
+    )
+  }
+
+  // Hosted path is opt-in: without a key, print the hint and stop.
+  const apiKey = process.env.TANGLE_API_KEY
+  if (!apiKey) {
+    console.log('\n(set TANGLE_API_KEY to also ship to the hosted dashboard)')
+    return
+  }
+  const endpoint = process.env.INTELLIGENCE_BASE ?? 'https://intelligence.tangle.tools/v1/otlp'
+  await shipToTangleOtlp(fleetSpans, { endpoint, apiKey })
+  console.log(`\nShipped ${fleetSpans.length} spans to ${endpoint} for the dashboard.`)
+}
+
+// Entry point: log any failure and exit with a non-zero status.
+main().catch((error) => {
+  console.error(error)
+  process.exit(1)
+})
diff --git a/examples/agents-of-all-shapes/shapes.ts b/examples/agents-of-all-shapes/shapes.ts
new file mode 100644
index 0000000..369da2c
--- /dev/null
+++ b/examples/agents-of-all-shapes/shapes.ts
@@ -0,0 +1,144 @@
+/**
+ * Agents of all shapes → one Tangle Intelligence pipe.
+ *
+ * Each shape is a different way to PRODUCE agent runs. They all converge on
+ * the same canonical OTel GenAI spans (`shared/intelligence.ts`), so the
+ * `InsightReport` is computed identically no matter who ran the agent —
+ * Tangle's runtime, an OpenAI-compatible router (tcloud / OpenRouter), a
+ * Mastra agent, the Claude Agent SDK, or a Python agno agent.
+ *
+ * The runs below are deterministic so the showcase is verifiable in CI with
+ * no LLM key. Each shape's header shows the REAL wiring — swap the seeded
+ * batch for your framework's telemetry and it lands on the same engine. None
+ * of this touches a Tangle sandbox.
+ */
+
+import type { AgentRun } from './shared/intelligence'
+
+/**
+ * Map a string seed to a reproducible value in [0, 1).
+ *
+ * Hashes the seed's UTF-16 code units with a 32-bit FNV-1a-style
+ * xor-then-multiply loop and keeps five decimal digits of the unsigned
+ * result, so asserted showcase output never depends on `Math.random()`.
+ */
+function rand(seed: string): number {
+  const OFFSET_BASIS = 2166136261
+  const PRIME = 16777619
+  let hash = OFFSET_BASIS
+  let pos = 0
+  while (pos < seed.length) {
+    // Math.imul keeps the multiplication within 32 bits.
+    hash = Math.imul(hash ^ seed.charCodeAt(pos), PRIME)
+    pos += 1
+  }
+  // `>>> 0` reinterprets the signed 32-bit hash as unsigned before the modulo.
+  const unsigned = hash >>> 0
+  return (unsigned % 100000) / 100000
+}
+
+/** Parameters for one deterministic batch of synthetic runs. */
+interface BatchSpec {
+  /** Shape name; prefixes every run id as `${shape}-${index}`. */
+  shape: string
+  /** Model id copied onto every run in the batch. */
+  model: string
+  /** Number of runs to generate. */
+  n: number
+  /** Fraction of runs that fail, with the failure tag to attach. */
+  failRate: number
+  /** Failure tag set on the runs that are drawn as failed. */
+  failureMode: string
+}
+
+/** Generate `spec.n` reproducible runs. Every varying field is derived from
+ *  the run id through `rand`, so repeated calls return identical data. */
+function batch(spec: BatchSpec): AgentRun[] {
+  const out: AgentRun[] = []
+  for (let index = 0; index < spec.n; index += 1) {
+    const runId = `${spec.shape}-${index}`
+    // One independent draw per field, keyed by a short suffix on the run id.
+    const draw = (tag: string): number => rand(`${runId}:${tag}`)
+    const failed = draw('f') < spec.failRate
+    const quality = draw('s')
+    const run: AgentRun = {
+      runId,
+      model: spec.model,
+      // Failed runs score in [0.10, 0.35); successful ones in [0.62, 0.97).
+      score: failed ? 0.1 + quality * 0.25 : 0.62 + quality * 0.35,
+      costUsd: 0.004 + draw('c') * 0.05,
+      inputTokens: 700 + Math.floor(draw('i') * 1500),
+      outputTokens: 120 + Math.floor(draw('o') * 600),
+      // Runs start one second apart from a fixed epoch-millisecond base.
+      startMs: 1_700_000_000_000 + index * 1000,
+      durationMs: 800 + Math.floor(draw('d') * 4000),
+    }
+    // Only failed runs carry a failure tag; successful ones omit the key.
+    if (failed) run.failureMode = spec.failureMode
+    out.push(run)
+  }
+  return out
+}
+
+/**
+ * 1. Tangle agent-runtime / router (tcloud).
+ *
+ * LIVE: agent-runtime already emits every loop event; ship them with the
+ * built-in exporter (see `examples/with-intelligence-export`):
+ *   const exporter = createOtelExporter({ endpoint, headers })
+ *   for await (const e of runAgentTaskStream({ task, backend })) {
+ *     exporter.exportSpan(loopEventToOtelSpan({ kind: e.type, runId, ... }, traceId))
+ *   }
+ * Attach your eval/judge score as a `score` attribute on the run's span.
+ */
+export function tangleRuntimeRuns(): AgentRun[] {
+  // 12 seeded runs; a run fails when its draw falls below 0.17.
+  const spec: BatchSpec = {
+    shape: 'tangle-runtime',
+    model: 'tcloud/claude-sonnet-4-6@2026-05-08',
+    n: 12,
+    failRate: 0.17,
+    failureMode: 'tool_recovery_failure',
+  }
+  return batch(spec)
+}
+
+/**
+ * 2. OpenAI-compatible router — tcloud / OpenRouter / OpenAI / vLLM.
+ *
+ * LIVE: any OpenAI-compatible client. Point it at the router's baseURL and
+ * record the OTel GenAI span per call:
+ *   const res = await openai.chat.completions.create({ model, messages })
+ *   // emit a span with gen_ai.request.model, gen_ai.usage.{input,output}_tokens,
+ *   // gen_ai.usage.cost_usd, and your `score`.
+ */
+export function openAiCompatibleRuns(): AgentRun[] {
+  // 10 seeded runs; a run fails when its draw falls below 0.2.
+  const spec: BatchSpec = {
+    shape: 'openai-compatible',
+    model: 'openrouter/google/gemini-2.5-pro',
+    n: 10,
+    failRate: 0.2,
+    failureMode: 'format_drift',
+  }
+  return batch(spec)
+}
+
+/**
+ * 3. Mastra agent (TypeScript).
+ *
+ * LIVE: Mastra emits OpenTelemetry natively. Configure its OTLP exporter to
+ * point at the hosted ingest OR collect the spans and call `toInsightReport`
+ * in-process. `INTELLIGENCE_BASE` as defaulted in `run.ts` already ends in
+ * `/v1/otlp`, so the traces route is `${INTELLIGENCE_BASE}/v1/traces`:
+ *   export const mastra = new Mastra({ telemetry: { enabled: true,
+ *     export: { type: 'otlp', endpoint: `${INTELLIGENCE_BASE}/v1/traces` } } })
+ * Add a `score` attribute from your eval step. No Tangle SDK required.
+ */
+export function mastraRuns(): AgentRun[] {
+  return batch({
+    shape: 'mastra',
+    model: 'openai/gpt-4o-2024-11-20',
+    n: 10,
+    failRate: 0.1,
+    failureMode: 'instruction_following',
+  })
+}
+
+/**
+ * 4. Claude Agent SDK (TypeScript).
+ *
+ * LIVE: wrap the SDK's query loop and emit one GenAI span per turn:
+ *   for await (const msg of query({ prompt, options })) { ...collect usage... }
+ *   // span: gen_ai.request.model='claude-...', gen_ai.usage.* from msg.usage,
+ *   //       score from your acceptance check.
+ */
+export function claudeAgentSdkRuns(): AgentRun[] {
+  // 10 seeded runs; a run fails when its draw falls below 0.12.
+  const spec: BatchSpec = {
+    shape: 'claude-agent-sdk',
+    model: 'anthropic/claude-sonnet-4-6@2026-05-08',
+    n: 10,
+    failRate: 0.12,
+    failureMode: 'reasoning_error',
+  }
+  return batch(spec)
+}
+
+/** Runs for every TypeScript shape, keyed by shape name. Keys are inserted in
+ *  a fixed order, which is the order callers see when iterating the result. */
+export function allShapes(): Record<string, AgentRun[]> {
+  const byShape: Record<string, AgentRun[]> = {}
+  byShape['tangle-runtime'] = tangleRuntimeRuns()
+  byShape['openai-compatible'] = openAiCompatibleRuns()
+  byShape.mastra = mastraRuns()
+  byShape['claude-agent-sdk'] = claudeAgentSdkRuns()
+  return byShape
+}
diff --git a/examples/agents-of-all-shapes/shared/intelligence.ts b/examples/agents-of-all-shapes/shared/intelligence.ts
new file mode 100644
index 0000000..8163194
--- /dev/null
+++ b/examples/agents-of-all-shapes/shared/intelligence.ts
@@ -0,0 +1,151 @@
+/**
+ * The ONE pipe every agent shape converges on.
+ *
+ * Tangle Intelligence does not care what framework produced a run — it
+ * consumes OpenTelemetry GenAI spans. `fromOtelSpans` reads the standard
+ * `gen_ai.*` semantic conventions (plus `tangle.*` aliases and a generic
+ * `score`), turns each trace into a `RunRecord`, and `analyzeRuns` produces
+ * the `InsightReport` decision packet — composite distribution, lift CI,
+ * Pareto, failure clustering, ranked recommendations.
+ *
+ * Two ways to use it, same engine:
+ *   - `toInsightReport(spans)` — in-process, zero infra. No sandbox, no
+ *     hosted endpoint, no server. This is the QA path every shape verifies.
+ *   - `shipToTangleOtlp(spans, opts)` — POST the same spans to the hosted
+ *     `/v1/otlp/v1/traces` ingest for the dashboard. Optional.
+ */
+
+import { analyzeRuns, fromOtelSpans, type InsightReport } from '@tangle-network/agent-eval/contract'
+import type { TraceSpanEvent } from '@tangle-network/agent-eval/hosted'
+
+export type { InsightReport, TraceSpanEvent }
+
+/** One agent run, framework-agnostic. A shape produces a list of these. */
+export interface AgentRun {
+  /** Run identifier; used as the trace id and as the prefix of each span id. */
+  runId: string
+  /** Snapshot model id, e.g. `claude-sonnet-4-6@2025-05-08`. */
+  model: string
+  /** Outcome quality on 0..1 (your judge / eval / rubric score). */
+  score: number
+  /** Run cost; emitted as the `gen_ai.usage.cost_usd` attribute. */
+  costUsd: number
+  /** Emitted as `gen_ai.usage.input_tokens`. */
+  inputTokens: number
+  /** Emitted as `gen_ai.usage.output_tokens`. */
+  outputTokens: number
+  /** Start time in milliseconds; scaled to nanoseconds for the span start. */
+  startMs: number
+  /** Duration in milliseconds; added to `startMs` to derive the span end. */
+  durationMs: number
+  /** When set, the run is marked failed and the tag becomes the failure
+   *  span name (→ `RunRecord.failureMode`). */
+  failureMode?: string
+}
+
+/** Nanoseconds per millisecond — scales `AgentRun` millisecond times to the
+ *  nanosecond timestamps carried on spans. */
+const NANOS_PER_MS = 1_000_000
+
+/**
+ * Build the canonical OTel GenAI spans for one agent run.
+ *
+ * Every run yields a `gen_ai.chat` span carrying `gen_ai.request.model`,
+ * `gen_ai.usage.*` (tokens and cost) and `score`. A run with a `failureMode`
+ * additionally yields a zero-length ERROR span named after that failure mode,
+ * positioned at the end of the chat span. Any framework that emits the same
+ * attributes feeds the same pipe — that is the whole point: one wire, every
+ * shape.
+ */
+export function otelSpansForRun(run: AgentRun): TraceSpanEvent[] {
+  const { runId, failureMode } = run
+  const startNs = run.startMs * NANOS_PER_MS
+  const endNs = (run.startMs + run.durationMs) * NANOS_PER_MS
+
+  const llmSpan: TraceSpanEvent = {
+    traceId: runId,
+    spanId: `${runId}::llm`,
+    name: 'gen_ai.chat',
+    startTimeUnixNano: startNs,
+    endTimeUnixNano: endNs,
+    attributes: {
+      'gen_ai.request.model': run.model,
+      'gen_ai.usage.input_tokens': run.inputTokens,
+      'gen_ai.usage.output_tokens': run.outputTokens,
+      'gen_ai.usage.cost_usd': run.costUsd,
+      score: run.score,
+    },
+    status: { code: failureMode ? 'ERROR' : 'OK' },
+  }
+  if (!failureMode) {
+    return [llmSpan]
+  }
+
+  // Failure span — its name becomes the RunRecord.failureMode.
+  const failureSpan: TraceSpanEvent = {
+    traceId: runId,
+    spanId: `${runId}::err`,
+    name: failureMode,
+    startTimeUnixNano: endNs,
+    endTimeUnixNano: endNs,
+    attributes: {},
+    status: { code: 'ERROR' },
+  }
+  return [llmSpan, failureSpan]
+}
+
+/** Convert each run to its spans and concatenate them, in run order, into a
+ *  single span stream. */
+export function spansForRuns(runs: AgentRun[]): TraceSpanEvent[] {
+  const stream: TraceSpanEvent[] = []
+  for (const run of runs) {
+    stream.push(...otelSpansForRun(run))
+  }
+  return stream
+}
+
+/** In-process analysis: hand the spans to `fromOtelSpans`, then pass the
+ *  resulting runs to `analyzeRuns` and return its report. No sandbox, no
+ *  server, no deploy — the verifiable QA path. */
+export async function toInsightReport(spans: TraceSpanEvent[]): Promise<InsightReport> {
+  return analyzeRuns({ runs: fromOtelSpans({ spans }) })
+}
+
+/** Options for `shipToTangleOtlp`. */
+export interface ShipOptions {
+  /** Hosted ingest base; the route `/v1/traces` is appended. */
+  endpoint: string
+  /** `sk-tan-...` key — tenant resolves from the Bearer, never the payload. */
+  apiKey: string
+  /** Value sent as the `service.name` resource attribute; defaults to
+   *  `agents-of-all-shapes` when omitted. */
+  serviceName?: string
+}
+
+/**
+ * Optional hosted path: POST the same OTel spans to Tangle Intelligence's
+ * OTLP/HTTP ingest as one JSON request.
+ *
+ * Spans are wrapped in a single resource (`service.name` from
+ * `opts.serviceName`, default `agents-of-all-shapes`) and a single scope.
+ * Numeric attributes are sent as `doubleValue`; every other attribute is
+ * stringified into `stringValue`. Timestamps are sent as strings.
+ *
+ * @throws Error when the response status is not OK; the message carries the
+ *   status and the response body.
+ */
+export async function shipToTangleOtlp(spans: TraceSpanEvent[], opts: ShipOptions): Promise<void> {
+  // Tolerate trailing slashes on the base so the route never contains "//".
+  const url = `${opts.endpoint.replace(/\/+$/, '')}/v1/traces`
+
+  const otlpSpans = spans.map((s) => ({
+    traceId: s.traceId,
+    spanId: s.spanId,
+    name: s.name,
+    startTimeUnixNano: String(s.startTimeUnixNano),
+    endTimeUnixNano: String(s.endTimeUnixNano),
+    attributes: Object.entries(s.attributes).map(([key, value]) => ({
+      key,
+      value: typeof value === 'number' ? { doubleValue: value } : { stringValue: String(value) },
+    })),
+    status: s.status,
+  }))
+
+  const payload = {
+    resourceSpans: [
+      {
+        resource: {
+          attributes: [
+            {
+              key: 'service.name',
+              value: { stringValue: opts.serviceName ?? 'agents-of-all-shapes' },
+            },
+          ],
+        },
+        scopeSpans: [{ scope: { name: 'agents-of-all-shapes' }, spans: otlpSpans }],
+      },
+    ],
+  }
+
+  const res = await fetch(url, {
+    method: 'POST',
+    headers: {
+      'content-type': 'application/json',
+      authorization: `Bearer ${opts.apiKey}`,
+    },
+    body: JSON.stringify(payload),
+  })
+  if (!res.ok) {
+    throw new Error(`intelligence ingest failed: ${res.status} ${await res.text()}`)
+  }
+}
diff --git a/examples/with-intelligence-export/with-intelligence-export.ts b/examples/with-intelligence-export/with-intelligence-export.ts
index 7cb1da3..4848c4f 100644
--- a/examples/with-intelligence-export/with-intelligence-export.ts
+++ b/examples/with-intelligence-export/with-intelligence-export.ts
@@ -39,10 +39,36 @@ const INTELLIGENCE_BASE =
 const backend = createIterableBackend<AgentBackendInput>({
   kind: 'intel-demo',
   async *stream(_input, ctx) {
-    yield { type: 'text_delta', task: ctx.task, session: ctx.session, text: 'working...\n', timestamp: new Date().toISOString() }
-    yield { type: 'tool_call', task: ctx.task, session: ctx.session, toolName: 'web_search', args: {}, timestamp: new Date().toISOString() }
-    yield { type: 'tool_result', task: ctx.task, session: ctx.session, toolName: 'web_search', result: { ok: true }, timestamp: new Date().toISOString() }
-    yield { type: 'text_delta', task: ctx.task, session: ctx.session, text: 'done.\n', timestamp: new Date().toISOString() }
+    yield {
+      type: 'text_delta',
+      task: ctx.task,
+      session: ctx.session,
+      text: 'working...\n',
+      timestamp: new Date().toISOString(),
+    }
+    yield {
+      type: 'tool_call',
+      task: ctx.task,
+      session: ctx.session,
+      toolName: 'web_search',
+      args: {},
+      timestamp: new Date().toISOString(),
+    }
+    yield {
+      type: 'tool_result',
+      task: ctx.task,
+      session: ctx.session,
+      toolName: 'web_search',
+      result: { ok: true },
+      timestamp: new Date().toISOString(),
+    }
+    yield {
+      type: 'text_delta',
+      task: ctx.task,
+      session: ctx.session,
+      text: 'done.\n',
+      timestamp: new Date().toISOString(),
+    }
   },
 })
 
diff --git a/tests/agents-of-all-shapes.test.ts b/tests/agents-of-all-shapes.test.ts
new file mode 100644
index 0000000..f4a694c
--- /dev/null
+++ b/tests/agents-of-all-shapes.test.ts
@@ -0,0 +1,54 @@
+import { describe, expect, it } from 'vitest'
+import { allShapes } from '../examples/agents-of-all-shapes/shapes'
+import { spansForRuns, toInsightReport } from '../examples/agents-of-all-shapes/shared/intelligence'
+
+/**
+ * End-to-end check of the showcase with NO sandbox, NO hosted endpoint and NO
+ * LLM key: every agent shape → canonical OTel spans → fromOtelSpans →
+ * analyzeRuns → a real InsightReport. This is the QA path a customer runs to
+ * prove "any agent, not just your sandbox" before wiring their own traces.
+ */
+describe('agents-of-all-shapes — one intelligence pipe, no sandbox', () => {
+  /** Analyze the merged runs of every shape as a single fleet. */
+  const fleetReport = () => toInsightReport(spansForRuns(Object.values(allShapes()).flat()))
+
+  it('every shape produces a real InsightReport in-process', async () => {
+    const shapes = allShapes()
+    const expectedShapeNames = ['claude-agent-sdk', 'mastra', 'openai-compatible', 'tangle-runtime']
+    expect(Object.keys(shapes).sort()).toEqual(expectedShapeNames)
+
+    for (const runs of Object.values(shapes)) {
+      expect(runs.length).toBeGreaterThan(0)
+      const { composite, recommendations, costQuality } = await toInsightReport(spansForRuns(runs))
+      // Real decision packet per framework — composite over its runs.
+      expect(composite.n).toBe(runs.length)
+      expect(composite.mean).toBeGreaterThan(0)
+      expect(composite.mean).toBeLessThanOrEqual(1)
+      expect(composite.min).toBeGreaterThanOrEqual(0)
+      expect(composite.max).toBeLessThanOrEqual(1)
+      expect(Array.isArray(recommendations)).toBe(true)
+      // Cost/quality Pareto is computed from the gen_ai.usage.cost_usd attrs.
+      expect(costQuality).toBeDefined()
+    }
+  })
+
+  it('merges all shapes into one fleet report (cross-framework aggregation)', async () => {
+    const expectedRuns = Object.values(allShapes()).flat().length
+    const { composite, failureModes } = await fleetReport()
+    expect(composite.n).toBe(expectedRuns)
+    // The merged corpus carries failures from multiple frameworks; the
+    // model-free failureModes breakdown surfaces the dominant one.
+    expect(failureModes).toBeDefined()
+    expect(failureModes!.length).toBeGreaterThan(0)
+    expect(failureModes![0]!.count).toBeGreaterThan(0)
+  })
+
+  it('derives a real cost from gen_ai.usage.cost_usd across shapes', async () => {
+    const { costQuality } = await fleetReport()
+    // The Pareto/cost view is populated from the OTel cost attribute, not zeros.
+    expect(costQuality.cost.n).toBeGreaterThan(0)
+    expect(costQuality.cost.mean).toBeGreaterThan(0)
+  })
+})