From 20dd38cb226b3f453eef72b6312e8bc01cbdd759 Mon Sep 17 00:00:00 2001
From: Laith Al-Saadoon <9553966+theagenticguy@users.noreply.github.com>
Date: Fri, 29 May 2026 16:17:48 -0500
Subject: [PATCH] fix(scanners): exclude indexer-ignored dirs from
 vulture/radon/ty (drop .venv noise)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

vulture/radon/ty walk the project tree directly and were run with no
exclude, so they descend into indexer-ignored dirs such as .venv/ and report
library dead-code, complexity, and type findings (radon skips hidden dirs by
default, so for radon this mainly affects non-hidden dirs such as
node_modules/dist/build). On a uv-managed repo this was 127/133 findings —
95% library noise drowning the 6 real ones. The indexer already excludes
these dirs via pipeline.HARDCODED_IGNORES; the scan runner never threaded
that set into the wrappers.

- VultureWrapperOptions.excludeGlobs → vulture --exclude, each ignore name
  ANCHORED to a path segment (*/.venv/* not bare .venv, which vulture would
  substring-match and so also suppress src/.venv_helpers.py).
- RadonWrapperOptions.ignoreDirs → radon -i (matches directory basenames, so
  bare names are correct here).
- TyWrapperOptions.excludeGlobs → ty --exclude <name>/ plus --force-exclude
  (CLI-named paths bypass excludes otherwise).
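- DefaultWrapperContext gains vulture/radon/ty; createWrapperFor threads them
  through the same way as the pip-audit options; scan.ts buildWrapperContext
  populates all three from pipeline.HARDCODED_IGNORES (single source of
  truth, no drift).
- Also adds .claude/workflows/world-class-code-exploration.mjs, the workflow
  definition for the field-report investigation (not part of the scanner
  fix itself).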

Verified end-to-end: codehub scan on ngs-research-agent now reports 6
findings (was 133) — vulture contributes 0 .venv noise; the 6 real findings
(2 semgrep, 1 osv, 1 pip-audit, 2 radon) are unchanged. Scanners 94/94, cli
263/263, tsc + biome clean.

Field-report Issue 2.
---
 .../world-class-code-exploration.mjs          | 415 ++++++++++++++++++
 packages/cli/src/commands/scan.ts             |  18 +
 packages/scanners/src/index.ts                |  18 +-
 .../src/wrappers/extended-wrappers.test.ts    |  20 +
 packages/scanners/src/wrappers/radon.ts       |  21 +-
 packages/scanners/src/wrappers/ty.ts          |  24 +-
 packages/scanners/src/wrappers/vulture.ts     |  39 +-
 7 files changed, 543 insertions(+), 12 deletions(-)
 create mode 100644 .claude/workflows/world-class-code-exploration.mjs
diff --git a/.claude/workflows/world-class-code-exploration.mjs b/.claude/workflows/world-class-code-exploration.mjs
new file mode 100644
index 00000000..bd6b38d1
--- /dev/null
+++ b/.claude/workflows/world-class-code-exploration.mjs
@@ -0,0 +1,415 @@
+export const meta = {
+  name: 'world-class-code-exploration',
+  description: 'Root-cause + design + verify fixes for the OpenCodeHub field-report issues and chart what makes code exploration world-class',
+  phases: [
+    { title: 'Diagnose', detail: 'pin exact root cause + fix site for each issue (grounded in code)' },
+    { title: 'Vision', detail: 'parallel lenses on what world-class code-exploration requires beyond the reported issues' },
+    { title: 'Design', detail: 'concrete fix design per issue, with test shape' },
+    { title: 'Adversarial verify', detail: 'skeptics try to refute each design' },
+    { title: 'Synthesize', detail: 'prioritized roadmap: confirmed fixes + vision gaps' },
+  ],
+}
+
+// ---------------------------------------------------------------------------
+// Shared context — the grounded findings I (orchestrator) already proved, so
+// agents don't re-derive from scratch and don't repeat the field report's
+// wrong hypotheses. Each agent re-verifies against the live code.
+// ---------------------------------------------------------------------------
+const REPO = '/Users/lalsaado/Projects/open-code-hub'
+const SUBJECT = '/Users/lalsaado/Projects/ngs-research-agent'
+
+const GROUNDING = `
+You are improving OpenCodeHub (OCH), a local code-graph + MCP tool for AI-driven
+code exploration. Repo root: ${REPO}. A field report drove an exploration session
+THROUGH the codehub CLI against subject repo ${SUBJECT} (a Python stdio MCP server,
+src-layout package ngs_research_agent) and filed 6 issues.
+
+CRITICAL grounded facts the orchestrator already PROVED empirically (re-verify, do
+not contradict without equally strong evidence — cite file:line + a repro):
+
+ISSUE 1 (the report's headline "cross-module CALLS edges drop / FQN-vs-filepath
+node-identity mismatch") — the report's hypothesis is WRONG for Python. Proven:
+  - scip-python emits ONE symbol string for both the def and every ref of
+    get_bedrock_client (no src/dist or external/FQN split like TS has).
+  - The decorated function get_bedrock_client (sole @cache-decorated def in
+    client.py) is DROPPED from the persisted lbug graph. WASM parse captures it,
+    pythonProvider.extractDefinitions returns it [146-171], idForDefinition gives a
+    unique id, KnowledgeGraph.addNode (packages/core-types/src/graph.ts) dedups by
+    id — yet the final graph has 5/6 client.py Function nodes; get_bedrock_client's
+    Function node is absent while its body Variables AND a Process node referencing
+    its (missing) Function id DO persist. Discriminator vs the 5 survivors: it's the
+    only bare-name @cache-decorated def. So the real bug is "decorated function def
+    lost between extraction and persistence/bulk-load". Suspect the lbug node COPY
+    struct-field type seeding (packages/storage/src/graphdb-adapter.ts
+    NODE_COPY_SUBQUERY / NODE_SENTINEL_ID) OR a later phase, OR decorated_definition
+    range handling.
+  - Bug A (independent, confirmed): extractPyImports in
+    packages/ingestion/src/providers/python.ts is LINE-BASED and silently drops
+    multi-line parenthesized imports: \`from pkg.mod import (\\n a,\\n b,\\n)\` →
+    first line rest="(" → 0 names → discarded. Ubiquitous in real Python.
+  - Bug B (confirmed): preprocessPyImportPath leaves dotted absolute imports
+    unchanged; resolveImportTarget (packages/ingestion/src/pipeline/phases/parse.ts
+    :761) only handles ./ ../ / → src-layout package imports (ngs_research_agent.client
+    → src/ngs_research_agent/client.py) stub as <external>.
+
+ISSUE 2: scan runner runs vulture (and radon, ty) against the absolute repo tree
+with NO exclude → vulture walks .venv/ → 127/133 findings are library noise. The
+indexer already excludes via HARDCODED_IGNORES (packages/ingestion/src/pipeline/
+gitignore.ts:225 incl ".venv"), exported via the pipeline barrel. semgrep/ruff dodge
+it by targeting "." and honoring gitignore. Fix: thread an exclude list into a new
+VultureWrapperOptions (mirror pip-audit's options plumbing through
+DefaultWrapperContext), populate from pipeline.HARDCODED_IGNORES in CLI
+buildWrapperContext (packages/cli/src/commands/scan.ts), emit vulture --exclude
+<comma-joined>. Apply to radon.ts + ty.ts too.
+
+ISSUE 3: list_findings/list_dead_code/license_audit/owners/route_map/project_profile/
+risk_trends/api_impact are MCP-only; no CLI subcommand (CLI uses commander, entry
+packages/cli/src/index.ts; verdict is the canonical CLI↔MCP shared-fn template —
+both call computeVerdict from @opencodehub/analysis). list_findings (store.graph
+.listFindings), list_dead_code (classifyDeadness), license_audit (classifyDependencies
++ listDependencies), project_profile (listNodesByKind), risk_trends (computeRiskTrends
++ loadSnapshots, already used in wiki.ts) are THIN; owners/route_map/api_impact are
+inlined in MCP handlers (need extraction to @opencodehub/analysis).
+
+ISSUE 4: codehub sql exposes only cochanges + symbol_summaries (DuckDB temporal
+tier); the node/edge graph lives in lbug (graph.lbug) and is NOT SQL-queryable.
+Docs oversell "SQL against the graph store". Fix docs framing or add a read-only
+nodes/edges view.
+
+ISSUE 5: symbol_summaries empty → query silently runs BM25-only even though doctor
+reports embedder weights present. status should surface "summaries: N / vectors:
+bm25-only|hybrid".
+
+ISSUE 6: doctor reports "bandit OK" by binary presence but bandit lacks the [sarif]
+extra → scan can't use it (argparse rejects -f sarif, exit 2, 0 findings). doctor
+should probe the formatter, not just --version. (installCmd already fixed to
+uv tool install 'bandit[sarif]' in a merged PR.)
+
+Storage interface: IGraphStore in packages/storage/src/interface.ts has listNodes,
+listNodesByKind, listEdgesByType, listFindings, listDependencies, listRoutes.
+ITemporalStore holds cochanges + symbol_summaries. ADR 0016 (DuckDB graph rip),
+ADR 0015 (WASM-only parser). Durable lessons in .erpaval/INDEX.md.
+`
+
+// ---------------------------------------------------------------------------
+// Schemas
+// ---------------------------------------------------------------------------
+const DIAGNOSIS_SCHEMA = {
+  type: 'object',
+  additionalProperties: false,
+  required: ['issue', 'rootCause', 'evidence', 'fixSites', 'severity', 'confidence'],
+  properties: {
+    issue: { type: 'string', description: 'Issue id, e.g. "Issue 1" / "Issue 1 Bug A"' },
+    rootCause: { type: 'string', description: 'The precise mechanism, in 1-3 sentences' },
+    evidence: {
+      type: 'array', items: { type: 'string' },
+      description: 'file:line citations + repro observations that prove the root cause',
+    },
+    fixSites: {
+      type: 'array',
+      items: {
+        type: 'object', additionalProperties: false,
+        required: ['file', 'what'],
+        properties: {
+          file: { type: 'string' },
+          what: { type: 'string', description: 'the change to make at this site' },
+        },
+      },
+    },
+    severity: { enum: ['HIGH', 'MEDIUM', 'LOW'] },
+    confidence: { type: 'number', description: '0..1 that this root cause is correct' },
+    openQuestions: { type: 'array', items: { type: 'string' } },
+  },
+}
+
+const VISION_SCHEMA = {
+  type: 'object', additionalProperties: false,
+  required: ['lens', 'gaps'],
+  properties: {
+    lens: { type: 'string' },
+    gaps: {
+      type: 'array',
+      items: {
+        type: 'object', additionalProperties: false,
+        required: ['capability', 'whyItMatters', 'effort', 'leverage'],
+        properties: {
+          capability: { type: 'string', description: 'a missing/weak capability for world-class code exploration' },
+          whyItMatters: { type: 'string' },
+          existingFoundation: { type: 'string', description: 'what in OCH today it builds on (file/tool), or "greenfield"' },
+          effort: { enum: ['S', 'M', 'L', 'XL'] },
+          leverage: { enum: ['transformational', 'high', 'medium', 'low'] },
+        },
+      },
+    },
+  },
+}
+
+const DESIGN_SCHEMA = {
+  type: 'object', additionalProperties: false,
+  required: ['issue', 'approach', 'diffSketch', 'testShape', 'risks', 'blastRadius'],
+  properties: {
+    issue: { type: 'string' },
+    approach: { type: 'string', description: 'the concrete fix, including exact functions/signatures touched' },
+    diffSketch: { type: 'string', description: 'pseudo-diff or precise prose of the edits per file' },
+    testShape: { type: 'string', description: 'the regression test(s) to add and where' },
+    risks: { type: 'array', items: { type: 'string' } },
+    blastRadius: { type: 'string', description: 'what else could break; which packages rebuild' },
+  },
+}
+
+const VERDICT_SCHEMA = {
+  type: 'object', additionalProperties: false,
+  required: ['issue', 'holds', 'reason'],
+  properties: {
+    issue: { type: 'string' },
+    holds: { type: 'boolean', description: 'true if the design is sound and the root cause is right' },
+    reason: { type: 'string' },
+    mustFix: { type: 'array', items: { type: 'string' }, description: 'concrete corrections the design needs before implementation' },
+  },
+}
+
+// ---------------------------------------------------------------------------
+// PHASE 1 — Diagnose: one agent per issue, grounded, returns structured RC.
+// Issue 1 gets the deepest treatment (its own dedicated bisection agent).
+// ---------------------------------------------------------------------------
+const ISSUES = [
+  {
+    id: 'Issue 1 (decorated-func drop)',
+    label: 'diag:issue1-core',
+    prompt: `${GROUNDING}
+
+YOUR TASK: Pin the EXACT drop point for the @cache-decorated get_bedrock_client
+Function node. It survives pythonProvider.extractDefinitions but is absent from the
+persisted lbug graph. Bisect the path: parse phase addNode loop
+(packages/ingestion/src/pipeline/phases/parse.ts ~363-378) → later phases that
+mutate nodes (processes.ts, accesses.ts, orm.ts, ownership.ts) → the lbug bulk-load
+(packages/storage/src/graphdb-adapter.ts NODE_COPY_SUBQUERY, struct-field type
+seeding, COPY ... IGNORE_ERRORS, any per-row filter on null startLine/endLine or
+field-shape). Read every candidate. Form ONE concrete root-cause hypothesis with the
+exact file:line where the node is dropped or overwritten, and explain why ONLY the
+decorated def is affected (what's structurally different about its GraphNode — range
+from decorated_definition? a field that trips the COPY type-seeding?). Also confirm
+or correct Bug A (multi-line imports) and Bug B (src-layout resolution) with file:line.
+Return THREE diagnosis objects (Issue 1 core, Issue 1 Bug A, Issue 1 Bug B) — but
+this schema is one object, so return the CORE one here and put Bug A + Bug B findings
+in openQuestions as "Bug A: ..." / "Bug B: ..." one-liners with their fix sites.`,
+    schema: DIAGNOSIS_SCHEMA,
+  },
+  {
+    id: 'Issue 2 (vulture .venv)',
+    label: 'diag:issue2',
+    prompt: `${GROUNDING}\n\nYOUR TASK: Confirm Issue 2 root cause and the cleanest fix
+site. Read packages/scanners/src/wrappers/{vulture,radon,ty,semgrep,ruff}.ts,
+packages/scanners/src/spec.ts (ScannerRunContext), packages/scanners/src/index.ts
+(DefaultWrapperContext, createWrapperFor), packages/cli/src/commands/scan.ts
+(buildWrapperContext), and packages/ingestion/src/pipeline/gitignore.ts
+(HARDCODED_IGNORES + barrel export). Verify vulture supports --exclude (comma glob).
+Confirm scanners package does NOT depend on ingestion (so threading from CLI is the
+right seam). Return the diagnosis with exact fixSites.`,
+    schema: DIAGNOSIS_SCHEMA,
+  },
+  {
+    id: 'Issue 3 (MCP-only CLI gap)',
+    label: 'diag:issue3',
+    prompt: `${GROUNDING}\n\nYOUR TASK: For each MCP-only reader (list_findings,
+list_dead_code, license_audit, project_profile, risk_trends, owners, route_map,
+api_impact) confirm whether it calls a shared @opencodehub/analysis fn or storage
+reader (THIN) vs inlined logic in the MCP handler (EXTRACT). Read packages/mcp/src/
+tools/*.ts for each + packages/cli/src/index.ts registration pattern + a template
+command (verdict.ts). Return a diagnosis whose fixSites enumerate, per tool, the new
+CLI command file + the lib fn it calls, and flag the 4-5 cheapest thin wins.`,
+    schema: DIAGNOSIS_SCHEMA,
+  },
+  {
+    id: 'Issue 4 (sql framing)',
+    label: 'diag:issue4',
+    prompt: `${GROUNDING}\n\nYOUR TASK: Confirm what \`codehub sql\` can reach. Read the
+sql command (packages/cli/src/commands/sql.ts or similar), ITemporalStore vs
+IGraphStore (packages/storage/src/interface.ts), and where the "SQL against the graph
+store" wording appears (CLAUDE.md, docs/, --help strings, MCP tool descriptions).
+Decide: doc-only fix vs adding a read-only nodes/edges view. Return diagnosis + fixSites.`,
+    schema: DIAGNOSIS_SCHEMA,
+  },
+  {
+    id: 'Issue 5 (status summaries/vectors)',
+    label: 'diag:issue5',
+    prompt: `${GROUNDING}\n\nYOUR TASK: Read the status command (packages/cli/src/
+commands/status.ts) and how query decides bm25 vs hybrid (search package + how it
+checks symbol_summaries / embeddings presence). Determine where status should read
+summaries count + vector mode and what exact line to print. Return diagnosis + fixSites.`,
+    schema: DIAGNOSIS_SCHEMA,
+  },
+  {
+    id: 'Issue 6 (doctor bandit[sarif])',
+    label: 'diag:issue6',
+    prompt: `${GROUNDING}\n\nYOUR TASK: Read the bandit doctor check + the bandit
+wrapper (packages/cli/src/commands/doctor.ts binaryOnPathCheck for bandit;
+packages/scanners/src/wrappers/bandit.ts banditExitAdvisory). Design a probe that
+verifies the [sarif] formatter is actually usable (e.g. run \`bandit -f sarif\` on a
+tiny temp input and check exit!=2 / no usage banner, or check the
+bandit-sarif-formatter entry point). Return diagnosis + fixSites.`,
+    schema: DIAGNOSIS_SCHEMA,
+  },
+]
+
+phase('Diagnose') // one Explore agent per ISSUES entry, all launched in parallel
+const diagnoses = await parallel(
+  ISSUES.map((iss) => () =>
+    agent(iss.prompt, { label: iss.label, phase: 'Diagnose', schema: iss.schema, agentType: 'Explore' }), // honor the entry's own schema
+  ),
+)
+const confirmedDiagnoses = diagnoses.filter(Boolean) // drop falsy results so later phases only see real diagnoses
+log(`Diagnosed ${confirmedDiagnoses.length}/${ISSUES.length} issues`)
+
+// ---------------------------------------------------------------------------
+// PHASE 2 — Vision (lenses run in parallel with each other; nothing consumes
+// their output until synthesis): what does WORLD-CLASS code exploration
+// require, beyond the 6 reported issues? Distinct lenses so they don't converge.
+// ---------------------------------------------------------------------------
+const LENSES = [
+  {
+    lens: 'Graph correctness & completeness',
+    angle: `What categories of edges/nodes does OCH likely MISS or mis-bind today
+(beyond decorated funcs)? Think: dynamic dispatch, re-exports, decorators-as-wrappers,
+class attributes, async/await call chains, test→src coverage edges, monkeypatch,
+dependency-injection. What would make the graph trustworthy enough that a user
+believes the blast-radius number? Ground in OCH's parse/scip phases.`,
+  },
+  {
+    lens: 'Retrieval quality (BM25 → hybrid → reranked)',
+    angle: `The report found query silently runs BM25-only (no summaries/vectors).
+What does world-class code retrieval look like — hybrid dense+sparse, symbol
+summaries, query understanding, result grouping by process/flow, reranking? What does
+OCH have (embedder, search package) vs need? How to make hybrid the default that
+"just works" after analyze.`,
+  },
+  {
+    lens: 'Agent ergonomics & CLI/MCP parity',
+    angle: `OCH is driven BY an LLM agent. What makes a code-graph tool delightful for
+an agent: CLI↔MCP parity, structured + human output, disambiguation that never omits
+the real node, --kind/--exclude-docs defaults, next-step hints, staleness signals,
+self-describing errors (like AMBIGUOUS_REPO). What's missing for an agent to drive
+exploration confidently end-to-end?`,
+  },
+  {
+    lens: 'Trust, verification & "show your work"',
+    angle: `For impact/verdict to be trusted: edge provenance (scip vs heuristic
+confidence), "why is this in the blast radius" path explanations, coverage of the
+graph (what % of calls resolved vs dropped to <external>), a self-diagnostic that
+reports graph health (orphan rate, unresolved-import rate). What would let a user
+audit the graph's own accuracy?`,
+  },
+]
+
+phase('Vision')
+const visions = await parallel(
+  LENSES.map((l) => () =>
+    agent(
+      `${GROUNDING}\n\nYOU ARE A PRODUCT+ARCHITECTURE STRATEGIST for "world-class code
+exploration & understanding". LENS: ${l.lens}.\n${l.angle}\n\nReturn 3-6 concrete
+capability gaps. For each: why it matters for an AI agent exploring code, what OCH
+foundation it builds on (cite a file/tool/package) or "greenfield", effort (S/M/L/XL),
+and leverage. Be specific to THIS codebase — no generic advice. Prefer gaps that the
+existing architecture (lbug graph, scip-ingest, embedder, 28 MCP tools, IGraphStore)
+makes cheap to reach.`,
+      { label: `vision:${l.lens.slice(0, 18)}`, phase: 'Vision', schema: VISION_SCHEMA, agentType: 'Explore' },
+    ),
+  ),
+)
+const confirmedVisions = visions.filter(Boolean)
+log(`Collected ${confirmedVisions.length} vision lenses`)
+
+// ---------------------------------------------------------------------------
+// PHASE 3+4 — Design each confirmed diagnosis, then adversarially verify.
+// Pipeline: a design verifies as soon as it's produced (no global barrier).
+// ---------------------------------------------------------------------------
+phase('Design')
+const designVerdicts = await pipeline(
+  confirmedDiagnoses,
+  (diag) =>
+    agent(
+      `${GROUNDING}\n\nYOU ARE A STAFF ENGINEER designing the fix for: ${diag.issue}.
+Confirmed root cause: ${diag.rootCause}
+Evidence: ${(diag.evidence || []).join(' | ')}
+Fix sites: ${JSON.stringify(diag.fixSites)}
+
+Produce an implementation-ready design: exact functions/signatures, a pseudo-diff per
+file, the regression test(s) and where they live (match existing test conventions),
+risks, and blast radius (which packages rebuild, what else could break). Match the
+repo's idioms (DI seams in scanner wrappers, commander registration, structured-output
+schemas). Do NOT write the code — design it precisely enough that implementation is
+mechanical.`,
+      { label: `design:${diag.issue.slice(0, 22)}`, phase: 'Design', schema: DESIGN_SCHEMA, agentType: 'Explore' },
+    ),
+  async (design, diag) => {
+    const LENSES_V = ['correctness', 'completeness', 'repro-or-refute']
+    const thunks = LENSES_V.map((angleName) => () =>
+      agent(
+        `${GROUNDING}\n\nYOU ARE A SKEPTIC. Default to holds=false unless the design
+is clearly sound. Lens: ${angleName}.
+Issue: ${diag.issue}
+Root cause claim: ${diag.rootCause}
+Design: ${design ? design.approach : '(design failed)'}
+Diff sketch: ${design ? design.diffSketch : ''}
+Test shape: ${design ? design.testShape : ''}
+
+Try to REFUTE: is the root cause actually right? Will this fix actually resolve the
+reported symptom without breaking the 5 surviving cases / other languages / other
+scanners? Is the test real (would it fail before, pass after)? For ${angleName}
+specifically, find the hole. Return holds + reason + mustFix corrections.`,
+        { label: `verify:${diag.issue.slice(0, 14)}:${angleName}`, phase: 'Adversarial verify', schema: VERDICT_SCHEMA, agentType: 'Explore' },
+      ),
+    )
+    const votes = (await parallel(thunks)).filter(Boolean)
+    const holdCount = votes.filter((x) => x.holds).length
+    return {
+      issue: diag.issue,
+      severity: diag.severity,
+      confidence: diag.confidence,
+      rootCause: diag.rootCause,
+      fixSites: diag.fixSites,
+      design,
+      survives: holdCount >= 2,
+      votes,
+      mustFix: votes.flatMap((x) => x.mustFix || []),
+    }
+  },
+)
+const designs = designVerdicts.filter(Boolean)
+
+// ---------------------------------------------------------------------------
+// PHASE 5 — Synthesize: one agent merges confirmed designs + vision gaps into
+// a single prioritized roadmap. Gets the full structured corpus.
+// ---------------------------------------------------------------------------
+phase('Synthesize')
+const synthesis = await agent(
+  `${GROUNDING}\n\nYOU ARE THE TECH LEAD. Synthesize a single prioritized roadmap to
+make OpenCodeHub WORLD-CLASS for exploring and understanding code.
+
+CONFIRMED FIXES (root cause + design + adversarial verdict):
+${JSON.stringify(designs.map((d) => ({ issue: d.issue, severity: d.severity, survives: d.survives, rootCause: d.rootCause, approach: d.design?.approach, mustFix: d.mustFix })), null, 1)}
+
+VISION GAPS (what world-class requires beyond the reported issues):
+${JSON.stringify(confirmedVisions.flatMap((v) => v.gaps.map((g) => ({ lens: v.lens, ...g }))), null, 1)}
+
+Produce, in Markdown:
+1. **Ship now (this PR series)** — the confirmed bug fixes that SURVIVED adversarial
+   review, in dependency/priority order, each with the one-line fix and any mustFix
+   corrections folded in. Call out Issue 1 core (decorated-func drop) as the headline
+   correctness fix and whether it's ready or needs more diagnosis.
+2. **Fast follow** — designs that need the mustFix corrections, or thin vision gaps.
+3. **World-class roadmap** — the transformational/high-leverage vision gaps grouped by
+   theme (graph correctness, hybrid retrieval, agent ergonomics, trust/verification),
+   each with effort + the OCH foundation it builds on.
+4. **What I'd cut / defer** and why.
+Be decisive and specific to this codebase. This is the plan the orchestrator will
+implement, so make "Ship now" directly actionable.`,
+  { label: 'synthesize:roadmap', phase: 'Synthesize' },
+)
+
+return {
+  diagnoses: confirmedDiagnoses,
+  designs: designs.map((d) => ({ issue: d.issue, survives: d.survives, severity: d.severity, mustFix: d.mustFix })),
+  visionGapCount: confirmedVisions.flatMap((v) => v.gaps).length,
+  roadmap: synthesis,
+}
diff --git a/packages/cli/src/commands/scan.ts b/packages/cli/src/commands/scan.ts
index 2b836843..401f6cdc 100644
--- a/packages/cli/src/commands/scan.ts
+++ b/packages/cli/src/commands/scan.ts
@@ -27,6 +27,7 @@
 import { readFileSync } from "node:fs";
 import { mkdir, readFile, writeFile } from "node:fs/promises";
 import { join, resolve } from "node:path";
+import { pipeline } from "@opencodehub/ingestion";
 import {
   applyBaselineState,
   applySuppressions,
@@ -45,10 +46,13 @@ import {
   P1_SPECS,
   PIP_AUDIT_SPEC,
   type ProjectProfileGate,
+  RADON_SPEC,
   runScanners,
   type ScannerSpec,
   type ScannerStatus,
   SPECTRAL_SPEC,
+  TY_SPEC,
+  VULTURE_SPEC,
 } from "@opencodehub/scanners";
 import { resolveRepoMetaDir } from "@opencodehub/storage";
 import { readRegistry } from "../registry.js";
@@ -360,6 +364,20 @@ async function buildWrapperContext(
     // export lands in the gitignored .codehub/ meta dir.
     ctx.pipAudit = { exportDir: resolveRepoMetaDir(repoPath) };
   }
+  // Python tree-walking scanners (vulture/radon/ty) descend into `.venv` and
+  // report library noise unless told to skip the same dirs the indexer
+  // ignores. Reuse the indexer's single source of truth so the exclude set
+  // can't drift. Each wrapper anchors / formats these for its own CLI.
+  const ignoreDirs = pipeline.HARDCODED_IGNORES;
+  if (ids.has(VULTURE_SPEC.id)) {
+    ctx.vulture = { excludeGlobs: ignoreDirs };
+  }
+  if (ids.has(RADON_SPEC.id)) {
+    ctx.radon = { ignoreDirs };
+  }
+  if (ids.has(TY_SPEC.id)) {
+    ctx.ty = { excludeGlobs: ignoreDirs };
+  }
   return ctx;
 }
 
diff --git a/packages/scanners/src/index.ts b/packages/scanners/src/index.ts
index 1f740cd2..58b9556b 100644
--- a/packages/scanners/src/index.ts
+++ b/packages/scanners/src/index.ts
@@ -129,15 +129,15 @@ import { createHadolintWrapper, type HadolintWrapperOptions } from "./wrappers/h
 import { createNpmAuditWrapper } from "./wrappers/npm-audit.js";
 import { createOsvScannerWrapper } from "./wrappers/osv-scanner.js";
 import { createPipAuditWrapper, type PipAuditWrapperOptions } from "./wrappers/pip-audit.js";
-import { createRadonWrapper } from "./wrappers/radon.js";
+import { createRadonWrapper, type RadonWrapperOptions } from "./wrappers/radon.js";
 import { createRuffWrapper } from "./wrappers/ruff.js";
 import { createSemgrepWrapper } from "./wrappers/semgrep.js";
 import { DEFAULT_DEPS, type WrapperDeps } from "./wrappers/shared.js";
 import { createSpectralWrapper, type SpectralWrapperOptions } from "./wrappers/spectral.js";
 import { createTflintWrapper } from "./wrappers/tflint.js";
 import { createTrivyWrapper } from "./wrappers/trivy.js";
-import { createTyWrapper } from "./wrappers/ty.js";
-import { createVultureWrapper } from "./wrappers/vulture.js";
+import { createTyWrapper, type TyWrapperOptions } from "./wrappers/ty.js";
+import { createVultureWrapper, type VultureWrapperOptions } from "./wrappers/vulture.js";
 
 /**
  * Per-scanner context passed to `createDefaultWrappers`. Some wrappers
@@ -157,6 +157,12 @@ export interface DefaultWrapperContext {
   readonly hadolint?: HadolintWrapperOptions;
   readonly spectral?: SpectralWrapperOptions;
   readonly pipAudit?: PipAuditWrapperOptions;
+  // Python dead-code / complexity / type-check scanners walk the project
+  // tree directly; without an exclude they descend into `.venv` and report
+  // library noise. The CLI threads the indexer's ignore dirs in here.
+  readonly vulture?: VultureWrapperOptions;
+  readonly radon?: RadonWrapperOptions;
+  readonly ty?: TyWrapperOptions;
 }
 
 /**
@@ -216,11 +222,11 @@ function createWrapperFor(
     case GRYPE_SPEC.id:
       return deps ? createGrypeWrapper(deps) : createGrypeWrapper();
     case VULTURE_SPEC.id:
-      return deps ? createVultureWrapper(deps) : createVultureWrapper();
+      return createVultureWrapper(deps ?? DEFAULT_DEPS, ctx.vulture ?? {});
     case RADON_SPEC.id:
-      return deps ? createRadonWrapper(deps) : createRadonWrapper();
+      return createRadonWrapper(deps ?? DEFAULT_DEPS, ctx.radon ?? {});
     case TY_SPEC.id:
-      return deps ? createTyWrapper(deps) : createTyWrapper();
+      return createTyWrapper(deps ?? DEFAULT_DEPS, ctx.ty ?? {});
     case CLAMAV_SPEC.id:
       return deps ? createClamAvWrapper(deps) : createClamAvWrapper();
     case CHECKOV_DOCKER_COMPOSE_SPEC.id:
diff --git a/packages/scanners/src/wrappers/extended-wrappers.test.ts b/packages/scanners/src/wrappers/extended-wrappers.test.ts
index 9386b421..2aa12ce5 100644
--- a/packages/scanners/src/wrappers/extended-wrappers.test.ts
+++ b/packages/scanners/src/wrappers/extended-wrappers.test.ts
@@ -230,6 +230,26 @@ test("vulture wrapper emits empty SARIF when binary missing", async () => {
   assert.ok(out.skipped?.includes("not found on PATH"));
 });
 
+test("vulture wrapper anchors excludeGlobs to path segments (no .venv noise)", async () => {
+  const { deps, calls } = makeFakeDeps(() => ({ stdout: "", exitCode: 0 }));
+  await createVultureWrapper(deps, { excludeGlobs: [".venv", "node_modules"] }).run(ctx);
+  const args = calls[0]?.args ?? [];
+  const idx = args.indexOf("--exclude");
+  assert.ok(idx >= 0, "must pass --exclude when excludeGlobs is non-empty");
+  const value = args[idx + 1] ?? "";
+  // Anchored to a full path segment — NOT the bare name, which vulture would
+  // substring-match and so suppress e.g. src/.venv_helpers.py.
+  assert.ok(value.includes("*/.venv/*"), `expected anchored .venv glob, got: ${value}`);
+  assert.ok(value.includes("*/node_modules/*"));
+  assert.ok(!value.split(",").includes(".venv"), "must not pass the bare name .venv");
+});
+
+test("vulture wrapper omits --exclude when no excludeGlobs given", async () => {
+  const { deps, calls } = makeFakeDeps(() => ({ stdout: "", exitCode: 0 }));
+  await createVultureWrapper(deps).run(ctx);
+  assert.ok(!(calls[0]?.args ?? []).includes("--exclude"));
+});
+
 // ---------- radon ---------------------------------------------------------
 
 test("radon wrapper parses cc JSON into SARIF results above threshold", async () => {
diff --git a/packages/scanners/src/wrappers/radon.ts b/packages/scanners/src/wrappers/radon.ts
index 9af4ee1e..ac8147bf 100644
--- a/packages/scanners/src/wrappers/radon.ts
+++ b/packages/scanners/src/wrappers/radon.ts
@@ -21,7 +21,20 @@ import type { ScannerRunContext, ScannerRunResult, ScannerWrapper } from "../spe
 import { emptySarifFor } from "../spec.js";
 import { DEFAULT_DEPS, type WrapperDeps } from "./shared.js";
 
-export function createRadonWrapper(deps: WrapperDeps = DEFAULT_DEPS): ScannerWrapper {
+export interface RadonWrapperOptions {
+  /**
+   * Directory names to skip (e.g. `.venv`, `node_modules`). radon's `-i`
+   * matches directory BASENAMES (not path globs), so the bare ignore names
+   * are passed through as-is. radon already skips hidden dirs by default, so
+   * `-i` mainly helps non-hidden entries (`node_modules`, `dist`, `build`).
+   */
+  readonly ignoreDirs?: readonly string[];
+}
+
+export function createRadonWrapper(
+  deps: WrapperDeps = DEFAULT_DEPS,
+  opts: RadonWrapperOptions = {},
+): ScannerWrapper {
   return {
     spec: RADON_SPEC,
     run: async (ctx: ScannerRunContext): Promise<ScannerRunResult> => {
@@ -37,7 +50,11 @@ export function createRadonWrapper(deps: WrapperDeps = DEFAULT_DEPS): ScannerWra
           durationMs: performance.now() - started,
         };
       }
-      const args: readonly string[] = ["cc", "-s", "-j", ctx.projectPath];
+      const ignoreArgs =
+        opts.ignoreDirs !== undefined && opts.ignoreDirs.length > 0
+          ? ["-i", opts.ignoreDirs.join(",")]
+          : [];
+      const args: readonly string[] = ["cc", "-s", "-j", ...ignoreArgs, ctx.projectPath];
       const result = await deps.runBinary("radon", args, {
         timeoutMs: ctx.timeoutMs,
         cwd: ctx.projectPath,
diff --git a/packages/scanners/src/wrappers/ty.ts b/packages/scanners/src/wrappers/ty.ts
index 491423bf..88146a49 100644
--- a/packages/scanners/src/wrappers/ty.ts
+++ b/packages/scanners/src/wrappers/ty.ts
@@ -21,7 +21,20 @@ import type { ScannerRunContext, ScannerRunResult, ScannerWrapper } from "../spe
 import { emptySarifFor } from "../spec.js";
 import { DEFAULT_DEPS, type WrapperDeps } from "./shared.js";
 
-export function createTyWrapper(deps: WrapperDeps = DEFAULT_DEPS): ScannerWrapper {
+export interface TyWrapperOptions {
+  /**
+   * Directory names to exclude (e.g. `.venv`, `node_modules`). ty uses
+   * gitignore-style excludes; a trailing `/` limits the match to directories. We also
+   * pass `--force-exclude` so the excludes apply even though the project path
+   * is given explicitly on the CLI (CLI-named paths bypass excludes otherwise).
+   */
+  readonly excludeGlobs?: readonly string[];
+}
+
+export function createTyWrapper(
+  deps: WrapperDeps = DEFAULT_DEPS,
+  opts: TyWrapperOptions = {},
+): ScannerWrapper {
   return {
     spec: TY_SPEC,
     run: async (ctx: ScannerRunContext): Promise<ScannerRunResult> => {
@@ -37,7 +50,14 @@ export function createTyWrapper(deps: WrapperDeps = DEFAULT_DEPS): ScannerWrappe
           durationMs: performance.now() - started,
         };
       }
-      const args: readonly string[] = ["check", ctx.projectPath];
+      const excludeArgs =
+        opts.excludeGlobs !== undefined && opts.excludeGlobs.length > 0
+          ? [
+              ...opts.excludeGlobs.flatMap((g) => ["--exclude", g.endsWith("/") ? g : `${g}/`]),
+              "--force-exclude",
+            ]
+          : [];
+      const args: readonly string[] = ["check", ...excludeArgs, ctx.projectPath];
       const result = await deps.runBinary("ty", args, {
         timeoutMs: ctx.timeoutMs,
         cwd: ctx.projectPath,
diff --git a/packages/scanners/src/wrappers/vulture.ts b/packages/scanners/src/wrappers/vulture.ts
index 6e0c0e3d..9ad4a930 100644
--- a/packages/scanners/src/wrappers/vulture.ts
+++ b/packages/scanners/src/wrappers/vulture.ts
@@ -22,7 +22,33 @@ import { DEFAULT_DEPS, type WrapperDeps } from "./shared.js";
 /** Minimum confidence percentage vulture emits findings at. */
 const DEFAULT_MIN_CONFIDENCE = "80";
 
-export function createVultureWrapper(deps: WrapperDeps = DEFAULT_DEPS): ScannerWrapper {
+export interface VultureWrapperOptions {
+  /**
+   * Directory names the indexer ignores (e.g. `.venv`, `node_modules`).
+   * Threaded from the CLI so vulture doesn't walk the virtualenv and drown
+   * real findings in library dead-code. Anchored to path-segment globs
+   * inside the wrapper so a bare `.venv` can't substring-match `src/.venv_helpers.py`.
+   */
+  readonly excludeGlobs?: readonly string[];
+}
+
+/**
+ * Turn an ignore directory name into a vulture `--exclude` glob anchored to a
+ * path segment. vulture matches `--exclude` patterns against ABSOLUTE paths
+ * and treats a wildcard-free pattern as a substring match, so the bare name
+ * `.venv` would also suppress `src/.venv_helpers.py`. Wrapping it as a
+ * slash-delimited glob segment matches only when the name is a full directory
+ * segment. Patterns already containing a glob pass through untouched.
+ */
+function toVultureExcludeGlob(name: string): string {
+  if (/[*?[\]]/.test(name)) return name;
+  return `*/${name}/*`;
+}
+
+export function createVultureWrapper(
+  deps: WrapperDeps = DEFAULT_DEPS,
+  opts: VultureWrapperOptions = {},
+): ScannerWrapper {
   return {
     spec: VULTURE_SPEC,
     run: async (ctx: ScannerRunContext): Promise<ScannerRunResult> => {
@@ -38,7 +64,16 @@ export function createVultureWrapper(deps: WrapperDeps = DEFAULT_DEPS): ScannerW
           durationMs: performance.now() - started,
         };
       }
-      const args: readonly string[] = [ctx.projectPath, "--min-confidence", DEFAULT_MIN_CONFIDENCE];
+      const excludeArgs =
+        opts.excludeGlobs !== undefined && opts.excludeGlobs.length > 0
+          ? ["--exclude", opts.excludeGlobs.map(toVultureExcludeGlob).join(",")]
+          : [];
+      const args: readonly string[] = [
+        ctx.projectPath,
+        "--min-confidence",
+        DEFAULT_MIN_CONFIDENCE,
+        ...excludeArgs,
+      ];
       const result = await deps.runBinary("vulture", args, {
         timeoutMs: ctx.timeoutMs,
         cwd: ctx.projectPath,