diff --git a/.changeset/foraging-mcp-tools.md b/.changeset/foraging-mcp-tools.md new file mode 100644 index 0000000..959d7b7 --- /dev/null +++ b/.changeset/foraging-mcp-tools.md @@ -0,0 +1,37 @@ +--- +"@colony/foraging": minor +"@colony/core": minor +"@colony/storage": minor +"@colony/mcp-server": minor +--- + +Expose foraged food sources to MCP clients through three new tools and +wire `MemoryStore.search` with an optional kind/metadata filter so +scoped queries don't pollute the main search. + +New MCP tools (registered alongside spec in `apps/mcp-server`): + +- `examples_list({ repo_root })` — compact list of indexed example + names, manifest kinds, and cached observation counts. +- `examples_query({ query, example_name?, limit? })` — BM25 hits + scoped to `kind = 'foraged-pattern'` and optionally to a specific + example. Returns compact snippets — fetch full bodies via + `get_observations`. +- `examples_integrate_plan({ repo_root, example_name, target_hint? })` + — deterministic plan: npm dependency delta between the example and + the target `package.json`, files to copy (derived from indexed + entrypoints), `config_steps` (npm scripts), and an + `uncertainty_notes` list for everything the planner couldn't + resolve. No LLM in the loop. + +`@colony/foraging` adds `buildIntegrationPlan(storage, opts)`. The +function reads manifests fresh from disk to avoid round-tripping +structured JSON through the compressor. + +`@colony/core` extends `MemoryStore.search(query, limit?, embedder?, filter?)` +with `{ kind?: string; metadata?: Record<string, string> }`. When a +filter is set the method skips vector ranking — the embedding index has +no kind column, so mixing vector hits would require a second pass to +drop them. `@colony/storage`'s `searchFts(query, limit, filter?)` +applies the filter in SQL via `json_extract` so the LIMIT still bounds +the scan. 
diff --git a/apps/mcp-server/package.json b/apps/mcp-server/package.json index 5097fab..741b090 100644 --- a/apps/mcp-server/package.json +++ b/apps/mcp-server/package.json @@ -19,6 +19,7 @@ "@colony/config": "workspace:*", "@colony/core": "workspace:*", "@colony/embedding": "workspace:*", + "@colony/foraging": "workspace:*", "@colony/hooks": "workspace:*", "@colony/process": "workspace:*", "@colony/spec": "workspace:*", diff --git a/apps/mcp-server/src/server.ts b/apps/mcp-server/src/server.ts index 879c602..849a826 100644 --- a/apps/mcp-server/src/server.ts +++ b/apps/mcp-server/src/server.ts @@ -7,6 +7,7 @@ import { isMainEntry } from '@colony/process'; import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'; import type { ToolContext } from './tools/context.js'; +import * as foraging from './tools/foraging.js'; import * as handoff from './tools/handoff.js'; import { installActiveSessionHeartbeat } from './tools/heartbeat.js'; import * as hivemind from './tools/hivemind.js'; @@ -77,6 +78,11 @@ export function buildServer(store: MemoryStore, settings: Settings): McpServer { // core tool first. spec.register(server, ctx); + // Foraging lane (@colony/foraging). Adds examples_list, examples_query, + // examples_integrate_plan. Registered after spec so the heartbeat has + // wrapped the earlier tools before we bind these three. + foraging.register(server, ctx); + return server; } diff --git a/apps/mcp-server/src/tools/foraging.ts b/apps/mcp-server/src/tools/foraging.ts new file mode 100644 index 0000000..28b7f81 --- /dev/null +++ b/apps/mcp-server/src/tools/foraging.ts @@ -0,0 +1,70 @@ +import { buildIntegrationPlan } from '@colony/foraging'; +import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; +import { z } from 'zod'; +import type { ToolContext } from './context.js'; + +/** + * Foraging surface exposed to MCP clients. 
+ * + * Progressive disclosure: `examples_list` and `examples_query` return + * compact shapes. Full observation bodies are fetched by + * `get_observations(ids[])`, which already exists in search.ts. Keeps + * the contract tight enough that a single `examples_query` call stays + * under the MCP response-size budget even on large example sets. + */ +export function register(server: McpServer, ctx: ToolContext): void { + const { store, resolveEmbedder } = ctx; + + server.tool( + 'examples_list', + 'List indexed example projects (food sources) for a repo root.', + { repo_root: z.string().min(1) }, + async ({ repo_root }) => { + const rows = store.storage.listExamples(repo_root); + const compact = rows.map((r) => ({ + example_name: r.example_name, + manifest_kind: r.manifest_kind, + observation_count: r.observation_count, + last_scanned_at: r.last_scanned_at, + })); + return { content: [{ type: 'text', text: JSON.stringify(compact) }] }; + }, + ); + + server.tool( + 'examples_query', + 'Search indexed example patterns. Compact hits — fetch bodies via get_observations.', + { + query: z.string().min(1), + example_name: z.string().optional(), + limit: z.number().int().positive().max(20).optional(), + }, + async ({ query, example_name, limit }) => { + const e = (await resolveEmbedder()) ?? undefined; + const filter: { kind: string; metadata?: Record } = { + kind: 'foraged-pattern', + }; + if (example_name) filter.metadata = { example_name }; + const hits = await store.search(query, limit ?? 
10, e, filter); + return { content: [{ type: 'text', text: JSON.stringify(hits) }] }; + }, + ); + + server.tool( + 'examples_integrate_plan', + 'Build an integration plan: dependency delta + files to copy + config steps.', + { + example_name: z.string().min(1), + repo_root: z.string().min(1), + target_hint: z.string().optional(), + }, + async ({ example_name, repo_root, target_hint }) => { + const plan = buildIntegrationPlan(store.storage, { + example_name, + repo_root, + ...(target_hint !== undefined ? { target_hint } : {}), + }); + return { content: [{ type: 'text', text: JSON.stringify(plan) }] }; + }, + ); +} diff --git a/apps/mcp-server/test/foraging.test.ts b/apps/mcp-server/test/foraging.test.ts new file mode 100644 index 0000000..69a041a --- /dev/null +++ b/apps/mcp-server/test/foraging.test.ts @@ -0,0 +1,133 @@ +import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { defaultSettings } from '@colony/config'; +import { MemoryStore } from '@colony/core'; +import { scanExamples } from '@colony/foraging'; +import { Client } from '@modelcontextprotocol/sdk/client/index.js'; +import { InMemoryTransport } from '@modelcontextprotocol/sdk/inMemory.js'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { buildServer } from '../src/server.js'; + +let dir: string; +let repoRoot: string; +let store: MemoryStore; +let client: Client; + +function write(rel: string, contents: string): void { + const abs = join(repoRoot, rel); + mkdirSync(join(abs, '..'), { recursive: true }); + writeFileSync(abs, contents); +} + +beforeEach(async () => { + dir = mkdtempSync(join(tmpdir(), 'colony-mcp-forage-')); + repoRoot = join(dir, 'repo'); + mkdirSync(repoRoot, { recursive: true }); + store = new MemoryStore({ dbPath: join(dir, 'data.db'), settings: defaultSettings }); + store.startSession({ id: 'mcp-session', ide: 'test', cwd: repoRoot }); + + const server = 
buildServer(store, defaultSettings); + const [clientTransport, serverTransport] = InMemoryTransport.createLinkedPair(); + client = new Client({ name: 'forage-test', version: '0.0.0' }); + await Promise.all([server.connect(serverTransport), client.connect(clientTransport)]); +}); + +afterEach(async () => { + await client.close(); + store.close(); + rmSync(dir, { recursive: true, force: true }); +}); + +async function callJson(name: string, args: Record): Promise { + const res = await client.callTool({ name, arguments: args }); + const content = (res.content as Array<{ type: string; text: string }>)[0]; + if (!content || content.type !== 'text') throw new Error(`unexpected MCP reply for ${name}`); + return JSON.parse(content.text) as T; +} + +describe('MCP foraging tools', () => { + it('examples_list returns the compact rows for a scanned repo', async () => { + write('package.json', JSON.stringify({ name: 'target' })); + write('examples/stripe/package.json', JSON.stringify({ name: 'stripe' })); + write('examples/stripe/src/index.ts', 'export {}'); + scanExamples({ repo_root: repoRoot, store, session_id: 'mcp-session' }); + + const rows = await callJson< + Array<{ example_name: string; manifest_kind: string | null; observation_count: number }> + >('examples_list', { repo_root: repoRoot }); + + expect(rows).toHaveLength(1); + expect(rows[0]).toMatchObject({ example_name: 'stripe', manifest_kind: 'npm' }); + expect(rows[0]?.observation_count).toBeGreaterThan(0); + }); + + it('examples_query returns compact hits scoped to foraged-pattern rows', async () => { + write('package.json', JSON.stringify({ name: 'target' })); + write( + 'examples/stripe/package.json', + JSON.stringify({ name: 'stripe', dependencies: { stripe: '^14.0.0' } }), + ); + scanExamples({ repo_root: repoRoot, store, session_id: 'mcp-session' }); + // Add a *non*-foraged observation with the same keyword — it must not + // show up in the scoped query. 
+ store.addObservation({ + session_id: 'mcp-session', + kind: 'note', + content: 'A random mention of stripe that should not match a foraged query.', + }); + + const hits = await callJson>('examples_query', { + query: 'stripe', + }); + expect(hits.length).toBeGreaterThan(0); + + // Every hit id must be a foraged-pattern row. + for (const h of hits) { + const row = store.storage.getObservation(h.id); + expect(row?.kind).toBe('foraged-pattern'); + } + }); + + it('examples_query honors the example_name filter', async () => { + write('examples/alpha/package.json', JSON.stringify({ name: 'alpha' })); + write('examples/beta/package.json', JSON.stringify({ name: 'beta' })); + scanExamples({ repo_root: repoRoot, store, session_id: 'mcp-session' }); + + const hits = await callJson>('examples_query', { + query: 'alpha', + example_name: 'alpha', + }); + + expect(hits.length).toBeGreaterThan(0); + for (const h of hits) { + const row = store.storage.getObservation(h.id); + const md = row?.metadata ? 
(JSON.parse(row.metadata) as { example_name: string }) : null; + expect(md?.example_name).toBe('alpha'); + } + }); + + it('examples_integrate_plan returns a deterministic plan', async () => { + write('package.json', JSON.stringify({ name: 'target', dependencies: { zod: '^3.23.0' } })); + write( + 'examples/stripe/package.json', + JSON.stringify({ + name: 'stripe', + dependencies: { zod: '^3.23.0', stripe: '^14.0.0' }, + scripts: { build: 'tsc' }, + }), + ); + scanExamples({ repo_root: repoRoot, store, session_id: 'mcp-session' }); + + const plan = await callJson<{ + example_name: string; + dependency_delta: { add: Record; remove: string[] }; + config_steps: string[]; + }>('examples_integrate_plan', { repo_root: repoRoot, example_name: 'stripe' }); + + expect(plan.example_name).toBe('stripe'); + expect(plan.dependency_delta.add.stripe).toBe('^14.0.0'); + expect(plan.dependency_delta.add.zod).toBeUndefined(); + expect(plan.config_steps).toContain('npm run build'); + }); +}); diff --git a/apps/mcp-server/test/server.test.ts b/apps/mcp-server/test/server.test.ts index 0e81e65..346d748 100644 --- a/apps/mcp-server/test/server.test.ts +++ b/apps/mcp-server/test/server.test.ts @@ -49,6 +49,9 @@ describe('MCP server', () => { 'agent_get_profile', 'agent_upsert_profile', 'attention_inbox', + 'examples_integrate_plan', + 'examples_list', + 'examples_query', 'get_observations', 'hivemind', 'hivemind_context', @@ -395,13 +398,7 @@ describe('MCP server', () => { isolatedClient.connect(clientTransport), ]); - const sessionFile = join( - repoRoot, - '.omx', - 'state', - 'active-sessions', - 'hb-session-1.json', - ); + const sessionFile = join(repoRoot, '.omx', 'state', 'active-sessions', 'hb-session-1.json'); const afterConnect = JSON.parse(readFileSync(sessionFile, 'utf8')); expect(afterConnect.sessionKey).toBe('hb-session-1'); expect(afterConnect.branch).toBe('hb-branch'); diff --git a/packages/core/src/memory-store.ts b/packages/core/src/memory-store.ts index 2282d84..2dbf504 
100644 --- a/packages/core/src/memory-store.ts +++ b/packages/core/src/memory-store.ts @@ -117,10 +117,23 @@ export class MemoryStore { // --- search --- - async search(query: string, limit?: number, embedder?: Embedder): Promise { + async search( + query: string, + limit?: number, + embedder?: Embedder, + filter?: { kind?: string; metadata?: Record }, + ): Promise { const cap = limit ?? this.settings.search.defaultLimit; const alpha = this.settings.search.alpha; - const keyword = this.storage.searchFts(query, cap * 2); + const keyword = this.storage.searchFts(query, cap * 2, filter); + // When the caller scopes the result to a `kind` / `metadata` pair, + // skip vector ranking: the embedding index has no kind filter, so + // mixing vector hits would bring back observations from other kinds + // and force a second pass to drop them. The filtered FTS output is + // already scoped correctly — keyword-only is faster and cleaner. + if (filter && (filter.kind || (filter.metadata && Object.keys(filter.metadata).length > 0))) { + return keyword.slice(0, cap); + } if (!embedder || this.settings.embedding.provider === 'none') { return keyword.slice(0, cap); } diff --git a/packages/foraging/src/index.ts b/packages/foraging/src/index.ts index b826fc4..02f9971 100644 --- a/packages/foraging/src/index.ts +++ b/packages/foraging/src/index.ts @@ -4,6 +4,8 @@ export { extract, readCapped } from './extractor.js'; export type { ExtractedShape } from './extractor.js'; export { indexFoodSource } from './indexer.js'; export type { IndexFoodSourceOptions } from './indexer.js'; +export { buildIntegrationPlan } from './integration-plan.js'; +export type { BuildIntegrationPlanOptions } from './integration-plan.js'; export { redact } from './redact.js'; export type { ExampleManifestKind, diff --git a/packages/foraging/src/integration-plan.ts b/packages/foraging/src/integration-plan.ts new file mode 100644 index 0000000..4ea4061 --- /dev/null +++ b/packages/foraging/src/integration-plan.ts 
@@ -0,0 +1,236 @@ +import { readFileSync } from 'node:fs'; +import { join } from 'node:path'; +import type { Storage } from '@colony/storage'; +import type { IntegrationPlan } from './types.js'; + +export interface BuildIntegrationPlanOptions { + repo_root: string; + example_name: string; + /** Absolute or repo-relative path to the target package manifest that the + * plan should diff against. Defaults to `/package.json`. */ + target_hint?: string; +} + +/** + * Produce a deterministic plan an agent can reason about: + * - dependency_delta: what the example depends on but the target doesn't, + * and anything the target has but the example doesn't list (the `remove` + * list is informational, never a recommendation to delete). + * - files_to_copy: for each indexed filetree-listed entrypoint / manifest, + * a suggested destination under the target repo. + * - config_steps: side-effects the example expects (build scripts, env + * variables called out in the manifest) that an integrator must wire up. + * - uncertainty_notes: every ambiguity the planner couldn't resolve — the + * agent reads these and decides, the planner never hides them. + * + * No LLM in the loop. Everything here is pulled from the indexed + * observations or from re-reading the manifests; if that data isn't + * present, the relevant section is empty and the uncertainty is logged. + */ +export function buildIntegrationPlan( + storage: Storage, + opts: BuildIntegrationPlanOptions, +): IntegrationPlan { + const uncertainty_notes: string[] = []; + + const example = storage.getExample(opts.repo_root, opts.example_name); + if (!example) { + uncertainty_notes.push( + `No indexed row for '${opts.example_name}' — run \`colony foraging scan\` first.`, + ); + return emptyPlan(opts.example_name, uncertainty_notes); + } + + const observations = storage.listForagedObservations(opts.repo_root, opts.example_name); + const manifestObs = observations.find((r) => { + const md = r.metadata ? 
safeJson(r.metadata) : null; + return md && (md as { entry_kind?: string }).entry_kind === 'manifest'; + }); + const filetreeObs = observations.find((r) => { + const md = r.metadata ? safeJson(r.metadata) : null; + return md && (md as { entry_kind?: string }).entry_kind === 'filetree'; + }); + const entrypointMetas = observations + .map((r) => { + const md = r.metadata ? safeJson(r.metadata) : null; + return md as { entry_kind?: string; file_path?: string } | null; + }) + .filter((m): m is { entry_kind: string; file_path: string } => m?.entry_kind === 'entrypoint'); + + // Re-read the example's manifest from disk rather than parsing the + // compressed observation — the compressor preserves technical tokens + // but the round-trip is still lossy for structured JSON, and we need + // a fully parseable manifest to diff deps. + const exampleManifestPath = resolveExampleManifestPath( + opts.repo_root, + opts.example_name, + example.manifest_kind, + ); + const dependency_delta = buildDependencyDelta( + example.manifest_kind, + exampleManifestPath, + resolveTargetManifestPath(opts.repo_root, opts.target_hint), + uncertainty_notes, + ); + + const files_to_copy = entrypointMetas.map((m) => ({ + from: `examples/${opts.example_name}/${m.file_path}`, + to_suggestion: suggestTargetPath(m.file_path), + rationale: 'Entrypoint indexed from the example; keeps the same directory shape in the target.', + })); + + const config_steps = extractConfigSteps(example.manifest_kind, exampleManifestPath); + + if (!manifestObs) { + uncertainty_notes.push('Manifest observation missing — dependency_delta may be incomplete.'); + } + if (!filetreeObs) { + uncertainty_notes.push( + 'Filetree observation missing — files_to_copy may not reflect full shape.', + ); + } + + return { + example_name: opts.example_name, + dependency_delta, + files_to_copy, + config_steps, + uncertainty_notes, + }; +} + +function emptyPlan(example_name: string, uncertainty_notes: string[]): IntegrationPlan { + return { + 
example_name, + dependency_delta: { add: {}, remove: [] }, + files_to_copy: [], + config_steps: [], + uncertainty_notes, + }; +} + +/** + * For npm manifests we can read both package.jsons and return a true diff. + * Other kinds produce an empty `add` and an uncertainty note; cross-language + * dep diffing is too ecosystem-specific to guess at. + */ +function buildDependencyDelta( + kind: string | null, + exampleManifestPath: string | null, + targetManifestPath: string, + notes: string[], +): { add: Record; remove: string[] } { + if (kind !== 'npm') { + if (kind && kind !== 'unknown') { + notes.push( + `dependency_delta is only computed for npm examples today; '${kind}' left for the agent.`, + ); + } + return { add: {}, remove: [] }; + } + + const exampleDeps = exampleManifestPath ? parseNpmDepsFromFile(exampleManifestPath) : null; + if (!exampleDeps) { + notes.push('Example manifest could not be read or parsed as JSON; dependency_delta empty.'); + return { add: {}, remove: [] }; + } + + const targetDeps = parseNpmDepsFromFile(targetManifestPath); + if (targetDeps === null) { + notes.push( + `Target manifest not found at ${targetManifestPath}; reporting all example deps as 'add'.`, + ); + } + const targetMap = targetDeps ?? {}; + + const add: Record = {}; + for (const [name, version] of Object.entries(exampleDeps)) { + if (!(name in targetMap)) add[name] = version; + } + const remove: string[] = Object.keys(targetMap).filter((n) => !(n in exampleDeps)); + + return { add, remove }; +} + +function parseNpmDepsFromFile(path: string): Record | null { + let text: string; + try { + text = readFileSync(path, 'utf8'); + } catch { + return null; + } + try { + const pkg = JSON.parse(text) as { + dependencies?: Record; + devDependencies?: Record; + }; + return { ...(pkg.dependencies ?? {}), ...(pkg.devDependencies ?? 
{}) }; + } catch { + return null; + } +} + +function resolveExampleManifestPath( + repo_root: string, + example_name: string, + kind: string | null, +): string | null { + const baseDir = join(repo_root, 'examples', example_name); + switch (kind) { + case 'npm': + return join(baseDir, 'package.json'); + case 'pypi': + return join(baseDir, 'pyproject.toml'); + case 'cargo': + return join(baseDir, 'Cargo.toml'); + case 'go': + return join(baseDir, 'go.mod'); + default: + return null; + } +} + +/** + * Very conservative suggestion: keep the in-example path as-is. Agents + * move files around intentionally; giving them a cleaned-up shape + * to deviate from is more useful than inventing a destination. + */ +function suggestTargetPath(relFromExample: string): string { + return relFromExample; +} + +function extractConfigSteps(kind: string | null, manifestPath: string | null): string[] { + if (kind !== 'npm' || !manifestPath) return []; + let text: string; + try { + text = readFileSync(manifestPath, 'utf8'); + } catch { + return []; + } + try { + const pkg = JSON.parse(text) as { scripts?: Record }; + const scripts = pkg.scripts ?? {}; + const out: string[] = []; + for (const name of ['build', 'dev', 'start', 'test'] as const) { + if (scripts[name]) out.push(`npm run ${name}`); + } + return out; + } catch { + return []; + } +} + +function resolveTargetManifestPath(repo_root: string, hint?: string): string { + if (!hint) return join(repo_root, 'package.json'); + // Allow both absolute and repo-relative hints. Keep it simple: if it + // looks absolute, use it as-is; otherwise join onto repo_root. + return hint.startsWith('/') ? 
hint : join(repo_root, hint); +} + +function safeJson(s: string): unknown { + try { + return JSON.parse(s) as unknown; + } catch { + return null; + } +} diff --git a/packages/foraging/test/integration-plan.test.ts b/packages/foraging/test/integration-plan.test.ts new file mode 100644 index 0000000..7d9a658 --- /dev/null +++ b/packages/foraging/test/integration-plan.test.ts @@ -0,0 +1,118 @@ +import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { SettingsSchema } from '@colony/config'; +import { MemoryStore } from '@colony/core'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { buildIntegrationPlan } from '../src/integration-plan.js'; +import { scanExamples } from '../src/scanner.js'; + +let repo: string; +let store: MemoryStore; + +beforeEach(() => { + repo = mkdtempSync(join(tmpdir(), 'colony-plan-')); + const settings = SettingsSchema.parse({}); + store = new MemoryStore({ dbPath: join(repo, 'colony.db'), settings }); + store.startSession({ id: 's', ide: 'test', cwd: repo }); +}); + +afterEach(() => { + store.close(); + rmSync(repo, { recursive: true, force: true }); +}); + +function write(rel: string, contents: string): void { + const abs = join(repo, rel); + mkdirSync(join(abs, '..'), { recursive: true }); + writeFileSync(abs, contents); +} + +describe('buildIntegrationPlan', () => { + it('reports an uncertainty note when the example was never scanned', () => { + const plan = buildIntegrationPlan(store.storage, { + repo_root: repo, + example_name: 'missing', + }); + expect(plan.example_name).toBe('missing'); + expect(plan.uncertainty_notes[0]).toMatch(/never|run.*scan|indexed row/i); + }); + + it('computes an npm dependency delta against a target package.json', () => { + // Target repo already has `zod` — the example also wants zod (no-op) + // and adds `stripe` (true delta). 
`lodash` only in target → goes to + // `remove`, informational. + write( + 'package.json', + JSON.stringify({ + name: 'target', + dependencies: { zod: '^3.23.0', lodash: '^4.17.0' }, + }), + ); + write( + 'examples/stripe-webhook/package.json', + JSON.stringify({ + name: 'stripe-webhook', + dependencies: { zod: '^3.23.0', stripe: '^14.0.0' }, + scripts: { build: 'tsc', test: 'vitest' }, + }), + ); + write('examples/stripe-webhook/src/index.ts', 'export const x = 1'); + + scanExamples({ repo_root: repo, store, session_id: 's' }); + + const plan = buildIntegrationPlan(store.storage, { + repo_root: repo, + example_name: 'stripe-webhook', + }); + expect(plan.dependency_delta.add).toMatchObject({ stripe: '^14.0.0' }); + expect(plan.dependency_delta.add.zod).toBeUndefined(); + expect(plan.dependency_delta.remove).toContain('lodash'); + expect(plan.config_steps).toEqual(expect.arrayContaining(['npm run build', 'npm run test'])); + expect(plan.uncertainty_notes).toHaveLength(0); + }); + + it('emits uncertainty when the example manifest is a non-npm kind', () => { + write('package.json', JSON.stringify({ name: 'target' })); + write('examples/rust-cli/Cargo.toml', '[package]\nname = "rust-cli"'); + write('examples/rust-cli/src/main.rs', 'fn main() {}'); + + scanExamples({ repo_root: repo, store, session_id: 's' }); + + const plan = buildIntegrationPlan(store.storage, { + repo_root: repo, + example_name: 'rust-cli', + }); + expect(plan.dependency_delta.add).toEqual({}); + expect(plan.uncertainty_notes.some((n) => /cargo/.test(n))).toBe(true); + }); + + it('files_to_copy reflects the indexed entrypoints', () => { + write('package.json', JSON.stringify({ name: 'target' })); + write('examples/app/package.json', JSON.stringify({ name: 'app' })); + write('examples/app/src/index.ts', 'export {}'); + + scanExamples({ repo_root: repo, store, session_id: 's' }); + + const plan = buildIntegrationPlan(store.storage, { + repo_root: repo, + example_name: 'app', + }); + 
expect(plan.files_to_copy.some((f) => f.from === 'examples/app/src/index.ts')).toBe(true); + expect(plan.files_to_copy[0]?.to_suggestion).toBe('src/index.ts'); + }); + + it('handles a missing target manifest gracefully', () => { + // No target package.json on disk. + write('examples/app/package.json', JSON.stringify({ dependencies: { stripe: '^14.0.0' } })); + + scanExamples({ repo_root: repo, store, session_id: 's' }); + + const plan = buildIntegrationPlan(store.storage, { + repo_root: repo, + example_name: 'app', + }); + expect(plan.dependency_delta.add).toMatchObject({ stripe: '^14.0.0' }); + expect(plan.uncertainty_notes.some((n) => /Target manifest not found/.test(n))).toBe(true); + }); +}); diff --git a/packages/storage/src/storage.ts b/packages/storage/src/storage.ts index 1e121c7..1b9194f 100644 --- a/packages/storage/src/storage.ts +++ b/packages/storage/src/storage.ts @@ -190,8 +190,39 @@ export class Storage { // --- search (BM25 via FTS5) --- - searchFts(query: string, limit = 10): SearchHit[] { + /** + * BM25-ranked search. An optional `filter` scopes the hits to a specific + * observation kind and/or to rows whose `metadata` JSON contains literal + * string matches for the given keys. The filter runs in SQL via + * `json_extract` so the LIMIT still bounds the scan. + * + * Design choice: we keep one method with an optional filter rather than + * a separate `searchForagedFts`. Callers such as MCP `examples_query` + * need filter support today, and every future kind-scoped search will + * want the same wiring — branching here is cheaper than a new method + * per caller. 
+ */ + searchFts( + query: string, + limit = 10, + filter?: { kind?: string; metadata?: Record }, + ): SearchHit[] { if (!query.trim()) return []; + const conditions: string[] = ['observations_fts MATCH ?']; + const params: Array = [sanitizeMatch(query)]; + if (filter?.kind) { + conditions.push('o.kind = ?'); + params.push(filter.kind); + } + if (filter?.metadata) { + for (const [key, value] of Object.entries(filter.metadata)) { + // Allow only simple identifier-shaped keys to keep JSON path safe. + if (!/^[a-zA-Z_][a-zA-Z0-9_]*$/.test(key)) continue; + conditions.push(`json_extract(o.metadata, '$.${key}') = ?`); + params.push(value); + } + } + const where = conditions.join(' AND '); const rows = this.db .prepare( `SELECT o.id, o.session_id, o.ts, @@ -199,11 +230,11 @@ export class Storage { bm25(observations_fts) AS score FROM observations_fts JOIN observations o ON o.id = observations_fts.rowid - WHERE observations_fts MATCH ? + WHERE ${where} ORDER BY score ASC LIMIT ?`, ) - .all(sanitizeMatch(query), limit) as Array<{ + .all(...params, limit) as Array<{ id: number; session_id: string; ts: number; diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index e3c7d07..15bde59 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -118,6 +118,9 @@ importers: '@colony/embedding': specifier: workspace:* version: link:../../packages/embedding + '@colony/foraging': + specifier: workspace:* + version: link:../../packages/foraging '@colony/hooks': specifier: workspace:* version: link:../../packages/hooks