recodeee · NagyVikt · May 18, 2026 · May 18, 2026
diff --git a/.changeset/icm-feedback-record-search-stats.md b/.changeset/icm-feedback-record-search-stats.md
@@ -0,0 +1,30 @@
+---
+"@colony/storage": minor
+"@colony/core": minor
+"@colony/mcp-server": minor
+---
+
+ICM slice 2 — feedback `record`, `search`, and `stats` MCP tools.
+
+Adds a new `feedback` lane that records "AI predicted X, real answer
+was Y" corrections so a future agent can search prior mistakes by
+topic before repeating them. Migration 015 introduces the `feedback`
+table plus a porter-unicode61 `feedback_fts` virtual table mirrored
+by the standard `ai/ad/au` triggers; importance is a four-level enum
+(`critical | high | medium | low`) defaulting to `medium`. `prediction`, `correction`, and the optional
+`context` flow through `MemoryStore.recordFeedback`, which routes each
+body through `prepareMemoryText` — the same redact-then-compress path
+observations use — so the compression invariant holds at the write
+boundary.
+
+MCP surface (progressive disclosure):
+
+- `feedback_record({ topic, prediction, correction, context?, importance?, created_by? })` → `{ id }`
+- `feedback_search({ query, topic?, limit? })` → compact hits (`id`, `topic`, `importance`, `score`, `snippet`, `created_at`)
+- `feedback_stats({ topic? })` → per-topic counts and `last_created_at`
+
+Follow-up (separate PR): a pre-tool-use hook that surfaces prior
+corrections on inbound prompts. This PR keeps the slice scoped to the
+storage + search surface so it can ship behind a manual query first.
+
+Reference: `docs/icm-integration-plan.md` slice 2.
diff --git a/apps/mcp-server/src/server.ts b/apps/mcp-server/src/server.ts
@@ -14,6 +14,7 @@ import * as autopilot from './tools/autopilot.js';
 import * as bridge from './tools/bridge.js';
 import type { ToolContext } from './tools/context.js';
 import * as drift from './tools/drift.js';
+import * as feedback from './tools/feedback.js';
 import * as foraging from './tools/foraging.js';
 import { registerTaskForagingReport } from './tools/foraging.js';
 import * as handoff from './tools/handoff.js';
@@ -30,8 +31,8 @@ import * as readyQueue from './tools/ready-queue.js';
 import * as recall from './tools/recall.js';
 import * as relay from './tools/relay.js';
 import * as rescue from './tools/rescue.js';
-import * as savings from './tools/savings.js';
 import * as savingsDrift from './tools/savings-drift.js';
+import * as savings from './tools/savings.js';
 import * as search from './tools/search.js';
 import * as spec from './tools/spec.js';
 import * as startupPanel from './tools/startup-panel.js';
@@ -125,6 +126,11 @@ export function buildServer(
   savings.register(server, ctx);
   savingsDrift.register(server, ctx);
 
+  // ICM slice 2 feedback lane (docs/icm-integration-plan.md). Registered
+  // after the read-side surfaces so the heartbeat wrapper has seen every
+  // core tool first.
+  feedback.register(server, ctx);
+
   // Autopilot lane (tick advisor + drift checker). Cheap compositions of
   // existing primitives; registered after the core surface so the heartbeat
   // wrapper has already wrapped the tools they delegate to.

diff --git a/apps/mcp-server/src/tools/feedback.test.ts b/apps/mcp-server/src/tools/feedback.test.ts
@@ -0,0 +1,102 @@
+import { mkdtempSync, rmSync } from 'node:fs';
+import { tmpdir } from 'node:os';
+import { join } from 'node:path';
+import { defaultSettings } from '@colony/config';
+import { MemoryStore } from '@colony/core';
+import { Client } from '@modelcontextprotocol/sdk/client';
+import { InMemoryTransport } from '@modelcontextprotocol/sdk/inMemory.js';
+import { afterEach, beforeEach, describe, expect, it } from 'vitest';
+import { buildServer } from '../server.js';
+
+let dir: string;
+let store: MemoryStore;
+let client: Client;
+
+beforeEach(async () => {
+  dir = mkdtempSync(join(tmpdir(), 'colony-feedback-mcp-'));
+  store = new MemoryStore({ dbPath: join(dir, 'data.db'), settings: defaultSettings });
+  const server = buildServer(store, defaultSettings);
+  const [clientTransport, serverTransport] = InMemoryTransport.createLinkedPair();
+  client = new Client({ name: 'test', version: '0.0.0' });
+  await Promise.all([server.connect(serverTransport), client.connect(clientTransport)]);
+});
+
+afterEach(async () => {
+  await client.close();
+  store.close();
+  rmSync(dir, { recursive: true, force: true });
+});
+
+interface ToolResponse {
+  content: Array<{ type: string; text: string }>;
+}
+
+function parseTextContent<T>(response: unknown): T {
+  const typed = response as ToolResponse;
+  const text = typed.content[0]?.text;
+  if (typeof text !== 'string') throw new Error('feedback tool returned no text content');
+  return JSON.parse(text) as T;
+}
+
+describe('feedback MCP surface (ICM slice 2)', () => {
+  it('records a correction and surfaces it via search + stats', async () => {
+    const recordRes = await client.callTool({
+      name: 'feedback_record',
+      arguments: {
+        topic: 'frontend.routing',
+        prediction: 'useRouter returns null in server components',
+        correction: 'useRouter throws in server components',
+        context: 'reviewing apps/web App Router migration',
+        importance: 'high',
+        created_by: 'claude',
+      },
+    });
+    const recordPayload = parseTextContent<{ id: number }>(recordRes);
+    expect(recordPayload.id).toBeGreaterThan(0);
+
+    await client.callTool({
+      name: 'feedback_record',
+      arguments: {
+        topic: 'backend.migrations',
+        prediction: 'ALTER TABLE works inside transactions',
+        correction: 'ALTER TABLE must run outside a transaction for partitioned tables',
+      },
+    });
+
+    const searchRes = await client.callTool({
+      name: 'feedback_search',
+      arguments: { query: 'router server component', limit: 5 },
+    });
+    const searchPayload = parseTextContent<{
+      hits: Array<{ id: number; topic: string; score: number; snippet: string }>;
+    }>(searchRes);
+    expect(searchPayload.hits.length).toBeGreaterThan(0);
+    expect(searchPayload.hits[0]?.topic).toBe('frontend.routing');
+
+    const statsRes = await client.callTool({
+      name: 'feedback_stats',
+      arguments: {},
+    });
+    const statsPayload = parseTextContent<{
+      stats: Array<{ topic: string; count: number; last_created_at: number }>;
+    }>(statsRes);
+    const topics = statsPayload.stats.map((row) => row.topic);
+    expect(topics).toContain('frontend.routing');
+    expect(topics).toContain('backend.migrations');
+  });
+
+  it('returns INTERNAL_ERROR when the prediction redacts to empty', async () => {
+    const res = await client.callTool({
+      name: 'feedback_record',
+      arguments: {
+        topic: 'auth',
+        prediction: '<private>secret-prediction</private>',
+        correction: 'real correction text',
+      },
+    });
+    const typed = res as ToolResponse & { isError?: boolean };
+    expect(typed.isError).toBe(true);
+    const payload = JSON.parse(typed.content[0]?.text ?? '{}') as { code: string };
+    expect(payload.code).toBe('INTERNAL_ERROR');
+  });
+});
diff --git a/apps/mcp-server/src/tools/feedback.ts b/apps/mcp-server/src/tools/feedback.ts
@@ -0,0 +1,115 @@
+import type { FeedbackImportance } from '@colony/core';
+import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
+import { z } from 'zod';
+import { type ToolContext, defaultWrapHandler } from './context.js';
+import { mcpErrorResponse } from './shared.js';
+
+/**
+ * Feedback lane (ICM slice 2 — docs/icm-integration-plan.md). Records the
+ * "AI predicted X, real answer was Y" pairs that surface in code review,
+ * test failures, and human corrections so a later agent can search prior
+ * mistakes by topic.
+ *
+ * Progressive disclosure mirrors the observation surface:
+ *   feedback_record → row id only
+ *   feedback_search → compact hits (id, topic, importance, score, snippet, created_at)
+ *   feedback_stats  → counts per topic
+ *
+ * Compression invariant: prediction/correction/context flow through
+ * `MemoryStore.recordFeedback`, which runs each body through the same
+ * `prepareMemoryText` path observations use. Tool handlers never write
+ * raw prose to storage directly.
+ *
+ * Note: no pre-tool-use hook surfaces prior corrections on inbound
+ * prompts yet. That is planned as a follow-up so this slice can ship
+ * behind a manual search surface first.
+ */
+export function register(server: McpServer, ctx: ToolContext): void {
+  const wrapHandler = ctx.wrapHandler ?? defaultWrapHandler;
+  const { store } = ctx;
+
+  const importanceSchema = z
+    .enum(['critical', 'high', 'medium', 'low'])
+    .describe('how strongly this correction should weigh against repeating the prediction');
+
+  server.tool(
+    'feedback_record',
+    "Record an 'AI predicted X, real answer was Y' correction. Bodies are compressed via the same path observations use; returns only the new row id.",
+    {
+      topic: z
+        .string()
+        .min(1)
+        .max(200)
+        .describe('a short, stable label callers can pivot on (e.g. "frontend.routing")'),
+      prediction: z.string().min(1).describe('what the AI predicted / asserted'),
+      correction: z.string().min(1).describe('what the real answer turned out to be'),
+      context: z
+        .string()
+        .min(1)
+        .optional()
+        .describe('optional surrounding context (where the prediction was made)'),
+      importance: importanceSchema.optional(),
+      created_by: z.string().min(1).optional().describe('agent or human author for audit'),
+    },
+    wrapHandler('feedback_record', async (args) => {
+      const topic = args.topic.trim();
+      if (!topic) {
+        return mcpErrorResponse('INTERNAL_ERROR', 'feedback_record: topic must be non-empty');
+      }
+      const id = store.recordFeedback({
+        topic,
+        prediction: args.prediction,
+        correction: args.correction,
+        ...(args.context !== undefined ? { context: args.context } : {}),
+        ...(args.importance !== undefined
+          ? { importance: args.importance as FeedbackImportance }
+          : {}),
+        ...(args.created_by !== undefined ? { created_by: args.created_by } : {}),
+      });
+      if (id < 0) {
+        return mcpErrorResponse(
+          'INTERNAL_ERROR',
+          'feedback_record: prediction/correction collapsed to empty after privacy redaction',
+        );
+      }
+      return { content: [{ type: 'text', text: JSON.stringify({ id }) }] };
+    }),
+  );
+
+  server.tool(
+    'feedback_search',
+    'Search prior corrections. Returns compact hits (id, topic, importance, score, snippet); use feedback_record output ids and a follow-up read if you need the full bodies.',
+    {
+      query: z
+        .string()
+        .min(1)
+        .describe('FTS5 query across topic + prediction + correction + context'),
+      topic: z
+        .string()
+        .min(1)
+        .optional()
+        .describe('optional exact-match filter on the feedback topic'),
+      limit: z.number().int().positive().max(100).optional(),
+    },
+    wrapHandler('feedback_search', async (args) => {
+      const hits = store.searchFeedback({
+        query: args.query,
+        ...(args.topic !== undefined ? { topic: args.topic } : {}),
+        ...(args.limit !== undefined ? { limit: args.limit } : {}),
+      });
+      return { content: [{ type: 'text', text: JSON.stringify({ hits }) }] };
+    }),
+  );
+
+  server.tool(
+    'feedback_stats',
+    'Per-topic counts of recorded corrections, sorted by most recent first. Pass a topic to scope to a single bucket.',
+    {
+      topic: z.string().min(1).optional(),
+    },
+    wrapHandler('feedback_stats', async (args) => {
+      const stats = store.feedbackStats(args.topic !== undefined ? { topic: args.topic } : {});
+      return { content: [{ type: 'text', text: JSON.stringify({ stats }) }] };
+    }),
+  );
+}
diff --git a/apps/mcp-server/test/server.test.ts b/apps/mcp-server/test/server.test.ts
@@ -56,6 +56,9 @@ describe('MCP server', () => {
       'examples_integrate_plan',
       'examples_list',
       'examples_query',
+      'feedback_record',
+      'feedback_search',
+      'feedback_stats',
       'get_observations',
       'hivemind',
       'hivemind_context',

diff --git a/docs/mcp.md b/docs/mcp.md
@@ -102,6 +102,9 @@ workflow guidance.
 | Rescue | `rescue_stranded_run` | Emit rescue relays and release abandoned claims. |
 | Metrics | `savings_report` | Report live MCP token receipts and reference savings model. |
 | Metrics | `savings_drift_report` | Flag tools whose median tokens-per-call drifted vs a baseline window. |
+| Feedback | `feedback_record` | Record an "AI predicted X, real answer was Y" correction. |
+| Feedback | `feedback_search` | Search prior corrections by FTS5 query and optional topic. |
+| Feedback | `feedback_stats` | Per-topic counts of recorded corrections. |
 
 ## Ruflo sidecar boundary
 
@@ -2266,6 +2269,43 @@ Response shape:
 
 Classifications: `up_drift`, `down_drift`, `new_tool` (no baseline data), `gone` (no recent calls), `insufficient_data` (either window below `min_calls`), or `stable`. When the baseline window starts before the earliest `mcp_metrics` receipt the response adds a `warning` field nudging callers to wait for more history before trusting signals.
 
+## `feedback_record`
+
+Record an "AI predicted X, real answer was Y" correction. ICM slice 2 (see `docs/icm-integration-plan.md`). Bodies pass through the same compression path as observations — `MemoryStore.recordFeedback` runs `prediction`, `correction`, and (optional) `context` through `prepareMemoryText` before persisting. Returns only the new row id; full bodies stay behind the storage layer.
+
+Args:
+
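+- `topic` — short, stable label callers can pivot on (e.g. `"frontend.routing"`). 1–200 characters; trimmed before storage, and a whitespace-only value is rejected with `INTERNAL_ERROR`.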
+- `prediction` — what the AI predicted or asserted.
+- `correction` — what the real answer turned out to be.
+- `context?` — optional surrounding context (where the prediction was made).
+- `importance?` — `critical | high | medium | low`. Defaults to `medium`.
+- `created_by?` — agent or human author for audit.
+
+Returns `{ "id": <number> }`. When privacy redaction collapses `prediction` or `correction` to empty, the tool returns an `INTERNAL_ERROR` instead of writing a phantom row.
+
+## `feedback_search`
+
+Search prior corrections. Compact-hit progressive disclosure: returns `id`, `topic`, `importance`, FTS5 `score` (higher = better), `snippet`, and `created_at` only. Bodies live behind storage so callers don't pay the expansion cost on every search.
+
+Args:
+
+- `query` — FTS5 query across `topic + prediction + correction + context`.
+- `topic?` — exact-match filter on the feedback topic.
+- `limit?` — defaults to 20, max 100.
+
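+`query` must contain at least one character; the tool schema rejects an empty string before the handler runs. If `query` is whitespace-only and `topic` is set, returns a newest-first listing for that topic. A whitespace-only `query` without a `topic` returns no hits.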
+
+## `feedback_stats`
+
+Per-topic counts of recorded corrections, sorted by most recent first.
+
+Args:
+
+- `topic?` — exact-match filter; scopes the response to a single bucket.
+
+Response shape: `{ "stats": [{ "topic": string, "count": number, "last_created_at": number }] }`.
+
 ## Plan observation kinds
 
 The lane introduces several observation kinds on the parent spec task and on the sub-task threads. They are written through `MemoryStore.addObservation`, so content is compressed and `metadata` carries the structured payload.

diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts
@@ -336,6 +336,13 @@ export {
 export { buildDiscrepancyReport, type DiscrepancyReport } from './discrepancy.js';
 export { isPseudoClaimPath, normalizeClaimPath, normalizeRepoFilePath } from '@colony/storage';
 export type { ClaimPathContext, RepoFilePathContext } from '@colony/storage';
+export type {
+  AddFeedbackInput,
+  FeedbackHit,
+  FeedbackImportance,
+  FeedbackRow,
+  FeedbackStat,
+} from '@colony/storage';
 export {
   buildCoordinationSweep,
   type BlockedDownstreamTaskSignal,