diff --git a/apps/cli/src/commands/debrief.ts b/apps/cli/src/commands/debrief.ts new file mode 100644 index 0000000..042b854 --- /dev/null +++ b/apps/cli/src/commands/debrief.ts @@ -0,0 +1,213 @@ +import { join } from 'node:path'; +import { loadSettings, resolveDataDir } from '@cavemem/config'; +import { Storage } from '@cavemem/storage'; +import type { Command } from 'commander'; +import kleur from 'kleur'; + +/** + * Default window: last 24h. The "ran it today" common case. Overridable + * with --hours for zoomed-in post-mortems or multi-day sweeps. + */ +const DEFAULT_HOURS = 24; + +interface DebriefContext { + storage: Storage; + since: number; + taskId?: number | undefined; +} + +/** + * Section 1 — did agents use the task tools at all? + * + * Signal: ratio of task-thread-tagged observations to total observations + * per session. A session with many observations but zero task-thread + * ones is an agent that memorized nothing from the SessionStart preface. + */ +function sectionToolUsage(ctx: DebriefContext): string[] { + const lines = [kleur.bold('1. Did agents use the task tools?')]; + const rows = ctx.storage.toolUsageBySession(ctx.since); + if (rows.length === 0) { + lines.push(kleur.dim(' No activity in the window.')); + return lines; + } + for (const r of rows) { + const ratio = r.total_obs > 0 ? Math.round((r.task_tool_obs / r.total_obs) * 100) : 0; + const marker = ratio >= 10 ? kleur.green('✓') : ratio >= 2 ? kleur.yellow('~') : kleur.red('✗'); + lines.push( + ` ${marker} ${r.session_id.padEnd(16)} ${r.total_obs} obs, ${r.task_tool_obs} task-tool (${ratio}%)`, + ); + } + lines.push( + kleur.dim( + ' Interpretation: <2% = tool surface invisible; 2-10% = occasional; >10% = integrated.', + ), + ); + return lines; +} + +/** + * Section 2 — did auto-join land? + * + * We can't directly see what the agent "saw" in its context, but we CAN + * check: for each session that started, did a join event land in + * task_participants within ~2s of session start? If yes, the preface + * generation fired; if no, something broke the auto-join path. + */ +function sectionAutoJoin(ctx: DebriefContext): string[] { + const lines = ['', kleur.bold('2. Did auto-join land?')]; + const sessions = ctx.storage + .listSessions(200) + .filter((s) => s.started_at >= ctx.since && s.id !== 'observer'); + if (sessions.length === 0) { + lines.push(kleur.dim(' No sessions started in window.')); + return lines; + } + let joined = 0; + let missed = 0; + for (const s of sessions) { + const joinRow = ctx.storage.participantJoinFor(s.id); + if (joinRow && joinRow.joined_at - s.started_at < 2000) { + joined++; + } else { + missed++; + lines.push(` ${kleur.red('✗')} ${s.id} (${s.ide}) started but did not join a task`); + } + } + lines.push( + ` ${kleur.green('✓')} ${joined} sessions auto-joined, ${kleur.red(`${missed} missed`)}`, + ); + if (missed > 0) { + lines.push( + kleur.dim( + ' Missed joins usually mean cwd was outside a git repo or the branch lookup failed.', + ), + ); + } + return lines; +} + +/** + * Section 3 — did agents claim proactively? + * + * The critical diagnostic. Compares edit-observations (tool_use with a + * file_path in metadata) against explicit `claim`-kind observations. If + * claims << edits, proactive claiming is failing → auto-claim's safety + * net is doing the work, which argues for keeping it even if flaky. + */ +function sectionProactiveClaims(ctx: DebriefContext): string[] { + const lines = ['', kleur.bold('3. Did agents claim proactively?')]; + const stats = ctx.storage.editVsClaimStats(ctx.since); + lines.push(` Edits observed: ${stats.edit_count}`); + lines.push(` Claims recorded: ${stats.claim_count}`); + const ratio = stats.edit_count > 0 ? Math.round((stats.claim_count / stats.edit_count) * 100) : 0; + const verdict = + ratio >= 70 + ? kleur.green('proactive claiming works — auto-claim is a safety net, not the main path') + : ratio >= 20 + ? kleur.yellow('partial claiming — consider sharpening the preface wording') + : kleur.red('proactive claiming failing — auto-claim is carrying the load'); + lines.push(` Claim/edit ratio: ${ratio}% → ${verdict}`); + return lines; +} + +/** + * Section 4 — handoff outcomes. + * + * Groups handoffs by final status. >30% expiry suggests either a TTL + * that's too short or a receiver-side notification that isn't loud + * enough to land; also reports median accept latency so you can see + * "how fast did the hand-off baton actually pass" empirically. + */ +function sectionHandoffs(ctx: DebriefContext): string[] { + const lines = ['', kleur.bold('4. Handoff outcomes')]; + const dist = ctx.storage.handoffStatusDistribution(ctx.since); + const total = dist.accepted + dist.cancelled + dist.expired + dist.pending; + if (total === 0) { + lines.push(kleur.dim(' No handoffs in window.')); + return lines; + } + const pct = (n: number) => `${Math.round((n / total) * 100)}%`; + lines.push(` accepted: ${dist.accepted} (${pct(dist.accepted)})`); + lines.push(` cancelled: ${dist.cancelled} (${pct(dist.cancelled)}) ${kleur.dim('(declined)')}`); + lines.push(` expired: ${dist.expired} (${pct(dist.expired)})`); + lines.push(` pending: ${dist.pending} (${pct(dist.pending)}) ${kleur.dim('(still live)')}`); + + const expiryRate = dist.expired / total; + if (expiryRate > 0.3) { + lines.push( + ` ${kleur.yellow('⚠')} ${Math.round(expiryRate * 100)}% expiry rate — shorten TTL, sharpen notification, or rethink the default.`, + ); + } + + const times = ctx.storage.handoffAcceptLatencies(ctx.since); + if (times.length > 0) { + const sorted = [...times].sort((a, b) => a - b); + const median = sorted[Math.floor(sorted.length / 2)] ?? 0; + lines.push(` median time-to-accept: ${Math.round(median / 60_000)}m`); + } + return lines; +} + +/** + * Section 5 — interleaved timeline. + * + * No analysis, just chronology. Observer notes are colored magenta so + * you can scan for moments where your note sits next to an agent event — + * those are the coordination failures the numeric sections can't surface. + */ +function sectionTimeline(ctx: DebriefContext): string[] { + const lines = ['', kleur.bold('5. Timeline (observer notes interleaved with agent activity)')]; + const events = ctx.storage.mixedTimeline(ctx.since, ctx.taskId); + if (events.length === 0) { + lines.push(kleur.dim(' No events.')); + return lines; + } + for (const e of events) { + const ts = new Date(e.ts).toISOString().slice(11, 19); + const isNote = e.kind === 'observer-note'; + const prefix = isNote ? kleur.magenta(' NOTE ') : ` ${e.kind.padEnd(6)}`; + const who = kleur.dim(e.session_id.padEnd(10)); + const head = e.content.split('\n')[0]?.slice(0, 70) ?? ''; + lines.push(`${kleur.dim(ts)} ${prefix} ${who} ${head}`); + } + return lines; +} + +export function registerDebriefCommand(program: Command): void { + program + .command('debrief') + .description('End-of-day collaboration post-mortem: 5 structured sections over DB evidence.') + .option('--hours ', 'Window size in hours', String(DEFAULT_HOURS)) + .option('--task ', 'Narrow the timeline section to a specific task thread') + .action((opts: { hours: string; task?: string }) => { + const settings = loadSettings(); + const dbPath = join(resolveDataDir(settings.dataDir), 'data.db'); + const storage = new Storage(dbPath); + try { + const ctx: DebriefContext = { + storage, + since: Date.now() - Number(opts.hours) * 3_600_000, + taskId: opts.task ? Number(opts.task) : undefined, + }; + const sections = [ + sectionToolUsage(ctx), + sectionAutoJoin(ctx), + sectionProactiveClaims(ctx), + sectionHandoffs(ctx), + sectionTimeline(ctx), + ]; + for (const s of sections) process.stdout.write(`${s.join('\n')}\n`); + + // Hard-coded reflection prompts — the debrief's point is to pick + // one concrete next thing, not to admire the data. + process.stdout.write(`\n${kleur.bold('Next-action prompts:')}\n`); + process.stdout.write(' • Was collaboration meaningfully better than no-hivemind?\n'); + process.stdout.write( + ' • Which failures were missing-tool vs. tool-not-called vs. structural?\n', + ); + process.stdout.write(' • What was the most valuable moment the system created?\n'); + } finally { + storage.close(); + } + }); +} diff --git a/apps/cli/src/commands/note.ts b/apps/cli/src/commands/note.ts new file mode 100644 index 0000000..3d2d195 --- /dev/null +++ b/apps/cli/src/commands/note.ts @@ -0,0 +1,62 @@ +import { join } from 'node:path'; +import { loadSettings, resolveDataDir } from '@cavemem/config'; +import { MemoryStore } from '@cavemem/core'; +import type { Command } from 'commander'; +import kleur from 'kleur'; + +/** + * Reserved session identifier for human scratch notes. Using a fixed id + * (rather than a per-invocation random one) means every note across the + * whole day lives under the same session, which makes "all my notes" + * filters and timeline queries trivial. + */ +const OBSERVER_SESSION_ID = 'observer'; + +/** + * Idempotently materialise the observer session so the FK from + * observations.session_id holds. `startSession` is `INSERT OR IGNORE`, so + * this is effectively free after the first call. + */ +function ensureObserverSession(store: MemoryStore): void { + store.startSession({ + id: OBSERVER_SESSION_ID, + ide: 'observer', + cwd: process.cwd(), + }); +} + +export function registerNoteCommand(program: Command): void { + program + // Variadic so `cavemem note codex stepped on claude` works without + // quoting. The quoting-every-note friction kills adoption otherwise. + .command('note ') + .description('Record a timestamped scratch note into the memory timeline') + .option('--task ', 'Attach this note to a specific task thread (shows up in task_timeline)') + .action(async (words: string[], opts: { task?: string }) => { + const text = words.join(' ').trim(); + if (!text) { + process.stderr.write(`${kleur.red('empty note')}\n`); + process.exitCode = 1; + return; + } + + const settings = loadSettings(); + const dbPath = join(resolveDataDir(settings.dataDir), 'data.db'); + const store = new MemoryStore({ dbPath, settings }); + try { + ensureObserverSession(store); + const id = store.addObservation({ + session_id: OBSERVER_SESSION_ID, + kind: 'observer-note', + content: text, + ...(opts.task ? { task_id: Number(opts.task) } : {}), + }); + const when = new Date().toISOString().slice(11, 19); + process.stdout.write( + `${kleur.green('✓')} note #${id} at ${when}${opts.task ? ` on task #${opts.task}` : ''}\n`, + ); + } finally { + store.close(); + } + }); +} diff --git a/apps/cli/src/commands/observe.ts b/apps/cli/src/commands/observe.ts new file mode 100644 index 0000000..ae5345d --- /dev/null +++ b/apps/cli/src/commands/observe.ts @@ -0,0 +1,155 @@ +import { join } from 'node:path'; +import { loadSettings, resolveDataDir } from '@cavemem/config'; +import { Storage } from '@cavemem/storage'; +import type { Command } from 'commander'; +import kleur from 'kleur'; + +/** + * Refresh cadence. Three seconds is a compromise: fast enough that new + * claims show up while you're still looking at the screen, slow enough + * that the redraw flicker isn't distracting in peripheral vision. + */ +const REFRESH_MS = 3000; + +/** + * "Recent" window for the unclaimed-edits diagnostic. Five minutes + * matches the conflict-warning window used by `UserPromptSubmit` — + * keeping them aligned makes the mental math on both tools align. + */ +const RECENT_WINDOW_MS = 5 * 60_000; + +function fmtAgo(ts: number): string { + const ms = Date.now() - ts; + if (ms < 60_000) return `${Math.round(ms / 1000)}s ago`; + if (ms < 3_600_000) return `${Math.round(ms / 60_000)}m ago`; + return `${Math.round(ms / 3_600_000)}h ago`; +} + +/** + * Paint one frame. Extracted so the setInterval loop stays one line + * and so the renderer is easy to invoke from a future snapshot test. + */ +function renderFrame(storage: Storage): string { + const lines: string[] = []; + const now = new Date().toISOString().slice(11, 19); + lines.push(`${kleur.bold('cavemem observe')} ${kleur.dim(now)}`); + lines.push(kleur.dim('─'.repeat(60))); + + const tasks = storage.listTasks(5); + if (tasks.length === 0) { + lines.push(kleur.dim('No tasks yet. Start a session in a git repo to auto-create one.')); + return lines.join('\n'); + } + + for (const task of tasks) { + lines.push(''); + lines.push( + `${kleur.cyan(`task #${task.id}`)} ${kleur.bold(task.branch)} ${kleur.dim(task.repo_root)}`, + ); + + const participants = storage.listParticipants(task.id); + const participantLine = participants + .map((p) => `${p.agent} (${fmtAgo(p.joined_at)})`) + .join(', '); + lines.push(` ${kleur.dim('participants:')} ${participantLine || 'none'}`); + + const claims = storage.recentClaims(task.id, Date.now() - RECENT_WINDOW_MS); + if (claims.length > 0) { + lines.push(` ${kleur.dim('claims:')}`); + for (const c of claims) { + lines.push( + ` ${c.file_path.padEnd(40)} ${kleur.yellow(c.session_id.padEnd(10))} ${fmtAgo(c.claimed_at)}`, + ); + } + } + + const pending = storage.pendingHandoffs(task.id); + if (pending.length > 0) { + lines.push(` ${kleur.dim('pending handoffs:')}`); + for (const h of pending) { + const meta = safeJson(h.metadata) as { + from_agent?: string; + to_agent?: string; + summary?: string; + }; + const summary = (meta.summary ?? '').slice(0, 50); + lines.push(` #${h.id} ${meta.from_agent ?? '?'} → ${meta.to_agent ?? '?'}: ${summary}`); + } + } + + const recent = storage.taskTimeline(task.id, 6); + if (recent.length > 0) { + lines.push(` ${kleur.dim('recent:')}`); + // taskTimeline is DESC — reverse so the most recent line is last, + // matching "read top-down as a chronological stream". + for (const r of [...recent].reverse()) { + const ts = new Date(r.ts).toISOString().slice(11, 19); + const kindColor = r.kind === 'observer-note' ? kleur.magenta : kleur.cyan; + lines.push( + ` ${kleur.dim(ts)} ${kindColor(r.kind.padEnd(15))} ${r.content.slice(0, 48)}`, + ); + } + } + } + + // Diagnostic footer — the single most valuable piece of the dashboard, + // placed last where it doesn't scroll off. Only counts edits that lack + // an explicit `claim`-kind observation; the auto-claim side effect is + // deliberately not credited here, because the point of this diagnostic + // is to measure *proactive* behaviour. + lines.push(''); + const unclaimed = storage.recentEditsWithoutClaims(Date.now() - RECENT_WINDOW_MS); + if (unclaimed.length === 0) { + lines.push(kleur.green('edits without proactive claims (last 5m): none')); + } else { + lines.push(kleur.yellow(`edits without proactive claims (last 5m): ${unclaimed.length}`)); + for (const e of unclaimed.slice(0, 10)) { + lines.push(` ${e.file_path} ${kleur.dim(`(${e.session_id}, ${fmtAgo(e.ts)})`)}`); + } + } + + return lines.join('\n'); +} + +function safeJson(s: string | null): Record { + if (!s) return {}; + try { + const v = JSON.parse(s) as unknown; + return v && typeof v === 'object' ? (v as Record) : {}; + } catch { + return {}; + } +} + +export function registerObserveCommand(program: Command): void { + program + .command('observe') + .description('Live dashboard of collaboration state. Run in a spare terminal during a session.') + .option('--interval ', 'Refresh interval in milliseconds', String(REFRESH_MS)) + .action((opts: { interval: string }) => { + const settings = loadSettings(); + const dbPath = join(resolveDataDir(settings.dataDir), 'data.db'); + const storage = new Storage(dbPath); + const intervalMs = Math.max(500, Number(opts.interval)); + + // \x1b[2J clears the screen; \x1b[H sends the cursor home. Minimal + // cross-platform approach — avoids heavyweight `blessed`/`ink` deps + // for what is ultimately a glorified printf loop. + const paint = () => { + process.stdout.write('\x1b[2J\x1b[H'); + process.stdout.write(renderFrame(storage)); + process.stdout.write(`\n\n${kleur.dim(`refresh ${intervalMs}ms · ctrl-c to exit`)}\n`); + }; + + paint(); + const handle = setInterval(paint, intervalMs); + + const stop = () => { + clearInterval(handle); + storage.close(); + process.exit(0); + }; + process.on('SIGINT', stop); + process.on('SIGTERM', stop); + }); +} diff --git a/apps/cli/src/index.ts b/apps/cli/src/index.ts index 35b414f..0753aec 100644 --- a/apps/cli/src/index.ts +++ b/apps/cli/src/index.ts @@ -4,12 +4,15 @@ import { pathToFileURL } from 'node:url'; import { Command } from 'commander'; import { registerCompressCommands } from './commands/compress.js'; import { registerConfigCommand } from './commands/config.js'; +import { registerDebriefCommand } from './commands/debrief.js'; import { registerDoctorCommand } from './commands/doctor.js'; import { registerExportCommand } from './commands/export.js'; import { registerHookCommand } from './commands/hook.js'; import { registerInstallCommand } from './commands/install.js'; import { registerLifecycleCommands } from './commands/lifecycle.js'; import { registerMcpCommand } from './commands/mcp.js'; +import { registerNoteCommand } from './commands/note.js'; +import { registerObserveCommand } from './commands/observe.js'; import { registerReindexCommand } from './commands/reindex.js'; import { registerSearchCommand } from './commands/search.js'; import { registerStatusCommand } from './commands/status.js'; @@ -37,6 +40,9 @@ export function createProgram(): Command { registerExportCommand(program); registerHookCommand(program); registerReindexCommand(program); + registerNoteCommand(program); + registerObserveCommand(program); + registerDebriefCommand(program); return program; } diff --git a/apps/cli/test/program.test.ts b/apps/cli/test/program.test.ts index f82cdb2..b1cf1c7 100644 --- a/apps/cli/test/program.test.ts +++ b/apps/cli/test/program.test.ts @@ -8,6 +8,7 @@ describe('cavemem CLI program', () => { const expected = [ 'compress', 'config', + 'debrief', 'doctor', 'expand', 'export', @@ -15,6 +16,8 @@ describe('cavemem CLI program', () => { 'import', 'install', 'mcp', + 'note', + 'observe', 'reindex', 'restart', 'search', diff --git a/packages/hooks/src/handlers/post-tool-use.ts b/packages/hooks/src/handlers/post-tool-use.ts index 253c703..230ca50 100644 --- a/packages/hooks/src/handlers/post-tool-use.ts +++ b/packages/hooks/src/handlers/post-tool-use.ts @@ -27,11 +27,21 @@ export async function postToolUse(store: MemoryStore, input: HookInput): Promise 4000, ); if (!body.trim()) return; + + // Capture touched files in the observation metadata. Parsing content for + // file_path later would require reversing compression — cheap to record + // at write time, expensive to recover at query time. The `observe` and + // `debrief` commands both depend on this surface for edit-vs-claim + // diagnostics, so we pay the tiny write cost unconditionally. + const touchedFiles = extractTouchedFiles(tool, toolInput); + const metadata: Record = { tool }; + if (touchedFiles.length > 0) metadata.file_path = touchedFiles[0]; + store.addObservation({ session_id: input.session_id, kind: 'tool_use', content: body, - metadata: { tool }, + metadata, }); // Side effect: record a claim for every file this tool edited. Observed diff --git a/packages/hooks/test/runner.test.ts b/packages/hooks/test/runner.test.ts index cd653d2..95a7a87 100644 --- a/packages/hooks/test/runner.test.ts +++ b/packages/hooks/test/runner.test.ts @@ -177,7 +177,10 @@ describe('runHook', () => { expect(r.ok).toBe(true); const tl = store.timeline('sess-cc'); expect(tl).toHaveLength(1); - expect(tl[0]?.metadata).toEqual({ tool: 'Edit' }); + // Edit + file_path: the handler now records the touched file path in + // metadata so observe/debrief can correlate edits with claims without + // re-parsing the content field. + expect(tl[0]?.metadata).toEqual({ tool: 'Edit', file_path: '/tmp/x.txt' }); expect(tl[0]?.content).toContain('Edit'); }); diff --git a/packages/storage/src/storage.ts b/packages/storage/src/storage.ts index a532a30..8fb7b24 100644 --- a/packages/storage/src/storage.ts +++ b/packages/storage/src/storage.ts @@ -472,6 +472,170 @@ export class Storage { transaction(fn: () => T): T { return this.db.transaction(fn)(); } + + // --- observe / debrief analytics --- + // + // These are read-heavy queries serving the CLI dashboards. They stay on + // the Storage class (not a separate analytics module) because they work + // on the same prepared-statement cache and benefit from colocation with + // the tables they query. + + /** Pending, non-expired handoffs on a task. */ + pendingHandoffs(task_id: number): ObservationRow[] { + return this.db + .prepare( + `SELECT * FROM observations + WHERE task_id = ? AND kind = 'handoff' + AND json_extract(metadata, '$.status') = 'pending' + ORDER BY ts DESC LIMIT 50`, + ) + .all(task_id) as ObservationRow[]; + } + + /** + * Recent write-tool observations whose file wasn't explicitly claimed by + * the agent that edited it. "Claimed" here means an explicit `claim`-kind + * observation — not the auto-claim side effect — so the query measures + * proactive behavior, not the automatic safety net. + */ + recentEditsWithoutClaims( + since_ts: number, + limit = 20, + ): Array<{ session_id: string; file_path: string; ts: number; task_id: number | null }> { + return this.db + .prepare( + `SELECT o.session_id, + json_extract(o.metadata, '$.file_path') AS file_path, + o.ts, + o.task_id + FROM observations o + WHERE o.kind = 'tool_use' + AND o.ts > ? + AND json_extract(o.metadata, '$.file_path') IS NOT NULL + AND NOT EXISTS ( + SELECT 1 FROM observations c + WHERE c.kind = 'claim' + AND c.session_id = o.session_id + AND json_extract(c.metadata, '$.file_path') = json_extract(o.metadata, '$.file_path') + AND c.ts <= o.ts + ) + ORDER BY o.ts DESC + LIMIT ?`, + ) + .all(since_ts, limit) as Array<{ + session_id: string; + file_path: string; + ts: number; + task_id: number | null; + }>; + } + + /** Per-session activity since `since_ts`, split into total observations + * and task-thread-tagged observations. Ratio is the debrief's first + * signal of whether an agent found the tools at all. */ + toolUsageBySession( + since_ts: number, + ): Array<{ session_id: string; total_obs: number; task_tool_obs: number }> { + return this.db + .prepare( + `SELECT + session_id, + COUNT(*) AS total_obs, + SUM(CASE WHEN task_id IS NOT NULL THEN 1 ELSE 0 END) AS task_tool_obs + FROM observations + WHERE ts > ? AND session_id != 'observer' + GROUP BY session_id + ORDER BY total_obs DESC`, + ) + .all(since_ts) as Array<{ + session_id: string; + total_obs: number; + task_tool_obs: number; + }>; + } + + /** First task-participant row for a session, used to verify auto-join + * fired within ~2s of SessionStart. */ + participantJoinFor(session_id: string): TaskParticipantRow | undefined { + return this.db + .prepare( + 'SELECT * FROM task_participants WHERE session_id = ? ORDER BY joined_at ASC LIMIT 1', + ) + .get(session_id) as TaskParticipantRow | undefined; + } + + /** Edit count vs explicit-claim count — the critical diagnostic for + * whether proactive claiming is working in the wild. */ + editVsClaimStats(since_ts: number): { edit_count: number; claim_count: number } { + const edit = this.db + .prepare( + `SELECT COUNT(*) AS n FROM observations + WHERE ts > ? AND kind = 'tool_use' + AND json_extract(metadata, '$.file_path') IS NOT NULL`, + ) + .get(since_ts) as { n: number }; + const claim = this.db + .prepare("SELECT COUNT(*) AS n FROM observations WHERE ts > ? AND kind = 'claim'") + .get(since_ts) as { n: number }; + return { edit_count: edit.n, claim_count: claim.n }; + } + + /** Count of handoffs by final status in the window. */ + handoffStatusDistribution(since_ts: number): { + accepted: number; + cancelled: number; + expired: number; + pending: number; + } { + const row = this.db + .prepare( + `SELECT + SUM(CASE WHEN json_extract(metadata, '$.status') = 'accepted' THEN 1 ELSE 0 END) AS accepted, + SUM(CASE WHEN json_extract(metadata, '$.status') = 'cancelled' THEN 1 ELSE 0 END) AS cancelled, + SUM(CASE WHEN json_extract(metadata, '$.status') = 'expired' THEN 1 ELSE 0 END) AS expired, + SUM(CASE WHEN json_extract(metadata, '$.status') = 'pending' THEN 1 ELSE 0 END) AS pending + FROM observations WHERE ts > ? AND kind = 'handoff'`, + ) + .get(since_ts) as { + accepted: number | null; + cancelled: number | null; + expired: number | null; + pending: number | null; + }; + return { + accepted: row.accepted ?? 0, + cancelled: row.cancelled ?? 0, + expired: row.expired ?? 0, + pending: row.pending ?? 0, + }; + } + + /** Milliseconds between handoff post and accept, for accepted handoffs. */ + handoffAcceptLatencies(since_ts: number): number[] { + const rows = this.db + .prepare( + `SELECT (json_extract(metadata, '$.accepted_at') - ts) AS latency_ms + FROM observations + WHERE ts > ? AND kind = 'handoff' + AND json_extract(metadata, '$.status') = 'accepted' + AND json_extract(metadata, '$.accepted_at') IS NOT NULL`, + ) + .all(since_ts) as Array<{ latency_ms: number }>; + return rows.map((r) => r.latency_ms).filter((n) => Number.isFinite(n) && n >= 0); + } + + /** Mixed-source timeline (agent activity + observer notes) ordered + * oldest → newest so the debrief reads as a chronological story. */ + mixedTimeline(since_ts: number, task_id?: number, limit = 200): ObservationRow[] { + if (task_id !== undefined) { + return this.db + .prepare('SELECT * FROM observations WHERE ts > ? AND task_id = ? ORDER BY ts ASC LIMIT ?') + .all(since_ts, task_id, limit) as ObservationRow[]; + } + return this.db + .prepare('SELECT * FROM observations WHERE ts > ? ORDER BY ts ASC LIMIT ?') + .all(since_ts, limit) as ObservationRow[]; + } } function sanitizeMatch(q: string): string {