From 8af21ee0be2f163c046a189089064e2d81810453 Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Fri, 13 Feb 2026 12:26:01 +0700 Subject: [PATCH 001/412] Scaffold planner extension with phases, tools, and state machine --- .gitignore | 8 + extensions/.gitkeep | 0 extensions/koan.ts | 96 ++++ package.json | 12 +- src/planner/phases/context-capture.ts | 383 ++++++++++++++ src/planner/phases/dispatch.ts | 62 +++ src/planner/phases/plan-design.ts | 206 ++++++++ src/planner/plan/mutate.ts | 667 +++++++++++++++++++++++++ src/planner/plan/serialize.ts | 45 ++ src/planner/plan/types.ts | 209 ++++++++ src/planner/plan/validate.ts | 133 +++++ src/planner/prompts/context-capture.ts | 91 ++++ src/planner/prompts/plan-design.ts | 218 ++++++++ src/planner/prompts/step.ts | 38 ++ src/planner/qr/mutate.ts | 91 ++++ src/planner/qr/types.ts | 20 + src/planner/session.ts | 196 ++++++++ src/planner/state.ts | 67 +++ src/planner/subagent.ts | 72 +++ src/planner/tools/context-store.ts | 34 ++ src/planner/tools/dispatch.ts | 140 ++++++ src/planner/tools/plan-entities.ts | 599 ++++++++++++++++++++++ src/planner/tools/plan-getters.ts | 167 +++++++ src/planner/tools/plan-setters.ts | 92 ++++ src/planner/tools/qr-tools.ts | 232 +++++++++ src/planner/tools/registry.ts | 190 +++++++ src/planner/types.ts | 21 + src/utils/logger.ts | 14 + src/utils/plan.ts | 72 +++ src/utils/progress.ts | 71 +++ 30 files changed, 4242 insertions(+), 4 deletions(-) delete mode 100644 extensions/.gitkeep create mode 100644 extensions/koan.ts create mode 100644 src/planner/phases/context-capture.ts create mode 100644 src/planner/phases/dispatch.ts create mode 100644 src/planner/phases/plan-design.ts create mode 100644 src/planner/plan/mutate.ts create mode 100644 src/planner/plan/serialize.ts create mode 100644 src/planner/plan/types.ts create mode 100644 src/planner/plan/validate.ts create mode 100644 src/planner/prompts/context-capture.ts create mode 100644 src/planner/prompts/plan-design.ts create mode 100644 
/**
 * Koan extension entry point.
 *
 * Registers the koan CLI flags, every planner tool, the one-shot subagent
 * dispatcher and the `/koan` command on the given extension API.
 *
 * @param pi Extension API handed to the extension by the host.
 */
export default function koan(pi: ExtensionAPI): void {
  const log = createLogger("Koan");

  // CLI flags used when pi is launched as a koan subagent. All are string
  // flags defaulting to "" and are registered in this fixed order.
  const flags: ReadonlyArray<readonly [string, string]> = [
    ["koan-role", "Koan subagent role (reserved)"],
    ["koan-phase", "Koan workflow phase (reserved)"],
    ["koan-plan-dir", "Koan plan directory path"],
    ["koan-subagent-dir", "Koan subagent working directory"],
  ];
  for (const [name, description] of flags) {
    pi.registerFlag(name, { description, type: "string", default: "" });
  }

  // Pi snapshots tools during _buildRuntime() at init. All 44 tools
  // register here unconditionally. Phases restrict access via tool_call
  // blocking at runtime.
  const dispatch = createDispatch();
  const planRef = createPlanRef();

  registerWorkflowTools(pi, dispatch);
  registerPlanGetterTools(pi, planRef);
  registerPlanSetterTools(pi, planRef);
  registerPlanEntityTools(pi, planRef);
  registerQRTools(pi, planRef);

  // Subagent detection runs at before_agent_start (flags are unavailable
  // during init). The guard makes the dispatch happen at most once.
  let dispatched = false;
  pi.on("before_agent_start", async () => {
    if (dispatched) return;
    dispatched = true;

    const config = detectSubagentMode(pi);
    if (!config) return;

    // Use the already-trimmed value from the detected config so planRef
    // and the dispatched phase agree on the directory (and a
    // whitespace-only flag is treated as "not set").
    if (config.planDir) {
      planRef.dir = config.planDir;
    }
    await dispatchPhase(pi, config, dispatch, planRef, log);
  });

  // Session: parent-mode workflow engine.
  const session = createSession(pi, dispatch, planRef);

  pi.registerCommand("koan", {
    description: "Koan planning workflow",
    handler: async (args, ctx) => {
      // First whitespace-separated token selects the subcommand; the rest
      // is passed through (re-joined with single spaces) to `plan`.
      const [subcommand, ...rest] = args.trim().split(/\s+/);
      const command = subcommand ?? "";
      const remainingArgs = rest.join(" ");

      switch (command) {
        case "plan":
          await session.plan(remainingArgs, ctx);
          break;
        case "execute":
          await session.execute(ctx);
          break;
        case "status":
          await session.status(ctx);
          break;
        default:
          ctx.ui.notify(
            "Usage: /koan plan <task>, /koan execute, or /koan status",
            "error",
          );
          break;
      }
    },
  });
}
Logger; + private readonly dispatch: WorkflowDispatch; + private readonly onComplete?: (ctx: ExtensionContext) => Promise; + + constructor( + pi: ExtensionAPI, + state: WorkflowState, + dispatch: WorkflowDispatch, + log?: Logger, + onComplete?: (ctx: ExtensionContext) => Promise, + ) { + this.pi = pi; + this.state = state; + this.dispatch = dispatch; + this.log = log ?? createLogger("Context"); + this.onComplete = onComplete; + + this.registerHandlers(); + } + + async begin(taskDescription: string, plan: PlanInfo, ctx: ExtensionContext): Promise { + if (this.state.context?.active) { + ctx.ui.notify("Context capture is already in progress.", "warning"); + return; + } + + const contextFilePath = path.join(plan.directory, "context.json"); + await fs.rm(contextFilePath, { force: true }); + + this.state.phase = "context"; + this.state.context = { + active: true, + subPhase: "drafting", + attempt: 0, + maxAttempts: MAX_ATTEMPTS, + taskDescription, + planId: plan.id, + planDirectory: plan.directory, + contextFilePath, + lastPrompt: null, + feedback: [], + } satisfies ContextCaptureState; + + // Hook dispatch slots here (not constructor) because dispatch is + // shared with plan-design. Each phase hooks when activated (begin() + // for context-capture, begin() for plan-design). hookDispatch throws + // if the slot is already occupied (phase hook ownership prevents + // silent misrouting). 
+ hookDispatch(this.dispatch, "onNextStep", () => this.handleSubPhaseComplete()); + hookDispatch(this.dispatch, "onStoreContext", (p, c) => this.handleContextToolCall(p, c)); + + this.log("Starting context capture (draft phase)", { planId: plan.id }); + ctx.ui.notify(`Koan context capture started for plan ${plan.id}.`, "info"); + + await this.updatePlanMetadata({ + status: "context", + context: { + expectedPath: contextFilePath, + startedAt: new Date().toISOString(), + }, + }); + + const prompt = formatStep(draftGuidance(taskDescription)); + this.state.context.lastPrompt = prompt; + this.pi.sendUserMessage(prompt); + } + + // Advances context capture sub-phase via tool call result. + // The returned prompt becomes the tool result text that the LLM + // processes within the same agent loop -- no sendUserMessage needed. + // Tool result delivery is synchronous regardless of -p mode. + private handleSubPhaseComplete(): { ok: boolean; prompt?: string; error?: string } { + const ctx = this.state.context; + if (!ctx || !this.shouldHandle()) { + return { ok: false, error: "Context capture is not active." }; + } + + if (ctx.subPhase === "drafting") { + ctx.subPhase = "verifying"; + const prompt = formatStep(verifyGuidance()); + ctx.lastPrompt = prompt; + this.log("Draft complete, transition to verify phase (tool call)"); + return { ok: true, prompt }; + } + + if (ctx.subPhase === "verifying") { + ctx.subPhase = "refining"; + ctx.attempt = 1; + const prompt = formatStep( + refineGuidance({ + attempt: 1, + maxAttempts: ctx.maxAttempts, + feedback: [], + }), + ); + ctx.lastPrompt = prompt; + this.log("Verify complete, transition to refine phase (tool call)"); + return { ok: true, prompt }; + } + + // Refine phase: koan_store_context handles completion, not this tool. 
+ return { + ok: false, + error: "Refine phase: use koan_store_context to store the context.", + }; + } + + private registerHandlers(): void { + this.pi.on("tool_call", async (event) => { + if (!this.shouldHandle()) return; + + const perm = checkPermission("context-capture", event.toolName); + if (!perm.allowed) { + return { block: true, reason: perm.reason }; + } + + const ctx = this.state.context!; + + if (ctx.subPhase === "drafting") { + if (event.toolName === "koan_store_context") { + return { + block: true, + reason: "Draft phase: explore and draft first, then call koan_next_step.", + }; + } + return undefined; + } + + if (ctx.subPhase === "verifying") { + if (event.toolName === "koan_next_step") { + return undefined; + } + return { + block: true, + reason: "Verify phase: review your draft, then call koan_next_step. No other tools.", + }; + } + + if (ctx.subPhase === "refining") { + if (event.toolName === "koan_store_context") { + return undefined; + } + return { + block: true, + reason: "Refine phase: call koan_store_context with the verified context.", + }; + } + + return undefined; + }); + + // Safety net: if the LLM ends a turn without calling the expected + // tool, nudge it to try again. The primary transition mechanism is + // tool calls (koan_next_step for sub-phase advancement, + // koan_store_context for completion). This handler only fires when + // the LLM produces a text-only response instead of calling tools. + this.pi.on("agent_end", async (_event, ctx) => { + if (!this.shouldHandle()) return; + const contextState = this.state.context!; + + if (contextState.subPhase === "drafting" || contextState.subPhase === "verifying") { + // LLM ended without calling koan_next_step. 
+ this.log("LLM ended turn without calling koan_next_step", { + subPhase: contextState.subPhase, + }); + this.pi.sendUserMessage( + "You must call koan_next_step when you have finished this step.", + ); + return; + } + + if (contextState.subPhase === "refining") { + // LLM ended without calling koan_store_context. Retry logic. + this.log("Refine phase ended without koan_store_context call", { + attempt: contextState.attempt, + }); + + if (contextState.feedback.length === 0) { + contextState.feedback = [ + "You must call the `koan_store_context` tool with the structured context.", + ]; + } + + const remaining = contextState.maxAttempts - contextState.attempt; + if (remaining > 0) { + contextState.attempt += 1; + ctx.ui.notify("Context capture incomplete. Retrying.", "warning"); + this.sendRefinePrompt(); + return; + } + + contextState.active = false; + this.state.phase = "context-failed"; + // Unhook on both success (handleContextToolCall) and failure + // (agent_end max-attempts). + unhookDispatch(this.dispatch, "onNextStep"); + unhookDispatch(this.dispatch, "onStoreContext"); + await this.updatePlanMetadata({ + status: "context-failed", + context: { + failedAt: new Date().toISOString(), + attempt: contextState.attempt, + }, + }); + ctx.ui.notify("Context capture failed after maximum attempts.", "error"); + } + }); + } + + private sendRefinePrompt(): void { + const ctx = this.state.context!; + const prompt = formatStep( + refineGuidance({ + attempt: ctx.attempt, + maxAttempts: ctx.maxAttempts, + feedback: ctx.feedback, + }), + ); + ctx.lastPrompt = prompt; + this.log("Sending refine prompt", { attempt: ctx.attempt }); + this.pi.sendUserMessage(prompt); + } + + private shouldHandle(): boolean { + return Boolean(this.state.context?.active && this.state.phase === "context"); + } + + private async handleContextToolCall(payload: unknown, ctx: ExtensionContext): Promise { + if (!this.state.context || !this.shouldHandle()) { + return { + ok: false, + message: "Context 
capture is not active.", + errors: ["Context capture is not active."], + }; + } + + const validation = validateContextData(payload); + + if (!validation.ok || !validation.data) { + const errors = validation.errors.length > 0 ? validation.errors : ["Context validation failed."]; + this.state.context.feedback = errors; + this.log("Context validation failed", { errors }); + return { ok: false, message: formatErrors(errors), errors }; + } + + const rawText = JSON.stringify(payload, null, 2); + try { + await fs.writeFile(this.state.context.contextFilePath, `${rawText}\n`, "utf8"); + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + this.log("Failed to write context file", { error: message }); + return { + ok: false, + message: `Failed to write context.json: ${message}`, + errors: [`Failed to write context.json: ${message}`], + }; + } + + this.state.context.active = false; + this.state.context.data = validation.data; + this.state.context.lastRawContent = rawText; + this.state.context.feedback = []; + this.state.phase = "context-complete"; + // Unhook on both success (handleContextToolCall) and failure + // (agent_end max-attempts). + unhookDispatch(this.dispatch, "onNextStep"); + unhookDispatch(this.dispatch, "onStoreContext"); + + ctx.ui.notify("Koan context capture complete.", "success"); + this.log("Context capture succeeded", { + planId: this.state.context.planId, + attempt: this.state.context.attempt, + }); + + await this.updatePlanMetadata({ + status: "context-complete", + context: { + capturedAt: new Date().toISOString(), + attempt: this.state.context.attempt, + file: this.state.context.contextFilePath, + }, + }); + + // Trigger completion callback (e.g. architect spawn) synchronously + // within the tool call. The tool blocks until the callback resolves, + // preventing the LLM from taking intermediate turns. 
/**
 * Renders validation errors as a headed, dash-bulleted list.
 *
 * @param errors Error messages, one bullet per entry.
 * @returns The header line followed by one "- message" line per error.
 */
function formatErrors(errors: string[]): string {
  return `Context validation failed:\n${errors.map((e) => `- ${e}`).join("\n")}`;
}

/**
 * Validates an untrusted payload against the context schema.
 *
 * Every key in CONTEXT_KEYS must be present as a non-empty array of
 * non-blank strings. All failing keys are reported, not just the first.
 * On success the returned data holds the trimmed strings for each key.
 *
 * @param value Raw payload received from the tool call.
 * @returns `{ ok: true, data, errors: [] }` on success, otherwise
 *          `{ ok: false, errors }` with one message per failing key.
 */
function validateContextData(value: unknown): ValidationResult {
  if (typeof value !== "object" || value === null) {
    return { ok: false, errors: ["Context data must be a JSON object."] };
  }

  const data = value as Record<string, unknown>;
  const errors: string[] = [];
  const result: Record<string, string[]> = {};

  for (const key of CONTEXT_KEYS) {
    const field = data[key];
    if (!Array.isArray(field)) {
      errors.push(`${key} must be an array of strings.`);
      continue;
    }
    if (field.length === 0) {
      errors.push(`${key} must not be empty.`);
      continue;
    }
    // Report only the first offending index per key.
    const bad = field.findIndex((item) => typeof item !== "string" || item.trim().length === 0);
    if (bad !== -1) {
      errors.push(`${key}[${bad}] must be a non-empty string.`);
      continue;
    }
    result[key] = field.map((s: string) => s.trim());
  }

  if (errors.length > 0) {
    return { ok: false, errors };
  }

  // Every CONTEXT_KEYS entry has been populated above; the shape of
  // ContextData is declared elsewhere, hence the cast.
  return { ok: true, data: result as unknown as ContextData, errors: [] };
}
/** Launch parameters of a koan subagent, as read from the CLI flags. */
export interface SubagentConfig {
  role: string;
  phase: string;
  planDir: string;
  subagentDir: string;
}

/**
 * Reads the koan CLI flags (pi -p --koan-role architect --koan-phase
 * plan-design ...) and reports whether this process is a subagent.
 *
 * Flags are unavailable during init (getFlag() returns undefined before
 * _buildRuntime()), so call this from before_agent_start or later.
 *
 * @param pi Extension API used to read the flags.
 * @returns The trimmed flag values, or null when `koan-role` is missing,
 *          not a string, or blank. Non-string optional flags become "".
 */
export function detectSubagentMode(pi: ExtensionAPI): SubagentConfig | null {
  const rawRole = pi.getFlag("koan-role");
  if (typeof rawRole !== "string") {
    return null;
  }
  const role = rawRole.trim();
  if (role.length === 0) {
    return null;
  }

  // Reads one optional flag; anything that is not a string counts as unset.
  const readText = (flag: string): string => {
    const raw = pi.getFlag(flag);
    return typeof raw === "string" ? raw.trim() : "";
  };

  const phase = readText("koan-phase");
  const planDir = readText("koan-plan-dir");
  const subagentDir = readText("koan-subagent-dir");

  return { role, phase, planDir, subagentDir };
}
type PlanDesignStep = 1 | 2 | 3 | 4 | 5 | 6;

/** Mutable per-run state of the plan-design workflow. */
interface PlanDesignState {
  active: boolean;
  step: PlanDesignStep;
  step1Prompt: string | null;
  contextData: ContextData | null;
  systemPrompt: string | null;
}

/**
 * Six-step plan-design workflow driven by `koan_next_step` tool calls.
 *
 * The constructor registers the event handlers immediately; they stay
 * inert until `begin()` has loaded context.json and the system prompt.
 * Steps 1-5 only permit plan getter tools and `koan_next_step`; the call
 * that ends step 6 validates plan.json and ends the workflow.
 */
export class PlanDesignPhase {
  private readonly pi: ExtensionAPI;
  private readonly planDir: string;
  private readonly log: Logger;
  private readonly state: PlanDesignState;
  private readonly progress: ProgressReporter | null;
  private readonly dispatch: WorkflowDispatch;
  private readonly planRef: PlanRef;

  /**
   * @param pi Extension API used to register event handlers.
   * @param config Plan directory, plus an optional subagent directory;
   *               progress is only reported when the latter is set.
   * @param dispatch Shared dispatch whose `onNextStep` slot this phase hooks.
   * @param planRef Shared plan reference.
   * @param log Optional logger; defaults to a "PlanDesign" logger.
   */
  constructor(
    pi: ExtensionAPI,
    config: { planDir: string; subagentDir?: string },
    dispatch: WorkflowDispatch,
    planRef: PlanRef,
    log?: Logger,
  ) {
    this.pi = pi;
    this.planDir = config.planDir;
    this.dispatch = dispatch;
    this.planRef = planRef;
    this.log = log ?? createLogger("PlanDesign");
    this.progress = config.subagentDir
      ? new ProgressReporter(config.subagentDir, "architect", "plan-design")
      : null;

    this.state = {
      active: false,
      step: 1,
      step1Prompt: null,
      contextData: null,
      systemPrompt: null,
    };

    this.registerHandlers();
  }

  /**
   * Loads context.json and the system prompt, then activates step 1.
   * A failure to load either is logged and leaves the phase inactive.
   */
  async begin(): Promise<void> {
    const contextPath = path.join(this.planDir, "context.json");
    try {
      const raw = await fs.readFile(contextPath, "utf8");
      this.state.contextData = JSON.parse(raw) as ContextData;
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      this.log("Failed to read context.json", { error: message });
      return;
    }

    let basePrompt: string;
    try {
      basePrompt = await loadPlanDesignSystemPrompt();
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      this.log("Failed to load plan-design system prompt", { error: message });
      return;
    }

    const contextXml = formatContextForStep1(this.state.contextData);
    this.state.systemPrompt = buildPlanDesignSystemPrompt(basePrompt);
    this.state.step1Prompt = formatStep(planDesignStepGuidance(1, contextXml));
    this.state.active = true;
    this.state.step = 1;

    // No koan_store_plan tool. Each mutation writes to disk immediately.
    // Step 6 ends with koan_next_step, which runs validation. Removes
    // the two-step 'build then finalize' pattern that caused LLM to skip
    // intermediate tools.
    hookDispatch(this.dispatch, "onNextStep", () => this.handleStepComplete());

    this.log("Starting plan-design workflow", { step: 1 });
    await this.progress?.update(`Step 1/6: ${STEP_NAMES[1]} -- started`);
  }

  /** Registers the event handlers; each is a no-op while the phase is inactive. */
  private registerHandlers(): void {
    this.pi.on("before_agent_start", () => {
      if (!this.state.active || !this.state.systemPrompt) return undefined;
      return { systemPrompt: this.state.systemPrompt };
    });

    // Step 1 prompt injection. The CLI message is a process trigger --
    // the context event fires before each LLM call and replaces the
    // user message with the actual step 1 instructions. Messages are
    // structuredCloned before reaching this handler (runner.ts:660),
    // so stored history is unaffected. Handler is a no-op once the
    // step advances past 1.
    //
    // Why context event instead of sendUserMessage? Step 1 has no
    // preceding tool call (no tool result to inject prompt into).
    // Context event injects the prompt before the initial LLM call.
    this.pi.on("context", (event) => {
      if (!this.state.active) return undefined;
      const step1Prompt = this.state.step1Prompt;
      if (this.state.step !== 1 || !step1Prompt) return undefined;

      const messages = event.messages.map((m) =>
        m.role === "user" ? { ...m, content: step1Prompt } : m,
      );
      return { messages };
    });

    this.pi.on("tool_call", (event) => {
      if (!this.state.active) return undefined;

      const perm = checkPermission("plan-design", event.toolName);
      if (!perm.allowed) {
        return { block: true, reason: perm.reason };
      }

      // Before step 6 only read-only getters and the step-advance tool
      // are let through.
      const step = this.state.step;
      if (step < 6 && !PLAN_GETTER_TOOLS.has(event.toolName) && event.toolName !== "koan_next_step") {
        return {
          block: true,
          reason: `${event.toolName} available in step 6 (current: ${step})`,
        };
      }

      return undefined;
    });

    // Placeholder: registered but currently performs no per-turn work.
    this.pi.on("turn_end", () => {
      if (!this.state.active) return;
    });
  }

  /**
   * `onNextStep` handler. Advances to the next step and returns its
   * prompt; on step 6 it validates the plan and ends the workflow instead.
   *
   * @returns `{ ok: true, prompt }` on success, or `{ ok: false, error }`
   *          when final validation fails (the phase then stays on step 6).
   */
  private async handleStepComplete(): Promise<{ ok: boolean; prompt?: string; error?: string }> {
    const prev = this.state.step;

    if (prev === 6) {
      const result = await this.handleFinalize();
      if (!result.ok) {
        return { ok: false, error: result.errors?.join("; ") };
      }
      this.state.active = false;
      unhookDispatch(this.dispatch, "onNextStep");
      this.log("Plan finalized, workflow complete");
      return { ok: true, prompt: "Plan validation passed. Workflow complete." };
    }

    this.state.step = (prev + 1) as PlanDesignStep;
    const nextName = STEP_NAMES[this.state.step];
    const prompt = formatStep(planDesignStepGuidance(this.state.step));

    this.log("Step complete, advancing", { from: prev, to: this.state.step, name: nextName });

    // Await the two updates in order so "complete" is reported before
    // "started" and a failed write cannot surface as an unhandled
    // rejection. Progress reporting is best-effort: a failure is logged
    // and must not block the step transition.
    try {
      await this.progress?.update(`Step ${prev}/6: ${STEP_NAMES[prev]} -- complete`);
      await this.progress?.update(`Step ${this.state.step}/6: ${nextName} -- started`);
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      this.log("Failed to write progress update", { error: message });
    }

    return { ok: true, prompt };
  }

  /**
   * Reads plan.json from the plan directory and runs design validation
   * followed by reference validation, stopping at the first failure.
   *
   * @returns `{ ok: true }` when both pass, else `{ ok: false, errors }`.
   */
  private async handleFinalize(): Promise<{ ok: boolean; errors?: string[] }> {
    const planPath = path.join(this.planDir, "plan.json");
    let plan;
    try {
      const raw = await fs.readFile(planPath, "utf8");
      plan = JSON.parse(raw);
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      this.log("Failed to read plan.json for validation", { error: message });
      return { ok: false, errors: [`Failed to read plan.json: ${message}`] };
    }

    const designValidation = validatePlanDesign(plan);
    if (!designValidation.ok) {
      this.log("Plan design validation failed", { errors: designValidation.errors });
      return { ok: false, errors: designValidation.errors };
    }

    const refValidation = validateRefs(plan);
    if (!refValidation.ok) {
      this.log("Plan reference validation failed", { errors: refValidation.errors });
      return { ok: false, errors: refValidation.errors };
    }

    this.log("Plan validation passed", { path: planPath });
    await this.progress?.update(`Step 6/6: ${STEP_NAMES[6]} -- complete`);
    await this.progress?.complete("completed");
    return { ok: true };
  }
}
/**
 * Returns a copy of the plan with `planning_context.constraints` replaced.
 * The input plan is not modified.
 */
export function setConstraints(p: Plan, constraints: string[]): Plan {
  const planning_context = { ...p.planning_context, constraints };
  return { ...p, planning_context };
}

/**
 * Returns a copy of the plan with the given invisible-knowledge fields
 * replaced; fields omitted from `data` keep their current value.
 */
export function setInvisibleKnowledge(
  p: Plan,
  data: { system?: string; invariants?: string[]; tradeoffs?: string[] },
): Plan {
  const previous = p.invisible_knowledge;
  const invisible_knowledge: InvisibleKnowledge = {
    system: data.system ?? previous.system,
    invariants: data.invariants ?? previous.invariants,
    tradeoffs: data.tradeoffs ?? previous.tradeoffs,
  };
  return { ...p, invisible_knowledge };
}

// -- Decision --

/**
 * Appends a new decision (version 1) to the decision log.
 *
 * @returns The updated plan copy and the id allocated by nextDecisionId.
 */
export function addDecision(
  p: Plan,
  data: { decision: string; reasoning: string },
): { plan: Plan; id: string } {
  const id = nextDecisionId(p);
  const entry: Decision = {
    id,
    version: 1,
    decision: data.decision,
    reasoning_chain: data.reasoning,
  };
  const decision_log = [...p.planning_context.decision_log, entry];
  const plan: Plan = {
    ...p,
    planning_context: { ...p.planning_context, decision_log },
  };
  return { plan, id };
}
// -- RejectedAlternative --

/**
 * Appends a rejected alternative to the planning context.
 *
 * @returns The updated plan copy and the id allocated by nextRejectedAltId.
 */
export function addRejectedAlternative(
  p: Plan,
  data: { alternative: string; rejection_reason: string; decision_ref: string },
): { plan: Plan; id: string } {
  const id = nextRejectedAltId(p);
  const entry: RejectedAlternative = {
    id,
    alternative: data.alternative,
    rejection_reason: data.rejection_reason,
    decision_ref: data.decision_ref,
  };
  const rejected_alternatives = [...p.planning_context.rejected_alternatives, entry];
  const plan: Plan = {
    ...p,
    planning_context: { ...p.planning_context, rejected_alternatives },
  };
  return { plan, id };
}

/**
 * Returns a copy of the plan in which the rejected alternative `id` has
 * the given fields replaced; omitted fields keep their current value.
 *
 * @throws Error when no rejected alternative has that id.
 */
export function setRejectedAlternative(
  p: Plan,
  id: string,
  data: {
    alternative?: string;
    rejection_reason?: string;
    decision_ref?: string;
  },
): Plan {
  const existing = p.planning_context.rejected_alternatives;
  const position = existing.findIndex((entry) => entry.id === id);
  if (position < 0) {
    throw new Error(`rejected_alternative ${id} not found`);
  }

  const before = existing[position];
  const replacement: RejectedAlternative = {
    ...before,
    alternative: data.alternative ?? before.alternative,
    rejection_reason: data.rejection_reason ?? before.rejection_reason,
    decision_ref: data.decision_ref ?? before.decision_ref,
  };

  // Work on a copy so the caller's array is left untouched.
  const rejected_alternatives = Array.from(existing);
  rejected_alternatives.splice(position, 1, replacement);

  return {
    ...p,
    planning_context: { ...p.planning_context, rejected_alternatives },
  };
}
/**
 * Patches one known risk in the planning context.
 *
 * Only the fields supplied in `data` change; anything omitted (or passed as
 * null/undefined) keeps its stored value. The input plan is not mutated.
 *
 * @param p    Plan to derive the new plan from.
 * @param id   Id of the risk to patch.
 * @param data Fields to overwrite.
 * @returns A new plan whose `known_risks` array contains the patched risk.
 * @throws Error when no risk has the given id.
 */
export function setRisk(
  p: Plan,
  id: string,
  data: {
    risk?: string;
    mitigation?: string;
    anchor?: string;
    decision_ref?: string;
  },
): Plan {
  const risks = p.planning_context.known_risks;
  const at = risks.findIndex((candidate) => candidate.id === id);
  if (at === -1) {
    throw new Error(`risk ${id} not found`);
  }

  const before = risks[at];
  const after: Risk = {
    ...before,
    risk: data.risk ?? before.risk,
    mitigation: data.mitigation ?? before.mitigation,
    anchor: data.anchor ?? before.anchor,
    decision_ref: data.decision_ref ?? before.decision_ref,
  };

  // Rebuild the list around the replaced slot; neighbours keep their identity.
  const known_risks = [...risks.slice(0, at), after, ...risks.slice(at + 1)];

  return {
    ...p,
    planning_context: { ...p.planning_context, known_risks },
  };
}
/**
 * Replaces the milestone with the given id by `fn(milestone)`.
 *
 * The plan and its milestone array are copied, not mutated.
 *
 * @throws Error when no milestone has the given id.
 */
function updateMilestone(
  p: Plan,
  id: string,
  fn: (m: Milestone) => Milestone,
): Plan {
  const position = p.milestones.findIndex((candidate) => candidate.id === id);
  if (position === -1) {
    throw new Error(`milestone ${id} not found`);
  }

  const milestones = p.milestones.slice();
  milestones[position] = fn(p.milestones[position]);
  return { ...p, milestones };
}

/**
 * Shared body of the milestone setters: overlays `patch` on the milestone
 * and increments its `version` by one.
 */
function reviseMilestone(p: Plan, id: string, patch: Partial<Milestone>): Plan {
  return updateMilestone(p, id, (m) => ({
    ...m,
    version: m.version + 1,
    ...patch,
  }));
}

/** Sets a milestone's name and bumps its version. Throws if the id is unknown. */
export function setMilestoneName(p: Plan, id: string, name: string): Plan {
  return reviseMilestone(p, id, { name });
}

/** Replaces a milestone's file list and bumps its version. Throws if the id is unknown. */
export function setMilestoneFiles(p: Plan, id: string, files: string[]): Plan {
  return reviseMilestone(p, id, { files });
}

/** Replaces a milestone's flags and bumps its version. Throws if the id is unknown. */
export function setMilestoneFlags(p: Plan, id: string, flags: string[]): Plan {
  return reviseMilestone(p, id, { flags });
}

/** Replaces a milestone's requirements and bumps its version. Throws if the id is unknown. */
export function setMilestoneRequirements(
  p: Plan,
  id: string,
  requirements: string[],
): Plan {
  return reviseMilestone(p, id, { requirements });
}

/** Replaces a milestone's acceptance criteria and bumps its version. Throws if the id is unknown. */
export function setMilestoneAcceptanceCriteria(
  p: Plan,
  id: string,
  criteria: string[],
): Plan {
  return reviseMilestone(p, id, { acceptance_criteria: criteria });
}

/** Replaces a milestone's tests and bumps its version. Throws if the id is unknown. */
export function setMilestoneTests(p: Plan, id: string, tests: string[]): Plan {
  return reviseMilestone(p, id, { tests });
}
/**
 * Patches the code intent with the given id, wherever it lives in the plan.
 *
 * Milestones are searched in order and the first one containing the intent
 * is updated. The intent's `version` is incremented; fields omitted from
 * `data` (or passed as null/undefined) keep their stored value. The input
 * plan is not mutated.
 *
 * @param p    Plan to derive the new plan from.
 * @param id   Id of the code intent to patch.
 * @param data Fields to overwrite.
 * @returns A new plan containing the patched intent.
 * @throws Error when no milestone contains an intent with that id.
 */
export function setIntent(
  p: Plan,
  id: string,
  data: {
    file?: string;
    function?: string;
    behavior?: string;
    decision_refs?: string[];
  },
): Plan {
  for (const [milestoneIdx, milestone] of p.milestones.entries()) {
    const intentIdx = milestone.code_intents.findIndex(
      (intent) => intent.id === id,
    );
    if (intentIdx === -1) {
      continue;
    }

    const previous = milestone.code_intents[intentIdx];
    const revised: CodeIntent = {
      ...previous,
      version: previous.version + 1,
      file: data.file ?? previous.file,
      function: data.function ?? previous.function,
      behavior: data.behavior ?? previous.behavior,
      decision_refs: data.decision_refs ?? previous.decision_refs,
    };

    const code_intents = milestone.code_intents.slice();
    code_intents[intentIdx] = revised;

    const milestones = p.milestones.slice();
    milestones[milestoneIdx] = { ...milestone, code_intents };

    return { ...p, milestones };
  }

  throw new Error(`intent ${id} not found`);
}
/**
 * Replaces the code change with the given id by `fn(change)`.
 *
 * Milestones are scanned in order; the first one holding a change with that
 * id is rebuilt with the transformed change. Nothing on the input plan is
 * mutated.
 *
 * @throws Error when no milestone contains a change with that id.
 */
function updateChange(
  p: Plan,
  id: string,
  fn: (c: CodeChange) => CodeChange,
): Plan {
  for (const [milestoneIdx, milestone] of p.milestones.entries()) {
    const changeIdx = milestone.code_changes.findIndex(
      (change) => change.id === id,
    );
    if (changeIdx === -1) {
      continue;
    }

    const code_changes = milestone.code_changes.slice();
    code_changes[changeIdx] = fn(milestone.code_changes[changeIdx]);

    const milestones = p.milestones.slice();
    milestones[milestoneIdx] = { ...milestone, code_changes };

    return { ...p, milestones };
  }

  throw new Error(`code_change ${id} not found`);
}

/** Sets a change's `diff` and bumps its version. Throws if the id is unknown. */
export function setChangeDiff(p: Plan, id: string, diff: string): Plan {
  return updateChange(p, id, (change) => {
    const version = change.version + 1;
    return { ...change, version, diff };
  });
}

/** Sets a change's `doc_diff` and bumps its version. Throws if the id is unknown. */
export function setChangeDocDiff(p: Plan, id: string, doc_diff: string): Plan {
  return updateChange(p, id, (change) => {
    const version = change.version + 1;
    return { ...change, version, doc_diff };
  });
}

/** Sets a change's `comments` and bumps its version. Throws if the id is unknown. */
export function setChangeComments(p: Plan, id: string, comments: string): Plan {
  return updateChange(p, id, (change) => {
    const version = change.version + 1;
    return { ...change, version, comments };
  });
}

/** Sets a change's target `file` and bumps its version. Throws if the id is unknown. */
export function setChangeFile(p: Plan, id: string, file: string): Plan {
  return updateChange(p, id, (change) => {
    const version = change.version + 1;
    return { ...change, version, file };
  });
}

/** Sets a change's `intent_ref` and bumps its version. Throws if the id is unknown. */
export function setChangeIntentRef(
  p: Plan,
  id: string,
  intent_ref: string,
): Plan {
  return updateChange(p, id, (change) => {
    const version = change.version + 1;
    return { ...change, version, intent_ref };
  });
}
/**
 * Patches the title, scope and/or ASCII rendering of one diagram.
 *
 * Fields omitted from `data` (or passed as null/undefined) keep their stored
 * value; nodes and edges are carried over unchanged. The input plan is not
 * mutated.
 *
 * @param p    Plan to derive the new plan from.
 * @param id   Id of the diagram to patch.
 * @param data Fields to overwrite.
 * @returns A new plan whose `diagram_graphs` array contains the patched diagram.
 * @throws Error when no diagram has the given id.
 */
export function setDiagram(
  p: Plan,
  id: string,
  data: { title?: string; scope?: string; ascii_render?: string },
): Plan {
  const position = p.diagram_graphs.findIndex((graph) => graph.id === id);
  if (position === -1) {
    throw new Error(`diagram ${id} not found`);
  }

  const previous = p.diagram_graphs[position];
  const title = data.title ?? previous.title;
  const scope = data.scope ?? previous.scope;
  const ascii_render = data.ascii_render ?? previous.ascii_render;
  const patched: DiagramGraph = { ...previous, title, scope, ascii_render };

  const diagram_graphs = p.diagram_graphs.slice();
  diagram_graphs[position] = patched;

  return { ...p, diagram_graphs };
}
/**
 * Serializes a plan as pretty-printed JSON (2-space indent) followed by a
 * single trailing newline.
 */
export function serializePlan(p: Plan): string {
  return `${JSON.stringify(p, null, 2)}\n`;
}

/**
 * Writes the serialized plan to `filePath`.
 *
 * The parent directory is probed first so that a missing directory is
 * reported with an explicit message naming that directory instead of a raw
 * error from the write call. The directory is never created here.
 *
 * @param p        Plan to write.
 * @param filePath Destination file; its parent directory must already exist.
 * @throws Error when the parent directory cannot be accessed, or when the
 *         underlying write fails.
 */
export async function writePlan(p: Plan, filePath: string): Promise<void> {
  const dir = path.dirname(filePath);
  try {
    await fs.access(dir);
  } catch {
    throw new Error(`Plan directory does not exist: ${dir}`);
  }

  const content = serializePlan(p);
  await fs.writeFile(filePath, content, "utf8");
}
// Atomic write: tmp file + rename. Prevents corrupted plan.json if
// process crashes mid-write.
/**
 * Persists the plan as `<dir>/plan.json`.
 *
 * The content is written to `<dir>/.plan.json.tmp` first and then renamed
 * over the final path. If either step fails, the temporary file is removed
 * on a best-effort basis and the original error is rethrown.
 *
 * @param p   Plan to persist.
 * @param dir Directory that holds (or will hold) plan.json.
 */
export async function savePlan(p: Plan, dir: string): Promise<void> {
  const planPath = path.join(dir, "plan.json");
  const tmpPath = path.join(dir, ".plan.json.tmp");
  const content = serializePlan(p);
  try {
    await fs.writeFile(tmpPath, content, "utf8");
    await fs.rename(tmpPath, planPath);
  } catch (err: unknown) {
    // Do not leave a stale temp file behind; a cleanup failure must not
    // mask the error that brought us here.
    await fs.unlink(tmpPath).catch(() => undefined);
    throw err;
  }
}

/**
 * Loads `<dir>/plan.json`.
 *
 * When the file does not exist, a fresh empty plan is returned whose id is
 * the base name of `dir`. Any other failure (unreadable file, invalid JSON)
 * is rethrown unchanged.
 *
 * NOTE(review): the parsed JSON is trusted to have the Plan shape; nothing
 * here validates it.
 *
 * @param dir Directory expected to contain plan.json.
 * @returns The stored plan, or an empty plan when none has been saved yet.
 */
export async function loadPlan(dir: string): Promise<Plan> {
  const planPath = path.join(dir, "plan.json");
  try {
    const content = await fs.readFile(planPath, "utf8");
    return JSON.parse(content) as Plan;
  } catch (err: unknown) {
    // Narrow before reading `.code`: a thrown value need not be an object.
    const isMissingFile =
      typeof err === "object" &&
      err !== null &&
      (err as { code?: unknown }).code === "ENOENT";
    if (isMissingFile) {
      const planId = path.basename(dir);
      return createEmptyPlan(planId);
    }
    throw err;
  }
}
/**
 * A diagram attached to the plan, stored as a graph of nodes and edges with
 * an optional pre-rendered ASCII form.
 */
export interface DiagramGraph {
  id: string;
  type: "architecture" | "state" | "sequence" | "dataflow";
  // validateDiagramScope accepts "overview", "invisible_knowledge", or a
  // value starting with "milestone:"; validateRefs checks the part after
  // "milestone:" against the plan's milestone ids.
  scope: string;
  title: string;
  nodes: DiagramNode[];
  // validateRefs reports any edge whose source or target is not the id of
  // an entry in `nodes`.
  edges: DiagramEdge[];
  // addDiagram initialises this to null; setDiagram can fill it in later.
  ascii_render: string | null;
}
/** Formats a number as decimal, left-padded with zeros to at least 3 digits. */
function pad3(n: number): string {
  return n.toString().padStart(3, "0");
}

/** Next decision id: "DL-" plus the padded count of logged decisions + 1. */
export function nextDecisionId(p: Plan): string {
  const ordinal = p.planning_context.decision_log.length + 1;
  return "DL-" + pad3(ordinal);
}

/** Next milestone id: "M-" plus the padded count of milestones + 1. */
export function nextMilestoneId(p: Plan): string {
  const ordinal = p.milestones.length + 1;
  return "M-" + pad3(ordinal);
}

/** Next code-intent id within a milestone: "CI-<milestone id>-NNN". */
export function nextIntentId(m: Milestone): string {
  const ordinal = m.code_intents.length + 1;
  return ["CI", m.id, pad3(ordinal)].join("-");
}

/** Next risk id: "R-" plus the padded count of known risks + 1. */
export function nextRiskId(p: Plan): string {
  const ordinal = p.planning_context.known_risks.length + 1;
  return "R-" + pad3(ordinal);
}

/** Next rejected-alternative id: "RA-" plus the padded count of entries + 1. */
export function nextRejectedAltId(p: Plan): string {
  const ordinal = p.planning_context.rejected_alternatives.length + 1;
  return "RA-" + pad3(ordinal);
}

/** Next wave id: "W-" plus the padded count of waves + 1. */
export function nextWaveId(p: Plan): string {
  const ordinal = p.waves.length + 1;
  return "W-" + pad3(ordinal);
}

/** Next diagram id: "DIAG-" plus the padded count of diagrams + 1. */
export function nextDiagramId(p: Plan): string {
  const ordinal = p.diagram_graphs.length + 1;
  return "DIAG-" + pad3(ordinal);
}

/** Next code-change id within a milestone: "CC-<milestone id>-NNN". */
export function nextChangeId(m: Milestone): string {
  const ordinal = m.code_changes.length + 1;
  return ["CC", m.id, pad3(ordinal)].join("-");
}
/**
 * Checks that every cross-reference inside the plan points at something
 * that exists.
 *
 * Checked references:
 *  - code intent `decision_refs` -> decision log ids
 *  - code change `intent_ref` -> intents of the same milestone
 *  - rejected alternative / risk `decision_ref` -> decision log ids
 *  - diagram `scope` of the form "milestone:<id>" -> milestone ids
 *  - diagram edge `source` / `target` -> node ids of the same diagram
 *  - wave `milestones` -> milestone ids
 *
 * @returns `ok` is true when no dangling reference was found; otherwise
 *          `errors` holds one message per dangling reference.
 */
export function validateRefs(p: Plan): ValidationResult {
  const errors: string[] = [];
  const decisionIds = new Set(p.planning_context.decision_log.map((d) => d.id));
  const milestoneIds = new Set(p.milestones.map((m) => m.id));

  for (const m of p.milestones) {
    const intentIds = new Set(m.code_intents.map((ci) => ci.id));

    for (const ci of m.code_intents) {
      for (const ref of ci.decision_refs) {
        if (!decisionIds.has(ref)) {
          errors.push(`${ci.id}.decision_refs '${ref}' not in decisions`);
        }
      }
    }

    for (const cc of m.code_changes) {
      if (cc.intent_ref && !intentIds.has(cc.intent_ref)) {
        errors.push(
          `${cc.id}.intent_ref '${cc.intent_ref}' not in milestone ${m.id} intents`,
        );
      }
    }
  }

  for (const ra of p.planning_context.rejected_alternatives) {
    if (!decisionIds.has(ra.decision_ref)) {
      errors.push(
        `rejected_alternative ${ra.id}.decision_ref '${ra.decision_ref}' not in decisions`,
      );
    }
  }

  for (const risk of p.planning_context.known_risks) {
    if (risk.decision_ref && !decisionIds.has(risk.decision_ref)) {
      errors.push(`risk ${risk.id}.decision_ref '${risk.decision_ref}' not in decisions`);
    }
  }

  // Milestone references in DiagramGraph.scope are validated against
  // plan.milestones for referential integrity. Prevents orphaned diagrams
  // when milestones are merged or deleted.
  for (const diag of p.diagram_graphs) {
    if (diag.scope.startsWith("milestone:")) {
      const milestoneId = diag.scope.substring("milestone:".length);
      if (!milestoneIds.has(milestoneId)) {
        errors.push(
          `diagram ${diag.id}.scope '${diag.scope}' references unknown milestone`,
        );
      }
    }

    const nodeIds = new Set(diag.nodes.map((n) => n.id));
    for (const edge of diag.edges) {
      if (!nodeIds.has(edge.source)) {
        errors.push(`diagram ${diag.id} edge source '${edge.source}' not in nodes`);
      }
      if (!nodeIds.has(edge.target)) {
        errors.push(`diagram ${diag.id} edge target '${edge.target}' not in nodes`);
      }
    }
  }

  // Waves list milestone ids; an id that matches no milestone is dangling.
  // `?? []` tolerates plan files that carry no waves array.
  for (const wave of p.waves ?? []) {
    for (const ref of wave.milestones) {
      if (!milestoneIds.has(ref)) {
        errors.push(`wave ${wave.id}.milestones '${ref}' not in milestones`);
      }
    }
  }

  return { ok: errors.length === 0, errors };
}

/**
 * Checks the syntactic form of a diagram scope.
 *
 * Valid scopes are "overview", "invisible_knowledge", or "milestone:"
 * followed by a non-empty milestone id. Whether that id exists is checked
 * by validateRefs, not here.
 */
export function validateDiagramScope(scope: string): ValidationResult {
  const errors: string[] = [];
  const milestonePrefix = "milestone:";
  // A bare "milestone:" names no milestone and is rejected.
  const isMilestoneScope =
    scope.startsWith(milestonePrefix) && scope.length > milestonePrefix.length;
  if (
    scope !== "overview" &&
    scope !== "invisible_knowledge" &&
    !isMilestoneScope
  ) {
    errors.push(
      `diagram scope must be 'overview', 'invisible_knowledge', or 'milestone:M-XXX', got '${scope}'`,
    );
  }
  return { ok: errors.length === 0, errors };
}

/**
 * Checks that every code intent of every milestone is referenced by at
 * least one code change in the same milestone (via `intent_ref`).
 */
export function validatePlanCode(p: Plan): ValidationResult {
  const errors: string[] = [];
  for (const m of p.milestones) {
    const changeIntents = new Set(
      m.code_changes
        .map((cc) => cc.intent_ref)
        .filter((r): r is string => r !== null),
    );
    for (const ci of m.code_intents) {
      if (!changeIntents.has(ci.id)) {
        errors.push(`milestone ${m.id} intent ${ci.id} has no corresponding code_change`);
      }
    }
  }
  return { ok: errors.length === 0, errors };
}

/**
 * Checks that every code change with a non-blank `diff` also has a
 * non-blank `doc_diff`.
 */
export function validatePlanDocs(p: Plan): ValidationResult {
  const errors: string[] = [];
  for (const m of p.milestones) {
    for (const cc of m.code_changes) {
      if (cc.diff.trim().length > 0 && cc.doc_diff.trim().length === 0) {
        errors.push(`milestone ${m.id} change ${cc.id} has diff but no doc_diff`);
      }
    }
  }
  return { ok: errors.length === 0, errors };
}
/**
 * Builds the guidance for the "draft" step of context capture.
 *
 * The instructions are three fixed sections: framing and tool-use rules
 * (with the task description interpolated), the dimensions to think
 * through, and the confidence-annotation rules. No `invokeAfter` is set, so
 * whatever default the step formatter applies is used.
 *
 * @param taskDescription Text interpolated into the "Task: ..." line.
 */
export function draftGuidance(taskDescription: string): StepGuidance {
  const framing: string[] = [
    "You are about to begin a structured planning workflow. Before any formalization, think carefully through the full context of this task.",
    "",
    `Task: ${taskDescription}`,
    "",
    "Your primary source is the conversation so far. Most of what you need is already here.",
    "",
    "You MAY use tools during this phase if -- and only if -- a specific lookup would",
    "resolve genuine uncertainty that materially affects planning. Examples of justified reads:",
    "- Confirming an API signature you are unsure about",
    "- Checking whether a file or module actually exists",
    "- Reading a config that determines a key constraint",
    "",
    "Do NOT explore speculatively. If you can draft a confident answer from context alone, do so.",
    "",
  ];

  const dimensions: string[] = [
    "Think through each of these dimensions:",
    "",
    "- What exactly is being asked? What is the user's goal? What is in scope and what is explicitly not?",
    "- What technical constraints apply to the task itself -- API contracts, performance targets, compatibility requirements, architectural rules? Only include constraints that are specific to this task. Do not include general tool usage instructions, coding style guides, or editor/IDE conventions.",
    "- Which files, modules, or entry points in the codebase are relevant? If this is greenfield work with no existing code, say so.",
    "- Were any alternative approaches discussed and rejected during this session? Why?",
    "- What is your current understanding of the system or domain involved?",
    "- What assumptions are you making that haven't been verified? How confident are you in each?",
    "- Is there any implicit design knowledge -- invariants, rationale, accepted tradeoffs -- that should be preserved for downstream work?",
    "- Are there reference documents or specs in the project that apply?",
    "",
  ];

  const confidenceRules: string[] = [
    "Write your analysis as a draft. For each dimension, note your confidence:",
    "- HIGH: you have direct evidence from this session",
    "- LOW: you are extrapolating or guessing",
    "",
    "Flag any LOW-confidence point where a single targeted read would raise it to HIGH.",
    "This is a working document, not a final artifact.",
  ];

  const instructions = framing.concat(dimensions, confidenceRules);
  return { title: "Context Capture: Draft", instructions };
}
/**
 * Loads the architect agent prompt from `~/.claude/agents/architect.md` and
 * returns it with a leading front-matter block (`---` ... `---`) removed.
 *
 * A file without front matter is returned unchanged. Both LF and CRLF line
 * endings are accepted around the front-matter fences.
 *
 * @returns The prompt body.
 * @throws Error "Architect prompt not found at <path>" when the file does
 *         not exist; any other read failure is reported as
 *         "Failed to read architect prompt at <path>: <reason>".
 */
export async function loadPlanDesignSystemPrompt(): Promise<string> {
  const promptPath = path.join(os.homedir(), ".claude", "agents", "architect.md");

  let content: string;
  try {
    content = await fs.readFile(promptPath, "utf8");
  } catch (error: unknown) {
    const code =
      typeof error === "object" && error !== null
        ? (error as { code?: unknown }).code
        : undefined;
    if (code === "ENOENT") {
      throw new Error(`Architect prompt not found at ${promptPath}`);
    }
    // Permission problems, a directory at that path, etc. are not
    // "not found"; surface the underlying reason instead of hiding it.
    const reason = error instanceof Error ? error.message : String(error);
    throw new Error(`Failed to read architect prompt at ${promptPath}: ${reason}`);
  }

  // Strip only a front-matter block that starts on the very first line.
  return content.replace(/^---\r?\n[\s\S]*?\r?\n---\r?\n/, "");
}
Identify:", + " - What needs to change (files, modules, behavior)", + " - What exploration is needed (patterns, constraints, existing code)", + " - What directories/files are relevant", + "", + "Read project context files to understand structure:", + " - Project root CLAUDE.md", + " - Subdirectory CLAUDE.md files in relevant areas", + " - All paths in context.json reference_docs field (if any)", + "", + "CONTEXT.JSON CONTRACT: READ-ONLY.", + " - context.json is owned by the session", + " - You MUST NOT write, modify, or append to context.json", + " - Your outputs go to plan.json (step 6) -- never context.json", + "", + "DO NOT write any files yet. Gather understanding for step 2.", + "Record your analysis mentally for use in subsequent steps.", + ], + }; + + case 2: + return { + title: "Step 2: Codebase Exploration", + instructions: [ + "Use Glob, Grep, Read tools directly to discover:", + " - Existing patterns and implementations", + " - Constraints from code structure", + " - Conventions to follow", + "", + "Read conventions/ files as needed:", + " - structural.md (architectural patterns)", + " - temporal.md (comment hygiene)", + " - diff-format.md (diff specification)", + "", + "NUDGE: If you need additional context to plan well, read more files.", + "Better to over-explore than under-explore.", + "", + "Record discoveries for use in steps 4-6. 
Do NOT write files.", + ], + }; + + case 3: + return { + title: "Step 3: Testing Strategy Discovery", + instructions: [ + "DISCOVER testing strategy from:", + " - User conversation hints", + " - Project CLAUDE.md / README.md", + " - conventions/structural.md domain='testing-strategy'", + "", + "Record confirmed strategy for use in step 6.", + "Decisions will be recorded via tools in step 6.", + ], + }; + + case 4: + return { + title: "Step 4: Approach Generation", + instructions: [ + "GENERATE 2-3 approach options:", + " - Include 'minimal change' option", + " - Include 'idiomatic/modern' option", + " - Document advantage/disadvantage for each", + "", + "TARGET TECH RESEARCH (if new tech/migration):", + " - What is canonical usage of target tech?", + " - Does it have different abstractions?", + "", + "Use exploration findings from step 2 to ground tradeoffs.", + "Record approach analysis for step 6.", + ], + }; + + case 5: + return { + title: "Step 5: Assumption Surfacing", + instructions: [ + "FAST PATH: Skip if task involves NONE of:", + " - Migration to new tech", + " - Policy defaults (lifecycle, capacity, failure handling)", + " - Architectural decisions with multiple valid approaches", + "", + "FULL CHECK (if any apply):", + " Audit each category with OPEN questions:", + " Pattern preservation, Migration strategy, Idiomatic usage,", + " Abstraction boundary, Policy defaults", + "", + "Record assumptions for step 6.", + ], + }; + + case 6: + return { + title: "Step 6: Milestone Definition & Plan Writing", + instructions: [ + "EVALUATE approaches: P(success), failure mode, backtrack cost", + "", + "SELECT and record in Decision Log with MULTI-STEP chain:", + " BAD: 'Polling | Webhooks unreliable'", + " GOOD: 'Use polling | 30% webhook failure -> need fallback anyway -> polling simpler'", + "", + "Use the following tools to build the plan:", + "", + "OVERVIEW & CONSTRAINTS:", + " - koan_set_overview: Define problem and approach", + " - koan_set_constraints: 
Record constraints", + " - koan_set_invisible_knowledge: Document project-specific context", + "", + "DECISIONS & RISKS:", + " - koan_add_decision, koan_set_decision: Record architectural decisions", + " - koan_add_rejected_alternative: Document rejected approaches", + " - koan_add_risk: Track implementation risks", + "", + "MILESTONES & INTENTS:", + " - koan_add_milestone: Create milestones (deployable increments)", + " - koan_set_milestone_name/files/flags/requirements/acceptance_criteria/tests: Configure milestones", + " - koan_add_intent, koan_set_intent: Define code intents (WHAT to change, not HOW)", + "", + "WAVES & STRUCTURE:", + " - koan_add_wave, koan_set_wave_milestones: Group milestones into deployment waves", + " - koan_add_diagram, koan_set_diagram, koan_add_diagram_node, koan_add_diagram_edge: Visual structure", + " - koan_set_readme_entry: Link plan sections to README.md", + "", + "Each tool writes to disk immediately. Inspect with koan_get_plan.", + "", + "MILESTONES (each deployable increment):", + " - Files: exact paths (each file in ONE milestone only)", + " - Requirements: specific behaviors", + " - Acceptance: testable pass/fail criteria", + " - Code Intent: WHAT to change (Developer converts to code_changes later)", + " - Tests: type, backing, scenarios", + "", + "PARALLELIZATION:", + " Vertical slices (parallel) > Horizontal layers (sequential)", + " BAD: M1=models, M2=services, M3=controllers (sequential)", + " GOOD: M1=auth stack, M2=users stack, M3=posts stack (parallel)", + " If file overlap: extract to M0 (foundation) or consolidate", + ], + invokeAfter: [ + "WHEN DONE: After completing the instructions above, call koan_next_step to validate.", + "Do NOT call this tool until you have used the plan mutation tools.", + ].join("\n"), + }; + + default: + return { title: "", instructions: [] }; + } +} diff --git a/src/planner/prompts/step.ts b/src/planner/prompts/step.ts new file mode 100644 index 0000000..a6598c7 --- /dev/null +++ 
b/src/planner/prompts/step.ts @@ -0,0 +1,38 @@ +// Step prompt assembly for koan workflows. +// +// Format matches the reference planner's format_step() in +// skills/lib/workflow/prompts/step.py. Both use "NEXT STEP:" +// directives. Reference uses "Command:" for shell execution. +// Koan uses "Tool:" -- tool results are synchronous within +// the agent loop (deterministic delivery regardless of -p mode). +// +// Why strengthen invoke-after? The original weak format ("Now call +// koan_next_step.") produced skipped steps. Strengthened format +// mirrors reference planner's explicit directive structure. + +export interface StepGuidance { + title: string; + instructions: string[]; + // Custom invoke-after directive. When omitted, formatStep + // appends the default koan_next_step directive. + // Terminal steps override this (e.g., step 6 plan validation). + invokeAfter?: string; +} + +// Default invoke-after: conditional gate for koan_next_step. +// "WHEN DONE" + "Do NOT call until" creates a two-part gate: +// the LLM must complete work before advancing. Unconditional +// imperatives ("Execute this tool now.") cause immediate tool +// calls because tool calls with empty params have zero friction +// (unlike shell commands which require mechanical copy-paste). +const DEFAULT_INVOKE = [ + "WHEN DONE: After completing the instructions above, call koan_next_step to advance.", + "Do NOT call this tool until the work described in this step is finished.", +].join("\n"); + +export function formatStep(g: StepGuidance): string { + const header = `${g.title}\n${"=".repeat(g.title.length)}\n\n`; + const body = g.instructions.join("\n"); + const invoke = g.invokeAfter ?? 
// Quality-review (QR) checklist model and its pure mutators.
// Every mutator returns a fresh QRFile and leaves its input untouched.
// The type declarations mirror src/planner/qr/types.ts.

export type QRSeverity = "MUST" | "SHOULD" | "COULD";
export type QRItemStatus = "TODO" | "PASS" | "FAIL";

/** One checklist entry tracked during quality review. */
export interface QRItem {
  id: string;
  scope: string;
  check: string;
  status: QRItemStatus;
  // Starts at 1 (addQRItem) and is incremented by every setQRItem call.
  version: number;
  // Explanation of a failure. Required for FAIL, always null for PASS.
  finding: string | null;
  parent_id: string | null;
  group_id: string | null;
  severity: QRSeverity;
}

/** All QR items recorded for one phase/iteration. */
export interface QRFile {
  phase: string;
  iteration: number;
  items: QRItem[];
}

/** Left-pads a sequence number with zeros to at least three digits (7 -> "007"). */
function pad3(n: number): string {
  return String(n).padStart(3, "0");
}

/** Builds the next item id from the phase name and the current item count. */
function nextQRId(qr: QRFile): string {
  return `QR-${qr.phase}-${pad3(qr.items.length + 1)}`;
}

/**
 * Appends a new item in TODO state.
 *
 * @param qr   File to extend (not modified).
 * @param data Scope and check text; severity defaults to "MUST".
 * @returns The extended file and the id assigned to the new item.
 */
export function addQRItem(
  qr: QRFile,
  data: { scope: string; check: string; severity?: QRSeverity },
): { qr: QRFile; id: string } {
  const id = nextQRId(qr);
  const item: QRItem = {
    id,
    scope: data.scope,
    check: data.check,
    status: "TODO",
    version: 1,
    finding: null,
    parent_id: null,
    group_id: null,
    severity: data.severity ?? "MUST",
  };
  return { qr: { ...qr, items: [...qr.items, item] }, id };
}

/**
 * Updates one item and bumps its version.
 *
 * Rules:
 *  - PASS is terminal: a PASS item cannot be moved to FAIL.
 *  - FAIL requires a finding (explains what failed).
 *  - PASS forbids a finding.
 *
 * Moving an item to PASS without supplying a finding clears the finding
 * recorded by an earlier FAIL. Previously the stale finding was carried over
 * and tripped the "PASS forbids finding" check, so FAIL -> PASS could only be
 * done by passing an empty-string finding.
 *
 * @param qr   File containing the item (not modified).
 * @param id   Id of the item to update.
 * @param data Fields to change; omitted fields keep their current value.
 * @returns A new file with the updated item.
 * @throws Error if the id is unknown or one of the rules above is violated.
 */
export function setQRItem(
  qr: QRFile,
  id: string,
  data: {
    status?: QRItemStatus;
    finding?: string;
    check?: string;
    severity?: QRSeverity;
  },
): QRFile {
  const idx = qr.items.findIndex((i) => i.id === id);
  if (idx === -1) throw new Error(`qr_item ${id} not found`);

  const item = qr.items[idx];

  if (item.status === "PASS" && data.status === "FAIL") {
    throw new Error(`cannot transition ${id} from PASS to FAIL (PASS is terminal)`);
  }

  const status = data.status ?? item.status;
  // An explicit move to PASS drops whatever finding the item carried; any
  // other update keeps the existing finding unless a new one is supplied.
  const carried = data.status === "PASS" ? null : item.finding;
  const finding = data.finding ?? carried;

  if (status === "FAIL" && !finding) {
    throw new Error(`FAIL status requires finding for ${id}`);
  }

  if (status === "PASS" && finding) {
    throw new Error(`PASS status forbids finding for ${id}`);
  }

  const updated: QRItem = {
    ...item,
    version: item.version + 1,
    status,
    // Store null rather than "" so a PASS item never holds an empty finding.
    finding: status === "PASS" ? null : finding,
    check: data.check ?? item.check,
    severity: data.severity ?? item.severity,
  };

  return {
    ...qr,
    items: qr.items.map((existing, i) => (i === idx ? updated : existing)),
  };
}

/**
 * Tags the listed items with a group id.
 * Does not increment version (grouping is metadata). Unknown ids are ignored.
 */
export function assignGroup(qr: QRFile, ids: string[], groupId: string): QRFile {
  const idSet = new Set(ids);
  const items = qr.items.map((item) =>
    idSet.has(item.id) ? { ...item, group_id: groupId } : item,
  );
  return { ...qr, items };
}
/**
 * Builds the planner session behind the /koan commands.
 *
 * The session owns one mutable WorkflowState. `plan` creates a plan directory
 * and starts context capture; once context is stored, the completion callback
 * below spawns the architect subagent and reports its outcome.
 *
 * @param pi       Extension API handed to the context-capture phase.
 * @param dispatch Workflow dispatch handed to the context-capture phase.
 * @param planRef  Shared reference whose `dir` is set to the new plan
 *                 directory when `plan` runs.
 * @returns The `plan`, `execute`, and `status` command handlers.
 */
export function createSession(pi: ExtensionAPI, dispatch: WorkflowDispatch, planRef: PlanRef): Session {
  const state: WorkflowState = createInitialState();
  const log = createLogger("Session");

  // Completion callback for the context-capture phase. Runs inside the
  // koan_store_context tool call -- the tool blocks until the architect
  // subagent finishes. The LLM sees context capture + architect outcome
  // in one tool response. No agent_end polling needed.
  const onContextComplete = async (ctx: ExtensionContext): Promise<string> => {
    if (!state.plan) {
      return "Context captured but no plan state available.";
    }

    const planDir = state.plan.directory;
    const planJsonPath = path.join(planDir, "plan.json");
    const subagentDir = await createSubagentDir(planDir, "architect");

    state.phase = "architect-running";
    ctx.ui.notify("Launching architect subagent for plan-design...", "info");
    log("Spawning architect after context capture", { planDir, subagentDir });

    const extensionPath = path.resolve(import.meta.dirname, "../../extensions/koan.ts");

    // Progress polling is best-effort. A failed read must never surface as an
    // unhandled rejection from the timer, and a slow read must not stack up
    // behind the next tick.
    let pollInFlight = false;
    let pollFailureLogged = false;
    let architectDone = false;
    const pollProgress = async (): Promise<void> => {
      if (pollInFlight || architectDone) return;
      pollInFlight = true;
      try {
        const s = await readSubagentState(subagentDir);
        // Skip the notification if the architect finished while we were reading.
        if (!architectDone && s?.current) {
          ctx.ui.notify(`Architect: ${s.current}`, "info");
        }
      } catch (error: unknown) {
        if (!pollFailureLogged) {
          pollFailureLogged = true;
          log("Failed to read architect progress", {
            error: error instanceof Error ? error.message : String(error),
          });
        }
      } finally {
        pollInFlight = false;
      }
    };
    const pollInterval = setInterval(() => {
      void pollProgress();
    }, 2000);

    // Stop polling whether or not the spawn promise settles successfully.
    const result = await spawnArchitect({
      planDir,
      subagentDir,
      cwd: ctx.cwd,
      extensionPath,
      log,
    }).finally(() => {
      architectDone = true;
      clearInterval(pollInterval);
    });

    if (result.exitCode !== 0) {
      state.phase = "architect-failed";
      const detail = result.stderr.slice(0, 500);
      log("Architect subagent failed", { exitCode: result.exitCode, stderr: detail });
      ctx.ui.notify(`Architect subagent failed (exit ${result.exitCode}).`, "error");
      return `Context captured. Architect subagent failed (exit ${result.exitCode}).\n\nStderr:\n${detail}`;
    }

    // A zero exit code alone is not proof of success: the plan file must exist.
    let planExists = false;
    try {
      await fs.access(planJsonPath);
      planExists = true;
    } catch {
      // plan.json not written
    }

    if (!planExists) {
      state.phase = "architect-failed";
      log("Architect completed but plan.json not found", { planJsonPath });
      ctx.ui.notify("Architect completed but plan.json was not written.", "error");
      return "Context captured. Architect completed but plan.json was not written.";
    }

    state.phase = "plan-design-complete";
    log("Architect plan-design complete", { planDir });
    ctx.ui.notify("Plan-design phase complete.", "success");
    return `Context captured. Plan written to ${planDir}/plan.json.`;
  };

  const contextPhase = new ContextCapturePhase(pi, state, dispatch, createLogger("Context"), onContextComplete);

  return {
    // Starts a new plan from a task description and begins context capture.
    async plan(args, ctx) {
      const description = args.trim();
      if (!description) {
        // The argument placeholder is missing from the extracted text;
        // <description> is restored here -- TODO confirm against the repository.
        ctx.ui.notify("Usage: /koan plan <description>", "error");
        return;
      }

      if (state.phase === "context" && state.context?.active) {
        ctx.ui.notify("Context capture already running. Use /koan status to check progress.", "warning");
        return;
      }

      await ctx.waitForIdle();

      const planInfo = await createPlanInfo(description, ctx.cwd);
      initializePlanState(state, planInfo, description);
      // Publish the directory so tools that hold planRef can find the plan.
      planRef.dir = planInfo.directory;

      log("Plan command invoked", {
        cwd: ctx.cwd,
        description,
        planId: planInfo.id,
        planDirectory: planInfo.directory,
      });

      await contextPhase.begin(description, planInfo, ctx);
    },

    // Placeholder: execution is not implemented yet.
    async execute(ctx) {
      ctx.ui.notify("Execution mode is not yet implemented.", "warning");
    },

    // Shows a summary of the current plan and phase.
    async status(ctx) {
      const summary = buildStatusSummary(state, ctx.cwd);
      ctx.ui.notify(summary, "info");
    },
  };
}
/**
 * Shortens a path for display.
 *
 * Order of preference:
 *  1. Paths at or below the user's home directory are shown as `~` / `~/...`.
 *  2. Paths at or below `cwd` are shown relative to it (`.` for cwd itself).
 *  3. Anything else is returned unchanged.
 *
 * Prefixes are matched on directory boundaries, so `/home/alice` is not
 * treated as living under `/home/al`, and an in-tree entry whose name merely
 * starts with two dots (`..hidden`) is still shown relative to `cwd`.
 *
 * @param target Path to display.
 * @param cwd    Directory that relative paths are computed against.
 * @returns The shortened path, or `target` unchanged if neither rule applies.
 */
function formatPath(target: string, cwd: string): string {
  const home = os.homedir();
  if (home !== "") {
    if (target === home) {
      return "~";
    }
    // Require a separator after the home prefix so sibling directories
    // sharing the same leading characters do not match.
    const homePrefix = home.endsWith(path.sep) ? home : home + path.sep;
    if (target.startsWith(homePrefix)) {
      return `~${path.sep}${target.slice(homePrefix.length)}`;
    }
  }

  const relative = path.relative(cwd, target);
  if (relative === "") {
    // target is cwd itself; an empty string would render as a blank path.
    return ".";
  }

  // path.relative signals "outside cwd" with a leading ".." segment, or with
  // an absolute path when no relative route exists (e.g. another drive).
  const escapesCwd =
    relative === ".." ||
    relative.startsWith(`..${path.sep}`) ||
    path.isAbsolute(relative);

  return escapesCwd ? target : relative;
}
/** Outcome of one subagent process run. */
export interface SubagentResult {
  // Process exit code; 1 when the process could not be spawned or exited
  // without a code.
  exitCode: number;
  // Everything the process wrote to stderr, or the spawn error message.
  stderr: string;
  subagentDir: string;
}

/** Inputs for spawnArchitect. */
export interface SpawnArchitectOptions {
  planDir: string;
  // Directory that receives stdout.log and stderr.log.
  subagentDir: string;
  // Working directory of the child process.
  cwd: string;
  // Extension entry point passed to `pi -e`.
  extensionPath: string;
  // Defaults to a logger named "Subagent".
  log?: Logger;
}

/**
 * Runs `pi` as the architect subagent for the plan-design phase.
 *
 * stdout and stderr are copied to `stdout.log` / `stderr.log` inside
 * `opts.subagentDir`; stderr is also collected in memory and returned.
 *
 * The returned promise never rejects. A spawn failure resolves with exit
 * code 1 and the error message as `stderr`.
 *
 * @param opts Plan/subagent directories, working directory, extension path,
 *             and an optional logger.
 * @returns Exit code, captured stderr, and the subagent directory.
 */
export function spawnArchitect(opts: SpawnArchitectOptions): Promise<SubagentResult> {
  const log = opts.log ?? createLogger("Subagent");
  const { planDir, subagentDir } = opts;

  const args = [
    "-p",
    "-e", opts.extensionPath,
    "--koan-role", "architect",
    "--koan-phase", "plan-design",
    "--koan-plan-dir", planDir,
    "--koan-subagent-dir", subagentDir,
    "Begin the plan-design phase.",
  ];

  log("Spawning architect subagent", { planDir, subagentDir });

  return new Promise<SubagentResult>((resolve) => {
    // A write stream with no 'error' listener turns any I/O failure (missing
    // directory, full disk) into an uncaught exception in the host process.
    // The log files are diagnostics only, so record the problem and carry on.
    const openLog = (fileName: string) => {
      const stream = createWriteStream(path.join(subagentDir, fileName), { flags: "w" });
      stream.on("error", (error) => {
        log("Subagent log stream error", { file: fileName, error: error.message });
      });
      return stream;
    };
    const stdoutLog = openLog("stdout.log");
    const stderrLog = openLog("stderr.log");

    const proc = spawn("pi", args, {
      cwd: opts.cwd,
      shell: false,
      stdio: ["ignore", "pipe", "pipe"],
    });

    let stderr = "";
    let settled = false;

    // 'error' and 'close' can both fire for one process (e.g. after a failed
    // spawn); only the first one closes the logs and resolves the promise.
    const settle = (result: SubagentResult): void => {
      if (settled) return;
      settled = true;
      stdoutLog.end();
      stderrLog.end();
      resolve(result);
    };

    proc.stdout.on("data", (data: Buffer) => {
      // Skip chunks that arrive after the log was closed or has failed.
      if (!stdoutLog.writableEnded && !stdoutLog.destroyed) {
        stdoutLog.write(data);
      }
    });

    proc.stderr.on("data", (data: Buffer) => {
      stderr += data.toString();
      if (!stderrLog.writableEnded && !stderrLog.destroyed) {
        stderrLog.write(data);
      }
    });

    proc.on("close", (code) => {
      if (settled) return;
      // code is null when the process was terminated by a signal.
      const exitCode = code ?? 1;
      log("Architect subagent exited", { exitCode });
      settle({ exitCode, stderr, subagentDir });
    });

    proc.on("error", (error) => {
      if (settled) return;
      log("Architect subagent spawn error", { error: error.message });
      settle({ exitCode: 1, stderr: error.message, subagentDir });
    });
  });
}
All fields are string arrays.", + "task_spec: subject, scope, out-of-scope items.", + "constraints: 'MUST/SHOULD/MUST-NOT: rule (source)' or 'none confirmed'.", + "entry_points: 'file:symbol - why relevant' or 'greenfield'.", + "rejected_alternatives: 'approach - why dismissed' or 'none discussed'.", + "current_understanding: how the system works, relevant behavior.", + "assumptions: 'claim (H/M/L confidence)' or 'none'.", + "invisible_knowledge: design rationale, invariants, accepted tradeoffs.", + "reference_docs: 'path - what it covers' or 'none'.", + ].join(" "), +}); + +export interface ContextToolResult { + ok: boolean; + message: string; + errors?: string[]; +} + +export type ContextToolHandler = (payload: unknown, ctx: unknown) => Promise; diff --git a/src/planner/tools/dispatch.ts b/src/planner/tools/dispatch.ts new file mode 100644 index 0000000..ee9fd5d --- /dev/null +++ b/src/planner/tools/dispatch.ts @@ -0,0 +1,140 @@ +// Workflow tool dispatch for koan. +// +// Workflow tools (koan_next_step, koan_store_context) are registered once +// at init and read from this dispatch at call time. +// Pi snapshots tools during _buildRuntime() -- late registration is +// invisible to the LLM. The dispatch decouples static registration +// from dynamic phase routing. 
+ +import { Type } from "@sinclair/typebox"; +import type { ExtensionAPI, ExtensionContext } from "@mariozechner/pi-coding-agent"; + +import { ContextStoreSchema, type ContextToolResult } from "./context-store.js"; +import { createLogger } from "../../utils/logger.js"; + +const log = createLogger("Dispatch"); + +// -- Result types -- + +export interface StepResult { + ok: boolean; + prompt?: string; + error?: string; +} + +// -- Dispatch -- + +export interface WorkflowDispatch { + onNextStep: (() => StepResult) | null; + onStoreContext: + | ((payload: unknown, ctx: ExtensionContext) => Promise) + | null; +} + +export function createDispatch(): WorkflowDispatch { + return { onNextStep: null, onStoreContext: null }; +} + +// Decouples tool registration (init-time, before _buildRuntime) from +// plan directory creation (runtime, after flags available). Same +// indirection pattern as WorkflowDispatch. +export interface PlanRef { + dir: string | null; +} + +export function createPlanRef(): PlanRef { + return { dir: null }; +} + +// Sets a dispatch slot. Throws if the slot is already occupied -- +// prevents silent misrouting when two phases attempt to claim +// the same tool. +export function hookDispatch( + dispatch: WorkflowDispatch, + key: K, + handler: NonNullable, +): void { + if (dispatch[key] !== null) { + throw new Error(`dispatch.${String(key)} is already hooked`); + } + (dispatch as Record)[key] = handler; +} + +export function unhookDispatch( + dispatch: WorkflowDispatch, + key: keyof WorkflowDispatch, +): void { + (dispatch as Record)[key] = null; +} + +// -- Tool registration -- + +// Registers workflow tools. Called once at init in koan.ts, +// before pi's _buildRuntime() snapshot. Tool execute callbacks read +// from the dispatch at call time -- the dispatch is mutable, the +// tool list is not. +// +// Why register all tools unconditionally? 
// Registers the two workflow tools (koan_next_step, koan_store_context).
//
// Both are registered unconditionally at init. Flags are unavailable during
// init (getFlag() returns undefined before _buildRuntime() sets flagValues),
// so registration cannot depend on role/phase, and tools registered after
// _buildRuntime() are invisible to the LLM. Each execute callback therefore
// looks up its handler on the mutable dispatch at call time.
export function registerWorkflowTools(
  pi: ExtensionAPI,
  dispatch: WorkflowDispatch,
): void {
  // Wraps plain text in the tool-result shape.
  const asText = (text: string) => ({
    content: [{ type: "text" as const, text }],
  });

  // The tool description carries the default prohibition ("DO NOT call until
  // told"); the invoke-after directive in each step prompt is the explicit
  // activation.
  const nextStepDescription = [
    "Signal completion of the current workflow step.",
    "DO NOT call this tool until the step instructions explicitly tell you to.",
    "Do the actual work described in each step BEFORE calling this tool.",
  ].join(" ");

  const storeContextDescription = [
    "Store structured planning context.",
    "DO NOT call this tool until the step instructions explicitly tell you to.",
    "Each field is a string array -- encode structure within strings, not as nested objects.",
  ].join(" ");

  // -- koan_next_step --
  pi.registerTool({
    name: "koan_next_step",
    label: "Advance to next workflow step",
    description: nextStepDescription,
    parameters: Type.Object({}),
    async execute() {
      // Fallback gate: the tool_call handler is the primary one. This check
      // only fires if that handler is bypassed or misconfigured.
      const advance = dispatch.onNextStep;
      if (!advance) {
        throw new Error("No workflow phase is active.");
      }
      const outcome = advance();
      if (outcome.ok) {
        return asText(outcome.prompt ?? "Step complete.");
      }
      throw new Error(outcome.error ?? "Step transition failed.");
    },
  });

  // -- koan_store_context --
  pi.registerTool({
    name: "koan_store_context",
    label: "Store planning context",
    description: storeContextDescription,
    parameters: ContextStoreSchema,
    async execute(_toolCallId, params, _signal, _onUpdate, ctx) {
      const store = dispatch.onStoreContext;
      if (!store) {
        throw new Error("Context capture is not active.");
      }
      const outcome = await store(params, ctx);
      if (outcome.ok) {
        log("Context stored");
        return asText(outcome.message);
      }
      log("Context store rejected", { errors: outcome.errors });
      throw new Error(outcome.message);
    },
  });
}
+ +import { Type } from "@sinclair/typebox"; +import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; +import type { TSchema } from "@sinclair/typebox"; + +import type { PlanRef } from "./dispatch.js"; +import { loadPlan, savePlan } from "../plan/serialize.js"; +import type { Plan } from "../plan/types.js"; +import { + addDecision, + setDecision, + addRejectedAlternative, + setRejectedAlternative, + addRisk, + setRisk, + addMilestone, + setMilestoneName, + setMilestoneFiles, + setMilestoneFlags, + setMilestoneRequirements, + setMilestoneAcceptanceCriteria, + setMilestoneTests, + addIntent, + setIntent, + addChange, + setChangeDiff, + setChangeDocDiff, + setChangeComments, + setChangeFile, + setChangeIntentRef, + addWave, + setWaveMilestones, + addDiagram, + setDiagram, + addDiagramNode, + addDiagramEdge, + setReadmeEntry, +} from "../plan/mutate.js"; + +function planTool

( + pi: ExtensionAPI, + planRef: PlanRef, + opts: { + name: string; + label: string; + description: string; + parameters: TSchema; + execute: (plan: Plan, params: P) => { plan: Plan; message: string }; + }, +): void { + pi.registerTool({ + name: opts.name, + label: opts.label, + description: opts.description, + parameters: opts.parameters, + async execute(_toolCallId, params) { + if (!planRef.dir) throw new Error("No plan directory is active."); + const plan = await loadPlan(planRef.dir); + const result = opts.execute(plan, params as P); + await savePlan(result.plan, planRef.dir); + return { + content: [{ type: "text" as const, text: result.message }], + }; + }, + }); +} + +export function registerPlanEntityTools( + pi: ExtensionAPI, + planRef: PlanRef, +): void { + // -- Decision -- + planTool(pi, planRef, { + name: "koan_add_decision", + label: "Add decision", + description: "Add decision to decision log.", + parameters: Type.Object({ + decision: Type.String(), + reasoning: Type.String(), + }), + execute: (p, params) => { + const r = addDecision(p, params); + return { + plan: r.plan, + message: `Added decision ${r.id}: "${params.decision}"`, + }; + }, + }); + + planTool(pi, planRef, { + name: "koan_set_decision", + label: "Update decision", + description: "Update existing decision by ID.", + parameters: Type.Object({ + id: Type.String(), + decision: Type.Optional(Type.String()), + reasoning: Type.Optional(Type.String()), + }), + execute: (p, params) => { + const updated = setDecision(p, params.id, params); + return { + plan: updated, + message: `Updated decision ${params.id}`, + }; + }, + }); + + // -- RejectedAlternative -- + planTool(pi, planRef, { + name: "koan_add_rejected_alternative", + label: "Add rejected alternative", + description: "Add rejected alternative to decision log.", + parameters: Type.Object({ + alternative: Type.String(), + rejection_reason: Type.String(), + decision_ref: Type.String(), + }), + execute: (p, params) => { + const r = 
addRejectedAlternative(p, params); + return { + plan: r.plan, + message: `Added rejected alternative ${r.id}`, + }; + }, + }); + + planTool(pi, planRef, { + name: "koan_set_rejected_alternative", + label: "Update rejected alternative", + description: "Update existing rejected alternative by ID.", + parameters: Type.Object({ + id: Type.String(), + alternative: Type.Optional(Type.String()), + rejection_reason: Type.Optional(Type.String()), + decision_ref: Type.Optional(Type.String()), + }), + execute: (p, params) => { + const updated = setRejectedAlternative(p, params.id, params); + return { + plan: updated, + message: `Updated rejected alternative ${params.id}`, + }; + }, + }); + + // -- Risk -- + planTool(pi, planRef, { + name: "koan_add_risk", + label: "Add risk", + description: "Add risk to known risks.", + parameters: Type.Object({ + risk: Type.String(), + mitigation: Type.String(), + anchor: Type.Optional(Type.String()), + decision_ref: Type.Optional(Type.String()), + }), + execute: (p, params) => { + const r = addRisk(p, params); + return { + plan: r.plan, + message: `Added risk ${r.id}: "${params.risk}"`, + }; + }, + }); + + planTool(pi, planRef, { + name: "koan_set_risk", + label: "Update risk", + description: "Update existing risk by ID.", + parameters: Type.Object({ + id: Type.String(), + risk: Type.Optional(Type.String()), + mitigation: Type.Optional(Type.String()), + anchor: Type.Optional(Type.String()), + decision_ref: Type.Optional(Type.String()), + }), + execute: (p, params) => { + const updated = setRisk(p, params.id, params); + return { + plan: updated, + message: `Updated risk ${params.id}`, + }; + }, + }); + + // -- Milestone -- + planTool(pi, planRef, { + name: "koan_add_milestone", + label: "Add milestone", + description: "Create new milestone.", + parameters: Type.Object({ + name: Type.String(), + files: Type.Optional(Type.Array(Type.String())), + flags: Type.Optional(Type.Array(Type.String())), + requirements: 
Type.Optional(Type.Array(Type.String())), + acceptance_criteria: Type.Optional(Type.Array(Type.String())), + tests: Type.Optional(Type.Array(Type.String())), + }), + execute: (p, params) => { + const r = addMilestone(p, params); + return { + plan: r.plan, + message: `Added milestone ${r.id}: "${params.name}"`, + }; + }, + }); + + planTool(pi, planRef, { + name: "koan_set_milestone_name", + label: "Set milestone name", + description: "Update milestone name.", + parameters: Type.Object({ + id: Type.String(), + name: Type.String(), + }), + execute: (p, params) => { + const updated = setMilestoneName(p, params.id, params.name); + return { + plan: updated, + message: `Set name for milestone ${params.id}`, + }; + }, + }); + + planTool(pi, planRef, { + name: "koan_set_milestone_files", + label: "Set milestone files", + description: "Update milestone files list.", + parameters: Type.Object({ + id: Type.String(), + files: Type.Array(Type.String()), + }), + execute: (p, params) => { + const updated = setMilestoneFiles(p, params.id, params.files); + return { + plan: updated, + message: `Set files for milestone ${params.id} (${params.files.length} files)`, + }; + }, + }); + + planTool(pi, planRef, { + name: "koan_set_milestone_flags", + label: "Set milestone flags", + description: "Update milestone flags list.", + parameters: Type.Object({ + id: Type.String(), + flags: Type.Array(Type.String()), + }), + execute: (p, params) => { + const updated = setMilestoneFlags(p, params.id, params.flags); + return { + plan: updated, + message: `Set flags for milestone ${params.id}`, + }; + }, + }); + + planTool(pi, planRef, { + name: "koan_set_milestone_requirements", + label: "Set milestone requirements", + description: "Update milestone requirements list.", + parameters: Type.Object({ + id: Type.String(), + requirements: Type.Array(Type.String()), + }), + execute: (p, params) => { + const updated = setMilestoneRequirements(p, params.id, params.requirements); + return { + plan: updated, + 
message: `Set requirements for milestone ${params.id} (${params.requirements.length} items)`, + }; + }, + }); + + planTool(pi, planRef, { + name: "koan_set_milestone_acceptance_criteria", + label: "Set milestone acceptance criteria", + description: "Update milestone acceptance criteria list.", + parameters: Type.Object({ + id: Type.String(), + acceptance_criteria: Type.Array(Type.String()), + }), + execute: (p, params) => { + const updated = setMilestoneAcceptanceCriteria( + p, + params.id, + params.acceptance_criteria, + ); + return { + plan: updated, + message: `Set acceptance criteria for milestone ${params.id} (${params.acceptance_criteria.length} items)`, + }; + }, + }); + + planTool(pi, planRef, { + name: "koan_set_milestone_tests", + label: "Set milestone tests", + description: "Update milestone tests list.", + parameters: Type.Object({ + id: Type.String(), + tests: Type.Array(Type.String()), + }), + execute: (p, params) => { + const updated = setMilestoneTests(p, params.id, params.tests); + return { + plan: updated, + message: `Set tests for milestone ${params.id} (${params.tests.length} tests)`, + }; + }, + }); + + // -- CodeIntent -- + planTool(pi, planRef, { + name: "koan_add_intent", + label: "Add code intent", + description: "Add code intent to milestone.", + parameters: Type.Object({ + milestone: Type.String(), + file: Type.String(), + function: Type.Optional(Type.String()), + behavior: Type.String(), + decision_refs: Type.Optional(Type.Array(Type.String())), + }), + execute: (p, params) => { + const r = addIntent(p, params); + return { + plan: r.plan, + message: `Added intent ${r.id} to milestone ${params.milestone}`, + }; + }, + }); + + planTool(pi, planRef, { + name: "koan_set_intent", + label: "Update code intent", + description: "Update existing code intent by ID.", + parameters: Type.Object({ + id: Type.String(), + file: Type.Optional(Type.String()), + function: Type.Optional(Type.String()), + behavior: Type.Optional(Type.String()), + 
decision_refs: Type.Optional(Type.Array(Type.String())), + }), + execute: (p, params) => { + const updated = setIntent(p, params.id, params); + return { + plan: updated, + message: `Updated intent ${params.id}`, + }; + }, + }); + + // -- CodeChange -- + planTool(pi, planRef, { + name: "koan_add_change", + label: "Add code change", + description: "Add code change to milestone.", + parameters: Type.Object({ + milestone: Type.String(), + file: Type.String(), + intent_ref: Type.Optional(Type.String()), + diff: Type.Optional(Type.String()), + doc_diff: Type.Optional(Type.String()), + comments: Type.Optional(Type.String()), + }), + execute: (p, params) => { + const r = addChange(p, params); + return { + plan: r.plan, + message: `Added change ${r.id} to milestone ${params.milestone}`, + }; + }, + }); + + planTool(pi, planRef, { + name: "koan_set_change_diff", + label: "Set code change diff", + description: "Update change diff.", + parameters: Type.Object({ + id: Type.String(), + diff: Type.String(), + }), + execute: (p, params) => { + const updated = setChangeDiff(p, params.id, params.diff); + return { + plan: updated, + message: `Set diff for change ${params.id}`, + }; + }, + }); + + planTool(pi, planRef, { + name: "koan_set_change_doc_diff", + label: "Set code change doc_diff", + description: "Update change doc_diff.", + parameters: Type.Object({ + id: Type.String(), + doc_diff: Type.String(), + }), + execute: (p, params) => { + const updated = setChangeDocDiff(p, params.id, params.doc_diff); + return { + plan: updated, + message: `Set doc_diff for change ${params.id}`, + }; + }, + }); + + planTool(pi, planRef, { + name: "koan_set_change_comments", + label: "Set code change comments", + description: "Update change comments.", + parameters: Type.Object({ + id: Type.String(), + comments: Type.String(), + }), + execute: (p, params) => { + const updated = setChangeComments(p, params.id, params.comments); + return { + plan: updated, + message: `Set comments for change 
${params.id}`, + }; + }, + }); + + planTool(pi, planRef, { + name: "koan_set_change_file", + label: "Set code change file", + description: "Update change file path.", + parameters: Type.Object({ + id: Type.String(), + file: Type.String(), + }), + execute: (p, params) => { + const updated = setChangeFile(p, params.id, params.file); + return { + plan: updated, + message: `Set file for change ${params.id}`, + }; + }, + }); + + planTool(pi, planRef, { + name: "koan_set_change_intent_ref", + label: "Set code change intent_ref", + description: "Update change intent reference.", + parameters: Type.Object({ + id: Type.String(), + intent_ref: Type.String(), + }), + execute: (p, params) => { + const updated = setChangeIntentRef(p, params.id, params.intent_ref); + return { + plan: updated, + message: `Set intent_ref for change ${params.id}`, + }; + }, + }); + + // -- Wave -- + planTool(pi, planRef, { + name: "koan_add_wave", + label: "Add wave", + description: "Create wave with milestone list.", + parameters: Type.Object({ + milestones: Type.Array(Type.String()), + }), + execute: (p, params) => { + const r = addWave(p, params); + return { + plan: r.plan, + message: `Added wave ${r.id} with ${params.milestones.length} milestones`, + }; + }, + }); + + planTool(pi, planRef, { + name: "koan_set_wave_milestones", + label: "Set wave milestones", + description: "Update wave milestones list.", + parameters: Type.Object({ + id: Type.String(), + milestones: Type.Array(Type.String()), + }), + execute: (p, params) => { + const updated = setWaveMilestones(p, params.id, params.milestones); + return { + plan: updated, + message: `Set milestones for wave ${params.id}`, + }; + }, + }); + + // -- Diagram -- + planTool(pi, planRef, { + name: "koan_add_diagram", + label: "Add diagram", + description: "Create diagram graph.", + parameters: Type.Object({ + type: Type.Union([ + Type.Literal("architecture"), + Type.Literal("state"), + Type.Literal("sequence"), + Type.Literal("dataflow"), + ]), + 
scope: Type.String(), + title: Type.String(), + }), + execute: (p, params) => { + const r = addDiagram(p, params); + return { + plan: r.plan, + message: `Added diagram ${r.id}: "${params.title}"`, + }; + }, + }); + + planTool(pi, planRef, { + name: "koan_set_diagram", + label: "Update diagram", + description: "Update diagram properties.", + parameters: Type.Object({ + id: Type.String(), + title: Type.Optional(Type.String()), + scope: Type.Optional(Type.String()), + ascii_render: Type.Optional(Type.String()), + }), + execute: (p, params) => { + const updated = setDiagram(p, params.id, params); + return { + plan: updated, + message: `Updated diagram ${params.id}`, + }; + }, + }); + + planTool(pi, planRef, { + name: "koan_add_diagram_node", + label: "Add diagram node", + description: "Add node to diagram.", + parameters: Type.Object({ + diagram_id: Type.String(), + id: Type.String(), + label: Type.String(), + type: Type.Optional(Type.String()), + }), + execute: (p, params) => { + const updated = addDiagramNode(p, params.diagram_id, params); + return { + plan: updated, + message: `Added node ${params.id} to diagram ${params.diagram_id}`, + }; + }, + }); + + planTool(pi, planRef, { + name: "koan_add_diagram_edge", + label: "Add diagram edge", + description: "Add edge to diagram.", + parameters: Type.Object({ + diagram_id: Type.String(), + source: Type.String(), + target: Type.String(), + label: Type.String(), + protocol: Type.Optional(Type.String()), + }), + execute: (p, params) => { + const updated = addDiagramEdge(p, params.diagram_id, params); + return { + plan: updated, + message: `Added edge ${params.source}->${params.target} to diagram ${params.diagram_id}`, + }; + }, + }); + + // -- ReadmeEntry -- + planTool(pi, planRef, { + name: "koan_set_readme_entry", + label: "Set readme entry", + description: "Upsert readme entry by path.", + parameters: Type.Object({ + path: Type.String(), + content: Type.String(), + }), + execute: (p, params) => { + const updated = 
setReadmeEntry(p, params.path, params.content); + return { + plan: updated, + message: `Set readme entry for ${params.path}`, + }; + }, + }); +} diff --git a/src/planner/tools/plan-getters.ts b/src/planner/tools/plan-getters.ts new file mode 100644 index 0000000..ff1fc2e --- /dev/null +++ b/src/planner/tools/plan-getters.ts @@ -0,0 +1,167 @@ +import { Type } from "@sinclair/typebox"; +import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; + +import type { PlanRef } from "./dispatch.js"; +import { loadPlan } from "../plan/serialize.js"; +import type { Plan, Milestone, CodeIntent, CodeChange } from "../plan/types.js"; + +export function registerPlanGetterTools( + pi: ExtensionAPI, + planRef: PlanRef, +): void { + pi.registerTool({ + name: "koan_get_plan", + label: "Get plan summary", + description: + "Returns plan overview and entity counts with IDs for drill-down.", + parameters: Type.Object({}), + async execute() { + if (!planRef.dir) throw new Error("No plan directory is active."); + const p = await loadPlan(planRef.dir); + const summary = formatPlanSummary(p); + return { + content: [{ type: "text" as const, text: summary }], + }; + }, + }); + + pi.registerTool({ + name: "koan_get_milestone", + label: "Get milestone by ID", + description: "Returns full milestone with code_intents and code_changes.", + parameters: Type.Object({ + id: Type.String({ description: "Milestone ID (e.g., M-001)" }), + }), + async execute(_toolCallId, params) { + if (!planRef.dir) throw new Error("No plan directory is active."); + const p = await loadPlan(planRef.dir); + const m = p.milestones.find((x) => x.id === (params as { id: string }).id); + if (!m) throw new Error(`Milestone ${(params as { id: string }).id} not found`); + return { + content: [{ type: "text" as const, text: JSON.stringify(m, null, 2) }], + }; + }, + }); + + pi.registerTool({ + name: "koan_get_decision", + label: "Get decision by ID", + description: "Returns decision from decision log.", + parameters: 
Type.Object({ + id: Type.String({ description: "Decision ID (e.g., DL-001)" }), + }), + async execute(_toolCallId, params) { + if (!planRef.dir) throw new Error("No plan directory is active."); + const p = await loadPlan(planRef.dir); + const d = p.planning_context.decision_log.find( + (x) => x.id === (params as { id: string }).id, + ); + if (!d) throw new Error(`Decision ${(params as { id: string }).id} not found`); + return { + content: [{ type: "text" as const, text: JSON.stringify(d, null, 2) }], + }; + }, + }); + + pi.registerTool({ + name: "koan_get_intent", + label: "Get code intent by ID", + description: "Returns code intent and parent milestone ID.", + parameters: Type.Object({ + id: Type.String({ description: "Intent ID (e.g., CI-M-001-001)" }), + }), + async execute(_toolCallId, params) { + if (!planRef.dir) throw new Error("No plan directory is active."); + const p = await loadPlan(planRef.dir); + const result = findIntent(p, (params as { id: string }).id); + if (!result) + throw new Error(`Intent ${(params as { id: string }).id} not found`); + return { + content: [ + { + type: "text" as const, + text: JSON.stringify( + { milestone_id: result.milestoneId, intent: result.intent }, + null, + 2, + ), + }, + ], + }; + }, + }); + + pi.registerTool({ + name: "koan_get_change", + label: "Get code change by ID", + description: "Returns code change and parent milestone ID.", + parameters: Type.Object({ + id: Type.String({ description: "Change ID (e.g., CC-M-001-001)" }), + }), + async execute(_toolCallId, params) { + if (!planRef.dir) throw new Error("No plan directory is active."); + const p = await loadPlan(planRef.dir); + const result = findChange(p, (params as { id: string }).id); + if (!result) + throw new Error(`Change ${(params as { id: string }).id} not found`); + return { + content: [ + { + type: "text" as const, + text: JSON.stringify( + { milestone_id: result.milestoneId, change: result.change }, + null, + 2, + ), + }, + ], + }; + }, + }); +} + 
+function formatPlanSummary(p: Plan): string { + const lines = [ + "Plan Summary", + "============", + "", + "Overview:", + ` Problem: ${p.overview.problem || "(empty)"}`, + ` Approach: ${p.overview.approach || "(empty)"}`, + "", + `Milestones (${p.milestones.length}):`, + ...p.milestones.map((m) => ` ${m.id}: ${m.name}`), + "", + `Decisions (${p.planning_context.decision_log.length}):`, + ...p.planning_context.decision_log.map((d) => ` ${d.id}: ${d.decision}`), + "", + `Waves (${p.waves.length}):`, + ...p.waves.map((w) => ` ${w.id}: [${w.milestones.join(", ")}]`), + "", + `Diagrams (${p.diagram_graphs.length}):`, + ...p.diagram_graphs.map((d) => ` ${d.id}: ${d.title} (${d.type})`), + ]; + return lines.join("\n"); +} + +function findIntent( + p: Plan, + id: string, +): { milestoneId: string; intent: CodeIntent } | null { + for (const m of p.milestones) { + const intent = m.code_intents.find((ci) => ci.id === id); + if (intent) return { milestoneId: m.id, intent }; + } + return null; +} + +function findChange( + p: Plan, + id: string, +): { milestoneId: string; change: CodeChange } | null { + for (const m of p.milestones) { + const change = m.code_changes.find((cc) => cc.id === id); + if (change) return { milestoneId: m.id, change }; + } + return null; +} diff --git a/src/planner/tools/plan-setters.ts b/src/planner/tools/plan-setters.ts new file mode 100644 index 0000000..16a0a87 --- /dev/null +++ b/src/planner/tools/plan-setters.ts @@ -0,0 +1,92 @@ +import { Type } from "@sinclair/typebox"; +import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; + +import type { PlanRef } from "./dispatch.js"; +import { loadPlan, savePlan } from "../plan/serialize.js"; +import { + setOverview, + setConstraints, + setInvisibleKnowledge, +} from "../plan/mutate.js"; + +export function registerPlanSetterTools( + pi: ExtensionAPI, + planRef: PlanRef, +): void { + pi.registerTool({ + name: "koan_set_overview", + label: "Set plan overview", + description: "Set problem 
statement and approach.", + parameters: Type.Object({ + problem: Type.Optional(Type.String()), + approach: Type.Optional(Type.String()), + }), + async execute(_toolCallId, params) { + if (!planRef.dir) throw new Error("No plan directory is active."); + const p = await loadPlan(planRef.dir); + const updated = setOverview( + p, + params as { problem?: string; approach?: string }, + ); + await savePlan(updated, planRef.dir); + return { + content: [{ type: "text" as const, text: "Overview updated." }], + }; + }, + }); + + pi.registerTool({ + name: "koan_set_constraints", + label: "Set plan constraints", + description: "Set planning constraints list.", + parameters: Type.Object({ + constraints: Type.Array(Type.String()), + }), + async execute(_toolCallId, params) { + if (!planRef.dir) throw new Error("No plan directory is active."); + const p = await loadPlan(planRef.dir); + const updated = setConstraints( + p, + (params as { constraints: string[] }).constraints, + ); + await savePlan(updated, planRef.dir); + return { + content: [ + { + type: "text" as const, + text: `Constraints set (${(params as { constraints: string[] }).constraints.length} items).`, + }, + ], + }; + }, + }); + + pi.registerTool({ + name: "koan_set_invisible_knowledge", + label: "Set invisible knowledge", + description: "Set system description, invariants, and tradeoffs.", + parameters: Type.Object({ + system: Type.Optional(Type.String()), + invariants: Type.Optional(Type.Array(Type.String())), + tradeoffs: Type.Optional(Type.Array(Type.String())), + }), + async execute(_toolCallId, params) { + if (!planRef.dir) throw new Error("No plan directory is active."); + const p = await loadPlan(planRef.dir); + const updated = setInvisibleKnowledge( + p, + params as { + system?: string; + invariants?: string[]; + tradeoffs?: string[]; + }, + ); + await savePlan(updated, planRef.dir); + return { + content: [ + { type: "text" as const, text: "Invisible knowledge updated." 
}, + ], + }; + }, + }); +} diff --git a/src/planner/tools/qr-tools.ts b/src/planner/tools/qr-tools.ts new file mode 100644 index 0000000..bf62bda --- /dev/null +++ b/src/planner/tools/qr-tools.ts @@ -0,0 +1,232 @@ +import { Type } from "@sinclair/typebox"; +import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; +import { promises as fs } from "node:fs"; +import * as path from "node:path"; + +import type { PlanRef } from "./dispatch.js"; +import type { QRFile, QRSeverity, QRItemStatus } from "../qr/types.js"; +import { addQRItem, setQRItem, assignGroup } from "../qr/mutate.js"; + +function createEmptyQRFile(phase: string): QRFile { + return { + phase, + iteration: 1, + items: [], + }; +} + +async function loadQR(dir: string, phase: string): Promise { + const qrPath = path.join(dir, `qr-${phase}.json`); + try { + const content = await fs.readFile(qrPath, "utf8"); + return JSON.parse(content) as QRFile; + } catch (err: unknown) { + if ((err as NodeJS.ErrnoException).code === "ENOENT") { + return createEmptyQRFile(phase); + } + throw err; + } +} + +async function saveQR(qr: QRFile, dir: string, phase: string): Promise { + const qrPath = path.join(dir, `qr-${phase}.json`); + const tmpPath = path.join(dir, `.qr-${phase}.json.tmp`); + const content = `${JSON.stringify(qr, null, 2)}\n`; + await fs.writeFile(tmpPath, content, "utf8"); + await fs.rename(tmpPath, qrPath); +} + +export function registerQRTools(pi: ExtensionAPI, planRef: PlanRef): void { + pi.registerTool({ + name: "koan_qr_add_item", + label: "Add QR item", + description: "Add quality review item.", + parameters: Type.Object({ + phase: Type.String(), + scope: Type.String(), + check: Type.String(), + severity: Type.Optional( + Type.Union([ + Type.Literal("MUST"), + Type.Literal("SHOULD"), + Type.Literal("COULD"), + ]), + ), + }), + async execute(_toolCallId, params) { + if (!planRef.dir) throw new Error("No plan directory is active."); + const p = params as { + phase: string; + scope: string; + 
check: string; + severity?: QRSeverity; + }; + const qr = await loadQR(planRef.dir, p.phase); + const r = addQRItem(qr, p); + await saveQR(r.qr, planRef.dir, p.phase); + return { + content: [{ type: "text" as const, text: `Added QR item ${r.id}` }], + }; + }, + }); + + pi.registerTool({ + name: "koan_qr_set_item", + label: "Update QR item", + description: "Update QR item status or finding.", + parameters: Type.Object({ + phase: Type.String(), + id: Type.String(), + status: Type.Optional( + Type.Union([ + Type.Literal("TODO"), + Type.Literal("PASS"), + Type.Literal("FAIL"), + ]), + ), + finding: Type.Optional(Type.String()), + check: Type.Optional(Type.String()), + severity: Type.Optional( + Type.Union([ + Type.Literal("MUST"), + Type.Literal("SHOULD"), + Type.Literal("COULD"), + ]), + ), + }), + async execute(_toolCallId, params) { + if (!planRef.dir) throw new Error("No plan directory is active."); + const p = params as { + phase: string; + id: string; + status?: QRItemStatus; + finding?: string; + check?: string; + severity?: QRSeverity; + }; + const qr = await loadQR(planRef.dir, p.phase); + const updated = setQRItem(qr, p.id, p); + await saveQR(updated, planRef.dir, p.phase); + return { + content: [{ type: "text" as const, text: `Updated QR item ${p.id}` }], + }; + }, + }); + + pi.registerTool({ + name: "koan_qr_assign_group", + label: "Assign QR group", + description: "Assign group ID to QR items.", + parameters: Type.Object({ + phase: Type.String(), + ids: Type.Array(Type.String()), + group_id: Type.String(), + }), + async execute(_toolCallId, params) { + if (!planRef.dir) throw new Error("No plan directory is active."); + const p = params as { + phase: string; + ids: string[]; + group_id: string; + }; + const qr = await loadQR(planRef.dir, p.phase); + const updated = assignGroup(qr, p.ids, p.group_id); + await saveQR(updated, planRef.dir, p.phase); + return { + content: [ + { + type: "text" as const, + text: `Assigned ${p.ids.length} items to group 
${p.group_id}`, + }, + ], + }; + }, + }); + + pi.registerTool({ + name: "koan_qr_get_item", + label: "Get QR item", + description: "Get QR item by ID.", + parameters: Type.Object({ + phase: Type.String(), + id: Type.String(), + }), + async execute(_toolCallId, params) { + if (!planRef.dir) throw new Error("No plan directory is active."); + const p = params as { phase: string; id: string }; + const qr = await loadQR(planRef.dir, p.phase); + const item = qr.items.find((x) => x.id === p.id); + if (!item) throw new Error(`QR item ${p.id} not found`); + return { + content: [{ type: "text" as const, text: JSON.stringify(item, null, 2) }], + }; + }, + }); + + pi.registerTool({ + name: "koan_qr_list_items", + label: "List QR items", + description: "List QR items, optionally filtered by status.", + parameters: Type.Object({ + phase: Type.String(), + status: Type.Optional( + Type.Union([ + Type.Literal("TODO"), + Type.Literal("PASS"), + Type.Literal("FAIL"), + ]), + ), + }), + async execute(_toolCallId, params) { + if (!planRef.dir) throw new Error("No plan directory is active."); + const p = params as { phase: string; status?: QRItemStatus }; + const qr = await loadQR(planRef.dir, p.phase); + const filtered = p.status + ? 
qr.items.filter((item) => item.status === p.status) + : qr.items; + return { + content: [ + { type: "text" as const, text: JSON.stringify(filtered, null, 2) }, + ], + }; + }, + }); + + pi.registerTool({ + name: "koan_qr_summary", + label: "QR summary", + description: "Get QR summary with counts by status and severity.", + parameters: Type.Object({ + phase: Type.String(), + }), + async execute(_toolCallId, params) { + if (!planRef.dir) throw new Error("No plan directory is active."); + const p = params as { phase: string }; + const qr = await loadQR(planRef.dir, p.phase); + + const byStatus = { + TODO: qr.items.filter((x) => x.status === "TODO").length, + PASS: qr.items.filter((x) => x.status === "PASS").length, + FAIL: qr.items.filter((x) => x.status === "FAIL").length, + }; + + const bySeverity = { + MUST: qr.items.filter((x) => x.severity === "MUST").length, + SHOULD: qr.items.filter((x) => x.severity === "SHOULD").length, + COULD: qr.items.filter((x) => x.severity === "COULD").length, + }; + + const summary = { + total: qr.items.length, + by_status: byStatus, + by_severity: bySeverity, + }; + + return { + content: [ + { type: "text" as const, text: JSON.stringify(summary, null, 2) }, + ], + }; + }, + }); +} diff --git a/src/planner/tools/registry.ts b/src/planner/tools/registry.ts new file mode 100644 index 0000000..36391c1 --- /dev/null +++ b/src/planner/tools/registry.ts @@ -0,0 +1,190 @@ +// Default-deny permissions. Read tools bypass this map. Write tools +// (edit/write) always blocked during planning. The map defines OUTER +// boundaries; phase handlers narrow further. 
+ +const READ_TOOLS = new Set(["read", "bash", "grep", "glob", "find", "ls"]); +const WRITE_TOOLS = new Set(["edit", "write"]); + +const PLAN_GETTER_TOOLS_LIST = [ + "koan_get_plan", + "koan_get_milestone", + "koan_get_decision", + "koan_get_intent", + "koan_get_change", +]; + +const PLAN_SETTER_TOOLS_LIST = [ + "koan_set_overview", + "koan_set_constraints", + "koan_set_invisible_knowledge", +]; + +const PLAN_DECISION_TOOLS_LIST = ["koan_add_decision", "koan_set_decision"]; + +const PLAN_REJECTED_ALT_TOOLS_LIST = [ + "koan_add_rejected_alternative", + "koan_set_rejected_alternative", +]; + +const PLAN_RISK_TOOLS_LIST = ["koan_add_risk", "koan_set_risk"]; + +const PLAN_MILESTONE_TOOLS_LIST = [ + "koan_add_milestone", + "koan_set_milestone_name", + "koan_set_milestone_files", + "koan_set_milestone_flags", + "koan_set_milestone_requirements", + "koan_set_milestone_acceptance_criteria", + "koan_set_milestone_tests", +]; + +const PLAN_INTENT_TOOLS_LIST = ["koan_add_intent", "koan_set_intent"]; + +const PLAN_CHANGE_TOOLS_LIST = [ + "koan_add_change", + "koan_set_change_diff", + "koan_set_change_doc_diff", + "koan_set_change_comments", + "koan_set_change_file", + "koan_set_change_intent_ref", +]; + +const PLAN_WAVE_TOOLS_LIST = ["koan_add_wave", "koan_set_wave_milestones"]; + +const PLAN_DIAGRAM_TOOLS_LIST = [ + "koan_add_diagram", + "koan_set_diagram", + "koan_add_diagram_node", + "koan_add_diagram_edge", +]; + +const PLAN_README_TOOLS_LIST = ["koan_set_readme_entry"]; + +const QR_TOOLS_LIST = [ + "koan_qr_add_item", + "koan_qr_set_item", + "koan_qr_assign_group", + "koan_qr_get_item", + "koan_qr_list_items", + "koan_qr_summary", +]; + +const ALL_PLAN_ENTITY_TOOLS = [ + ...PLAN_DECISION_TOOLS_LIST, + ...PLAN_REJECTED_ALT_TOOLS_LIST, + ...PLAN_RISK_TOOLS_LIST, + ...PLAN_MILESTONE_TOOLS_LIST, + ...PLAN_INTENT_TOOLS_LIST, + ...PLAN_WAVE_TOOLS_LIST, + ...PLAN_DIAGRAM_TOOLS_LIST, + ...PLAN_README_TOOLS_LIST, +]; + +const PLAN_DESIGN_ENTITY_TOOLS = 
ALL_PLAN_ENTITY_TOOLS.filter( + (t) => !PLAN_CHANGE_TOOLS_LIST.includes(t), +); + +export const PLAN_GETTER_TOOLS: ReadonlySet = new Set( + PLAN_GETTER_TOOLS_LIST, +); + +export const PLAN_MUTATION_TOOLS: ReadonlySet = new Set([ + ...PLAN_SETTER_TOOLS_LIST, + ...ALL_PLAN_ENTITY_TOOLS, + ...PLAN_CHANGE_TOOLS_LIST, +]); + +// Missing phase keys are blocked (default-deny extends to unknown phases). +// Prevents security boundary breach when a new phase is added without +// updating the permissions map. +export const PHASE_PERMISSIONS: ReadonlyMap> = + new Map([ + ["context-capture", new Set(["koan_store_context", "koan_next_step"])], + [ + "plan-design", + new Set([ + "koan_next_step", + ...PLAN_GETTER_TOOLS_LIST, + ...PLAN_SETTER_TOOLS_LIST, + ...PLAN_DESIGN_ENTITY_TOOLS, + ]), + ], + [ + "plan-code", + new Set([ + "koan_next_step", + ...PLAN_GETTER_TOOLS_LIST, + ...PLAN_CHANGE_TOOLS_LIST, + "koan_set_intent", + ]), + ], + [ + "plan-docs", + new Set([ + "koan_next_step", + ...PLAN_GETTER_TOOLS_LIST, + "koan_set_change_doc_diff", + "koan_set_change_comments", + "koan_set_readme_entry", + "koan_set_diagram", + ]), + ], + [ + "qr-plan-design", + new Set(["koan_next_step", ...PLAN_GETTER_TOOLS_LIST, ...QR_TOOLS_LIST]), + ], + [ + "qr-plan-code", + new Set([ + "koan_next_step", + "koan_get_plan", + "koan_get_milestone", + "koan_get_intent", + "koan_get_change", + ...QR_TOOLS_LIST, + ]), + ], + [ + "qr-plan-docs", + new Set([ + "koan_next_step", + "koan_get_plan", + "koan_get_milestone", + "koan_get_change", + ...QR_TOOLS_LIST, + ]), + ], + ]); + +export function checkPermission( + phaseKey: string, + toolName: string, +): { allowed: boolean; reason?: string } { + if (READ_TOOLS.has(toolName)) { + return { allowed: true }; + } + + if (WRITE_TOOLS.has(toolName)) { + return { + allowed: false, + reason: "Edit/write tools blocked during planning.", + }; + } + + if (!PHASE_PERMISSIONS.has(phaseKey)) { + return { + allowed: false, + reason: `Unknown phase: ${phaseKey}`, + }; + 
} + + const allowed = PHASE_PERMISSIONS.get(phaseKey)!; + if (!allowed.has(toolName)) { + return { + allowed: false, + reason: `${toolName} is not available in phase ${phaseKey}`, + }; + } + + return { allowed: true }; +} diff --git a/src/planner/types.ts b/src/planner/types.ts new file mode 100644 index 0000000..2a71e39 --- /dev/null +++ b/src/planner/types.ts @@ -0,0 +1,21 @@ +export interface ContextData { + task_spec: string[]; + constraints: string[]; + entry_points: string[]; + rejected_alternatives: string[]; + current_understanding: string[]; + assumptions: string[]; + invisible_knowledge: string[]; + reference_docs: string[]; +} + +export const CONTEXT_KEYS: ReadonlyArray = [ + "task_spec", + "constraints", + "entry_points", + "rejected_alternatives", + "current_understanding", + "assumptions", + "invisible_knowledge", + "reference_docs", +]; diff --git a/src/utils/logger.ts b/src/utils/logger.ts new file mode 100644 index 0000000..67f4c2e --- /dev/null +++ b/src/utils/logger.ts @@ -0,0 +1,14 @@ +const prefix = "[koan]"; + +export type Logger = | undefined>(message: string, details?: T) => void; + +export function createLogger(scope: string): Logger { + const label = `${prefix} ${scope}`; + return (message, details) => { + if (details && Object.keys(details).length > 0) { + console.log(`${label}: ${message}`, details); + } else { + console.log(`${label}: ${message}`); + } + }; +} diff --git a/src/utils/plan.ts b/src/utils/plan.ts new file mode 100644 index 0000000..a34f382 --- /dev/null +++ b/src/utils/plan.ts @@ -0,0 +1,72 @@ +import { promises as fs } from "node:fs"; +import * as os from "node:os"; +import * as path from "node:path"; + +import type { PlanInfo } from "../planner/state.js"; + +const KOAN_HOME = path.join(os.homedir(), ".koan"); +const PLANS_HOME = path.join(KOAN_HOME, "plans"); + +function slugify(input: string): string { + const base = input + .toLowerCase() + .replace(/[^a-z0-9]+/g, "-") + .replace(/^-+|-+$/g, "") + .slice(0, 48); + + 
return base.length > 0 ? base : "plan"; +} + +function generatePlanId(description: string, now: Date): string { + const timestamp = now.toISOString().replace(/[-:]/g, "").replace(/\..+/, ""); + const slug = slugify(description); + return `${timestamp}-${slug}`; +} + +async function ensurePlanDirectoryUnique(baseId: string): Promise<{ id: string; directory: string }> { + let suffix = 0; + while (true) { + const candidateId = suffix === 0 ? baseId : `${baseId}-${suffix}`; + const directory = path.join(PLANS_HOME, candidateId); + + try { + await fs.mkdir(directory, { recursive: false }); + return { id: candidateId, directory }; + } catch (error) { + const err = error as NodeJS.ErrnoException; + if (err.code === "EEXIST") { + suffix += 1; + continue; + } + throw error; + } + } +} + +export async function createPlanInfo(description: string, projectCwd: string, now = new Date()): Promise { + await fs.mkdir(PLANS_HOME, { recursive: true }); + + const baseId = generatePlanId(description, now); + const { id, directory } = await ensurePlanDirectoryUnique(baseId); + + const metadataPath = path.join(directory, "metadata.json"); + + const plan: PlanInfo = { + id, + directory, + metadataPath, + createdAt: now.toISOString(), + }; + + const metadata = { + id: plan.id, + createdAt: plan.createdAt, + description, + status: "created" as const, + projectCwd, + }; + + await fs.writeFile(metadataPath, `${JSON.stringify(metadata, null, 2)}\n`, "utf8"); + + return plan; +} diff --git a/src/utils/progress.ts b/src/utils/progress.ts new file mode 100644 index 0000000..566bda8 --- /dev/null +++ b/src/utils/progress.ts @@ -0,0 +1,71 @@ +import { promises as fs } from "node:fs"; +import * as crypto from "node:crypto"; +import * as path from "node:path"; + +export interface TrailEntry { + at: string; + msg: string; +} + +export interface SubagentState { + role: string; + phase: string; + status: "running" | "completed" | "failed"; + current: string; + updated_at: string; + trail: TrailEntry[]; 
+} + +export async function createSubagentDir(planDir: string, role: string): Promise { + const hex = crypto.randomBytes(2).toString("hex"); + const dir = path.join(planDir, "subagents", `${role}-${hex}`); + await fs.mkdir(dir, { recursive: true }); + return dir; +} + +export class ProgressReporter { + private readonly stateFile: string; + private readonly state: SubagentState; + + constructor(dir: string, role: string, phase: string) { + this.stateFile = path.join(dir, "state.json"); + this.state = { + role, + phase, + status: "running", + current: "", + updated_at: new Date().toISOString(), + trail: [], + }; + } + + async update(msg: string): Promise { + const now = new Date().toISOString(); + this.state.current = msg; + this.state.updated_at = now; + this.state.trail.push({ at: now, msg }); + await this.flush(); + } + + async complete(status: "completed" | "failed"): Promise { + const now = new Date().toISOString(); + this.state.status = status; + this.state.current = status; + this.state.updated_at = now; + this.state.trail.push({ at: now, msg: status }); + await this.flush(); + } + + private async flush(): Promise { + await fs.writeFile(this.stateFile, JSON.stringify(this.state, null, 2) + "\n"); + } +} + +export async function readSubagentState(dir: string): Promise { + try { + const raw = await fs.readFile(path.join(dir, "state.json"), "utf8"); + return JSON.parse(raw) as SubagentState; + } catch { + return null; + } +} From 28a65128f57512c8a612ff124c129609ecbc2d28 Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Fri, 13 Feb 2026 12:54:02 +0700 Subject: [PATCH 002/412] Fix step gate to use blocklist instead of whitelist The whitelist pattern (!PLAN_GETTER_TOOLS.has(name)) blocked read tools and future pi-native tools that checkPermission already approved. Switch to blocklist (PLAN_MUTATION_TOOLS.has(name)) so only mutation tools are step-gated and everything else defers to checkPermission. 
--- src/planner/phases/plan-design.ts | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/planner/phases/plan-design.ts b/src/planner/phases/plan-design.ts index 4e90d39..8928616 100644 --- a/src/planner/phases/plan-design.ts +++ b/src/planner/phases/plan-design.ts @@ -16,7 +16,7 @@ import type { ContextData } from "../types.js"; import { createLogger, type Logger } from "../../utils/logger.js"; import { ProgressReporter } from "../../utils/progress.js"; import { hookDispatch, unhookDispatch, type WorkflowDispatch, type PlanRef } from "../tools/dispatch.js"; -import { checkPermission, PLAN_GETTER_TOOLS } from "../tools/registry.js"; +import { checkPermission, PLAN_MUTATION_TOOLS } from "../tools/registry.js"; type PlanDesignStep = 1 | 2 | 3 | 4 | 5 | 6; @@ -132,8 +132,11 @@ export class PlanDesignPhase { return { block: true, reason: perm.reason }; } + // Step gate: mutation tools are step-6-only. Blocklist (not whitelist) + // so read tools and future pi-native tools pass through after + // checkPermission approves them. const step = this.state.step; - if (step < 6 && !PLAN_GETTER_TOOLS.has(event.toolName) && event.toolName !== "koan_next_step") { + if (step < 6 && PLAN_MUTATION_TOOLS.has(event.toolName)) { return { block: true, reason: `${event.toolName} available in step 6 (current: ${step})`, From 25a9047c4937b530491d0f87288422bcfec392ac Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Fri, 13 Feb 2026 13:05:59 +0700 Subject: [PATCH 003/412] Fix async/sync mismatch in onNextStep dispatch handleStepComplete() is async but the dispatch slot was typed synchronous. Every koan_next_step call checked .ok on the raw Promise (undefined), unconditionally throwing "Step transition failed." 
--- src/planner/tools/dispatch.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/planner/tools/dispatch.ts b/src/planner/tools/dispatch.ts index ee9fd5d..28e91b8 100644 --- a/src/planner/tools/dispatch.ts +++ b/src/planner/tools/dispatch.ts @@ -25,7 +25,7 @@ export interface StepResult { // -- Dispatch -- export interface WorkflowDispatch { - onNextStep: (() => StepResult) | null; + onNextStep: (() => StepResult | Promise) | null; onStoreContext: | ((payload: unknown, ctx: ExtensionContext) => Promise) | null; @@ -102,7 +102,7 @@ export function registerWorkflowTools( if (!dispatch.onNextStep) { throw new Error("No workflow phase is active."); } - const r = dispatch.onNextStep(); + const r = await dispatch.onNextStep(); if (!r.ok) { throw new Error(r.error ?? "Step transition failed."); } From 11d9a97a38a7c1f3d1f69543020d7589b90435fa Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Fri, 13 Feb 2026 16:43:47 +0700 Subject: [PATCH 004/412] Rename koan_next_step to koan_complete_step with thoughts param GPT-5-codex cannot produce text + tool_call in the same response, causing it to narrate "Calling koan_next_step now" as text without emitting an actual tool_call block. The thoughts parameter captures the model's work output (analysis, findings) as a tool parameter instead of requiring text output alongside the tool call. Also: remove agent_end retry handler (unreliable in -p mode), improve type safety with Static (eliminates manual casts), add tsconfig.json and devDependencies for tsc --noEmit checks. 
--- design-decisions.md | 342 ++ package-lock.json | 4044 ++++++++++++++++++++++++ package.json | 11 +- src/planner/phases/context-capture.ts | 85 +- src/planner/phases/plan-design.ts | 6 +- src/planner/prompts/context-capture.ts | 9 +- src/planner/prompts/plan-design.ts | 14 +- src/planner/prompts/step.ts | 23 +- src/planner/session.ts | 4 +- src/planner/tools/dispatch.ts | 48 +- src/planner/tools/plan-entities.ts | 16 +- src/planner/tools/plan-getters.ts | 21 +- src/planner/tools/plan-setters.ts | 24 +- src/planner/tools/qr-tools.ts | 66 +- src/planner/tools/registry.ts | 14 +- tsconfig.json | 15 + 16 files changed, 4528 insertions(+), 214 deletions(-) create mode 100644 design-decisions.md create mode 100644 package-lock.json create mode 100644 tsconfig.json diff --git a/design-decisions.md b/design-decisions.md new file mode 100644 index 0000000..7a10117 --- /dev/null +++ b/design-decisions.md @@ -0,0 +1,342 @@ +# Koan Design Decisions & Invariants + +Authoritative record of design decisions, invariants, and lessons learned +across the koan project. Distilled from 6 conversations (Feb 10-13 2026), +the master plan (plans/2026-02-10-init.md), and the approved tool registry +plan (~/.claude/plans/fluffy-hopping-zebra.md). + +--- + +## Fundamental Invariants + +### INV-1: Inversion of Control + +Scripts drive the LLM, not LLM drives scripts. The extension +programmatically feeds prompts, collects output, and enforces constraints. +The LLM is a worker, not a coordinator. This is the entire reason koan +exists -- the Claude Code skill model has the LLM in the driver's seat, +which causes unreliable workflow execution. + +### INV-2: Need-to-Know Principle + +The LLM always operates on a need-to-know basis. When given the choice +between exposing more or less information, always choose less. This is +a permanent invariant. 
+ +Concrete implications: +- No implementation details in prompts (temp dirs, state file paths, + orchestrator internals, phase routing) +- No full plan state when partial suffices (QR reviewer for design does + not see code plan or docs plan) +- No accumulated history across phases (subagents start fresh) +- No meta-instructions about the workflow ("you are step 3 of 14") +- No defensive over-specification of edge cases + +### INV-3: Pi Tool Error Contract + +Pi framework determines isError on ToolResultMessage from whether +tool.execute() THROWS, not from the return value. The returned isError +field is silently discarded (agent-loop.ts:316-357). To signal errors +from tools: always `throw new Error(msg)` -- never `return { isError: true }`. + +--- + +## Architecture Decisions + +### AD-1: Two LLM Interaction Levels + +- `sendUserMessage()` in parent session: ONLY for context capture. The + session LLM is the only entity with the conversational understanding. + A fresh LLM reading a serialized transcript loses implicit context. +- `spawn()` subagent: for all substantial work (architect, developer, + writer, QR decomposer, QR reviewer). +- `complete()` from pi-ai: NOT used in koan. No direct LLM calls + without agent loop. + +### AD-2: Self-Loading Extension Pattern + +Same extension file (extensions/koan.ts) serves both modes: +- **Parent mode** (no --koan-role flag): registers /koan command, tools, + and dispatch. Zero overhead in normal pi sessions. +- **Subagent mode** (--koan-role present): activates role-specific event + hooks (state machine, tool enforcement, step prompts). + +The extension detects which mode via flag presence at before_agent_start +time (not at init -- see AD-3). + +### AD-3: CLI Flag Timing + +Pi applies CLI flag values AFTER extension factory functions run +(main.ts:568). getFlag() returns defaults during factory time. +Subagent detection MUST happen in `before_agent_start`, not in the +factory function body. 
Uses closure-scoped `dispatched` boolean guard +to ensure one-shot dispatch. + +### AD-4: Tool-Call-Driven Step Transitions (Uniform Pattern) + +ALL step transitions use the koan_next_step registered tool. The LLM +calls koan_next_step -> tool execute() returns next step's prompt. +This works in both -p mode and interactive mode. sendUserMessage() +is only used for the initial trigger (/koan plan) and as a safety net +in agent_end when the LLM fails to call the expected tool. + +**KEY CORRECTION**: Early design (Feb 10) considered turn_end + agent_end ++ sendUserMessage() chaining for step transitions. This was ABANDONED +because subagents in -p mode exit after the first agent loop completes. +Tool calls keep the agent loop alive within a single loop. The context +capture phase preserves sendUserMessage() in agent_end only as a +fallback retry mechanism, not as the primary transition path. + +### AD-5: koan_next_step Has No Arguments + +The extension is stateful -- it knows exactly which step the LLM is on +via closure state. No step number parameter needed. The tool response +contains the next step's full prompt. + +### AD-6: Tool Naming Conventions + +Settled names (corrected from earlier iterations): +- `koan_next_step` (was koan_complete_step) +- `koan_store_context` (was koan_finalize_context) +- `koan_store_plan` was later REMOVED entirely (see AD-14) +- Prompts use "instructions" not "actions" + +### AD-7: invoke_after Pattern Is Critical + +Every step prompt MUST have a clear "invoke after" directive telling +the LLM to call koan_next_step after completing the step's work. +Mirrors the reference planner's "NEXT STEP: Command: python3 -m ... +--step N" pattern. Without this, the LLM produces text-only responses +and the agent loop exits. + +Implementation: formatStep() in src/planner/prompts/step.ts appends a +default invoke-after block. Steps can override with custom invokeAfter. 
+ +The "WHEN DONE" + "Do NOT call until" creates a two-part gate: the LLM +must do work before advancing. Unconditional imperatives ("Execute this +tool now.") cause immediate tool calls because empty-param tool calls +have zero friction. + +### AD-8: Store Tools Need "Not Yet" Guidance + +koan_store_context (and formerly koan_store_plan) are always registered +and visible to the LLM even in steps where they should NOT be called. +Their tool descriptions include "DO NOT call this tool until the step +instructions explicitly tell you to." This creates a prohibition/activation +pattern with step prompts. + +### AD-9: Subagent Progress Tracking + +Per-subagent state directory, NOT a single progress.json. +Structure: `<plan-dir>/subagents/<role>-<id>/` +Contains: state.json, stdout.log, stderr.log. +ProgressReporter class manages state.json updates with trail. + +### AD-10: Architect System Prompt + +The architect's system prompt is loaded from ~/.claude/agents/architect.md +at runtime via loadPlanDesignSystemPrompt(). Injected via +before_agent_start returning { systemPrompt: ... }. + +### AD-11: Plan Schema Self-Documentation via TypeBox + +No 300-line schema prompt embedded in step 6. Tool parameter schemas +with rich TypeBox descriptions are sufficient for the LLM to discover +the schema through tool definitions. This is the "most elegant" approach +per user preference. + +### AD-12: Context Capture Phases + +Three sub-phases within context capture: +1. **Drafting**: LLM reflects on conversation. MAY use tools for "high + value" targeted exploration (confirm API signature, check file existence). + DO NOT explore speculatively. Confidence tagging: HIGH (direct evidence) + vs LOW (extrapolating). +2. **Verifying**: Self-check. Completeness, accuracy, phrasing for + downstream agents. No tools except koan_next_step. +3. **Refining**: Pure tool invocation (koan_store_context). Up to 3 + attempts with validation feedback.
+ +### AD-13: Default-Deny Tool Permissions + +Centralized `Map<phase, Set<tool>>` in src/planner/tools/registry.ts. +Unknown tools blocked in all phases. READ_TOOLS (read, bash, grep, glob, +find, ls) always allowed. WRITE_TOOLS (edit, write) always blocked during +planning. Missing phase keys are denied. + +Previous code had a "fails open" bug where tool_call handlers returned +undefined at the end of if-else chains, silently allowing unknown tools. + +### AD-14: Disk-Backed Plan Mutations (No Finalize) + +Each mutation tool: loadPlan(dir) -> mutate -> savePlan(plan, dir). +Atomic write. No in-memory accumulation + finalize pattern. The +koan_store_plan/koan_finalize_plan tool was REMOVED. + +Root cause: the LLM was skipping intermediate mutation tools and calling +koan_store_plan directly. The "build in memory then finalize" pattern +makes intermediate tools feel like ceremony. Immediate disk writes give +visible results per tool call. + +Every mutation tool returns descriptive feedback ("Added decision DL-003: +'Use polling'"). This prevents the LLM from skipping tools -- the LLM +needs evidence that each tool call produces results. + +### AD-15: Module Ownership + +- Context-capture prompts belong to the "orchestrator" (session.ts / + context-capture.ts) +- Plan-design prompts belong to the "architect" (plan-design.ts / + prompts/plan-design.ts) +- These are organizational decisions about which module owns which prompts + +### AD-16: 6-Step Architect Workflow (plan-design execute) + +1. Task Analysis & Exploration Planning +2. Codebase Exploration +3. Testing Strategy Discovery +4. Approach Generation +5. Assumption Surfacing +6. Milestone Definition & Plan Writing (plan mutation tools available) + +Steps 1-5: only READ_TOOLS + PLAN_GETTER_TOOLS + koan_next_step allowed. +Step 6: plan mutation tools unlocked. + +--- + +## Workflow Dispatch Architecture + +### WorkflowDispatch (dispatch pattern) + +Workflow tools (koan_next_step, koan_store_context) are registered once +at init.
Their execute() callbacks read from a mutable dispatch object. +Phases hook/unhook dispatch slots at activation/deactivation time. + +hookDispatch() throws if a slot is already occupied -- prevents silent +misrouting when two phases try to claim the same tool. + +### PlanRef (mutable reference) + +All plan mutation tools share a mutable `{ dir: string | null }` set +when /koan plan creates a directory or when --koan-plan-dir is received. +Decouples tool registration (init-time) from directory creation (runtime). + +### Pi Registers Tools at _buildRuntime() + +Pi snapshots tools during _buildRuntime(). Tools registered after this +point are invisible to the LLM. All 44+ tools register unconditionally +at init; phases restrict access via tool_call blocking at runtime. + +--- + +## What Is NOT Ported from Reference Planner + +| Reference planner component | Koan replacement | +|----|-----| +| CLI mutation scripts (cli/plan.py) | Pi extension tool registration | +| Thin router pattern (shared/routing.py) | Orchestrator deterministic gate logic | +| File-based state_dir | In-memory state + appendEntry() | +| Template dispatch | Direct process spawning | +| Constraint enforcement via prompt | tool_call event blocking | +| Agent markdown definitions | Self-loading extension pattern | +| Question relay handler | Not implemented (may add later) | + +--- + +## Bugs & Lessons Learned + +### BUG-1: LLM Conflates Tool Instructions with Plan Content + +In context capture, the LLM captured tool usage instructions as +constraints (e.g. "Use read tool before modifying files; edit for +surgical changes"). These are irrelevant developer instructions, not +task constraints. Solution: prompts explicitly state "Only include +constraints that are specific to this task. Do not include general +tool usage instructions, coding style guides, or editor/IDE conventions." 
+ +### BUG-2: LLM Skips Mutation Tools + +The LLM called koan_next_step through steps 1-5, then at step 6 skipped +all mutation tools and called koan_store_plan directly. The in-memory +plan was empty. Root cause: mutation tools returned opaque JSON with no +feedback -- they felt like ceremony. Solution: remove finalize tool, +disk-backed mutations, descriptive feedback per tool call (AD-14). + +### BUG-3: tool_call Handlers Fail Open + +Original tool_call handlers returned undefined at end of if-else chains, +silently allowing any new tool. Solution: default-deny permissions map +(AD-13). + +### BUG-4: isError Return Value Discarded + +Pi discards the isError field from tool return values. Only throw/no-throw +determines error status. This caused silent failures where tools returned +{ isError: true } but the framework treated them as success. Solution: +always throw new Error(msg) for error conditions (INV-3). + +### BUG-5: Weak invoke_after Causes Step Skipping + +Original weak format ("Now call koan_next_step.") produced skipped steps. +The LLM called the tool immediately without doing work, because tool +calls with empty params have zero friction. Solution: strengthen to +"WHEN DONE: After completing the instructions above, call koan_next_step. +Do NOT call this tool until the work described in this step is finished." + +### BUG-6: Flag Detection at Init Time + +Early implementation tried to detect --koan-role in the extension factory +function body. Flags are unavailable at that point (main.ts:568 sets them +after). Solution: move detection to before_agent_start with dispatched +guard (AD-3). + +--- + +## Plan JSON Schema + +Matches reference planner's Pydantic schema (shared/schema.py). +Types defined in src/planner/plan/types.ts. + +Key entities: Plan, Decision, RejectedAlternative, Risk, Milestone, +CodeIntent, CodeChange, Wave, DiagramGraph, ReadmeEntry, Overview, +InvisibleKnowledge, PlanningContext. 
+ +Cross-reference validation: intent_ref -> intents, decision_ref -> +decisions, diagram edges source/target -> nodes, wave milestones -> milestone IDs. + +--- + +## QR Block Pattern + +Work -> Decompose -> Verify (parallel) -> Gate. Repeated per phase +(design, code, docs). Gate is deterministic code, no LLM. Max 5 +iterations. Force-proceed after limit. + +QR tools: koan_qr_add_item, koan_qr_set_item, koan_qr_assign_group, +koan_qr_get_item, koan_qr_list_items, koan_qr_summary. + +--- + +## Current Implementation State (Feb 13 2026) + +Implemented: +- [x] Extension entry point with dual-mode detection +- [x] Context capture (3-phase: draft/verify/refine) +- [x] Plan-design architect subagent (6-step workflow) +- [x] 44+ plan mutation/getter tools with TypeBox schemas +- [x] Default-deny tool permissions (registry.ts) +- [x] WorkflowDispatch + PlanRef patterns +- [x] Subagent spawning with progress tracking +- [x] Disk-backed plan mutations (no finalize) +- [x] Plan validation (design + cross-references) + +Not yet implemented: +- [ ] Developer role (plan-code phase) +- [ ] Technical writer role (plan-docs phase) +- [ ] QR decompose subagent +- [ ] QR verify subagent (parallel) +- [ ] QR gate routing +- [ ] Fix mode (re-spawn with QR failure report) +- [ ] State persistence (appendEntry + session_start restore) +- [ ] Plan execution workflow (milestone execution) +- [ ] /koan execute command diff --git a/package-lock.json b/package-lock.json new file mode 100644 index 0000000..3859420 --- /dev/null +++ b/package-lock.json @@ -0,0 +1,4044 @@ +{ + "name": "@solatis/koan", + "version": "0.0.1", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "@solatis/koan", + "version": "0.0.1", + "license": "Apache-2.0", + "dependencies": { + "@sinclair/typebox": "^0.32.30" + }, + "devDependencies": { + "@mariozechner/pi-coding-agent": "^0.52.10", + "typescript": "^5.9.3" + } + }, + "node_modules/@anthropic-ai/sdk": { + "version": "0.73.0", + 
"resolved": "https://registry.npmjs.org/@anthropic-ai/sdk/-/sdk-0.73.0.tgz", + "integrity": "sha512-URURVzhxXGJDGUGFunIOtBlSl7KWvZiAAKY/ttTkZAkXT9bTPqdk2eK0b8qqSxXpikh3QKPnPYpiyX98zf5ebw==", + "dev": true, + "license": "MIT", + "dependencies": { + "json-schema-to-ts": "^3.1.1" + }, + "bin": { + "anthropic-ai-sdk": "bin/cli" + }, + "peerDependencies": { + "zod": "^3.25.0 || ^4.0.0" + }, + "peerDependenciesMeta": { + "zod": { + "optional": true + } + } + }, + "node_modules/@aws-crypto/crc32": { + "version": "5.2.0", + "resolved": "https://registry.npmjs.org/@aws-crypto/crc32/-/crc32-5.2.0.tgz", + "integrity": "sha512-nLbCWqQNgUiwwtFsen1AdzAtvuLRsQS8rYgMuxCrdKf9kOssamGLuPwyTY9wyYblNr9+1XM8v6zoDTPPSIeANg==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@aws-crypto/util": "^5.2.0", + "@aws-sdk/types": "^3.222.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=16.0.0" + } + }, + "node_modules/@aws-crypto/sha256-browser": { + "version": "5.2.0", + "resolved": "https://registry.npmjs.org/@aws-crypto/sha256-browser/-/sha256-browser-5.2.0.tgz", + "integrity": "sha512-AXfN/lGotSQwu6HNcEsIASo7kWXZ5HYWvfOmSNKDsEqC4OashTp8alTmaz+F7TC2L083SFv5RdB+qU3Vs1kZqw==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@aws-crypto/sha256-js": "^5.2.0", + "@aws-crypto/supports-web-crypto": "^5.2.0", + "@aws-crypto/util": "^5.2.0", + "@aws-sdk/types": "^3.222.0", + "@aws-sdk/util-locate-window": "^3.0.0", + "@smithy/util-utf8": "^2.0.0", + "tslib": "^2.6.2" + } + }, + "node_modules/@aws-crypto/sha256-browser/node_modules/@smithy/is-array-buffer": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/@smithy/is-array-buffer/-/is-array-buffer-2.2.0.tgz", + "integrity": "sha512-GGP3O9QFD24uGeAXYUjwSTXARoqpZykHadOmA8G5vfJPK0/DC67qa//0qvqrJzL1xc8WQWX7/yc7fwudjPHPhA==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + 
"node_modules/@aws-crypto/sha256-browser/node_modules/@smithy/util-buffer-from": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/@smithy/util-buffer-from/-/util-buffer-from-2.2.0.tgz", + "integrity": "sha512-IJdWBbTcMQ6DA0gdNhh/BwrLkDR+ADW5Kr1aZmd4k3DIF6ezMV4R2NIAmT08wQJ3yUK82thHWmC/TnK/wpMMIA==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@smithy/is-array-buffer": "^2.2.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@aws-crypto/sha256-browser/node_modules/@smithy/util-utf8": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/@smithy/util-utf8/-/util-utf8-2.3.0.tgz", + "integrity": "sha512-R8Rdn8Hy72KKcebgLiv8jQcQkXoLMOGGv5uI1/k0l+snqkOzQ1R0ChUBCxWMlBsFMekWjq0wRudIweFs7sKT5A==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@smithy/util-buffer-from": "^2.2.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@aws-crypto/sha256-js": { + "version": "5.2.0", + "resolved": "https://registry.npmjs.org/@aws-crypto/sha256-js/-/sha256-js-5.2.0.tgz", + "integrity": "sha512-FFQQyu7edu4ufvIZ+OadFpHHOt+eSTBaYaki44c+akjg7qZg9oOQeLlk77F6tSYqjDAFClrHJk9tMf0HdVyOvA==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@aws-crypto/util": "^5.2.0", + "@aws-sdk/types": "^3.222.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=16.0.0" + } + }, + "node_modules/@aws-crypto/supports-web-crypto": { + "version": "5.2.0", + "resolved": "https://registry.npmjs.org/@aws-crypto/supports-web-crypto/-/supports-web-crypto-5.2.0.tgz", + "integrity": "sha512-iAvUotm021kM33eCdNfwIN//F77/IADDSs58i+MDaOqFrVjZo9bAal0NK7HurRuWLLpF1iLX7gbWrjHjeo+YFg==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "tslib": "^2.6.2" + } + }, + "node_modules/@aws-crypto/util": { + "version": "5.2.0", + "resolved": "https://registry.npmjs.org/@aws-crypto/util/-/util-5.2.0.tgz", + "integrity": 
"sha512-4RkU9EsI6ZpBve5fseQlGNUWKMa1RLPQ1dnjnQoe07ldfIzcsGb5hC5W0Dm7u423KWzawlrpbjXBrXCEv9zazQ==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@aws-sdk/types": "^3.222.0", + "@smithy/util-utf8": "^2.0.0", + "tslib": "^2.6.2" + } + }, + "node_modules/@aws-crypto/util/node_modules/@smithy/is-array-buffer": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/@smithy/is-array-buffer/-/is-array-buffer-2.2.0.tgz", + "integrity": "sha512-GGP3O9QFD24uGeAXYUjwSTXARoqpZykHadOmA8G5vfJPK0/DC67qa//0qvqrJzL1xc8WQWX7/yc7fwudjPHPhA==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@aws-crypto/util/node_modules/@smithy/util-buffer-from": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/@smithy/util-buffer-from/-/util-buffer-from-2.2.0.tgz", + "integrity": "sha512-IJdWBbTcMQ6DA0gdNhh/BwrLkDR+ADW5Kr1aZmd4k3DIF6ezMV4R2NIAmT08wQJ3yUK82thHWmC/TnK/wpMMIA==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@smithy/is-array-buffer": "^2.2.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@aws-crypto/util/node_modules/@smithy/util-utf8": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/@smithy/util-utf8/-/util-utf8-2.3.0.tgz", + "integrity": "sha512-R8Rdn8Hy72KKcebgLiv8jQcQkXoLMOGGv5uI1/k0l+snqkOzQ1R0ChUBCxWMlBsFMekWjq0wRudIweFs7sKT5A==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@smithy/util-buffer-from": "^2.2.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@aws-sdk/client-bedrock-runtime": { + "version": "3.989.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/client-bedrock-runtime/-/client-bedrock-runtime-3.989.0.tgz", + "integrity": "sha512-qVa5B0wXjIuPRhX1dcZo1sa9Y4ycI9tiqK7B4FLok67gUWckiKmEf1xQDFrTmc2eCK5g0CTaeiRdbeM1eWmW1Q==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + 
"@aws-crypto/sha256-browser": "5.2.0", + "@aws-crypto/sha256-js": "5.2.0", + "@aws-sdk/core": "^3.973.9", + "@aws-sdk/credential-provider-node": "^3.972.8", + "@aws-sdk/eventstream-handler-node": "^3.972.5", + "@aws-sdk/middleware-eventstream": "^3.972.3", + "@aws-sdk/middleware-host-header": "^3.972.3", + "@aws-sdk/middleware-logger": "^3.972.3", + "@aws-sdk/middleware-recursion-detection": "^3.972.3", + "@aws-sdk/middleware-user-agent": "^3.972.9", + "@aws-sdk/middleware-websocket": "^3.972.6", + "@aws-sdk/region-config-resolver": "^3.972.3", + "@aws-sdk/token-providers": "3.989.0", + "@aws-sdk/types": "^3.973.1", + "@aws-sdk/util-endpoints": "3.989.0", + "@aws-sdk/util-user-agent-browser": "^3.972.3", + "@aws-sdk/util-user-agent-node": "^3.972.7", + "@smithy/config-resolver": "^4.4.6", + "@smithy/core": "^3.23.0", + "@smithy/eventstream-serde-browser": "^4.2.8", + "@smithy/eventstream-serde-config-resolver": "^4.3.8", + "@smithy/eventstream-serde-node": "^4.2.8", + "@smithy/fetch-http-handler": "^5.3.9", + "@smithy/hash-node": "^4.2.8", + "@smithy/invalid-dependency": "^4.2.8", + "@smithy/middleware-content-length": "^4.2.8", + "@smithy/middleware-endpoint": "^4.4.14", + "@smithy/middleware-retry": "^4.4.31", + "@smithy/middleware-serde": "^4.2.9", + "@smithy/middleware-stack": "^4.2.8", + "@smithy/node-config-provider": "^4.3.8", + "@smithy/node-http-handler": "^4.4.10", + "@smithy/protocol-http": "^5.3.8", + "@smithy/smithy-client": "^4.11.3", + "@smithy/types": "^4.12.0", + "@smithy/url-parser": "^4.2.8", + "@smithy/util-base64": "^4.3.0", + "@smithy/util-body-length-browser": "^4.2.0", + "@smithy/util-body-length-node": "^4.2.1", + "@smithy/util-defaults-mode-browser": "^4.3.30", + "@smithy/util-defaults-mode-node": "^4.2.33", + "@smithy/util-endpoints": "^3.2.8", + "@smithy/util-middleware": "^4.2.8", + "@smithy/util-retry": "^4.2.8", + "@smithy/util-stream": "^4.5.12", + "@smithy/util-utf8": "^4.2.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": 
">=20.0.0" + } + }, + "node_modules/@aws-sdk/client-sso": { + "version": "3.989.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/client-sso/-/client-sso-3.989.0.tgz", + "integrity": "sha512-3sC+J1ru5VFXLgt9KZmXto0M7mnV5RkS6FNGwRMK3XrojSjHso9DLOWjbnXhbNv4motH8vu53L1HK2VC1+Nj5w==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@aws-crypto/sha256-browser": "5.2.0", + "@aws-crypto/sha256-js": "5.2.0", + "@aws-sdk/core": "^3.973.9", + "@aws-sdk/middleware-host-header": "^3.972.3", + "@aws-sdk/middleware-logger": "^3.972.3", + "@aws-sdk/middleware-recursion-detection": "^3.972.3", + "@aws-sdk/middleware-user-agent": "^3.972.9", + "@aws-sdk/region-config-resolver": "^3.972.3", + "@aws-sdk/types": "^3.973.1", + "@aws-sdk/util-endpoints": "3.989.0", + "@aws-sdk/util-user-agent-browser": "^3.972.3", + "@aws-sdk/util-user-agent-node": "^3.972.7", + "@smithy/config-resolver": "^4.4.6", + "@smithy/core": "^3.23.0", + "@smithy/fetch-http-handler": "^5.3.9", + "@smithy/hash-node": "^4.2.8", + "@smithy/invalid-dependency": "^4.2.8", + "@smithy/middleware-content-length": "^4.2.8", + "@smithy/middleware-endpoint": "^4.4.14", + "@smithy/middleware-retry": "^4.4.31", + "@smithy/middleware-serde": "^4.2.9", + "@smithy/middleware-stack": "^4.2.8", + "@smithy/node-config-provider": "^4.3.8", + "@smithy/node-http-handler": "^4.4.10", + "@smithy/protocol-http": "^5.3.8", + "@smithy/smithy-client": "^4.11.3", + "@smithy/types": "^4.12.0", + "@smithy/url-parser": "^4.2.8", + "@smithy/util-base64": "^4.3.0", + "@smithy/util-body-length-browser": "^4.2.0", + "@smithy/util-body-length-node": "^4.2.1", + "@smithy/util-defaults-mode-browser": "^4.3.30", + "@smithy/util-defaults-mode-node": "^4.2.33", + "@smithy/util-endpoints": "^3.2.8", + "@smithy/util-middleware": "^4.2.8", + "@smithy/util-retry": "^4.2.8", + "@smithy/util-utf8": "^4.2.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@aws-sdk/core": { + "version": "3.973.9", + 
"resolved": "https://registry.npmjs.org/@aws-sdk/core/-/core-3.973.9.tgz", + "integrity": "sha512-cyUOfJSizn8da7XrBEFBf4UMI4A6JQNX6ZFcKtYmh/CrwfzsDcabv3k/z0bNwQ3pX5aeq5sg/8Bs/ASiL0bJaA==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@aws-sdk/types": "^3.973.1", + "@aws-sdk/xml-builder": "^3.972.4", + "@smithy/core": "^3.23.0", + "@smithy/node-config-provider": "^4.3.8", + "@smithy/property-provider": "^4.2.8", + "@smithy/protocol-http": "^5.3.8", + "@smithy/signature-v4": "^5.3.8", + "@smithy/smithy-client": "^4.11.3", + "@smithy/types": "^4.12.0", + "@smithy/util-base64": "^4.3.0", + "@smithy/util-middleware": "^4.2.8", + "@smithy/util-utf8": "^4.2.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@aws-sdk/credential-provider-env": { + "version": "3.972.7", + "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-env/-/credential-provider-env-3.972.7.tgz", + "integrity": "sha512-r8kBtglvLjGxBT87l6Lqkh9fL8yJJ6O4CYQPjKlj3AkCuL4/4784x3rxxXWw9LTKXOo114VB6mjxAuy5pI7XIg==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@aws-sdk/core": "^3.973.9", + "@aws-sdk/types": "^3.973.1", + "@smithy/property-provider": "^4.2.8", + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@aws-sdk/credential-provider-http": { + "version": "3.972.9", + "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-http/-/credential-provider-http-3.972.9.tgz", + "integrity": "sha512-40caFblEg/TPrp9EpvyMxp4xlJ5TuTI+A8H6g8FhHn2hfH2PObFAPLF9d5AljK/G69E1YtTklkuQeAwPlV3w8Q==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@aws-sdk/core": "^3.973.9", + "@aws-sdk/types": "^3.973.1", + "@smithy/fetch-http-handler": "^5.3.9", + "@smithy/node-http-handler": "^4.4.10", + "@smithy/property-provider": "^4.2.8", + "@smithy/protocol-http": "^5.3.8", + "@smithy/smithy-client": "^4.11.3", + "@smithy/types": "^4.12.0", + 
"@smithy/util-stream": "^4.5.12", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@aws-sdk/credential-provider-ini": { + "version": "3.972.7", + "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-ini/-/credential-provider-ini-3.972.7.tgz", + "integrity": "sha512-zeYKrMwM5bCkHFho/x3+1OL0vcZQ0OhTR7k35tLq74+GP5ieV3juHXTZfa2LVE0Bg75cHIIerpX0gomVOhzo/w==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@aws-sdk/core": "^3.973.9", + "@aws-sdk/credential-provider-env": "^3.972.7", + "@aws-sdk/credential-provider-http": "^3.972.9", + "@aws-sdk/credential-provider-login": "^3.972.7", + "@aws-sdk/credential-provider-process": "^3.972.7", + "@aws-sdk/credential-provider-sso": "^3.972.7", + "@aws-sdk/credential-provider-web-identity": "^3.972.7", + "@aws-sdk/nested-clients": "3.989.0", + "@aws-sdk/types": "^3.973.1", + "@smithy/credential-provider-imds": "^4.2.8", + "@smithy/property-provider": "^4.2.8", + "@smithy/shared-ini-file-loader": "^4.4.3", + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@aws-sdk/credential-provider-login": { + "version": "3.972.7", + "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-login/-/credential-provider-login-3.972.7.tgz", + "integrity": "sha512-Q103cLU6OjAllYjX7+V+PKQw654jjvZUkD+lbUUiFbqut6gR5zwl1DrelvJPM5hnzIty7BCaxaRB3KMuz3M/ug==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@aws-sdk/core": "^3.973.9", + "@aws-sdk/nested-clients": "3.989.0", + "@aws-sdk/types": "^3.973.1", + "@smithy/property-provider": "^4.2.8", + "@smithy/protocol-http": "^5.3.8", + "@smithy/shared-ini-file-loader": "^4.4.3", + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@aws-sdk/credential-provider-node": { + "version": "3.972.8", + "resolved": 
"https://registry.npmjs.org/@aws-sdk/credential-provider-node/-/credential-provider-node-3.972.8.tgz", + "integrity": "sha512-AaDVOT7iNJyLjc3j91VlucPZ4J8Bw+eu9sllRDugJqhHWYyR3Iyp2huBUW8A3+DfHoh70sxGkY92cThAicSzlQ==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@aws-sdk/credential-provider-env": "^3.972.7", + "@aws-sdk/credential-provider-http": "^3.972.9", + "@aws-sdk/credential-provider-ini": "^3.972.7", + "@aws-sdk/credential-provider-process": "^3.972.7", + "@aws-sdk/credential-provider-sso": "^3.972.7", + "@aws-sdk/credential-provider-web-identity": "^3.972.7", + "@aws-sdk/types": "^3.973.1", + "@smithy/credential-provider-imds": "^4.2.8", + "@smithy/property-provider": "^4.2.8", + "@smithy/shared-ini-file-loader": "^4.4.3", + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@aws-sdk/credential-provider-process": { + "version": "3.972.7", + "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-process/-/credential-provider-process-3.972.7.tgz", + "integrity": "sha512-hxMo1V3ujWWrQSONxQJAElnjredkRpB6p8SDjnvRq70IwYY38R/CZSys0IbhRPxdgWZ5j12yDRk2OXhxw4Gj3g==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@aws-sdk/core": "^3.973.9", + "@aws-sdk/types": "^3.973.1", + "@smithy/property-provider": "^4.2.8", + "@smithy/shared-ini-file-loader": "^4.4.3", + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@aws-sdk/credential-provider-sso": { + "version": "3.972.7", + "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-sso/-/credential-provider-sso-3.972.7.tgz", + "integrity": "sha512-ZGKBOHEj8Ap15jhG2XMncQmKLTqA++2DVU2eZfLu3T/pkwDyhCp5eZv5c/acFxbZcA/6mtxke+vzO/n+aeHs4A==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@aws-sdk/client-sso": "3.989.0", + "@aws-sdk/core": "^3.973.9", + "@aws-sdk/token-providers": "3.989.0", + "@aws-sdk/types": "^3.973.1", + 
"@smithy/property-provider": "^4.2.8", + "@smithy/shared-ini-file-loader": "^4.4.3", + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@aws-sdk/credential-provider-web-identity": { + "version": "3.972.7", + "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-web-identity/-/credential-provider-web-identity-3.972.7.tgz", + "integrity": "sha512-AbYupBIoSJoVMlbMqBhNvPhqj+CdGtzW7Uk4ZIMBm2br18pc3rkG1VaKVFV85H87QCvLHEnni1idJjaX1wOmIw==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@aws-sdk/core": "^3.973.9", + "@aws-sdk/nested-clients": "3.989.0", + "@aws-sdk/types": "^3.973.1", + "@smithy/property-provider": "^4.2.8", + "@smithy/shared-ini-file-loader": "^4.4.3", + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@aws-sdk/eventstream-handler-node": { + "version": "3.972.5", + "resolved": "https://registry.npmjs.org/@aws-sdk/eventstream-handler-node/-/eventstream-handler-node-3.972.5.tgz", + "integrity": "sha512-xEmd3dnyn83K6t4AJxBJA63wpEoCD45ERFG0XMTViD2E/Ohls9TLxjOWPb1PAxR9/46cKy/TImez1GoqP6xVNQ==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@aws-sdk/types": "^3.973.1", + "@smithy/eventstream-codec": "^4.2.8", + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@aws-sdk/middleware-eventstream": { + "version": "3.972.3", + "resolved": "https://registry.npmjs.org/@aws-sdk/middleware-eventstream/-/middleware-eventstream-3.972.3.tgz", + "integrity": "sha512-pbvZ6Ye/Ks6BAZPa3RhsNjHrvxU9li25PMhSdDpbX0jzdpKpAkIR65gXSNKmA/REnSdEMWSD4vKUW+5eMFzB6w==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@aws-sdk/types": "^3.973.1", + "@smithy/protocol-http": "^5.3.8", + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@aws-sdk/middleware-host-header": { 
+ "version": "3.972.3", + "resolved": "https://registry.npmjs.org/@aws-sdk/middleware-host-header/-/middleware-host-header-3.972.3.tgz", + "integrity": "sha512-aknPTb2M+G3s+0qLCx4Li/qGZH8IIYjugHMv15JTYMe6mgZO8VBpYgeGYsNMGCqCZOcWzuf900jFBG5bopfzmA==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@aws-sdk/types": "^3.973.1", + "@smithy/protocol-http": "^5.3.8", + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@aws-sdk/middleware-logger": { + "version": "3.972.3", + "resolved": "https://registry.npmjs.org/@aws-sdk/middleware-logger/-/middleware-logger-3.972.3.tgz", + "integrity": "sha512-Ftg09xNNRqaz9QNzlfdQWfpqMCJbsQdnZVJP55jfhbKi1+FTWxGuvfPoBhDHIovqWKjqbuiew3HuhxbJ0+OjgA==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@aws-sdk/types": "^3.973.1", + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@aws-sdk/middleware-recursion-detection": { + "version": "3.972.3", + "resolved": "https://registry.npmjs.org/@aws-sdk/middleware-recursion-detection/-/middleware-recursion-detection-3.972.3.tgz", + "integrity": "sha512-PY57QhzNuXHnwbJgbWYTrqIDHYSeOlhfYERTAuc16LKZpTZRJUjzBFokp9hF7u1fuGeE3D70ERXzdbMBOqQz7Q==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@aws-sdk/types": "^3.973.1", + "@aws/lambda-invoke-store": "^0.2.2", + "@smithy/protocol-http": "^5.3.8", + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@aws-sdk/middleware-user-agent": { + "version": "3.972.9", + "resolved": "https://registry.npmjs.org/@aws-sdk/middleware-user-agent/-/middleware-user-agent-3.972.9.tgz", + "integrity": "sha512-1g1B7yf7KzessB0mKNiV9gAHEwbM662xgU+VE4LxyGe6kVGZ8LqYsngjhE+Stna09CJ7Pxkjr6Uq1OtbGwJJJg==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@aws-sdk/core": "^3.973.9", + "@aws-sdk/types": "^3.973.1", + 
"@aws-sdk/util-endpoints": "3.989.0", + "@smithy/core": "^3.23.0", + "@smithy/protocol-http": "^5.3.8", + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@aws-sdk/middleware-websocket": { + "version": "3.972.6", + "resolved": "https://registry.npmjs.org/@aws-sdk/middleware-websocket/-/middleware-websocket-3.972.6.tgz", + "integrity": "sha512-1DedO6N3m8zQ/vG6twNiHtsdwBgk773VdavLEbB3NXeKZDlzSK1BTviqWwvJdKx5UnIy4kGGP6WWpCEFEt/bhQ==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@aws-sdk/types": "^3.973.1", + "@aws-sdk/util-format-url": "^3.972.3", + "@smithy/eventstream-codec": "^4.2.8", + "@smithy/eventstream-serde-browser": "^4.2.8", + "@smithy/fetch-http-handler": "^5.3.9", + "@smithy/protocol-http": "^5.3.8", + "@smithy/signature-v4": "^5.3.8", + "@smithy/types": "^4.12.0", + "@smithy/util-base64": "^4.3.0", + "@smithy/util-hex-encoding": "^4.2.0", + "@smithy/util-utf8": "^4.2.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">= 14.0.0" + } + }, + "node_modules/@aws-sdk/nested-clients": { + "version": "3.989.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/nested-clients/-/nested-clients-3.989.0.tgz", + "integrity": "sha512-Dbk2HMPU3mb6RrSRzgf0WCaWSbgtZG258maCpuN2/ONcAQNpOTw99V5fU5CA1qVK6Vkm4Fwj2cnOnw7wbGVlOw==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@aws-crypto/sha256-browser": "5.2.0", + "@aws-crypto/sha256-js": "5.2.0", + "@aws-sdk/core": "^3.973.9", + "@aws-sdk/middleware-host-header": "^3.972.3", + "@aws-sdk/middleware-logger": "^3.972.3", + "@aws-sdk/middleware-recursion-detection": "^3.972.3", + "@aws-sdk/middleware-user-agent": "^3.972.9", + "@aws-sdk/region-config-resolver": "^3.972.3", + "@aws-sdk/types": "^3.973.1", + "@aws-sdk/util-endpoints": "3.989.0", + "@aws-sdk/util-user-agent-browser": "^3.972.3", + "@aws-sdk/util-user-agent-node": "^3.972.7", + "@smithy/config-resolver": "^4.4.6", + "@smithy/core": "^3.23.0", + 
"@smithy/fetch-http-handler": "^5.3.9", + "@smithy/hash-node": "^4.2.8", + "@smithy/invalid-dependency": "^4.2.8", + "@smithy/middleware-content-length": "^4.2.8", + "@smithy/middleware-endpoint": "^4.4.14", + "@smithy/middleware-retry": "^4.4.31", + "@smithy/middleware-serde": "^4.2.9", + "@smithy/middleware-stack": "^4.2.8", + "@smithy/node-config-provider": "^4.3.8", + "@smithy/node-http-handler": "^4.4.10", + "@smithy/protocol-http": "^5.3.8", + "@smithy/smithy-client": "^4.11.3", + "@smithy/types": "^4.12.0", + "@smithy/url-parser": "^4.2.8", + "@smithy/util-base64": "^4.3.0", + "@smithy/util-body-length-browser": "^4.2.0", + "@smithy/util-body-length-node": "^4.2.1", + "@smithy/util-defaults-mode-browser": "^4.3.30", + "@smithy/util-defaults-mode-node": "^4.2.33", + "@smithy/util-endpoints": "^3.2.8", + "@smithy/util-middleware": "^4.2.8", + "@smithy/util-retry": "^4.2.8", + "@smithy/util-utf8": "^4.2.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@aws-sdk/region-config-resolver": { + "version": "3.972.3", + "resolved": "https://registry.npmjs.org/@aws-sdk/region-config-resolver/-/region-config-resolver-3.972.3.tgz", + "integrity": "sha512-v4J8qYAWfOMcZ4MJUyatntOicTzEMaU7j3OpkRCGGFSL2NgXQ5VbxauIyORA+pxdKZ0qQG2tCQjQjZDlXEC3Ow==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@aws-sdk/types": "^3.973.1", + "@smithy/config-resolver": "^4.4.6", + "@smithy/node-config-provider": "^4.3.8", + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@aws-sdk/token-providers": { + "version": "3.989.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/token-providers/-/token-providers-3.989.0.tgz", + "integrity": "sha512-OdBByMv+OjOZoekrk4THPFpLuND5aIQbDHCGh3n2rvifAbm31+6e0OLhxSeCF1UMPm+nKq12bXYYEoCIx5SQBg==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@aws-sdk/core": "^3.973.9", + "@aws-sdk/nested-clients": "3.989.0", + 
"@aws-sdk/types": "^3.973.1", + "@smithy/property-provider": "^4.2.8", + "@smithy/shared-ini-file-loader": "^4.4.3", + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@aws-sdk/types": { + "version": "3.973.1", + "resolved": "https://registry.npmjs.org/@aws-sdk/types/-/types-3.973.1.tgz", + "integrity": "sha512-DwHBiMNOB468JiX6+i34c+THsKHErYUdNQ3HexeXZvVn4zouLjgaS4FejiGSi2HyBuzuyHg7SuOPmjSvoU9NRg==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@aws-sdk/util-endpoints": { + "version": "3.989.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/util-endpoints/-/util-endpoints-3.989.0.tgz", + "integrity": "sha512-eKmAOeQM4Qusq0jtcbZPiNWky8XaojByKC/n+THbJ8vJf7t4ys8LlcZ4PrBSHZISe9cC484mQsPVOQh6iySjqw==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@aws-sdk/types": "^3.973.1", + "@smithy/types": "^4.12.0", + "@smithy/url-parser": "^4.2.8", + "@smithy/util-endpoints": "^3.2.8", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@aws-sdk/util-format-url": { + "version": "3.972.3", + "resolved": "https://registry.npmjs.org/@aws-sdk/util-format-url/-/util-format-url-3.972.3.tgz", + "integrity": "sha512-n7F2ycckcKFXa01vAsT/SJdjFHfKH9s96QHcs5gn8AaaigASICeME8WdUL9uBp8XV/OVwEt8+6gzn6KFUgQa8g==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@aws-sdk/types": "^3.973.1", + "@smithy/querystring-builder": "^4.2.8", + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@aws-sdk/util-locate-window": { + "version": "3.965.4", + "resolved": "https://registry.npmjs.org/@aws-sdk/util-locate-window/-/util-locate-window-3.965.4.tgz", + "integrity": "sha512-H1onv5SkgPBK2P6JR2MjGgbOnttoNzSPIRoeZTNPZYyaplwGg50zS3amXvXqF0/qfXpWEC9rLWU564QTB9bSog==", + "dev": 
true, + "license": "Apache-2.0", + "dependencies": { + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@aws-sdk/util-user-agent-browser": { + "version": "3.972.3", + "resolved": "https://registry.npmjs.org/@aws-sdk/util-user-agent-browser/-/util-user-agent-browser-3.972.3.tgz", + "integrity": "sha512-JurOwkRUcXD/5MTDBcqdyQ9eVedtAsZgw5rBwktsPTN7QtPiS2Ld1jkJepNgYoCufz1Wcut9iup7GJDoIHp8Fw==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@aws-sdk/types": "^3.973.1", + "@smithy/types": "^4.12.0", + "bowser": "^2.11.0", + "tslib": "^2.6.2" + } + }, + "node_modules/@aws-sdk/util-user-agent-node": { + "version": "3.972.7", + "resolved": "https://registry.npmjs.org/@aws-sdk/util-user-agent-node/-/util-user-agent-node-3.972.7.tgz", + "integrity": "sha512-oyhv+FjrgHjP+F16cmsrJzNP4qaRJzkV1n9Lvv4uyh3kLqo3rIe9NSBSBa35f2TedczfG2dD+kaQhHBB47D6Og==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@aws-sdk/middleware-user-agent": "^3.972.9", + "@aws-sdk/types": "^3.973.1", + "@smithy/node-config-provider": "^4.3.8", + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + }, + "peerDependencies": { + "aws-crt": ">=1.0.0" + }, + "peerDependenciesMeta": { + "aws-crt": { + "optional": true + } + } + }, + "node_modules/@aws-sdk/xml-builder": { + "version": "3.972.4", + "resolved": "https://registry.npmjs.org/@aws-sdk/xml-builder/-/xml-builder-3.972.4.tgz", + "integrity": "sha512-0zJ05ANfYqI6+rGqj8samZBFod0dPPousBjLEqg8WdxSgbMAkRgLyn81lP215Do0rFJ/17LIXwr7q0yK24mP6Q==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@smithy/types": "^4.12.0", + "fast-xml-parser": "5.3.4", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@aws/lambda-invoke-store": { + "version": "0.2.3", + "resolved": "https://registry.npmjs.org/@aws/lambda-invoke-store/-/lambda-invoke-store-0.2.3.tgz", + "integrity": 
"sha512-oLvsaPMTBejkkmHhjf09xTgk71mOqyr/409NKhRIL08If7AhVfUsJhVsx386uJaqNd42v9kWamQ9lFbkoC2dYw==", + "dev": true, + "license": "Apache-2.0", + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@babel/runtime": { + "version": "7.28.6", + "resolved": "https://registry.npmjs.org/@babel/runtime/-/runtime-7.28.6.tgz", + "integrity": "sha512-05WQkdpL9COIMz4LjTxGpPNCdlpyimKppYNoJ5Di5EUObifl8t4tuLuUBBZEpoLYOmfvIWrsp9fCl0HoPRVTdA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@borewit/text-codec": { + "version": "0.2.1", + "resolved": "https://registry.npmjs.org/@borewit/text-codec/-/text-codec-0.2.1.tgz", + "integrity": "sha512-k7vvKPbf7J2fZ5klGRD9AeKfUvojuZIQ3BT5u7Jfv+puwXkUBUT5PVyMDfJZpy30CBDXGMgw7fguK/lpOMBvgw==", + "dev": true, + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/Borewit" + } + }, + "node_modules/@google/genai": { + "version": "1.41.0", + "resolved": "https://registry.npmjs.org/@google/genai/-/genai-1.41.0.tgz", + "integrity": "sha512-S4WGil+PG0NBQRAx+0yrQuM/TWOLn2gGEy5wn4IsoOI6ouHad0P61p3OWdhJ3aqr9kfj8o904i/jevfaGoGuIQ==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "google-auth-library": "^10.3.0", + "p-retry": "^7.1.1", + "protobufjs": "^7.5.4", + "ws": "^8.18.0" + }, + "engines": { + "node": ">=20.0.0" + }, + "peerDependencies": { + "@modelcontextprotocol/sdk": "^1.25.2" + }, + "peerDependenciesMeta": { + "@modelcontextprotocol/sdk": { + "optional": true + } + } + }, + "node_modules/@isaacs/cliui": { + "version": "9.0.0", + "resolved": "https://registry.npmjs.org/@isaacs/cliui/-/cliui-9.0.0.tgz", + "integrity": "sha512-AokJm4tuBHillT+FpMtxQ60n8ObyXBatq7jD2/JA9dxbDDokKQm8KMht5ibGzLVU9IJDIKK4TPKgMHEYMn3lMg==", + "dev": true, + "license": "BlueOak-1.0.0", + "engines": { + "node": ">=18" + } + }, + "node_modules/@mariozechner/clipboard": { + "version": "0.3.2", + "resolved": 
"https://registry.npmjs.org/@mariozechner/clipboard/-/clipboard-0.3.2.tgz", + "integrity": "sha512-IHQpksNjo7EAtGuHFU+tbWDp5LarH3HU/8WiB9O70ZEoBPHOg0/6afwSLK0QyNMMmx4Bpi/zl6+DcBXe95nWYA==", + "dev": true, + "license": "MIT", + "optional": true, + "engines": { + "node": ">= 10" + }, + "optionalDependencies": { + "@mariozechner/clipboard-darwin-arm64": "0.3.2", + "@mariozechner/clipboard-darwin-universal": "0.3.2", + "@mariozechner/clipboard-darwin-x64": "0.3.2", + "@mariozechner/clipboard-linux-arm64-gnu": "0.3.2", + "@mariozechner/clipboard-linux-arm64-musl": "0.3.2", + "@mariozechner/clipboard-linux-riscv64-gnu": "0.3.2", + "@mariozechner/clipboard-linux-x64-gnu": "0.3.2", + "@mariozechner/clipboard-linux-x64-musl": "0.3.2", + "@mariozechner/clipboard-win32-arm64-msvc": "0.3.2", + "@mariozechner/clipboard-win32-x64-msvc": "0.3.2" + } + }, + "node_modules/@mariozechner/clipboard-darwin-arm64": { + "version": "0.3.2", + "resolved": "https://registry.npmjs.org/@mariozechner/clipboard-darwin-arm64/-/clipboard-darwin-arm64-0.3.2.tgz", + "integrity": "sha512-uBf6K7Je1ihsgvmWxA8UCGCeI+nbRVRXoarZdLjl6slz94Zs1tNKFZqx7aCI5O1i3e0B6ja82zZ06BWrl0MCVw==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@mariozechner/clipboard-darwin-universal": { + "version": "0.3.2", + "resolved": "https://registry.npmjs.org/@mariozechner/clipboard-darwin-universal/-/clipboard-darwin-universal-0.3.2.tgz", + "integrity": "sha512-mxSheKTW2U9LsBdXy0SdmdCAE5HqNS9QUmpNHLnfJ+SsbFKALjEZc5oRrVMXxGQSirDvYf5bjmRyT0QYYonnlg==", + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@mariozechner/clipboard-darwin-x64": { + "version": "0.3.2", + "resolved": "https://registry.npmjs.org/@mariozechner/clipboard-darwin-x64/-/clipboard-darwin-x64-0.3.2.tgz", + "integrity": 
"sha512-U1BcVEoidvwIp95+HJswSW+xr28EQiHR7rZjH6pn8Sja5yO4Yoe3yCN0Zm8Lo72BbSOK/fTSq0je7CJpaPCspg==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@mariozechner/clipboard-linux-arm64-gnu": { + "version": "0.3.2", + "resolved": "https://registry.npmjs.org/@mariozechner/clipboard-linux-arm64-gnu/-/clipboard-linux-arm64-gnu-0.3.2.tgz", + "integrity": "sha512-BsinwG3yWTIjdgNCxsFlip7LkfwPk+ruw/aFCXHUg/fb5XC/Ksp+YMQ7u0LUtiKzIv/7LMXgZInJQH6gxbAaqQ==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@mariozechner/clipboard-linux-arm64-musl": { + "version": "0.3.2", + "resolved": "https://registry.npmjs.org/@mariozechner/clipboard-linux-arm64-musl/-/clipboard-linux-arm64-musl-0.3.2.tgz", + "integrity": "sha512-0/Gi5Xq2V6goXBop19ePoHvXsmJD9SzFlO3S+d6+T2b+BlPcpOu3Oa0wTjl+cZrLAAEzA86aPNBI+VVAFDFPKw==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@mariozechner/clipboard-linux-riscv64-gnu": { + "version": "0.3.2", + "resolved": "https://registry.npmjs.org/@mariozechner/clipboard-linux-riscv64-gnu/-/clipboard-linux-riscv64-gnu-0.3.2.tgz", + "integrity": "sha512-2AFFiXB24qf0zOZsxI1GJGb9wQGlOJyN6UwoXqmKS3dpQi/l6ix30IzDDA4c4ZcCcx4D+9HLYXhC1w7Sov8pXA==", + "cpu": [ + "riscv64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@mariozechner/clipboard-linux-x64-gnu": { + "version": "0.3.2", + "resolved": "https://registry.npmjs.org/@mariozechner/clipboard-linux-x64-gnu/-/clipboard-linux-x64-gnu-0.3.2.tgz", + "integrity": "sha512-v6fVnsn7WMGg73Dab8QMwyFce7tzGfgEixKgzLP8f1GJqkJZi5zO4k4FOHzSgUufgLil63gnxvMpjWkgfeQN7A==", + "cpu": [ + "x64" + ], + "dev": 
true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@mariozechner/clipboard-linux-x64-musl": { + "version": "0.3.2", + "resolved": "https://registry.npmjs.org/@mariozechner/clipboard-linux-x64-musl/-/clipboard-linux-x64-musl-0.3.2.tgz", + "integrity": "sha512-xVUtnoMQ8v2JVyfJLKKXACA6avdnchdbBkTsZs8BgJQo29qwCp5NIHAUO8gbJ40iaEGToW5RlmVk2M9V0HsHEw==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@mariozechner/clipboard-win32-arm64-msvc": { + "version": "0.3.2", + "resolved": "https://registry.npmjs.org/@mariozechner/clipboard-win32-arm64-msvc/-/clipboard-win32-arm64-msvc-0.3.2.tgz", + "integrity": "sha512-AEgg95TNi8TGgak2wSXZkXKCvAUTjWoU1Pqb0ON7JHrX78p616XUFNTJohtIon3e0w6k0pYPZeCuqRCza/Tqeg==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@mariozechner/clipboard-win32-x64-msvc": { + "version": "0.3.2", + "resolved": "https://registry.npmjs.org/@mariozechner/clipboard-win32-x64-msvc/-/clipboard-win32-x64-msvc-0.3.2.tgz", + "integrity": "sha512-tGRuYpZwDOD7HBrCpyRuhGnHHSCknELvqwKKUG4JSfSB7JIU7LKRh6zx6fMUOQd8uISK35TjFg5UcNih+vJhFA==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@mariozechner/jiti": { + "version": "2.6.5", + "resolved": "https://registry.npmjs.org/@mariozechner/jiti/-/jiti-2.6.5.tgz", + "integrity": "sha512-faGUlTcXka5l7rv0lP3K3vGW/ejRuOS24RR2aSFWREUQqzjgdsuWNo/IiPqL3kWRGt6Ahl2+qcDAwtdeWeuGUw==", + "dev": true, + "license": "MIT", + "dependencies": { + "std-env": "^3.10.0", + "yoctocolors": "^2.1.2" + }, + "bin": { + "jiti": "lib/jiti-cli.mjs" + } + }, + "node_modules/@mariozechner/pi-agent-core": { + "version": "0.52.10", + 
"resolved": "https://registry.npmjs.org/@mariozechner/pi-agent-core/-/pi-agent-core-0.52.10.tgz", + "integrity": "sha512-rTM3ug6rMuDFbQINympIIV9CW3Z8ONyBSehsoDNWtdXTWNA7Nzpx3mAYsA91B856HM0Zbl45UBNRN1YHDeaFTg==", + "dev": true, + "license": "MIT", + "dependencies": { + "@mariozechner/pi-ai": "^0.52.10" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@mariozechner/pi-ai": { + "version": "0.52.10", + "resolved": "https://registry.npmjs.org/@mariozechner/pi-ai/-/pi-ai-0.52.10.tgz", + "integrity": "sha512-dgV5emMbDoz0GGyDy6CjY+RcW/PqwQvUzqAehjDUj1M+3b7+fIB7E2WKZQKvjYIY79qTvAIyrdEmIs2BQX+enA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@anthropic-ai/sdk": "^0.73.0", + "@aws-sdk/client-bedrock-runtime": "^3.983.0", + "@google/genai": "^1.40.0", + "@mistralai/mistralai": "1.10.0", + "@sinclair/typebox": "^0.34.41", + "ajv": "^8.17.1", + "ajv-formats": "^3.0.1", + "chalk": "^5.6.2", + "openai": "6.10.0", + "partial-json": "^0.1.7", + "proxy-agent": "^6.5.0", + "undici": "^7.19.1", + "zod-to-json-schema": "^3.24.6" + }, + "bin": { + "pi-ai": "dist/cli.js" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@mariozechner/pi-ai/node_modules/@sinclair/typebox": { + "version": "0.34.48", + "resolved": "https://registry.npmjs.org/@sinclair/typebox/-/typebox-0.34.48.tgz", + "integrity": "sha512-kKJTNuK3AQOrgjjotVxMrCn1sUJwM76wMszfq1kdU4uYVJjvEWuFQ6HgvLt4Xz3fSmZlTOxJ/Ie13KnIcWQXFA==", + "dev": true, + "license": "MIT" + }, + "node_modules/@mariozechner/pi-coding-agent": { + "version": "0.52.10", + "resolved": "https://registry.npmjs.org/@mariozechner/pi-coding-agent/-/pi-coding-agent-0.52.10.tgz", + "integrity": "sha512-88gBrk+aDKMe4M6hY63LT8ylXEeoNdwnKHB7Ijmxzw5ShtWl7+H8vTBIwxZu/5yNR2b4VhjB0NGi3khpwT5I1A==", + "dev": true, + "license": "MIT", + "dependencies": { + "@mariozechner/jiti": "^2.6.2", + "@mariozechner/pi-agent-core": "^0.52.10", + "@mariozechner/pi-ai": "^0.52.10", + "@mariozechner/pi-tui": "^0.52.10", + 
"@silvia-odwyer/photon-node": "^0.3.4", + "chalk": "^5.5.0", + "cli-highlight": "^2.1.11", + "diff": "^8.0.2", + "file-type": "^21.1.1", + "glob": "^13.0.1", + "hosted-git-info": "^9.0.2", + "ignore": "^7.0.5", + "marked": "^15.0.12", + "minimatch": "^10.1.1", + "proper-lockfile": "^4.1.2", + "yaml": "^2.8.2" + }, + "bin": { + "pi": "dist/cli.js" + }, + "engines": { + "node": ">=20.0.0" + }, + "optionalDependencies": { + "@mariozechner/clipboard": "^0.3.2" + } + }, + "node_modules/@mariozechner/pi-tui": { + "version": "0.52.10", + "resolved": "https://registry.npmjs.org/@mariozechner/pi-tui/-/pi-tui-0.52.10.tgz", + "integrity": "sha512-j0re5FXzznkrzC7BOc1fb+DUWYetRZAVSUbdZoxa6S5S7amxmIJzbSNCgKBaF1ZyY40jp+B5Z4W60Qc7Pn1rxA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/mime-types": "^2.1.4", + "chalk": "^5.5.0", + "get-east-asian-width": "^1.3.0", + "marked": "^15.0.12", + "mime-types": "^3.0.1" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@mistralai/mistralai": { + "version": "1.10.0", + "resolved": "https://registry.npmjs.org/@mistralai/mistralai/-/mistralai-1.10.0.tgz", + "integrity": "sha512-tdIgWs4Le8vpvPiUEWne6tK0qbVc+jMenujnvTqOjogrJUsCSQhus0tHTU1avDDh5//Rq2dFgP9mWRAdIEoBqg==", + "dev": true, + "dependencies": { + "zod": "^3.20.0", + "zod-to-json-schema": "^3.24.1" + } + }, + "node_modules/@mistralai/mistralai/node_modules/zod": { + "version": "3.25.76", + "resolved": "https://registry.npmjs.org/zod/-/zod-3.25.76.tgz", + "integrity": "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==", + "dev": true, + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/colinhacks" + } + }, + "node_modules/@pkgjs/parseargs": { + "version": "0.11.0", + "resolved": "https://registry.npmjs.org/@pkgjs/parseargs/-/parseargs-0.11.0.tgz", + "integrity": "sha512-+1VkjdD0QBLPodGrJUeqarH8VAIvQODIbwh9XpP5Syisf7YoQgsJKPNFoqqLQlu+VQ/tVSshMR6loPMn8U+dPg==", + "dev": true, + "license": 
"MIT", + "optional": true, + "engines": { + "node": ">=14" + } + }, + "node_modules/@protobufjs/aspromise": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/@protobufjs/aspromise/-/aspromise-1.1.2.tgz", + "integrity": "sha512-j+gKExEuLmKwvz3OgROXtrJ2UG2x8Ch2YZUxahh+s1F2HZ+wAceUNLkvy6zKCPVRkU++ZWQrdxsUeQXmcg4uoQ==", + "dev": true, + "license": "BSD-3-Clause" + }, + "node_modules/@protobufjs/base64": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/@protobufjs/base64/-/base64-1.1.2.tgz", + "integrity": "sha512-AZkcAA5vnN/v4PDqKyMR5lx7hZttPDgClv83E//FMNhR2TMcLUhfRUBHCmSl0oi9zMgDDqRUJkSxO3wm85+XLg==", + "dev": true, + "license": "BSD-3-Clause" + }, + "node_modules/@protobufjs/codegen": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/@protobufjs/codegen/-/codegen-2.0.4.tgz", + "integrity": "sha512-YyFaikqM5sH0ziFZCN3xDC7zeGaB/d0IUb9CATugHWbd1FRFwWwt4ld4OYMPWu5a3Xe01mGAULCdqhMlPl29Jg==", + "dev": true, + "license": "BSD-3-Clause" + }, + "node_modules/@protobufjs/eventemitter": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@protobufjs/eventemitter/-/eventemitter-1.1.0.tgz", + "integrity": "sha512-j9ednRT81vYJ9OfVuXG6ERSTdEL1xVsNgqpkxMsbIabzSo3goCjDIveeGv5d03om39ML71RdmrGNjG5SReBP/Q==", + "dev": true, + "license": "BSD-3-Clause" + }, + "node_modules/@protobufjs/fetch": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@protobufjs/fetch/-/fetch-1.1.0.tgz", + "integrity": "sha512-lljVXpqXebpsijW71PZaCYeIcE5on1w5DlQy5WH6GLbFryLUrBD4932W/E2BSpfRJWseIL4v/KPgBFxDOIdKpQ==", + "dev": true, + "license": "BSD-3-Clause", + "dependencies": { + "@protobufjs/aspromise": "^1.1.1", + "@protobufjs/inquire": "^1.1.0" + } + }, + "node_modules/@protobufjs/float": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/@protobufjs/float/-/float-1.0.2.tgz", + "integrity": "sha512-Ddb+kVXlXst9d+R9PfTIxh1EdNkgoRe5tOX6t01f1lYWOvJnSPDBlG241QLzcyPdoNTsblLUdujGSE4RzrTZGQ==", + "dev": true, + "license": 
"BSD-3-Clause" + }, + "node_modules/@protobufjs/inquire": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@protobufjs/inquire/-/inquire-1.1.0.tgz", + "integrity": "sha512-kdSefcPdruJiFMVSbn801t4vFK7KB/5gd2fYvrxhuJYg8ILrmn9SKSX2tZdV6V+ksulWqS7aXjBcRXl3wHoD9Q==", + "dev": true, + "license": "BSD-3-Clause" + }, + "node_modules/@protobufjs/path": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/@protobufjs/path/-/path-1.1.2.tgz", + "integrity": "sha512-6JOcJ5Tm08dOHAbdR3GrvP+yUUfkjG5ePsHYczMFLq3ZmMkAD98cDgcT2iA1lJ9NVwFd4tH/iSSoe44YWkltEA==", + "dev": true, + "license": "BSD-3-Clause" + }, + "node_modules/@protobufjs/pool": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@protobufjs/pool/-/pool-1.1.0.tgz", + "integrity": "sha512-0kELaGSIDBKvcgS4zkjz1PeddatrjYcmMWOlAuAPwAeccUrPHdUqo/J6LiymHHEiJT5NrF1UVwxY14f+fy4WQw==", + "dev": true, + "license": "BSD-3-Clause" + }, + "node_modules/@protobufjs/utf8": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@protobufjs/utf8/-/utf8-1.1.0.tgz", + "integrity": "sha512-Vvn3zZrhQZkkBE8LSuW3em98c0FwgO4nxzv6OdSxPKJIEKY2bGbHn+mhGIPerzI4twdxaP8/0+06HBpwf345Lw==", + "dev": true, + "license": "BSD-3-Clause" + }, + "node_modules/@silvia-odwyer/photon-node": { + "version": "0.3.4", + "resolved": "https://registry.npmjs.org/@silvia-odwyer/photon-node/-/photon-node-0.3.4.tgz", + "integrity": "sha512-bnly4BKB3KDTFxrUIcgCLbaeVVS8lrAkri1pEzskpmxu9MdfGQTy8b8EgcD83ywD3RPMsIulY8xJH5Awa+t9fA==", + "dev": true, + "license": "Apache-2.0" + }, + "node_modules/@sinclair/typebox": { + "version": "0.32.35", + "resolved": "https://registry.npmjs.org/@sinclair/typebox/-/typebox-0.32.35.tgz", + "integrity": "sha512-Ul3YyOTU++to8cgNkttakC0dWvpERr6RYoHO2W47DLbFvrwBDJUY31B1sImH6JZSYc4Kt4PyHtoPNu+vL2r2dA==", + "license": "MIT" + }, + "node_modules/@smithy/abort-controller": { + "version": "4.2.8", + "resolved": "https://registry.npmjs.org/@smithy/abort-controller/-/abort-controller-4.2.8.tgz", + 
"integrity": "sha512-peuVfkYHAmS5ybKxWcfraK7WBBP0J+rkfUcbHJJKQ4ir3UAUNQI+Y4Vt/PqSzGqgloJ5O1dk7+WzNL8wcCSXbw==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/config-resolver": { + "version": "4.4.6", + "resolved": "https://registry.npmjs.org/@smithy/config-resolver/-/config-resolver-4.4.6.tgz", + "integrity": "sha512-qJpzYC64kaj3S0fueiu3kXm8xPrR3PcXDPEgnaNMRn0EjNSZFoFjvbUp0YUDsRhN1CB90EnHJtbxWKevnH99UQ==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@smithy/node-config-provider": "^4.3.8", + "@smithy/types": "^4.12.0", + "@smithy/util-config-provider": "^4.2.0", + "@smithy/util-endpoints": "^3.2.8", + "@smithy/util-middleware": "^4.2.8", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/core": { + "version": "3.23.0", + "resolved": "https://registry.npmjs.org/@smithy/core/-/core-3.23.0.tgz", + "integrity": "sha512-Yq4UPVoQICM9zHnByLmG8632t2M0+yap4T7ANVw482J0W7HW0pOuxwVmeOwzJqX2Q89fkXz0Vybz55Wj2Xzrsg==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@smithy/middleware-serde": "^4.2.9", + "@smithy/protocol-http": "^5.3.8", + "@smithy/types": "^4.12.0", + "@smithy/util-base64": "^4.3.0", + "@smithy/util-body-length-browser": "^4.2.0", + "@smithy/util-middleware": "^4.2.8", + "@smithy/util-stream": "^4.5.12", + "@smithy/util-utf8": "^4.2.0", + "@smithy/uuid": "^1.1.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/credential-provider-imds": { + "version": "4.2.8", + "resolved": "https://registry.npmjs.org/@smithy/credential-provider-imds/-/credential-provider-imds-4.2.8.tgz", + "integrity": "sha512-FNT0xHS1c/CPN8upqbMFP83+ul5YgdisfCfkZ86Jh2NSmnqw/AJ6x5pEogVCTVvSm7j9MopRU89bmDelxuDMYw==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@smithy/node-config-provider": "^4.3.8", + 
"@smithy/property-provider": "^4.2.8", + "@smithy/types": "^4.12.0", + "@smithy/url-parser": "^4.2.8", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/eventstream-codec": { + "version": "4.2.8", + "resolved": "https://registry.npmjs.org/@smithy/eventstream-codec/-/eventstream-codec-4.2.8.tgz", + "integrity": "sha512-jS/O5Q14UsufqoGhov7dHLOPCzkYJl9QDzusI2Psh4wyYx/izhzvX9P4D69aTxcdfVhEPhjK+wYyn/PzLjKbbw==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@aws-crypto/crc32": "5.2.0", + "@smithy/types": "^4.12.0", + "@smithy/util-hex-encoding": "^4.2.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/eventstream-serde-browser": { + "version": "4.2.8", + "resolved": "https://registry.npmjs.org/@smithy/eventstream-serde-browser/-/eventstream-serde-browser-4.2.8.tgz", + "integrity": "sha512-MTfQT/CRQz5g24ayXdjg53V0mhucZth4PESoA5IhvaWVDTOQLfo8qI9vzqHcPsdd2v6sqfTYqF5L/l+pea5Uyw==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@smithy/eventstream-serde-universal": "^4.2.8", + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/eventstream-serde-config-resolver": { + "version": "4.3.8", + "resolved": "https://registry.npmjs.org/@smithy/eventstream-serde-config-resolver/-/eventstream-serde-config-resolver-4.3.8.tgz", + "integrity": "sha512-ah12+luBiDGzBruhu3efNy1IlbwSEdNiw8fOZksoKoWW1ZHvO/04MQsdnws/9Aj+5b0YXSSN2JXKy/ClIsW8MQ==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/eventstream-serde-node": { + "version": "4.2.8", + "resolved": "https://registry.npmjs.org/@smithy/eventstream-serde-node/-/eventstream-serde-node-4.2.8.tgz", + "integrity": "sha512-cYpCpp29z6EJHa5T9WL0KAlq3SOKUQkcgSoeRfRVwjGgSFl7Uh32eYGt7IDYCX20skiEdRffyDpvF2efEZPC0A==", + 
"dev": true, + "license": "Apache-2.0", + "dependencies": { + "@smithy/eventstream-serde-universal": "^4.2.8", + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/eventstream-serde-universal": { + "version": "4.2.8", + "resolved": "https://registry.npmjs.org/@smithy/eventstream-serde-universal/-/eventstream-serde-universal-4.2.8.tgz", + "integrity": "sha512-iJ6YNJd0bntJYnX6s52NC4WFYcZeKrPUr1Kmmr5AwZcwCSzVpS7oavAmxMR7pMq7V+D1G4s9F5NJK0xwOsKAlQ==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@smithy/eventstream-codec": "^4.2.8", + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/fetch-http-handler": { + "version": "5.3.9", + "resolved": "https://registry.npmjs.org/@smithy/fetch-http-handler/-/fetch-http-handler-5.3.9.tgz", + "integrity": "sha512-I4UhmcTYXBrct03rwzQX1Y/iqQlzVQaPxWjCjula++5EmWq9YGBrx6bbGqluGc1f0XEfhSkiY4jhLgbsJUMKRA==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@smithy/protocol-http": "^5.3.8", + "@smithy/querystring-builder": "^4.2.8", + "@smithy/types": "^4.12.0", + "@smithy/util-base64": "^4.3.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/hash-node": { + "version": "4.2.8", + "resolved": "https://registry.npmjs.org/@smithy/hash-node/-/hash-node-4.2.8.tgz", + "integrity": "sha512-7ZIlPbmaDGxVoxErDZnuFG18WekhbA/g2/i97wGj+wUBeS6pcUeAym8u4BXh/75RXWhgIJhyC11hBzig6MljwA==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@smithy/types": "^4.12.0", + "@smithy/util-buffer-from": "^4.2.0", + "@smithy/util-utf8": "^4.2.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/invalid-dependency": { + "version": "4.2.8", + "resolved": "https://registry.npmjs.org/@smithy/invalid-dependency/-/invalid-dependency-4.2.8.tgz", + "integrity": 
"sha512-N9iozRybwAQ2dn9Fot9kI6/w9vos2oTXLhtK7ovGqwZjlOcxu6XhPlpLpC+INsxktqHinn5gS2DXDjDF2kG5sQ==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/is-array-buffer": { + "version": "4.2.0", + "resolved": "https://registry.npmjs.org/@smithy/is-array-buffer/-/is-array-buffer-4.2.0.tgz", + "integrity": "sha512-DZZZBvC7sjcYh4MazJSGiWMI2L7E0oCiRHREDzIxi/M2LY79/21iXt6aPLHge82wi5LsuRF5A06Ds3+0mlh6CQ==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/middleware-content-length": { + "version": "4.2.8", + "resolved": "https://registry.npmjs.org/@smithy/middleware-content-length/-/middleware-content-length-4.2.8.tgz", + "integrity": "sha512-RO0jeoaYAB1qBRhfVyq0pMgBoUK34YEJxVxyjOWYZiOKOq2yMZ4MnVXMZCUDenpozHue207+9P5ilTV1zeda0A==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@smithy/protocol-http": "^5.3.8", + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/middleware-endpoint": { + "version": "4.4.14", + "resolved": "https://registry.npmjs.org/@smithy/middleware-endpoint/-/middleware-endpoint-4.4.14.tgz", + "integrity": "sha512-FUFNE5KVeaY6U/GL0nzAAHkaCHzXLZcY1EhtQnsAqhD8Du13oPKtMB9/0WK4/LK6a/T5OZ24wPoSShff5iI6Ag==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@smithy/core": "^3.23.0", + "@smithy/middleware-serde": "^4.2.9", + "@smithy/node-config-provider": "^4.3.8", + "@smithy/shared-ini-file-loader": "^4.4.3", + "@smithy/types": "^4.12.0", + "@smithy/url-parser": "^4.2.8", + "@smithy/util-middleware": "^4.2.8", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/middleware-retry": { + "version": "4.4.31", + "resolved": 
"https://registry.npmjs.org/@smithy/middleware-retry/-/middleware-retry-4.4.31.tgz", + "integrity": "sha512-RXBzLpMkIrxBPe4C8OmEOHvS8aH9RUuCOH++Acb5jZDEblxDjyg6un72X9IcbrGTJoiUwmI7hLypNfuDACypbg==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@smithy/node-config-provider": "^4.3.8", + "@smithy/protocol-http": "^5.3.8", + "@smithy/service-error-classification": "^4.2.8", + "@smithy/smithy-client": "^4.11.3", + "@smithy/types": "^4.12.0", + "@smithy/util-middleware": "^4.2.8", + "@smithy/util-retry": "^4.2.8", + "@smithy/uuid": "^1.1.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/middleware-serde": { + "version": "4.2.9", + "resolved": "https://registry.npmjs.org/@smithy/middleware-serde/-/middleware-serde-4.2.9.tgz", + "integrity": "sha512-eMNiej0u/snzDvlqRGSN3Vl0ESn3838+nKyVfF2FKNXFbi4SERYT6PR392D39iczngbqqGG0Jl1DlCnp7tBbXQ==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@smithy/protocol-http": "^5.3.8", + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/middleware-stack": { + "version": "4.2.8", + "resolved": "https://registry.npmjs.org/@smithy/middleware-stack/-/middleware-stack-4.2.8.tgz", + "integrity": "sha512-w6LCfOviTYQjBctOKSwy6A8FIkQy7ICvglrZFl6Bw4FmcQ1Z420fUtIhxaUZZshRe0VCq4kvDiPiXrPZAe8oRA==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/node-config-provider": { + "version": "4.3.8", + "resolved": "https://registry.npmjs.org/@smithy/node-config-provider/-/node-config-provider-4.3.8.tgz", + "integrity": "sha512-aFP1ai4lrbVlWjfpAfRSL8KFcnJQYfTl5QxLJXY32vghJrDuFyPZ6LtUL+JEGYiFRG1PfPLHLoxj107ulncLIg==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@smithy/property-provider": "^4.2.8", + "@smithy/shared-ini-file-loader": "^4.4.3", + 
"@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/node-http-handler": { + "version": "4.4.10", + "resolved": "https://registry.npmjs.org/@smithy/node-http-handler/-/node-http-handler-4.4.10.tgz", + "integrity": "sha512-u4YeUwOWRZaHbWaebvrs3UhwQwj+2VNmcVCwXcYTvPIuVyM7Ex1ftAj+fdbG/P4AkBwLq/+SKn+ydOI4ZJE9PA==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@smithy/abort-controller": "^4.2.8", + "@smithy/protocol-http": "^5.3.8", + "@smithy/querystring-builder": "^4.2.8", + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/property-provider": { + "version": "4.2.8", + "resolved": "https://registry.npmjs.org/@smithy/property-provider/-/property-provider-4.2.8.tgz", + "integrity": "sha512-EtCTbyIveCKeOXDSWSdze3k612yCPq1YbXsbqX3UHhkOSW8zKsM9NOJG5gTIya0vbY2DIaieG8pKo1rITHYL0w==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/protocol-http": { + "version": "5.3.8", + "resolved": "https://registry.npmjs.org/@smithy/protocol-http/-/protocol-http-5.3.8.tgz", + "integrity": "sha512-QNINVDhxpZ5QnP3aviNHQFlRogQZDfYlCkQT+7tJnErPQbDhysondEjhikuANxgMsZrkGeiAxXy4jguEGsDrWQ==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/querystring-builder": { + "version": "4.2.8", + "resolved": "https://registry.npmjs.org/@smithy/querystring-builder/-/querystring-builder-4.2.8.tgz", + "integrity": "sha512-Xr83r31+DrE8CP3MqPgMJl+pQlLLmOfiEUnoyAlGzzJIrEsbKsPy1hqH0qySaQm4oWrCBlUqRt+idEgunKB+iw==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@smithy/types": "^4.12.0", + "@smithy/util-uri-escape": "^4.2.0", + "tslib": "^2.6.2" + }, + "engines": { 
+ "node": ">=18.0.0" + } + }, + "node_modules/@smithy/querystring-parser": { + "version": "4.2.8", + "resolved": "https://registry.npmjs.org/@smithy/querystring-parser/-/querystring-parser-4.2.8.tgz", + "integrity": "sha512-vUurovluVy50CUlazOiXkPq40KGvGWSdmusa3130MwrR1UNnNgKAlj58wlOe61XSHRpUfIIh6cE0zZ8mzKaDPA==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/service-error-classification": { + "version": "4.2.8", + "resolved": "https://registry.npmjs.org/@smithy/service-error-classification/-/service-error-classification-4.2.8.tgz", + "integrity": "sha512-mZ5xddodpJhEt3RkCjbmUQuXUOaPNTkbMGR0bcS8FE0bJDLMZlhmpgrvPNCYglVw5rsYTpSnv19womw9WWXKQQ==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@smithy/types": "^4.12.0" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/shared-ini-file-loader": { + "version": "4.4.3", + "resolved": "https://registry.npmjs.org/@smithy/shared-ini-file-loader/-/shared-ini-file-loader-4.4.3.tgz", + "integrity": "sha512-DfQjxXQnzC5UbCUPeC3Ie8u+rIWZTvuDPAGU/BxzrOGhRvgUanaP68kDZA+jaT3ZI+djOf+4dERGlm9mWfFDrg==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/signature-v4": { + "version": "5.3.8", + "resolved": "https://registry.npmjs.org/@smithy/signature-v4/-/signature-v4-5.3.8.tgz", + "integrity": "sha512-6A4vdGj7qKNRF16UIcO8HhHjKW27thsxYci+5r/uVRkdcBEkOEiY8OMPuydLX4QHSrJqGHPJzPRwwVTqbLZJhg==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@smithy/is-array-buffer": "^4.2.0", + "@smithy/protocol-http": "^5.3.8", + "@smithy/types": "^4.12.0", + "@smithy/util-hex-encoding": "^4.2.0", + "@smithy/util-middleware": "^4.2.8", + "@smithy/util-uri-escape": "^4.2.0", + "@smithy/util-utf8": "^4.2.0", + "tslib": "^2.6.2" 
+ }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/smithy-client": { + "version": "4.11.3", + "resolved": "https://registry.npmjs.org/@smithy/smithy-client/-/smithy-client-4.11.3.tgz", + "integrity": "sha512-Q7kY5sDau8OoE6Y9zJoRGgje8P4/UY0WzH8R2ok0PDh+iJ+ZnEKowhjEqYafVcubkbYxQVaqwm3iufktzhprGg==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@smithy/core": "^3.23.0", + "@smithy/middleware-endpoint": "^4.4.14", + "@smithy/middleware-stack": "^4.2.8", + "@smithy/protocol-http": "^5.3.8", + "@smithy/types": "^4.12.0", + "@smithy/util-stream": "^4.5.12", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/types": { + "version": "4.12.0", + "resolved": "https://registry.npmjs.org/@smithy/types/-/types-4.12.0.tgz", + "integrity": "sha512-9YcuJVTOBDjg9LWo23Qp0lTQ3D7fQsQtwle0jVfpbUHy9qBwCEgKuVH4FqFB3VYu0nwdHKiEMA+oXz7oV8X1kw==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/url-parser": { + "version": "4.2.8", + "resolved": "https://registry.npmjs.org/@smithy/url-parser/-/url-parser-4.2.8.tgz", + "integrity": "sha512-NQho9U68TGMEU639YkXnVMV3GEFFULmmaWdlu1E9qzyIePOHsoSnagTGSDv1Zi8DCNN6btxOSdgmy5E/hsZwhA==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@smithy/querystring-parser": "^4.2.8", + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/util-base64": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/@smithy/util-base64/-/util-base64-4.3.0.tgz", + "integrity": "sha512-GkXZ59JfyxsIwNTWFnjmFEI8kZpRNIBfxKjv09+nkAWPt/4aGaEWMM04m4sxgNVWkbt2MdSvE3KF/PfX4nFedQ==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@smithy/util-buffer-from": "^4.2.0", + "@smithy/util-utf8": "^4.2.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + 
"node_modules/@smithy/util-body-length-browser": { + "version": "4.2.0", + "resolved": "https://registry.npmjs.org/@smithy/util-body-length-browser/-/util-body-length-browser-4.2.0.tgz", + "integrity": "sha512-Fkoh/I76szMKJnBXWPdFkQJl2r9SjPt3cMzLdOB6eJ4Pnpas8hVoWPYemX/peO0yrrvldgCUVJqOAjUrOLjbxg==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/util-body-length-node": { + "version": "4.2.1", + "resolved": "https://registry.npmjs.org/@smithy/util-body-length-node/-/util-body-length-node-4.2.1.tgz", + "integrity": "sha512-h53dz/pISVrVrfxV1iqXlx5pRg3V2YWFcSQyPyXZRrZoZj4R4DeWRDo1a7dd3CPTcFi3kE+98tuNyD2axyZReA==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/util-buffer-from": { + "version": "4.2.0", + "resolved": "https://registry.npmjs.org/@smithy/util-buffer-from/-/util-buffer-from-4.2.0.tgz", + "integrity": "sha512-kAY9hTKulTNevM2nlRtxAG2FQ3B2OR6QIrPY3zE5LqJy1oxzmgBGsHLWTcNhWXKchgA0WHW+mZkQrng/pgcCew==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@smithy/is-array-buffer": "^4.2.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/util-config-provider": { + "version": "4.2.0", + "resolved": "https://registry.npmjs.org/@smithy/util-config-provider/-/util-config-provider-4.2.0.tgz", + "integrity": "sha512-YEjpl6XJ36FTKmD+kRJJWYvrHeUvm5ykaUS5xK+6oXffQPHeEM4/nXlZPe+Wu0lsgRUcNZiliYNh/y7q9c2y6Q==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/util-defaults-mode-browser": { + "version": "4.3.30", + "resolved": "https://registry.npmjs.org/@smithy/util-defaults-mode-browser/-/util-defaults-mode-browser-4.3.30.tgz", + "integrity": 
"sha512-cMni0uVU27zxOiU8TuC8pQLC1pYeZ/xEMxvchSK/ILwleRd1ugobOcIRr5vXtcRqKd4aBLWlpeBoDPJJ91LQng==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@smithy/property-provider": "^4.2.8", + "@smithy/smithy-client": "^4.11.3", + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/util-defaults-mode-node": { + "version": "4.2.33", + "resolved": "https://registry.npmjs.org/@smithy/util-defaults-mode-node/-/util-defaults-mode-node-4.2.33.tgz", + "integrity": "sha512-LEb2aq5F4oZUSzWBG7S53d4UytZSkOEJPXcBq/xbG2/TmK9EW5naUZ8lKu1BEyWMzdHIzEVN16M3k8oxDq+DJA==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@smithy/config-resolver": "^4.4.6", + "@smithy/credential-provider-imds": "^4.2.8", + "@smithy/node-config-provider": "^4.3.8", + "@smithy/property-provider": "^4.2.8", + "@smithy/smithy-client": "^4.11.3", + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/util-endpoints": { + "version": "3.2.8", + "resolved": "https://registry.npmjs.org/@smithy/util-endpoints/-/util-endpoints-3.2.8.tgz", + "integrity": "sha512-8JaVTn3pBDkhZgHQ8R0epwWt+BqPSLCjdjXXusK1onwJlRuN69fbvSK66aIKKO7SwVFM6x2J2ox5X8pOaWcUEw==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@smithy/node-config-provider": "^4.3.8", + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/util-hex-encoding": { + "version": "4.2.0", + "resolved": "https://registry.npmjs.org/@smithy/util-hex-encoding/-/util-hex-encoding-4.2.0.tgz", + "integrity": "sha512-CCQBwJIvXMLKxVbO88IukazJD9a4kQ9ZN7/UMGBjBcJYvatpWk+9g870El4cB8/EJxfe+k+y0GmR9CAzkF+Nbw==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/util-middleware": { + "version": "4.2.8", + "resolved": 
"https://registry.npmjs.org/@smithy/util-middleware/-/util-middleware-4.2.8.tgz", + "integrity": "sha512-PMqfeJxLcNPMDgvPbbLl/2Vpin+luxqTGPpW3NAQVLbRrFRzTa4rNAASYeIGjRV9Ytuhzny39SpyU04EQreF+A==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/util-retry": { + "version": "4.2.8", + "resolved": "https://registry.npmjs.org/@smithy/util-retry/-/util-retry-4.2.8.tgz", + "integrity": "sha512-CfJqwvoRY0kTGe5AkQokpURNCT1u/MkRzMTASWMPPo2hNSnKtF1D45dQl3DE2LKLr4m+PW9mCeBMJr5mCAVThg==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@smithy/service-error-classification": "^4.2.8", + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/util-stream": { + "version": "4.5.12", + "resolved": "https://registry.npmjs.org/@smithy/util-stream/-/util-stream-4.5.12.tgz", + "integrity": "sha512-D8tgkrmhAX/UNeCZbqbEO3uqyghUnEmmoO9YEvRuwxjlkKKUE7FOgCJnqpTlQPe9MApdWPky58mNQQHbnCzoNg==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@smithy/fetch-http-handler": "^5.3.9", + "@smithy/node-http-handler": "^4.4.10", + "@smithy/types": "^4.12.0", + "@smithy/util-base64": "^4.3.0", + "@smithy/util-buffer-from": "^4.2.0", + "@smithy/util-hex-encoding": "^4.2.0", + "@smithy/util-utf8": "^4.2.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/util-uri-escape": { + "version": "4.2.0", + "resolved": "https://registry.npmjs.org/@smithy/util-uri-escape/-/util-uri-escape-4.2.0.tgz", + "integrity": "sha512-igZpCKV9+E/Mzrpq6YacdTQ0qTiLm85gD6N/IrmyDvQFA4UnU3d5g3m8tMT/6zG/vVkWSU+VxeUyGonL62DuxA==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/util-utf8": { + "version": "4.2.0", + "resolved": 
"https://registry.npmjs.org/@smithy/util-utf8/-/util-utf8-4.2.0.tgz", + "integrity": "sha512-zBPfuzoI8xyBtR2P6WQj63Rz8i3AmfAaJLuNG8dWsfvPe8lO4aCPYLn879mEgHndZH1zQ2oXmG8O1GGzzaoZiw==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@smithy/util-buffer-from": "^4.2.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/uuid": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@smithy/uuid/-/uuid-1.1.0.tgz", + "integrity": "sha512-4aUIteuyxtBUhVdiQqcDhKFitwfd9hqoSDYY2KRXiWtgoWJ9Bmise+KfEPDiVHWeJepvF8xJO9/9+WDIciMFFw==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@tokenizer/inflate": { + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/@tokenizer/inflate/-/inflate-0.4.1.tgz", + "integrity": "sha512-2mAv+8pkG6GIZiF1kNg1jAjh27IDxEPKwdGul3snfztFerfPGI1LjDezZp3i7BElXompqEtPmoPx6c2wgtWsOA==", + "dev": true, + "license": "MIT", + "dependencies": { + "debug": "^4.4.3", + "token-types": "^6.1.1" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/Borewit" + } + }, + "node_modules/@tokenizer/token": { + "version": "0.3.0", + "resolved": "https://registry.npmjs.org/@tokenizer/token/-/token-0.3.0.tgz", + "integrity": "sha512-OvjF+z51L3ov0OyAU0duzsYuvO01PH7x4t6DJx+guahgTnBHkhJdG7soQeTSFLWN3efnHyibZ4Z8l2EuWwJN3A==", + "dev": true, + "license": "MIT" + }, + "node_modules/@tootallnate/quickjs-emscripten": { + "version": "0.23.0", + "resolved": "https://registry.npmjs.org/@tootallnate/quickjs-emscripten/-/quickjs-emscripten-0.23.0.tgz", + "integrity": "sha512-C5Mc6rdnsaJDjO3UpGW/CQTHtCKaYlScZTly4JIu97Jxo/odCiH0ITnDXSJPTOrEKk/ycSZ0AOgTmkDtkOsvIA==", + "dev": true, + "license": "MIT" + }, + "node_modules/@types/mime-types": { + "version": "2.1.4", + "resolved": "https://registry.npmjs.org/@types/mime-types/-/mime-types-2.1.4.tgz", + 
"integrity": "sha512-lfU4b34HOri+kAY5UheuFMWPDOI+OPceBSHZKp69gEyTL/mmJ4cnU6Y/rlme3UL3GyOn6Y42hyIEw0/q8sWx5w==", + "dev": true, + "license": "MIT" + }, + "node_modules/@types/node": { + "version": "25.2.3", + "resolved": "https://registry.npmjs.org/@types/node/-/node-25.2.3.tgz", + "integrity": "sha512-m0jEgYlYz+mDJZ2+F4v8D1AyQb+QzsNqRuI7xg1VQX/KlKS0qT9r1Mo16yo5F/MtifXFgaofIFsdFMox2SxIbQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "undici-types": "~7.16.0" + } + }, + "node_modules/agent-base": { + "version": "7.1.4", + "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-7.1.4.tgz", + "integrity": "sha512-MnA+YT8fwfJPgBx3m60MNqakm30XOkyIoH1y6huTQvC0PwZG7ki8NacLBcrPbNoo8vEZy7Jpuk7+jMO+CUovTQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 14" + } + }, + "node_modules/ajv": { + "version": "8.17.1", + "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.17.1.tgz", + "integrity": "sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g==", + "dev": true, + "license": "MIT", + "dependencies": { + "fast-deep-equal": "^3.1.3", + "fast-uri": "^3.0.1", + "json-schema-traverse": "^1.0.0", + "require-from-string": "^2.0.2" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/epoberezkin" + } + }, + "node_modules/ajv-formats": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/ajv-formats/-/ajv-formats-3.0.1.tgz", + "integrity": "sha512-8iUql50EUR+uUcdRQ3HDqa6EVyo3docL8g5WJ3FNcWmu62IbkGUue/pEyLBW8VGKKucTPgqeks4fIU1DA4yowQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "ajv": "^8.0.0" + }, + "peerDependencies": { + "ajv": "^8.0.0" + }, + "peerDependenciesMeta": { + "ajv": { + "optional": true + } + } + }, + "node_modules/ansi-regex": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", + "integrity": 
"sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/ansi-styles": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", + "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", + "dev": true, + "license": "MIT", + "dependencies": { + "color-convert": "^2.0.1" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/chalk/ansi-styles?sponsor=1" + } + }, + "node_modules/any-promise": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/any-promise/-/any-promise-1.3.0.tgz", + "integrity": "sha512-7UvmKalWRt1wgjL1RrGxoSJW/0QZFIegpeGvZG9kjp8vrRu55XTHbwnqq2GpXm9uLbcuhxm3IqX9OB4MZR1b2A==", + "dev": true, + "license": "MIT" + }, + "node_modules/ast-types": { + "version": "0.13.4", + "resolved": "https://registry.npmjs.org/ast-types/-/ast-types-0.13.4.tgz", + "integrity": "sha512-x1FCFnFifvYDDzTaLII71vG5uvDwgtmDTEVWAxrgeiR8VjMONcCXJx7E+USjDtHlwFmt9MysbqgF9b9Vjr6w+w==", + "dev": true, + "license": "MIT", + "dependencies": { + "tslib": "^2.0.1" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/balanced-match": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-4.0.2.tgz", + "integrity": "sha512-x0K50QvKQ97fdEz2kPehIerj+YTeptKF9hyYkKf6egnwmMWAkADiO0QCzSp0R5xN8FTZgYaBfSaue46Ej62nMg==", + "dev": true, + "license": "MIT", + "dependencies": { + "jackspeak": "^4.2.3" + }, + "engines": { + "node": "20 || >=22" + } + }, + "node_modules/base64-js": { + "version": "1.5.1", + "resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz", + "integrity": "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==", + "dev": true, + "funding": [ + { + "type": "github", + "url": 
"https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT" + }, + "node_modules/basic-ftp": { + "version": "5.1.0", + "resolved": "https://registry.npmjs.org/basic-ftp/-/basic-ftp-5.1.0.tgz", + "integrity": "sha512-RkaJzeJKDbaDWTIPiJwubyljaEPwpVWkm9Rt5h9Nd6h7tEXTJ3VB4qxdZBioV7JO5yLUaOKwz7vDOzlncUsegw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=10.0.0" + } + }, + "node_modules/bignumber.js": { + "version": "9.3.1", + "resolved": "https://registry.npmjs.org/bignumber.js/-/bignumber.js-9.3.1.tgz", + "integrity": "sha512-Ko0uX15oIUS7wJ3Rb30Fs6SkVbLmPBAKdlm7q9+ak9bbIeFf0MwuBsQV6z7+X768/cHsfg+WlysDWJcmthjsjQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": "*" + } + }, + "node_modules/bowser": { + "version": "2.14.1", + "resolved": "https://registry.npmjs.org/bowser/-/bowser-2.14.1.tgz", + "integrity": "sha512-tzPjzCxygAKWFOJP011oxFHs57HzIhOEracIgAePE4pqB3LikALKnSzUyU4MGs9/iCEUuHlAJTjTc5M+u7YEGg==", + "dev": true, + "license": "MIT" + }, + "node_modules/brace-expansion": { + "version": "5.0.2", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-5.0.2.tgz", + "integrity": "sha512-Pdk8c9poy+YhOgVWw1JNN22/HcivgKWwpxKq04M/jTmHyCZn12WPJebZxdjSa5TmBqISrUSgNYU3eRORljfCCw==", + "dev": true, + "license": "MIT", + "dependencies": { + "balanced-match": "^4.0.2" + }, + "engines": { + "node": "20 || >=22" + } + }, + "node_modules/buffer-equal-constant-time": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/buffer-equal-constant-time/-/buffer-equal-constant-time-1.0.1.tgz", + "integrity": "sha512-zRpUiDwd/xk6ADqPMATG8vc9VPrkck7T07OIx0gnjmJAnHnTVXNQG3vfvWNuiZIkwu9KrKdA1iJKfsfTVxE6NA==", + "dev": true, + "license": "BSD-3-Clause" + }, + "node_modules/chalk": { + "version": "5.6.2", + "resolved": "https://registry.npmjs.org/chalk/-/chalk-5.6.2.tgz", + 
"integrity": "sha512-7NzBL0rN6fMUW+f7A6Io4h40qQlG+xGmtMxfbnH/K7TAtt8JQWVQK+6g0UXKMeVJoyV5EkkNsErQ8pVD3bLHbA==", + "dev": true, + "license": "MIT", + "engines": { + "node": "^12.17.0 || ^14.13 || >=16.0.0" + }, + "funding": { + "url": "https://github.com/chalk/chalk?sponsor=1" + } + }, + "node_modules/cli-highlight": { + "version": "2.1.11", + "resolved": "https://registry.npmjs.org/cli-highlight/-/cli-highlight-2.1.11.tgz", + "integrity": "sha512-9KDcoEVwyUXrjcJNvHD0NFc/hiwe/WPVYIleQh2O1N2Zro5gWJZ/K+3DGn8w8P/F6FxOgzyC5bxDyHIgCSPhGg==", + "dev": true, + "license": "ISC", + "dependencies": { + "chalk": "^4.0.0", + "highlight.js": "^10.7.1", + "mz": "^2.4.0", + "parse5": "^5.1.1", + "parse5-htmlparser2-tree-adapter": "^6.0.0", + "yargs": "^16.0.0" + }, + "bin": { + "highlight": "bin/highlight" + }, + "engines": { + "node": ">=8.0.0", + "npm": ">=5.0.0" + } + }, + "node_modules/cli-highlight/node_modules/chalk": { + "version": "4.1.2", + "resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.2.tgz", + "integrity": "sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA==", + "dev": true, + "license": "MIT", + "dependencies": { + "ansi-styles": "^4.1.0", + "supports-color": "^7.1.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/chalk?sponsor=1" + } + }, + "node_modules/cliui": { + "version": "7.0.4", + "resolved": "https://registry.npmjs.org/cliui/-/cliui-7.0.4.tgz", + "integrity": "sha512-OcRE68cOsVMXp1Yvonl/fzkQOyjLSu/8bhPDfQt0e0/Eb283TKP20Fs2MqoPsr9SwA595rRCA+QMzYc9nBP+JQ==", + "dev": true, + "license": "ISC", + "dependencies": { + "string-width": "^4.2.0", + "strip-ansi": "^6.0.0", + "wrap-ansi": "^7.0.0" + } + }, + "node_modules/color-convert": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", + "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", + "dev": true, + 
"license": "MIT", + "dependencies": { + "color-name": "~1.1.4" + }, + "engines": { + "node": ">=7.0.0" + } + }, + "node_modules/color-name": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", + "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", + "dev": true, + "license": "MIT" + }, + "node_modules/cross-spawn": { + "version": "7.0.6", + "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz", + "integrity": "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==", + "dev": true, + "license": "MIT", + "dependencies": { + "path-key": "^3.1.0", + "shebang-command": "^2.0.0", + "which": "^2.0.1" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/data-uri-to-buffer": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-4.0.1.tgz", + "integrity": "sha512-0R9ikRb668HB7QDxT1vkpuUBtqc53YyAwMwGeUFKRojY/NWKvdZ+9UYtRfGmhqNbRkTSVpMbmyhXipFFv2cb/A==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 12" + } + }, + "node_modules/debug": { + "version": "4.4.3", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz", + "integrity": "sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA==", + "dev": true, + "license": "MIT", + "dependencies": { + "ms": "^2.1.3" + }, + "engines": { + "node": ">=6.0" + }, + "peerDependenciesMeta": { + "supports-color": { + "optional": true + } + } + }, + "node_modules/degenerator": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/degenerator/-/degenerator-5.0.1.tgz", + "integrity": "sha512-TllpMR/t0M5sqCXfj85i4XaAzxmS5tVA16dqvdkMwGmzI+dXLXnw3J+3Vdv7VKw+ThlTMboK6i9rnZ6Nntj5CQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "ast-types": "^0.13.4", + "escodegen": "^2.1.0", + "esprima": "^4.0.1" + }, + "engines": { + "node": 
">= 14" + } + }, + "node_modules/diff": { + "version": "8.0.3", + "resolved": "https://registry.npmjs.org/diff/-/diff-8.0.3.tgz", + "integrity": "sha512-qejHi7bcSD4hQAZE0tNAawRK1ZtafHDmMTMkrrIGgSLl7hTnQHmKCeB45xAcbfTqK2zowkM3j3bHt/4b/ARbYQ==", + "dev": true, + "license": "BSD-3-Clause", + "engines": { + "node": ">=0.3.1" + } + }, + "node_modules/eastasianwidth": { + "version": "0.2.0", + "resolved": "https://registry.npmjs.org/eastasianwidth/-/eastasianwidth-0.2.0.tgz", + "integrity": "sha512-I88TYZWc9XiYHRQ4/3c5rjjfgkjhLyW2luGIheGERbNQ6OY7yTybanSpDXZa8y7VUP9YmDcYa+eyq4ca7iLqWA==", + "dev": true, + "license": "MIT" + }, + "node_modules/ecdsa-sig-formatter": { + "version": "1.0.11", + "resolved": "https://registry.npmjs.org/ecdsa-sig-formatter/-/ecdsa-sig-formatter-1.0.11.tgz", + "integrity": "sha512-nagl3RYrbNv6kQkeJIpt6NJZy8twLB/2vtz6yN9Z4vRKHN4/QZJIEbqohALSgwKdnksuY3k5Addp5lg8sVoVcQ==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "safe-buffer": "^5.0.1" + } + }, + "node_modules/emoji-regex": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", + "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", + "dev": true, + "license": "MIT" + }, + "node_modules/escalade": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.2.0.tgz", + "integrity": "sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/escodegen": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/escodegen/-/escodegen-2.1.0.tgz", + "integrity": "sha512-2NlIDTwUWJN0mRPQOdtQBzbUHvdGY2P1VXSyU83Q3xKxM7WHX2Ql8dKq782Q9TgQUNOLEzEYu9bzLNj1q88I5w==", + "dev": true, + "license": "BSD-2-Clause", + "dependencies": { + "esprima": "^4.0.1", + "estraverse": "^5.2.0", + "esutils": "^2.0.2" + }, + "bin": { + 
"escodegen": "bin/escodegen.js", + "esgenerate": "bin/esgenerate.js" + }, + "engines": { + "node": ">=6.0" + }, + "optionalDependencies": { + "source-map": "~0.6.1" + } + }, + "node_modules/esprima": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/esprima/-/esprima-4.0.1.tgz", + "integrity": "sha512-eGuFFw7Upda+g4p+QHvnW0RyTX/SVeJBDM/gCtMARO0cLuT2HcEKnTPvhjV6aGeqrCB/sbNop0Kszm0jsaWU4A==", + "dev": true, + "license": "BSD-2-Clause", + "bin": { + "esparse": "bin/esparse.js", + "esvalidate": "bin/esvalidate.js" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/estraverse": { + "version": "5.3.0", + "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-5.3.0.tgz", + "integrity": "sha512-MMdARuVEQziNTeJD8DgMqmhwR11BRQ/cBP+pLtYdSTnf3MIO8fFeiINEbX36ZdNlfU/7A9f3gUw49B3oQsvwBA==", + "dev": true, + "license": "BSD-2-Clause", + "engines": { + "node": ">=4.0" + } + }, + "node_modules/esutils": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/esutils/-/esutils-2.0.3.tgz", + "integrity": "sha512-kVscqXk4OCp68SZ0dkgEKVi6/8ij300KBWTJq32P/dYeWTSwK41WyTxalN1eRmA5Z9UU/LX9D7FWSmV9SAYx6g==", + "dev": true, + "license": "BSD-2-Clause", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/extend": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/extend/-/extend-3.0.2.tgz", + "integrity": "sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g==", + "dev": true, + "license": "MIT" + }, + "node_modules/fast-deep-equal": { + "version": "3.1.3", + "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz", + "integrity": "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==", + "dev": true, + "license": "MIT" + }, + "node_modules/fast-uri": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/fast-uri/-/fast-uri-3.1.0.tgz", + "integrity": 
"sha512-iPeeDKJSWf4IEOasVVrknXpaBV0IApz/gp7S2bb7Z4Lljbl2MGJRqInZiUrQwV16cpzw/D3S5j5Julj/gT52AA==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fastify" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/fastify" + } + ], + "license": "BSD-3-Clause" + }, + "node_modules/fast-xml-parser": { + "version": "5.3.4", + "resolved": "https://registry.npmjs.org/fast-xml-parser/-/fast-xml-parser-5.3.4.tgz", + "integrity": "sha512-EFd6afGmXlCx8H8WTZHhAoDaWaGyuIBoZJ2mknrNxug+aZKjkp0a0dlars9Izl+jF+7Gu1/5f/2h68cQpe0IiA==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/NaturalIntelligence" + } + ], + "license": "MIT", + "dependencies": { + "strnum": "^2.1.0" + }, + "bin": { + "fxparser": "src/cli/cli.js" + } + }, + "node_modules/fetch-blob": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/fetch-blob/-/fetch-blob-3.2.0.tgz", + "integrity": "sha512-7yAQpD2UMJzLi1Dqv7qFYnPbaPx7ZfFK6PiIxQ4PfkGPyNyl2Ugx+a/umUonmKqjhM4DnfbMvdX6otXq83soQQ==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/jimmywarting" + }, + { + "type": "paypal", + "url": "https://paypal.me/jimmywarting" + } + ], + "license": "MIT", + "dependencies": { + "node-domexception": "^1.0.0", + "web-streams-polyfill": "^3.0.3" + }, + "engines": { + "node": "^12.20 || >= 14.13" + } + }, + "node_modules/file-type": { + "version": "21.3.0", + "resolved": "https://registry.npmjs.org/file-type/-/file-type-21.3.0.tgz", + "integrity": "sha512-8kPJMIGz1Yt/aPEwOsrR97ZyZaD1Iqm8PClb1nYFclUCkBi0Ma5IsYNQzvSFS9ib51lWyIw5mIT9rWzI/xjpzA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@tokenizer/inflate": "^0.4.1", + "strtok3": "^10.3.4", + "token-types": "^6.1.1", + "uint8array-extras": "^1.4.0" + }, + "engines": { + "node": ">=20" + }, + "funding": { + "url": "https://github.com/sindresorhus/file-type?sponsor=1" + } + }, + 
"node_modules/foreground-child": { + "version": "3.3.1", + "resolved": "https://registry.npmjs.org/foreground-child/-/foreground-child-3.3.1.tgz", + "integrity": "sha512-gIXjKqtFuWEgzFRJA9WCQeSJLZDjgJUOMCMzxtvFq/37KojM1BFGufqsCy0r4qSQmYLsZYMeyRqzIWOMup03sw==", + "dev": true, + "license": "ISC", + "dependencies": { + "cross-spawn": "^7.0.6", + "signal-exit": "^4.0.1" + }, + "engines": { + "node": ">=14" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/foreground-child/node_modules/signal-exit": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-4.1.0.tgz", + "integrity": "sha512-bzyZ1e88w9O1iNJbKnOlvYTrWPDl46O1bG0D3XInv+9tkPrxrN8jUUTiFlDkkmKWgn1M6CfIA13SuGqOa9Korw==", + "dev": true, + "license": "ISC", + "engines": { + "node": ">=14" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/formdata-polyfill": { + "version": "4.0.10", + "resolved": "https://registry.npmjs.org/formdata-polyfill/-/formdata-polyfill-4.0.10.tgz", + "integrity": "sha512-buewHzMvYL29jdeQTVILecSaZKnt/RJWjoZCF5OW60Z67/GmSLBkOFM7qh1PI3zFNtJbaZL5eQu1vLfazOwj4g==", + "dev": true, + "license": "MIT", + "dependencies": { + "fetch-blob": "^3.1.2" + }, + "engines": { + "node": ">=12.20.0" + } + }, + "node_modules/gaxios": { + "version": "7.1.3", + "resolved": "https://registry.npmjs.org/gaxios/-/gaxios-7.1.3.tgz", + "integrity": "sha512-YGGyuEdVIjqxkxVH1pUTMY/XtmmsApXrCVv5EU25iX6inEPbV+VakJfLealkBtJN69AQmh1eGOdCl9Sm1UP6XQ==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "extend": "^3.0.2", + "https-proxy-agent": "^7.0.1", + "node-fetch": "^3.3.2", + "rimraf": "^5.0.1" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/gcp-metadata": { + "version": "8.1.2", + "resolved": "https://registry.npmjs.org/gcp-metadata/-/gcp-metadata-8.1.2.tgz", + "integrity": "sha512-zV/5HKTfCeKWnxG0Dmrw51hEWFGfcF2xiXqcA3+J90WDuP0SvoiSO5ORvcBsifmx/FoIjgQN3oNOGaQ5PhLFkg==", + 
"dev": true, + "license": "Apache-2.0", + "dependencies": { + "gaxios": "^7.0.0", + "google-logging-utils": "^1.0.0", + "json-bigint": "^1.0.0" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/get-caller-file": { + "version": "2.0.5", + "resolved": "https://registry.npmjs.org/get-caller-file/-/get-caller-file-2.0.5.tgz", + "integrity": "sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==", + "dev": true, + "license": "ISC", + "engines": { + "node": "6.* || 8.* || >= 10.*" + } + }, + "node_modules/get-east-asian-width": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/get-east-asian-width/-/get-east-asian-width-1.4.0.tgz", + "integrity": "sha512-QZjmEOC+IT1uk6Rx0sX22V6uHWVwbdbxf1faPqJ1QhLdGgsRGCZoyaQBm/piRdJy/D2um6hM1UP7ZEeQ4EkP+Q==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/get-uri": { + "version": "6.0.5", + "resolved": "https://registry.npmjs.org/get-uri/-/get-uri-6.0.5.tgz", + "integrity": "sha512-b1O07XYq8eRuVzBNgJLstU6FYc1tS6wnMtF1I1D9lE8LxZSOGZ7LhxN54yPP6mGw5f2CkXY2BQUL9Fx41qvcIg==", + "dev": true, + "license": "MIT", + "dependencies": { + "basic-ftp": "^5.0.2", + "data-uri-to-buffer": "^6.0.2", + "debug": "^4.3.4" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/get-uri/node_modules/data-uri-to-buffer": { + "version": "6.0.2", + "resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-6.0.2.tgz", + "integrity": "sha512-7hvf7/GW8e86rW0ptuwS3OcBGDjIi6SZva7hCyWC0yYry2cOPmLIjXAUHI6DK2HsnwJd9ifmt57i8eV2n4YNpw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 14" + } + }, + "node_modules/glob": { + "version": "13.0.3", + "resolved": "https://registry.npmjs.org/glob/-/glob-13.0.3.tgz", + "integrity": "sha512-/g3B0mC+4x724v1TgtBlBtt2hPi/EWptsIAmXUx9Z2rvBYleQcsrmaOzd5LyL50jf/Soi83ZDJmw2+XqvH/EeA==", + "dev": true, + 
"license": "BlueOak-1.0.0", + "dependencies": { + "minimatch": "^10.2.0", + "minipass": "^7.1.2", + "path-scurry": "^2.0.0" + }, + "engines": { + "node": "20 || >=22" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/google-auth-library": { + "version": "10.5.0", + "resolved": "https://registry.npmjs.org/google-auth-library/-/google-auth-library-10.5.0.tgz", + "integrity": "sha512-7ABviyMOlX5hIVD60YOfHw4/CxOfBhyduaYB+wbFWCWoni4N7SLcV46hrVRktuBbZjFC9ONyqamZITN7q3n32w==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "base64-js": "^1.3.0", + "ecdsa-sig-formatter": "^1.0.11", + "gaxios": "^7.0.0", + "gcp-metadata": "^8.0.0", + "google-logging-utils": "^1.0.0", + "gtoken": "^8.0.0", + "jws": "^4.0.0" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/google-logging-utils": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/google-logging-utils/-/google-logging-utils-1.1.3.tgz", + "integrity": "sha512-eAmLkjDjAFCVXg7A1unxHsLf961m6y17QFqXqAXGj/gVkKFrEICfStRfwUlGNfeCEjNRa32JEWOUTlYXPyyKvA==", + "dev": true, + "license": "Apache-2.0", + "engines": { + "node": ">=14" + } + }, + "node_modules/graceful-fs": { + "version": "4.2.11", + "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.11.tgz", + "integrity": "sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==", + "dev": true, + "license": "ISC" + }, + "node_modules/gtoken": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/gtoken/-/gtoken-8.0.0.tgz", + "integrity": "sha512-+CqsMbHPiSTdtSO14O51eMNlrp9N79gmeqmXeouJOhfucAedHw9noVe/n5uJk3tbKE6a+6ZCQg3RPhVhHByAIw==", + "dev": true, + "license": "MIT", + "dependencies": { + "gaxios": "^7.0.0", + "jws": "^4.0.0" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/has-flag": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz", + "integrity": 
"sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/highlight.js": { + "version": "10.7.3", + "resolved": "https://registry.npmjs.org/highlight.js/-/highlight.js-10.7.3.tgz", + "integrity": "sha512-tzcUFauisWKNHaRkN4Wjl/ZA07gENAjFl3J/c480dprkGTg5EQstgaNFqBfUqCq54kZRIEcreTsAgF/m2quD7A==", + "dev": true, + "license": "BSD-3-Clause", + "engines": { + "node": "*" + } + }, + "node_modules/hosted-git-info": { + "version": "9.0.2", + "resolved": "https://registry.npmjs.org/hosted-git-info/-/hosted-git-info-9.0.2.tgz", + "integrity": "sha512-M422h7o/BR3rmCQ8UHi7cyyMqKltdP9Uo+J2fXK+RSAY+wTcKOIRyhTuKv4qn+DJf3g+PL890AzId5KZpX+CBg==", + "dev": true, + "license": "ISC", + "dependencies": { + "lru-cache": "^11.1.0" + }, + "engines": { + "node": "^20.17.0 || >=22.9.0" + } + }, + "node_modules/http-proxy-agent": { + "version": "7.0.2", + "resolved": "https://registry.npmjs.org/http-proxy-agent/-/http-proxy-agent-7.0.2.tgz", + "integrity": "sha512-T1gkAiYYDWYx3V5Bmyu7HcfcvL7mUrTWiM6yOfa3PIphViJ/gFPbvidQ+veqSOHci/PxBcDabeUNCzpOODJZig==", + "dev": true, + "license": "MIT", + "dependencies": { + "agent-base": "^7.1.0", + "debug": "^4.3.4" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/https-proxy-agent": { + "version": "7.0.6", + "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-7.0.6.tgz", + "integrity": "sha512-vK9P5/iUfdl95AI+JVyUuIcVtd4ofvtrOr3HNtM2yxC9bnMbEdp3x01OhQNnjb8IJYi38VlTE3mBXwcfvywuSw==", + "dev": true, + "license": "MIT", + "dependencies": { + "agent-base": "^7.1.2", + "debug": "4" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/ieee754": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/ieee754/-/ieee754-1.2.1.tgz", + "integrity": "sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==", + "dev": true, + "funding": [ + 
{ + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "BSD-3-Clause" + }, + "node_modules/ignore": { + "version": "7.0.5", + "resolved": "https://registry.npmjs.org/ignore/-/ignore-7.0.5.tgz", + "integrity": "sha512-Hs59xBNfUIunMFgWAbGX5cq6893IbWg4KnrjbYwX3tx0ztorVgTDA6B2sxf8ejHJ4wz8BqGUMYlnzNBer5NvGg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 4" + } + }, + "node_modules/ip-address": { + "version": "10.1.0", + "resolved": "https://registry.npmjs.org/ip-address/-/ip-address-10.1.0.tgz", + "integrity": "sha512-XXADHxXmvT9+CRxhXg56LJovE+bmWnEWB78LB83VZTprKTmaC5QfruXocxzTZ2Kl0DNwKuBdlIhjL8LeY8Sf8Q==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 12" + } + }, + "node_modules/is-fullwidth-code-point": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", + "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/is-network-error": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/is-network-error/-/is-network-error-1.3.0.tgz", + "integrity": "sha512-6oIwpsgRfnDiyEDLMay/GqCl3HoAtH5+RUKW29gYkL0QA+ipzpDLA16yQs7/RHCSu+BwgbJaOUqa4A99qNVQVw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=16" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/isexe": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz", + "integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==", + "dev": true, + "license": "ISC" + }, + "node_modules/jackspeak": { + "version": "4.2.3", + "resolved": 
"https://registry.npmjs.org/jackspeak/-/jackspeak-4.2.3.tgz", + "integrity": "sha512-ykkVRwrYvFm1nb2AJfKKYPr0emF6IiXDYUaFx4Zn9ZuIH7MrzEZ3sD5RlqGXNRpHtvUHJyOnCEFxOlNDtGo7wg==", + "dev": true, + "license": "BlueOak-1.0.0", + "dependencies": { + "@isaacs/cliui": "^9.0.0" + }, + "engines": { + "node": "20 || >=22" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/json-bigint": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/json-bigint/-/json-bigint-1.0.0.tgz", + "integrity": "sha512-SiPv/8VpZuWbvLSMtTDU8hEfrZWg/mH/nV/b4o0CYbSxu1UIQPLdwKOCIyLQX+VIPO5vrLX3i8qtqFyhdPSUSQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "bignumber.js": "^9.0.0" + } + }, + "node_modules/json-schema-to-ts": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/json-schema-to-ts/-/json-schema-to-ts-3.1.1.tgz", + "integrity": "sha512-+DWg8jCJG2TEnpy7kOm/7/AxaYoaRbjVB4LFZLySZlWn8exGs3A4OLJR966cVvU26N7X9TWxl+Jsw7dzAqKT6g==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.18.3", + "ts-algebra": "^2.0.0" + }, + "engines": { + "node": ">=16" + } + }, + "node_modules/json-schema-traverse": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-1.0.0.tgz", + "integrity": "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==", + "dev": true, + "license": "MIT" + }, + "node_modules/jwa": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/jwa/-/jwa-2.0.1.tgz", + "integrity": "sha512-hRF04fqJIP8Abbkq5NKGN0Bbr3JxlQ+qhZufXVr0DvujKy93ZCbXZMHDL4EOtodSbCWxOqR8MS1tXA5hwqCXDg==", + "dev": true, + "license": "MIT", + "dependencies": { + "buffer-equal-constant-time": "^1.0.1", + "ecdsa-sig-formatter": "1.0.11", + "safe-buffer": "^5.0.1" + } + }, + "node_modules/jws": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/jws/-/jws-4.0.1.tgz", + "integrity": 
"sha512-EKI/M/yqPncGUUh44xz0PxSidXFr/+r0pA70+gIYhjv+et7yxM+s29Y+VGDkovRofQem0fs7Uvf4+YmAdyRduA==", + "dev": true, + "license": "MIT", + "dependencies": { + "jwa": "^2.0.1", + "safe-buffer": "^5.0.1" + } + }, + "node_modules/long": { + "version": "5.3.2", + "resolved": "https://registry.npmjs.org/long/-/long-5.3.2.tgz", + "integrity": "sha512-mNAgZ1GmyNhD7AuqnTG3/VQ26o760+ZYBPKjPvugO8+nLbYfX6TVpJPseBvopbdY+qpZ/lKUnmEc1LeZYS3QAA==", + "dev": true, + "license": "Apache-2.0" + }, + "node_modules/lru-cache": { + "version": "11.2.6", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-11.2.6.tgz", + "integrity": "sha512-ESL2CrkS/2wTPfuend7Zhkzo2u0daGJ/A2VucJOgQ/C48S/zB8MMeMHSGKYpXhIjbPxfuezITkaBH1wqv00DDQ==", + "dev": true, + "license": "BlueOak-1.0.0", + "engines": { + "node": "20 || >=22" + } + }, + "node_modules/marked": { + "version": "15.0.12", + "resolved": "https://registry.npmjs.org/marked/-/marked-15.0.12.tgz", + "integrity": "sha512-8dD6FusOQSrpv9Z1rdNMdlSgQOIP880DHqnohobOmYLElGEqAL/JvxvuxZO16r4HtjTlfPRDC1hbvxC9dPN2nA==", + "dev": true, + "license": "MIT", + "bin": { + "marked": "bin/marked.js" + }, + "engines": { + "node": ">= 18" + } + }, + "node_modules/mime-db": { + "version": "1.54.0", + "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.54.0.tgz", + "integrity": "sha512-aU5EJuIN2WDemCcAp2vFBfp/m4EAhWJnUNSSw0ixs7/kXbd6Pg64EmwJkNdFhB8aWt1sH2CTXrLxo/iAGV3oPQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/mime-types": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-3.0.2.tgz", + "integrity": "sha512-Lbgzdk0h4juoQ9fCKXW4by0UJqj+nOOrI9MJ1sSj4nI8aI2eo1qmvQEie4VD1glsS250n15LsWsYtCugiStS5A==", + "dev": true, + "license": "MIT", + "dependencies": { + "mime-db": "^1.54.0" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, + "node_modules/minimatch": { + "version": 
"10.2.0", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-10.2.0.tgz", + "integrity": "sha512-ugkC31VaVg9cF0DFVoADH12k6061zNZkZON+aX8AWsR9GhPcErkcMBceb6znR8wLERM2AkkOxy2nWRLpT9Jq5w==", + "dev": true, + "license": "BlueOak-1.0.0", + "dependencies": { + "brace-expansion": "^5.0.2" + }, + "engines": { + "node": "20 || >=22" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/minipass": { + "version": "7.1.2", + "resolved": "https://registry.npmjs.org/minipass/-/minipass-7.1.2.tgz", + "integrity": "sha512-qOOzS1cBTWYF4BH8fVePDBOO9iptMnGUEZwNc/cMWnTV2nVLZ7VoNWEPHkYczZA0pdoA7dl6e7FL659nX9S2aw==", + "dev": true, + "license": "ISC", + "engines": { + "node": ">=16 || 14 >=14.17" + } + }, + "node_modules/ms": { + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", + "dev": true, + "license": "MIT" + }, + "node_modules/mz": { + "version": "2.7.0", + "resolved": "https://registry.npmjs.org/mz/-/mz-2.7.0.tgz", + "integrity": "sha512-z81GNO7nnYMEhrGh9LeymoE4+Yr0Wn5McHIZMK5cfQCl+NDX08sCZgUc9/6MHni9IWuFLm1Z3HTCXu2z9fN62Q==", + "dev": true, + "license": "MIT", + "dependencies": { + "any-promise": "^1.0.0", + "object-assign": "^4.0.1", + "thenify-all": "^1.0.0" + } + }, + "node_modules/netmask": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/netmask/-/netmask-2.0.2.tgz", + "integrity": "sha512-dBpDMdxv9Irdq66304OLfEmQ9tbNRFnFTuZiLo+bD+r332bBmMJ8GBLXklIXXgxd3+v9+KUnZaUR5PJMa75Gsg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 0.4.0" + } + }, + "node_modules/node-domexception": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/node-domexception/-/node-domexception-1.0.0.tgz", + "integrity": "sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==", + "deprecated": "Use your platform's native 
DOMException instead", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/jimmywarting" + }, + { + "type": "github", + "url": "https://paypal.me/jimmywarting" + } + ], + "license": "MIT", + "engines": { + "node": ">=10.5.0" + } + }, + "node_modules/node-fetch": { + "version": "3.3.2", + "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-3.3.2.tgz", + "integrity": "sha512-dRB78srN/l6gqWulah9SrxeYnxeddIG30+GOqK/9OlLVyLg3HPnr6SqOWTWOXKRwC2eGYCkZ59NNuSgvSrpgOA==", + "dev": true, + "license": "MIT", + "dependencies": { + "data-uri-to-buffer": "^4.0.0", + "fetch-blob": "^3.1.4", + "formdata-polyfill": "^4.0.10" + }, + "engines": { + "node": "^12.20.0 || ^14.13.1 || >=16.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/node-fetch" + } + }, + "node_modules/object-assign": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz", + "integrity": "sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/openai": { + "version": "6.10.0", + "resolved": "https://registry.npmjs.org/openai/-/openai-6.10.0.tgz", + "integrity": "sha512-ITxOGo7rO3XRMiKA5l7tQ43iNNu+iXGFAcf2t+aWVzzqRaS0i7m1K2BhxNdaveB+5eENhO0VY1FkiZzhBk4v3A==", + "dev": true, + "license": "Apache-2.0", + "bin": { + "openai": "bin/cli" + }, + "peerDependencies": { + "ws": "^8.18.0", + "zod": "^3.25 || ^4.0" + }, + "peerDependenciesMeta": { + "ws": { + "optional": true + }, + "zod": { + "optional": true + } + } + }, + "node_modules/p-retry": { + "version": "7.1.1", + "resolved": "https://registry.npmjs.org/p-retry/-/p-retry-7.1.1.tgz", + "integrity": "sha512-J5ApzjyRkkf601HpEeykoiCvzHQjWxPAHhyjFcEUP2SWq0+35NKh8TLhpLw+Dkq5TZBFvUM6UigdE9hIVYTl5w==", + "dev": true, + "license": "MIT", + "dependencies": { + "is-network-error": "^1.1.0" + }, + 
"engines": { + "node": ">=20" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/pac-proxy-agent": { + "version": "7.2.0", + "resolved": "https://registry.npmjs.org/pac-proxy-agent/-/pac-proxy-agent-7.2.0.tgz", + "integrity": "sha512-TEB8ESquiLMc0lV8vcd5Ql/JAKAoyzHFXaStwjkzpOpC5Yv+pIzLfHvjTSdf3vpa2bMiUQrg9i6276yn8666aA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@tootallnate/quickjs-emscripten": "^0.23.0", + "agent-base": "^7.1.2", + "debug": "^4.3.4", + "get-uri": "^6.0.1", + "http-proxy-agent": "^7.0.0", + "https-proxy-agent": "^7.0.6", + "pac-resolver": "^7.0.1", + "socks-proxy-agent": "^8.0.5" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/pac-resolver": { + "version": "7.0.1", + "resolved": "https://registry.npmjs.org/pac-resolver/-/pac-resolver-7.0.1.tgz", + "integrity": "sha512-5NPgf87AT2STgwa2ntRMr45jTKrYBGkVU36yT0ig/n/GMAa3oPqhZfIQ2kMEimReg0+t9kZViDVZ83qfVUlckg==", + "dev": true, + "license": "MIT", + "dependencies": { + "degenerator": "^5.0.0", + "netmask": "^2.0.2" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/package-json-from-dist": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/package-json-from-dist/-/package-json-from-dist-1.0.1.tgz", + "integrity": "sha512-UEZIS3/by4OC8vL3P2dTXRETpebLI2NiI5vIrjaD/5UtrkFX/tNbwjTSRAGC/+7CAo2pIcBaRgWmcBBHcsaCIw==", + "dev": true, + "license": "BlueOak-1.0.0" + }, + "node_modules/parse5": { + "version": "5.1.1", + "resolved": "https://registry.npmjs.org/parse5/-/parse5-5.1.1.tgz", + "integrity": "sha512-ugq4DFI0Ptb+WWjAdOK16+u/nHfiIrcE+sh8kZMaM0WllQKLI9rOUq6c2b7cwPkXdzfQESqvoqK6ug7U/Yyzug==", + "dev": true, + "license": "MIT" + }, + "node_modules/parse5-htmlparser2-tree-adapter": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/parse5-htmlparser2-tree-adapter/-/parse5-htmlparser2-tree-adapter-6.0.1.tgz", + "integrity": 
"sha512-qPuWvbLgvDGilKc5BoicRovlT4MtYT6JfJyBOMDsKoiT+GiuP5qyrPCnR9HcPECIJJmZh5jRndyNThnhhb/vlA==", + "dev": true, + "license": "MIT", + "dependencies": { + "parse5": "^6.0.1" + } + }, + "node_modules/parse5-htmlparser2-tree-adapter/node_modules/parse5": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/parse5/-/parse5-6.0.1.tgz", + "integrity": "sha512-Ofn/CTFzRGTTxwpNEs9PP93gXShHcTq255nzRYSKe8AkVpZY7e1fpmTfOyoIvjP5HG7Z2ZM7VS9PPhQGW2pOpw==", + "dev": true, + "license": "MIT" + }, + "node_modules/partial-json": { + "version": "0.1.7", + "resolved": "https://registry.npmjs.org/partial-json/-/partial-json-0.1.7.tgz", + "integrity": "sha512-Njv/59hHaokb/hRUjce3Hdv12wd60MtM9Z5Olmn+nehe0QDAsRtRbJPvJ0Z91TusF0SuZRIvnM+S4l6EIP8leA==", + "dev": true, + "license": "MIT" + }, + "node_modules/path-key": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz", + "integrity": "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/path-scurry": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/path-scurry/-/path-scurry-2.0.1.tgz", + "integrity": "sha512-oWyT4gICAu+kaA7QWk/jvCHWarMKNs6pXOGWKDTr7cw4IGcUbW+PeTfbaQiLGheFRpjo6O9J0PmyMfQPjH71oA==", + "dev": true, + "license": "BlueOak-1.0.0", + "dependencies": { + "lru-cache": "^11.0.0", + "minipass": "^7.1.2" + }, + "engines": { + "node": "20 || >=22" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/proper-lockfile": { + "version": "4.1.2", + "resolved": "https://registry.npmjs.org/proper-lockfile/-/proper-lockfile-4.1.2.tgz", + "integrity": "sha512-TjNPblN4BwAWMXU8s9AEz4JmQxnD1NNL7bNOY/AKUzyamc379FWASUhc/K1pL2noVb+XmZKLL68cjzLsiOAMaA==", + "dev": true, + "license": "MIT", + "dependencies": { + "graceful-fs": "^4.2.4", + "retry": "^0.12.0", + "signal-exit": "^3.0.2" + } + }, + 
"node_modules/protobufjs": { + "version": "7.5.4", + "resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-7.5.4.tgz", + "integrity": "sha512-CvexbZtbov6jW2eXAvLukXjXUW1TzFaivC46BpWc/3BpcCysb5Vffu+B3XHMm8lVEuy2Mm4XGex8hBSg1yapPg==", + "dev": true, + "hasInstallScript": true, + "license": "BSD-3-Clause", + "dependencies": { + "@protobufjs/aspromise": "^1.1.2", + "@protobufjs/base64": "^1.1.2", + "@protobufjs/codegen": "^2.0.4", + "@protobufjs/eventemitter": "^1.1.0", + "@protobufjs/fetch": "^1.1.0", + "@protobufjs/float": "^1.0.2", + "@protobufjs/inquire": "^1.1.0", + "@protobufjs/path": "^1.1.2", + "@protobufjs/pool": "^1.1.0", + "@protobufjs/utf8": "^1.1.0", + "@types/node": ">=13.7.0", + "long": "^5.0.0" + }, + "engines": { + "node": ">=12.0.0" + } + }, + "node_modules/proxy-agent": { + "version": "6.5.0", + "resolved": "https://registry.npmjs.org/proxy-agent/-/proxy-agent-6.5.0.tgz", + "integrity": "sha512-TmatMXdr2KlRiA2CyDu8GqR8EjahTG3aY3nXjdzFyoZbmB8hrBsTyMezhULIXKnC0jpfjlmiZ3+EaCzoInSu/A==", + "dev": true, + "license": "MIT", + "dependencies": { + "agent-base": "^7.1.2", + "debug": "^4.3.4", + "http-proxy-agent": "^7.0.1", + "https-proxy-agent": "^7.0.6", + "lru-cache": "^7.14.1", + "pac-proxy-agent": "^7.1.0", + "proxy-from-env": "^1.1.0", + "socks-proxy-agent": "^8.0.5" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/proxy-agent/node_modules/lru-cache": { + "version": "7.18.3", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-7.18.3.tgz", + "integrity": "sha512-jumlc0BIUrS3qJGgIkWZsyfAM7NCWiBcCDhnd+3NNM5KbBmLTgHVfWBcg6W+rLUsIpzpERPsvwUP7CckAQSOoA==", + "dev": true, + "license": "ISC", + "engines": { + "node": ">=12" + } + }, + "node_modules/proxy-from-env": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz", + "integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==", + "dev": true, + "license": "MIT" + }, + 
"node_modules/require-directory": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz", + "integrity": "sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/require-from-string": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/require-from-string/-/require-from-string-2.0.2.tgz", + "integrity": "sha512-Xf0nWe6RseziFMu+Ap9biiUbmplq6S9/p+7w7YXP/JBHhrUDDUhwa+vANyubuqfZWTveU//DYVGsDG7RKL/vEw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/retry": { + "version": "0.12.0", + "resolved": "https://registry.npmjs.org/retry/-/retry-0.12.0.tgz", + "integrity": "sha512-9LkiTwjUh6rT555DtE9rTX+BKByPfrMzEAtnlEtdEwr3Nkffwiihqe2bWADg+OQRjt9gl6ICdmB/ZFDCGAtSow==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 4" + } + }, + "node_modules/rimraf": { + "version": "5.0.10", + "resolved": "https://registry.npmjs.org/rimraf/-/rimraf-5.0.10.tgz", + "integrity": "sha512-l0OE8wL34P4nJH/H2ffoaniAokM2qSmrtXHmlpvYr5AVVX8msAyW0l8NVJFDxlSK4u3Uh/f41cQheDVdnYijwQ==", + "dev": true, + "license": "ISC", + "dependencies": { + "glob": "^10.3.7" + }, + "bin": { + "rimraf": "dist/esm/bin.mjs" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/rimraf/node_modules/@isaacs/cliui": { + "version": "8.0.2", + "resolved": "https://registry.npmjs.org/@isaacs/cliui/-/cliui-8.0.2.tgz", + "integrity": "sha512-O8jcjabXaleOG9DQ0+ARXWZBTfnP4WNAqzuiJK7ll44AmxGKv/J2M4TPjxjY3znBCfvBXFzucm1twdyFybFqEA==", + "dev": true, + "license": "ISC", + "dependencies": { + "string-width": "^5.1.2", + "string-width-cjs": "npm:string-width@^4.2.0", + "strip-ansi": "^7.0.1", + "strip-ansi-cjs": "npm:strip-ansi@^6.0.1", + "wrap-ansi": "^8.1.0", + "wrap-ansi-cjs": "npm:wrap-ansi@^7.0.0" + }, + "engines": { + 
"node": ">=12" + } + }, + "node_modules/rimraf/node_modules/ansi-regex": { + "version": "6.2.2", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-6.2.2.tgz", + "integrity": "sha512-Bq3SmSpyFHaWjPk8If9yc6svM8c56dB5BAtW4Qbw5jHTwwXXcTLoRMkpDJp6VL0XzlWaCHTXrkFURMYmD0sLqg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/chalk/ansi-regex?sponsor=1" + } + }, + "node_modules/rimraf/node_modules/ansi-styles": { + "version": "6.2.3", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-6.2.3.tgz", + "integrity": "sha512-4Dj6M28JB+oAH8kFkTLUo+a2jwOFkuqb3yucU0CANcRRUbxS0cP0nZYCGjcc3BNXwRIsUVmDGgzawme7zvJHvg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/chalk/ansi-styles?sponsor=1" + } + }, + "node_modules/rimraf/node_modules/balanced-match": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", + "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==", + "dev": true, + "license": "MIT" + }, + "node_modules/rimraf/node_modules/brace-expansion": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.2.tgz", + "integrity": "sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "balanced-match": "^1.0.0" + } + }, + "node_modules/rimraf/node_modules/emoji-regex": { + "version": "9.2.2", + "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-9.2.2.tgz", + "integrity": "sha512-L18DaJsXSUk2+42pv8mLs5jJT2hqFkFE4j21wOmgbUqsZ2hL72NsUU785g9RXgo3s0ZNgVl42TiHp3ZtOv/Vyg==", + "dev": true, + "license": "MIT" + }, + "node_modules/rimraf/node_modules/glob": { + "version": "10.5.0", + "resolved": "https://registry.npmjs.org/glob/-/glob-10.5.0.tgz", + 
"integrity": "sha512-DfXN8DfhJ7NH3Oe7cFmu3NCu1wKbkReJ8TorzSAFbSKrlNaQSKfIzqYqVY8zlbs2NLBbWpRiU52GX2PbaBVNkg==", + "deprecated": "Old versions of glob are not supported, and contain widely publicized security vulnerabilities, which have been fixed in the current version. Please update. Support for old versions may be purchased (at exorbitant rates) by contacting i@izs.me", + "dev": true, + "license": "ISC", + "dependencies": { + "foreground-child": "^3.1.0", + "jackspeak": "^3.1.2", + "minimatch": "^9.0.4", + "minipass": "^7.1.2", + "package-json-from-dist": "^1.0.0", + "path-scurry": "^1.11.1" + }, + "bin": { + "glob": "dist/esm/bin.mjs" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/rimraf/node_modules/jackspeak": { + "version": "3.4.3", + "resolved": "https://registry.npmjs.org/jackspeak/-/jackspeak-3.4.3.tgz", + "integrity": "sha512-OGlZQpz2yfahA/Rd1Y8Cd9SIEsqvXkLVoSw/cgwhnhFMDbsQFeZYoJJ7bIZBS9BcamUW96asq/npPWugM+RQBw==", + "dev": true, + "license": "BlueOak-1.0.0", + "dependencies": { + "@isaacs/cliui": "^8.0.2" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + }, + "optionalDependencies": { + "@pkgjs/parseargs": "^0.11.0" + } + }, + "node_modules/rimraf/node_modules/lru-cache": { + "version": "10.4.3", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-10.4.3.tgz", + "integrity": "sha512-JNAzZcXrCt42VGLuYz0zfAzDfAvJWW6AfYlDBQyDV5DClI2m5sAmK+OIO7s59XfsRsWHp02jAJrRadPRGTt6SQ==", + "dev": true, + "license": "ISC" + }, + "node_modules/rimraf/node_modules/minimatch": { + "version": "9.0.5", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.5.tgz", + "integrity": "sha512-G6T0ZX48xgozx7587koeX9Ys2NYy6Gmv//P89sEte9V9whIapMNF4idKxnW2QtCcLiTWlb/wfCabAtAFWhhBow==", + "dev": true, + "license": "ISC", + "dependencies": { + "brace-expansion": "^2.0.1" + }, + "engines": { + "node": ">=16 || 14 >=14.17" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + 
"node_modules/rimraf/node_modules/path-scurry": { + "version": "1.11.1", + "resolved": "https://registry.npmjs.org/path-scurry/-/path-scurry-1.11.1.tgz", + "integrity": "sha512-Xa4Nw17FS9ApQFJ9umLiJS4orGjm7ZzwUrwamcGQuHSzDyth9boKDaycYdDcZDuqYATXw4HFXgaqWTctW/v1HA==", + "dev": true, + "license": "BlueOak-1.0.0", + "dependencies": { + "lru-cache": "^10.2.0", + "minipass": "^5.0.0 || ^6.0.2 || ^7.0.0" + }, + "engines": { + "node": ">=16 || 14 >=14.18" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/rimraf/node_modules/string-width": { + "version": "5.1.2", + "resolved": "https://registry.npmjs.org/string-width/-/string-width-5.1.2.tgz", + "integrity": "sha512-HnLOCR3vjcY8beoNLtcjZ5/nxn2afmME6lhrDrebokqMap+XbeW8n9TXpPDOqdGK5qcI3oT0GKTW6wC7EMiVqA==", + "dev": true, + "license": "MIT", + "dependencies": { + "eastasianwidth": "^0.2.0", + "emoji-regex": "^9.2.2", + "strip-ansi": "^7.0.1" + }, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/rimraf/node_modules/strip-ansi": { + "version": "7.1.2", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-7.1.2.tgz", + "integrity": "sha512-gmBGslpoQJtgnMAvOVqGZpEz9dyoKTCzy2nfz/n8aIFhN/jCE/rCmcxabB6jOOHV+0WNnylOxaxBQPSvcWklhA==", + "dev": true, + "license": "MIT", + "dependencies": { + "ansi-regex": "^6.0.1" + }, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/chalk/strip-ansi?sponsor=1" + } + }, + "node_modules/rimraf/node_modules/wrap-ansi": { + "version": "8.1.0", + "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-8.1.0.tgz", + "integrity": "sha512-si7QWI6zUMq56bESFvagtmzMdGOtoxfR+Sez11Mobfc7tm+VkUckk9bW2UeffTGVUbOksxmSw0AA2gs8g71NCQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "ansi-styles": "^6.1.0", + "string-width": "^5.0.1", + "strip-ansi": "^7.0.1" + }, + "engines": { + "node": ">=12" + }, + "funding": { + "url": 
"https://github.com/chalk/wrap-ansi?sponsor=1" + } + }, + "node_modules/safe-buffer": { + "version": "5.2.1", + "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz", + "integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT" + }, + "node_modules/shebang-command": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz", + "integrity": "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==", + "dev": true, + "license": "MIT", + "dependencies": { + "shebang-regex": "^3.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/shebang-regex": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/shebang-regex/-/shebang-regex-3.0.0.tgz", + "integrity": "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/signal-exit": { + "version": "3.0.7", + "resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-3.0.7.tgz", + "integrity": "sha512-wnD2ZE+l+SPC/uoS0vXeE9L1+0wuaMqKlfz9AMUo38JsyLSBWSFcHR1Rri62LZc12vLr1gb3jl7iwQhgwpAbGQ==", + "dev": true, + "license": "ISC" + }, + "node_modules/smart-buffer": { + "version": "4.2.0", + "resolved": "https://registry.npmjs.org/smart-buffer/-/smart-buffer-4.2.0.tgz", + "integrity": "sha512-94hK0Hh8rPqQl2xXc3HsaBoOXKV20MToPkcXvwbISWLEs+64sBq5kFgn2kJDHb1Pry9yrP0dxrCI9RRci7RXKg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 6.0.0", + "npm": ">= 3.0.0" + } + }, + "node_modules/socks": { + "version": "2.8.7", + "resolved": 
"https://registry.npmjs.org/socks/-/socks-2.8.7.tgz", + "integrity": "sha512-HLpt+uLy/pxB+bum/9DzAgiKS8CX1EvbWxI4zlmgGCExImLdiad2iCwXT5Z4c9c3Eq8rP2318mPW2c+QbtjK8A==", + "dev": true, + "license": "MIT", + "dependencies": { + "ip-address": "^10.0.1", + "smart-buffer": "^4.2.0" + }, + "engines": { + "node": ">= 10.0.0", + "npm": ">= 3.0.0" + } + }, + "node_modules/socks-proxy-agent": { + "version": "8.0.5", + "resolved": "https://registry.npmjs.org/socks-proxy-agent/-/socks-proxy-agent-8.0.5.tgz", + "integrity": "sha512-HehCEsotFqbPW9sJ8WVYB6UbmIMv7kUUORIF2Nncq4VQvBfNBLibW9YZR5dlYCSUhwcD628pRllm7n+E+YTzJw==", + "dev": true, + "license": "MIT", + "dependencies": { + "agent-base": "^7.1.2", + "debug": "^4.3.4", + "socks": "^2.8.3" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/source-map": { + "version": "0.6.1", + "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz", + "integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==", + "dev": true, + "license": "BSD-3-Clause", + "optional": true, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/std-env": { + "version": "3.10.0", + "resolved": "https://registry.npmjs.org/std-env/-/std-env-3.10.0.tgz", + "integrity": "sha512-5GS12FdOZNliM5mAOxFRg7Ir0pWz8MdpYm6AY6VPkGpbA7ZzmbzNcBJQ0GPvvyWgcY7QAhCgf9Uy89I03faLkg==", + "dev": true, + "license": "MIT" + }, + "node_modules/string-width": { + "version": "4.2.3", + "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", + "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", + "dev": true, + "license": "MIT", + "dependencies": { + "emoji-regex": "^8.0.0", + "is-fullwidth-code-point": "^3.0.0", + "strip-ansi": "^6.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/string-width-cjs": { + "name": "string-width", + "version": "4.2.3", + "resolved": 
"https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", + "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", + "dev": true, + "license": "MIT", + "dependencies": { + "emoji-regex": "^8.0.0", + "is-fullwidth-code-point": "^3.0.0", + "strip-ansi": "^6.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/strip-ansi": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", + "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", + "dev": true, + "license": "MIT", + "dependencies": { + "ansi-regex": "^5.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/strip-ansi-cjs": { + "name": "strip-ansi", + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", + "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", + "dev": true, + "license": "MIT", + "dependencies": { + "ansi-regex": "^5.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/strnum": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/strnum/-/strnum-2.1.2.tgz", + "integrity": "sha512-l63NF9y/cLROq/yqKXSLtcMeeyOfnSQlfMSlzFt/K73oIaD8DGaQWd7Z34X9GPiKqP5rbSh84Hl4bOlLcjiSrQ==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/NaturalIntelligence" + } + ], + "license": "MIT" + }, + "node_modules/strtok3": { + "version": "10.3.4", + "resolved": "https://registry.npmjs.org/strtok3/-/strtok3-10.3.4.tgz", + "integrity": "sha512-KIy5nylvC5le1OdaaoCJ07L+8iQzJHGH6pWDuzS+d07Cu7n1MZ2x26P8ZKIWfbK02+XIL8Mp4RkWeqdUCrDMfg==", + "dev": true, + "license": "MIT", + "dependencies": { + "@tokenizer/token": "^0.3.0" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/Borewit" + } + }, + 
"node_modules/supports-color": { + "version": "7.2.0", + "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz", + "integrity": "sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==", + "dev": true, + "license": "MIT", + "dependencies": { + "has-flag": "^4.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/thenify": { + "version": "3.3.1", + "resolved": "https://registry.npmjs.org/thenify/-/thenify-3.3.1.tgz", + "integrity": "sha512-RVZSIV5IG10Hk3enotrhvz0T9em6cyHBLkH/YAZuKqd8hRkKhSfCGIcP2KUY0EPxndzANBmNllzWPwak+bheSw==", + "dev": true, + "license": "MIT", + "dependencies": { + "any-promise": "^1.0.0" + } + }, + "node_modules/thenify-all": { + "version": "1.6.0", + "resolved": "https://registry.npmjs.org/thenify-all/-/thenify-all-1.6.0.tgz", + "integrity": "sha512-RNxQH/qI8/t3thXJDwcstUO4zeqo64+Uy/+sNVRBx4Xn2OX+OZ9oP+iJnNFqplFra2ZUVeKCSa2oVWi3T4uVmA==", + "dev": true, + "license": "MIT", + "dependencies": { + "thenify": ">= 3.1.0 < 4" + }, + "engines": { + "node": ">=0.8" + } + }, + "node_modules/token-types": { + "version": "6.1.2", + "resolved": "https://registry.npmjs.org/token-types/-/token-types-6.1.2.tgz", + "integrity": "sha512-dRXchy+C0IgK8WPC6xvCHFRIWYUbqqdEIKPaKo/AcTUNzwLTK6AH7RjdLWsEZcAN/TBdtfUw3PYEgPr5VPr6ww==", + "dev": true, + "license": "MIT", + "dependencies": { + "@borewit/text-codec": "^0.2.1", + "@tokenizer/token": "^0.3.0", + "ieee754": "^1.2.1" + }, + "engines": { + "node": ">=14.16" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/Borewit" + } + }, + "node_modules/ts-algebra": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/ts-algebra/-/ts-algebra-2.0.0.tgz", + "integrity": "sha512-FPAhNPFMrkwz76P7cdjdmiShwMynZYN6SgOujD1urY4oNm80Ou9oMdmbR45LotcKOXoy7wSmHkRFE6Mxbrhefw==", + "dev": true, + "license": "MIT" + }, + "node_modules/tslib": { + "version": "2.8.1", + "resolved": 
"https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz", + "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==", + "dev": true, + "license": "0BSD" + }, + "node_modules/typescript": { + "version": "5.9.3", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.3.tgz", + "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==", + "dev": true, + "license": "Apache-2.0", + "bin": { + "tsc": "bin/tsc", + "tsserver": "bin/tsserver" + }, + "engines": { + "node": ">=14.17" + } + }, + "node_modules/uint8array-extras": { + "version": "1.5.0", + "resolved": "https://registry.npmjs.org/uint8array-extras/-/uint8array-extras-1.5.0.tgz", + "integrity": "sha512-rvKSBiC5zqCCiDZ9kAOszZcDvdAHwwIKJG33Ykj43OKcWsnmcBRL09YTU4nOeHZ8Y2a7l1MgTd08SBe9A8Qj6A==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/undici": { + "version": "7.21.0", + "resolved": "https://registry.npmjs.org/undici/-/undici-7.21.0.tgz", + "integrity": "sha512-Hn2tCQpoDt1wv23a68Ctc8Cr/BHpUSfaPYrkajTXOS9IKpxVRx/X5m1K2YkbK2ipgZgxXSgsUinl3x+2YdSSfg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=20.18.1" + } + }, + "node_modules/undici-types": { + "version": "7.16.0", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.16.0.tgz", + "integrity": "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw==", + "dev": true, + "license": "MIT" + }, + "node_modules/web-streams-polyfill": { + "version": "3.3.3", + "resolved": "https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-3.3.3.tgz", + "integrity": "sha512-d2JWLCivmZYTSIoge9MsgFCZrt571BikcWGYkjC1khllbTeDlGqZ2D8vD8E/lJa8WGWbb7Plm8/XJYV7IJHZZw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 8" + } + }, + 
"node_modules/which": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", + "integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==", + "dev": true, + "license": "ISC", + "dependencies": { + "isexe": "^2.0.0" + }, + "bin": { + "node-which": "bin/node-which" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/wrap-ansi": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz", + "integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==", + "dev": true, + "license": "MIT", + "dependencies": { + "ansi-styles": "^4.0.0", + "string-width": "^4.1.0", + "strip-ansi": "^6.0.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/wrap-ansi?sponsor=1" + } + }, + "node_modules/wrap-ansi-cjs": { + "name": "wrap-ansi", + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz", + "integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==", + "dev": true, + "license": "MIT", + "dependencies": { + "ansi-styles": "^4.0.0", + "string-width": "^4.1.0", + "strip-ansi": "^6.0.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/wrap-ansi?sponsor=1" + } + }, + "node_modules/ws": { + "version": "8.19.0", + "resolved": "https://registry.npmjs.org/ws/-/ws-8.19.0.tgz", + "integrity": "sha512-blAT2mjOEIi0ZzruJfIhb3nps74PRWTCz1IjglWEEpQl5XS/UNama6u2/rjFkDDouqr4L67ry+1aGIALViWjDg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=10.0.0" + }, + "peerDependencies": { + "bufferutil": "^4.0.1", + "utf-8-validate": ">=5.0.2" + }, + "peerDependenciesMeta": { + "bufferutil": { + "optional": true + }, + "utf-8-validate": { + "optional": true + } + } + }, + "node_modules/y18n": { + "version": "5.0.8", + "resolved": 
"https://registry.npmjs.org/y18n/-/y18n-5.0.8.tgz", + "integrity": "sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==", + "dev": true, + "license": "ISC", + "engines": { + "node": ">=10" + } + }, + "node_modules/yaml": { + "version": "2.8.2", + "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.8.2.tgz", + "integrity": "sha512-mplynKqc1C2hTVYxd0PU2xQAc22TI1vShAYGksCCfxbn/dFwnHTNi1bvYsBTkhdUNtGIf5xNOg938rrSSYvS9A==", + "dev": true, + "license": "ISC", + "bin": { + "yaml": "bin.mjs" + }, + "engines": { + "node": ">= 14.6" + }, + "funding": { + "url": "https://github.com/sponsors/eemeli" + } + }, + "node_modules/yargs": { + "version": "16.2.0", + "resolved": "https://registry.npmjs.org/yargs/-/yargs-16.2.0.tgz", + "integrity": "sha512-D1mvvtDG0L5ft/jGWkLpG1+m0eQxOfaBvTNELraWj22wSVUMWxZUvYgJYcKh6jGGIkJFhH4IZPQhR4TKpc8mBw==", + "dev": true, + "license": "MIT", + "dependencies": { + "cliui": "^7.0.2", + "escalade": "^3.1.1", + "get-caller-file": "^2.0.5", + "require-directory": "^2.1.1", + "string-width": "^4.2.0", + "y18n": "^5.0.5", + "yargs-parser": "^20.2.2" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/yargs-parser": { + "version": "20.2.9", + "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-20.2.9.tgz", + "integrity": "sha512-y11nGElTIV+CT3Zv9t7VKl+Q3hTQoT9a1Qzezhhl6Rp21gJ/IVTW7Z3y9EWXhuUBC2Shnf+DX0antecpAwSP8w==", + "dev": true, + "license": "ISC", + "engines": { + "node": ">=10" + } + }, + "node_modules/yoctocolors": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/yoctocolors/-/yoctocolors-2.1.2.tgz", + "integrity": "sha512-CzhO+pFNo8ajLM2d2IW/R93ipy99LWjtwblvC1RsoSUMZgyLbYFr221TnSNT7GjGdYui6P459mw9JH/g/zW2ug==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/zod": { + "version": "4.3.6", + "resolved": 
"https://registry.npmjs.org/zod/-/zod-4.3.6.tgz", + "integrity": "sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg==", + "dev": true, + "license": "MIT", + "peer": true, + "funding": { + "url": "https://github.com/sponsors/colinhacks" + } + }, + "node_modules/zod-to-json-schema": { + "version": "3.25.1", + "resolved": "https://registry.npmjs.org/zod-to-json-schema/-/zod-to-json-schema-3.25.1.tgz", + "integrity": "sha512-pM/SU9d3YAggzi6MtR4h7ruuQlqKtad8e9S0fmxcMi+ueAK5Korys/aWcV9LIIHTVbj01NdzxcnXSN+O74ZIVA==", + "dev": true, + "license": "ISC", + "peerDependencies": { + "zod": "^3.25 || ^4" + } + } + } +} diff --git a/package.json b/package.json index 8781960..96f6be0 100644 --- a/package.json +++ b/package.json @@ -11,7 +11,9 @@ "koan" ], "pi": { - "extensions": ["./extensions"] + "extensions": [ + "./extensions" + ] }, "files": [ "extensions", @@ -19,7 +21,14 @@ "README.md", "LICENSE" ], + "scripts": { + "check": "tsc --noEmit" + }, "dependencies": { "@sinclair/typebox": "^0.32.30" + }, + "devDependencies": { + "@mariozechner/pi-coding-agent": "^0.52.10", + "typescript": "^5.9.3" } } diff --git a/src/planner/phases/context-capture.ts b/src/planner/phases/context-capture.ts index 404e69a..7953614 100644 --- a/src/planner/phases/context-capture.ts +++ b/src/planner/phases/context-capture.ts @@ -77,7 +77,7 @@ export class ContextCapturePhase { // for context-capture, begin() for plan-design). hookDispatch throws // if the slot is already occupied (phase hook ownership prevents // silent misrouting). 
- hookDispatch(this.dispatch, "onNextStep", () => this.handleSubPhaseComplete()); + hookDispatch(this.dispatch, "onCompleteStep", () => this.handleSubPhaseComplete()); hookDispatch(this.dispatch, "onStoreContext", (p, c) => this.handleContextToolCall(p, c)); this.log("Starting context capture (draft phase)", { planId: plan.id }); @@ -151,19 +151,19 @@ export class ContextCapturePhase { if (event.toolName === "koan_store_context") { return { block: true, - reason: "Draft phase: explore and draft first, then call koan_next_step.", + reason: "Draft phase: explore and draft first, then call koan_complete_step.", }; } return undefined; } if (ctx.subPhase === "verifying") { - if (event.toolName === "koan_next_step") { + if (event.toolName === "koan_complete_step") { return undefined; } return { block: true, - reason: "Verify phase: review your draft, then call koan_next_step. No other tools.", + reason: "Verify phase: review your draft, then call koan_complete_step. No other tools.", }; } @@ -179,77 +179,6 @@ export class ContextCapturePhase { return undefined; }); - - // Safety net: if the LLM ends a turn without calling the expected - // tool, nudge it to try again. The primary transition mechanism is - // tool calls (koan_next_step for sub-phase advancement, - // koan_store_context for completion). This handler only fires when - // the LLM produces a text-only response instead of calling tools. - this.pi.on("agent_end", async (_event, ctx) => { - if (!this.shouldHandle()) return; - const contextState = this.state.context!; - - if (contextState.subPhase === "drafting" || contextState.subPhase === "verifying") { - // LLM ended without calling koan_next_step. - this.log("LLM ended turn without calling koan_next_step", { - subPhase: contextState.subPhase, - }); - this.pi.sendUserMessage( - "You must call koan_next_step when you have finished this step.", - ); - return; - } - - if (contextState.subPhase === "refining") { - // LLM ended without calling koan_store_context. 
Retry logic. - this.log("Refine phase ended without koan_store_context call", { - attempt: contextState.attempt, - }); - - if (contextState.feedback.length === 0) { - contextState.feedback = [ - "You must call the `koan_store_context` tool with the structured context.", - ]; - } - - const remaining = contextState.maxAttempts - contextState.attempt; - if (remaining > 0) { - contextState.attempt += 1; - ctx.ui.notify("Context capture incomplete. Retrying.", "warning"); - this.sendRefinePrompt(); - return; - } - - contextState.active = false; - this.state.phase = "context-failed"; - // Unhook on both success (handleContextToolCall) and failure - // (agent_end max-attempts). - unhookDispatch(this.dispatch, "onNextStep"); - unhookDispatch(this.dispatch, "onStoreContext"); - await this.updatePlanMetadata({ - status: "context-failed", - context: { - failedAt: new Date().toISOString(), - attempt: contextState.attempt, - }, - }); - ctx.ui.notify("Context capture failed after maximum attempts.", "error"); - } - }); - } - - private sendRefinePrompt(): void { - const ctx = this.state.context!; - const prompt = formatStep( - refineGuidance({ - attempt: ctx.attempt, - maxAttempts: ctx.maxAttempts, - feedback: ctx.feedback, - }), - ); - ctx.lastPrompt = prompt; - this.log("Sending refine prompt", { attempt: ctx.attempt }); - this.pi.sendUserMessage(prompt); } private shouldHandle(): boolean { @@ -292,12 +221,10 @@ export class ContextCapturePhase { this.state.context.lastRawContent = rawText; this.state.context.feedback = []; this.state.phase = "context-complete"; - // Unhook on both success (handleContextToolCall) and failure - // (agent_end max-attempts). 
- unhookDispatch(this.dispatch, "onNextStep"); + unhookDispatch(this.dispatch, "onCompleteStep"); unhookDispatch(this.dispatch, "onStoreContext"); - ctx.ui.notify("Koan context capture complete.", "success"); + ctx.ui.notify("Koan context capture complete.", "info"); this.log("Context capture succeeded", { planId: this.state.context.planId, attempt: this.state.context.attempt, diff --git a/src/planner/phases/plan-design.ts b/src/planner/phases/plan-design.ts index 8928616..d924294 100644 --- a/src/planner/phases/plan-design.ts +++ b/src/planner/phases/plan-design.ts @@ -85,10 +85,10 @@ export class PlanDesignPhase { this.state.step = 1; // No koan_store_plan tool. Each mutation writes to disk immediately. - // Step 6 ends with koan_next_step, which runs validation. Removes + // Step 6 ends with koan_complete_step, which runs validation. Removes // the two-step 'build then finalize' pattern that caused LLM to skip // intermediate tools. - hookDispatch(this.dispatch, "onNextStep", () => this.handleStepComplete()); + hookDispatch(this.dispatch, "onCompleteStep", () => this.handleStepComplete()); this.log("Starting plan-design workflow", { step: 1 }); await this.progress?.update(`Step 1/6: ${STEP_NAMES[1]} -- started`); @@ -160,7 +160,7 @@ export class PlanDesignPhase { return { ok: false, error: result.errors?.join("; ") }; } this.state.active = false; - unhookDispatch(this.dispatch, "onNextStep"); + unhookDispatch(this.dispatch, "onCompleteStep"); this.log("Plan finalized, workflow complete"); return { ok: true, prompt: "Plan validation passed. Workflow complete." 
}; } diff --git a/src/planner/prompts/context-capture.ts b/src/planner/prompts/context-capture.ts index 9657e85..2d4ce3d 100644 --- a/src/planner/prompts/context-capture.ts +++ b/src/planner/prompts/context-capture.ts @@ -29,12 +29,14 @@ export function draftGuidance(taskDescription: string): StepGuidance { "- Is there any implicit design knowledge -- invariants, rationale, accepted tradeoffs -- that should be preserved for downstream work?", "- Are there reference documents or specs in the project that apply?", "", - "Write your analysis as a draft. For each dimension, note your confidence:", + "For each dimension, note your confidence:", "- HIGH: you have direct evidence from this session", "- LOW: you are extrapolating or guessing", "", "Flag any LOW-confidence point where a single targeted read would raise it to HIGH.", "This is a working document, not a final artifact.", + "", + "Put your full draft analysis in the `thoughts` parameter when calling koan_complete_step.", ], }; } @@ -50,10 +52,9 @@ export function verifyGuidance(): StepGuidance { "3. Phrasing: would a downstream agent understand without ambiguity?", "", "Rewrite the draft with corrections. If nothing needs changing, reproduce it as-is.", - // Verify phase: tool_call handler blocks all tools except koan_next_step. - // Instruction directs LLM to avoid exploration during review. Two-layer - // defense: prohibition in description, blocking in tool_call handler. 
"Do not use exploration tools during this review.", + "", + "Put your revised analysis in the `thoughts` parameter when calling koan_complete_step.", ], }; } diff --git a/src/planner/prompts/plan-design.ts b/src/planner/prompts/plan-design.ts index 66e4075..3098981 100644 --- a/src/planner/prompts/plan-design.ts +++ b/src/planner/prompts/plan-design.ts @@ -44,17 +44,13 @@ export function buildPlanDesignSystemPrompt(basePrompt: string): string { "", "You will execute a 6-step workflow.", "Step 1 instructions are in the user message below.", - "Complete the work described, then call koan_next_step.", + "Complete the work described, then call koan_complete_step.", + "Put your findings in the `thoughts` parameter of koan_complete_step.", "The tool result contains the next step's instructions.", - "In step 6, use plan mutation tools, then call koan_next_step.", + "In step 6, use plan mutation tools, then call koan_complete_step.", "", - // Directive prevents immediate tool call without substantive work. - // Failure mode: koan_next_step called with zero file reads, - // producing an empty step with no exploration data. The directive - // repeats guidance from tool descriptions to strengthen the signal. "CRITICAL: Do the actual work described in each step BEFORE calling", - "koan_next_step. Read files, explore code, analyze. Do not skip.", - "Do NOT produce a final text response until koan_next_step completes.", + "koan_complete_step. Read files, explore code, analyze. Do not skip.", ].join("\n"); } @@ -207,7 +203,7 @@ export function planDesignStepGuidance(step: 1 | 2 | 3 | 4 | 5 | 6, context?: st " If file overlap: extract to M0 (foundation) or consolidate", ], invokeAfter: [ - "WHEN DONE: After completing the instructions above, call koan_next_step to validate.", + "WHEN DONE: Call koan_complete_step to validate. 
Put a summary of what you built in the `thoughts` parameter.", "Do NOT call this tool until you have used the plan mutation tools.", ].join("\n"), }; diff --git a/src/planner/prompts/step.ts b/src/planner/prompts/step.ts index a6598c7..28743eb 100644 --- a/src/planner/prompts/step.ts +++ b/src/planner/prompts/step.ts @@ -1,32 +1,21 @@ // Step prompt assembly for koan workflows. // -// Format matches the reference planner's format_step() in -// skills/lib/workflow/prompts/step.py. Both use "NEXT STEP:" -// directives. Reference uses "Command:" for shell execution. -// Koan uses "Tool:" -- tool results are synchronous within -// the agent loop (deterministic delivery regardless of -p mode). -// -// Why strengthen invoke-after? The original weak format ("Now call -// koan_next_step.") produced skipped steps. Strengthened format -// mirrors reference planner's explicit directive structure. +// The `thoughts` parameter on koan_complete_step captures the model's +// work output (analysis, review, findings) as a tool parameter. This +// avoids requiring the model to produce text + tool_call in one +// response, which some models (e.g. GPT-5-codex) cannot do. export interface StepGuidance { title: string; instructions: string[]; // Custom invoke-after directive. When omitted, formatStep - // appends the default koan_next_step directive. + // appends the default koan_complete_step directive. // Terminal steps override this (e.g., step 6 plan validation). invokeAfter?: string; } -// Default invoke-after: conditional gate for koan_next_step. -// "WHEN DONE" + "Do NOT call until" creates a two-part gate: -// the LLM must complete work before advancing. Unconditional -// imperatives ("Execute this tool now.") cause immediate tool -// calls because tool calls with empty params have zero friction -// (unlike shell commands which require mechanical copy-paste). 
const DEFAULT_INVOKE = [ - "WHEN DONE: After completing the instructions above, call koan_next_step to advance.", + "WHEN DONE: Call koan_complete_step with your findings in the `thoughts` parameter.", "Do NOT call this tool until the work described in this step is finished.", ].join("\n"); diff --git a/src/planner/session.ts b/src/planner/session.ts index 4bb533a..a14050e 100644 --- a/src/planner/session.ts +++ b/src/planner/session.ts @@ -25,7 +25,7 @@ export function createSession(pi: ExtensionAPI, dispatch: WorkflowDispatch, plan // Completion callback for context-capture phase. Runs inside the // koan_store_context tool call -- the tool blocks until the architect // subagent finishes. The LLM sees context capture + architect outcome - // in one tool response. No agent_end polling needed. + // in one tool response. const onContextComplete = async (ctx: ExtensionContext): Promise => { if (!state.plan) { return "Context captured but no plan state available."; @@ -83,7 +83,7 @@ export function createSession(pi: ExtensionAPI, dispatch: WorkflowDispatch, plan state.phase = "plan-design-complete"; log("Architect plan-design complete", { planDir }); - ctx.ui.notify("Plan-design phase complete.", "success"); + ctx.ui.notify("Plan-design phase complete.", "info"); return `Context captured. Plan written to ${planDir}/plan.json.`; }; diff --git a/src/planner/tools/dispatch.ts b/src/planner/tools/dispatch.ts index 28e91b8..7bfa629 100644 --- a/src/planner/tools/dispatch.ts +++ b/src/planner/tools/dispatch.ts @@ -1,7 +1,7 @@ // Workflow tool dispatch for koan. // -// Workflow tools (koan_next_step, koan_store_context) are registered once -// at init and read from this dispatch at call time. +// Workflow tools (koan_complete_step, koan_store_context) are registered +// once at init and read from this dispatch at call time. // Pi snapshots tools during _buildRuntime() -- late registration is // invisible to the LLM. 
The dispatch decouples static registration // from dynamic phase routing. @@ -25,14 +25,14 @@ export interface StepResult { // -- Dispatch -- export interface WorkflowDispatch { - onNextStep: (() => StepResult | Promise) | null; + onCompleteStep: ((thoughts?: string) => StepResult | Promise) | null; onStoreContext: | ((payload: unknown, ctx: ExtensionContext) => Promise) | null; } export function createDispatch(): WorkflowDispatch { - return { onNextStep: null, onStoreContext: null }; + return { onCompleteStep: null, onStoreContext: null }; } // Decouples tool registration (init-time, before _buildRuntime) from @@ -57,14 +57,17 @@ export function hookDispatch( if (dispatch[key] !== null) { throw new Error(`dispatch.${String(key)} is already hooked`); } - (dispatch as Record)[key] = handler; + // TypeScript cannot verify generic key-value assignment. + // Call-site generic constraint (handler: NonNullable) + // ensures type safety; collision guard above prevents double-hooking. + (dispatch as any)[key] = handler; } export function unhookDispatch( dispatch: WorkflowDispatch, key: keyof WorkflowDispatch, ): void { - (dispatch as Record)[key] = null; + (dispatch as any)[key] = null; } // -- Tool registration -- @@ -82,32 +85,36 @@ export function registerWorkflowTools( pi: ExtensionAPI, dispatch: WorkflowDispatch, ): void { - // -- koan_next_step -- - // "DO NOT call until told" creates prohibition/activation pattern - // with step prompts. Description = default prohibition, step prompt - // invoke-after = explicit activation. + // -- koan_complete_step -- + // The `thoughts` parameter captures the model's work output (analysis, + // review, findings) as a tool parameter instead of as text output. + // This ensures models that cannot mix text + tool_call in one response + // (e.g. GPT-5-codex) still advance the workflow reliably. 
pi.registerTool({ - name: "koan_next_step", - label: "Advance to next workflow step", + name: "koan_complete_step", + label: "Complete current workflow step", description: [ "Signal completion of the current workflow step.", + "Put your analysis, findings, or review in the `thoughts` parameter.", "DO NOT call this tool until the step instructions explicitly tell you to.", - "Do the actual work described in each step BEFORE calling this tool.", ].join(" "), - parameters: Type.Object({}), - async execute() { - // Two-layer defense: tool_call blocks with descriptive reasons - // (primary gate), dispatch null checks as fallback. Dispatch check - // fires only if tool_call handler is bypassed or misconfigured. - if (!dispatch.onNextStep) { + parameters: Type.Object({ + thoughts: Type.Optional(Type.String({ + description: "Your analysis, findings, or work output for this step.", + })), + }), + async execute(_toolCallId, params) { + if (!dispatch.onCompleteStep) { throw new Error("No workflow phase is active."); } - const r = await dispatch.onNextStep(); + const thoughts = (params as { thoughts?: string }).thoughts; + const r = await dispatch.onCompleteStep(thoughts); if (!r.ok) { throw new Error(r.error ?? "Step transition failed."); } return { content: [{ type: "text" as const, text: r.prompt ?? "Step complete." }], + details: undefined, }; }, }); @@ -134,6 +141,7 @@ export function registerWorkflowTools( log("Context stored"); return { content: [{ type: "text" as const, text: r.message }], + details: undefined, }; }, }); diff --git a/src/planner/tools/plan-entities.ts b/src/planner/tools/plan-entities.ts index f431f1a..c38efb2 100644 --- a/src/planner/tools/plan-entities.ts +++ b/src/planner/tools/plan-entities.ts @@ -2,10 +2,13 @@ // Disk is single source of truth. Single-writer assumption per phase. // Feedback messages prevent the LLM from skipping tools (prior architecture // returned opaque JSON). 
+// +// Static derives the TypeScript type from the TypeBox schema at +// compile time, making type casts unnecessary. The registerTool generic +// propagates the schema type through to the execute callback. -import { Type } from "@sinclair/typebox"; +import { Type, type Static, type TSchema } from "@sinclair/typebox"; import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; -import type { TSchema } from "@sinclair/typebox"; import type { PlanRef } from "./dispatch.js"; import { loadPlan, savePlan } from "../plan/serialize.js"; @@ -41,15 +44,15 @@ import { setReadmeEntry, } from "../plan/mutate.js"; -function planTool

( +function planTool( pi: ExtensionAPI, planRef: PlanRef, opts: { name: string; label: string; description: string; - parameters: TSchema; - execute: (plan: Plan, params: P) => { plan: Plan; message: string }; + parameters: TParams; + execute: (plan: Plan, params: Static) => { plan: Plan; message: string }; }, ): void { pi.registerTool({ @@ -60,10 +63,11 @@ function planTool

( async execute(_toolCallId, params) { if (!planRef.dir) throw new Error("No plan directory is active."); const plan = await loadPlan(planRef.dir); - const result = opts.execute(plan, params as P); + const result = opts.execute(plan, params); await savePlan(result.plan, planRef.dir); return { content: [{ type: "text" as const, text: result.message }], + details: undefined, }; }, }); diff --git a/src/planner/tools/plan-getters.ts b/src/planner/tools/plan-getters.ts index ff1fc2e..8154229 100644 --- a/src/planner/tools/plan-getters.ts +++ b/src/planner/tools/plan-getters.ts @@ -21,6 +21,7 @@ export function registerPlanGetterTools( const summary = formatPlanSummary(p); return { content: [{ type: "text" as const, text: summary }], + details: undefined, }; }, }); @@ -35,10 +36,11 @@ export function registerPlanGetterTools( async execute(_toolCallId, params) { if (!planRef.dir) throw new Error("No plan directory is active."); const p = await loadPlan(planRef.dir); - const m = p.milestones.find((x) => x.id === (params as { id: string }).id); - if (!m) throw new Error(`Milestone ${(params as { id: string }).id} not found`); + const m = p.milestones.find((x) => x.id === params.id); + if (!m) throw new Error(`Milestone ${params.id} not found`); return { content: [{ type: "text" as const, text: JSON.stringify(m, null, 2) }], + details: undefined, }; }, }); @@ -54,11 +56,12 @@ export function registerPlanGetterTools( if (!planRef.dir) throw new Error("No plan directory is active."); const p = await loadPlan(planRef.dir); const d = p.planning_context.decision_log.find( - (x) => x.id === (params as { id: string }).id, + (x) => x.id === params.id, ); - if (!d) throw new Error(`Decision ${(params as { id: string }).id} not found`); + if (!d) throw new Error(`Decision ${params.id} not found`); return { content: [{ type: "text" as const, text: JSON.stringify(d, null, 2) }], + details: undefined, }; }, }); @@ -73,9 +76,9 @@ export function registerPlanGetterTools( async 
execute(_toolCallId, params) { if (!planRef.dir) throw new Error("No plan directory is active."); const p = await loadPlan(planRef.dir); - const result = findIntent(p, (params as { id: string }).id); + const result = findIntent(p, params.id); if (!result) - throw new Error(`Intent ${(params as { id: string }).id} not found`); + throw new Error(`Intent ${params.id} not found`); return { content: [ { @@ -87,6 +90,7 @@ export function registerPlanGetterTools( ), }, ], + details: undefined, }; }, }); @@ -101,9 +105,9 @@ export function registerPlanGetterTools( async execute(_toolCallId, params) { if (!planRef.dir) throw new Error("No plan directory is active."); const p = await loadPlan(planRef.dir); - const result = findChange(p, (params as { id: string }).id); + const result = findChange(p, params.id); if (!result) - throw new Error(`Change ${(params as { id: string }).id} not found`); + throw new Error(`Change ${params.id} not found`); return { content: [ { @@ -115,6 +119,7 @@ export function registerPlanGetterTools( ), }, ], + details: undefined, }; }, }); diff --git a/src/planner/tools/plan-setters.ts b/src/planner/tools/plan-setters.ts index 16a0a87..4478254 100644 --- a/src/planner/tools/plan-setters.ts +++ b/src/planner/tools/plan-setters.ts @@ -24,13 +24,11 @@ export function registerPlanSetterTools( async execute(_toolCallId, params) { if (!planRef.dir) throw new Error("No plan directory is active."); const p = await loadPlan(planRef.dir); - const updated = setOverview( - p, - params as { problem?: string; approach?: string }, - ); + const updated = setOverview(p, params); await savePlan(updated, planRef.dir); return { content: [{ type: "text" as const, text: "Overview updated." 
}], + details: undefined, }; }, }); @@ -45,18 +43,16 @@ export function registerPlanSetterTools( async execute(_toolCallId, params) { if (!planRef.dir) throw new Error("No plan directory is active."); const p = await loadPlan(planRef.dir); - const updated = setConstraints( - p, - (params as { constraints: string[] }).constraints, - ); + const updated = setConstraints(p, params.constraints); await savePlan(updated, planRef.dir); return { content: [ { type: "text" as const, - text: `Constraints set (${(params as { constraints: string[] }).constraints.length} items).`, + text: `Constraints set (${params.constraints.length} items).`, }, ], + details: undefined, }; }, }); @@ -73,19 +69,13 @@ export function registerPlanSetterTools( async execute(_toolCallId, params) { if (!planRef.dir) throw new Error("No plan directory is active."); const p = await loadPlan(planRef.dir); - const updated = setInvisibleKnowledge( - p, - params as { - system?: string; - invariants?: string[]; - tradeoffs?: string[]; - }, - ); + const updated = setInvisibleKnowledge(p, params); await savePlan(updated, planRef.dir); return { content: [ { type: "text" as const, text: "Invisible knowledge updated." 
}, ], + details: undefined, }; }, }); diff --git a/src/planner/tools/qr-tools.ts b/src/planner/tools/qr-tools.ts index bf62bda..4d43331 100644 --- a/src/planner/tools/qr-tools.ts +++ b/src/planner/tools/qr-tools.ts @@ -4,7 +4,7 @@ import { promises as fs } from "node:fs"; import * as path from "node:path"; import type { PlanRef } from "./dispatch.js"; -import type { QRFile, QRSeverity, QRItemStatus } from "../qr/types.js"; +import type { QRFile } from "../qr/types.js"; import { addQRItem, setQRItem, assignGroup } from "../qr/mutate.js"; function createEmptyQRFile(phase: string): QRFile { @@ -55,17 +55,12 @@ export function registerQRTools(pi: ExtensionAPI, planRef: PlanRef): void { }), async execute(_toolCallId, params) { if (!planRef.dir) throw new Error("No plan directory is active."); - const p = params as { - phase: string; - scope: string; - check: string; - severity?: QRSeverity; - }; - const qr = await loadQR(planRef.dir, p.phase); - const r = addQRItem(qr, p); - await saveQR(r.qr, planRef.dir, p.phase); + const qr = await loadQR(planRef.dir, params.phase); + const r = addQRItem(qr, params); + await saveQR(r.qr, planRef.dir, params.phase); return { content: [{ type: "text" as const, text: `Added QR item ${r.id}` }], + details: undefined, }; }, }); @@ -96,19 +91,12 @@ export function registerQRTools(pi: ExtensionAPI, planRef: PlanRef): void { }), async execute(_toolCallId, params) { if (!planRef.dir) throw new Error("No plan directory is active."); - const p = params as { - phase: string; - id: string; - status?: QRItemStatus; - finding?: string; - check?: string; - severity?: QRSeverity; - }; - const qr = await loadQR(planRef.dir, p.phase); - const updated = setQRItem(qr, p.id, p); - await saveQR(updated, planRef.dir, p.phase); + const qr = await loadQR(planRef.dir, params.phase); + const updated = setQRItem(qr, params.id, params); + await saveQR(updated, planRef.dir, params.phase); return { - content: [{ type: "text" as const, text: `Updated QR item 
${p.id}` }], + content: [{ type: "text" as const, text: `Updated QR item ${params.id}` }], + details: undefined, }; }, }); @@ -124,21 +112,17 @@ export function registerQRTools(pi: ExtensionAPI, planRef: PlanRef): void { }), async execute(_toolCallId, params) { if (!planRef.dir) throw new Error("No plan directory is active."); - const p = params as { - phase: string; - ids: string[]; - group_id: string; - }; - const qr = await loadQR(planRef.dir, p.phase); - const updated = assignGroup(qr, p.ids, p.group_id); - await saveQR(updated, planRef.dir, p.phase); + const qr = await loadQR(planRef.dir, params.phase); + const updated = assignGroup(qr, params.ids, params.group_id); + await saveQR(updated, planRef.dir, params.phase); return { content: [ { type: "text" as const, - text: `Assigned ${p.ids.length} items to group ${p.group_id}`, + text: `Assigned ${params.ids.length} items to group ${params.group_id}`, }, ], + details: undefined, }; }, }); @@ -153,12 +137,12 @@ export function registerQRTools(pi: ExtensionAPI, planRef: PlanRef): void { }), async execute(_toolCallId, params) { if (!planRef.dir) throw new Error("No plan directory is active."); - const p = params as { phase: string; id: string }; - const qr = await loadQR(planRef.dir, p.phase); - const item = qr.items.find((x) => x.id === p.id); - if (!item) throw new Error(`QR item ${p.id} not found`); + const qr = await loadQR(planRef.dir, params.phase); + const item = qr.items.find((x) => x.id === params.id); + if (!item) throw new Error(`QR item ${params.id} not found`); return { content: [{ type: "text" as const, text: JSON.stringify(item, null, 2) }], + details: undefined, }; }, }); @@ -179,15 +163,15 @@ export function registerQRTools(pi: ExtensionAPI, planRef: PlanRef): void { }), async execute(_toolCallId, params) { if (!planRef.dir) throw new Error("No plan directory is active."); - const p = params as { phase: string; status?: QRItemStatus }; - const qr = await loadQR(planRef.dir, p.phase); - const 
filtered = p.status - ? qr.items.filter((item) => item.status === p.status) + const qr = await loadQR(planRef.dir, params.phase); + const filtered = params.status + ? qr.items.filter((item) => item.status === params.status) : qr.items; return { content: [ { type: "text" as const, text: JSON.stringify(filtered, null, 2) }, ], + details: undefined, }; }, }); @@ -201,8 +185,7 @@ export function registerQRTools(pi: ExtensionAPI, planRef: PlanRef): void { }), async execute(_toolCallId, params) { if (!planRef.dir) throw new Error("No plan directory is active."); - const p = params as { phase: string }; - const qr = await loadQR(planRef.dir, p.phase); + const qr = await loadQR(planRef.dir, params.phase); const byStatus = { TODO: qr.items.filter((x) => x.status === "TODO").length, @@ -226,6 +209,7 @@ export function registerQRTools(pi: ExtensionAPI, planRef: PlanRef): void { content: [ { type: "text" as const, text: JSON.stringify(summary, null, 2) }, ], + details: undefined, }; }, }); diff --git a/src/planner/tools/registry.ts b/src/planner/tools/registry.ts index 36391c1..5151f7a 100644 --- a/src/planner/tools/registry.ts +++ b/src/planner/tools/registry.ts @@ -99,11 +99,11 @@ export const PLAN_MUTATION_TOOLS: ReadonlySet = new Set([ // updating the permissions map. 
export const PHASE_PERMISSIONS: ReadonlyMap> = new Map([ - ["context-capture", new Set(["koan_store_context", "koan_next_step"])], + ["context-capture", new Set(["koan_store_context", "koan_complete_step"])], [ "plan-design", new Set([ - "koan_next_step", + "koan_complete_step", ...PLAN_GETTER_TOOLS_LIST, ...PLAN_SETTER_TOOLS_LIST, ...PLAN_DESIGN_ENTITY_TOOLS, @@ -112,7 +112,7 @@ export const PHASE_PERMISSIONS: ReadonlyMap> = [ "plan-code", new Set([ - "koan_next_step", + "koan_complete_step", ...PLAN_GETTER_TOOLS_LIST, ...PLAN_CHANGE_TOOLS_LIST, "koan_set_intent", @@ -121,7 +121,7 @@ export const PHASE_PERMISSIONS: ReadonlyMap> = [ "plan-docs", new Set([ - "koan_next_step", + "koan_complete_step", ...PLAN_GETTER_TOOLS_LIST, "koan_set_change_doc_diff", "koan_set_change_comments", @@ -131,12 +131,12 @@ export const PHASE_PERMISSIONS: ReadonlyMap> = ], [ "qr-plan-design", - new Set(["koan_next_step", ...PLAN_GETTER_TOOLS_LIST, ...QR_TOOLS_LIST]), + new Set(["koan_complete_step", ...PLAN_GETTER_TOOLS_LIST, ...QR_TOOLS_LIST]), ], [ "qr-plan-code", new Set([ - "koan_next_step", + "koan_complete_step", "koan_get_plan", "koan_get_milestone", "koan_get_intent", @@ -147,7 +147,7 @@ export const PHASE_PERMISSIONS: ReadonlyMap> = [ "qr-plan-docs", new Set([ - "koan_next_step", + "koan_complete_step", "koan_get_plan", "koan_get_milestone", "koan_get_change", diff --git a/tsconfig.json b/tsconfig.json new file mode 100644 index 0000000..d8a1c33 --- /dev/null +++ b/tsconfig.json @@ -0,0 +1,15 @@ +{ + "compilerOptions": { + "strict": true, + "noEmit": true, + "target": "ES2022", + "module": "Node16", + "moduleResolution": "Node16", + "esModuleInterop": true, + "skipLibCheck": true + }, + "include": [ + "src/**/*.ts", + "extensions/**/*.ts" + ] +} From 7f2ecdcff07591ec5bdbc919cb3710c8ab4103fa Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Fri, 13 Feb 2026 16:47:45 +0700 Subject: [PATCH 005/412] Update design-decisions.md for koan_complete_step rename Replace all stale 
koan_next_step references, document agent_end anti-pattern, add AD-5 thoughts parameter rationale. --- design-decisions.md | 75 ++++++++++++++++---------- src/planner/prompts/context-capture.ts | 2 +- 2 files changed, 47 insertions(+), 30 deletions(-) diff --git a/design-decisions.md b/design-decisions.md index 7a10117..132572a 100644 --- a/design-decisions.md +++ b/design-decisions.md @@ -24,6 +24,7 @@ between exposing more or less information, always choose less. This is a permanent invariant. Concrete implications: + - No implementation details in prompts (temp dirs, state file paths, orchestrator internals, phase routing) - No full plan state when partial suffices (QR reviewer for design does @@ -56,6 +57,7 @@ from tools: always `throw new Error(msg)` -- never `return { isError: true }`. ### AD-2: Self-Loading Extension Pattern Same extension file (extensions/koan.ts) serves both modes: + - **Parent mode** (no --koan-role flag): registers /koan command, tools, and dispatch. Zero overhead in normal pi sessions. - **Subagent mode** (--koan-role present): activates role-specific event @@ -74,29 +76,40 @@ to ensure one-shot dispatch. ### AD-4: Tool-Call-Driven Step Transitions (Uniform Pattern) -ALL step transitions use the koan_next_step registered tool. The LLM -calls koan_next_step -> tool execute() returns next step's prompt. +ALL step transitions use the koan_complete_step registered tool. The LLM +calls koan_complete_step -> tool execute() returns next step's prompt. This works in both -p mode and interactive mode. sendUserMessage() -is only used for the initial trigger (/koan plan) and as a safety net -in agent_end when the LLM fails to call the expected tool. +is only used for the initial trigger (/koan plan). -**KEY CORRECTION**: Early design (Feb 10) considered turn_end + agent_end -+ sendUserMessage() chaining for step transitions. This was ABANDONED -because subagents in -p mode exit after the first agent loop completes. 
-Tool calls keep the agent loop alive within a single loop. The context -capture phase preserves sendUserMessage() in agent_end only as a -fallback retry mechanism, not as the primary transition path. +**KEY CORRECTION**: Early design (Feb 10) considered turn_end + +agent_end + sendUserMessage() chaining for step transitions. This was +ABANDONED because subagents in -p mode exit after the first agent loop +completes. Tool calls keep the agent loop alive within a single loop. -### AD-5: koan_next_step Has No Arguments +**ANTI-PATTERN**: agent_end + sendUserMessage for retry was removed. +sendUserMessage is fire-and-forget in the extension binding. In -p mode +(subagents), the process can exit before the retry completes. Even in +interactive mode, some models say "calling tool X now" as text without +emitting a tool_call block, causing agent_end to fire spuriously. + +### AD-5: koan_complete_step Accepts Optional `thoughts` The extension is stateful -- it knows exactly which step the LLM is on via closure state. No step number parameter needed. The tool response contains the next step's full prompt. +The optional `thoughts` parameter captures the model's work output +(analysis, findings, review) as a tool parameter instead of as text +output. This solves a cross-model compatibility issue: GPT-5-codex +cannot produce text + tool_call in the same response, so requiring +text output alongside a tool call caused it to narrate "Calling +koan_complete_step now" without emitting an actual tool_call block. 
+ ### AD-6: Tool Naming Conventions Settled names (corrected from earlier iterations): -- `koan_next_step` (was koan_complete_step) + +- `koan_complete_step` (was koan_next_step -- renamed to accept `thoughts`) - `koan_store_context` (was koan_finalize_context) - `koan_store_plan` was later REMOVED entirely (see AD-14) - Prompts use "instructions" not "actions" @@ -104,7 +117,7 @@ Settled names (corrected from earlier iterations): ### AD-7: invoke_after Pattern Is Critical Every step prompt MUST have a clear "invoke after" directive telling -the LLM to call koan_next_step after completing the step's work. +the LLM to call koan_complete_step after completing the step's work. Mirrors the reference planner's "NEXT STEP: Command: python3 -m ... --step N" pattern. Without this, the LLM produces text-only responses and the agent loop exits. @@ -148,12 +161,13 @@ per user preference. ### AD-12: Context Capture Phases Three sub-phases within context capture: + 1. **Drafting**: LLM reflects on conversation. MAY use tools for "high value" targeted exploration (confirm API signature, check file existence). DO NOT explore speculatively. Confidence tagging: HIGH (direct evidence) vs LOW (extrapolating). 2. **Verifying**: Self-check. Completeness, accuracy, phrasing for - downstream agents. No tools except koan_next_step. + downstream agents. No tools except koan_complete_step. 3. **Refining**: Pure tool invocation (koan_store_context). Up to 3 attempts with validation feedback. @@ -199,7 +213,7 @@ needs evidence that each tool call produces results. 5. Assumption Surfacing 6. Milestone Definition & Plan Writing (plan mutation tools available) -Steps 1-5: only READ_TOOLS + PLAN_GETTER_TOOLS + koan_next_step allowed. +Steps 1-5: only READ_TOOLS + PLAN_GETTER_TOOLS + koan_complete_step allowed. Step 6: plan mutation tools unlocked. --- @@ -208,7 +222,7 @@ Step 6: plan mutation tools unlocked. 
### WorkflowDispatch (dispatch pattern) -Workflow tools (koan_next_step, koan_store_context) are registered once +Workflow tools (koan_complete_step, koan_store_context) are registered once at init. Their execute() callbacks read from a mutable dispatch object. Phases hook/unhook dispatch slots at activation/deactivation time. @@ -221,9 +235,9 @@ All plan mutation tools share a mutable `{ dir: string | null }` set when /koan plan creates a directory or when --koan-plan-dir is received. Decouples tool registration (init-time) from directory creation (runtime). -### Pi Registers Tools at _buildRuntime() +### Pi Registers Tools at \_buildRuntime() -Pi snapshots tools during _buildRuntime(). Tools registered after this +Pi snapshots tools during \_buildRuntime(). Tools registered after this point are invisible to the LLM. All 44+ tools register unconditionally at init; phases restrict access via tool_call blocking at runtime. @@ -231,15 +245,15 @@ at init; phases restrict access via tool_call blocking at runtime. 
## What Is NOT Ported from Reference Planner -| Reference planner component | Koan replacement | -|----|-----| -| CLI mutation scripts (cli/plan.py) | Pi extension tool registration | +| Reference planner component | Koan replacement | +| --------------------------------------- | ------------------------------------- | +| CLI mutation scripts (cli/plan.py) | Pi extension tool registration | | Thin router pattern (shared/routing.py) | Orchestrator deterministic gate logic | -| File-based state_dir | In-memory state + appendEntry() | -| Template dispatch | Direct process spawning | -| Constraint enforcement via prompt | tool_call event blocking | -| Agent markdown definitions | Self-loading extension pattern | -| Question relay handler | Not implemented (may add later) | +| File-based state_dir | In-memory state + appendEntry() | +| Template dispatch | Direct process spawning | +| Constraint enforcement via prompt | tool_call event blocking | +| Agent markdown definitions | Self-loading extension pattern | +| Question relay handler | Not implemented (may add later) | --- @@ -256,7 +270,7 @@ tool usage instructions, coding style guides, or editor/IDE conventions." ### BUG-2: LLM Skips Mutation Tools -The LLM called koan_next_step through steps 1-5, then at step 6 skipped +The LLM called koan_complete_step through steps 1-5, then at step 6 skipped all mutation tools and called koan_store_plan directly. The in-memory plan was empty. Root cause: mutation tools returned opaque JSON with no feedback -- they felt like ceremony. Solution: remove finalize tool, @@ -280,8 +294,9 @@ always throw new Error(msg) for error conditions (INV-3). Original weak format ("Now call koan_next_step.") produced skipped steps. The LLM called the tool immediately without doing work, because tool calls with empty params have zero friction. Solution: strengthen to -"WHEN DONE: After completing the instructions above, call koan_next_step. 
-Do NOT call this tool until the work described in this step is finished." +"WHEN DONE: Call koan_complete_step with your findings in the `thoughts` +parameter. Do NOT call this tool until the work described in this step +is finished." ### BUG-6: Flag Detection at Init Time @@ -320,6 +335,7 @@ koan_qr_get_item, koan_qr_list_items, koan_qr_summary. ## Current Implementation State (Feb 13 2026) Implemented: + - [x] Extension entry point with dual-mode detection - [x] Context capture (3-phase: draft/verify/refine) - [x] Plan-design architect subagent (6-step workflow) @@ -331,6 +347,7 @@ Implemented: - [x] Plan validation (design + cross-references) Not yet implemented: + - [ ] Developer role (plan-code phase) - [ ] Technical writer role (plan-docs phase) - [ ] QR decompose subagent diff --git a/src/planner/prompts/context-capture.ts b/src/planner/prompts/context-capture.ts index 2d4ce3d..2236b5b 100644 --- a/src/planner/prompts/context-capture.ts +++ b/src/planner/prompts/context-capture.ts @@ -83,7 +83,7 @@ export function refineGuidance(opts: RefinePromptOptions): StepGuidance { return { title: "Context Capture: Refine", instructions, - // Refine completes with koan_store_context, not koan_next_step. + // Refine completes with koan_store_context, not koan_complete_step. 
invokeAfter: [ "WHEN DONE: After completing the instructions above, call koan_store_context with the verified context data.", "Do NOT call this tool until you have prepared the structured context.", From f03bd05c2bd982f98903fc9668e04b668b2e4b93 Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Mon, 23 Feb 2026 14:11:53 +0700 Subject: [PATCH 006/412] Add CI workflow and initial test --- .github/workflows/ci.yml | 28 +++++++++++++++++++++++++++ .gitignore | 1 + package.json | 5 ++++- tests/progress.test.ts | 41 ++++++++++++++++++++++++++++++++++++++++ tsconfig.build.json | 14 ++++++++++++++ 5 files changed, 88 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/ci.yml create mode 100644 tests/progress.test.ts create mode 100644 tsconfig.build.json diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..81e033b --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,28 @@ +name: CI + +on: + push: + branches: ["main"] + pull_request: + +jobs: + build-and-test: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Node.js + uses: actions/setup-node@v4 + with: + node-version: 20 + cache: npm + + - name: Install dependencies + run: npm ci + + - name: Type check + run: npm run check + + - name: Build and test + run: npm test diff --git a/.gitignore b/.gitignore index 97d66e2..b3bc902 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ node_modules/ dist/ +build/ .pi/ .DS_Store diff --git a/package.json b/package.json index 96f6be0..feaae5b 100644 --- a/package.json +++ b/package.json @@ -22,7 +22,10 @@ "LICENSE" ], "scripts": { - "check": "tsc --noEmit" + "check": "tsc --noEmit", + "build": "tsc --project tsconfig.build.json", + "pretest": "npm run build", + "test": "node --test build/tests" }, "dependencies": { "@sinclair/typebox": "^0.32.30" diff --git a/tests/progress.test.ts b/tests/progress.test.ts new file mode 100644 index 0000000..3a69e40 --- 
/dev/null +++ b/tests/progress.test.ts @@ -0,0 +1,41 @@ +import assert from "node:assert/strict"; +import { describe, it } from "node:test"; +import { promises as fs } from "node:fs"; +import * as os from "node:os"; +import * as path from "node:path"; + +import { ProgressReporter, readSubagentState } from "../src/utils/progress.js"; + +async function createTempDir(prefix: string): Promise<string> { + const base = await fs.mkdtemp(path.join(os.tmpdir(), prefix)); + return base; +} + +describe("ProgressReporter", () => { + it("persists progress updates and completion state", async () => { + const tempRoot = await createTempDir("koan-progress-"); + const reporterDir = path.join(tempRoot, "reporter"); + await fs.mkdir(reporterDir, { recursive: true }); + + const reporter = new ProgressReporter(reporterDir, "planner", "analysis"); + + await reporter.update("gathering context"); + await reporter.update("synthesizing plan"); + await reporter.complete("completed"); + + const state = await readSubagentState(reporterDir); + assert.ok(state, "state file should be readable"); + assert.equal(state.role, "planner"); + assert.equal(state.phase, "analysis"); + assert.equal(state.status, "completed"); + assert.equal(state.current, "completed"); + assert.equal(state.trail.length, 3); + assert.deepEqual( + state.trail.map((entry) => entry.msg), + ["gathering context", "synthesizing plan", "completed"], + "trail should capture chronological updates" + ); + + await fs.rm(tempRoot, { recursive: true, force: true }); + }); +}); diff --git a/tsconfig.build.json b/tsconfig.build.json new file mode 100644 index 0000000..2591980 --- /dev/null +++ b/tsconfig.build.json @@ -0,0 +1,14 @@ +{ + "extends": "./tsconfig.json", + "compilerOptions": { + "noEmit": false, + "outDir": "./build", + "declaration": false, + "sourceMap": false + }, + "include": [ + "src/**/*.ts", + "extensions/**/*.ts", + "tests/**/*.ts" + ] +} From 741e50fa729d0833157c47c307f90bd6ce5c998a Mon Sep 17 00:00:00 2001 From: Leon Mergen 
Date: Mon, 23 Feb 2026 14:18:12 +0700 Subject: [PATCH 007/412] Add CI workflow dispatch --- .github/workflows/ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 81e033b..e305aad 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -4,6 +4,7 @@ on: push: branches: ["main"] pull_request: + workflow_dispatch: jobs: build-and-test: From cbf89dbe3cb2b9f649a2b333ea0b2bf335af7eeb Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Mon, 23 Feb 2026 17:22:46 +0700 Subject: [PATCH 008/412] Polish --- extensions/koan.ts | 12 +- src/planner/lib/dispatch.ts | 63 ++ .../{tools/registry.ts => lib/permissions.ts} | 0 src/planner/{prompts => lib}/step.ts | 0 .../phase.ts} | 18 +- .../context-capture/prompts.ts} | 2 +- src/planner/phases/dispatch.ts | 4 +- .../{plan-design.ts => plan-design/phase.ts} | 16 +- .../plan-design/prompts.ts} | 4 +- src/planner/plan/mutate.ts | 667 ------------------ src/planner/plan/mutate/code.ts | 161 +++++ src/planner/plan/mutate/decisions.ts | 178 +++++ src/planner/plan/mutate/index.ts | 48 ++ src/planner/plan/mutate/milestones.ts | 91 +++ src/planner/plan/mutate/structure.ts | 164 +++++ src/planner/plan/mutate/top-level.ts | 37 + src/planner/plan/types.ts | 4 - src/planner/qr/mutate.ts | 3 - src/planner/qr/types.ts | 1 - src/planner/session.ts | 4 +- src/planner/tools/entity-code.ts | 171 +++++ src/planner/tools/entity-design.ts | 306 ++++++++ src/planner/tools/entity-structure.ts | 156 ++++ .../tools/{plan-getters.ts => getters.ts} | 2 +- src/planner/tools/index.ts | 36 + src/planner/tools/plan-entities.ts | 603 ---------------- src/planner/tools/{qr-tools.ts => qr.ts} | 64 +- .../tools/{plan-setters.ts => setters.ts} | 4 +- .../tools/{dispatch.ts => workflow.ts} | 73 +- src/utils/lock.ts | 44 ++ 30 files changed, 1527 insertions(+), 1409 deletions(-) create mode 100644 src/planner/lib/dispatch.ts rename src/planner/{tools/registry.ts => lib/permissions.ts} (100%) rename 
src/planner/{prompts => lib}/step.ts (100%) rename src/planner/phases/{context-capture.ts => context-capture/phase.ts} (95%) rename src/planner/{prompts/context-capture.ts => phases/context-capture/prompts.ts} (98%) rename src/planner/phases/{plan-design.ts => plan-design/phase.ts} (94%) rename src/planner/{prompts/plan-design.ts => phases/plan-design/prompts.ts} (98%) delete mode 100644 src/planner/plan/mutate.ts create mode 100644 src/planner/plan/mutate/code.ts create mode 100644 src/planner/plan/mutate/decisions.ts create mode 100644 src/planner/plan/mutate/index.ts create mode 100644 src/planner/plan/mutate/milestones.ts create mode 100644 src/planner/plan/mutate/structure.ts create mode 100644 src/planner/plan/mutate/top-level.ts create mode 100644 src/planner/tools/entity-code.ts create mode 100644 src/planner/tools/entity-design.ts create mode 100644 src/planner/tools/entity-structure.ts rename src/planner/tools/{plan-getters.ts => getters.ts} (99%) create mode 100644 src/planner/tools/index.ts delete mode 100644 src/planner/tools/plan-entities.ts rename src/planner/tools/{qr-tools.ts => qr.ts} (78%) rename src/planner/tools/{plan-setters.ts => setters.ts} (96%) rename src/planner/tools/{dispatch.ts => workflow.ts} (58%) create mode 100644 src/utils/lock.ts diff --git a/extensions/koan.ts b/extensions/koan.ts index 3fce06c..30288d7 100644 --- a/extensions/koan.ts +++ b/extensions/koan.ts @@ -2,11 +2,7 @@ import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; import { createSession } from "../src/planner/session.js"; import { detectSubagentMode, dispatchPhase } from "../src/planner/phases/dispatch.js"; -import { createDispatch, registerWorkflowTools, createPlanRef } from "../src/planner/tools/dispatch.js"; -import { registerPlanGetterTools } from "../src/planner/tools/plan-getters.js"; -import { registerPlanSetterTools } from "../src/planner/tools/plan-setters.js"; -import { registerPlanEntityTools } from "../src/planner/tools/plan-entities.js"; 
-import { registerQRTools } from "../src/planner/tools/qr-tools.js"; +import { registerAllTools, createDispatch, createPlanRef } from "../src/planner/tools/index.js"; import { createLogger } from "../src/utils/logger.js"; export default function koan(pi: ExtensionAPI): void { @@ -42,11 +38,7 @@ export default function koan(pi: ExtensionAPI): void { const dispatch = createDispatch(); const planRef = createPlanRef(); - registerWorkflowTools(pi, dispatch); - registerPlanGetterTools(pi, planRef); - registerPlanSetterTools(pi, planRef); - registerPlanEntityTools(pi, planRef); - registerQRTools(pi, planRef); + registerAllTools(pi, planRef, dispatch); // Subagent detection runs at before_agent_start (flags // are unavailable during init). diff --git a/src/planner/lib/dispatch.ts b/src/planner/lib/dispatch.ts new file mode 100644 index 0000000..cf8ec02 --- /dev/null +++ b/src/planner/lib/dispatch.ts @@ -0,0 +1,63 @@ +// Shared workflow dispatch and plan-ref infrastructure. +// Decouples static tool registration (init-time) from dynamic phase routing (runtime). +// All mutable slots are null by default; phases hook/unhook on begin/end. + +import type { ExtensionContext } from "@mariozechner/pi-coding-agent"; + +import type { ContextToolResult } from "../tools/context-store.js"; + +// -- Result types -- + +export interface StepResult { + ok: boolean; + prompt?: string; + error?: string; +} + +// -- Dispatch -- + +export interface WorkflowDispatch { + onCompleteStep: ((thoughts?: string) => StepResult | Promise<StepResult>) | null; + onStoreContext: + | ((payload: unknown, ctx: ExtensionContext) => Promise<ContextToolResult>) + | null; +} + +export function createDispatch(): WorkflowDispatch { + return { onCompleteStep: null, onStoreContext: null }; +} + +// Decouples tool registration (init-time, before _buildRuntime) from +// plan directory creation (runtime, after flags available). Same +// indirection pattern as WorkflowDispatch. 
+export interface PlanRef { + dir: string | null; +} + +export function createPlanRef(): PlanRef { + return { dir: null }; +} + +// Sets a dispatch slot. Throws if the slot is already occupied -- +// prevents silent misrouting when two phases attempt to claim +// the same tool. +export function hookDispatch<K extends keyof WorkflowDispatch>( + dispatch: WorkflowDispatch, + key: K, + handler: NonNullable<WorkflowDispatch[K]>, +): void { + if (dispatch[key] !== null) { + throw new Error(`dispatch.${String(key)} is already hooked`); + } + // TypeScript cannot verify generic key-value assignment. + // Call-site generic constraint (handler: NonNullable<WorkflowDispatch[K]>) + // ensures type safety; collision guard above prevents double-hooking. + (dispatch as any)[key] = handler; +} + +export function unhookDispatch( + dispatch: WorkflowDispatch, + key: keyof WorkflowDispatch, +): void { + (dispatch as any)[key] = null; +} diff --git a/src/planner/tools/registry.ts b/src/planner/lib/permissions.ts similarity index 100% rename from src/planner/tools/registry.ts rename to src/planner/lib/permissions.ts diff --git a/src/planner/prompts/step.ts b/src/planner/lib/step.ts similarity index 100% rename from src/planner/prompts/step.ts rename to src/planner/lib/step.ts diff --git a/src/planner/phases/context-capture.ts b/src/planner/phases/context-capture/phase.ts similarity index 95% rename from src/planner/phases/context-capture.ts rename to src/planner/phases/context-capture/phase.ts index 7953614..4b7320e 100644 --- a/src/planner/phases/context-capture.ts +++ b/src/planner/phases/context-capture/phase.ts @@ -8,15 +8,15 @@ import { verifyGuidance, refineGuidance, type RefinePromptOptions, -} from "../prompts/context-capture.js"; -import { formatStep } from "../prompts/step.js"; -import type { ContextCaptureState, PlanInfo, WorkflowState } from "../state.js"; -import type { ContextData } from "../types.js"; -import { CONTEXT_KEYS } from "../types.js"; -import type { ContextToolResult } from "../tools/context-store.js"; -import { hookDispatch, 
unhookDispatch, type WorkflowDispatch } from "../tools/dispatch.js"; -import { createLogger, type Logger } from "../../utils/logger.js"; -import { checkPermission } from "../tools/registry.js"; +} from "./prompts.js"; +import { formatStep } from "../../lib/step.js"; +import type { ContextCaptureState, PlanInfo, WorkflowState } from "../../state.js"; +import type { ContextData } from "../../types.js"; +import { CONTEXT_KEYS } from "../../types.js"; +import type { ContextToolResult } from "../../tools/context-store.js"; +import { hookDispatch, unhookDispatch, type WorkflowDispatch } from "../../lib/dispatch.js"; +import { createLogger, type Logger } from "../../../utils/logger.js"; +import { checkPermission } from "../../lib/permissions.js"; const MAX_ATTEMPTS = 3; diff --git a/src/planner/prompts/context-capture.ts b/src/planner/phases/context-capture/prompts.ts similarity index 98% rename from src/planner/prompts/context-capture.ts rename to src/planner/phases/context-capture/prompts.ts index 2236b5b..575d801 100644 --- a/src/planner/prompts/context-capture.ts +++ b/src/planner/phases/context-capture/prompts.ts @@ -1,4 +1,4 @@ -import type { StepGuidance } from "./step.js"; +import type { StepGuidance } from "../../lib/step.js"; export function draftGuidance(taskDescription: string): StepGuidance { return { diff --git a/src/planner/phases/dispatch.ts b/src/planner/phases/dispatch.ts index ce72f8a..acb9dfc 100644 --- a/src/planner/phases/dispatch.ts +++ b/src/planner/phases/dispatch.ts @@ -1,8 +1,8 @@ import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; -import { PlanDesignPhase } from "./plan-design.js"; +import { PlanDesignPhase } from "./plan-design/phase.js"; import { createLogger, type Logger } from "../../utils/logger.js"; -import type { WorkflowDispatch, PlanRef } from "../tools/dispatch.js"; +import type { WorkflowDispatch, PlanRef } from "../lib/dispatch.js"; export interface SubagentConfig { role: string; diff --git 
a/src/planner/phases/plan-design.ts b/src/planner/phases/plan-design/phase.ts similarity index 94% rename from src/planner/phases/plan-design.ts rename to src/planner/phases/plan-design/phase.ts index d924294..b7c493a 100644 --- a/src/planner/phases/plan-design.ts +++ b/src/planner/phases/plan-design/phase.ts @@ -3,20 +3,20 @@ import * as path from "node:path"; import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; -import { validatePlanDesign, validateRefs } from "../plan/validate.js"; +import { validatePlanDesign, validateRefs } from "../../plan/validate.js"; import { loadPlanDesignSystemPrompt, formatContextForStep1, buildPlanDesignSystemPrompt, planDesignStepGuidance, STEP_NAMES, -} from "../prompts/plan-design.js"; -import { formatStep } from "../prompts/step.js"; -import type { ContextData } from "../types.js"; -import { createLogger, type Logger } from "../../utils/logger.js"; -import { ProgressReporter } from "../../utils/progress.js"; -import { hookDispatch, unhookDispatch, type WorkflowDispatch, type PlanRef } from "../tools/dispatch.js"; -import { checkPermission, PLAN_MUTATION_TOOLS } from "../tools/registry.js"; +} from "./prompts.js"; +import { formatStep } from "../../lib/step.js"; +import type { ContextData } from "../../types.js"; +import { createLogger, type Logger } from "../../../utils/logger.js"; +import { ProgressReporter } from "../../../utils/progress.js"; +import { hookDispatch, unhookDispatch, type WorkflowDispatch, type PlanRef } from "../../lib/dispatch.js"; +import { checkPermission, PLAN_MUTATION_TOOLS } from "../../lib/permissions.js"; type PlanDesignStep = 1 | 2 | 3 | 4 | 5 | 6; diff --git a/src/planner/prompts/plan-design.ts b/src/planner/phases/plan-design/prompts.ts similarity index 98% rename from src/planner/prompts/plan-design.ts rename to src/planner/phases/plan-design/prompts.ts index 3098981..2f5727e 100644 --- a/src/planner/prompts/plan-design.ts +++ b/src/planner/phases/plan-design/prompts.ts @@ -2,8 +2,8 @@ 
import { promises as fs } from "node:fs"; import * as os from "node:os"; import * as path from "node:path"; -import type { ContextData } from "../types.js"; -import type { StepGuidance } from "./step.js"; +import type { ContextData } from "../../types.js"; +import type { StepGuidance } from "../../lib/step.js"; export const STEP_NAMES: Record<1 | 2 | 3 | 4 | 5 | 6, string> = { 1: "Task Analysis & Exploration Planning", diff --git a/src/planner/plan/mutate.ts b/src/planner/plan/mutate.ts deleted file mode 100644 index 666af63..0000000 --- a/src/planner/plan/mutate.ts +++ /dev/null @@ -1,667 +0,0 @@ -// Monotonic version counter on entities. No CAS enforcement -- single-writer -// per phase. Counter is for debugging and audit trail, not concurrency control. - -import type { - Plan, - Decision, - RejectedAlternative, - Risk, - Milestone, - CodeIntent, - CodeChange, - Wave, - DiagramGraph, - DiagramNode, - DiagramEdge, - ReadmeEntry, - Overview, - InvisibleKnowledge, -} from "./types.js"; -import { - nextDecisionId, - nextMilestoneId, - nextIntentId, - nextRiskId, - nextRejectedAltId, - nextWaveId, - nextDiagramId, - nextChangeId, -} from "./types.js"; - -// -- Top-level -- - -export function setOverview( - p: Plan, - data: { problem?: string; approach?: string }, -): Plan { - const overview: Overview = { - problem: data.problem ?? p.overview.problem, - approach: data.approach ?? p.overview.approach, - }; - return { ...p, overview }; -} - -export function setConstraints(p: Plan, constraints: string[]): Plan { - return { - ...p, - planning_context: { - ...p.planning_context, - constraints, - }, - }; -} - -export function setInvisibleKnowledge( - p: Plan, - data: { system?: string; invariants?: string[]; tradeoffs?: string[] }, -): Plan { - const ik: InvisibleKnowledge = { - system: data.system ?? p.invisible_knowledge.system, - invariants: data.invariants ?? p.invisible_knowledge.invariants, - tradeoffs: data.tradeoffs ?? 
p.invisible_knowledge.tradeoffs, - }; - return { ...p, invisible_knowledge: ik }; -} - -// -- Decision -- - -export function addDecision( - p: Plan, - data: { decision: string; reasoning: string }, -): { plan: Plan; id: string } { - const id = nextDecisionId(p); - const decision: Decision = { - id, - version: 1, - decision: data.decision, - reasoning_chain: data.reasoning, - }; - return { - plan: { - ...p, - planning_context: { - ...p.planning_context, - decision_log: [...p.planning_context.decision_log, decision], - }, - }, - id, - }; -} - -export function setDecision( - p: Plan, - id: string, - data: { decision?: string; reasoning?: string }, -): Plan { - const idx = p.planning_context.decision_log.findIndex((d) => d.id === id); - if (idx === -1) throw new Error(`decision ${id} not found`); - - const d = p.planning_context.decision_log[idx]; - const updated: Decision = { - ...d, - version: d.version + 1, - decision: data.decision ?? d.decision, - reasoning_chain: data.reasoning ?? d.reasoning_chain, - }; - - const log = [...p.planning_context.decision_log]; - log[idx] = updated; - - return { - ...p, - planning_context: { ...p.planning_context, decision_log: log }, - }; -} - -// -- RejectedAlternative -- - -export function addRejectedAlternative( - p: Plan, - data: { alternative: string; rejection_reason: string; decision_ref: string }, -): { plan: Plan; id: string } { - const id = nextRejectedAltId(p); - const ra: RejectedAlternative = { - id, - alternative: data.alternative, - rejection_reason: data.rejection_reason, - decision_ref: data.decision_ref, - }; - return { - plan: { - ...p, - planning_context: { - ...p.planning_context, - rejected_alternatives: [ - ...p.planning_context.rejected_alternatives, - ra, - ], - }, - }, - id, - }; -} - -export function setRejectedAlternative( - p: Plan, - id: string, - data: { - alternative?: string; - rejection_reason?: string; - decision_ref?: string; - }, -): Plan { - const idx = 
p.planning_context.rejected_alternatives.findIndex( - (r) => r.id === id, - ); - if (idx === -1) throw new Error(`rejected_alternative ${id} not found`); - - const r = p.planning_context.rejected_alternatives[idx]; - const updated: RejectedAlternative = { - ...r, - alternative: data.alternative ?? r.alternative, - rejection_reason: data.rejection_reason ?? r.rejection_reason, - decision_ref: data.decision_ref ?? r.decision_ref, - }; - - const list = [...p.planning_context.rejected_alternatives]; - list[idx] = updated; - - return { - ...p, - planning_context: { ...p.planning_context, rejected_alternatives: list }, - }; -} - -// -- Risk -- - -export function addRisk( - p: Plan, - data: { - risk: string; - mitigation: string; - anchor?: string; - decision_ref?: string; - }, -): { plan: Plan; id: string } { - const id = nextRiskId(p); - const risk: Risk = { - id, - risk: data.risk, - mitigation: data.mitigation, - anchor: data.anchor ?? null, - decision_ref: data.decision_ref ?? null, - }; - return { - plan: { - ...p, - planning_context: { - ...p.planning_context, - known_risks: [...p.planning_context.known_risks, risk], - }, - }, - id, - }; -} - -export function setRisk( - p: Plan, - id: string, - data: { - risk?: string; - mitigation?: string; - anchor?: string; - decision_ref?: string; - }, -): Plan { - const idx = p.planning_context.known_risks.findIndex((r) => r.id === id); - if (idx === -1) throw new Error(`risk ${id} not found`); - - const r = p.planning_context.known_risks[idx]; - const updated: Risk = { - ...r, - risk: data.risk ?? r.risk, - mitigation: data.mitigation ?? r.mitigation, - anchor: data.anchor ?? r.anchor, - decision_ref: data.decision_ref ?? 
r.decision_ref, - }; - - const list = [...p.planning_context.known_risks]; - list[idx] = updated; - - return { - ...p, - planning_context: { ...p.planning_context, known_risks: list }, - }; -} - -// -- Milestone -- - -export function addMilestone( - p: Plan, - data: { - name: string; - files?: string[]; - flags?: string[]; - requirements?: string[]; - acceptance_criteria?: string[]; - tests?: string[]; - }, -): { plan: Plan; id: string } { - const id = nextMilestoneId(p); - const milestone: Milestone = { - id, - version: 1, - number: p.milestones.length + 1, - name: data.name, - files: data.files ?? [], - flags: data.flags ?? [], - requirements: data.requirements ?? [], - acceptance_criteria: data.acceptance_criteria ?? [], - tests: data.tests ?? [], - code_intents: [], - code_changes: [], - documentation: { - module_comment: null, - docstrings: [], - function_blocks: [], - inline_comments: [], - }, - is_documentation_only: false, - delegated_to: null, - }; - return { - plan: { - ...p, - milestones: [...p.milestones, milestone], - }, - id, - }; -} - -function updateMilestone( - p: Plan, - id: string, - fn: (m: Milestone) => Milestone, -): Plan { - const idx = p.milestones.findIndex((m) => m.id === id); - if (idx === -1) throw new Error(`milestone ${id} not found`); - - const updated = [...p.milestones]; - updated[idx] = fn(p.milestones[idx]); - return { ...p, milestones: updated }; -} - -export function setMilestoneName(p: Plan, id: string, name: string): Plan { - return updateMilestone(p, id, (m) => ({ ...m, version: m.version + 1, name })); -} - -export function setMilestoneFiles(p: Plan, id: string, files: string[]): Plan { - return updateMilestone(p, id, (m) => ({ - ...m, - version: m.version + 1, - files, - })); -} - -export function setMilestoneFlags(p: Plan, id: string, flags: string[]): Plan { - return updateMilestone(p, id, (m) => ({ - ...m, - version: m.version + 1, - flags, - })); -} - -export function setMilestoneRequirements( - p: Plan, - id: string, - 
requirements: string[], -): Plan { - return updateMilestone(p, id, (m) => ({ - ...m, - version: m.version + 1, - requirements, - })); -} - -export function setMilestoneAcceptanceCriteria( - p: Plan, - id: string, - criteria: string[], -): Plan { - return updateMilestone(p, id, (m) => ({ - ...m, - version: m.version + 1, - acceptance_criteria: criteria, - })); -} - -export function setMilestoneTests(p: Plan, id: string, tests: string[]): Plan { - return updateMilestone(p, id, (m) => ({ - ...m, - version: m.version + 1, - tests, - })); -} - -// -- CodeIntent -- - -export function addIntent( - p: Plan, - data: { - milestone: string; - file: string; - function?: string; - behavior: string; - decision_refs?: string[]; - }, -): { plan: Plan; id: string } { - const idx = p.milestones.findIndex((m) => m.id === data.milestone); - if (idx === -1) throw new Error(`milestone ${data.milestone} not found`); - - const m = p.milestones[idx]; - const id = nextIntentId(m); - const intent: CodeIntent = { - id, - version: 1, - file: data.file, - function: data.function ?? null, - behavior: data.behavior, - decision_refs: data.decision_refs ?? [], - }; - - const updated = [...p.milestones]; - updated[idx] = { - ...m, - code_intents: [...m.code_intents, intent], - }; - - return { - plan: { ...p, milestones: updated }, - id, - }; -} - -export function setIntent( - p: Plan, - id: string, - data: { - file?: string; - function?: string; - behavior?: string; - decision_refs?: string[]; - }, -): Plan { - for (let i = 0; i < p.milestones.length; i++) { - const m = p.milestones[i]; - const ciIdx = m.code_intents.findIndex((ci) => ci.id === id); - if (ciIdx !== -1) { - const ci = m.code_intents[ciIdx]; - const updated: CodeIntent = { - ...ci, - version: ci.version + 1, - file: data.file ?? ci.file, - function: data.function ?? ci.function, - behavior: data.behavior ?? ci.behavior, - decision_refs: data.decision_refs ?? 
ci.decision_refs, - }; - - const intents = [...m.code_intents]; - intents[ciIdx] = updated; - - const milestones = [...p.milestones]; - milestones[i] = { ...m, code_intents: intents }; - - return { ...p, milestones }; - } - } - throw new Error(`intent ${id} not found`); -} - -// -- CodeChange -- - -export function addChange( - p: Plan, - data: { - milestone: string; - file: string; - intent_ref?: string; - diff?: string; - doc_diff?: string; - comments?: string; - }, -): { plan: Plan; id: string } { - const idx = p.milestones.findIndex((m) => m.id === data.milestone); - if (idx === -1) throw new Error(`milestone ${data.milestone} not found`); - - const m = p.milestones[idx]; - const id = nextChangeId(m); - const change: CodeChange = { - id, - version: 1, - intent_ref: data.intent_ref ?? null, - file: data.file, - diff: data.diff ?? "", - doc_diff: data.doc_diff ?? "", - comments: data.comments ?? "", - }; - - const updated = [...p.milestones]; - updated[idx] = { - ...m, - code_changes: [...m.code_changes, change], - }; - - return { - plan: { ...p, milestones: updated }, - id, - }; -} - -function updateChange( - p: Plan, - id: string, - fn: (c: CodeChange) => CodeChange, -): Plan { - for (let i = 0; i < p.milestones.length; i++) { - const m = p.milestones[i]; - const ccIdx = m.code_changes.findIndex((cc) => cc.id === id); - if (ccIdx !== -1) { - const changes = [...m.code_changes]; - changes[ccIdx] = fn(m.code_changes[ccIdx]); - - const milestones = [...p.milestones]; - milestones[i] = { ...m, code_changes: changes }; - - return { ...p, milestones }; - } - } - throw new Error(`code_change ${id} not found`); -} - -export function setChangeDiff(p: Plan, id: string, diff: string): Plan { - return updateChange(p, id, (c) => ({ ...c, version: c.version + 1, diff })); -} - -export function setChangeDocDiff(p: Plan, id: string, doc_diff: string): Plan { - return updateChange(p, id, (c) => ({ - ...c, - version: c.version + 1, - doc_diff, - })); -} - -export function 
setChangeComments(p: Plan, id: string, comments: string): Plan { - return updateChange(p, id, (c) => ({ - ...c, - version: c.version + 1, - comments, - })); -} - -export function setChangeFile(p: Plan, id: string, file: string): Plan { - return updateChange(p, id, (c) => ({ ...c, version: c.version + 1, file })); -} - -export function setChangeIntentRef( - p: Plan, - id: string, - intent_ref: string, -): Plan { - return updateChange(p, id, (c) => ({ - ...c, - version: c.version + 1, - intent_ref, - })); -} - -// -- Wave -- - -export function addWave( - p: Plan, - data: { milestones: string[] }, -): { plan: Plan; id: string } { - const id = nextWaveId(p); - const wave: Wave = { - id, - milestones: data.milestones, - }; - return { - plan: { - ...p, - waves: [...p.waves, wave], - }, - id, - }; -} - -export function setWaveMilestones( - p: Plan, - id: string, - milestones: string[], -): Plan { - const idx = p.waves.findIndex((w) => w.id === id); - if (idx === -1) throw new Error(`wave ${id} not found`); - - const updated = [...p.waves]; - updated[idx] = { ...p.waves[idx], milestones }; - - return { ...p, waves: updated }; -} - -// -- Diagram -- - -export function addDiagram( - p: Plan, - data: { - type: "architecture" | "state" | "sequence" | "dataflow"; - scope: string; - title: string; - }, -): { plan: Plan; id: string } { - const id = nextDiagramId(p); - const diagram: DiagramGraph = { - id, - type: data.type, - scope: data.scope, - title: data.title, - nodes: [], - edges: [], - ascii_render: null, - }; - return { - plan: { - ...p, - diagram_graphs: [...p.diagram_graphs, diagram], - }, - id, - }; -} - -export function setDiagram( - p: Plan, - id: string, - data: { title?: string; scope?: string; ascii_render?: string }, -): Plan { - const idx = p.diagram_graphs.findIndex((d) => d.id === id); - if (idx === -1) throw new Error(`diagram ${id} not found`); - - const d = p.diagram_graphs[idx]; - const updated: DiagramGraph = { - ...d, - title: data.title ?? 
d.title, - scope: data.scope ?? d.scope, - ascii_render: data.ascii_render ?? d.ascii_render, - }; - - const diagrams = [...p.diagram_graphs]; - diagrams[idx] = updated; - - return { ...p, diagram_graphs: diagrams }; -} - -export function addDiagramNode( - p: Plan, - diagramId: string, - data: { id: string; label: string; type?: string }, -): Plan { - const idx = p.diagram_graphs.findIndex((d) => d.id === diagramId); - if (idx === -1) throw new Error(`diagram ${diagramId} not found`); - - const d = p.diagram_graphs[idx]; - const node: DiagramNode = { - id: data.id, - label: data.label, - type: data.type ?? null, - }; - - const diagrams = [...p.diagram_graphs]; - diagrams[idx] = { - ...d, - nodes: [...d.nodes, node], - }; - - return { ...p, diagram_graphs: diagrams }; -} - -export function addDiagramEdge( - p: Plan, - diagramId: string, - data: { source: string; target: string; label: string; protocol?: string }, -): Plan { - const idx = p.diagram_graphs.findIndex((d) => d.id === diagramId); - if (idx === -1) throw new Error(`diagram ${diagramId} not found`); - - const d = p.diagram_graphs[idx]; - const edge: DiagramEdge = { - source: data.source, - target: data.target, - label: data.label, - protocol: data.protocol ?? 
null, - }; - - const diagrams = [...p.diagram_graphs]; - diagrams[idx] = { - ...d, - edges: [...d.edges, edge], - }; - - return { ...p, diagram_graphs: diagrams }; -} - -// -- ReadmeEntry -- - -export function setReadmeEntry(p: Plan, path: string, content: string): Plan { - const idx = p.readme_entries.findIndex((r) => r.path === path); - const entry: ReadmeEntry = { path, content }; - - if (idx === -1) { - return { - ...p, - readme_entries: [...p.readme_entries, entry], - }; - } - - const entries = [...p.readme_entries]; - entries[idx] = entry; - return { ...p, readme_entries: entries }; -} diff --git a/src/planner/plan/mutate/code.ts b/src/planner/plan/mutate/code.ts new file mode 100644 index 0000000..7eb74a3 --- /dev/null +++ b/src/planner/plan/mutate/code.ts @@ -0,0 +1,161 @@ +// Code intent and code change mutations. +// Pure functions -- input plan in, new plan out. No side effects. + +import type { Plan, CodeIntent, CodeChange } from "../types.js"; +import { nextIntentId, nextChangeId } from "../types.js"; + +// -- CodeIntent -- + +export function addIntent( + p: Plan, + data: { + milestone: string; + file: string; + function?: string; + behavior: string; + decision_refs?: string[]; + }, +): { plan: Plan; id: string } { + const idx = p.milestones.findIndex((m) => m.id === data.milestone); + if (idx === -1) throw new Error(`milestone ${data.milestone} not found`); + + const m = p.milestones[idx]; + const id = nextIntentId(m); + const intent: CodeIntent = { + id, + file: data.file, + function: data.function ?? null, + behavior: data.behavior, + decision_refs: data.decision_refs ?? 
[], + }; + + const updated = [...p.milestones]; + updated[idx] = { + ...m, + code_intents: [...m.code_intents, intent], + }; + + return { + plan: { ...p, milestones: updated }, + id, + }; +} + +export function setIntent( + p: Plan, + id: string, + data: { + file?: string; + function?: string; + behavior?: string; + decision_refs?: string[]; + }, +): Plan { + for (let i = 0; i < p.milestones.length; i++) { + const m = p.milestones[i]; + const ciIdx = m.code_intents.findIndex((ci) => ci.id === id); + if (ciIdx !== -1) { + const ci = m.code_intents[ciIdx]; + const updated: CodeIntent = { + ...ci, + file: data.file ?? ci.file, + function: data.function ?? ci.function, + behavior: data.behavior ?? ci.behavior, + decision_refs: data.decision_refs ?? ci.decision_refs, + }; + + const intents = [...m.code_intents]; + intents[ciIdx] = updated; + + const milestones = [...p.milestones]; + milestones[i] = { ...m, code_intents: intents }; + + return { ...p, milestones }; + } + } + throw new Error(`intent ${id} not found`); +} + +// -- CodeChange -- + +export function addChange( + p: Plan, + data: { + milestone: string; + file: string; + intent_ref?: string; + diff?: string; + doc_diff?: string; + comments?: string; + }, +): { plan: Plan; id: string } { + const idx = p.milestones.findIndex((m) => m.id === data.milestone); + if (idx === -1) throw new Error(`milestone ${data.milestone} not found`); + + const m = p.milestones[idx]; + const id = nextChangeId(m); + const change: CodeChange = { + id, + intent_ref: data.intent_ref ?? null, + file: data.file, + diff: data.diff ?? "", + doc_diff: data.doc_diff ?? "", + comments: data.comments ?? 
"", + }; + + const updated = [...p.milestones]; + updated[idx] = { + ...m, + code_changes: [...m.code_changes, change], + }; + + return { + plan: { ...p, milestones: updated }, + id, + }; +} + +function updateChange( + p: Plan, + id: string, + fn: (c: CodeChange) => CodeChange, +): Plan { + for (let i = 0; i < p.milestones.length; i++) { + const m = p.milestones[i]; + const ccIdx = m.code_changes.findIndex((cc) => cc.id === id); + if (ccIdx !== -1) { + const changes = [...m.code_changes]; + changes[ccIdx] = fn(m.code_changes[ccIdx]); + + const milestones = [...p.milestones]; + milestones[i] = { ...m, code_changes: changes }; + + return { ...p, milestones }; + } + } + throw new Error(`code_change ${id} not found`); +} + +export function setChangeDiff(p: Plan, id: string, diff: string): Plan { + return updateChange(p, id, (c) => ({ ...c, diff })); +} + +export function setChangeDocDiff(p: Plan, id: string, doc_diff: string): Plan { + return updateChange(p, id, (c) => ({ ...c, doc_diff })); +} + +export function setChangeComments(p: Plan, id: string, comments: string): Plan { + return updateChange(p, id, (c) => ({ ...c, comments })); +} + +export function setChangeFile(p: Plan, id: string, file: string): Plan { + return updateChange(p, id, (c) => ({ ...c, file })); +} + +export function setChangeIntentRef( + p: Plan, + id: string, + intent_ref: string, +): Plan { + return updateChange(p, id, (c) => ({ ...c, intent_ref })); +} diff --git a/src/planner/plan/mutate/decisions.ts b/src/planner/plan/mutate/decisions.ts new file mode 100644 index 0000000..e5e7d1f --- /dev/null +++ b/src/planner/plan/mutate/decisions.ts @@ -0,0 +1,178 @@ +// Decision log mutations: decisions, rejected alternatives, risks. +// Pure functions -- input plan in, new plan out. No side effects. 
+ +import type { Plan, Decision, RejectedAlternative, Risk } from "../types.js"; +import { + nextDecisionId, + nextRejectedAltId, + nextRiskId, +} from "../types.js"; + +// -- Decision -- + +export function addDecision( + p: Plan, + data: { decision: string; reasoning: string }, +): { plan: Plan; id: string } { + const id = nextDecisionId(p); + const decision: Decision = { + id, + decision: data.decision, + reasoning_chain: data.reasoning, + }; + return { + plan: { + ...p, + planning_context: { + ...p.planning_context, + decision_log: [...p.planning_context.decision_log, decision], + }, + }, + id, + }; +} + +export function setDecision( + p: Plan, + id: string, + data: { decision?: string; reasoning?: string }, +): Plan { + const idx = p.planning_context.decision_log.findIndex((d) => d.id === id); + if (idx === -1) throw new Error(`decision ${id} not found`); + + const d = p.planning_context.decision_log[idx]; + const updated: Decision = { + ...d, + decision: data.decision ?? d.decision, + reasoning_chain: data.reasoning ?? 
d.reasoning_chain, + }; + + const log = [...p.planning_context.decision_log]; + log[idx] = updated; + + return { + ...p, + planning_context: { ...p.planning_context, decision_log: log }, + }; +} + +// -- RejectedAlternative -- + +export function addRejectedAlternative( + p: Plan, + data: { alternative: string; rejection_reason: string; decision_ref: string }, +): { plan: Plan; id: string } { + const id = nextRejectedAltId(p); + const ra: RejectedAlternative = { + id, + alternative: data.alternative, + rejection_reason: data.rejection_reason, + decision_ref: data.decision_ref, + }; + return { + plan: { + ...p, + planning_context: { + ...p.planning_context, + rejected_alternatives: [ + ...p.planning_context.rejected_alternatives, + ra, + ], + }, + }, + id, + }; +} + +export function setRejectedAlternative( + p: Plan, + id: string, + data: { + alternative?: string; + rejection_reason?: string; + decision_ref?: string; + }, +): Plan { + const idx = p.planning_context.rejected_alternatives.findIndex( + (r) => r.id === id, + ); + if (idx === -1) throw new Error(`rejected_alternative ${id} not found`); + + const r = p.planning_context.rejected_alternatives[idx]; + const updated: RejectedAlternative = { + ...r, + alternative: data.alternative ?? r.alternative, + rejection_reason: data.rejection_reason ?? r.rejection_reason, + decision_ref: data.decision_ref ?? r.decision_ref, + }; + + const list = [...p.planning_context.rejected_alternatives]; + list[idx] = updated; + + return { + ...p, + planning_context: { ...p.planning_context, rejected_alternatives: list }, + }; +} + +// -- Risk -- + +export function addRisk( + p: Plan, + data: { + risk: string; + mitigation: string; + anchor?: string; + decision_ref?: string; + }, +): { plan: Plan; id: string } { + const id = nextRiskId(p); + const risk: Risk = { + id, + risk: data.risk, + mitigation: data.mitigation, + anchor: data.anchor ?? null, + decision_ref: data.decision_ref ?? 
null, + }; + return { + plan: { + ...p, + planning_context: { + ...p.planning_context, + known_risks: [...p.planning_context.known_risks, risk], + }, + }, + id, + }; +} + +export function setRisk( + p: Plan, + id: string, + data: { + risk?: string; + mitigation?: string; + anchor?: string; + decision_ref?: string; + }, +): Plan { + const idx = p.planning_context.known_risks.findIndex((r) => r.id === id); + if (idx === -1) throw new Error(`risk ${id} not found`); + + const r = p.planning_context.known_risks[idx]; + const updated: Risk = { + ...r, + risk: data.risk ?? r.risk, + mitigation: data.mitigation ?? r.mitigation, + anchor: data.anchor ?? r.anchor, + decision_ref: data.decision_ref ?? r.decision_ref, + }; + + const list = [...p.planning_context.known_risks]; + list[idx] = updated; + + return { + ...p, + planning_context: { ...p.planning_context, known_risks: list }, + }; +} diff --git a/src/planner/plan/mutate/index.ts b/src/planner/plan/mutate/index.ts new file mode 100644 index 0000000..0c96dcb --- /dev/null +++ b/src/planner/plan/mutate/index.ts @@ -0,0 +1,48 @@ +// Re-exports all public mutation functions grouped by domain. +// Consumers import from this single entry point. 
+ +export { + setOverview, + setConstraints, + setInvisibleKnowledge, +} from "./top-level.js"; + +export { + addDecision, + setDecision, + addRejectedAlternative, + setRejectedAlternative, + addRisk, + setRisk, +} from "./decisions.js"; + +export { + addMilestone, + setMilestoneName, + setMilestoneFiles, + setMilestoneFlags, + setMilestoneRequirements, + setMilestoneAcceptanceCriteria, + setMilestoneTests, +} from "./milestones.js"; + +export { + addIntent, + setIntent, + addChange, + setChangeDiff, + setChangeDocDiff, + setChangeComments, + setChangeFile, + setChangeIntentRef, +} from "./code.js"; + +export { + addWave, + setWaveMilestones, + addDiagram, + setDiagram, + addDiagramNode, + addDiagramEdge, + setReadmeEntry, +} from "./structure.js"; diff --git a/src/planner/plan/mutate/milestones.ts b/src/planner/plan/mutate/milestones.ts new file mode 100644 index 0000000..fbb4e86 --- /dev/null +++ b/src/planner/plan/mutate/milestones.ts @@ -0,0 +1,91 @@ +// Milestone mutations: add, and per-field setters. +// Pure functions -- input plan in, new plan out. No side effects. + +import type { Plan, Milestone } from "../types.js"; +import { nextMilestoneId } from "../types.js"; + +export function addMilestone( + p: Plan, + data: { + name: string; + files?: string[]; + flags?: string[]; + requirements?: string[]; + acceptance_criteria?: string[]; + tests?: string[]; + }, +): { plan: Plan; id: string } { + const id = nextMilestoneId(p); + const milestone: Milestone = { + id, + number: p.milestones.length + 1, + name: data.name, + files: data.files ?? [], + flags: data.flags ?? [], + requirements: data.requirements ?? [], + acceptance_criteria: data.acceptance_criteria ?? [], + tests: data.tests ?? 
[], + code_intents: [], + code_changes: [], + documentation: { + module_comment: null, + docstrings: [], + function_blocks: [], + inline_comments: [], + }, + is_documentation_only: false, + delegated_to: null, + }; + return { + plan: { + ...p, + milestones: [...p.milestones, milestone], + }, + id, + }; +} + +function updateMilestone( + p: Plan, + id: string, + fn: (m: Milestone) => Milestone, +): Plan { + const idx = p.milestones.findIndex((m) => m.id === id); + if (idx === -1) throw new Error(`milestone ${id} not found`); + + const updated = [...p.milestones]; + updated[idx] = fn(p.milestones[idx]); + return { ...p, milestones: updated }; +} + +export function setMilestoneName(p: Plan, id: string, name: string): Plan { + return updateMilestone(p, id, (m) => ({ ...m, name })); +} + +export function setMilestoneFiles(p: Plan, id: string, files: string[]): Plan { + return updateMilestone(p, id, (m) => ({ ...m, files })); +} + +export function setMilestoneFlags(p: Plan, id: string, flags: string[]): Plan { + return updateMilestone(p, id, (m) => ({ ...m, flags })); +} + +export function setMilestoneRequirements( + p: Plan, + id: string, + requirements: string[], +): Plan { + return updateMilestone(p, id, (m) => ({ ...m, requirements })); +} + +export function setMilestoneAcceptanceCriteria( + p: Plan, + id: string, + criteria: string[], +): Plan { + return updateMilestone(p, id, (m) => ({ ...m, acceptance_criteria: criteria })); +} + +export function setMilestoneTests(p: Plan, id: string, tests: string[]): Plan { + return updateMilestone(p, id, (m) => ({ ...m, tests })); +} diff --git a/src/planner/plan/mutate/structure.ts b/src/planner/plan/mutate/structure.ts new file mode 100644 index 0000000..f5679b1 --- /dev/null +++ b/src/planner/plan/mutate/structure.ts @@ -0,0 +1,164 @@ +// Structural plan mutations: waves, diagrams, readme entries. +// Pure functions -- input plan in, new plan out. No side effects. 
+ +import type { + Plan, + Wave, + DiagramGraph, + DiagramNode, + DiagramEdge, + ReadmeEntry, +} from "../types.js"; +import { nextWaveId, nextDiagramId } from "../types.js"; + +// -- Wave -- + +export function addWave( + p: Plan, + data: { milestones: string[] }, +): { plan: Plan; id: string } { + const id = nextWaveId(p); + const wave: Wave = { + id, + milestones: data.milestones, + }; + return { + plan: { + ...p, + waves: [...p.waves, wave], + }, + id, + }; +} + +export function setWaveMilestones( + p: Plan, + id: string, + milestones: string[], +): Plan { + const idx = p.waves.findIndex((w) => w.id === id); + if (idx === -1) throw new Error(`wave ${id} not found`); + + const updated = [...p.waves]; + updated[idx] = { ...p.waves[idx], milestones }; + + return { ...p, waves: updated }; +} + +// -- Diagram -- + +export function addDiagram( + p: Plan, + data: { + type: "architecture" | "state" | "sequence" | "dataflow"; + scope: string; + title: string; + }, +): { plan: Plan; id: string } { + const id = nextDiagramId(p); + const diagram: DiagramGraph = { + id, + type: data.type, + scope: data.scope, + title: data.title, + nodes: [], + edges: [], + ascii_render: null, + }; + return { + plan: { + ...p, + diagram_graphs: [...p.diagram_graphs, diagram], + }, + id, + }; +} + +export function setDiagram( + p: Plan, + id: string, + data: { title?: string; scope?: string; ascii_render?: string }, +): Plan { + const idx = p.diagram_graphs.findIndex((d) => d.id === id); + if (idx === -1) throw new Error(`diagram ${id} not found`); + + const d = p.diagram_graphs[idx]; + const updated: DiagramGraph = { + ...d, + title: data.title ?? d.title, + scope: data.scope ?? d.scope, + ascii_render: data.ascii_render ?? 
d.ascii_render, + }; + + const diagrams = [...p.diagram_graphs]; + diagrams[idx] = updated; + + return { ...p, diagram_graphs: diagrams }; +} + +export function addDiagramNode( + p: Plan, + diagramId: string, + data: { id: string; label: string; type?: string }, +): Plan { + const idx = p.diagram_graphs.findIndex((d) => d.id === diagramId); + if (idx === -1) throw new Error(`diagram ${diagramId} not found`); + + const d = p.diagram_graphs[idx]; + const node: DiagramNode = { + id: data.id, + label: data.label, + type: data.type ?? null, + }; + + const diagrams = [...p.diagram_graphs]; + diagrams[idx] = { + ...d, + nodes: [...d.nodes, node], + }; + + return { ...p, diagram_graphs: diagrams }; +} + +export function addDiagramEdge( + p: Plan, + diagramId: string, + data: { source: string; target: string; label: string; protocol?: string }, +): Plan { + const idx = p.diagram_graphs.findIndex((d) => d.id === diagramId); + if (idx === -1) throw new Error(`diagram ${diagramId} not found`); + + const d = p.diagram_graphs[idx]; + const edge: DiagramEdge = { + source: data.source, + target: data.target, + label: data.label, + protocol: data.protocol ?? 
null, + }; + + const diagrams = [...p.diagram_graphs]; + diagrams[idx] = { + ...d, + edges: [...d.edges, edge], + }; + + return { ...p, diagram_graphs: diagrams }; +} + +// -- ReadmeEntry -- + +export function setReadmeEntry(p: Plan, path: string, content: string): Plan { + const idx = p.readme_entries.findIndex((r) => r.path === path); + const entry: ReadmeEntry = { path, content }; + + if (idx === -1) { + return { + ...p, + readme_entries: [...p.readme_entries, entry], + }; + } + + const entries = [...p.readme_entries]; + entries[idx] = entry; + return { ...p, readme_entries: entries }; +} diff --git a/src/planner/plan/mutate/top-level.ts b/src/planner/plan/mutate/top-level.ts new file mode 100644 index 0000000..2392525 --- /dev/null +++ b/src/planner/plan/mutate/top-level.ts @@ -0,0 +1,37 @@ +// Top-level plan field mutations: overview, constraints, invisible knowledge. +// Pure functions -- input plan in, new plan out. No side effects. + +import type { Plan, Overview, InvisibleKnowledge } from "../types.js"; + +export function setOverview( + p: Plan, + data: { problem?: string; approach?: string }, +): Plan { + const overview: Overview = { + problem: data.problem ?? p.overview.problem, + approach: data.approach ?? p.overview.approach, + }; + return { ...p, overview }; +} + +export function setConstraints(p: Plan, constraints: string[]): Plan { + return { + ...p, + planning_context: { + ...p.planning_context, + constraints, + }, + }; +} + +export function setInvisibleKnowledge( + p: Plan, + data: { system?: string; invariants?: string[]; tradeoffs?: string[] }, +): Plan { + const ik: InvisibleKnowledge = { + system: data.system ?? p.invisible_knowledge.system, + invariants: data.invariants ?? p.invisible_knowledge.invariants, + tradeoffs: data.tradeoffs ?? 
p.invisible_knowledge.tradeoffs, + }; + return { ...p, invisible_knowledge: ik }; +} diff --git a/src/planner/plan/types.ts b/src/planner/plan/types.ts index 6a4d943..518b54e 100644 --- a/src/planner/plan/types.ts +++ b/src/planner/plan/types.ts @@ -1,6 +1,5 @@ export interface Decision { id: string; - version: number; decision: string; reasoning_chain: string; } @@ -40,7 +39,6 @@ export interface Overview { export interface CodeIntent { id: string; - version: number; file: string; function?: string | null; behavior: string; @@ -49,7 +47,6 @@ export interface CodeIntent { export interface CodeChange { id: string; - version: number; intent_ref: string | null; file: string; diff: string; @@ -117,7 +114,6 @@ export interface DiagramGraph { export interface Milestone { id: string; - version: number; number: number; name: string; files: string[]; diff --git a/src/planner/qr/mutate.ts b/src/planner/qr/mutate.ts index b831074..e0644ff 100644 --- a/src/planner/qr/mutate.ts +++ b/src/planner/qr/mutate.ts @@ -18,7 +18,6 @@ export function addQRItem( scope: data.scope, check: data.check, status: "TODO", - version: 1, finding: null, parent_id: null, group_id: null, @@ -68,7 +67,6 @@ export function setQRItem( const updated: QRItem = { ...item, - version: item.version + 1, status, finding, check: data.check ?? item.check, @@ -81,7 +79,6 @@ export function setQRItem( return { ...qr, items }; } -// Does not increment version (grouping is metadata). 
export function assignGroup(qr: QRFile, ids: string[], groupId: string): QRFile { const idSet = new Set(ids); const items = qr.items.map((item) => diff --git a/src/planner/qr/types.ts b/src/planner/qr/types.ts index 3345631..89ab627 100644 --- a/src/planner/qr/types.ts +++ b/src/planner/qr/types.ts @@ -6,7 +6,6 @@ export interface QRItem { scope: string; check: string; status: QRItemStatus; - version: number; finding: string | null; parent_id: string | null; group_id: string | null; diff --git a/src/planner/session.ts b/src/planner/session.ts index a14050e..ef08dfa 100644 --- a/src/planner/session.ts +++ b/src/planner/session.ts @@ -4,13 +4,13 @@ import * as path from "node:path"; import type { ExtensionAPI, ExtensionCommandContext, ExtensionContext } from "@mariozechner/pi-coding-agent"; -import { ContextCapturePhase } from "./phases/context-capture.js"; +import { ContextCapturePhase } from "./phases/context-capture/phase.js"; import { createInitialState, initializePlanState, type WorkflowState } from "./state.js"; import { createPlanInfo } from "../utils/plan.js"; import { spawnArchitect } from "./subagent.js"; import { createLogger } from "../utils/logger.js"; import { createSubagentDir, readSubagentState } from "../utils/progress.js"; -import type { WorkflowDispatch, PlanRef } from "./tools/dispatch.js"; +import type { WorkflowDispatch, PlanRef } from "./lib/dispatch.js"; interface Session { plan(args: string, ctx: ExtensionCommandContext): Promise; diff --git a/src/planner/tools/entity-code.ts b/src/planner/tools/entity-code.ts new file mode 100644 index 0000000..ca57d75 --- /dev/null +++ b/src/planner/tools/entity-code.ts @@ -0,0 +1,171 @@ +// Plan entity tools for code-phase entities: code intents and code changes. +// Uses planTool helper from entity-design (shared load-mutate-save-lock wrapper). 
+ +import { Type } from "@sinclair/typebox"; +import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; + +import type { PlanRef } from "../lib/dispatch.js"; +import { planTool } from "./entity-design.js"; +import { + addIntent, + setIntent, + addChange, + setChangeDiff, + setChangeDocDiff, + setChangeComments, + setChangeFile, + setChangeIntentRef, +} from "../plan/mutate/index.js"; + +export function registerPlanCodeEntityTools( + pi: ExtensionAPI, + planRef: PlanRef, +): void { + // -- CodeIntent -- + planTool(pi, planRef, { + name: "koan_add_intent", + label: "Add code intent", + description: "Add code intent to milestone.", + parameters: Type.Object({ + milestone: Type.String(), + file: Type.String(), + function: Type.Optional(Type.String()), + behavior: Type.String(), + decision_refs: Type.Optional(Type.Array(Type.String())), + }), + execute: (p, params) => { + const r = addIntent(p, params); + return { + plan: r.plan, + message: `Added intent ${r.id} to milestone ${params.milestone}`, + }; + }, + }); + + planTool(pi, planRef, { + name: "koan_set_intent", + label: "Update code intent", + description: "Update existing code intent by ID.", + parameters: Type.Object({ + id: Type.String(), + file: Type.Optional(Type.String()), + function: Type.Optional(Type.String()), + behavior: Type.Optional(Type.String()), + decision_refs: Type.Optional(Type.Array(Type.String())), + }), + execute: (p, params) => { + const updated = setIntent(p, params.id, params); + return { + plan: updated, + message: `Updated intent ${params.id}`, + }; + }, + }); + + // -- CodeChange -- + planTool(pi, planRef, { + name: "koan_add_change", + label: "Add code change", + description: "Add code change to milestone.", + parameters: Type.Object({ + milestone: Type.String(), + file: Type.String(), + intent_ref: Type.Optional(Type.String()), + diff: Type.Optional(Type.String()), + doc_diff: Type.Optional(Type.String()), + comments: Type.Optional(Type.String()), + }), + execute: (p, params) 
=> { + const r = addChange(p, params); + return { + plan: r.plan, + message: `Added change ${r.id} to milestone ${params.milestone}`, + }; + }, + }); + + planTool(pi, planRef, { + name: "koan_set_change_diff", + label: "Set code change diff", + description: "Update change diff.", + parameters: Type.Object({ + id: Type.String(), + diff: Type.String(), + }), + execute: (p, params) => { + const updated = setChangeDiff(p, params.id, params.diff); + return { + plan: updated, + message: `Set diff for change ${params.id}`, + }; + }, + }); + + planTool(pi, planRef, { + name: "koan_set_change_doc_diff", + label: "Set code change doc_diff", + description: "Update change doc_diff.", + parameters: Type.Object({ + id: Type.String(), + doc_diff: Type.String(), + }), + execute: (p, params) => { + const updated = setChangeDocDiff(p, params.id, params.doc_diff); + return { + plan: updated, + message: `Set doc_diff for change ${params.id}`, + }; + }, + }); + + planTool(pi, planRef, { + name: "koan_set_change_comments", + label: "Set code change comments", + description: "Update change comments.", + parameters: Type.Object({ + id: Type.String(), + comments: Type.String(), + }), + execute: (p, params) => { + const updated = setChangeComments(p, params.id, params.comments); + return { + plan: updated, + message: `Set comments for change ${params.id}`, + }; + }, + }); + + planTool(pi, planRef, { + name: "koan_set_change_file", + label: "Set code change file", + description: "Update change file path.", + parameters: Type.Object({ + id: Type.String(), + file: Type.String(), + }), + execute: (p, params) => { + const updated = setChangeFile(p, params.id, params.file); + return { + plan: updated, + message: `Set file for change ${params.id}`, + }; + }, + }); + + planTool(pi, planRef, { + name: "koan_set_change_intent_ref", + label: "Set code change intent_ref", + description: "Update change intent reference.", + parameters: Type.Object({ + id: Type.String(), + intent_ref: Type.String(), + 
}), + execute: (p, params) => { + const updated = setChangeIntentRef(p, params.id, params.intent_ref); + return { + plan: updated, + message: `Set intent_ref for change ${params.id}`, + }; + }, + }); +} diff --git a/src/planner/tools/entity-design.ts b/src/planner/tools/entity-design.ts new file mode 100644 index 0000000..06552ee --- /dev/null +++ b/src/planner/tools/entity-design.ts @@ -0,0 +1,306 @@ +// Plan entity tools for design-phase entities: decisions, risks, milestones. +// Exports planTool helper for shared use by entity-code and entity-structure. +// load-mutate-save wrapped in file lock; disk is single source of truth. + +import { Type, type Static, type TSchema } from "@sinclair/typebox"; +import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; +import * as path from "node:path"; + +import type { PlanRef } from "../lib/dispatch.js"; +import { loadPlan, savePlan } from "../plan/serialize.js"; +import type { Plan } from "../plan/types.js"; +import { withFileLock } from "../../utils/lock.js"; +import { + addDecision, + setDecision, + addRejectedAlternative, + setRejectedAlternative, + addRisk, + setRisk, + addMilestone, + setMilestoneName, + setMilestoneFiles, + setMilestoneFlags, + setMilestoneRequirements, + setMilestoneAcceptanceCriteria, + setMilestoneTests, +} from "../plan/mutate/index.js"; + +export function planTool( + pi: ExtensionAPI, + planRef: PlanRef, + opts: { + name: string; + label: string; + description: string; + parameters: TParams; + execute: (plan: Plan, params: Static) => { plan: Plan; message: string }; + }, +): void { + pi.registerTool({ + name: opts.name, + label: opts.label, + description: opts.description, + parameters: opts.parameters, + async execute(_toolCallId, params) { + if (!planRef.dir) throw new Error("No plan directory is active."); + const planPath = path.join(planRef.dir, "plan.json"); + return withFileLock(planPath, async () => { + const plan = await loadPlan(planRef.dir!); + const result = 
opts.execute(plan, params); + await savePlan(result.plan, planRef.dir!); + return { + content: [{ type: "text" as const, text: result.message }], + details: undefined, + }; + }); + }, + }); +} + +export function registerPlanDesignEntityTools( + pi: ExtensionAPI, + planRef: PlanRef, +): void { + // -- Decision -- + planTool(pi, planRef, { + name: "koan_add_decision", + label: "Add decision", + description: "Add decision to decision log.", + parameters: Type.Object({ + decision: Type.String(), + reasoning: Type.String(), + }), + execute: (p, params) => { + const r = addDecision(p, params); + return { + plan: r.plan, + message: `Added decision ${r.id}: "${params.decision}"`, + }; + }, + }); + + planTool(pi, planRef, { + name: "koan_set_decision", + label: "Update decision", + description: "Update existing decision by ID.", + parameters: Type.Object({ + id: Type.String(), + decision: Type.Optional(Type.String()), + reasoning: Type.Optional(Type.String()), + }), + execute: (p, params) => { + const updated = setDecision(p, params.id, params); + return { + plan: updated, + message: `Updated decision ${params.id}`, + }; + }, + }); + + // -- RejectedAlternative -- + planTool(pi, planRef, { + name: "koan_add_rejected_alternative", + label: "Add rejected alternative", + description: "Add rejected alternative to decision log.", + parameters: Type.Object({ + alternative: Type.String(), + rejection_reason: Type.String(), + decision_ref: Type.String(), + }), + execute: (p, params) => { + const r = addRejectedAlternative(p, params); + return { + plan: r.plan, + message: `Added rejected alternative ${r.id}`, + }; + }, + }); + + planTool(pi, planRef, { + name: "koan_set_rejected_alternative", + label: "Update rejected alternative", + description: "Update existing rejected alternative by ID.", + parameters: Type.Object({ + id: Type.String(), + alternative: Type.Optional(Type.String()), + rejection_reason: Type.Optional(Type.String()), + decision_ref: Type.Optional(Type.String()), + 
}), + execute: (p, params) => { + const updated = setRejectedAlternative(p, params.id, params); + return { + plan: updated, + message: `Updated rejected alternative ${params.id}`, + }; + }, + }); + + // -- Risk -- + planTool(pi, planRef, { + name: "koan_add_risk", + label: "Add risk", + description: "Add risk to known risks.", + parameters: Type.Object({ + risk: Type.String(), + mitigation: Type.String(), + anchor: Type.Optional(Type.String()), + decision_ref: Type.Optional(Type.String()), + }), + execute: (p, params) => { + const r = addRisk(p, params); + return { + plan: r.plan, + message: `Added risk ${r.id}: "${params.risk}"`, + }; + }, + }); + + planTool(pi, planRef, { + name: "koan_set_risk", + label: "Update risk", + description: "Update existing risk by ID.", + parameters: Type.Object({ + id: Type.String(), + risk: Type.Optional(Type.String()), + mitigation: Type.Optional(Type.String()), + anchor: Type.Optional(Type.String()), + decision_ref: Type.Optional(Type.String()), + }), + execute: (p, params) => { + const updated = setRisk(p, params.id, params); + return { + plan: updated, + message: `Updated risk ${params.id}`, + }; + }, + }); + + // -- Milestone -- + planTool(pi, planRef, { + name: "koan_add_milestone", + label: "Add milestone", + description: "Create new milestone.", + parameters: Type.Object({ + name: Type.String(), + files: Type.Optional(Type.Array(Type.String())), + flags: Type.Optional(Type.Array(Type.String())), + requirements: Type.Optional(Type.Array(Type.String())), + acceptance_criteria: Type.Optional(Type.Array(Type.String())), + tests: Type.Optional(Type.Array(Type.String())), + }), + execute: (p, params) => { + const r = addMilestone(p, params); + return { + plan: r.plan, + message: `Added milestone ${r.id}: "${params.name}"`, + }; + }, + }); + + planTool(pi, planRef, { + name: "koan_set_milestone_name", + label: "Set milestone name", + description: "Update milestone name.", + parameters: Type.Object({ + id: Type.String(), + name: 
Type.String(), + }), + execute: (p, params) => { + const updated = setMilestoneName(p, params.id, params.name); + return { + plan: updated, + message: `Set name for milestone ${params.id}`, + }; + }, + }); + + planTool(pi, planRef, { + name: "koan_set_milestone_files", + label: "Set milestone files", + description: "Update milestone files list.", + parameters: Type.Object({ + id: Type.String(), + files: Type.Array(Type.String()), + }), + execute: (p, params) => { + const updated = setMilestoneFiles(p, params.id, params.files); + return { + plan: updated, + message: `Set files for milestone ${params.id} (${params.files.length} files)`, + }; + }, + }); + + planTool(pi, planRef, { + name: "koan_set_milestone_flags", + label: "Set milestone flags", + description: "Update milestone flags list.", + parameters: Type.Object({ + id: Type.String(), + flags: Type.Array(Type.String()), + }), + execute: (p, params) => { + const updated = setMilestoneFlags(p, params.id, params.flags); + return { + plan: updated, + message: `Set flags for milestone ${params.id}`, + }; + }, + }); + + planTool(pi, planRef, { + name: "koan_set_milestone_requirements", + label: "Set milestone requirements", + description: "Update milestone requirements list.", + parameters: Type.Object({ + id: Type.String(), + requirements: Type.Array(Type.String()), + }), + execute: (p, params) => { + const updated = setMilestoneRequirements(p, params.id, params.requirements); + return { + plan: updated, + message: `Set requirements for milestone ${params.id} (${params.requirements.length} items)`, + }; + }, + }); + + planTool(pi, planRef, { + name: "koan_set_milestone_acceptance_criteria", + label: "Set milestone acceptance criteria", + description: "Update milestone acceptance criteria list.", + parameters: Type.Object({ + id: Type.String(), + acceptance_criteria: Type.Array(Type.String()), + }), + execute: (p, params) => { + const updated = setMilestoneAcceptanceCriteria( + p, + params.id, + 
params.acceptance_criteria, + ); + return { + plan: updated, + message: `Set acceptance criteria for milestone ${params.id} (${params.acceptance_criteria.length} items)`, + }; + }, + }); + + planTool(pi, planRef, { + name: "koan_set_milestone_tests", + label: "Set milestone tests", + description: "Update milestone tests list.", + parameters: Type.Object({ + id: Type.String(), + tests: Type.Array(Type.String()), + }), + execute: (p, params) => { + const updated = setMilestoneTests(p, params.id, params.tests); + return { + plan: updated, + message: `Set tests for milestone ${params.id} (${params.tests.length} tests)`, + }; + }, + }); +} diff --git a/src/planner/tools/entity-structure.ts b/src/planner/tools/entity-structure.ts new file mode 100644 index 0000000..cc710a8 --- /dev/null +++ b/src/planner/tools/entity-structure.ts @@ -0,0 +1,156 @@ +// Plan entity tools for structural entities: waves, diagrams, readme entries. +// Uses planTool helper from entity-design (shared load-mutate-save-lock wrapper). 
+ +import { Type } from "@sinclair/typebox"; +import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; + +import type { PlanRef } from "../lib/dispatch.js"; +import { planTool } from "./entity-design.js"; +import { + addWave, + setWaveMilestones, + addDiagram, + setDiagram, + addDiagramNode, + addDiagramEdge, + setReadmeEntry, +} from "../plan/mutate/index.js"; + +export function registerPlanStructureEntityTools( + pi: ExtensionAPI, + planRef: PlanRef, +): void { + // -- Wave -- + planTool(pi, planRef, { + name: "koan_add_wave", + label: "Add wave", + description: "Create wave with milestone list.", + parameters: Type.Object({ + milestones: Type.Array(Type.String()), + }), + execute: (p, params) => { + const r = addWave(p, params); + return { + plan: r.plan, + message: `Added wave ${r.id} with ${params.milestones.length} milestones`, + }; + }, + }); + + planTool(pi, planRef, { + name: "koan_set_wave_milestones", + label: "Set wave milestones", + description: "Update wave milestones list.", + parameters: Type.Object({ + id: Type.String(), + milestones: Type.Array(Type.String()), + }), + execute: (p, params) => { + const updated = setWaveMilestones(p, params.id, params.milestones); + return { + plan: updated, + message: `Set milestones for wave ${params.id}`, + }; + }, + }); + + // -- Diagram -- + planTool(pi, planRef, { + name: "koan_add_diagram", + label: "Add diagram", + description: "Create diagram graph.", + parameters: Type.Object({ + type: Type.Union([ + Type.Literal("architecture"), + Type.Literal("state"), + Type.Literal("sequence"), + Type.Literal("dataflow"), + ]), + scope: Type.String(), + title: Type.String(), + }), + execute: (p, params) => { + const r = addDiagram(p, params); + return { + plan: r.plan, + message: `Added diagram ${r.id}: "${params.title}"`, + }; + }, + }); + + planTool(pi, planRef, { + name: "koan_set_diagram", + label: "Update diagram", + description: "Update diagram properties.", + parameters: Type.Object({ + id: 
Type.String(), + title: Type.Optional(Type.String()), + scope: Type.Optional(Type.String()), + ascii_render: Type.Optional(Type.String()), + }), + execute: (p, params) => { + const updated = setDiagram(p, params.id, params); + return { + plan: updated, + message: `Updated diagram ${params.id}`, + }; + }, + }); + + planTool(pi, planRef, { + name: "koan_add_diagram_node", + label: "Add diagram node", + description: "Add node to diagram.", + parameters: Type.Object({ + diagram_id: Type.String(), + id: Type.String(), + label: Type.String(), + type: Type.Optional(Type.String()), + }), + execute: (p, params) => { + const updated = addDiagramNode(p, params.diagram_id, params); + return { + plan: updated, + message: `Added node ${params.id} to diagram ${params.diagram_id}`, + }; + }, + }); + + planTool(pi, planRef, { + name: "koan_add_diagram_edge", + label: "Add diagram edge", + description: "Add edge to diagram.", + parameters: Type.Object({ + diagram_id: Type.String(), + source: Type.String(), + target: Type.String(), + label: Type.String(), + protocol: Type.Optional(Type.String()), + }), + execute: (p, params) => { + const updated = addDiagramEdge(p, params.diagram_id, params); + return { + plan: updated, + message: `Added edge ${params.source}->${params.target} to diagram ${params.diagram_id}`, + }; + }, + }); + + // -- ReadmeEntry -- + planTool(pi, planRef, { + name: "koan_set_readme_entry", + label: "Set readme entry", + description: "Upsert readme entry by path.", + parameters: Type.Object({ + path: Type.String(), + content: Type.String(), + }), + execute: (p, params) => { + const updated = setReadmeEntry(p, params.path, params.content); + return { + plan: updated, + message: `Set readme entry for ${params.path}`, + }; + }, + }); +} diff --git a/src/planner/tools/plan-getters.ts b/src/planner/tools/getters.ts similarity index 99% rename from src/planner/tools/plan-getters.ts rename to src/planner/tools/getters.ts index 8154229..712fc3d 100644 --- 
a/src/planner/tools/plan-getters.ts +++ b/src/planner/tools/getters.ts @@ -1,7 +1,7 @@ import { Type } from "@sinclair/typebox"; import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; -import type { PlanRef } from "./dispatch.js"; +import type { PlanRef } from "../lib/dispatch.js"; import { loadPlan } from "../plan/serialize.js"; import type { Plan, Milestone, CodeIntent, CodeChange } from "../plan/types.js"; diff --git a/src/planner/tools/index.ts b/src/planner/tools/index.ts new file mode 100644 index 0000000..e658f49 --- /dev/null +++ b/src/planner/tools/index.ts @@ -0,0 +1,36 @@ +// Tool registration aggregator. Single entry point for koan.ts. +// Re-exports dispatch primitives so koan.ts needs one import for both +// tool registration and workflow infrastructure. + +import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; +import type { WorkflowDispatch, PlanRef } from "../lib/dispatch.js"; + +import { registerWorkflowTools } from "./workflow.js"; +import { registerPlanGetterTools } from "./getters.js"; +import { registerPlanSetterTools } from "./setters.js"; +import { registerPlanDesignEntityTools } from "./entity-design.js"; +import { registerPlanCodeEntityTools } from "./entity-code.js"; +import { registerPlanStructureEntityTools } from "./entity-structure.js"; +import { registerQRTools } from "./qr.js"; + +export type { WorkflowDispatch, PlanRef, StepResult } from "../lib/dispatch.js"; +export { + createDispatch, + createPlanRef, + hookDispatch, + unhookDispatch, +} from "../lib/dispatch.js"; + +export function registerAllTools( + pi: ExtensionAPI, + planRef: PlanRef, + dispatch: WorkflowDispatch, +): void { + registerWorkflowTools(pi, dispatch); + registerPlanGetterTools(pi, planRef); + registerPlanSetterTools(pi, planRef); + registerPlanDesignEntityTools(pi, planRef); + registerPlanCodeEntityTools(pi, planRef); + registerPlanStructureEntityTools(pi, planRef); + registerQRTools(pi, planRef); +} diff --git 
a/src/planner/tools/plan-entities.ts b/src/planner/tools/plan-entities.ts deleted file mode 100644 index c38efb2..0000000 --- a/src/planner/tools/plan-entities.ts +++ /dev/null @@ -1,603 +0,0 @@ -// Every tool follows load-mutate-save: loadPlan -> pure mutation -> savePlan. -// Disk is single source of truth. Single-writer assumption per phase. -// Feedback messages prevent the LLM from skipping tools (prior architecture -// returned opaque JSON). -// -// Static derives the TypeScript type from the TypeBox schema at -// compile time, making type casts unnecessary. The registerTool generic -// propagates the schema type through to the execute callback. - -import { Type, type Static, type TSchema } from "@sinclair/typebox"; -import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; - -import type { PlanRef } from "./dispatch.js"; -import { loadPlan, savePlan } from "../plan/serialize.js"; -import type { Plan } from "../plan/types.js"; -import { - addDecision, - setDecision, - addRejectedAlternative, - setRejectedAlternative, - addRisk, - setRisk, - addMilestone, - setMilestoneName, - setMilestoneFiles, - setMilestoneFlags, - setMilestoneRequirements, - setMilestoneAcceptanceCriteria, - setMilestoneTests, - addIntent, - setIntent, - addChange, - setChangeDiff, - setChangeDocDiff, - setChangeComments, - setChangeFile, - setChangeIntentRef, - addWave, - setWaveMilestones, - addDiagram, - setDiagram, - addDiagramNode, - addDiagramEdge, - setReadmeEntry, -} from "../plan/mutate.js"; - -function planTool( - pi: ExtensionAPI, - planRef: PlanRef, - opts: { - name: string; - label: string; - description: string; - parameters: TParams; - execute: (plan: Plan, params: Static) => { plan: Plan; message: string }; - }, -): void { - pi.registerTool({ - name: opts.name, - label: opts.label, - description: opts.description, - parameters: opts.parameters, - async execute(_toolCallId, params) { - if (!planRef.dir) throw new Error("No plan directory is active."); - const plan = 
await loadPlan(planRef.dir); - const result = opts.execute(plan, params); - await savePlan(result.plan, planRef.dir); - return { - content: [{ type: "text" as const, text: result.message }], - details: undefined, - }; - }, - }); -} - -export function registerPlanEntityTools( - pi: ExtensionAPI, - planRef: PlanRef, -): void { - // -- Decision -- - planTool(pi, planRef, { - name: "koan_add_decision", - label: "Add decision", - description: "Add decision to decision log.", - parameters: Type.Object({ - decision: Type.String(), - reasoning: Type.String(), - }), - execute: (p, params) => { - const r = addDecision(p, params); - return { - plan: r.plan, - message: `Added decision ${r.id}: "${params.decision}"`, - }; - }, - }); - - planTool(pi, planRef, { - name: "koan_set_decision", - label: "Update decision", - description: "Update existing decision by ID.", - parameters: Type.Object({ - id: Type.String(), - decision: Type.Optional(Type.String()), - reasoning: Type.Optional(Type.String()), - }), - execute: (p, params) => { - const updated = setDecision(p, params.id, params); - return { - plan: updated, - message: `Updated decision ${params.id}`, - }; - }, - }); - - // -- RejectedAlternative -- - planTool(pi, planRef, { - name: "koan_add_rejected_alternative", - label: "Add rejected alternative", - description: "Add rejected alternative to decision log.", - parameters: Type.Object({ - alternative: Type.String(), - rejection_reason: Type.String(), - decision_ref: Type.String(), - }), - execute: (p, params) => { - const r = addRejectedAlternative(p, params); - return { - plan: r.plan, - message: `Added rejected alternative ${r.id}`, - }; - }, - }); - - planTool(pi, planRef, { - name: "koan_set_rejected_alternative", - label: "Update rejected alternative", - description: "Update existing rejected alternative by ID.", - parameters: Type.Object({ - id: Type.String(), - alternative: Type.Optional(Type.String()), - rejection_reason: Type.Optional(Type.String()), - decision_ref: 
Type.Optional(Type.String()), - }), - execute: (p, params) => { - const updated = setRejectedAlternative(p, params.id, params); - return { - plan: updated, - message: `Updated rejected alternative ${params.id}`, - }; - }, - }); - - // -- Risk -- - planTool(pi, planRef, { - name: "koan_add_risk", - label: "Add risk", - description: "Add risk to known risks.", - parameters: Type.Object({ - risk: Type.String(), - mitigation: Type.String(), - anchor: Type.Optional(Type.String()), - decision_ref: Type.Optional(Type.String()), - }), - execute: (p, params) => { - const r = addRisk(p, params); - return { - plan: r.plan, - message: `Added risk ${r.id}: "${params.risk}"`, - }; - }, - }); - - planTool(pi, planRef, { - name: "koan_set_risk", - label: "Update risk", - description: "Update existing risk by ID.", - parameters: Type.Object({ - id: Type.String(), - risk: Type.Optional(Type.String()), - mitigation: Type.Optional(Type.String()), - anchor: Type.Optional(Type.String()), - decision_ref: Type.Optional(Type.String()), - }), - execute: (p, params) => { - const updated = setRisk(p, params.id, params); - return { - plan: updated, - message: `Updated risk ${params.id}`, - }; - }, - }); - - // -- Milestone -- - planTool(pi, planRef, { - name: "koan_add_milestone", - label: "Add milestone", - description: "Create new milestone.", - parameters: Type.Object({ - name: Type.String(), - files: Type.Optional(Type.Array(Type.String())), - flags: Type.Optional(Type.Array(Type.String())), - requirements: Type.Optional(Type.Array(Type.String())), - acceptance_criteria: Type.Optional(Type.Array(Type.String())), - tests: Type.Optional(Type.Array(Type.String())), - }), - execute: (p, params) => { - const r = addMilestone(p, params); - return { - plan: r.plan, - message: `Added milestone ${r.id}: "${params.name}"`, - }; - }, - }); - - planTool(pi, planRef, { - name: "koan_set_milestone_name", - label: "Set milestone name", - description: "Update milestone name.", - parameters: Type.Object({ 
- id: Type.String(), - name: Type.String(), - }), - execute: (p, params) => { - const updated = setMilestoneName(p, params.id, params.name); - return { - plan: updated, - message: `Set name for milestone ${params.id}`, - }; - }, - }); - - planTool(pi, planRef, { - name: "koan_set_milestone_files", - label: "Set milestone files", - description: "Update milestone files list.", - parameters: Type.Object({ - id: Type.String(), - files: Type.Array(Type.String()), - }), - execute: (p, params) => { - const updated = setMilestoneFiles(p, params.id, params.files); - return { - plan: updated, - message: `Set files for milestone ${params.id} (${params.files.length} files)`, - }; - }, - }); - - planTool(pi, planRef, { - name: "koan_set_milestone_flags", - label: "Set milestone flags", - description: "Update milestone flags list.", - parameters: Type.Object({ - id: Type.String(), - flags: Type.Array(Type.String()), - }), - execute: (p, params) => { - const updated = setMilestoneFlags(p, params.id, params.flags); - return { - plan: updated, - message: `Set flags for milestone ${params.id}`, - }; - }, - }); - - planTool(pi, planRef, { - name: "koan_set_milestone_requirements", - label: "Set milestone requirements", - description: "Update milestone requirements list.", - parameters: Type.Object({ - id: Type.String(), - requirements: Type.Array(Type.String()), - }), - execute: (p, params) => { - const updated = setMilestoneRequirements(p, params.id, params.requirements); - return { - plan: updated, - message: `Set requirements for milestone ${params.id} (${params.requirements.length} items)`, - }; - }, - }); - - planTool(pi, planRef, { - name: "koan_set_milestone_acceptance_criteria", - label: "Set milestone acceptance criteria", - description: "Update milestone acceptance criteria list.", - parameters: Type.Object({ - id: Type.String(), - acceptance_criteria: Type.Array(Type.String()), - }), - execute: (p, params) => { - const updated = setMilestoneAcceptanceCriteria( - p, - 
params.id, - params.acceptance_criteria, - ); - return { - plan: updated, - message: `Set acceptance criteria for milestone ${params.id} (${params.acceptance_criteria.length} items)`, - }; - }, - }); - - planTool(pi, planRef, { - name: "koan_set_milestone_tests", - label: "Set milestone tests", - description: "Update milestone tests list.", - parameters: Type.Object({ - id: Type.String(), - tests: Type.Array(Type.String()), - }), - execute: (p, params) => { - const updated = setMilestoneTests(p, params.id, params.tests); - return { - plan: updated, - message: `Set tests for milestone ${params.id} (${params.tests.length} tests)`, - }; - }, - }); - - // -- CodeIntent -- - planTool(pi, planRef, { - name: "koan_add_intent", - label: "Add code intent", - description: "Add code intent to milestone.", - parameters: Type.Object({ - milestone: Type.String(), - file: Type.String(), - function: Type.Optional(Type.String()), - behavior: Type.String(), - decision_refs: Type.Optional(Type.Array(Type.String())), - }), - execute: (p, params) => { - const r = addIntent(p, params); - return { - plan: r.plan, - message: `Added intent ${r.id} to milestone ${params.milestone}`, - }; - }, - }); - - planTool(pi, planRef, { - name: "koan_set_intent", - label: "Update code intent", - description: "Update existing code intent by ID.", - parameters: Type.Object({ - id: Type.String(), - file: Type.Optional(Type.String()), - function: Type.Optional(Type.String()), - behavior: Type.Optional(Type.String()), - decision_refs: Type.Optional(Type.Array(Type.String())), - }), - execute: (p, params) => { - const updated = setIntent(p, params.id, params); - return { - plan: updated, - message: `Updated intent ${params.id}`, - }; - }, - }); - - // -- CodeChange -- - planTool(pi, planRef, { - name: "koan_add_change", - label: "Add code change", - description: "Add code change to milestone.", - parameters: Type.Object({ - milestone: Type.String(), - file: Type.String(), - intent_ref: 
Type.Optional(Type.String()), - diff: Type.Optional(Type.String()), - doc_diff: Type.Optional(Type.String()), - comments: Type.Optional(Type.String()), - }), - execute: (p, params) => { - const r = addChange(p, params); - return { - plan: r.plan, - message: `Added change ${r.id} to milestone ${params.milestone}`, - }; - }, - }); - - planTool(pi, planRef, { - name: "koan_set_change_diff", - label: "Set code change diff", - description: "Update change diff.", - parameters: Type.Object({ - id: Type.String(), - diff: Type.String(), - }), - execute: (p, params) => { - const updated = setChangeDiff(p, params.id, params.diff); - return { - plan: updated, - message: `Set diff for change ${params.id}`, - }; - }, - }); - - planTool(pi, planRef, { - name: "koan_set_change_doc_diff", - label: "Set code change doc_diff", - description: "Update change doc_diff.", - parameters: Type.Object({ - id: Type.String(), - doc_diff: Type.String(), - }), - execute: (p, params) => { - const updated = setChangeDocDiff(p, params.id, params.doc_diff); - return { - plan: updated, - message: `Set doc_diff for change ${params.id}`, - }; - }, - }); - - planTool(pi, planRef, { - name: "koan_set_change_comments", - label: "Set code change comments", - description: "Update change comments.", - parameters: Type.Object({ - id: Type.String(), - comments: Type.String(), - }), - execute: (p, params) => { - const updated = setChangeComments(p, params.id, params.comments); - return { - plan: updated, - message: `Set comments for change ${params.id}`, - }; - }, - }); - - planTool(pi, planRef, { - name: "koan_set_change_file", - label: "Set code change file", - description: "Update change file path.", - parameters: Type.Object({ - id: Type.String(), - file: Type.String(), - }), - execute: (p, params) => { - const updated = setChangeFile(p, params.id, params.file); - return { - plan: updated, - message: `Set file for change ${params.id}`, - }; - }, - }); - - planTool(pi, planRef, { - name: 
"koan_set_change_intent_ref", - label: "Set code change intent_ref", - description: "Update change intent reference.", - parameters: Type.Object({ - id: Type.String(), - intent_ref: Type.String(), - }), - execute: (p, params) => { - const updated = setChangeIntentRef(p, params.id, params.intent_ref); - return { - plan: updated, - message: `Set intent_ref for change ${params.id}`, - }; - }, - }); - - // -- Wave -- - planTool(pi, planRef, { - name: "koan_add_wave", - label: "Add wave", - description: "Create wave with milestone list.", - parameters: Type.Object({ - milestones: Type.Array(Type.String()), - }), - execute: (p, params) => { - const r = addWave(p, params); - return { - plan: r.plan, - message: `Added wave ${r.id} with ${params.milestones.length} milestones`, - }; - }, - }); - - planTool(pi, planRef, { - name: "koan_set_wave_milestones", - label: "Set wave milestones", - description: "Update wave milestones list.", - parameters: Type.Object({ - id: Type.String(), - milestones: Type.Array(Type.String()), - }), - execute: (p, params) => { - const updated = setWaveMilestones(p, params.id, params.milestones); - return { - plan: updated, - message: `Set milestones for wave ${params.id}`, - }; - }, - }); - - // -- Diagram -- - planTool(pi, planRef, { - name: "koan_add_diagram", - label: "Add diagram", - description: "Create diagram graph.", - parameters: Type.Object({ - type: Type.Union([ - Type.Literal("architecture"), - Type.Literal("state"), - Type.Literal("sequence"), - Type.Literal("dataflow"), - ]), - scope: Type.String(), - title: Type.String(), - }), - execute: (p, params) => { - const r = addDiagram(p, params); - return { - plan: r.plan, - message: `Added diagram ${r.id}: "${params.title}"`, - }; - }, - }); - - planTool(pi, planRef, { - name: "koan_set_diagram", - label: "Update diagram", - description: "Update diagram properties.", - parameters: Type.Object({ - id: Type.String(), - title: Type.Optional(Type.String()), - scope: 
Type.Optional(Type.String()), - ascii_render: Type.Optional(Type.String()), - }), - execute: (p, params) => { - const updated = setDiagram(p, params.id, params); - return { - plan: updated, - message: `Updated diagram ${params.id}`, - }; - }, - }); - - planTool(pi, planRef, { - name: "koan_add_diagram_node", - label: "Add diagram node", - description: "Add node to diagram.", - parameters: Type.Object({ - diagram_id: Type.String(), - id: Type.String(), - label: Type.String(), - type: Type.Optional(Type.String()), - }), - execute: (p, params) => { - const updated = addDiagramNode(p, params.diagram_id, params); - return { - plan: updated, - message: `Added node ${params.id} to diagram ${params.diagram_id}`, - }; - }, - }); - - planTool(pi, planRef, { - name: "koan_add_diagram_edge", - label: "Add diagram edge", - description: "Add edge to diagram.", - parameters: Type.Object({ - diagram_id: Type.String(), - source: Type.String(), - target: Type.String(), - label: Type.String(), - protocol: Type.Optional(Type.String()), - }), - execute: (p, params) => { - const updated = addDiagramEdge(p, params.diagram_id, params); - return { - plan: updated, - message: `Added edge ${params.source}->${params.target} to diagram ${params.diagram_id}`, - }; - }, - }); - - // -- ReadmeEntry -- - planTool(pi, planRef, { - name: "koan_set_readme_entry", - label: "Set readme entry", - description: "Upsert readme entry by path.", - parameters: Type.Object({ - path: Type.String(), - content: Type.String(), - }), - execute: (p, params) => { - const updated = setReadmeEntry(p, params.path, params.content); - return { - plan: updated, - message: `Set readme entry for ${params.path}`, - }; - }, - }); -} diff --git a/src/planner/tools/qr-tools.ts b/src/planner/tools/qr.ts similarity index 78% rename from src/planner/tools/qr-tools.ts rename to src/planner/tools/qr.ts index 4d43331..cd99ab1 100644 --- a/src/planner/tools/qr-tools.ts +++ b/src/planner/tools/qr.ts @@ -3,9 +3,10 @@ import type { 
ExtensionAPI } from "@mariozechner/pi-coding-agent"; import { promises as fs } from "node:fs"; import * as path from "node:path"; -import type { PlanRef } from "./dispatch.js"; +import type { PlanRef } from "../lib/dispatch.js"; import type { QRFile } from "../qr/types.js"; import { addQRItem, setQRItem, assignGroup } from "../qr/mutate.js"; +import { withFileLock } from "../../utils/lock.js"; function createEmptyQRFile(phase: string): QRFile { return { @@ -55,13 +56,16 @@ export function registerQRTools(pi: ExtensionAPI, planRef: PlanRef): void { }), async execute(_toolCallId, params) { if (!planRef.dir) throw new Error("No plan directory is active."); - const qr = await loadQR(planRef.dir, params.phase); - const r = addQRItem(qr, params); - await saveQR(r.qr, planRef.dir, params.phase); - return { - content: [{ type: "text" as const, text: `Added QR item ${r.id}` }], - details: undefined, - }; + const qrPath = path.join(planRef.dir, `qr-${params.phase}.json`); + return withFileLock(qrPath, async () => { + const qr = await loadQR(planRef.dir!, params.phase); + const r = addQRItem(qr, params); + await saveQR(r.qr, planRef.dir!, params.phase); + return { + content: [{ type: "text" as const, text: `Added QR item ${r.id}` }], + details: undefined, + }; + }); }, }); @@ -91,13 +95,16 @@ export function registerQRTools(pi: ExtensionAPI, planRef: PlanRef): void { }), async execute(_toolCallId, params) { if (!planRef.dir) throw new Error("No plan directory is active."); - const qr = await loadQR(planRef.dir, params.phase); - const updated = setQRItem(qr, params.id, params); - await saveQR(updated, planRef.dir, params.phase); - return { - content: [{ type: "text" as const, text: `Updated QR item ${params.id}` }], - details: undefined, - }; + const qrPath = path.join(planRef.dir, `qr-${params.phase}.json`); + return withFileLock(qrPath, async () => { + const qr = await loadQR(planRef.dir!, params.phase); + const updated = setQRItem(qr, params.id, params); + await 
saveQR(updated, planRef.dir!, params.phase); + return { + content: [{ type: "text" as const, text: `Updated QR item ${params.id}` }], + details: undefined, + }; + }); }, }); @@ -112,18 +119,21 @@ export function registerQRTools(pi: ExtensionAPI, planRef: PlanRef): void { }), async execute(_toolCallId, params) { if (!planRef.dir) throw new Error("No plan directory is active."); - const qr = await loadQR(planRef.dir, params.phase); - const updated = assignGroup(qr, params.ids, params.group_id); - await saveQR(updated, planRef.dir, params.phase); - return { - content: [ - { - type: "text" as const, - text: `Assigned ${params.ids.length} items to group ${params.group_id}`, - }, - ], - details: undefined, - }; + const qrPath = path.join(planRef.dir, `qr-${params.phase}.json`); + return withFileLock(qrPath, async () => { + const qr = await loadQR(planRef.dir!, params.phase); + const updated = assignGroup(qr, params.ids, params.group_id); + await saveQR(updated, planRef.dir!, params.phase); + return { + content: [ + { + type: "text" as const, + text: `Assigned ${params.ids.length} items to group ${params.group_id}`, + }, + ], + details: undefined, + }; + }); }, }); diff --git a/src/planner/tools/plan-setters.ts b/src/planner/tools/setters.ts similarity index 96% rename from src/planner/tools/plan-setters.ts rename to src/planner/tools/setters.ts index 4478254..13e0f92 100644 --- a/src/planner/tools/plan-setters.ts +++ b/src/planner/tools/setters.ts @@ -1,13 +1,13 @@ import { Type } from "@sinclair/typebox"; import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; -import type { PlanRef } from "./dispatch.js"; +import type { PlanRef } from "../lib/dispatch.js"; import { loadPlan, savePlan } from "../plan/serialize.js"; import { setOverview, setConstraints, setInvisibleKnowledge, -} from "../plan/mutate.js"; +} from "../plan/mutate/index.js"; export function registerPlanSetterTools( pi: ExtensionAPI, diff --git a/src/planner/tools/dispatch.ts 
b/src/planner/tools/workflow.ts similarity index 58% rename from src/planner/tools/dispatch.ts rename to src/planner/tools/workflow.ts index 7bfa629..70075e8 100644 --- a/src/planner/tools/dispatch.ts +++ b/src/planner/tools/workflow.ts @@ -1,77 +1,16 @@ -// Workflow tool dispatch for koan. -// -// Workflow tools (koan_complete_step, koan_store_context) are registered -// once at init and read from this dispatch at call time. -// Pi snapshots tools during _buildRuntime() -- late registration is -// invisible to the LLM. The dispatch decouples static registration -// from dynamic phase routing. +// Workflow tool registration: koan_complete_step and koan_store_context. +// Tools register once at init; execute callbacks read from the mutable +// dispatch at call time, decoupling static registration from phase routing. import { Type } from "@sinclair/typebox"; -import type { ExtensionAPI, ExtensionContext } from "@mariozechner/pi-coding-agent"; +import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; -import { ContextStoreSchema, type ContextToolResult } from "./context-store.js"; +import { ContextStoreSchema } from "./context-store.js"; import { createLogger } from "../../utils/logger.js"; +import type { WorkflowDispatch } from "../lib/dispatch.js"; const log = createLogger("Dispatch"); -// -- Result types -- - -export interface StepResult { - ok: boolean; - prompt?: string; - error?: string; -} - -// -- Dispatch -- - -export interface WorkflowDispatch { - onCompleteStep: ((thoughts?: string) => StepResult | Promise) | null; - onStoreContext: - | ((payload: unknown, ctx: ExtensionContext) => Promise) - | null; -} - -export function createDispatch(): WorkflowDispatch { - return { onCompleteStep: null, onStoreContext: null }; -} - -// Decouples tool registration (init-time, before _buildRuntime) from -// plan directory creation (runtime, after flags available). Same -// indirection pattern as WorkflowDispatch. 
-export interface PlanRef { - dir: string | null; -} - -export function createPlanRef(): PlanRef { - return { dir: null }; -} - -// Sets a dispatch slot. Throws if the slot is already occupied -- -// prevents silent misrouting when two phases attempt to claim -// the same tool. -export function hookDispatch( - dispatch: WorkflowDispatch, - key: K, - handler: NonNullable, -): void { - if (dispatch[key] !== null) { - throw new Error(`dispatch.${String(key)} is already hooked`); - } - // TypeScript cannot verify generic key-value assignment. - // Call-site generic constraint (handler: NonNullable) - // ensures type safety; collision guard above prevents double-hooking. - (dispatch as any)[key] = handler; -} - -export function unhookDispatch( - dispatch: WorkflowDispatch, - key: keyof WorkflowDispatch, -): void { - (dispatch as any)[key] = null; -} - -// -- Tool registration -- - // Registers workflow tools. Called once at init in koan.ts, // before pi's _buildRuntime() snapshot. Tool execute callbacks read // from the dispatch at call time -- the dispatch is mutable, the diff --git a/src/utils/lock.ts b/src/utils/lock.ts new file mode 100644 index 0000000..47ed858 --- /dev/null +++ b/src/utils/lock.ts @@ -0,0 +1,44 @@ +import { promises as fs } from "node:fs"; + +// Advisory .lock file for serializing file mutations. Uses O_CREAT|O_EXCL +// for atomic creation (fails if lock already exists). Retry with backoff +// handles transient contention (e.g. parallel QR verifiers). 
+ +const RETRY_INTERVAL_MS = 50; +const MAX_WAIT_MS = 5000; + +function lockPath(filePath: string): string { + return `${filePath}.lock`; +} + +async function acquire(filePath: string): Promise { + const lp = lockPath(filePath); + const deadline = Date.now() + MAX_WAIT_MS; + + while (true) { + try { + const fd = await fs.open(lp, "wx"); + await fd.close(); + return; + } catch (err: unknown) { + if ((err as NodeJS.ErrnoException).code !== "EEXIST") throw err; + if (Date.now() >= deadline) { + throw new Error(`Failed to acquire lock on ${filePath} after ${MAX_WAIT_MS}ms`); + } + await new Promise((r) => setTimeout(r, RETRY_INTERVAL_MS)); + } + } +} + +async function release(filePath: string): Promise { + await fs.rm(lockPath(filePath), { force: true }); +} + +export async function withFileLock(filePath: string, fn: () => Promise): Promise { + await acquire(filePath); + try { + return await fn(); + } finally { + await release(filePath); + } +} From a32c2d7cdbea3a291a7899236dfabdccb6a2b08b Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Tue, 24 Feb 2026 19:29:04 +0700 Subject: [PATCH 009/412] Better UI --- extensions/koan.ts | 38 ++- src/planner/lib/audit.ts | 327 ++++++++++++++++++++ src/planner/lib/pool.ts | 74 +++++ src/planner/phases/context-capture/phase.ts | 6 +- src/planner/phases/dispatch.ts | 48 ++- src/planner/phases/plan-design/phase.ts | 37 ++- src/planner/phases/qr-decompose/phase.ts | 227 ++++++++++++++ src/planner/phases/qr-decompose/prompts.ts | 256 +++++++++++++++ src/planner/phases/qr-verify/phase.ts | 227 ++++++++++++++ src/planner/phases/qr-verify/prompts.ts | 154 +++++++++ src/planner/session.ts | 318 ++++++++++++------- src/planner/state.ts | 14 +- src/planner/subagent.ts | 96 +++++- src/planner/ui/widget.ts | 203 ++++++++++++ src/utils/logger.ts | 30 +- src/utils/progress.ts | 65 +--- 16 files changed, 1921 insertions(+), 199 deletions(-) create mode 100644 src/planner/lib/audit.ts create mode 100644 src/planner/lib/pool.ts create mode 100644 
src/planner/phases/qr-decompose/phase.ts create mode 100644 src/planner/phases/qr-decompose/prompts.ts create mode 100644 src/planner/phases/qr-verify/phase.ts create mode 100644 src/planner/phases/qr-verify/prompts.ts create mode 100644 src/planner/ui/widget.ts diff --git a/extensions/koan.ts b/extensions/koan.ts index 30288d7..a58d81c 100644 --- a/extensions/koan.ts +++ b/extensions/koan.ts @@ -1,9 +1,15 @@ +// Entry point for the koan pi extension. Serves dual roles: parent session +// (registers /koan command) and subagent mode (dispatches to phase workflow +// via CLI flags). All tools register unconditionally at init; phases restrict +// access via tool_call blocking at runtime. + import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; import { createSession } from "../src/planner/session.js"; import { detectSubagentMode, dispatchPhase } from "../src/planner/phases/dispatch.js"; import { registerAllTools, createDispatch, createPlanRef } from "../src/planner/tools/index.js"; import { createLogger } from "../src/utils/logger.js"; +import { EventLog, extractToolEvent } from "../src/planner/lib/audit.js"; export default function koan(pi: ExtensionAPI): void { const log = createLogger("Koan"); @@ -32,6 +38,12 @@ export default function koan(pi: ExtensionAPI): void { default: "", }); + pi.registerFlag("koan-qr-item", { + description: "QR item ID for reviewer subagent", + type: "string", + default: "", + }); + // Pi snapshots tools during _buildRuntime() at init. All 44 tools // register here unconditionally. Phases restrict access via tool_call // blocking at runtime. @@ -52,7 +64,31 @@ export default function koan(pi: ExtensionAPI): void { if (planDir) { planRef.dir = planDir; } - await dispatchPhase(pi, config, dispatch, planRef, log); + + // EventLog exists only in subagent mode. Parent mode has no audit log. 
+ let eventLog: EventLog | undefined; + if (config.subagentDir) { + eventLog = new EventLog(config.subagentDir, config.role, config.phase); + await eventLog.open(); + + // Capture all tool results for the audit trail. Graduated detail: + // file paths for read/edit/write, binary name for bash, full + // input+response for koan_* tools, name-only for everything else. + pi.on("tool_result", (event) => { + void eventLog!.append(extractToolEvent(event as { + toolName: string; + input: Record; + content: Array<{ type: string; text?: string }>; + isError: boolean; + })); + }); + + pi.on("session_shutdown", () => { + void eventLog!.close(); + }); + } + + await dispatchPhase(pi, config, dispatch, planRef, log, eventLog); } }); diff --git a/src/planner/lib/audit.ts b/src/planner/lib/audit.ts new file mode 100644 index 0000000..181070a --- /dev/null +++ b/src/planner/lib/audit.ts @@ -0,0 +1,327 @@ +// Audit trail for subagent sessions: event-sourced append log (events.jsonl) +// with an eagerly materialized projection (state.json) for parent polling. +// fold() is pure so the projection can be replayed from the raw log for testing. +// Graduated tool capture: full detail for koan_* tools, paths for file ops, +// binary name for bash, name-only for everything else. 
+ +import { promises as fs } from "node:fs"; +import * as path from "node:path"; + +// -- Types -- + +export interface EventBase { + ts: string; + seq: number; +} + +export interface ToolFileEvent extends EventBase { + kind: "tool_file"; + tool: "read" | "edit" | "write"; + path: string; + error: boolean; +} + +export interface ToolBashEvent extends EventBase { + kind: "tool_bash"; + bin: string; + error: boolean; +} + +export interface ToolKoanEvent extends EventBase { + kind: "tool_koan"; + tool: string; + input: Record; + response: string[]; + error: boolean; +} + +export interface ToolGenericEvent extends EventBase { + kind: "tool_generic"; + tool: string; + error: boolean; +} + +export type ToolEvent = ToolFileEvent | ToolBashEvent | ToolKoanEvent | ToolGenericEvent; + +export interface PhaseStartEvent extends EventBase { + kind: "phase_start"; + phase: string; + role: string; + totalSteps: number; +} + +export interface StepTransitionEvent extends EventBase { + kind: "step_transition"; + step: number; + name: string; + totalSteps: number; +} + +export interface PhaseEndEvent extends EventBase { + kind: "phase_end"; + outcome: "completed" | "failed"; + detail?: string; +} + +export interface HeartbeatEvent extends EventBase { + kind: "heartbeat"; +} + +export type AuditEvent = + | ToolFileEvent + | ToolBashEvent + | ToolKoanEvent + | ToolGenericEvent + | PhaseStartEvent + | StepTransitionEvent + | PhaseEndEvent + | HeartbeatEvent; + +export interface Projection { + role: string; + phase: string; + status: "running" | "completed" | "failed"; + step: number; + totalSteps: number; + stepName: string; + lastAction: string | null; + updatedAt: string; + eventCount: number; + error: string | null; +} + +// Pi's ToolResultEvent shape (subset we need). 
+interface PiToolResultEvent { + toolName: string; + input: Record; + content: Array<{ type: string; text?: string }>; + isError: boolean; +} + +// -- Constants -- + +const FILE_TOOLS = new Set(["read", "edit", "write"]); +const HEARTBEAT_MS = 10_000; + +// -- Helpers -- + +function now(): string { + return new Date().toISOString(); +} + +// Derives a concise last-action string from a tool event for display. +export function summarize(e: ToolEvent): string { + switch (e.kind) { + case "tool_file": + return `${e.tool} ${e.path}`; + case "tool_bash": + return `bash ${e.bin}`; + case "tool_koan": + return e.tool; + case "tool_generic": + return e.tool; + } +} + +// Pure projection update -- one case per discriminated kind. +// All branches update updatedAt and increment eventCount. +export function fold(s: Projection, e: AuditEvent): Projection { + const base = { ...s, updatedAt: e.ts, eventCount: s.eventCount + 1 }; + + switch (e.kind) { + case "phase_start": + return { + ...base, + role: e.role, + phase: e.phase, + status: "running", + step: 0, + totalSteps: e.totalSteps, + stepName: "", + lastAction: null, + error: null, + }; + + case "step_transition": + return { + ...base, + step: e.step, + totalSteps: e.totalSteps, + stepName: `Step ${e.step}/${e.totalSteps}: ${e.name}`, + }; + + case "phase_end": + return { + ...base, + status: e.outcome, + error: e.detail ?? null, + }; + + case "tool_file": + case "tool_bash": + case "tool_koan": + case "tool_generic": + return { ...base, lastAction: summarize(e) }; + + case "heartbeat": + return base; + } +} + +// Transforms pi's ToolResultEvent into a graduated AuditEvent. +export function extractToolEvent(piEvent: PiToolResultEvent): ToolEvent { + const { toolName, input, content, isError } = piEvent; + const ts = now(); + // ts and seq are assigned by EventLog.append(); values here are + // placeholders overridden on write. 
+ const seq = 0; + + if (FILE_TOOLS.has(toolName)) { + return { + kind: "tool_file", + tool: toolName as "read" | "edit" | "write", + path: (input["path"] as string | undefined) ?? "", + error: isError, + ts, + seq, + }; + } + + if (toolName === "bash") { + const cmd = (input["command"] as string | undefined) ?? ""; + const bin = cmd.trim().split(/\s+/)[0] ?? "bash"; + return { kind: "tool_bash", bin, error: isError, ts, seq }; + } + + if (toolName.startsWith("koan_")) { + const response = content + .filter((c) => c.type === "text" && c.text !== undefined) + .map((c) => c.text as string); + return { kind: "tool_koan", tool: toolName, input, response, error: isError, ts, seq }; + } + + return { kind: "tool_generic", tool: toolName, error: isError, ts, seq }; +} + +// -- EventLog -- + +export class EventLog { + private readonly eventsPath: string; + private readonly statePath: string; + private readonly stateTmpPath: string; + private fd: fs.FileHandle | null = null; + private seq = 0; + private projection: Projection; + private heartbeat: ReturnType | null = null; + // Serializes append() calls. Heartbeat timer and tool_result handler + // both call append() concurrently -- without serialization, two + // writeState() calls race on the shared tmp file (ENOENT on rename). + private pending: Promise = Promise.resolve(); + + constructor(dir: string, role: string, phase: string) { + this.eventsPath = path.join(dir, "events.jsonl"); + this.statePath = path.join(dir, "state.json"); + this.stateTmpPath = path.join(dir, "state.tmp.json"); + this.projection = { + role, + phase, + status: "running", + step: 0, + totalSteps: 0, + stepName: "", + lastAction: null, + updatedAt: now(), + eventCount: 0, + error: null, + }; + } + + async open(): Promise { + this.fd = await fs.open(this.eventsPath, "a"); + await this.writeState(); + // Heartbeat keeps updatedAt fresh even during long-running steps. 
+ this.heartbeat = setInterval(() => { + void this.append({ kind: "heartbeat" } as Omit); + }, HEARTBEAT_MS); + } + + // Assigns ts + seq, appends JSON line, folds, writes state atomically. + // Serialized: concurrent callers queue behind the in-flight write. + async append(partial: Omit): Promise { + const task = () => this.doAppend(partial); + this.pending = this.pending.then(task, task); + return this.pending; + } + + private async doAppend(partial: Omit): Promise { + if (!this.fd) { + throw new Error("EventLog.append called before open()"); + } + + const e = { ...partial, ts: now(), seq: this.seq++ } as AuditEvent; + await this.fd.write(JSON.stringify(e) + "\n"); + this.projection = fold(this.projection, e); + await this.writeState(); + } + + async emitPhaseStart(totalSteps: number): Promise { + await this.append({ + kind: "phase_start", + phase: this.projection.phase, + role: this.projection.role, + totalSteps, + } as Omit); + } + + async emitStepTransition(step: number, name: string, totalSteps: number): Promise { + await this.append({ + kind: "step_transition", + step, + name, + totalSteps, + } as Omit); + } + + async emitPhaseEnd(outcome: "completed" | "failed", detail?: string): Promise { + await this.append({ + kind: "phase_end", + outcome, + detail, + } as Omit); + } + + async close(): Promise { + if (this.heartbeat) { + clearInterval(this.heartbeat); + this.heartbeat = null; + } + if (this.fd) { + await this.fd.close(); + this.fd = null; + } + } + + get state(): Readonly { + return this.projection; + } + + // Atomic write: tmp file then rename so readers never see partial JSON. + private async writeState(): Promise { + const json = JSON.stringify(this.projection, null, 2) + "\n"; + await fs.writeFile(this.stateTmpPath, json); + await fs.rename(this.stateTmpPath, this.statePath); + } +} + +// -- Exports -- + +// Reads state.json as a Projection; returns null if missing or malformed. +// Used by session.ts parent polling loop. 
+export async function readProjection(dir: string): Promise { + try { + const raw = await fs.readFile(path.join(dir, "state.json"), "utf8"); + return JSON.parse(raw) as Projection; + } catch { + return null; + } +} diff --git a/src/planner/lib/pool.ts b/src/planner/lib/pool.ts new file mode 100644 index 0000000..f5e9c3f --- /dev/null +++ b/src/planner/lib/pool.ts @@ -0,0 +1,74 @@ +// Bounded-parallel subagent pool using an in-process semaphore. +// Runs all items to completion regardless of failures; callers inspect PoolResult. +// Timeout logic belongs in the worker closure, not here. + +import type { SubagentResult } from "../subagent.js"; + +// -- Types -- + +export interface PoolResult { + total: number; + completed: number; + failed: string[]; +} + +// -- Constants -- + +export const DEFAULT_REVIEWER_TIMEOUT_MS = 10 * 60 * 1000; + +// -- Private helpers -- + +class Semaphore { + private queue: Array<() => void> = []; + private count: number; + + constructor(limit: number) { + this.count = limit; + } + + acquire(): Promise { + if (this.count > 0) { + this.count--; + return Promise.resolve(); + } + return new Promise((resolve) => this.queue.push(resolve)); + } + + release(): void { + const next = this.queue.shift(); + if (next) next(); + else this.count++; + } +} + +// -- Exports -- + +export async function pool( + itemIds: string[], + limit: number, + worker: (itemId: string) => Promise, + onProgress?: (done: number, total: number) => void, +): Promise { + const sem = new Semaphore(limit); + const total = itemIds.length; + const failed: string[] = []; + let completed = 0; + + await Promise.all( + itemIds.map(async (id) => { + await sem.acquire(); + try { + const r = await worker(id); + if (r.exitCode !== 0) { + failed.push(id); + } + } finally { + completed++; + onProgress?.(completed, total); + sem.release(); + } + }), + ); + + return { total, completed, failed }; +} diff --git a/src/planner/phases/context-capture/phase.ts 
b/src/planner/phases/context-capture/phase.ts index 4b7320e..ecd4b94 100644 --- a/src/planner/phases/context-capture/phase.ts +++ b/src/planner/phases/context-capture/phase.ts @@ -81,7 +81,6 @@ export class ContextCapturePhase { hookDispatch(this.dispatch, "onStoreContext", (p, c) => this.handleContextToolCall(p, c)); this.log("Starting context capture (draft phase)", { planId: plan.id }); - ctx.ui.notify(`Koan context capture started for plan ${plan.id}.`, "info"); await this.updatePlanMetadata({ status: "context", @@ -211,8 +210,8 @@ export class ContextCapturePhase { this.log("Failed to write context file", { error: message }); return { ok: false, - message: `Failed to write context.json: ${message}`, - errors: [`Failed to write context.json: ${message}`], + message: `Failed to store context: ${message}`, + errors: [`Failed to store context: ${message}`], }; } @@ -224,7 +223,6 @@ export class ContextCapturePhase { unhookDispatch(this.dispatch, "onCompleteStep"); unhookDispatch(this.dispatch, "onStoreContext"); - ctx.ui.notify("Koan context capture complete.", "info"); this.log("Context capture succeeded", { planId: this.state.context.planId, attempt: this.state.context.attempt, diff --git a/src/planner/phases/dispatch.ts b/src/planner/phases/dispatch.ts index acb9dfc..c8a55f8 100644 --- a/src/planner/phases/dispatch.ts +++ b/src/planner/phases/dispatch.ts @@ -1,8 +1,16 @@ +// Phase dispatch: detects subagent mode from CLI flags and routes to the +// appropriate phase constructor. Flags are unavailable at extension init +// (getFlag returns undefined before _buildRuntime), so detection is +// deferred to before_agent_start. 
+ import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; import { PlanDesignPhase } from "./plan-design/phase.js"; +import { QRDecomposePhase } from "./qr-decompose/phase.js"; +import { QRVerifyPhase } from "./qr-verify/phase.js"; import { createLogger, type Logger } from "../../utils/logger.js"; import type { WorkflowDispatch, PlanRef } from "../lib/dispatch.js"; +import type { EventLog } from "../lib/audit.js"; export interface SubagentConfig { role: string; @@ -39,6 +47,7 @@ export async function dispatchPhase( dispatch: WorkflowDispatch, planRef: PlanRef, log?: Logger, + eventLog?: EventLog, ): Promise { const logger = log ?? createLogger("Dispatch"); @@ -46,13 +55,44 @@ export async function dispatchPhase( logger("Dispatching to plan-design workflow", { planDir: config.planDir }); const phase = new PlanDesignPhase( pi, - { - planDir: config.planDir, - subagentDir: config.subagentDir || undefined, - }, + { planDir: config.planDir }, + dispatch, + planRef, + logger, + eventLog, + ); + await phase.begin(); + return; + } + + if (config.role === "qr-decomposer" && config.phase === "qr-plan-design") { + logger("Dispatching to qr-decompose workflow", { planDir: config.planDir }); + const phase = new QRDecomposePhase( + pi, + { planDir: config.planDir }, + dispatch, + planRef, + logger, + eventLog, + ); + await phase.begin(); + return; + } + + if (config.role === "reviewer" && config.phase === "qr-plan-design") { + const itemId = pi.getFlag("koan-qr-item") as string; + if (!itemId) { + logger("Reviewer missing --koan-qr-item flag"); + return; + } + logger("Dispatching to qr-verify workflow", { planDir: config.planDir, itemId }); + const phase = new QRVerifyPhase( + pi, + { planDir: config.planDir, itemId }, dispatch, planRef, logger, + eventLog, ); await phase.begin(); return; diff --git a/src/planner/phases/plan-design/phase.ts b/src/planner/phases/plan-design/phase.ts index b7c493a..f2165ef 100644 --- a/src/planner/phases/plan-design/phase.ts +++ 
b/src/planner/phases/plan-design/phase.ts @@ -1,3 +1,7 @@ +// Plan-design phase -- 6-step architect workflow that produces plan.json +// from captured context. Step gate: mutation tools blocked before step 6 +// (blocklist pattern). Validation runs at step-6 completion. + import { promises as fs } from "node:fs"; import * as path from "node:path"; @@ -14,7 +18,7 @@ import { import { formatStep } from "../../lib/step.js"; import type { ContextData } from "../../types.js"; import { createLogger, type Logger } from "../../../utils/logger.js"; -import { ProgressReporter } from "../../../utils/progress.js"; +import { EventLog } from "../../lib/audit.js"; import { hookDispatch, unhookDispatch, type WorkflowDispatch, type PlanRef } from "../../lib/dispatch.js"; import { checkPermission, PLAN_MUTATION_TOOLS } from "../../lib/permissions.js"; @@ -28,24 +32,31 @@ interface PlanDesignState { systemPrompt: string | null; } +const TOTAL_STEPS = 6; + export class PlanDesignPhase { private readonly pi: ExtensionAPI; private readonly planDir: string; private readonly log: Logger; private readonly state: PlanDesignState; - private readonly progress: ProgressReporter | null; + private readonly eventLog: EventLog | undefined; private readonly dispatch: WorkflowDispatch; private readonly planRef: PlanRef; - constructor(pi: ExtensionAPI, config: { planDir: string; subagentDir?: string }, dispatch: WorkflowDispatch, planRef: PlanRef, log?: Logger) { + constructor( + pi: ExtensionAPI, + config: { planDir: string }, + dispatch: WorkflowDispatch, + planRef: PlanRef, + log?: Logger, + eventLog?: EventLog, + ) { this.pi = pi; this.planDir = config.planDir; this.dispatch = dispatch; this.planRef = planRef; this.log = log ?? createLogger("PlanDesign"); - this.progress = config.subagentDir - ? 
new ProgressReporter(config.subagentDir, "architect", "plan-design") - : null; + this.eventLog = eventLog; this.state = { active: false, @@ -91,7 +102,8 @@ export class PlanDesignPhase { hookDispatch(this.dispatch, "onCompleteStep", () => this.handleStepComplete()); this.log("Starting plan-design workflow", { step: 1 }); - await this.progress?.update(`Step 1/6: ${STEP_NAMES[1]} -- started`); + await this.eventLog?.emitPhaseStart(TOTAL_STEPS); + await this.eventLog?.emitStepTransition(1, STEP_NAMES[1], TOTAL_STEPS); } private registerHandlers(): void { @@ -146,9 +158,6 @@ export class PlanDesignPhase { return undefined; }); - this.pi.on("turn_end", (event) => { - if (!this.state.active) return; - }); } private async handleStepComplete(): Promise<{ ok: boolean; prompt?: string; error?: string }> { @@ -157,10 +166,12 @@ export class PlanDesignPhase { if (prev === 6) { const result = await this.handleFinalize(); if (!result.ok) { + await this.eventLog?.emitPhaseEnd("failed", result.errors?.join("; ")); return { ok: false, error: result.errors?.join("; ") }; } this.state.active = false; unhookDispatch(this.dispatch, "onCompleteStep"); + await this.eventLog?.emitPhaseEnd("completed"); this.log("Plan finalized, workflow complete"); return { ok: true, prompt: "Plan validation passed. Workflow complete." 
}; } @@ -170,9 +181,7 @@ export class PlanDesignPhase { const prompt = formatStep(planDesignStepGuidance(this.state.step)); this.log("Step complete, advancing", { from: prev, to: this.state.step, name: nextName }); - - this.progress?.update(`Step ${prev}/6: ${STEP_NAMES[prev]} -- complete`); - this.progress?.update(`Step ${this.state.step}/6: ${nextName} -- started`); + await this.eventLog?.emitStepTransition(this.state.step, nextName, TOTAL_STEPS); return { ok: true, prompt }; } @@ -202,8 +211,6 @@ export class PlanDesignPhase { } this.log("Plan validation passed", { path: planPath }); - await this.progress?.update("Step 6/6: " + STEP_NAMES[6] + " -- complete"); - await this.progress?.complete("completed"); return { ok: true }; } } diff --git a/src/planner/phases/qr-decompose/phase.ts b/src/planner/phases/qr-decompose/phase.ts new file mode 100644 index 0000000..5a8a99e --- /dev/null +++ b/src/planner/phases/qr-decompose/phase.ts @@ -0,0 +1,227 @@ +// QR decompose phase -- 13-step workflow that decomposes a plan into +// verifiable QR items. Mirrors PlanDesignPhase lifecycle exactly. +// Two-tier step gate: koan_qr_add_item unlocks at step 5, +// koan_qr_assign_group unlocks at step 9. 
+ +import { promises as fs } from "node:fs"; +import * as path from "node:path"; + +import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; + +import { + loadQRDecomposeSystemPrompt, + formatContextForDecompose, + buildDecomposeSystemPrompt, + decomposeStepGuidance, + DECOMPOSE_STEP_NAMES, + type DecomposeStep, +} from "./prompts.js"; +import { formatStep } from "../../lib/step.js"; +import type { ContextData } from "../../types.js"; +import { createLogger, type Logger } from "../../../utils/logger.js"; +import { EventLog } from "../../lib/audit.js"; +import { hookDispatch, unhookDispatch, type WorkflowDispatch, type PlanRef } from "../../lib/dispatch.js"; +import { checkPermission } from "../../lib/permissions.js"; +import type { QRFile } from "../../qr/types.js"; + +// -- Step gate constants -- + +// Blocklist pattern: only restrict tools this gate owns; everything else +// defers to checkPermission. Avoids blocking read tools or future pi tools. +const QR_ADD_TOOLS = new Set(["koan_qr_add_item"]); +const QR_ASSIGN_TOOLS = new Set(["koan_qr_assign_group"]); +const ADD_ITEM_UNLOCK = 5; +const ASSIGN_GROUP_UNLOCK = 9; +const TOTAL_STEPS = 13; + +// -- State -- + +interface DecomposeState { + active: boolean; + step: DecomposeStep; + step1Prompt: string | null; + systemPrompt: string | null; +} + +// -- Phase -- + +export class QRDecomposePhase { + private readonly pi: ExtensionAPI; + private readonly planDir: string; + private readonly log: Logger; + private readonly state: DecomposeState; + private readonly eventLog: EventLog | undefined; + private readonly dispatch: WorkflowDispatch; + private readonly planRef: PlanRef; + + constructor( + pi: ExtensionAPI, + config: { planDir: string }, + dispatch: WorkflowDispatch, + planRef: PlanRef, + log?: Logger, + eventLog?: EventLog, + ) { + this.pi = pi; + this.planDir = config.planDir; + this.dispatch = dispatch; + this.planRef = planRef; + this.log = log ?? 
createLogger("QRDecompose"); + this.eventLog = eventLog; + + this.state = { + active: false, + step: 1, + step1Prompt: null, + systemPrompt: null, + }; + + this.registerHandlers(); + } + + async begin(): Promise { + const contextPath = path.join(this.planDir, "context.json"); + let contextData: ContextData; + try { + const raw = await fs.readFile(contextPath, "utf8"); + contextData = JSON.parse(raw) as ContextData; + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + this.log("Failed to read context.json", { error: message }); + return; + } + + let basePrompt: string; + try { + basePrompt = await loadQRDecomposeSystemPrompt(); + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + this.log("Failed to load qr-decompose system prompt", { error: message }); + return; + } + + const contextXml = formatContextForDecompose(contextData); + this.state.systemPrompt = buildDecomposeSystemPrompt(basePrompt); + this.state.step1Prompt = formatStep(decomposeStepGuidance(1, contextXml)); + this.state.active = true; + this.state.step = 1; + this.planRef.dir = this.planDir; + + hookDispatch(this.dispatch, "onCompleteStep", () => this.handleStepComplete()); + + this.log("Starting qr-decompose workflow", { step: 1 }); + await this.eventLog?.emitPhaseStart(TOTAL_STEPS); + await this.eventLog?.emitStepTransition(1, DECOMPOSE_STEP_NAMES[1], TOTAL_STEPS); + } + + private registerHandlers(): void { + this.pi.on("before_agent_start", () => { + if (!this.state.active || !this.state.systemPrompt) return undefined; + return { systemPrompt: this.state.systemPrompt }; + }); + + // Step 1 prompt injection. The CLI message is a process trigger -- + // the context event fires before each LLM call and replaces the + // user message with the actual step 1 instructions. Handler is a + // no-op once the step advances past 1. 
+ this.pi.on("context", (event) => { + if (!this.state.active) return undefined; + if (this.state.step !== 1 || !this.state.step1Prompt) return undefined; + + const messages = event.messages.map((m) => { + if (m.role === "user") { + return { ...m, content: this.state.step1Prompt! }; + } + return m; + }); + return { messages }; + }); + + this.pi.on("tool_call", (event) => { + if (!this.state.active) return undefined; + + // Outer boundary: phase permissions (default-deny). + const perm = checkPermission("qr-plan-design", event.toolName); + if (!perm.allowed) { + return { block: true, reason: perm.reason }; + } + + // Inner constraint: two-tier step gate (blocklist, not whitelist). + const step = this.state.step; + if (step < ADD_ITEM_UNLOCK && QR_ADD_TOOLS.has(event.toolName)) { + return { + block: true, + reason: `${event.toolName} available from step ${ADD_ITEM_UNLOCK} (current: ${step})`, + }; + } + if (step < ASSIGN_GROUP_UNLOCK && QR_ASSIGN_TOOLS.has(event.toolName)) { + return { + block: true, + reason: `${event.toolName} available from step ${ASSIGN_GROUP_UNLOCK} (current: ${step})`, + }; + } + + return undefined; + }); + + } + + private async handleStepComplete(): Promise<{ ok: boolean; prompt?: string; error?: string }> { + const prev = this.state.step; + + if (prev === 13) { + const result = await this.handleFinalize(); + if (!result.ok) { + await this.eventLog?.emitPhaseEnd("failed", result.errors?.join("; ")); + return { ok: false, error: result.errors?.join("; ") }; + } + // Only unhook after successful finalization -- on failure the LLM + // receives the error as a tool result and may retry within the step. + this.state.active = false; + unhookDispatch(this.dispatch, "onCompleteStep"); + await this.eventLog?.emitPhaseEnd("completed"); + this.log("QR decompose finalized, workflow complete"); + return { ok: true, prompt: "QR decomposition complete." 
}; + } + + this.state.step = (prev + 1) as DecomposeStep; + const nextName = DECOMPOSE_STEP_NAMES[this.state.step]; + const prompt = formatStep(decomposeStepGuidance(this.state.step)); + + this.log("Step complete, advancing", { from: prev, to: this.state.step, name: nextName }); + await this.eventLog?.emitStepTransition(this.state.step, nextName, TOTAL_STEPS); + + return { ok: true, prompt }; + } + + private async handleFinalize(): Promise<{ ok: boolean; errors?: string[] }> { + const qrPath = path.join(this.planDir, "qr-plan-design.json"); + let qr: QRFile; + try { + const raw = await fs.readFile(qrPath, "utf8"); + qr = JSON.parse(raw) as QRFile; + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + return { ok: false, errors: [`Failed to read qr-plan-design.json: ${message}`] }; + } + + const errors: string[] = []; + if (!qr.items || qr.items.length === 0) { + errors.push("No QR items generated"); + } else { + const ungrouped = qr.items.filter((i) => i.group_id === null); + if (ungrouped.length > 0) { + const ids = ungrouped.map((i) => i.id).join(", "); + errors.push(`Ungrouped items: ${ids}`); + } + } + + if (errors.length > 0) { + this.log("QR decompose validation failed", { errors }); + return { ok: false, errors }; + } + + this.log("QR decompose validation passed"); + return { ok: true }; + } +} diff --git a/src/planner/phases/qr-decompose/prompts.ts b/src/planner/phases/qr-decompose/prompts.ts new file mode 100644 index 0000000..3c4969e --- /dev/null +++ b/src/planner/phases/qr-decompose/prompts.ts @@ -0,0 +1,256 @@ +// QR decompose phase prompts -- 13-step workflow for decomposing a plan into +// verifiable QR items. Follows the same structure as plan-design/prompts.ts. +// All tool calls reference phase='plan-design' explicitly so the decompose +// agent always writes to the correct QR namespace. 
+ +import { promises as fs } from "node:fs"; +import * as os from "node:os"; +import * as path from "node:path"; + +import type { ContextData } from "../../types.js"; +import type { StepGuidance } from "../../lib/step.js"; + +// -- Types -- + +export type DecomposeStep = 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13; + +// -- Constants -- + +export const DECOMPOSE_STEP_NAMES: Record = { + 1: "Absorb Context", + 2: "Holistic Concerns", + 3: "Structural Enumeration", + 4: "Gap Analysis", + 5: "Generate Items", + 6: "Atomicity Check", + 7: "Coverage Validation", + 8: "Validate Items", + 9: "Structural Grouping", + 10: "Component Grouping", + 11: "Concern Grouping", + 12: "Affinity Grouping", + 13: "Final Validation", +}; + +// -- Exports -- + +export async function loadQRDecomposeSystemPrompt(): Promise { + const homeDir = os.homedir(); + const promptPath = path.join(homeDir, ".claude/agents/quality-reviewer.md"); + try { + const content = await fs.readFile(promptPath, "utf8"); + const body = content.replace(/^---\n[\s\S]*?\n---\n/, ""); + return body; + } catch { + throw new Error(`Quality reviewer prompt not found at ${promptPath}`); + } +} + +export function buildDecomposeSystemPrompt(basePrompt: string): string { + return [ + basePrompt, + "", + "---", + "", + "WORKFLOW: 13-STEP QR DECOMPOSITION (plan-design)", + "", + "You will execute a 13-step workflow to decompose a plan into verifiable QR items.", + "Step 1 instructions are in the user message below.", + "Complete the work described, then call koan_complete_step.", + "Put your findings in the `thoughts` parameter of koan_complete_step.", + "The tool result contains the next step's instructions.", + "", + "CRITICAL: Do the actual work described in each step BEFORE calling", + "koan_complete_step. Read the plan, analyze, generate items. 
Do not skip.", + ].join("\n"); +} + +export function formatContextForDecompose(ctx: ContextData): string { + return [ + "", + JSON.stringify(ctx, null, 2), + "", + ].join("\n"); +} + +export function decomposeStepGuidance(step: DecomposeStep, context?: string): StepGuidance { + switch (step) { + case 1: + return { + title: "Step 1: Absorb Context", + instructions: [ + "PLANNING CONTEXT (from session):", + "", + context ?? "", + "", + "Use koan_get_plan to read the full plan.", + "Absorb the plan structure: overview, constraints, milestones, decisions, code_intents, risks, invisible_knowledge.", + "Identify the key entities and relationships that will need verification.", + ], + }; + + case 2: + return { + title: "Step 2: Holistic Concerns", + instructions: [ + "Identify plan-wide concerns that apply across all milestones.", + "Consider: structural completeness, logical consistency, risk coverage, dependency ordering.", + "Focus on plan-level quality -- not code correctness.", + "These concerns become scope='*' items in later steps.", + ], + }; + + case 3: + return { + title: "Step 3: Structural Enumeration", + instructions: [ + "Enumerate every major entity in the plan:", + " - Decisions (DL-xxx)", + " - Constraints", + " - Risks", + " - Milestones (M-xxx) and their code_intents (CI-M-xxx-xxx)", + " - Invisible knowledge entries", + " - Waves and ordering", + "Track counts for validation in step 8.", + ], + }; + + case 4: + return { + title: "Step 4: Gap Analysis", + instructions: [ + "Compare holistic concerns (step 2) against structural entities (step 3).", + "Identify gaps: concerns not covered by any entity, entities lacking justification.", + "Note areas where the plan is thin or under-specified.", + ], + }; + + case 5: + return { + title: "Step 5: Generate Items", + instructions: [ + "Generate QR items from the analysis in steps 2-4.", + "Use koan_qr_add_item to create each item. 
Always pass phase='plan-design'.", + "", + "SCOPE VOCABULARY:", + " '*' -- plan-wide check", + " 'milestone:M-001' -- milestone-specific check", + " 'decision:DL-001' -- decision-specific check", + " 'code_intent:CI-M-001-001' -- code intent-specific check", + "", + "SEVERITY:", + " MUST -- blocks all iterations (critical defect)", + " SHOULD -- important quality issue", + " COULD -- nice-to-have improvement", + "", + "Generate items covering: structural completeness, decision reasoning chains,", + "risk coverage, milestone scoping, code intent clarity, constraint satisfaction.", + ], + }; + + case 6: + return { + title: "Step 6: Atomicity Check", + instructions: [ + "Review each generated item. Each item should test exactly one concern.", + "If an item covers multiple concerns, split it:", + " Use koan_qr_add_item for each child item.", + " The original becomes the parent (parent_id on children).", + "Atomic items are easier to verify independently.", + ], + }; + + case 7: + return { + title: "Step 7: Coverage Validation", + instructions: [ + "Cross-reference items against the plan structure.", + "Every milestone should have at least one QR item.", + "Every decision should have at least one QR item.", + "High-severity risks should have corresponding QR items.", + "Use koan_qr_add_item for any gaps found.", + ], + }; + + case 8: + return { + title: "Step 8: Validate Items", + instructions: [ + "Items are already on disk (each koan_qr_add_item wrote immediately).", + "Use koan_qr_summary(phase='plan-design') to verify counts.", + "Use koan_qr_list_items(phase='plan-design') to review all items.", + "Check: no duplicate checks, severity levels appropriate, scopes valid.", + "Add missing items with koan_qr_add_item if gaps found.", + ], + }; + + case 9: + return { + title: "Step 9: Structural Grouping", + instructions: [ + "Begin organizing items into review groups.", + "DETERMINISTIC RULES:", + " - Parent-child items share the same group", + " - Umbrella items 
(scope='*') get group_id='umbrella'", + "", + "Use koan_qr_list_items(phase='plan-design') to see current items.", + "Use koan_qr_assign_group(phase='plan-design', ids=[...], group_id='...') to assign groups.", + ], + }; + + case 10: + return { + title: "Step 10: Component Grouping", + instructions: [ + "Group remaining ungrouped items by plan component.", + "Group candidates: a major milestone, a major decision, a constraint category.", + "", + "Use koan_qr_list_items(phase='plan-design') to see ungrouped items.", + "Use koan_qr_assign_group(phase='plan-design', ids=[...], group_id='...') to assign.", + ], + }; + + case 11: + return { + title: "Step 11: Concern Grouping", + instructions: [ + "Group remaining ungrouped items by concern type.", + "Group candidates: reasoning chain quality, reference integrity, risk coverage.", + "", + "Use koan_qr_list_items(phase='plan-design') to see ungrouped items.", + "Use koan_qr_assign_group(phase='plan-design', ids=[...], group_id='...') to assign.", + ], + }; + + case 12: + return { + title: "Step 12: Affinity Grouping", + instructions: [ + "Assign remaining ungrouped items to groups based on similarity.", + "Singletons are acceptable -- not every item needs a multi-member group.", + "", + "Use koan_qr_list_items(phase='plan-design') to see ungrouped items.", + "Use koan_qr_assign_group(phase='plan-design', ids=[...], group_id='...') to assign.", + ], + }; + + case 13: + return { + title: "Step 13: Final Validation", + instructions: [ + "Validate all items are grouped and well-formed.", + "Use koan_qr_summary(phase='plan-design') to check final counts.", + "Use koan_qr_list_items(phase='plan-design') to verify all items have group_id.", + "If any items lack group_id, assign them now.", + "Output 'PASS' in thoughts if all items are valid and grouped.", + ], + invokeAfter: [ + "WHEN DONE: Call koan_complete_step with 'PASS' or issues found in the `thoughts` parameter.", + "Do NOT call this tool until validation is complete.", 
+ ].join("\n"), + }; + + default: + return { title: "", instructions: [] }; + } +} diff --git a/src/planner/phases/qr-verify/phase.ts b/src/planner/phases/qr-verify/phase.ts new file mode 100644 index 0000000..4a8e5c1 --- /dev/null +++ b/src/planner/phases/qr-verify/phase.ts @@ -0,0 +1,227 @@ +// QR verify phase -- 3-step reviewer subagent that verifies exactly 1 QR item +// against the plan (CONTEXT -> ANALYZE -> CONFIRM). One subagent per item. +// Mirrors PlanDesignPhase lifecycle; no finalize validation -- parent reads +// item status from disk after the reviewer exits. + +import { promises as fs } from "node:fs"; +import * as path from "node:path"; + +import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; + +import { formatStep } from "../../lib/step.js"; +import type { ContextData } from "../../types.js"; +import { createLogger, type Logger } from "../../../utils/logger.js"; +import { EventLog } from "../../lib/audit.js"; +import { + hookDispatch, + unhookDispatch, + type WorkflowDispatch, + type PlanRef, +} from "../../lib/dispatch.js"; +import { checkPermission } from "../../lib/permissions.js"; +import type { QRItem, QRFile } from "../../qr/types.js"; +import { + loadQRVerifySystemPrompt, + buildVerifySystemPrompt, + buildContextStep, + buildAnalyzeStep, + buildConfirmStep, + type VerifyStep, +} from "./prompts.js"; + +// -- Constants -- + +const TOTAL_STEPS = 3; +const STEP_NAMES: Record = { + 1: "CONTEXT", + 2: "ANALYZE", + 3: "CONFIRM", +}; + +// -- State -- + +interface VerifyState { + active: boolean; + step: VerifyStep; + itemId: string; + step1Prompt: string | null; + systemPrompt: string | null; +} + +// -- Phase -- + +export class QRVerifyPhase { + private readonly pi: ExtensionAPI; + private readonly planDir: string; + private readonly log: Logger; + private readonly state: VerifyState; + private readonly eventLog: EventLog | undefined; + private readonly dispatch: WorkflowDispatch; + private readonly planRef: PlanRef; + private item: 
QRItem | null = null; + + constructor( + pi: ExtensionAPI, + config: { planDir: string; itemId: string }, + dispatch: WorkflowDispatch, + planRef: PlanRef, + log?: Logger, + eventLog?: EventLog, + ) { + this.pi = pi; + this.planDir = config.planDir; + this.dispatch = dispatch; + this.planRef = planRef; + this.log = log ?? createLogger("QRVerify"); + this.eventLog = eventLog; + + this.state = { + active: false, + step: 1, + itemId: config.itemId, + step1Prompt: null, + systemPrompt: null, + }; + + this.registerHandlers(); + } + + async begin(): Promise { + // Verify plan.json exists so koan_get_plan is usable during analysis. + const planPath = path.join(this.planDir, "plan.json"); + try { + await fs.access(planPath); + } catch { + this.log("plan.json not found", { path: planPath }); + return; + } + + const contextPath = path.join(this.planDir, "context.json"); + let contextData: ContextData; + try { + const raw = await fs.readFile(contextPath, "utf8"); + contextData = JSON.parse(raw) as ContextData; + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + this.log("Failed to read context.json", { error: message }); + return; + } + + const qrPath = path.join(this.planDir, "qr-plan-design.json"); + let qrFile: QRFile; + try { + const raw = await fs.readFile(qrPath, "utf8"); + qrFile = JSON.parse(raw) as QRFile; + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + this.log("Failed to read qr-plan-design.json", { error: message }); + return; + } + + const item = qrFile.items.find((i) => i.id === this.state.itemId); + if (!item) { + this.log("QR item not found", { itemId: this.state.itemId }); + return; + } + this.item = item; + + let basePrompt: string; + try { + basePrompt = await loadQRVerifySystemPrompt(); + } catch (error) { + const message = error instanceof Error ? 
error.message : String(error); + this.log("Failed to load QR verify system prompt", { error: message }); + return; + } + + this.state.systemPrompt = buildVerifySystemPrompt(basePrompt); + this.state.step1Prompt = formatStep(buildContextStep(item, contextData)); + this.state.active = true; + this.state.step = 1; + this.planRef.dir = this.planDir; + + hookDispatch(this.dispatch, "onCompleteStep", () => this.handleStepComplete()); + + this.log("Starting QR verify workflow", { itemId: this.state.itemId, step: 1 }); + await this.eventLog?.emitPhaseStart(TOTAL_STEPS); + await this.eventLog?.emitStepTransition(1, STEP_NAMES[1], TOTAL_STEPS); + } + + private registerHandlers(): void { + this.pi.on("before_agent_start", () => { + if (!this.state.active || !this.state.systemPrompt) return undefined; + return { systemPrompt: this.state.systemPrompt }; + }); + + // Step 1 prompt injection. Context event fires before the initial LLM + // call and replaces the trigger user message with actual step 1 instructions. + // Handler is a no-op once the step advances past 1. + this.pi.on("context", (event) => { + if (!this.state.active) return undefined; + if (this.state.step !== 1 || !this.state.step1Prompt) return undefined; + + const messages = event.messages.map((m) => { + if (m.role === "user") { + return { ...m, content: this.state.step1Prompt! }; + } + return m; + }); + return { messages }; + }); + + this.pi.on("tool_call", (event) => { + if (!this.state.active) return undefined; + + const perm = checkPermission("qr-plan-design", event.toolName); + if (!perm.allowed) { + return { block: true, reason: perm.reason }; + } + + // Step gate: koan_qr_set_item is step-3-only (CONFIRM step). + // Blocklist so read tools and other approved tools pass through. 
+ const step = this.state.step; + if (step < 3 && event.toolName === "koan_qr_set_item") { + return { + block: true, + reason: `koan_qr_set_item available in step 3 (current: ${step})`, + }; + } + + return undefined; + }); + + } + + private async handleStepComplete(): Promise<{ ok: boolean; prompt?: string; error?: string }> { + const prev = this.state.step; + + if (prev === 3) { + this.state.active = false; + unhookDispatch(this.dispatch, "onCompleteStep"); + await this.eventLog?.emitPhaseEnd("completed"); + this.log("Verification complete"); + return { ok: true, prompt: "Verification complete." }; + } + + this.state.step = (prev + 1) as VerifyStep; + const stepName = STEP_NAMES[this.state.step]; + const prompt = this.buildStepPrompt(this.state.step); + + this.log("Step complete, advancing", { from: prev, to: this.state.step }); + await this.eventLog?.emitStepTransition(this.state.step, stepName, TOTAL_STEPS); + + return { ok: true, prompt }; + } + + // Item is stored during begin() -- avoids async re-reads for prompt building. + private buildStepPrompt(step: VerifyStep): string { + switch (step) { + case 2: + return formatStep(buildAnalyzeStep(this.item!)); + case 3: + return formatStep(buildConfirmStep(this.item!)); + default: + return ""; + } + } +} diff --git a/src/planner/phases/qr-verify/prompts.ts b/src/planner/phases/qr-verify/prompts.ts new file mode 100644 index 0000000..97dfe3f --- /dev/null +++ b/src/planner/phases/qr-verify/prompts.ts @@ -0,0 +1,154 @@ +// Prompt guidance for the 3-step QR verify subagent workflow. +// +// Each reviewer subagent verifies exactly 1 QRItem against the plan. +// Steps: CONTEXT (understand the check) -> ANALYZE (read plan, apply check) +// -> CONFIRM (record verdict via koan_qr_set_item). 
+ +import { promises as fs } from "node:fs"; +import * as os from "node:os"; +import * as path from "node:path"; + +import type { ContextData } from "../../types.js"; +import type { QRItem } from "../../qr/types.js"; +import type { StepGuidance } from "../../lib/step.js"; + +// -- Types -- + +export type VerifyStep = 1 | 2 | 3; + +// -- Helpers -- + +function formatContextXml(ctx: ContextData): string { + const fields = Object.entries(ctx) + .map(([key, values]) => { + const items = (values as string[]).map((v) => ` ${v}`).join("\n"); + return ` <${key}>\n${items}\n `; + }) + .join("\n"); + return `\n${fields}\n`; +} + +function scopeGuidance(item: QRItem): string { + const s = item.scope; + if (s === "*") { + return "MACRO CHECK -- Use koan_get_plan to read the full plan."; + } + if (s.startsWith("milestone:")) { + const milestoneId = s.slice("milestone:".length); + return `MILESTONE CHECK -- Use koan_get_milestone(id='${milestoneId}') to read the milestone.`; + } + if (s.startsWith("code_intent:")) { + const intentId = s.slice("code_intent:".length); + return `CODE INTENT CHECK -- Use koan_get_intent(id='${intentId}') to read the intent.`; + } + if (s.startsWith("decision:")) { + const decisionId = s.slice("decision:".length); + return `DECISION CHECK -- Use koan_get_decision(id='${decisionId}') to read the decision.`; + } + return "SCOPED CHECK -- Read the relevant section using plan getter tools."; +} + +// -- Exports -- + +export async function loadQRVerifySystemPrompt(): Promise { + const promptPath = path.join(os.homedir(), ".claude/agents/quality-reviewer.md"); + try { + const content = await fs.readFile(promptPath, "utf8"); + return content.replace(/^---\n[\s\S]*?\n---\n/, ""); + } catch { + throw new Error(`Quality-reviewer prompt not found at ${promptPath}`); + } +} + +export function buildVerifySystemPrompt(basePrompt: string): string { + return [ + basePrompt, + "", + "---", + "", + "WORKFLOW: 3-STEP QR VERIFICATION (plan-design)", + "", + "You will 
verify exactly 1 QR item against the plan.", + "Step 1 instructions are in the user message below.", + "Complete the work described, then call koan_complete_step.", + "Put your findings in the `thoughts` parameter of koan_complete_step.", + "", + "CRITICAL: Do NOT record a verdict until step 3 (CONFIRM).", + "Analyze thoroughly in step 2 before committing.", + ].join("\n"); +} + +export function buildContextStep(item: QRItem, contextData: ContextData): StepGuidance { + return { + title: "Step 1: CONTEXT", + instructions: [ + "PHASE: plan-design", + "ITEM TO VERIFY:", + "", + "", + ` ${item.id}`, + ` ${item.scope}`, + ` ${item.check}`, + ` ${item.severity}`, + "", + "", + "PLANNING CONTEXT (reference for semantic validation):", + formatContextXml(contextData), + "", + "UNDERSTAND the check you need to perform.", + "Note the scope: '*' means plan-wide check, 'milestone:X' means specific milestone.", + "Severity indicates blocking behavior: MUST blocks all iterations.", + ], + }; +} + +export function buildAnalyzeStep(item: QRItem): StepGuidance { + return { + title: "Step 2: ANALYZE", + instructions: [ + scopeGuidance(item), + "", + "TASK:", + "1. Read relevant files/sections based on scope", + "2. Apply the verification check", + "3. Form preliminary conclusion: PASS or FAIL?", + "4. If FAIL, note specific evidence", + "", + "DO NOT update QR state yet. 
Proceed to CONFIRM step.", + ], + }; +} + +export function buildConfirmStep(item: QRItem): StepGuidance { + return { + title: "Step 3: CONFIRM", + instructions: [ + `CONFIRMING: ${item.id}`, + `SEVERITY: ${item.severity}`, + "", + "CONFIDENCE CHECK:", + "- Are you confident in your conclusion?", + "- Did you verify against actual plan content?", + "- Is your evidence specific and verifiable?", + "", + "RECORD RESULT:", + "", + "If PASS:", + ` koan_qr_set_item(phase='plan-design', id='${item.id}', status='PASS')`, + "", + "If FAIL:", + ` koan_qr_set_item(phase='plan-design', id='${item.id}', status='FAIL',`, + " finding='')", + "", + "RULES:", + "- FAIL requires finding (explains what failed)", + "- PASS forbids finding (finding field must not be set)", + "", + "Execute ONE of the above tool calls, then call koan_complete_step.", + ], + invokeAfter: [ + "WHEN DONE: Call koan_complete_step after recording your verdict.", + "Do NOT call this tool until you have called koan_qr_set_item.", + ].join("\n"), + }; +} diff --git a/src/planner/session.ts b/src/planner/session.ts index ef08dfa..1567983 100644 --- a/src/planner/session.ts +++ b/src/planner/session.ts @@ -1,5 +1,8 @@ +// Parent session: orchestrates the koan workflow (context capture -> architect +// -> QR decompose -> QR verify pool). Polls subagent state.json for progress. +// Widget displays persistent progress; destroyed on completion. 
+ import { promises as fs } from "node:fs"; -import * as os from "node:os"; import * as path from "node:path"; import type { ExtensionAPI, ExtensionCommandContext, ExtensionContext } from "@mariozechner/pi-coding-agent"; @@ -7,10 +10,16 @@ import type { ExtensionAPI, ExtensionCommandContext, ExtensionContext } from "@m import { ContextCapturePhase } from "./phases/context-capture/phase.js"; import { createInitialState, initializePlanState, type WorkflowState } from "./state.js"; import { createPlanInfo } from "../utils/plan.js"; -import { spawnArchitect } from "./subagent.js"; -import { createLogger } from "../utils/logger.js"; -import { createSubagentDir, readSubagentState } from "../utils/progress.js"; +import { spawnArchitect, spawnQRDecomposer, spawnReviewer } from "./subagent.js"; +import { createLogger, setLogDir, type Logger } from "../utils/logger.js"; +import { createSubagentDir } from "../utils/progress.js"; +import { readProjection } from "./lib/audit.js"; import type { WorkflowDispatch, PlanRef } from "./lib/dispatch.js"; +import { pool } from "./lib/pool.js"; +import type { QRFile } from "./qr/types.js"; +import { WidgetController } from "./ui/widget.js"; + +// -- Types -- interface Session { plan(args: string, ctx: ExtensionCommandContext): Promise; @@ -18,9 +27,17 @@ interface Session { status(ctx: ExtensionCommandContext): Promise; } +interface QRBlockResult { + summary: string; + passed: boolean; +} + +// -- Session -- + export function createSession(pi: ExtensionAPI, dispatch: WorkflowDispatch, planRef: PlanRef): Session { const state: WorkflowState = createInitialState(); const log = createLogger("Session"); + let widget: WidgetController | null = null; // Completion callback for context-capture phase. 
Runs inside the // koan_store_context tool call -- the tool blocks until the architect @@ -31,60 +48,93 @@ export function createSession(pi: ExtensionAPI, dispatch: WorkflowDispatch, plan return "Context captured but no plan state available."; } - const planDir = state.plan.directory; - const planJsonPath = path.join(planDir, "plan.json"); - const subagentDir = await createSubagentDir(planDir, "architect"); + let outcome: "PASS" | "FAIL" = "FAIL"; + + try { + const planDir = state.plan.directory; + const planJsonPath = path.join(planDir, "plan.json"); + const subagentDir = await createSubagentDir(planDir, "architect"); + + state.phase = "architect-running"; + widget?.update({ + phaseStatus: { index: 0, status: "completed" }, + activeIndex: 1, + step: "spawning architect...", + activity: "", + }); + log("Spawning architect after context capture", { planDir, subagentDir }); - state.phase = "architect-running"; - ctx.ui.notify("Launching architect subagent for plan-design...", "info"); - log("Spawning architect after context capture", { planDir, subagentDir }); + const extensionPath = path.resolve(import.meta.dirname, "../../extensions/koan.ts"); - const extensionPath = path.resolve(import.meta.dirname, "../../extensions/koan.ts"); + const pollInterval = setInterval(async () => { + const s = await readProjection(subagentDir); + if (s) { + widget?.update({ + step: s.stepName, + activity: s.lastAction ?? 
"", + }); + } + }, 2000); + + const result = await spawnArchitect({ + planDir, + subagentDir, + cwd: ctx.cwd, + extensionPath, + log, + }); - const pollInterval = setInterval(async () => { - const s = await readSubagentState(subagentDir); - if (s?.current) { - ctx.ui.notify(`Architect: ${s.current}`, "info"); + clearInterval(pollInterval); + + if (result.exitCode !== 0) { + state.phase = "architect-failed"; + const detail = result.stderr.slice(0, 500); + log("Architect subagent failed", { exitCode: result.exitCode, stderr: detail }); + widget?.update({ + phaseStatus: { index: 1, status: "failed" }, + step: "architect failed", + activity: "", + }); + return `Context captured. Architect subagent failed (exit ${result.exitCode}).\n\nStderr:\n${detail}`; } - }, 2000); - - const result = await spawnArchitect({ - planDir, - subagentDir, - cwd: ctx.cwd, - extensionPath, - log, - }); - - clearInterval(pollInterval); - - if (result.exitCode !== 0) { - state.phase = "architect-failed"; - const detail = result.stderr.slice(0, 500); - log("Architect subagent failed", { exitCode: result.exitCode, stderr: detail }); - ctx.ui.notify(`Architect subagent failed (exit ${result.exitCode}).`, "error"); - return `Context captured. Architect subagent failed (exit ${result.exitCode}).\n\nStderr:\n${detail}`; - } - let planExists = false; - try { - await fs.access(planJsonPath); - planExists = true; - } catch { - // plan.json not written - } + let planExists = false; + try { + await fs.access(planJsonPath); + planExists = true; + } catch { + // plan.json not written + } - if (!planExists) { - state.phase = "architect-failed"; - log("Architect completed but plan.json not found", { planJsonPath }); - ctx.ui.notify("Architect completed but plan.json was not written.", "error"); - return "Context captured. 
Architect completed but plan.json was not written."; - } + if (!planExists) { + state.phase = "architect-failed"; + log("Architect completed but plan.json not found", { planJsonPath }); + widget?.update({ + phaseStatus: { index: 1, status: "failed" }, + step: "no plan produced", + activity: "", + }); + return "Context captured. Architect completed but produced no plan."; + } + + state.phase = "plan-design-complete"; + log("Architect plan-design complete", { planDir }); + widget?.update({ + phaseStatus: { index: 1, status: "completed" }, + step: "starting QR block...", + activity: "", + }); - state.phase = "plan-design-complete"; - log("Architect plan-design complete", { planDir }); - ctx.ui.notify("Plan-design phase complete.", "info"); - return `Context captured. Plan written to ${planDir}/plan.json.`; + const qr = await runQRBlock(planDir, ctx.cwd, extensionPath, state, log, widget); + if (qr.passed) outcome = "PASS"; + return `Context captured. Plan design complete.\n\n${qr.summary}`; + } finally { + if (widget) { + widget.destroy(); + widget = null; + } + ctx.ui.notify(outcome, outcome === "PASS" ? 
"info" : "error"); + } }; const contextPhase = new ContextCapturePhase(pi, state, dispatch, createLogger("Context"), onContextComplete); @@ -107,6 +157,7 @@ export function createSession(pi: ExtensionAPI, dispatch: WorkflowDispatch, plan const planInfo = await createPlanInfo(description, ctx.cwd); initializePlanState(state, planInfo, description); planRef.dir = planInfo.directory; + setLogDir(planInfo.directory); log("Plan command invoked", { cwd: ctx.cwd, @@ -115,6 +166,16 @@ export function createSession(pi: ExtensionAPI, dispatch: WorkflowDispatch, plan planDirectory: planInfo.directory, }); + // Destroy stale widget if re-entered + if (widget) { + widget.destroy(); + widget = null; + } + + if (ctx.hasUI) { + widget = new WidgetController(ctx.ui, planInfo.id); + } + await contextPhase.begin(description, planInfo, ctx); }, @@ -123,74 +184,117 @@ export function createSession(pi: ExtensionAPI, dispatch: WorkflowDispatch, plan }, async status(ctx) { - const summary = buildStatusSummary(state, ctx.cwd); - ctx.ui.notify(summary, "info"); + ctx.ui.notify(`Phase: ${state.phase}`, "info"); }, }; } -function buildStatusSummary(state: WorkflowState, cwd: string): string { - const lines: string[] = []; - const plan = state.plan; +// -- QR Block -- - if (plan) { - lines.push(`Plan ${plan.id}`); - lines.push(`Directory: ${formatPath(plan.directory, cwd)}`); - } else { - lines.push("No active plan."); - } +const QR_POOL_CONCURRENCY = 6; - switch (state.phase) { - case "idle": - lines.push("Koan planner is idle."); - break; - case "context": { - const attempt = state.context?.attempt ?? 0; - lines.push(`Context capture in progress (attempt ${attempt}).`); - if (state.context?.contextFilePath) { - lines.push(`Target: ${formatPath(state.context.contextFilePath, cwd)}`); - } - break; +async function runQRBlock( + planDir: string, + cwd: string, + extensionPath: string, + state: WorkflowState, + log: Logger, + widget: WidgetController | null, +): Promise { + // 1. 
Spawn decomposer subagent + state.phase = "qr-decompose-running"; + widget?.update({ step: "qr-decompose: starting...", activity: "" }); + const decomposeDir = await createSubagentDir(planDir, "qr-decomposer"); + + const decomposePoll = setInterval(async () => { + const s = await readProjection(decomposeDir); + if (s) { + widget?.update({ + step: `qr-decompose: ${s.stepName}`, + activity: s.lastAction ?? "", + }); } - case "context-complete": - lines.push("Context captured successfully."); - if (state.context?.contextFilePath) { - lines.push(`Stored at: ${formatPath(state.context.contextFilePath, cwd)}`); - } - break; - case "context-failed": - lines.push("Context capture failed. Re-run /koan plan to try again."); - break; - case "architect-running": - lines.push("Architect subagent running (plan-design phase)..."); - break; - case "architect-failed": - lines.push("Architect subagent failed. Check plan directory for details."); - break; - case "plan-design-complete": - lines.push("Plan-design phase complete."); - if (plan) { - lines.push(`Plan: ${formatPath(path.join(plan.directory, "plan.json"), cwd)}`); - } - break; - default: - lines.push("Unknown planner state."); - break; + }, 2000); + + const decompose = await spawnQRDecomposer({ + planDir, + subagentDir: decomposeDir, + cwd, + extensionPath, + log, + }); + + clearInterval(decomposePoll); + + if (decompose.exitCode !== 0) { + state.phase = "qr-decompose-failed"; + const detail = decompose.stderr.slice(0, 500); + log("QR decomposer failed", { exitCode: decompose.exitCode, stderr: detail }); + widget?.update({ step: "qr-decompose: failed", activity: "" }); + return { summary: `QR decompose failed (exit ${decompose.exitCode}).\n\nStderr:\n${detail}`, passed: false }; } - return lines.join("\n"); -} + // 2. 
Read QR items + const qrPath = path.join(planDir, "qr-plan-design.json"); + let qr: QRFile; + try { + const raw = await fs.readFile(qrPath, "utf8"); + qr = JSON.parse(raw) as QRFile; + } catch (error) { + state.phase = "qr-decompose-failed"; + const message = error instanceof Error ? error.message : String(error); + log("Failed to read qr-plan-design.json after decompose", { error: message }); + return { summary: "QR decompose completed but produced no verifiable items.", passed: false }; + } -function formatPath(target: string, cwd: string): string { - const home = os.homedir(); - if (target.startsWith(home)) { - return `~${target.slice(home.length)}`; + if (qr.items.length === 0) { + state.phase = "qr-decompose-failed"; + log("QR decompose produced no items"); + return { summary: "QR decompose completed but produced no items.", passed: false }; } - const relative = path.relative(cwd, target); - if (!relative.startsWith("..")) { - return relative; + const itemIds = qr.items.map((i) => i.id); + log("QR decompose complete", { itemCount: itemIds.length }); + widget?.update({ step: `qr-verify: 0/${itemIds.length}`, activity: "" }); + + // 3. Spawn reviewer pool + state.phase = "qr-verify-running"; + + const result = await pool( + itemIds, + QR_POOL_CONCURRENCY, + async (itemId) => { + const reviewerDir = await createSubagentDir(planDir, `qr-reviewer-${itemId}`); + return spawnReviewer({ + planDir, + subagentDir: reviewerDir, + cwd, + extensionPath, + itemId, + log, + }); + }, + (done, total) => widget?.update({ step: `qr-verify: ${done}/${total}` }), + ); + + // 4. 
Read final results + state.phase = "qr-complete"; + let finalQR: QRFile; + try { + const raw = await fs.readFile(qrPath, "utf8"); + finalQR = JSON.parse(raw) as QRFile; + } catch { + finalQR = qr; } - return target; + const pass = finalQR.items.filter((i) => i.status === "PASS").length; + const fail = finalQR.items.filter((i) => i.status === "FAIL").length; + const todo = finalQR.items.filter((i) => i.status === "TODO").length; + const summary = `QR complete: ${pass} PASS, ${fail} FAIL, ${todo} TODO (${result.failed.length} reviewers failed).`; + + log("QR block complete", { pass, fail, todo, failedReviewers: result.failed }); + + const passed = fail === 0 && result.failed.length === 0; + widget?.update({ step: summary, activity: "" }); + return { summary, passed }; } diff --git a/src/planner/state.ts b/src/planner/state.ts index 5d47d63..3583d4d 100644 --- a/src/planner/state.ts +++ b/src/planner/state.ts @@ -7,7 +7,12 @@ export type WorkflowPhase = | "context-failed" | "architect-running" | "architect-failed" - | "plan-design-complete"; + | "plan-design-complete" + | "qr-decompose-running" + | "qr-decompose-failed" + | "qr-verify-running" + | "qr-verify-failed" + | "qr-complete"; export interface PlanInfo { id: string; @@ -54,7 +59,12 @@ export function resetContextState(state: WorkflowState): void { state.phase === "context-failed" || state.phase === "context-complete" || state.phase === "architect-failed" || - state.phase === "plan-design-complete" + state.phase === "plan-design-complete" || + state.phase === "qr-decompose-running" || + state.phase === "qr-decompose-failed" || + state.phase === "qr-verify-running" || + state.phase === "qr-verify-failed" || + state.phase === "qr-complete" ) { state.phase = "idle"; } diff --git a/src/planner/subagent.ts b/src/planner/subagent.ts index 997c8f8..19c5647 100644 --- a/src/planner/subagent.ts +++ b/src/planner/subagent.ts @@ -15,6 +15,24 @@ export interface SpawnArchitectOptions { subagentDir: string; cwd: string; 
extensionPath: string; + initialPrompt?: string; + log?: Logger; +} + +export interface SpawnQRDecomposerOptions { + planDir: string; + subagentDir: string; + cwd: string; + extensionPath: string; + log?: Logger; +} + +export interface SpawnReviewerOptions { + planDir: string; + subagentDir: string; + cwd: string; + extensionPath: string; + itemId: string; log?: Logger; } @@ -28,7 +46,7 @@ export function spawnArchitect(opts: SpawnArchitectOptions): Promise { + const args = [ + "-p", + "-e", opts.extensionPath, + "--koan-role", role, + "--koan-phase", phase, + "--koan-plan-dir", opts.planDir, + "--koan-subagent-dir", opts.subagentDir, + ...(opts.extraFlags ?? []), + prompt, + ]; + + log(`Spawning ${role} subagent`, { planDir: opts.planDir, subagentDir: opts.subagentDir }); + + return new Promise((resolve) => { + const stdoutLog = createWriteStream(path.join(opts.subagentDir, "stdout.log"), { flags: "w" }); + const stderrLog = createWriteStream(path.join(opts.subagentDir, "stderr.log"), { flags: "w" }); + + const proc = spawn("pi", args, { + cwd: opts.cwd, + shell: false, + stdio: ["ignore", "pipe", "pipe"], + }); + + let stderr = ""; + + proc.stdout.on("data", (data: Buffer) => { + stdoutLog.write(data); + }); + + proc.stderr.on("data", (data: Buffer) => { + stderr += data.toString(); + stderrLog.write(data); + }); + + proc.on("close", (code) => { + stdoutLog.end(); + stderrLog.end(); + const exitCode = code ?? 1; + log(`${role} subagent exited`, { exitCode }); + resolve({ exitCode, stderr, subagentDir: opts.subagentDir }); + }); + + proc.on("error", (error) => { + stdoutLog.end(); + stderrLog.end(); + log(`${role} subagent spawn error`, { error: error.message }); + resolve({ exitCode: 1, stderr: error.message, subagentDir: opts.subagentDir }); + }); + }); +} + +export function spawnQRDecomposer(opts: SpawnQRDecomposerOptions): Promise { + const log = opts.log ?? 
createLogger("Subagent"); + return spawnSubagent("qr-decomposer", "qr-plan-design", "Begin the QR decompose phase.", opts, log); +} + +export function spawnReviewer(opts: SpawnReviewerOptions): Promise { + const log = opts.log ?? createLogger("Subagent"); + return spawnSubagent( + "reviewer", + "qr-plan-design", + "Verify the assigned QR item.", + { ...opts, extraFlags: ["--koan-qr-item", opts.itemId] }, + log, + ); +} diff --git a/src/planner/ui/widget.ts b/src/planner/ui/widget.ts new file mode 100644 index 0000000..c5a4337 --- /dev/null +++ b/src/planner/ui/widget.ts @@ -0,0 +1,203 @@ +// Persistent TUI widget for koan workflow progress. +// Full-width background canvas (toolPendingBg) via component factory. +// Hash-based change detection + 1s unref'd timer for elapsed updates. +// Created by session.plan(), destroyed in onContextComplete finally block. +// +// Uses setWidget(key, factory) to get render(width) for full-width bg. +// Content stays at a fixed CONTENT width; background fills terminal edge. 
+ +import type { ExtensionUIContext } from "@mariozechner/pi-coding-agent"; +import type { Theme, ThemeColor } from "@mariozechner/pi-coding-agent"; +import { truncateToWidth, visibleWidth } from "@mariozechner/pi-tui"; + +// -- Types -- + +export type PhaseStatus = "pending" | "running" | "completed" | "failed"; + +interface PhaseEntry { + key: string; + label: string; + status: PhaseStatus; +} + +interface WidgetState { + planId: string; + phases: PhaseEntry[]; + activeIndex: number; // 0-based; -1 when done + step: string; + activity: string; + startedAt: number; +} + +export interface WidgetUpdate { + activeIndex?: number; + step?: string; + activity?: string; + phaseStatus?: { index: number; status: PhaseStatus }; +} + +// -- Constants -- + +const WIDGET_KEY = "koan"; +const PAD = 2; // horizontal padding each side + +const PHASES: ReadonlyArray<{ key: string; label: string }> = [ + { key: "ctx", label: "Gathering context" }, + { key: "design", label: "Designing plan" }, + { key: "code", label: "Planning code" }, + { key: "docs", label: "Planning docs" }, + { key: "exec-c", label: "Executing code" }, + { key: "exec-d", label: "Executing docs" }, +]; + +const STATUS_ICON: Record = { + pending: "[ ]", + running: "[>>]", + completed: "[OK]", + failed: "[!!]", +}; + +const ICON_COLOR: Record = { + pending: "muted", + running: "warning", + completed: "success", + failed: "error", +}; + +// -- Canvas primitive -- +// Content width adapts to terminal; background fills edge to edge. 
+ +function contentWidth(termWidth: number): number { + return Math.max(40, termWidth - PAD * 2); +} + +function canvasLine(content: string, termWidth: number, theme: Theme): string { + const cw = contentWidth(termWidth); + const inner = truncateToWidth(content, cw, "...", true); + const line = " ".repeat(PAD) + inner + " ".repeat(PAD); + return theme.bg("toolPendingBg", line); +} + +// -- Helpers -- + +function formatElapsed(ms: number): string { + const totalSec = Math.floor(ms / 1000); + const m = Math.floor(totalSec / 60); + const s = totalSec % 60; + return `${m}m ${String(s).padStart(2, "0")}s`; +} + +function rightAlign(left: string, right: string, width: number): string { + const gap = Math.max(1, width - visibleWidth(left) - visibleWidth(right)); + return `${left}${" ".repeat(gap)}${right}`; +} + +// Pure render: (state, theme, termWidth) -> 7 lines. No side effects. +function render(state: WidgetState, theme: Theme, termWidth: number): string[] { + const c = (s: string) => canvasLine(s, termWidth, theme); + const cw = contentWidth(termWidth); + + // Header: koan [N/6] label ... elapsed + const idx = state.activeIndex; + const label = idx >= 0 ? state.phases[idx].label : "done"; + const num = idx >= 0 ? idx + 1 : 6; + const left = `${theme.bold(theme.fg("accent", "koan"))} [${num}/6] ${label}`; + const elapsed = theme.fg("dim", formatElapsed(Date.now() - state.startedAt)); + const header = rightAlign(left, elapsed, cw); + + // Plan ID + const planId = theme.fg("dim", state.planId); + + // Phase bar + const phaseBar = state.phases + .map((p) => `${theme.fg(ICON_COLOR[p.status], STATUS_ICON[p.status])} ${p.key}`) + .join(" "); + + // Step + activity + const step = state.step ? theme.fg("dim", state.step) : ""; + const act = state.activity ? 
theme.fg("muted", ` > ${state.activity}`) : ""; + const detail = truncateToWidth(step + act, cw, "..."); + + return [ + c(""), // top padding + c(header), + c(planId), + c(""), // separator + c(phaseBar), + c(detail), + c(""), // bottom padding + ]; +} + +// -- WidgetController -- + +export class WidgetController { + private state: WidgetState; + private lastHash = ""; + private timer: ReturnType; + private ui: ExtensionUIContext; + + constructor(ui: ExtensionUIContext, planId: string) { + this.ui = ui; + this.state = { + planId, + phases: PHASES.map((p) => ({ key: p.key, label: p.label, status: "pending" as PhaseStatus })), + activeIndex: 0, + step: "", + activity: "", + startedAt: Date.now(), + }; + this.state.phases[0].status = "running"; + + this.timer = setInterval(() => this.doRender(), 1000); + this.timer.unref(); + + this.doRender(); + } + + update(patch: WidgetUpdate): void { + if (patch.phaseStatus !== undefined) { + const { index, status } = patch.phaseStatus; + if (index >= 0 && index < this.state.phases.length) { + this.state.phases[index].status = status; + } + } + if (patch.activeIndex !== undefined) { + this.state.activeIndex = patch.activeIndex; + const ai = patch.activeIndex; + if (ai >= 0 && ai < this.state.phases.length && this.state.phases[ai].status === "pending") { + this.state.phases[ai].status = "running"; + } + } + if (patch.step !== undefined) { + this.state.step = patch.step; + } + if (patch.activity !== undefined) { + this.state.activity = patch.activity; + } + this.doRender(); + } + + destroy(): void { + clearInterval(this.timer); + this.ui.setWidget(WIDGET_KEY, undefined); + } + + private doRender(): void { + // Capture state snapshot for the factory closure + const state = { ...this.state, phases: this.state.phases.map((p) => ({ ...p })) }; + const theme = this.ui.theme; + + // Hash check: skip setWidget if content unchanged (ignoring width) + const hashLines = render(state, theme, 0); + const hash = hashLines.join("\n"); + if (hash 
=== this.lastHash) return; + this.lastHash = hash; + + // Component factory: Pi calls render(width) with actual terminal width + this.ui.setWidget(WIDGET_KEY, (_tui, th) => ({ + render: (width: number) => render(state, th, width), + invalidate: () => {}, + })); + } +} diff --git a/src/utils/logger.ts b/src/utils/logger.ts index 67f4c2e..c8ced16 100644 --- a/src/utils/logger.ts +++ b/src/utils/logger.ts @@ -1,14 +1,36 @@ +// Debug logger for koan internals. Writes to a log file when a plan +// directory is available; silent otherwise. The Pi TUI captures both +// stdout and stderr, so neither can be used for debug output. + +import { appendFileSync, mkdirSync } from "node:fs"; +import * as path from "node:path"; + const prefix = "[koan]"; export type Logger = | undefined>(message: string, details?: T) => void; +let logPath: string | null = null; + +export function setLogDir(planDir: string): void { + logPath = path.join(planDir, "koan.log"); + try { + mkdirSync(path.dirname(logPath), { recursive: true }); + } catch { + // best effort + } +} + export function createLogger(scope: string): Logger { const label = `${prefix} ${scope}`; return (message, details) => { - if (details && Object.keys(details).length > 0) { - console.log(`${label}: ${message}`, details); - } else { - console.log(`${label}: ${message}`); + if (!logPath) return; + const suffix = details && Object.keys(details).length > 0 + ? ` ${JSON.stringify(details)}` + : ""; + try { + appendFileSync(logPath, `${new Date().toISOString()} ${label}: ${message}${suffix}\n`); + } catch { + // best effort -- plan dir may not exist yet } }; } diff --git a/src/utils/progress.ts b/src/utils/progress.ts index 566bda8..2940ecc 100644 --- a/src/utils/progress.ts +++ b/src/utils/progress.ts @@ -1,71 +1,14 @@ +// Directory infrastructure for subagent working directories. +// Audit state (state.json, events.jsonl) is managed by EventLog in lib/audit.ts. +// This module is retained for createSubagentDir, used by session.ts. 
+ import { promises as fs } from "node:fs"; import * as crypto from "node:crypto"; import * as path from "node:path"; -export interface TrailEntry { - at: string; - msg: string; -} - -export interface SubagentState { - role: string; - phase: string; - status: "running" | "completed" | "failed"; - current: string; - updated_at: string; - trail: TrailEntry[]; -} - export async function createSubagentDir(planDir: string, role: string): Promise { const hex = crypto.randomBytes(2).toString("hex"); const dir = path.join(planDir, "subagents", `${role}-${hex}`); await fs.mkdir(dir, { recursive: true }); return dir; } - -export class ProgressReporter { - private readonly stateFile: string; - private readonly state: SubagentState; - - constructor(dir: string, role: string, phase: string) { - this.stateFile = path.join(dir, "state.json"); - this.state = { - role, - phase, - status: "running", - current: "", - updated_at: new Date().toISOString(), - trail: [], - }; - } - - async update(msg: string): Promise { - const now = new Date().toISOString(); - this.state.current = msg; - this.state.updated_at = now; - this.state.trail.push({ at: now, msg }); - await this.flush(); - } - - async complete(status: "completed" | "failed"): Promise { - const now = new Date().toISOString(); - this.state.status = status; - this.state.current = status; - this.state.updated_at = now; - this.state.trail.push({ at: now, msg: status }); - await this.flush(); - } - - private async flush(): Promise { - await fs.writeFile(this.stateFile, JSON.stringify(this.state, null, 2) + "\n"); - } -} - -export async function readSubagentState(dir: string): Promise { - try { - const raw = await fs.readFile(path.join(dir, "state.json"), "utf8"); - return JSON.parse(raw) as SubagentState; - } catch { - return null; - } -} From 44f5b648d500747b4fa487867938e7a19e334a10 Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Tue, 24 Feb 2026 21:49:05 +0700 Subject: [PATCH 010/412] Add QR fix loop for plan-design phase When 
plan-design QR verification fails, the session now spawns a fix-mode architect (3-step targeted repair) and re-runs full QR, up to 5 iterations. Progressive severity de-escalation narrows blocking set per iteration. --- extensions/koan.ts | 6 + src/planner/phases/dispatch.ts | 46 +++++ src/planner/phases/plan-design/fix-phase.ts | 184 ++++++++++++++++++ src/planner/phases/plan-design/fix-prompts.ts | 137 +++++++++++++ src/planner/phases/plan-design/phase.ts | 28 +-- src/planner/plan/validate.ts | 37 ++++ src/planner/qr/severity.ts | 41 ++++ src/planner/session.ts | 105 +++++++++- src/planner/subagent.ts | 95 ++++----- 9 files changed, 597 insertions(+), 82 deletions(-) create mode 100644 src/planner/phases/plan-design/fix-phase.ts create mode 100644 src/planner/phases/plan-design/fix-prompts.ts create mode 100644 src/planner/qr/severity.ts diff --git a/extensions/koan.ts b/extensions/koan.ts index a58d81c..2dfd08c 100644 --- a/extensions/koan.ts +++ b/extensions/koan.ts @@ -44,6 +44,12 @@ export default function koan(pi: ExtensionAPI): void { default: "", }); + pi.registerFlag("koan-fix", { + description: "QR phase to fix (e.g. plan-design)", + type: "string", + default: "", + }); + // Pi snapshots tools during _buildRuntime() at init. All 44 tools // register here unconditionally. Phases restrict access via tool_call // blocking at runtime. diff --git a/src/planner/phases/dispatch.ts b/src/planner/phases/dispatch.ts index c8a55f8..9bfba42 100644 --- a/src/planner/phases/dispatch.ts +++ b/src/planner/phases/dispatch.ts @@ -3,20 +3,26 @@ // (getFlag returns undefined before _buildRuntime), so detection is // deferred to before_agent_start. 
+import { promises as fs } from "node:fs"; +import * as path from "node:path"; + import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; import { PlanDesignPhase } from "./plan-design/phase.js"; +import { PlanDesignFixPhase } from "./plan-design/fix-phase.js"; import { QRDecomposePhase } from "./qr-decompose/phase.js"; import { QRVerifyPhase } from "./qr-verify/phase.js"; import { createLogger, type Logger } from "../../utils/logger.js"; import type { WorkflowDispatch, PlanRef } from "../lib/dispatch.js"; import type { EventLog } from "../lib/audit.js"; +import type { QRFile } from "../qr/types.js"; export interface SubagentConfig { role: string; phase: string; planDir: string; subagentDir: string; + fix: string | null; // QR phase being fixed, null when initial mode } // Detects subagent mode by checking flags set via CLI (pi -p --koan-role @@ -33,11 +39,14 @@ export function detectSubagentMode(pi: ExtensionAPI): SubagentConfig | null { const planDir = pi.getFlag("koan-plan-dir"); const subagentDir = pi.getFlag("koan-subagent-dir"); + const fix = pi.getFlag("koan-fix"); + return { role: role.trim(), phase: typeof phase === "string" ? phase.trim() : "", planDir: typeof planDir === "string" ? planDir.trim() : "", subagentDir: typeof subagentDir === "string" ? subagentDir.trim() : "", + fix: typeof fix === "string" && fix.trim().length > 0 ? fix.trim() : null, }; } @@ -51,6 +60,43 @@ export async function dispatchPhase( ): Promise { const logger = log ?? createLogger("Dispatch"); + if (config.role === "architect" && config.fix === "plan-design") { + // Dispatch reads the QR file here, not in session.ts. + // The fix architect runs as a separate process with only the plan + // directory path -- it cannot receive in-memory QR data from the + // parent session. Reading from disk at dispatch boundary is the + // only clean handoff point. 
+ const qrPath = path.join(config.planDir, "qr-plan-design.json"); + let qrFile: QRFile; + try { + const raw = await fs.readFile(qrPath, "utf8"); + qrFile = JSON.parse(raw) as QRFile; + } catch (error) { + const msg = error instanceof Error ? error.message : String(error); + logger("Fix dispatch: failed to read QR file", { error: msg }); + return; + } + const failures = qrFile.items.filter((i) => i.status === "FAIL"); + if (failures.length === 0) { + logger("Fix dispatch: no FAIL items in QR file, skipping fix phase"); + return; + } + logger("Dispatching to plan-design fix workflow", { + planDir: config.planDir, + failureCount: failures.length, + }); + const phase = new PlanDesignFixPhase( + pi, + { planDir: config.planDir, failures }, + dispatch, + planRef, + logger, + eventLog, + ); + await phase.begin(); + return; + } + if (config.role === "architect" && config.phase === "plan-design") { logger("Dispatching to plan-design workflow", { planDir: config.planDir }); const phase = new PlanDesignPhase( diff --git a/src/planner/phases/plan-design/fix-phase.ts b/src/planner/phases/plan-design/fix-phase.ts new file mode 100644 index 0000000..4df6a24 --- /dev/null +++ b/src/planner/phases/plan-design/fix-phase.ts @@ -0,0 +1,184 @@ +// Plan-design fix phase -- 3-step targeted repair for QR failures. +// +// Separate class from PlanDesignPhase because the workflows diverge: +// initial = 6 steps of exploration then writing (mutations at step 6); +// fix = 3 steps of reading failures then applying targeted fixes +// (mutations at step 2). Conditional branching at every method +// boundary produces worse code than two focused classes. +// +// The fix architect receives QR failures as XML in step 1. It reads +// the current plan state via getter tools, applies minimal mutations +// to address the specific findings, then validates the result. The +// session orchestrator decides whether to re-run QR -- the fix phase +// does not know about iterations or severity escalation. 
+ +import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; + +import { loadAndValidatePlan } from "../../plan/validate.js"; +import { + loadPlanDesignSystemPrompt, + buildPlanDesignSystemPrompt, +} from "./prompts.js"; +import { + FIX_STEP_NAMES, + buildFixSystemPrompt, + fixStepGuidance, + formatFailuresXml, + type FixStep, +} from "./fix-prompts.js"; +import { formatStep } from "../../lib/step.js"; +import type { QRItem } from "../../qr/types.js"; +import { createLogger, type Logger } from "../../../utils/logger.js"; +import { EventLog } from "../../lib/audit.js"; +import { hookDispatch, unhookDispatch, type WorkflowDispatch, type PlanRef } from "../../lib/dispatch.js"; +import { checkPermission, PLAN_MUTATION_TOOLS } from "../../lib/permissions.js"; + +interface FixPhaseState { + active: boolean; + step: FixStep; + step1Prompt: string | null; + systemPrompt: string | null; +} + +const TOTAL_STEPS = 3; + +export class PlanDesignFixPhase { + private readonly pi: ExtensionAPI; + private readonly planDir: string; + private readonly failures: QRItem[]; + private readonly log: Logger; + private readonly state: FixPhaseState; + private readonly eventLog: EventLog | undefined; + private readonly dispatch: WorkflowDispatch; + private readonly planRef: PlanRef; + + constructor( + pi: ExtensionAPI, + config: { planDir: string; failures: QRItem[] }, + dispatch: WorkflowDispatch, + planRef: PlanRef, + log?: Logger, + eventLog?: EventLog, + ) { + this.pi = pi; + this.planDir = config.planDir; + this.failures = config.failures; + this.dispatch = dispatch; + this.planRef = planRef; + this.log = log ?? createLogger("PlanDesignFix"); + this.eventLog = eventLog; + + this.state = { + active: false, + step: 1, + step1Prompt: null, + systemPrompt: null, + }; + + this.registerHandlers(); + } + + async begin(): Promise { + let basePrompt: string; + try { + basePrompt = await loadPlanDesignSystemPrompt(); + } catch (error) { + const message = error instanceof Error ? 
error.message : String(error); + this.log("Fix phase aborted: cannot load system prompt", { error: message }); + return; + } + + const failuresXml = formatFailuresXml(this.failures); + this.state.systemPrompt = buildFixSystemPrompt( + buildPlanDesignSystemPrompt(basePrompt), + this.failures.length, + ); + this.state.step1Prompt = formatStep(fixStepGuidance(1, failuresXml)); + this.state.active = true; + this.state.step = 1; + + hookDispatch(this.dispatch, "onCompleteStep", () => this.handleStepComplete()); + + this.log("Starting plan-design fix workflow", { + step: 1, + failureCount: this.failures.length, + }); + await this.eventLog?.emitPhaseStart(TOTAL_STEPS); + await this.eventLog?.emitStepTransition(1, FIX_STEP_NAMES[1], TOTAL_STEPS); + } + + private registerHandlers(): void { + this.pi.on("before_agent_start", () => { + if (!this.state.active || !this.state.systemPrompt) return undefined; + return { systemPrompt: this.state.systemPrompt }; + }); + + // Step 1 prompt injection. Same pattern as PlanDesignPhase: the CLI + // message is a process trigger; the context event replaces it with + // step 1 instructions before the initial LLM call. + this.pi.on("context", (event) => { + if (!this.state.active) return undefined; + if (this.state.step !== 1 || !this.state.step1Prompt) return undefined; + + const messages = event.messages.map((m) => { + if (m.role === "user") { + return { ...m, content: this.state.step1Prompt! }; + } + return m; + }); + return { messages }; + }); + + this.pi.on("tool_call", (event) => { + if (!this.state.active) return undefined; + + const perm = checkPermission("plan-design", event.toolName); + if (!perm.allowed) { + return { block: true, reason: perm.reason }; + } + + // Step gate: mutation tools are blocked before step 2. Blocklist + // (not whitelist) so read tools and future pi-native tools pass + // through after checkPermission approves them. 
+ const step = this.state.step; + if (step < 2 && PLAN_MUTATION_TOOLS.has(event.toolName)) { + return { + block: true, + reason: `${event.toolName} available from step 2 (current: ${step})`, + }; + } + + return undefined; + }); + } + + private async handleStepComplete(): Promise<{ ok: boolean; prompt?: string; error?: string }> { + const prev = this.state.step; + + if (prev === 3) { + const result = await this.handleFinalize(); + if (!result.ok) { + await this.eventLog?.emitPhaseEnd("failed", result.errors?.join("; ")); + return { ok: false, error: result.errors?.join("; ") }; + } + this.state.active = false; + unhookDispatch(this.dispatch, "onCompleteStep"); + await this.eventLog?.emitPhaseEnd("completed"); + this.log("Fix phase complete, plan validation passed"); + return { ok: true, prompt: "Fix phase validation passed. Workflow complete." }; + } + + this.state.step = (prev + 1) as FixStep; + const nextName = FIX_STEP_NAMES[this.state.step]; + const prompt = formatStep(fixStepGuidance(this.state.step)); + + this.log("Fix step complete, advancing", { from: prev, to: this.state.step, name: nextName }); + await this.eventLog?.emitStepTransition(this.state.step, nextName, TOTAL_STEPS); + + return { ok: true, prompt }; + } + + private async handleFinalize(): Promise<{ ok: boolean; errors?: string[] }> { + return loadAndValidatePlan(this.planDir, this.log); + } +} diff --git a/src/planner/phases/plan-design/fix-prompts.ts b/src/planner/phases/plan-design/fix-prompts.ts new file mode 100644 index 0000000..003bf8d --- /dev/null +++ b/src/planner/phases/plan-design/fix-prompts.ts @@ -0,0 +1,137 @@ +// Fix-phase step guidance for plan-design targeted repair (3 steps). +// +// Parallels prompts.ts structure. Step 1 explicitly prohibits mutations: +// without this constraint the LLM tends to apply the first fix it identifies +// without reading all failures, producing cascading corrections that address +// symptoms rather than root causes. 
+ +import type { QRItem } from "../../qr/types.js"; +import type { StepGuidance } from "../../lib/step.js"; + +export type FixStep = 1 | 2 | 3; + +export const FIX_STEP_NAMES: Record = { + 1: "Understand QR Failures", + 2: "Apply Targeted Fixes", + 3: "Review & Finalize", +}; + +// Serializes FAIL items as an XML block injected into the step 1 prompt. +// XML structure mirrors how pi-native tools present structured data. +export function formatFailuresXml(failures: ReadonlyArray): string { + const items = failures.map((f) => [ + ` `, + ` ${f.check}`, + f.finding ? ` ${f.finding}` : ` `, + ` `, + ].join("\n")).join("\n"); + + return [ + "", + items, + "", + ].join("\n"); +} + +// Appends fix workflow instructions to the base architect system prompt. +export function buildFixSystemPrompt(basePrompt: string, failureCount: number): string { + return [ + basePrompt, + "", + "---", + "", + "WORKFLOW: 3-STEP PLAN-DESIGN FIX", + "", + `You are fixing ${failureCount} QR failure(s) in an existing plan.`, + "Step 1 instructions are in the user message below.", + "Complete the work described, then call koan_complete_step.", + "Put your findings in the `thoughts` parameter of koan_complete_step.", + "The tool result contains the next step's instructions.", + "", + "CRITICAL: Fix ONLY the identified failures. Do not restructure the plan", + "beyond what the failures require. Prefer updating existing entities over", + "adding new ones.", + ].join("\n"); +} + +export function fixStepGuidance(step: FixStep, context?: string): StepGuidance { + switch (step) { + case 1: + return { + title: "Step 1: Understand QR Failures", + instructions: [ + "QR FAILURES TO FIX:", + "", + context ?? "", + "", + "Read the failures carefully. 
For each failing item:", + " - Identify the scope (which milestone, decision, or intent)", + " - Understand what the check requires", + " - Read the finding to understand why it failed", + "", + "Use getter tools to inspect the scoped entities:", + " - koan_get_plan: overview, structure, decisions", + " - koan_get_milestone: milestone details and intents", + " - koan_get_decision: decision rationale", + " - koan_get_intent: intent definition", + "", + "Plan your fixes mentally. Consider:", + " - What minimal change addresses each failure?", + " - Do any fixes overlap or interact?", + " - Could fixing one item cause another to fail?", + "", + "DO NOT write any changes yet. Gather understanding for step 2.", + ], + }; + + case 2: + return { + title: "Step 2: Apply Targeted Fixes", + instructions: [ + "Apply the fixes you planned in step 1.", + "", + "Use plan mutation tools to address each failure:", + " - koan_set_overview / koan_set_constraints / koan_set_invisible_knowledge", + " - koan_set_milestone_* / koan_set_intent / koan_set_decision", + " - koan_add_milestone / koan_add_intent / koan_add_decision (if new entities needed)", + "", + "RULES:", + " - Fix ONLY the FAIL items from step 1", + " - Prefer updating existing entities over adding new ones", + " - Do not restructure the plan beyond what the failures require", + " - Do not change PASS items", + "", + "After applying all fixes, call koan_complete_step.", + ], + }; + + case 3: + return { + title: "Step 3: Review & Finalize", + instructions: [ + "Review the fixes you applied.", + "", + "Call koan_get_plan to read the current plan state.", + "For each original failure, verify:", + " - The fix addresses the check that failed", + " - No regressions introduced in previously passing items", + " - The plan is internally consistent", + "", + "Summarize in the `thoughts` parameter of koan_complete_step:", + " - Which failures were fixed and how", + " - Any concerns or items that may still be at risk", + ], + // 
Step 3 requires reading the plan before completing -- the review + // is meaningless without it. The custom invokeAfter enforces this + // sequencing explicitly. + invokeAfter: [ + "WHEN DONE: First call koan_get_plan to confirm the final plan state.", + "Then call koan_complete_step with your review summary in the `thoughts` parameter.", + "Do NOT call koan_complete_step before calling koan_get_plan.", + ].join("\n"), + }; + + default: + throw new Error(`unexpected fix step: ${step as never}`); + } +} diff --git a/src/planner/phases/plan-design/phase.ts b/src/planner/phases/plan-design/phase.ts index f2165ef..f581e11 100644 --- a/src/planner/phases/plan-design/phase.ts +++ b/src/planner/phases/plan-design/phase.ts @@ -7,7 +7,7 @@ import * as path from "node:path"; import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; -import { validatePlanDesign, validateRefs } from "../../plan/validate.js"; +import { loadAndValidatePlan } from "../../plan/validate.js"; import { loadPlanDesignSystemPrompt, formatContextForStep1, @@ -187,30 +187,6 @@ export class PlanDesignPhase { } private async handleFinalize(): Promise<{ ok: boolean; errors?: string[] }> { - const planPath = path.join(this.planDir, "plan.json"); - let plan; - try { - const raw = await fs.readFile(planPath, "utf8"); - plan = JSON.parse(raw); - } catch (error) { - const message = error instanceof Error ? 
error.message : String(error); - this.log("Failed to read plan.json for validation", { error: message }); - return { ok: false, errors: [`Failed to read plan.json: ${message}`] }; - } - - const designValidation = validatePlanDesign(plan); - if (!designValidation.ok) { - this.log("Plan design validation failed", { errors: designValidation.errors }); - return { ok: false, errors: designValidation.errors }; - } - - const refValidation = validateRefs(plan); - if (!refValidation.ok) { - this.log("Plan reference validation failed", { errors: refValidation.errors }); - return { ok: false, errors: refValidation.errors }; - } - - this.log("Plan validation passed", { path: planPath }); - return { ok: true }; + return loadAndValidatePlan(this.planDir, this.log); } } diff --git a/src/planner/plan/validate.ts b/src/planner/plan/validate.ts index cc9fe8d..210fd58 100644 --- a/src/planner/plan/validate.ts +++ b/src/planner/plan/validate.ts @@ -1,3 +1,7 @@ +import { promises as fs } from "node:fs"; +import * as path from "node:path"; + +import type { Logger } from "../../utils/logger.js"; import type { Plan } from "./types.js"; export interface ValidationResult { @@ -131,3 +135,36 @@ export function validatePlanDocs(p: Plan): ValidationResult { } return { ok: errors.length === 0, errors }; } + +// Reads plan.json from planDir and runs validatePlanDesign + validateRefs. +// Returns { ok: false, errors } on read/parse failure or any validation failure. +export async function loadAndValidatePlan( + planDir: string, + log: Logger, +): Promise<{ ok: boolean; errors?: string[] }> { + const planPath = path.join(planDir, "plan.json"); + let plan; + try { + const raw = await fs.readFile(planPath, "utf8"); + plan = JSON.parse(raw); + } catch (error) { + const message = error instanceof Error ? 
error.message : String(error); + log("Failed to read plan.json for validation", { error: message }); + return { ok: false, errors: [`Failed to read plan.json: ${message}`] }; + } + + const designValidation = validatePlanDesign(plan); + if (!designValidation.ok) { + log("Plan design validation failed", { errors: designValidation.errors }); + return { ok: false, errors: designValidation.errors }; + } + + const refValidation = validateRefs(plan); + if (!refValidation.ok) { + log("Plan reference validation failed", { errors: refValidation.errors }); + return { ok: false, errors: refValidation.errors }; + } + + log("Plan validation passed", { path: planPath }); + return { ok: true }; +} diff --git a/src/planner/qr/severity.ts b/src/planner/qr/severity.ts new file mode 100644 index 0000000..6e40c6f --- /dev/null +++ b/src/planner/qr/severity.ts @@ -0,0 +1,41 @@ +// Severity escalation policy for QR fix iterations. +// +// Progressive de-escalation narrows what blocks as iterations increase. +// COULD items (style, cosmetic) do not block indefinitely: after 2 fix +// attempts, only structural issues (MUST, SHOULD) block; after 3, only +// knowledge-loss risks (MUST) block. +// +// A hard cutoff ("after N attempts, ignore all failures") would let MUST +// failures through. De-escalation by tier preserves the invariant that +// MUST items always block, while preventing COULD style nits from causing +// indefinite retries. + +import type { QRItem, QRSeverity } from "./types.js"; + +export const MAX_FIX_ITERATIONS = 5; + +// Returns the set of severities that block the plan at the given iteration. +// Iterations 1-2: all severities block. Iteration 3: MUST+SHOULD. 4+: MUST only. 
+export function blockingSeverities(iteration: number): ReadonlySet { + if (iteration <= 2) return new Set(["MUST", "SHOULD", "COULD"]); + if (iteration === 3) return new Set(["MUST", "SHOULD"]); + return new Set(["MUST"]); +} + +// Returns the subset of items that are FAIL and have a blocking severity +// at the given iteration. +export function blockingFailures( + items: ReadonlyArray, + iteration: number, +): QRItem[] { + const blocking = blockingSeverities(iteration); + return items.filter((i) => i.status === "FAIL" && blocking.has(i.severity)); +} + +// Returns true when no blocking failures remain at this iteration. +export function qrPassesAtIteration( + items: ReadonlyArray, + iteration: number, +): boolean { + return blockingFailures(items, iteration).length === 0; +} diff --git a/src/planner/session.ts b/src/planner/session.ts index 1567983..42d8bf8 100644 --- a/src/planner/session.ts +++ b/src/planner/session.ts @@ -10,13 +10,14 @@ import type { ExtensionAPI, ExtensionCommandContext, ExtensionContext } from "@m import { ContextCapturePhase } from "./phases/context-capture/phase.js"; import { createInitialState, initializePlanState, type WorkflowState } from "./state.js"; import { createPlanInfo } from "../utils/plan.js"; -import { spawnArchitect, spawnQRDecomposer, spawnReviewer } from "./subagent.js"; +import { spawnArchitect, spawnArchitectFix, spawnQRDecomposer, spawnReviewer } from "./subagent.js"; import { createLogger, setLogDir, type Logger } from "../utils/logger.js"; import { createSubagentDir } from "../utils/progress.js"; import { readProjection } from "./lib/audit.js"; import type { WorkflowDispatch, PlanRef } from "./lib/dispatch.js"; import { pool } from "./lib/pool.js"; import type { QRFile } from "./qr/types.js"; +import { MAX_FIX_ITERATIONS, qrPassesAtIteration } from "./qr/severity.js"; import { WidgetController } from "./ui/widget.js"; // -- Types -- @@ -125,7 +126,7 @@ export function createSession(pi: ExtensionAPI, dispatch: 
WorkflowDispatch, plan activity: "", }); - const qr = await runQRBlock(planDir, ctx.cwd, extensionPath, state, log, widget); + const qr = await runPlanDesignWithQR(planDir, ctx.cwd, extensionPath, state, log, widget); if (qr.passed) outcome = "PASS"; return `Context captured. Plan design complete.\n\n${qr.summary}`; } finally { @@ -298,3 +299,103 @@ async function runQRBlock( widget?.update({ step: summary, activity: "" }); return { summary, passed }; } + +// -- Plan-design QR fix loop -- + +// Fix loop: architect -> QR -> [pass: done | fail: fix architect -> QR -> ...] +// +// Re-decomposes on each iteration rather than re-verifying only. The fix +// architect may change plan structure (add milestones, split intents, remove +// decisions); old QR items referencing stale scopes produce incorrect verdicts. +// Fresh decomposition generates items matched to the current plan state. +// +// The session's for-loop counter is the iteration source of truth. Each +// re-decompose writes a fresh qr-plan-design.json with iteration=1 and +// all-TODO items. The loop counter survives those resets. +async function runPlanDesignWithQR( + planDir: string, + cwd: string, + extensionPath: string, + state: WorkflowState, + log: Logger, + widget: WidgetController | null, +): Promise { + const qrPath = path.join(planDir, "qr-plan-design.json"); + + // Initial QR (iteration 1) + let qr = await runQRBlock(planDir, cwd, extensionPath, state, log, widget); + if (qr.passed) return qr; + + for (let iteration = 2; iteration <= MAX_FIX_ITERATIONS + 1; iteration++) { + // Read QR file for severity check + let qrFile: QRFile; + try { + const raw = await fs.readFile(qrPath, "utf8"); + qrFile = JSON.parse(raw) as QRFile; + } catch { + log("Fix loop: failed to read QR file", { iteration }); + return { summary: "Fix loop aborted: cannot read QR file.", passed: false }; + } + + // Severity escalation: if no blocking failures remain at this + // iteration, the plan passes without another fix attempt. 
+ // Example: iteration 3 drops COULD -- if only COULD items fail, + // the plan is good enough and the loop terminates. + if (qrPassesAtIteration(qrFile.items, iteration)) { + const pass = qrFile.items.filter((i) => i.status === "PASS").length; + const fail = qrFile.items.filter((i) => i.status === "FAIL").length; + return { + passed: true, + summary: `QR passed at iteration ${iteration} after severity de-escalation: ${pass} PASS, ${fail} FAIL (non-blocking).`, + }; + } + + // Spawn fix-mode architect + const fixIndex = iteration - 1; + widget?.update({ step: `fix ${fixIndex}/${MAX_FIX_ITERATIONS}: spawning architect...`, activity: "" }); + + const fixDir = await createSubagentDir(planDir, `architect-fix-${fixIndex}`); + + const fixPoll = setInterval(async () => { + const s = await readProjection(fixDir); + if (s) { + widget?.update({ + step: `fix ${fixIndex}/${MAX_FIX_ITERATIONS}: ${s.stepName}`, + activity: s.lastAction ?? "", + }); + } + }, 2000); + + const fixResult = await spawnArchitectFix({ + planDir, + subagentDir: fixDir, + cwd, + extensionPath, + fixPhase: "plan-design", + log, + }); + + clearInterval(fixPoll); + + if (fixResult.exitCode !== 0) { + log("Fix architect failed", { iteration: fixIndex, exitCode: fixResult.exitCode, stderr: fixResult.stderr.slice(0, 500) }); + widget?.update({ step: `fix ${fixIndex}/${MAX_FIX_ITERATIONS}: architect failed, re-running QR...`, activity: "" }); + } + + // Re-run full QR (decompose + verify) + widget?.update({ + step: `fix ${fixIndex}/${MAX_FIX_ITERATIONS}: re-running QR...`, + activity: "", + }); + qr = await runQRBlock(planDir, cwd, extensionPath, state, log, widget); + if (qr.passed) return qr; + } + + // Max iterations reached. MUST failures remaining after 5 fix attempts + // indicate a structural problem -- silently passing would propagate a + // known-broken plan downstream. 
+ return { + passed: false, + summary: `${qr.summary} (max ${MAX_FIX_ITERATIONS} fix iterations reached)`, + }; +} diff --git a/src/planner/subagent.ts b/src/planner/subagent.ts index 19c5647..32cb584 100644 --- a/src/planner/subagent.ts +++ b/src/planner/subagent.ts @@ -1,3 +1,8 @@ +// Subagent spawn helpers. Each public function delegates to spawnSubagent, +// which handles process lifecycle, stdout/stderr routing to disk, and +// exit-code normalization. Spawn errors resolve (not reject) so the caller +// can always read exitCode without try/catch. + import { spawn } from "node:child_process"; import { createWriteStream } from "node:fs"; import * as path from "node:path"; @@ -19,6 +24,15 @@ export interface SpawnArchitectOptions { log?: Logger; } +export interface SpawnArchitectFixOptions { + planDir: string; + subagentDir: string; + cwd: string; + extensionPath: string; + fixPhase: string; // e.g. "plan-design" + log?: Logger; +} + export interface SpawnQRDecomposerOptions { planDir: string; subagentDir: string; @@ -36,60 +50,7 @@ export interface SpawnReviewerOptions { log?: Logger; } -export function spawnArchitect(opts: SpawnArchitectOptions): Promise { - const log = opts.log ?? createLogger("Subagent"); - - const args = [ - "-p", - "-e", opts.extensionPath, - "--koan-role", "architect", - "--koan-phase", "plan-design", - "--koan-plan-dir", opts.planDir, - "--koan-subagent-dir", opts.subagentDir, - opts.initialPrompt ?? 
"Begin the plan-design phase.", - ]; - - log("Spawning architect subagent", { planDir: opts.planDir, subagentDir: opts.subagentDir }); - - return new Promise((resolve) => { - const stdoutLog = createWriteStream(path.join(opts.subagentDir, "stdout.log"), { flags: "w" }); - const stderrLog = createWriteStream(path.join(opts.subagentDir, "stderr.log"), { flags: "w" }); - - const proc = spawn("pi", args, { - cwd: opts.cwd, - shell: false, - stdio: ["ignore", "pipe", "pipe"], - }); - - let stderr = ""; - - proc.stdout.on("data", (data: Buffer) => { - stdoutLog.write(data); - }); - - proc.stderr.on("data", (data: Buffer) => { - stderr += data.toString(); - stderrLog.write(data); - }); - - proc.on("close", (code) => { - stdoutLog.end(); - stderrLog.end(); - const exitCode = code ?? 1; - log("Architect subagent exited", { exitCode }); - resolve({ exitCode, stderr, subagentDir: opts.subagentDir }); - }); - - proc.on("error", (error) => { - stdoutLog.end(); - stderrLog.end(); - log("Architect subagent spawn error", { error: error.message }); - resolve({ exitCode: 1, stderr: error.message, subagentDir: opts.subagentDir }); - }); - }); -} - -// -- QR spawners -- +// -- Spawn helper -- function spawnSubagent( role: string, @@ -149,6 +110,32 @@ function spawnSubagent( }); } +// -- Architect spawners -- + +export function spawnArchitect(opts: SpawnArchitectOptions): Promise { + const log = opts.log ?? createLogger("Subagent"); + return spawnSubagent( + "architect", + "plan-design", + opts.initialPrompt ?? "Begin the plan-design phase.", + opts, + log, + ); +} + +export function spawnArchitectFix(opts: SpawnArchitectFixOptions): Promise { + const log = opts.log ?? createLogger("Subagent"); + return spawnSubagent( + "architect", + "plan-design", + "Fix the plan based on QR failures.", + { ...opts, extraFlags: ["--koan-fix", opts.fixPhase] }, + log, + ); +} + +// -- QR spawners -- + export function spawnQRDecomposer(opts: SpawnQRDecomposerOptions): Promise { const log = opts.log ?? 
createLogger("Subagent"); return spawnSubagent("qr-decomposer", "qr-plan-design", "Begin the QR decompose phase.", opts, log); From bba7d12e8023ee3822e0c54603ce16232a19606c Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Wed, 25 Feb 2026 12:30:49 +0700 Subject: [PATCH 011/412] UI improvements --- src/planner/lib/audit.ts | 56 +++- src/planner/session.ts | 50 +++- src/planner/ui/widget.ts | 542 +++++++++++++++++++++++++++++++++++---- 3 files changed, 583 insertions(+), 65 deletions(-) diff --git a/src/planner/lib/audit.ts b/src/planner/lib/audit.ts index 181070a..d041f31 100644 --- a/src/planner/lib/audit.ts +++ b/src/planner/lib/audit.ts @@ -18,6 +18,8 @@ export interface ToolFileEvent extends EventBase { kind: "tool_file"; tool: "read" | "edit" | "write"; path: string; + lines?: number; + chars?: number; error: boolean; } @@ -112,8 +114,10 @@ function now(): string { // Derives a concise last-action string from a tool event for display. export function summarize(e: ToolEvent): string { switch (e.kind) { - case "tool_file": - return `${e.tool} ${e.path}`; + case "tool_file": { + const suffix = e.lines != null ? ` (${e.lines}L, ${e.chars}c)` : ""; + return `${e.tool} ${e.path}${suffix}`; + } case "tool_bash": return `bash ${e.bin}`; case "tool_koan": @@ -177,7 +181,7 @@ export function extractToolEvent(piEvent: PiToolResultEvent): ToolEvent { const seq = 0; if (FILE_TOOLS.has(toolName)) { - return { + const ev: ToolFileEvent = { kind: "tool_file", tool: toolName as "read" | "edit" | "write", path: (input["path"] as string | undefined) ?? "", @@ -185,6 +189,12 @@ export function extractToolEvent(piEvent: PiToolResultEvent): ToolEvent { ts, seq, }; + if (toolName === "read" && !isError) { + const text = content.find((c) => c.type === "text")?.text ?? 
""; + ev.lines = text.split("\n").length; + ev.chars = text.length; + } + return ev; } if (toolName === "bash") { @@ -325,3 +335,43 @@ export async function readProjection(dir: string): Promise { return null; } } + +// Reads the tail of events.jsonl and returns human-readable summary lines. +// Filters out heartbeats (noisy). Used by session.ts to feed the widget log card. +export async function readRecentLogs(dir: string, count = 5): Promise { + try { + const raw = await fs.readFile(path.join(dir, "events.jsonl"), "utf8"); + const events = raw + .trimEnd() + .split("\n") + .filter(Boolean) + .map((line) => JSON.parse(line) as AuditEvent) + .filter((e) => e.kind !== "heartbeat"); + return events.slice(-count).map(formatLogLine); + } catch { + return []; + } +} + +function formatLogLine(e: AuditEvent): string { + switch (e.kind) { + case "phase_start": + return `${e.phase} started (${e.totalSteps} steps)`; + case "step_transition": + return `step ${e.step}/${e.totalSteps}: ${e.name}`; + case "phase_end": + return `${e.outcome}${e.detail ? ` -- ${e.detail}` : ""}`; + case "tool_file": { + const suffix = e.lines != null ? 
` (${e.lines}L, ${e.chars}c)` : ""; + return `${e.tool} ${e.path}${suffix}`; + } + case "tool_bash": + return `bash ${e.bin}`; + case "tool_koan": + return e.tool; + case "tool_generic": + return e.tool; + case "heartbeat": + return "heartbeat"; + } +} diff --git a/src/planner/session.ts b/src/planner/session.ts index 42d8bf8..b29e98b 100644 --- a/src/planner/session.ts +++ b/src/planner/session.ts @@ -13,7 +13,7 @@ import { createPlanInfo } from "../utils/plan.js"; import { spawnArchitect, spawnArchitectFix, spawnQRDecomposer, spawnReviewer } from "./subagent.js"; import { createLogger, setLogDir, type Logger } from "../utils/logger.js"; import { createSubagentDir } from "../utils/progress.js"; -import { readProjection } from "./lib/audit.js"; +import { readProjection, readRecentLogs } from "./lib/audit.js"; import type { WorkflowDispatch, PlanRef } from "./lib/dispatch.js"; import { pool } from "./lib/pool.js"; import type { QRFile } from "./qr/types.js"; @@ -68,11 +68,15 @@ export function createSession(pi: ExtensionAPI, dispatch: WorkflowDispatch, plan const extensionPath = path.resolve(import.meta.dirname, "../../extensions/koan.ts"); const pollInterval = setInterval(async () => { - const s = await readProjection(subagentDir); + const [s, logs] = await Promise.all([ + readProjection(subagentDir), + readRecentLogs(subagentDir), + ]); if (s) { widget?.update({ step: s.stepName, activity: s.lastAction ?? 
"", + logLines: logs, }); } }, 2000); @@ -121,9 +125,13 @@ export function createSession(pi: ExtensionAPI, dispatch: WorkflowDispatch, plan state.phase = "plan-design-complete"; log("Architect plan-design complete", { planDir }); widget?.update({ - phaseStatus: { index: 1, status: "completed" }, + phaseStatus: { index: 1, status: "running" }, step: "starting QR block...", activity: "", + qrIterationsMax: MAX_FIX_ITERATIONS + 1, + qrIteration: 1, + qrMode: "initial", + qrPhase: "execute", }); const qr = await runPlanDesignWithQR(planDir, ctx.cwd, extensionPath, state, log, widget); @@ -204,15 +212,19 @@ async function runQRBlock( ): Promise { // 1. Spawn decomposer subagent state.phase = "qr-decompose-running"; - widget?.update({ step: "qr-decompose: starting...", activity: "" }); + widget?.update({ step: "qr-decompose: starting...", activity: "", qrPhase: "decompose" }); const decomposeDir = await createSubagentDir(planDir, "qr-decomposer"); const decomposePoll = setInterval(async () => { - const s = await readProjection(decomposeDir); + const [s, logs] = await Promise.all([ + readProjection(decomposeDir), + readRecentLogs(decomposeDir), + ]); if (s) { widget?.update({ step: `qr-decompose: ${s.stepName}`, activity: s.lastAction ?? "", + logLines: logs, }); } }, 2000); @@ -260,6 +272,7 @@ async function runQRBlock( // 3. 
Spawn reviewer pool state.phase = "qr-verify-running"; + widget?.update({ qrPhase: "verify" }); const result = await pool( itemIds, @@ -324,9 +337,16 @@ async function runPlanDesignWithQR( // Initial QR (iteration 1) let qr = await runQRBlock(planDir, cwd, extensionPath, state, log, widget); - if (qr.passed) return qr; + if (qr.passed) { + widget?.update({ qrPhase: "done", qrMode: null, qrIteration: null, qrIterationsMax: null, phaseStatus: { index: 1, status: "completed" } }); + return qr; + } + + widget?.update({ qrPhase: "execute" }); for (let iteration = 2; iteration <= MAX_FIX_ITERATIONS + 1; iteration++) { + widget?.update({ qrIteration: iteration, qrMode: "fix", qrPhase: "execute" }); + // Read QR file for severity check let qrFile: QRFile; try { @@ -334,6 +354,7 @@ async function runPlanDesignWithQR( qrFile = JSON.parse(raw) as QRFile; } catch { log("Fix loop: failed to read QR file", { iteration }); + widget?.update({ qrPhase: "done", qrMode: null, qrIteration: null, qrIterationsMax: null }); return { summary: "Fix loop aborted: cannot read QR file.", passed: false }; } @@ -344,6 +365,7 @@ async function runPlanDesignWithQR( if (qrPassesAtIteration(qrFile.items, iteration)) { const pass = qrFile.items.filter((i) => i.status === "PASS").length; const fail = qrFile.items.filter((i) => i.status === "FAIL").length; + widget?.update({ qrPhase: "done", qrMode: null, qrIteration: null, qrIterationsMax: null, phaseStatus: { index: 1, status: "completed" } }); return { passed: true, summary: `QR passed at iteration ${iteration} after severity de-escalation: ${pass} PASS, ${fail} FAIL (non-blocking).`, @@ -352,16 +374,20 @@ async function runPlanDesignWithQR( // Spawn fix-mode architect const fixIndex = iteration - 1; - widget?.update({ step: `fix ${fixIndex}/${MAX_FIX_ITERATIONS}: spawning architect...`, activity: "" }); + widget?.update({ step: `fix ${fixIndex}/${MAX_FIX_ITERATIONS}: spawning architect...`, activity: "", qrPhase: "execute" }); const fixDir = await 
createSubagentDir(planDir, `architect-fix-${fixIndex}`); const fixPoll = setInterval(async () => { - const s = await readProjection(fixDir); + const [s, logs] = await Promise.all([ + readProjection(fixDir), + readRecentLogs(fixDir), + ]); if (s) { widget?.update({ step: `fix ${fixIndex}/${MAX_FIX_ITERATIONS}: ${s.stepName}`, activity: s.lastAction ?? "", + logLines: logs, }); } }, 2000); @@ -388,12 +414,18 @@ async function runPlanDesignWithQR( activity: "", }); qr = await runQRBlock(planDir, cwd, extensionPath, state, log, widget); - if (qr.passed) return qr; + if (qr.passed) { + widget?.update({ qrPhase: "done", qrMode: null, qrIteration: null, qrIterationsMax: null, phaseStatus: { index: 1, status: "completed" } }); + return qr; + } + + widget?.update({ qrPhase: "execute" }); } // Max iterations reached. MUST failures remaining after 5 fix attempts // indicate a structural problem -- silently passing would propagate a // known-broken plan downstream. + widget?.update({ qrPhase: "done", qrMode: null, qrIteration: null, qrIterationsMax: null }); return { passed: false, summary: `${qr.summary} (max ${MAX_FIX_ITERATIONS} fix iterations reached)`, diff --git a/src/planner/ui/widget.ts b/src/planner/ui/widget.ts index c5a4337..e663984 100644 --- a/src/planner/ui/widget.ts +++ b/src/planner/ui/widget.ts @@ -3,12 +3,13 @@ // Hash-based change detection + 1s unref'd timer for elapsed updates. // Created by session.plan(), destroyed in onContextComplete finally block. // -// Uses setWidget(key, factory) to get render(width) for full-width bg. -// Content stays at a fixed CONTENT width; background fills terminal edge. +// Layout and styling reference: docs/planning-widget.md and the +// corresponding execution widget design deck selections (Stacked Modular +// Cards canvas + Vertical Timeline Rail). 
import type { ExtensionUIContext } from "@mariozechner/pi-coding-agent"; import type { Theme, ThemeColor } from "@mariozechner/pi-coding-agent"; -import { truncateToWidth, visibleWidth } from "@mariozechner/pi-tui"; +import { truncateToWidth, visibleWidth, wrapTextWithAnsi } from "@mariozechner/pi-tui"; // -- Types -- @@ -17,16 +18,28 @@ export type PhaseStatus = "pending" | "running" | "completed" | "failed"; interface PhaseEntry { key: string; label: string; + detail: string; status: PhaseStatus; } +type WidgetMode = "planning" | "execution"; + +type QRMode = "initial" | "fix"; +type QRPhase = "idle" | "execute" | "decompose" | "verify" | "done"; + interface WidgetState { + mode: WidgetMode; planId: string; phases: PhaseEntry[]; activeIndex: number; // 0-based; -1 when done step: string; activity: string; startedAt: number; + logLines: string[]; + qrIteration: number | null; + qrIterationsMax: number | null; + qrMode: QRMode | null; + qrPhase: QRPhase; } export interface WidgetUpdate { @@ -34,36 +47,83 @@ export interface WidgetUpdate { step?: string; activity?: string; phaseStatus?: { index: number; status: PhaseStatus }; + mode?: WidgetMode; + logLines?: readonly string[]; + qrIteration?: number | null; + qrIterationsMax?: number | null; + qrMode?: QRMode | null; + qrPhase?: QRPhase; } // -- Constants -- const WIDGET_KEY = "koan"; -const PAD = 2; // horizontal padding each side - -const PHASES: ReadonlyArray<{ key: string; label: string }> = [ - { key: "ctx", label: "Gathering context" }, - { key: "design", label: "Designing plan" }, - { key: "code", label: "Planning code" }, - { key: "docs", label: "Planning docs" }, - { key: "exec-c", label: "Executing code" }, - { key: "exec-d", label: "Executing docs" }, +const PAD = 2; // horizontal canvas padding each side +const CARD_MARGIN = 2; // left margin before card borders +const LOG_LINES = 5; + +const BODY_INDENT = " "; + +const PLANNING_PHASES: ReadonlyArray<{ key: string; label: string; detail: string }> = [ + 
{ key: "ctx", label: "Context", detail: "Gathering context" }, + { key: "design", label: "Plan design", detail: "Designing plan" }, + { key: "code", label: "Plan code", detail: "Creating code plan" }, + { key: "docs", label: "Plan docs", detail: "Documenting plan" }, ]; const STATUS_ICON: Record = { - pending: "[ ]", - running: "[>>]", - completed: "[OK]", - failed: "[!!]", + pending: "○", + running: "●", + completed: "●", + failed: "✖", }; -const ICON_COLOR: Record = { +const STATUS_COLOR: Record = { pending: "muted", - running: "warning", - completed: "success", + running: "accent", + completed: "dim", failed: "error", }; +const STATUS_TAG: Record = { + pending: "upcoming", + running: "current", + completed: "done", + failed: "failed", +}; + +const LOG_PLACEHOLDER = "No recent log entries"; +const TIMELINE_MIN_WIDTH = 16; +const TIMELINE_MAX_WIDTH = 28; +const CONNECTOR = "│"; + +interface BorderStyle { + topLeft: string; + topRight: string; + bottomLeft: string; + bottomRight: string; + horizontal: string; + vertical: string; +} + +const BORDER_SOLID: BorderStyle = { + topLeft: "┌", + topRight: "┐", + bottomLeft: "└", + bottomRight: "┘", + horizontal: "─", + vertical: "│", +}; + +const BORDER_SUBTLE: BorderStyle = { + topLeft: "╭", + topRight: "╮", + bottomLeft: "╰", + bottomRight: "╯", + horizontal: "─", + vertical: "│", +}; + // -- Canvas primitive -- // Content width adapts to terminal; background fills edge to edge. @@ -73,13 +133,31 @@ function contentWidth(termWidth: number): number { function canvasLine(content: string, termWidth: number, theme: Theme): string { const cw = contentWidth(termWidth); - const inner = truncateToWidth(content, cw, "...", true); + const inner = clampToWidth(content, cw); const line = " ".repeat(PAD) + inner + " ".repeat(PAD); return theme.bg("toolPendingBg", line); } // -- Helpers -- +function clampToWidth(text: string, width: number, ellipsis = ""): string { + const truncated = truncateToWidth(text, width, ellipsis === "" ? 
"" : ellipsis, false); + const visible = visibleWidth(truncated); + if (visible >= width) { + return truncated; + } + return truncated + " ".repeat(width - visible); +} + +function indentLines(lines: string[], width: number, indent = BODY_INDENT): string[] { + if (!indent) { + return lines.map((line) => clampToWidth(line, width)); + } + const indentWidth = visibleWidth(indent); + const available = Math.max(0, width - indentWidth); + return lines.map((line) => indent + clampToWidth(line, available)); +} + function formatElapsed(ms: number): string { const totalSec = Math.floor(ms / 1000); const m = Math.floor(totalSec / 60); @@ -92,41 +170,371 @@ function rightAlign(left: string, right: string, width: number): string { return `${left}${" ".repeat(gap)}${right}`; } -// Pure render: (state, theme, termWidth) -> 7 lines. No side effects. +function activePhase(state: WidgetState): PhaseEntry | null { + if (state.activeIndex < 0) return null; + return state.phases[state.activeIndex] ?? null; +} + +function normalizeLogLines(lines: readonly string[] | undefined): string[] { + if (!lines || lines.length === 0) return []; + const trimmed = lines.map((line) => line.replace(/\s+$/u, "")); + return trimmed.slice(-LOG_LINES); +} + +function phaseChipLabel(phase: PhaseEntry, index: number, state: WidgetState, theme: Theme): string { + const label = `┃ ${phase.label} ┃`; + if (index === state.activeIndex) { + return theme.bold(theme.fg("accent", label)); + } + if (phase.status === "completed") { + return theme.bold(theme.fg("muted", label)); + } + if (phase.status === "failed") { + return theme.fg("error", label); + } + return theme.fg("muted", label); +} + +function renderPhaseChips(state: WidgetState, theme: Theme, width: number): string { + const chips = state.phases.map((phase, index) => phaseChipLabel(phase, index, state, theme)); + return clampToWidth(chips.join(" "), width, "…"); +} + +function renderTimelineLines(state: WidgetState, theme: Theme, width: number): string[] 
{ + const lines: string[] = []; + const total = state.phases.length; + + state.phases.forEach((phase, index) => { + const isActive = index === state.activeIndex; + const color = STATUS_COLOR[phase.status]; + const iconBase = STATUS_ICON[phase.status]; + const icon = isActive + ? theme.bold(theme.fg("accent", iconBase)) + : theme.fg(color, iconBase); + + const labelColor: ThemeColor = phase.status === "completed" + ? "dim" + : isActive + ? "accent" + : phase.status === "failed" + ? "error" + : "muted"; + + const emphasize = isActive || phase.status === "completed"; + const label = emphasize + ? theme.bold(theme.fg(labelColor, phase.label)) + : theme.fg(labelColor, phase.label); + + lines.push(clampToWidth(`${icon} ${label}`, width, "…")); + + const connector = index < total - 1 ? theme.fg("muted", CONNECTOR) : " "; + lines.push(clampToWidth(`${connector} ${theme.fg("muted", STATUS_TAG[phase.status].toUpperCase())}`, width, "…")); + + if (index < total - 1) { + lines.push(clampToWidth(`${theme.fg("muted", CONNECTOR)} `, width)); + } + }); + + return lines; +} + +function upcomingSummary(state: WidgetState): string { + const remaining = state.activeIndex < 0 + ? [] + : state.phases.slice(state.activeIndex + 1).filter((p) => p.status !== "failed"); + if (state.activeIndex < 0) return "Planning complete"; + if (remaining.length === 0) return "Final step in progress"; + const labels = remaining.map((p) => p.label).join(" → "); + return `Upcoming: ${labels}`; +} + +function renderQRStatusWidget(state: WidgetState, theme: Theme, width: number): string[] { + if (state.qrIteration === null || state.qrPhase === "idle") { + return []; + } + + const innerWidth = Math.max(0, width - 2); + const iterationTotal = state.qrIterationsMax ? ` / ${state.qrIterationsMax}` : ""; + const modeLabel = state.qrMode === "fix" ? 
"Fix" : "Initial"; + + const headerLeft = theme.bold(theme.fg("accent", "Quality review")); + const headerRightParts = [`Iter ${state.qrIteration}${iterationTotal}`]; + if (modeLabel) headerRightParts.push(modeLabel); + const headerRight = theme.fg("dim", headerRightParts.join(" · ")); + + const phaseEntries: Array<{ key: Exclude; label: string }> = [ + { key: "execute", label: state.qrMode === "fix" ? "Execute (fix)" : "Execute" }, + { key: "decompose", label: "QR decompose" }, + { key: "verify", label: "QR verify" }, + ]; + + let currentIndex = phaseEntries.findIndex((entry) => entry.key === state.qrPhase); + if (state.qrPhase === "done") { + currentIndex = phaseEntries.length; + } + + const segments = phaseEntries.map((entry, index) => { + if (index < currentIndex) { + return theme.bold(theme.fg("dim", `${entry.label} ✓`)); + } + if (index === currentIndex) { + return theme.bold(theme.fg("accent", entry.label)); + } + return theme.fg("muted", entry.label); + }); + + const separator = theme.fg("muted", " → "); + const stageLine = clampToWidth(segments.join(separator), innerWidth, "…"); + + const description = (() => { + if (state.qrPhase === "execute") { + return state.qrMode === "fix" + ? "Fix-mode architect applies QR feedback." + : "Initial execution to gather plan context."; + } + if (state.qrPhase === "decompose") { + return state.qrIteration && state.qrIteration > 1 + ? "Re-decomposing updates into review items." 
+ : "Deriving QR checklist from the current plan."; + } + if (state.qrPhase === "verify") { + return "Massively parallel reviewers scoring QR items."; + } + if (state.qrPhase === "done") { + return "Quality review loop complete."; + } + return ""; + })(); + + const body: string[] = []; + body.push(stageLine); + if (description) { + body.push(clampToWidth(theme.fg("muted", description), innerWidth, "…")); + } + + return renderBox(headerLeft, headerRight, body, width, theme, BORDER_SUBTLE); +} + +interface DetailSections { + core: string[]; + footer: string[]; +} + +function buildDetailSections(state: WidgetState, theme: Theme, width: number): DetailSections { + const core: string[] = []; + const footer: string[] = []; + const blank = clampToWidth("", width); + + const active = activePhase(state); + const stepTitle = state.step || active?.detail || active?.label || "Awaiting step"; + core.push(clampToWidth(theme.bold(theme.fg("accent", stepTitle)), width, "…")); + + if (state.activity) { + const activityLines = wrapTextWithAnsi(theme.fg("muted", state.activity), width); + for (const line of activityLines) { + core.push(clampToWidth(line, width)); + } + } + + const qrWidget = renderQRStatusWidget(state, theme, width); + if (qrWidget.length > 0) { + if (core.length > 0 && core[core.length - 1].trim() !== "") { + core.push(blank); + } + core.push(...qrWidget.map((line) => clampToWidth(line, width))); + } + + if (active) { + footer.push(...wrapTextWithAnsi(theme.fg("dim", `Phase ${state.activeIndex + 1}/${state.phases.length}`), width).map((line) => clampToWidth(line, width, "…"))); + footer.push(...wrapTextWithAnsi(theme.fg("dim", `Plan · ${state.planId}`), width).map((line) => clampToWidth(line, width, "…"))); + } + + const summary = upcomingSummary(state); + if (summary) { + footer.push(...wrapTextWithAnsi(theme.fg("muted", summary), width).map((line) => clampToWidth(line, width, "…"))); + } + + return { core, footer }; +} + +function layoutDetailColumn(sections: 
DetailSections, width: number, targetRows: number): string[] { + const blank = clampToWidth("", width); + const lines = [...sections.core]; + + if (sections.footer.length > 0) { + if (lines.length === 0 || lines[lines.length - 1].trim() !== "") { + lines.push(blank); + } + } + + const used = lines.length + sections.footer.length; + const goal = Math.max(targetRows, used); + + while (lines.length < goal - sections.footer.length) { + lines.push(blank); + } + + if (sections.footer.length === 0) { + return lines; + } + + return [...lines, ...sections.footer]; +} + +function renderBox( + titleLeft: string, + titleRight: string, + body: string[], + width: number, + theme: Theme, + border: BorderStyle = BORDER_SOLID, +): string[] { + const innerWidth = Math.max(0, width - 2); + const left = visibleWidth(titleLeft) > innerWidth ? truncateToWidth(titleLeft, innerWidth, "", false) : titleLeft; + const right = visibleWidth(titleRight) > innerWidth ? truncateToWidth(titleRight, innerWidth, "", false) : titleRight; + const headerContent = rightAlign(left, right, innerWidth); + + const top = `${border.topLeft}${clampToWidth(headerContent, innerWidth)}${border.topRight}`; + const bottom = `${border.bottomLeft}${clampToWidth(border.horizontal.repeat(innerWidth), innerWidth)}${border.bottomRight}`; + + const content = body.map((line) => `${border.vertical}${clampToWidth(line, innerWidth)}${border.vertical}`); + return [top, ...content, bottom]; +} + +function renderPlanningCard(state: WidgetState, theme: Theme, width: number): string[] { + const elapsed = theme.fg("dim", formatElapsed(Date.now() - state.startedAt)); + const innerWidth = Math.max(0, width - 2); + const indentWidth = visibleWidth(BODY_INDENT); + const contentWidth = Math.max(0, innerWidth - indentWidth); + + if (innerWidth < 60 || contentWidth < 40) { + const fallbackContent: string[] = [ + "", + theme.fg("muted", `Plan · ${state.planId}`), + "", + formatStepLine(state, theme), + formatPhaseTrail(state, theme, 
contentWidth), + ]; + const detail = formatDetail(state, theme, contentWidth); + if (detail) fallbackContent.push(detail); + fallbackContent.push(""); + + const body = indentLines(fallbackContent, innerWidth); + return renderBox( + `${BODY_INDENT}${theme.bold(theme.fg("accent", "Planning"))}`, + elapsed, + body, + width, + theme, + ); + } + + const chipsLine = renderPhaseChips(state, theme, contentWidth); + const timelineWidth = Math.min(TIMELINE_MAX_WIDTH, Math.max(TIMELINE_MIN_WIDTH, Math.floor(contentWidth * 0.3))); + const detailWidth = Math.max(14, contentWidth - timelineWidth - 4); + + const timelineLines = renderTimelineLines(state, theme, timelineWidth); + const detailSections = buildDetailSections(state, theme, detailWidth); + const detailLines = layoutDetailColumn(detailSections, detailWidth, timelineLines.length); + const combined: string[] = []; + const maxLines = Math.max(timelineLines.length, detailLines.length); + + for (let i = 0; i < maxLines; i++) { + const left = timelineLines[i] ?? ""; + const right = detailLines[i] ?? ""; + const composed = `${clampToWidth(left, timelineWidth)} ${clampToWidth(right, detailWidth)}`; + combined.push(clampToWidth(composed, contentWidth)); + } + + const body = indentLines( + [ + "", + chipsLine, + "", + ...combined, + "", + ], + innerWidth, + ); + + return renderBox( + `${BODY_INDENT}${theme.bold(theme.fg("accent", "Planning Workspace"))}`, + elapsed, + body, + width, + theme, + ); +} + +function renderLogCard(state: WidgetState, theme: Theme, width: number): string[] { + const innerWidth = Math.max(0, width - 2); + const raw = state.logLines.length > 0 ? 
state.logLines.slice(-LOG_LINES) : [LOG_PLACEHOLDER]; + const padded = [...raw]; + while (padded.length < LOG_LINES) padded.push(""); + + const lines = padded.map((line) => { + if (!line) return ""; + return theme.fg("dim", `• ${line}`); + }); + + const body = indentLines(lines, innerWidth); + return renderBox( + `${BODY_INDENT}${theme.bold(theme.fg("accent", "Latest log"))}`, + "", + body, + width, + theme, + ); +} + +function formatPhaseTrail(state: WidgetState, theme: Theme, width: number): string { + const parts = state.phases.map((phase, index) => { + const icon = STATUS_ICON[phase.status]; + const color = STATUS_COLOR[phase.status]; + const label = index === state.activeIndex ? theme.bold(phase.label) : phase.label; + return theme.fg(color, `${icon} ${label}`); + }); + const trail = parts.join(" "); + return clampToWidth(trail, width, "…"); +} + +function formatDetail(state: WidgetState, theme: Theme, width: number): string { + const step = state.step ? theme.fg("muted", state.step) : ""; + const activity = state.activity ? theme.fg("dim", ` · ${state.activity}`) : ""; + const detail = `${step}${activity}`; + if (!detail) return ""; + return clampToWidth(detail, width, "…"); +} + +function formatStepLine(state: WidgetState, theme: Theme): string { + const total = state.phases.length; + const active = activePhase(state); + const stepNumber = state.activeIndex >= 0 ? state.activeIndex + 1 : total; + const count = theme.fg("muted", `Step ${stepNumber} of ${total}`); + const label = active + ? theme.bold(theme.fg("accent", active.label)) + : theme.bold(theme.fg("muted", "Complete")); + return `${count} ${theme.fg("muted", "·")} ${label}`; +} + +// Pure render: (state, theme, termWidth) -> lines. No side effects. 
function render(state: WidgetState, theme: Theme, termWidth: number): string[] { const c = (s: string) => canvasLine(s, termWidth, theme); const cw = contentWidth(termWidth); + const lines: string[] = []; + const margin = " ".repeat(CARD_MARGIN); - // Header: koan [N/6] label ... elapsed - const idx = state.activeIndex; - const label = idx >= 0 ? state.phases[idx].label : "done"; - const num = idx >= 0 ? idx + 1 : 6; - const left = `${theme.bold(theme.fg("accent", "koan"))} [${num}/6] ${label}`; - const elapsed = theme.fg("dim", formatElapsed(Date.now() - state.startedAt)); - const header = rightAlign(left, elapsed, cw); - - // Plan ID - const planId = theme.fg("dim", state.planId); - - // Phase bar - const phaseBar = state.phases - .map((p) => `${theme.fg(ICON_COLOR[p.status], STATUS_ICON[p.status])} ${p.key}`) - .join(" "); - - // Step + activity - const step = state.step ? theme.fg("dim", state.step) : ""; - const act = state.activity ? theme.fg("muted", ` > ${state.activity}`) : ""; - const detail = truncateToWidth(step + act, cw, "..."); - - return [ - c(""), // top padding - c(header), - c(planId), - c(""), // separator - c(phaseBar), - c(detail), - c(""), // bottom padding - ]; + lines.push(c("")); + for (const line of renderPlanningCard(state, theme, cw - CARD_MARGIN)) { + lines.push(c(margin + line)); + } + lines.push(c(margin)); + for (const line of renderLogCard(state, theme, cw - CARD_MARGIN)) { + lines.push(c(margin + line)); + } + lines.push(c("")); + + return lines; } // -- WidgetController -- @@ -140,12 +548,18 @@ export class WidgetController { constructor(ui: ExtensionUIContext, planId: string) { this.ui = ui; this.state = { + mode: "planning", planId, - phases: PHASES.map((p) => ({ key: p.key, label: p.label, status: "pending" as PhaseStatus })), + phases: PLANNING_PHASES.map((p) => ({ key: p.key, label: p.label, detail: p.detail, status: "pending" as PhaseStatus })), activeIndex: 0, step: "", activity: "", startedAt: Date.now(), + logLines: [], 
+ qrIteration: null, + qrIterationsMax: null, + qrMode: null, + qrPhase: "idle", }; this.state.phases[0].status = "running"; @@ -156,6 +570,9 @@ export class WidgetController { } update(patch: WidgetUpdate): void { + if (patch.mode !== undefined) { + this.state.mode = patch.mode; + } if (patch.phaseStatus !== undefined) { const { index, status } = patch.phaseStatus; if (index >= 0 && index < this.state.phases.length) { @@ -175,6 +592,21 @@ export class WidgetController { if (patch.activity !== undefined) { this.state.activity = patch.activity; } + if (patch.logLines !== undefined) { + this.state.logLines = normalizeLogLines(patch.logLines); + } + if (patch.qrIteration !== undefined) { + this.state.qrIteration = patch.qrIteration; + } + if (patch.qrIterationsMax !== undefined) { + this.state.qrIterationsMax = patch.qrIterationsMax; + } + if (patch.qrMode !== undefined) { + this.state.qrMode = patch.qrMode; + } + if (patch.qrPhase !== undefined) { + this.state.qrPhase = patch.qrPhase; + } this.doRender(); } @@ -185,7 +617,11 @@ export class WidgetController { private doRender(): void { // Capture state snapshot for the factory closure - const state = { ...this.state, phases: this.state.phases.map((p) => ({ ...p })) }; + const state = { + ...this.state, + phases: this.state.phases.map((p) => ({ ...p })), + logLines: [...this.state.logLines], + }; const theme = this.ui.theme; // Hash check: skip setWidget if content unchanged (ignoring width) From 524158cb7d71b987625bd69c8c1569c658115975 Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Wed, 25 Feb 2026 12:36:00 +0700 Subject: [PATCH 012/412] Wire event log into widget and add structured log lines readRecentLogs() reads events.jsonl tail and returns structured LogLine entries (prefix/highlight/meta) so the widget can apply theme-aware coloring -- file paths and commands render bold while prefixes and size metadata render dim. Also captures lines/chars for both read and bash tool results. 
--- src/planner/lib/audit.ts | 49 ++++++++++++++++++++++++++-------------- src/planner/ui/widget.ts | 35 ++++++++++++++++------------ 2 files changed, 53 insertions(+), 31 deletions(-) diff --git a/src/planner/lib/audit.ts b/src/planner/lib/audit.ts index d041f31..307c120 100644 --- a/src/planner/lib/audit.ts +++ b/src/planner/lib/audit.ts @@ -26,6 +26,8 @@ export interface ToolFileEvent extends EventBase { export interface ToolBashEvent extends EventBase { kind: "tool_bash"; bin: string; + lines?: number; + chars?: number; error: boolean; } @@ -118,8 +120,10 @@ export function summarize(e: ToolEvent): string { const suffix = e.lines != null ? ` (${e.lines}L, ${e.chars}c)` : ""; return `${e.tool} ${e.path}${suffix}`; } - case "tool_bash": - return `bash ${e.bin}`; + case "tool_bash": { + const suffix = e.lines != null ? ` (${e.lines}L, ${e.chars}c)` : ""; + return `bash ${e.bin}${suffix}`; + } case "tool_koan": return e.tool; case "tool_generic": @@ -200,7 +204,8 @@ export function extractToolEvent(piEvent: PiToolResultEvent): ToolEvent { if (toolName === "bash") { const cmd = (input["command"] as string | undefined) ?? ""; const bin = cmd.trim().split(/\s+/)[0] ?? "bash"; - return { kind: "tool_bash", bin, error: isError, ts, seq }; + const text = content.find((c) => c.type === "text")?.text ?? ""; + return { kind: "tool_bash", bin, lines: text.split("\n").length, chars: text.length, error: isError, ts, seq }; } if (toolName.startsWith("koan_")) { @@ -336,9 +341,17 @@ export async function readProjection(dir: string): Promise { } } -// Reads the tail of events.jsonl and returns human-readable summary lines. +// Structured log line for the widget log card. The widget applies +// theme-aware coloring: prefix dim, highlight normal, meta dim. +export interface LogLine { + prefix: string; + highlight: string; + meta: string; +} + +// Reads the tail of events.jsonl and returns structured log entries. // Filters out heartbeats (noisy). 
Used by session.ts to feed the widget log card. -export async function readRecentLogs(dir: string, count = 5): Promise { +export async function readRecentLogs(dir: string, count = 5): Promise { try { const raw = await fs.readFile(path.join(dir, "events.jsonl"), "utf8"); const events = raw @@ -353,25 +366,27 @@ export async function readRecentLogs(dir: string, count = 5): Promise } } -function formatLogLine(e: AuditEvent): string { +function sizeSuffix(e: { lines?: number; chars?: number }): string { + return e.lines != null ? `(${e.lines}L, ${e.chars}c)` : ""; +} + +function formatLogLine(e: AuditEvent): LogLine { switch (e.kind) { case "phase_start": - return `${e.phase} started (${e.totalSteps} steps)`; + return { prefix: "phase", highlight: e.phase, meta: `(${e.totalSteps} steps)` }; case "step_transition": - return `step ${e.step}/${e.totalSteps}: ${e.name}`; + return { prefix: `step ${e.step}/${e.totalSteps}`, highlight: e.name, meta: "" }; case "phase_end": - return `${e.outcome}${e.detail ? ` -- ${e.detail}` : ""}`; - case "tool_file": { - const suffix = e.lines != null ? ` (${e.lines}L, ${e.chars}c)` : ""; - return `${e.tool} ${e.path}${suffix}`; - } + return { prefix: "phase", highlight: e.outcome, meta: e.detail ?? 
"" }; + case "tool_file": + return { prefix: e.tool, highlight: e.path, meta: sizeSuffix(e) }; case "tool_bash": - return `bash ${e.bin}`; + return { prefix: "bash", highlight: e.bin, meta: sizeSuffix(e) }; case "tool_koan": - return e.tool; + return { prefix: "koan", highlight: e.tool, meta: "" }; case "tool_generic": - return e.tool; + return { prefix: "tool", highlight: e.tool, meta: "" }; case "heartbeat": - return "heartbeat"; + return { prefix: "", highlight: "heartbeat", meta: "" }; } } diff --git a/src/planner/ui/widget.ts b/src/planner/ui/widget.ts index e663984..e16cfed 100644 --- a/src/planner/ui/widget.ts +++ b/src/planner/ui/widget.ts @@ -10,6 +10,7 @@ import type { ExtensionUIContext } from "@mariozechner/pi-coding-agent"; import type { Theme, ThemeColor } from "@mariozechner/pi-coding-agent"; import { truncateToWidth, visibleWidth, wrapTextWithAnsi } from "@mariozechner/pi-tui"; +import type { LogLine } from "../lib/audit.js"; // -- Types -- @@ -35,7 +36,7 @@ interface WidgetState { step: string; activity: string; startedAt: number; - logLines: string[]; + logLines: LogLine[]; qrIteration: number | null; qrIterationsMax: number | null; qrMode: QRMode | null; @@ -48,7 +49,7 @@ export interface WidgetUpdate { activity?: string; phaseStatus?: { index: number; status: PhaseStatus }; mode?: WidgetMode; - logLines?: readonly string[]; + logLines?: readonly LogLine[]; qrIteration?: number | null; qrIterationsMax?: number | null; qrMode?: QRMode | null; @@ -175,10 +176,9 @@ function activePhase(state: WidgetState): PhaseEntry | null { return state.phases[state.activeIndex] ?? 
null; } -function normalizeLogLines(lines: readonly string[] | undefined): string[] { +function normalizeLogLines(lines: readonly LogLine[] | undefined): LogLine[] { if (!lines || lines.length === 0) return []; - const trimmed = lines.map((line) => line.replace(/\s+$/u, "")); - return trimmed.slice(-LOG_LINES); + return [...lines].slice(-LOG_LINES); } function phaseChipLabel(phase: PhaseEntry, index: number, state: WidgetState, theme: Theme): string { @@ -466,18 +466,25 @@ function renderPlanningCard(state: WidgetState, theme: Theme, width: number): st ); } +function renderLogLine(entry: LogLine, theme: Theme): string { + const parts: string[] = []; + if (entry.prefix) parts.push(theme.fg("dim", entry.prefix)); + if (entry.highlight) parts.push(theme.bold(entry.highlight)); + if (entry.meta) parts.push(theme.fg("dim", entry.meta)); + return `${theme.fg("dim", "•")} ${parts.join(" ")}`; +} + function renderLogCard(state: WidgetState, theme: Theme, width: number): string[] { const innerWidth = Math.max(0, width - 2); - const raw = state.logLines.length > 0 ? state.logLines.slice(-LOG_LINES) : [LOG_PLACEHOLDER]; - const padded = [...raw]; - while (padded.length < LOG_LINES) padded.push(""); + const hasEntries = state.logLines.length > 0; + const entries = hasEntries ? state.logLines.slice(-LOG_LINES) : []; - const lines = padded.map((line) => { - if (!line) return ""; - return theme.fg("dim", `• ${line}`); - }); + const formatted: string[] = hasEntries + ? 
entries.map((entry) => renderLogLine(entry, theme)) + : [theme.fg("dim", `• ${LOG_PLACEHOLDER}`)]; + while (formatted.length < LOG_LINES) formatted.push(""); - const body = indentLines(lines, innerWidth); + const body = indentLines(formatted, innerWidth); return renderBox( `${BODY_INDENT}${theme.bold(theme.fg("accent", "Latest log"))}`, "", @@ -620,7 +627,7 @@ export class WidgetController { const state = { ...this.state, phases: this.state.phases.map((p) => ({ ...p })), - logLines: [...this.state.logLines], + logLines: this.state.logLines.map((l) => ({ ...l })), }; const theme = this.ui.theme; From e584235f453a0034c7bb63f8743a0d8621a1bb34 Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Wed, 25 Feb 2026 12:37:15 +0700 Subject: [PATCH 013/412] Filter koan_complete_step from log and clarify step label koan_complete_step and step_transition are emitted from the same tool call (transition inside handler, tool_koan after return), so the complete_step line is redundant noise. Also changes step label from "step 5/6" to "current step 5/6:" to read as status. 
--- src/planner/lib/audit.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/planner/lib/audit.ts b/src/planner/lib/audit.ts index 307c120..aea727a 100644 --- a/src/planner/lib/audit.ts +++ b/src/planner/lib/audit.ts @@ -359,7 +359,7 @@ export async function readRecentLogs(dir: string, count = 5): Promise .split("\n") .filter(Boolean) .map((line) => JSON.parse(line) as AuditEvent) - .filter((e) => e.kind !== "heartbeat"); + .filter((e) => e.kind !== "heartbeat" && !(e.kind === "tool_koan" && e.tool === "koan_complete_step")); return events.slice(-count).map(formatLogLine); } catch { return []; @@ -375,7 +375,7 @@ function formatLogLine(e: AuditEvent): LogLine { case "phase_start": return { prefix: "phase", highlight: e.phase, meta: `(${e.totalSteps} steps)` }; case "step_transition": - return { prefix: `step ${e.step}/${e.totalSteps}`, highlight: e.name, meta: "" }; + return { prefix: `current step ${e.step}/${e.totalSteps}:`, highlight: e.name, meta: "" }; case "phase_end": return { prefix: "phase", highlight: e.outcome, meta: e.detail ?? "" }; case "tool_file": From f0f4090fa7bbfbe5965dfaaa0b0f209239fd1760 Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Wed, 25 Feb 2026 15:34:00 +0700 Subject: [PATCH 014/412] Add planning widget design reference doc Documents the design-deck contract for the refreshed planning widget: stacked modular cards, vertical timeline rail, integrated log section, and QR inline section. 
--- docs/planning-widget.md | 97 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/planning-widget.md diff --git a/docs/planning-widget.md b/docs/planning-widget.md new file mode 100644 index 0000000..19d8c0d --- /dev/null +++ b/docs/planning-widget.md @@ -0,0 +1,97 @@ +# Planning Widget Refresh + +## Context +The planning widget now follows the design-deck contract selected on Feb 25 2026: + +- **Canvas direction:** Stacked Modular Cards +- **Navigation direction:** Vertical Timeline Rail +- **Log strategy:** Declarative shape-table serialization + dense two-column layout +- **QR strategy:** Inline integrated section (not a detached sub-card) + +The goal is to keep a long-running (1-2h) planning session readable in real time while preserving high-signal audit telemetry. + +## Decisions & Rationale + +### 1) Deterministic log serialization (hybrid detail) +- Keep **tool name** as the primary scan anchor. +- Use a declarative per-tool formatter table for known `koan_*` tools. +- Unknown tools fall back to tool-name-only output. +- Field order is deterministic and curated (e.g., IDs first), not alphabetical. + +**Rationale:** Users scan continuously during execution; stable order makes visual parsing faster and reduces cognitive churn between updates. + +### 2) Selective detail by field type +- Arrays render as **first item + count** (`[first] +N`). +- Free-form fields (`diff`, `doc_diff`, `comments`, large narrative strings) render as **size metadata only** (`184L/9.2k`), never full body. +- Getter tools (`koan_get_*`) show target identifiers plus response size metadata (`resp:42L/3.1k`). + +**Rationale:** Maintains observability without blowing out vertical space or flooding with low-value text. + +### 3) Latest log as dense two-column grid +- Left column: tool name (bold accent anchor). +- Right column: compact deterministic summary. 
+- Column widths adapt to available terminal width + observed tool-name lengths (protecting right-column readability). +- High-value rows may wrap to 2 lines; if overflow exceeds 2 lines, the second line is re-compacted with ellipsis. +- Repeated events remain separate rows (no dedup/collapse). + +**Rationale:** Preserves temporal fidelity while increasing information density and keeping the "what just happened" answer immediate, even under constrained widths. + +### 4) QR is a first-class workflow section +- QR renders inline in detail pane with divider rule (no detached mini-card border). +- Visible for Plan design (and contractually for Plan execution), hidden only for Context gathering. +- QR starts directly in the **`execute`** stage for iteration 1 (non-fix mode); fix iterations reuse the same stage model. +- QR block is normalized to a fixed structure: header, phase rail, counters, divider. +- Metadata is budgeted to **64 visible chars max** and progressively compacted (`phase/iter/mode` -> `iN/M`, `d/p/f/t`) when width is constrained. +- Counter line emphasizes severity: `fail` is error-colored; `pass` is accent; others remain muted/dim. + +**Rationale:** QR is not optional side telemetry; it is the acceptance loop for the plan. The UI should communicate that structural importance while remaining legible and shape-stable at smaller widths. 
+ +## Layout Overview +``` +┌──────────────────────────────── Planning ────────────────────────────────────┐ +│ ┃ Context gathering ┃ ┃ Plan design ┃ ┃ Plan code ┃ ┃ Plan docs ┃ │ +│ │ +│ ● Context gathering qr-decompose: Step 2/13: Holistic Concerns │ +│ │ DONE read CLAUDE.md · 41L/1709c │ +│ │ │ +│ ● Plan design QR | phase:decompose · iter 1/6 initial │ +│ │ CURRENT Execute → QR decompose → QR verify │ +│ │ done:0/24 pass:0 fail:0 todo:24 │ +│ │ ──────────────────────────────────────────────── │ +│ ○ Plan code Plan · │ +│ │ UPCOMING │ +│ ○ Plan docs │ +│──────────────────────────────────────────────────────────────────────────────│ +│ Latest log │ +│ koan_set_milestone_tests id=M-002 · tests:["covers retries"] +7 │ +│ koan_get_milestone id=M-002 · resp:42L/3.1k │ +│ koan_add_intent milestone=M-002 · file=src/planner/ui/widget.ts │ +│ koan_set_change_diff id=CC-M-001-002 · diff:184L/9.2k │ +│ koan_qr_assign_group phase=plan-design · ids:[QR-001] +11 │ +└──────────────────────────────────────────────────────────────────────────────┘ +``` + +## Rendering Guide +1. **Canvas** – Keep using `canvasLine()` so widget content remains full-width over `toolPendingBg`. +2. **Main card** – Keep solid border + consistent inner padding via shared `renderBox()` helper. +3. **Timeline rail** – Maintain status icon/color semantics (`active=accent`, `done=dim`, `failed=error`). +4. **Detail pane** – Render in this order: + - a dim section label (`Current step`) to create hierarchy + - step title + optional activity + - QR integrated section (if visible) + - footer metadata (`Plan · ID`) pinned to bottom via dynamic padding +5. **QR section** – Use inline header + phase rail + metadata line + divider. Avoid nested border style to keep it visually native to the right pane. Keep line geometry stable (fixed 3-line payload + divider) and enforce a 64-char metadata budget before clamping to pane width. +6. 
**Latest log section** – Keep it inside the same outer card, separated by a horizontal divider. Reuse the same left/right column split (`timelineWidth` / `detailWidth`) and gap as the planning body so vertical alignment stays consistent. + +## Data Contract Notes +- `LogLine` now carries: + - `tool` (left column) + - `summary` (right column) + - `highValue` (whether 2-line wrap is allowed) +- QR state in widget includes: + - `qrIteration`, `qrIterationsMax`, `qrMode`, `qrPhase` + - `qrDone`, `qrTotal`, `qrPass`, `qrFail`, `qrTodo` + +## Future Work (contracted, not yet implemented) +- Plan execution phase should reuse the same QR integrated section semantics. +- Optional compact mode for very narrow terminals can reduce metadata verbosity while preserving deterministic ordering. From 7389d46478bcb1c26f406efe9ecbc316c3eccb6e Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Wed, 25 Feb 2026 15:34:04 +0700 Subject: [PATCH 015/412] Record UI design decisions (UI-1, UI-2, UI-3) Documents the three UI design choices made on Feb 25: planning widget cards + timeline rail, deterministic dense log grid, and QR integrated section (not sidecar). --- design-decisions.md | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/design-decisions.md b/design-decisions.md index 132572a..a6027c4 100644 --- a/design-decisions.md +++ b/design-decisions.md @@ -218,6 +218,46 @@ Step 6: plan mutation tools unlocked. --- +## UI Decisions + +### UI-1: Planning Widget Cards & Timeline Rail +- Chosen on Feb 25 2026 via planning-widget design deck (Stacked Modular Cards + Vertical Timeline Rail). +- Rationale: make terminal output feel like a coherent operations workspace (not plain log spam), keep active progress glanceable, and preserve enough structure to scale into future phases without redesigning the shell. +- Implementation guardrails: + - Continue rendering through `canvasLine()` so the background fills full terminal width. 
+ - Keep consistent card padding and solid-border framing through shared `renderBox()` helpers. + - Phase chips use stable semantic tokens (accent active, bold muted completed, muted pending, error failed). + - Vertical rail remains width-bounded (~20 cols) so the right detail pane keeps enough budget for high-signal telemetry. + - Detail footer (`Plan · id`) is pinned bottom via dynamic padding, independent of timeline density. + - Planning body and latest-log body share one outer card, separated by an internal divider for better cohesion. + +### UI-2: Latest Log as Deterministic Dense Grid +- Chosen on Feb 25 2026 via follow-up deck (`Declarative Shape Table` + `Two-Column Dense Grid`). +- Rationale: long-running sessions need more than tool names; users must see intent without reading full payloads. Deterministic ordering reduces scan friction and makes anomalies obvious over time. +- Contract: + - Left column anchor is always tool name. + - Right column is deterministic summary from shape-table formatters (ID-first ordering for recognized tools). + - Unknown tools degrade to name-only output (generic fallback). + - Arrays render as first-item-plus-count; free-form fields render as size-only metadata. + - Getter tools include target metadata + response size (`resp:42L/3.1k`). + - Repeated events remain repeated (no collapse), preserving temporal audit fidelity. + - Column widths adapt to terminal width and observed tool-name lengths so detail space stays useful. + - In integrated mode, latest-log columns are forced to the same split as the planning body (`timelineWidth` / `detailWidth`) to keep vertical alignment stable. + - High-value rows may wrap to 2 lines only; deeper overflow is compacted with ellipsis to protect fixed card height. + +### UI-3: QR Integrated Section (Not Sidecar) +- Chosen on Feb 25 2026 via follow-up deck (`Inline Integrated Section + Divider`). +- Rationale: QR is the acceptance loop, not optional telemetry. 
Rendering it as an inline first-class section prevents the "detached widget" feel and matches how users reason about plan quality over time. +- Contract: + - QR is visible during Plan design (and contractually Plan execution), hidden only for Context gathering. + - Iteration 1 enters `execute` immediately (same stage model as fix iterations); there is no separate `initializing` stage. + - Section includes: phase + iter/mode metadata, phase rail, and counters (`done/total/pass/fail/todo`) in a compact metadata block. + - Visual treatment uses inline sectioning + divider, not a nested bordered mini-card. + - Geometry is fixed for scan consistency: header + rail + counters + divider. + - Metadata uses a hard 64-char visible-width budget with progressive compaction (`exec/decomp/vfy`, `d/p/f/t`, `iN/M`) under narrow widths. + - Counter line emphasizes severity (`fail` highlighted in error color) so blocking issues pop in long sessions. + - Detail pane hierarchy is explicit: `Current step` label first, then step body, then QR section. + ## Workflow Dispatch Architecture ### WorkflowDispatch (dispatch pattern) From 241e9818372fbbbfe7c8f1e2c1886628c5889fb3 Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Wed, 25 Feb 2026 15:34:09 +0700 Subject: [PATCH 016/412] Add QR failure handling analysis documents Analysis of how QR failures halt execution in plan-design and how the fix loop implements severity de-escalation across iterations. 
--- QR_ANALYSIS.md | 643 +++++++++++++++++++++++++++++++++++ QR_ANALYSIS_COMPREHENSIVE.md | 640 ++++++++++++++++++++++++++++++++++ 2 files changed, 1283 insertions(+) create mode 100644 QR_ANALYSIS.md create mode 100644 QR_ANALYSIS_COMPREHENSIVE.md diff --git a/QR_ANALYSIS.md b/QR_ANALYSIS.md new file mode 100644 index 0000000..54ffc1f --- /dev/null +++ b/QR_ANALYSIS.md @@ -0,0 +1,643 @@ +# QR Failure Handling & Fix Mode Analysis + +## Executive Summary + +This document analyzes how QR (Quality Review) failures halt execution in the koan plan-design phase and how the reference executor implements fix loops. The analysis covers three key questions: + +1. **Does QR failure halt the plan-design phase?** YES -- failures trigger a deterministic gate that either spawns a fix loop or force-proceeds after max iterations. +2. **What is the plan specification for QR fix loops?** Architect is re-spawned with `--koan-fix` flag and a QR failure report appended to context. +3. **What are the executor modes?** Initial mode (first-time work) vs. fix mode (targeted repair after QR failures). 
+ +--- + +## Part 1: QR Failure Halts Execution (Confirmed) + +### How the QR Gate Works (Reference Executor) + +The reference executor in `~/.claude/skills/scripts/skills/planner/orchestrator/executor.py` implements a **9-step workflow** for execution (not planning): + +``` +Step 1: Execution Planning (analyze, build wave list) +Step 2: Reconciliation (validate existing code) +Step 3: Implementation (dispatch developers) +Step 4: Code QR (quality review of code) +Step 5: Code QR GATE (route pass/fail) <-- HALTS on FAIL +Step 6: Documentation (TW pass) +Step 7: Doc QR (quality review of docs) +Step 8: Doc QR GATE (route pass/fail) <-- HALTS on FAIL +Step 9: Retrospective +``` + +**Key excerpt from executor.py:** + +```python +CODE_QR_GATE = GateConfig( + qr_name="Code QR", + work_step=3, # If FAIL: loop back to step 3 + pass_step=6, # If PASS: advance to step 6 + pass_message="Code quality verified. Proceed to documentation.", + fix_target=AgentRole.DEVELOPER, # Developer fixes issues +) + +def format_gate(step: int, gate: GateConfig, qr: QRState, total_steps: int) -> str: + """Format gate step output.""" + if qr.passed: + next_cmd = f"python3 -m {MODULE_PATH} --step {gate.pass_step}" + else: + next_iteration = qr.iteration + 1 + next_cmd = f"python3 -m {MODULE_PATH} --step {gate.work_step} --qr-fail --qr-iteration {next_iteration}" + return format_step(body, next_cmd, title=f"{gate.qr_name} Gate") +``` + +**Execution halts on FAIL** because: +- QR GATE step 5 checks `qr.passed` property +- If FAIL: routes back to step 3 (implementation) with `--qr-fail` flag +- Step 3 detects fix mode and spawns developer with targeted repair instructions +- No automatic proceed to step 6 (documentation) + +### How the QR Gate Works (Koan Plan-Design) + +The koan project applies the same pattern to the plan-design phase. 
Based on the plan specification (section 4.2 and 5): + +``` +Plan-Design Phase (Architect): + ├─ execution: spawn architect subagent + │ (6-step exploration + plan writing) + │ + ├─ qr-decompose: spawn decomposer subagent + │ (13-step QR item generation) + │ + ├─ qr-verify: pool of reviewer subagents + │ (parallel verification, PASS/FAIL per item) + │ + └─ gate (deterministic code, no LLM) + PASS -> advance to plan-code + FAIL -> re-spawn architect with fix report (up to 5x) + iteration escalates severity filtering + after 5 iterations, force-proceed +``` + +**Plan specification (section 4.2.1 "QR Gate"):** + +```typescript +function routeGate( + phase: Phase, + qrResult: "pass" | "fail", + iteration: number, +): NextStep { + if (qrResult === "pass") { + deleteQRState(phase); + return nextPhase(phase); + } + const maxIterations = 5; + if (iteration >= maxIterations) { + return nextPhase(phase); // Force proceed, document remaining issues + } + return { phase, subPhase: "execution", mode: "fix", iteration: iteration + 1 }; +} +``` + +**Execution halts on FAIL** because: +- Gate routing is deterministic (pure code, no LLM) +- FAIL does not auto-advance +- Only PASS or max-iterations advances to next phase +- Fix mode spawns architect fresh with failure report + +--- + +## Architecture Pattern (From Old System) + +### Two-Phase Workflow Pattern + +QR operates in two distinct phases per plan phase (plan-design, plan-code, plan-docs, impl-code, impl-docs): + +1. **DECOMPOSITION** (QR Decompose) + - 8-step LLM workflow generating atomic verification items + - Creates `qr-{phase}.json` with items array + - Each item: `{id, scope, check, status: "TODO", severity, [parent_id], [group_id]}` + - Grouping logic (steps 9-13) organizes items by: parent-child, umbrella, component, concern, affinity + +2. 
**VERIFICATION** (QR Verify) + - Parallel dispatch of single items via `--qr-item` flag + - Each subagent verifies ONE item (ANALYZE -> CONFIRM -> SUMMARY pattern) + - Atomic mutation via `cli/qr.py` with file locking (no race conditions) + - Output: one-word PASS/FAIL only (findings in CLI --finding flag) + +### Key Files in Old System + +**Decomposition Scripts:** +- `/Users/lmergen/.claude/skills/scripts/skills/planner/quality_reviewer/plan_design_qr_decompose.py` +- `plan_code_qr_decompose.py` +- `plan_docs_qr_decompose.py` +- Shared: `skills/planner/quality_reviewer/prompts/decompose.py` (8-step workflow, grouping logic) + +**Verification Base:** +- `skills/planner/quality_reviewer/qr_verify_base.py` (VerifyBase class, step routing, item loading) +- Specific: `plan_design_qr_verify.py`, `plan_code_qr_verify.py`, `plan_docs_qr_verify.py` +- Shared: `skills/planner/shared/qr/utils.py` (load_qr_state, get_qr_item, format_qr_item_for_verification) + +**CLI Tools:** +- `skills/planner/cli/qr.py` (update-item with file locking) +- `skills/planner/cli/qr_commands.py` (update_item function, atomic write) + +## Decomposition Workflow (8 Steps) + +### Step 1: Absorb Context +- Load context.json and plan.json from STATE_DIR +- Parse planning context (overview, constraints, invisible knowledge) +- Task: Summarize in 2-3 sentences what success looks like for this phase + +### Step 2: Holistic Concerns (Top-Down) +- Brainstorm concerns specific to the phase (out-of-scope items explicitly excluded) +- Phase-specific examples (e.g., plan-design: "Missing decisions", "Policy defaults without backing") +- Output: Bulleted list, quantity over quality + +### Step 3: Structural Enumeration (Bottom-Up) +- List plan elements that exist in plan.json +- Use IDs where available (DL-001, M-001, etc.) 
+- Phase-specific (e.g., plan-design: decisions, constraints, risks, milestones, code_intents) + +### Step 4: Gap Analysis (Shared) +- Compare Step 2 concerns vs Step 3 elements +- Identify gaps: concerns not covered by elements, elements with no concerns +- Output: Umbrella vs specific items, cross-cutting vs targeted + +### Step 5: Generate Items (Phase-Specific Severity) +- Create verification items with UMBRELLA + SPECIFIC pattern +- Assign severity (MUST/SHOULD/COULD per phase-specific rules) +- Format: `{id, scope, check, status: "TODO", severity}` + +### Step 6: Atomicity Check (Shared) +- Review each item for atomicity (tests ONE thing, unambiguous pass/fail) +- Split non-atomic MUST items into parent + children (qa-002 -> qa-002a, qa-002b) +- Children inherit parent's severity, have parent_id field + +### Step 7: Coverage Validation (Shared) +- Use Step 3 enumeration as checklist +- Verify each element has at least one item covering it +- Verify each concern from Step 2 has at least one item +- Add items if gaps found (prefer over-coverage) + +### Step 8: Finalize +- Write qr-{phase}.json to STATE_DIR +- Format: `{phase, iteration: 1, items: [...]}` +- No fixed item count; content-driven + +### Steps 9-13: Grouping (Shared) +- **Step 9:** Structural grouping (deterministic: parent-child resolution, umbrella batching) +- **Step 10:** Component grouping (items verifying different aspects of same element) +- **Step 11:** Concern grouping (items checking same quality dimension across elements) +- **Step 12:** Affinity grouping (semantic similarity for remaining items) +- **Step 13:** Final validation (naming conventions, large group review, singleton review) + +## Phase-Specific Prompts + +### Plan-Design Phase + +**Step 1 Absorb:** +``` +Read plan.json from STATE_DIR: + cat $STATE_DIR/plan.json | jq '.' + +SCOPE: Plan structure and decision quality. 
+ +Focus on: + - planning_context.decisions (completeness, reasoning quality) + - planning_context.constraints (all documented?) + - planning_context.risks (identified and addressed?) + - milestones[].code_intents (structure present?) + - invisible_knowledge (captured?) + +OUT OF SCOPE (verified in later phases): + - Code correctness (plan-code phase) + - Documentation quality (plan-docs phase) +``` + +**Step 2 Concerns:** +``` +Brainstorm concerns specific to PLAN STRUCTURE: + - Missing decisions (non-obvious choices not logged) + - Policy defaults without user backing + - Orphan milestones (no code_intents) + - Invalid references (decision_refs point nowhere) + - Reasoning chains too shallow + - Risks identified but not addressed + +DO NOT brainstorm code or documentation concerns (out of scope) +``` + +**Step 3 Enumeration:** +``` +For plan-design, enumerate PLAN STRUCTURE ARTIFACTS: + +DECISIONS: + - Each decision in planning_context.decisions (ID, decision text) + - Has reasoning? Multi-step chain? + +CONSTRAINTS: + - Each constraint in planning_context.constraints (ID, type) + - User-specified or inferred? + +RISKS: + - Each risk in planning_context.risks (ID, risk text) + - Has mitigation? 
+ +MILESTONES: + - Each milestone (ID, name, count of code_intents) + - Each code_intent with decision_refs (ID, which decisions referenced) + +INVISIBLE KNOWLEDGE: + - system, invariants[], tradeoffs[] content +``` + +**Step 5 Severity (Plan-Design):** +``` +SEVERITY ASSIGNMENT (per conventions/severity.md, plan-design scope): + + MUST (blocks all iterations): + - DIAGRAM categories: + * ORPHAN_NODE: node with zero edges + * INVALID_EDGE_REF: edge references missing node + * INVALID_SCOPE_REF: scope references non-existent milestone + - KNOWLEDGE subset: + * DECISION_LOG_MISSING: non-trivial choice without logged rationale + * POLICY_UNJUSTIFIED: policy default without Tier 1 backing + * ASSUMPTION_UNVALIDATED: architectural assumption without citation + + SHOULD (iterations 1-4): + - Shallow reasoning chains (premise without implication) + - Missing risk mitigations + - Incomplete constraint documentation + + COULD (iterations 1-3): + - Cosmetic plan formatting + - Minor inconsistencies in naming +``` + +**Component Examples:** +``` + - A milestone + - A major decision + - A constraint category +``` + +**Concern Examples:** +``` + - Reasoning chain quality + - Reference integrity + - Risk coverage +``` + +### Plan-Code Phase + +**Step 1 Absorb:** +``` +Read plan.json from STATE_DIR: + cat $STATE_DIR/plan.json | jq '.' + +SCOPE: Code correctness in planned changes. 
+ +Focus on: + - milestones[].code_intents[] -- what changes are intended + - milestones[].code_changes[] -- actual diff content + - code_changes[].diff (context lines must match codebase) + - code_changes[].why_comments[].decision_ref (refs must exist) + +OUT OF SCOPE (already verified in plan-docs phase): + - Documentation quality (temporal contamination, WHY-not-WHAT) + - README/CLAUDE.md content + - Invisible knowledge coverage +``` + +**Step 2 Concerns:** +``` +Brainstorm concerns specific to CODE CORRECTNESS: + - Context lines don't match actual codebase + - Diff format violations (missing +/- prefixes, wrong line counts) + - Code_intents without corresponding code_changes + - Invalid decision_refs in why_comments + - Type errors, missing imports, API mismatches + - Convention violations (per project style) + +DO NOT brainstorm documentation concerns (out of scope for this phase). +``` + +**Step 3 Enumeration:** +``` +For plan-code, enumerate CODE CHANGE ARTIFACTS: + +INTENTS: + - Each milestone's code_intents (ID, description) + - Intent-to-change mapping (which intents have changes?) + +CHANGES: + - Each code_change (ID, file path, line range) + - Files touched across all changes + - Context line locations requiring verification + +REFERENCES: + - decision_refs in why_comments (do they exist in planning_context?) 
+ +DO NOT enumerate: + - documentation{} fields (plan-docs's job) + - readme_entries (plan-docs's job) +``` + +**Step 5 Severity (Plan-Code):** +``` +SEVERITY ASSIGNMENT (per conventions/severity.md, plan-code scope): + + MUST (blocks all iterations): + - ASSUMPTION_UNVALIDATED: architectural assumption without citation + - MARKER_INVALID: intent marker without valid explanation + - decision_ref references non-existent decision + + SHOULD (iterations 1-4) - STRUCTURE categories: + - GOD_OBJECT: >15 methods OR >10 deps + - GOD_FUNCTION: >50 lines OR >3 nesting + - CONVENTION_VIOLATION: violates documented project convention + - TESTING_STRATEGY_VIOLATION: tests don't follow confirmed strategy + + COULD (iterations 1-3) - COSMETIC: + - TOOLCHAIN_CATCHABLE: errors the compiler/linter would flag + - FORMATTER_FIXABLE: style issues fixable by formatter + - DEAD_CODE: unused functions, impossible branches + +DO NOT use KNOWLEDGE categories for documentation issues -- +those are plan-docs's responsibility. 
+``` + +**Component Examples:** +``` + - A file being modified + - A module/package + - A code_intent cluster +``` + +**Concern Examples:** +``` + - Error handling consistency + - Type safety across boundaries + - Testing boundary clarity +``` + +### Plan-Docs Phase + +**Step 1 Absorb:** +Similar structure, focus on doc_diff fields in code_changes + +**Step 2 Concerns:** +- Temporal contamination in doc_diffs (change-relative language) +- Baseline references (documentation assumes prior state) +- doc_diffs missing for non-empty diffs +- decision_refs in doc_diffs not captured + +**Step 3 Enumeration:** +- doc_diff content per code_change +- documentation{} fields (function docstrings, module comments) +- readme_entries content +- decision_log coverage in documentation + +**Step 5 Severity (Plan-Docs):** +Only KNOWLEDGE categories (TW cannot fix code): +- TEMPORAL_CONTAMINATION +- BASELINE_REFERENCE (doc assumes prior state) +- MISSING_DOC_DIFF (diff present, doc_diff absent) +- DECISION_UNCOVERED (decision not referenced in any doc_diff) + +## Verification Workflow (Per-Item Parallelization) + +### Base Class: VerifyBase + +Handles: +1. Step type routing (CONTEXT, ANALYZE, CONFIRM, SUMMARY) +2. Item loading from qr-{phase}.json +3. 
Dynamic step formula: `total_steps = 1 + (2 * num_items) + 1` + - Step 1: CONTEXT (load shared state) + - Steps 2..2N+1: ANALYZE/CONFIRM pairs per item + - Final step: SUMMARY + +### CONTEXT Step +- Load conventions, phase rules, context.json, plan.json +- List all items to verify +- Display severity and scope for each + +### ANALYZE Step (Per Item) +- Explore codebase if needed +- Form preliminary conclusion (PASS or FAIL) +- Task output: narrative analysis (no tool call yet) + +### CONFIRM Step (Per Item) +- Verify confidence in preliminary conclusion +- Execute CLI command to record result: + ```bash + python3 -m skills.planner.cli.qr --state-dir {state_dir} --qr-phase {phase} \ + update-item {item_id} --status PASS + ``` + Or: + ```bash + python3 -m skills.planner.cli.qr --state-dir {state_dir} --qr-phase {phase} \ + update-item {item_id} --status FAIL --finding '{finding}' + ``` +- Tool call blocks until file lock released (atomic update) + +### SUMMARY Step +- Count results, output single word: PASS or FAIL +- EXACT FORMAT REQUIRED: + - Output EXACTLY ONE WORD: "PASS" or "FAIL" + - No markdown headers (##, **) + - No "VERDICT:" prefix + - No explanation, prose, or reasoning + - Findings go in --finding flag, NOT in output + +## Phase-Specific Verification Guidance + +### Plan-Design Verification + +**Macro Check (scope: "\*"):** +``` +MACRO CHECK - Verify across entire plan.json: + + Read plan.json: + cat {state_dir}/plan.json | jq '.' 
+``` + +**Milestone Check (scope: "milestone:M-001"):** +``` +MILESTONE CHECK - Focus on M-001: + + Read milestone: + cat {state_dir}/plan.json | jq '.milestones[] | select(.id == "M-001")' +``` + +**Code Intent Check (scope: "code_intent:CI-M-001-001"):** +``` +CODE INTENT CHECK - Focus on CI-M-001-001: + + Read intent (find containing milestone first): + cat {state_dir}/plan.json | jq '.milestones[].code_intents[] | select(.id == "CI-M-001-001")' +``` + +**Check-Specific Guidance:** + +Decision Log Verification: +``` +DECISION LOG VERIFICATION: + - Each entry should have multi-step reasoning + - BAD: 'Polling | Webhooks unreliable' + - GOOD: 'Polling | 30% webhook failure -> need fallback anyway' +``` + +Policy Default Verification: +``` +POLICY DEFAULT VERIFICATION: + - Policy defaults affect user/org (lifecycle, capacity, failure handling) + - Must have Tier 1 (user-specified) backing in decision_log + - Technical defaults can use Tier 2-3 backing +``` + +Code Intent Verification: +``` +CODE INTENT VERIFICATION: + - Each implementation milestone needs code_intents + - Each code_intent needs file path and behavior + - decision_refs should point to valid decision_log entries +``` + +### Plan-Code Verification + +Similar structure with code-specific checks: +- Context line verification (diff patterns exist in actual files) +- Diff format validation (RULE 0/1/2) +- Intent linkage (code_change.intent_ref valid) +- Decision ref validity +- Temporal contamination in comments +- WHY-not-WHAT quality + +### Plan-Docs Verification + +Doc-specific checks: +- Temporal contamination in doc_diffs +- Baseline references (doc assumes prior state) +- Code without docs (diff present, doc_diff absent) +- Invalid diff format +- Decision coverage in docs +- WHY-not-WHAT verification +- Missing docstrings + +## Data Structures + +### QR Item (qr-{phase}.json) + +```typescript +interface QRItem { + id: string; // e.g., "plan-001", "qa-002a" + scope: string; // "*" (macro) or 
"element:ID" or "file:path" + check: string; // Description of what to verify + status: "TODO" | "PASS" | "FAIL"; + severity?: "MUST" | "SHOULD" | "COULD"; // Default: "SHOULD" + finding?: string; // Only for FAIL status + parent_id?: string; // For split items (qa-002a has parent_id: "qa-002") + group_id?: string; // For grouping (umbrella, component-*, concern-*, affinity-*, parent-*) + version?: number; // Default: 1, incremented on each update +} + +interface QRState { + phase: string; // "plan-design", "plan-code", etc. + iteration: number; // Current iteration (1 on first decompose) + items: QRItem[]; +} +``` + +### Severity Blocking Rules + +Per iteration: +- Iteration 1: MUST blocks all 4 iterations of fixes, SHOULD blocks iterations 1-4, COULD blocks 1-3 +- Iteration 2: MUST blocks iterations 2-5, SHOULD blocks 2-5, COULD blocks 2-4 +- Iteration 3: MUST blocks iterations 3-6, SHOULD blocks 3-6, COULD blocks 3-5 +- Iteration 4: MUST blocks iterations 4+, SHOULD blocks 4+, COULD blocks 4+ +- After iteration 4: No blocking (move to manual review) + +## Integration with Koan Architecture + +### Expected File Structure +``` +src/planner/phases/ + qr/ + decompose/ + phase.ts # QRDecomposePhase class (8-step workflow) + prompts.ts # Phase-specific step prompts + verify/ + phase.ts # QRVerifyPhase class (item-based verification) + prompts.ts # Verification guidance per phase + lib/ + items.ts # QRItem type, load/save, atomic mutations + grouping.ts # Steps 9-13 grouping logic +``` + +### Phase Registration +```typescript +// In phases/dispatch.ts +if (config.role === "quality-reviewer" && config.phase === "plan-design") { + const phase = new QRDecomposePhase(...); + await phase.begin(); +} +``` + +### Tool Registration +- QR tools likely smaller subset than plan-design (mainly read tools, no plan mutations) +- Tools may include: qr_update_item (atomic write), qr_load_state (read), qr_get_item (lookup) + +## Critical Implementation Notes + +### 1. 
Decomposition is Single-Run +- Decompose runs ONCE per phase (steps 1-8, 9-13) +- Orchestrator skips decompose if qr-{phase}.json already exists with iteration >=1 +- Each phase has its own decomposition script (can't share due to phase-specific prompts) + +### 2. Verification is Parallel +- Each item dispatched as separate subagent with --qr-item flag +- File locking in CLI prevents race conditions +- No shared state mutation; each agent writes its own result atomically + +### 3. Step Gates Must Use Blocklists +- Whitelist fails closed (blocks read tools unintentionally) +- Blocklist defers to checkPermission for everything not explicitly gated +- Example: `if (step < 6 && PLAN_MUTATION_TOOLS.has(name)) { block }` + +### 4. Findings in CLI Flag, Not Output +- Tool result is NOT return value; findings go in `--finding` flag +- SUMMARY step outputs ONE WORD only (PASS or FAIL) +- This avoids "text + tool_call in same response" bug (GPT-5-codex) + +### 5. invoke_after Two-Part Gate +- Every step prompt ends with "WHEN DONE: call koan_complete_step" +- Tool description includes "Do NOT call until told" +- Dual gates ensure single transition per step + +### 6. Disk-Backed Mutations +- Every tool mutation writes qr-{phase}.json immediately +- No finalize pattern; descriptive feedback on each write +- This prevents LLM from skipping intermediate mutations + +### 7. 
Severity Blocking vs Iteration Count +- Blocking set determined at gate time, not item creation time +- by_blocking_severity(iteration) is a predicate factory +- Iteration 0 not used; iteration 1 is first decompose, iteration 2+ are retries + +## Migration Checklist + +- [ ] Create QRDecomposePhase class with 8-step + 5-step grouping workflow +- [ ] Implement phase-specific prompts for plan-design, plan-code, plan-docs +- [ ] Create QRVerifyPhase class with CONTEXT/ANALYZE/CONFIRM/SUMMARY routing +- [ ] Implement VerifyBase-like step mapping (total_steps formula, item routing) +- [ ] Implement atomic QRItem mutations with file locking +- [ ] Add qr_update_item tool (wrapper around file-locked write) +- [ ] Add qr_load_state, qr_get_item tools (read-only) +- [ ] Register phases in dispatch.ts for quality-reviewer role +- [ ] Add QR phase detection to before_agent_start handler +- [ ] Implement SUMMARY step output validation (one word only) +- [ ] Test decompose single-run enforcement (skip if iteration >=1) +- [ ] Test parallel verify with file locking (concurrent writes) +- [ ] Test severity blocking at iteration thresholds +- [ ] Copy exact prompts from Python scripts (no rewriting) diff --git a/QR_ANALYSIS_COMPREHENSIVE.md b/QR_ANALYSIS_COMPREHENSIVE.md new file mode 100644 index 0000000..29b04ff --- /dev/null +++ b/QR_ANALYSIS_COMPREHENSIVE.md @@ -0,0 +1,640 @@ +# QR Failure Handling & Fix Mode Analysis + +## Executive Summary + +This document analyzes how QR (Quality Review) failures halt execution in the koan plan-design phase and how the reference executor implements fix loops. The analysis covers three key questions: + +1. **Does QR failure halt the plan-design phase?** YES -- failures trigger a deterministic gate that either spawns a fix loop or force-proceeds after max iterations. +2. **What is the plan specification for QR fix loops?** Architect is re-spawned with `--koan-fix` flag and a QR failure report appended to context. +3. 
**What are the executor modes?** Initial mode (first-time work) vs. fix mode (targeted repair after QR failures). + +--- + +## Part 1: QR Failure Halts Execution (Confirmed) + +### How the QR Gate Works (Reference Executor) + +The reference executor in `~/.claude/skills/scripts/skills/planner/orchestrator/executor.py` implements a **9-step workflow** for execution: + +``` +Step 1: Execution Planning (analyze, build wave list) +Step 2: Reconciliation (validate existing code) +Step 3: Implementation (dispatch developers) +Step 4: Code QR (quality review of code) +Step 5: Code QR GATE (route pass/fail) <-- HALTS on FAIL +Step 6: Documentation (TW pass) +Step 7: Doc QR (quality review of docs) +Step 8: Doc QR GATE (route pass/fail) <-- HALTS on FAIL +Step 9: Retrospective +``` + +**Key excerpt from executor.py:** + +```python +CODE_QR_GATE = GateConfig( + qr_name="Code QR", + work_step=3, # If FAIL: loop back to step 3 + pass_step=6, # If PASS: advance to step 6 + pass_message="Code quality verified. Proceed to documentation.", + fix_target=AgentRole.DEVELOPER, # Developer fixes issues +) + +def format_gate(step: int, gate: GateConfig, qr: QRState, total_steps: int) -> str: + """Format gate step output.""" + if qr.passed: + next_cmd = f"python3 -m {MODULE_PATH} --step {gate.pass_step}" + else: + next_iteration = qr.iteration + 1 + next_cmd = f"python3 -m {MODULE_PATH} --step {gate.work_step} --qr-fail --qr-iteration {next_iteration}" + return format_step(body, next_cmd, title=f"{gate.qr_name} Gate") +``` + +**Execution halts on FAIL** because: +- QR GATE step 5 checks `qr.passed` property +- If FAIL: routes back to step 3 (implementation) with `--qr-fail` flag +- Step 3 detects fix mode and spawns developer with targeted repair instructions +- No automatic proceed to step 6 (documentation) + +### How the QR Gate Works (Koan Plan-Design) + +The koan project applies the same pattern. 
Based on the plan specification (sections 4.2 and 5 of plans/2026-02-10-init.md): + +``` +Plan-Design Phase (Architect): + ├─ execution: spawn architect subagent + │ (6-step exploration + plan writing) + │ + ├─ qr-decompose: spawn decomposer subagent + │ (13-step QR item generation) + │ + ├─ qr-verify: pool of reviewer subagents + │ (parallel verification, PASS/FAIL per item) + │ + └─ gate (deterministic code, no LLM) + PASS -> advance to plan-code + FAIL -> re-spawn architect with fix report (up to 5x) + iteration escalates severity filtering + after 5 iterations, force-proceed +``` + +**Plan specification routing logic (section 4.2.1):** + +```typescript +function routeGate( + phase: Phase, + qrResult: "pass" | "fail", + iteration: number, +): NextStep { + if (qrResult === "pass") { + deleteQRState(phase); + return nextPhase(phase); + } + const maxIterations = 5; + if (iteration >= maxIterations) { + return nextPhase(phase); // Force proceed, document remaining issues + } + return { phase, subPhase: "execution", mode: "fix", iteration: iteration + 1 }; +} +``` + +**Execution halts on FAIL** because: +- Gate routing is deterministic (pure code, not prompt-based) +- FAIL does not auto-advance +- Only a PASS or reaching the maximum iteration count advances to the next phase +- Fix mode spawns architect fresh with failure report + +--- + +## Part 2: Plan Specification for QR Fix Loops + +### Fix Mode Activation + +From plan section 4.2 "First attempt vs. fix mode": + +> When a phase's QR gate returns FAIL, the orchestrator re-spawns the subagent with an additional flag (`--koan-fix`) and appends the QR failure report to the context file. The subagent's role hooks detect fix mode and adjust step instructions to focus on fixing specific issues identified by the QR. + +**Mechanism:** + +1. **Gate detects FAIL** → compute `iteration + 1` +2.
**Orchestrator spawns subagent** with: + - `--koan-fix` flag (new) + - `--koan-fix-iteration N` flag (new) + - Same `--koan-plan-dir` (plan.json + context.json + qr-plan-design.json all present) +3. **Context file is mutated** to append QR failures: + - Original 8 context categories remain (read-only) + - QR failures appended in a new `qr_failures` section +4. **Role hooks detect fix mode** via flags in `before_agent_start` +5. **Step instructions adjust** to focus on fixing + +### Reference Architect Fix Prompt + +The reference architect fix script is `~/.claude/skills/scripts/skills/planner/architect/plan_design_qr_fix.py` (3-step workflow): + +**Step 1: Load QR Failures** + +``` +FIX MODE - QR Iteration {qr_iteration} + +QR-COMPLETENESS found issues in the plan. + +FAILED QR ITEMS TO FIX (address these FIRST): +================================================ +[plan-001] Decision log completeness + Scope: decision_log entry DL-005 + Finding: Decision reference missing backing premise + +[plan-002] Code intent specification + Scope: code_intent id CI-M-001-001 + Finding: Behavior description incomplete (unclear acceptance criteria) + +================================================ + +PLANNING CONTEXT (reference for semantic validation): +(context.json displayed for validation reference) + +For EACH failed item: + 1. Read the 'finding' field to understand the issue + 2. Identify what in plan.json needs to change + 3. Note the fix approach for step 2 +``` + +**Step 2: Apply Targeted Fixes** + +``` +APPLY targeted fixes to plan.json using CLI commands. 
+ +Missing decision_log entry: + python3 -m skills.planner.cli.plan --state-dir $STATE_DIR set-decision \ + --decision '<decision>' \ + --reasoning '<premise -> implication -> conclusion>' + +BATCH MODE (preferred): + python3 -m skills.planner.cli.plan --state-dir $STATE_DIR batch '[ + {"method": "set-decision", "params": {...}, "id": 1}, + {"method": "set-intent", "params": {...}, "id": 2} + ]' + +CONSTRAINT: Fix ONLY the failing items. Don't refactor passing items. +``` + +**Step 3: Validate Fixes** + +``` +Run structural validation: + python3 -m skills.planner.cli.plan validate --phase plan-design + +SELF-CHECK each fixed item: + For each FAIL item you addressed: + - Does the fix address the specific finding? + - Does the fix introduce new issues? + +If validation passes: + Your complete response must be exactly: PASS + Do not add summaries, explanations, or any other text. +``` + +### Key Design Points in Fix Mode + +1. **QR failures explicitly listed** -- The architect sees exactly which items failed and why (the "finding" field) +2. **Plan mutations via existing CLI** -- Fix mode doesn't add new mutation tools, just focuses the prompt on specific items +3. **Targeted not holistic** -- Fix mode does NOT re-explore codebase. It reads the QR report and applies surgical fixes. +4. **No flailing** -- The constraint "Fix ONLY the failing items" prevents second-guessing the entire plan +5. **Validation is mandatory** -- Each fix iteration must pass `python3 -m ... validate` before reporting PASS + +### Iteration Escalation with Severity Filtering + +QR items have a `severity` field: MUST | SHOULD | COULD + +**Severity filtering logic (implied by shared/qr/constants.py):** + +```python +def get_blocking_severities(iteration: int) -> Set[str]: + """Items that block at this iteration. + + iteration 1: MUST only + iteration 2: MUST, SHOULD + iteration 3+: MUST, SHOULD, COULD (all) + """ +``` + +**Meaning:** On iteration 1, only critical (MUST) items block.
By iteration 3, even minor (COULD) items block. This escalates pressure to fix progressively more issues. + +--- + +## Part 3: Executor Modes (Initial vs. Fix) + +### Reference Executor: Initial Mode + +When a phase is first executed (no prior failures): + +**Step 3: Implementation (Initial Mode)** + +```python +def format_step_3_implementation(qr: QRState, total_steps: int, ...) -> str: + if qr.state == LoopState.RETRY: + # Fix mode (handled separately) + ... + else: + # Initial mode + actions.extend([ + "Execute ALL milestones using wave-aware parallel dispatch.", + "", + "WAVE-AWARE EXECUTION:", + " - Milestones within same wave: dispatch in PARALLEL", + " - Waves execute SEQUENTIALLY", + "", + "FOR EACH WAVE:", + " 1. Dispatch developer agents for ALL milestones in wave", + " 2. Each prompt includes: plan, milestone, files, acceptance criteria", + " 3. Wait for ALL agents in wave to complete", + " 4. Run tests: pytest / tsc / go test -race", + " 5. Proceed to next wave", + "", + "After ALL waves complete, proceed to Code QR.", + ]) +``` + +**Initial mode** is the "full breadth" mode: +- No prior failures to fix +- Execute all milestones +- Waves in sequence, milestones within wave in parallel +- Standard tests + validation + +### Reference Executor: Fix Mode + +When a QR gate returns FAIL and iteration < 5: + +**Step 3: Implementation (Fix Mode)** + +```python +def format_step_3_implementation(qr: QRState, total_steps: int, ...) 
-> str: + if qr.state == LoopState.RETRY: + actions.append(format_state_banner("IMPLEMENTATION FIX", qr.iteration, "fix")) + actions.append("FIX MODE: Code QR found issues.") + actions.append("") + + mode_script = get_mode_script_path("dev/fix-code.py") + invoke_cmd = f"python3 -m {mode_script} --step 1 --qr-fail --qr-iteration {qr.iteration}" + + actions.append(subagent_dispatch( + agent_type="developer", + command=invoke_cmd, + )) + actions.append("Developer reads QR report and fixes issues in blocks.") + actions.append("After developer completes, re-run Code QR for fresh verification.") +``` + +**Fix mode** is the "targeted repair" mode: +- QR failures are present (in memory and on disk) +- Dispatch specialized fix agent (different script/prompts) +- Agent reads QR failure items +- Agent applies fixes to milestones mentioned in failures +- Re-run QR immediately after (fresh verification) + +### Comparison Table + +| Aspect | Initial Mode | Fix Mode | +|--------|--------------|----------| +| **Trigger** | First execution | QR FAIL (iteration < 5) | +| **Context** | No prior failures | QR items with status=FAIL + findings | +| **Scope** | All milestones | Only milestones in QR failures | +| **Agent Dispatch** | Full work agent | Specialized fix agent | +| **Step Sequence** | Role's standard N-step | 3-step fix workflow | +| **Tools Available** | Full read + write | Same tools (focus via prompt) | +| **Exit Condition** | Role completes final step | PASS to QR (no FAIL) | +| **Next** | Proceed to QR decompose | Re-run QR immediately | +| **Iteration** | N/A | 1, 2, 3, ... 
(max 5) | + +### How the Executor Decides Which Mode + +**Flag detection in executor.py:** + +```python +# format_step_3_implementation +state = LoopState.RETRY if qr_fail else LoopState.INITIAL + +# Gate's FAIL routing: +next_cmd = f"python3 -m {MODULE_PATH} --step {work_step} --qr-fail --qr-iteration {next_iteration}" +``` + +When gate returns FAIL, step 3 is re-invoked with `--qr-fail --qr-iteration 2`, and the formatter detects fix mode. + +--- + +## Part 4: Reference Implementation Deep Dive + +### Shared QR Infrastructure + +Located in `~/.claude/skills/scripts/skills/planner/shared/qr/`: + +**types.py:** + +```python +class QRStatus(Enum): + PASS = "pass" + FAIL = "fail" + +class LoopState(Enum): + INITIAL = "initial" + RETRY = "retry" + COMPLETE = "complete" + +@dataclass +class QRState: + iteration: int = 1 + state: LoopState = LoopState.INITIAL + status: QRStatus | None = None + + @property + def passed(self) -> bool: + return self.status == QRStatus.PASS + + def transition(self, status: QRStatus) -> None: + if status == QRStatus.PASS: + self.state = LoopState.COMPLETE + else: + self.state = LoopState.RETRY + self.iteration += 1 + +@dataclass +class GateConfig: + qr_name: str + work_step: int # Where to loop back on FAIL + pass_step: int | None # Where to go on PASS + pass_message: str + fix_target: AgentRole | None # Developer / Writer / Architect +``` + +**gates.py:** + +```python +def build_gate_output( + module_path: str, + qr_name: str, + qr: QRState, + work_step: int, + pass_step: int | None, + pass_message: str, + fix_target: AgentRole | None, + state_dir: str, +) -> GateResult: + """Build complete gate step output for QR gates. 
+ + Gates route to either: + - pass_step: QR passed, proceed to next workflow phase + - work_step: QR failed, loop back to fix issues + """ + if qr.passed: + next_cmd = f"python3 -m {module_path} --step {pass_step}" + else: + next_cmd = f"python3 -m {module_path} --step {work_step} --state-dir {state_dir}" + + return GateResult( + output=format_step(body, next_cmd, title=title), + terminal_pass=qr.passed and pass_step is None, + ) +``` + +### How the Architect Fix Prompts Load QR Failures + +**plan_design_qr_fix.py, step 1:** + +```python +def get_step_guidance(step: int, module_path: str = None, **kwargs) -> dict: + if step == 1: + state_dir = kwargs.get("state_dir", "") + qr_iteration = get_qr_iteration(state_dir, PHASE) + + # Load failed items from qr-{phase}.json + qr_state = load_qr_state(state_dir, PHASE) + failed_items_block = format_failed_items_for_fix(qr_state) + + return { + "title": STEPS[1], + "actions": [ + f"FIX MODE - QR Iteration {qr_iteration}", + "", + "QR-COMPLETENESS found issues in the plan.", + "", + failed_items_block, # <- Explicit list of failures + "", + "For EACH failed item:", + " 1. Read the 'finding' field to understand the issue", + " 2. Identify what in plan.json needs to change", + " 3. Note the fix approach for step 2", + ], + } +``` + +**format_failed_items_for_fix output example:** + +``` +============================================================ +FAILED QR ITEMS TO FIX (address these FIRST): +============================================================ + +[QR-plan-design-001] Decision completeness + Scope: decision_log entry (id: DL-003) + Finding: Caching strategy selected but no justification. + +[QR-plan-design-002] Intent specification + Scope: code_intent (id: CI-M-001-001) + Finding: Behavior unclear: "Add caching layer" -- where? What TTL? + +[QR-plan-design-003] Risk documentation + Scope: known_risks + Finding: Redis failure mode not documented. 
+ +============================================================ +``` + +--- + +## Part 5: Koan's QR Specification + +### Section 4.2: QR Block Pattern + +**Plan-Design Phase Structure:** + +``` +Phase 2: PLAN-DESIGN +├─ Execution (architect explores + writes plan) +├─ QR Decompose (decomposer generates items) +├─ QR Verify (reviewers verify items) +└─ Gate (route PASS->phase3 or FAIL->reexecute_with_fix) +``` + +### Section 4.2.1: QR Decomposition (13-step Workflow) + +The decomposer produces items with: +- `id`: unique item ID +- `scope`: `*` (cross-cutting) or element reference +- `check`: the verification question +- `status`: TODO | PASS | FAIL +- `finding`: explanation of FAIL (populated by reviewers) +- `severity`: MUST | SHOULD | COULD + +### Section 4.2.2: QR Verification (Parallel Subagents) + +Each reviewer subagent: +1. Receives assigned item group +2. For each item: ANALYZE -> CONFIRM -> update state +3. Returns per-item status +4. Aggregate: ANY FAIL = phase FAIL + +### Section 4.2.3: Fix Mode (Key Design Decision) + +From section 4.2: + +> When a phase's QR gate returns FAIL, the orchestrator re-spawns the subagent with an additional flag (`--koan-fix`) and appends the QR failure report to the context file. The subagent's role hooks detect fix mode and adjust step instructions to focus on fixing specific issues identified by the QR. + +--- + +## Part 6: Koan Implementation + +### Key Difference: Single Phase Handler vs. 
Separate Scripts + +**Reference executor:** +- `architect/plan_design_execute.py` (6 steps, first-time) +- `architect/plan_design_qr_fix.py` (3 steps, targeted repair) +- Separate scripts for each mode + +**Koan design:** +- Single `PlanDesignPhase` handler +- Phase hooks detect `--koan-fix` flag +- Step prompts adjust at runtime in the `context` event handler +- Same tools, same workflow -- just different prompt text + +### Koan Implementation Pattern (Inferred) + +```typescript +// src/planner/phases/plan-design/phase.ts + +export class PlanDesignPhase { + private state: PlanDesignState & { + fixMode: boolean; + fixIteration: number; + }; + + async begin(): Promise<void> { + // Detect fix mode from flags + this.state.fixMode = this.pi.getFlag("koan-fix") === "true"; + this.state.fixIteration = parseInt(this.pi.getFlag("koan-fix-iteration") || "0"); + + // Load context.json (with QR failures appended if fixMode) + const contextPath = path.join(this.planDir, "context.json"); + const raw = await fs.readFile(contextPath, "utf8"); + this.state.contextData = JSON.parse(raw) as ContextData; + // context.qr_failures populated by orchestrator if fixMode + } + + private registerHandlers(): void { + this.pi.on("context", (event) => { + if (this.state.step !== 1) return undefined; + + let prompt = this.state.step1Prompt; + + // Adjust for fix mode + if (this.state.fixMode) { + prompt = adjustPromptForFixMode( + prompt, + this.state.fixIteration, + this.state.contextData.qr_failures, + ); + } + + const messages = event.messages.map((m) => + m.role === "user" ?
{ ...m, content: prompt } : m, + ); + return { messages }; + }); + } +} + +function adjustPromptForFixMode( + basePrompt: string, + iteration: number, + failures: Array<{id: string; scope: string; finding: string}>, +): string { + // Replace exploration sections with fix guidance + // Prepend: list of failed items + findings + // Add constraint: "Fix ONLY these items" + // Add validation guidance +} +``` + +### Orchestrator-Side: Appending QR Failures to Context + +When gate returns FAIL: + +```typescript +// 1. Load qr-plan-design.json +const qrPath = path.join(planDir, "qr-plan-design.json"); +const qr = JSON.parse(await fs.readFile(qrPath, "utf8")); + +// 2. Filter FAIL items +const failures = qr.items.filter(item => item.status === "FAIL").map(item => ({ + id: item.id, + scope: item.scope, + finding: item.finding, +})); + +// 3. Load context.json +const contextPath = path.join(planDir, "context.json"); +const context = JSON.parse(await fs.readFile(contextPath, "utf8")); + +// 4. Append failures +context.qr_failures = failures; +context.qr_iteration = iteration; + +// 5. Write back (atomic) +await writeContext(planDir, context); + +// 6. Spawn architect in fix mode +spawn("pi", [ + "-p", + "-e", extensionPath, + "--koan-role", "architect", + "--koan-phase", "plan-design", + "--koan-plan-dir", planDir, + "--koan-fix", "true", + "--koan-fix-iteration", String(iteration), + "Fix the plan issues identified in the QR report.", +]); +``` + +--- + +## Summary Table: Initial vs. 
Fix Mode + +| Dimension | Initial Mode | Fix Mode | +|-----------|--------------|----------| +| **QR State** | None (first execution) | FAIL (previous iteration) | +| **Orchestrator Decision** | Execute (fresh start) | Fix (failures present) | +| **Flags** | None | `--koan-fix true --koan-fix-iteration N` | +| **Context File** | 8 categories only | Same 8 categories plus `qr_failures` array | +| **Step Sequence** | 1=analysis, 2=exploration, ..., 6=write | 1=load failures, 2=fix, 3=validate | +| **Scope** | All codebase areas relevant to task | Only areas in QR failures | +| **Tools** | Full set (read + write) | Same set (focus via prompt) | +| **Exit** | PASS to orchestrator -> QR decompose | PASS to orchestrator -> re-run QR | +| **Iteration** | Not applicable | 1, 2, 3, ... (max 5) | +| **Severity Filter** | N/A | Escalates per iteration | +| **Outcome** | plan.json artifact | Updated plan.json (surgical fixes) | + +--- + +## Conclusion + +**QR failures halt execution in koan's plan-design phase** because the QR gate is deterministic code. The gate examines the QR result and takes one of three routes: +1. PASS → advance to next phase +2. FAIL + iteration < 5 → spawn architect in fix mode with failure report +3. FAIL + iteration >= 5 → force-proceed to next phase + +**Fix mode is a targeted repair workflow** that differs from initial mode by: +- Running a 3-step workflow (load -> fix -> validate) instead of N-step exploration +- Reading QR failures from context + disk +- Focusing fixes on listed items only +- Escalating severity requirements each iteration + +**The reference executor provides the exact implementation patterns** that koan follows, with the improvement that koan consolidates execute/fix logic into one phase handler via prompt adjustment, rather than separate scripts.
+ From aa70767a8f4fc1a8ca199839e9153a47277d0902 Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Wed, 25 Feb 2026 15:34:17 +0700 Subject: [PATCH 017/412] Add shape-table log formatting for koan tool events Restructures LogLine from prefix/highlight/meta to tool/summary/ highValue. Adds KOAN_SHAPES registry with per-tool key priority, array previews, freeform size stats, and getter response sizes. Unknown tools degrade to name-only. Formatted output uses deterministic ID-first key ordering for scan consistency. --- src/planner/lib/audit.ts | 248 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 232 insertions(+), 16 deletions(-) diff --git a/src/planner/lib/audit.ts b/src/planner/lib/audit.ts index aea727a..9d2f980 100644 --- a/src/planner/lib/audit.ts +++ b/src/planner/lib/audit.ts @@ -341,17 +341,114 @@ export async function readProjection(dir: string): Promise { } } -// Structured log line for the widget log card. The widget applies -// theme-aware coloring: prefix dim, highlight normal, meta dim. +// Structured log line for the widget log card. +// `tool` is the left-column scan anchor, `summary` is the right-column detail. +// High-value rows may wrap to two visual lines in the widget. 
export interface LogLine { - prefix: string; - highlight: string; - meta: string; + tool: string; + summary: string; + highValue: boolean; +} + +interface ToolShape { + keys: string[]; + arrays?: string[]; + freeform?: string[]; + getter?: boolean; + highValue?: boolean; } +const PREVIEW_CHARS = 40; +const KEY_PRIORITY = ["id", "milestone", "decision_ref", "intent_ref", "file", "path", "phase"]; + +const KOAN_SHAPES: Record = { + koan_get_plan: { keys: ["phase"], getter: true }, + koan_get_milestone: { keys: ["id"], getter: true }, + koan_get_decision: { keys: ["id"], getter: true }, + koan_get_intent: { keys: ["id"], getter: true }, + koan_get_change: { keys: ["id"], getter: true }, + + koan_set_overview: { keys: ["problem", "approach"], freeform: ["problem", "approach"], highValue: true }, + koan_set_constraints: { keys: ["constraints"], arrays: ["constraints"], highValue: true }, + koan_set_invisible_knowledge: { + keys: ["system", "invariants", "tradeoffs"], + freeform: ["system"], + arrays: ["invariants", "tradeoffs"], + highValue: true, + }, + + koan_add_decision: { keys: ["decision", "reasoning"], freeform: ["decision", "reasoning"], highValue: true }, + koan_set_decision: { keys: ["id", "decision", "reasoning"], freeform: ["decision", "reasoning"], highValue: true }, + koan_add_rejected_alternative: { + keys: ["decision_ref", "alternative", "rejection_reason"], + freeform: ["alternative", "rejection_reason"], + highValue: true, + }, + koan_set_rejected_alternative: { + keys: ["id", "decision_ref", "alternative", "rejection_reason"], + freeform: ["alternative", "rejection_reason"], + highValue: true, + }, + koan_add_risk: { keys: ["decision_ref", "anchor", "risk", "mitigation"], freeform: ["risk", "mitigation"], highValue: true }, + koan_set_risk: { + keys: ["id", "decision_ref", "anchor", "risk", "mitigation"], + freeform: ["risk", "mitigation"], + highValue: true, + }, + + koan_add_milestone: { + keys: ["name", "files", "flags", "requirements", 
"acceptance_criteria", "tests"], + arrays: ["files", "flags", "requirements", "acceptance_criteria", "tests"], + highValue: true, + }, + koan_set_milestone_name: { keys: ["id", "name"] }, + koan_set_milestone_files: { keys: ["id", "files"], arrays: ["files"], highValue: true }, + koan_set_milestone_flags: { keys: ["id", "flags"], arrays: ["flags"] }, + koan_set_milestone_requirements: { keys: ["id", "requirements"], arrays: ["requirements"], highValue: true }, + koan_set_milestone_acceptance_criteria: { keys: ["id", "acceptance_criteria"], arrays: ["acceptance_criteria"], highValue: true }, + koan_set_milestone_tests: { keys: ["id", "tests"], arrays: ["tests"], highValue: true }, + + koan_add_intent: { keys: ["milestone", "file", "function", "behavior"], freeform: ["behavior"], highValue: true }, + koan_set_intent: { keys: ["id", "file", "function", "behavior"], freeform: ["behavior"], highValue: true }, + + koan_add_change: { + keys: ["milestone", "file", "intent_ref", "diff", "doc_diff", "comments"], + freeform: ["diff", "doc_diff", "comments"], + highValue: true, + }, + koan_set_change_diff: { keys: ["id", "diff"], freeform: ["diff"], highValue: true }, + koan_set_change_doc_diff: { keys: ["id", "doc_diff"], freeform: ["doc_diff"], highValue: true }, + koan_set_change_comments: { keys: ["id", "comments"], freeform: ["comments"], highValue: true }, + koan_set_change_file: { keys: ["id", "file"], highValue: true }, + koan_set_change_intent_ref: { keys: ["id", "intent_ref"] }, + + koan_add_wave: { keys: ["milestones"], arrays: ["milestones"], highValue: true }, + koan_set_wave_milestones: { keys: ["id", "milestones"], arrays: ["milestones"], highValue: true }, + + koan_add_diagram: { keys: ["type", "scope", "title"] }, + koan_set_diagram: { keys: ["id", "title", "scope", "ascii_render"], freeform: ["ascii_render"], highValue: true }, + koan_add_diagram_node: { keys: ["diagram_id", "id", "label", "type"] }, + koan_add_diagram_edge: { keys: ["diagram_id", "source", 
"target", "label", "protocol"] }, + + koan_set_readme_entry: { keys: ["path", "content"], freeform: ["content"], highValue: true }, + + koan_qr_add_item: { keys: ["phase", "scope", "check", "severity"], freeform: ["check"], highValue: true }, + koan_qr_set_item: { keys: ["phase", "id", "status", "finding"], freeform: ["finding"], highValue: true }, + koan_qr_assign_group: { keys: ["phase", "group_id", "ids"], arrays: ["ids"], highValue: true }, + koan_qr_get_item: { keys: ["phase", "id"], getter: true }, + koan_qr_list_items: { keys: ["phase", "status"], getter: true }, + koan_qr_summary: { keys: ["phase"], getter: true }, + + koan_store_context: { + keys: ["task_spec", "constraints", "entry_points", "rejected_alternatives", "current_understanding", "assumptions", "invisible_knowledge", "reference_docs"], + arrays: ["task_spec", "constraints", "entry_points", "rejected_alternatives", "current_understanding", "assumptions", "invisible_knowledge", "reference_docs"], + highValue: true, + }, +}; + // Reads the tail of events.jsonl and returns structured log entries. // Filters out heartbeats (noisy). Used by session.ts to feed the widget log card. -export async function readRecentLogs(dir: string, count = 5): Promise { +export async function readRecentLogs(dir: string, count = 8): Promise { try { const raw = await fs.readFile(path.join(dir, "events.jsonl"), "utf8"); const events = raw @@ -366,27 +463,146 @@ export async function readRecentLogs(dir: string, count = 5): Promise } } -function sizeSuffix(e: { lines?: number; chars?: number }): string { - return e.lines != null ? `(${e.lines}L, ${e.chars}c)` : ""; +function formatChars(chars: number): string { + if (chars < 1000) return `${chars}c`; + const k = chars / 1000; + if (k >= 10) return `${Math.round(k)}k`; + return `${k.toFixed(1)}k`; +} + +function textStats(text: string): string { + const lines = text.length === 0 ? 
0 : text.split("\n").length; + return `${lines}L/${formatChars(text.length)}`; +} + +function responseSize(response: string[]): string { + return textStats(response.join("\n")); +} + +function truncateUnicode(text: string, maxChars: number): string { + const chars = Array.from(text); + if (chars.length <= maxChars) return text; + return `${chars.slice(0, maxChars).join("")}…`; +} + +function inlineScalar(value: unknown): string { + if (typeof value === "string") { + return truncateUnicode(value.replace(/\r\n?|\n/gu, "\\n"), PREVIEW_CHARS); + } + if (typeof value === "number" || typeof value === "boolean") { + return String(value); + } + if (value === null) return "null"; + if (Array.isArray(value)) return `[${value.length}]`; + if (typeof value === "object") return "{…}"; + return String(value); +} + +function arrayPreview(value: unknown): string { + if (!Array.isArray(value) || value.length === 0) { + return "[]"; + } + const first = inlineScalar(value[0]); + if (value.length === 1) { + return `[${first}]`; + } + return `[${first}] +${value.length - 1}`; +} + +function freeformSize(value: unknown): string { + if (typeof value === "string") { + return textStats(value); + } + const json = JSON.stringify(value); + return textStats(json ?? String(value)); +} + +function hasKey(input: Record, key: string): boolean { + return Object.prototype.hasOwnProperty.call(input, key); +} + +function orderedShapeKeys(keys: string[]): string[] { + const indexed = keys.map((key, index) => ({ key, index })); + indexed.sort((a, b) => { + const pa = KEY_PRIORITY.indexOf(a.key); + const pb = KEY_PRIORITY.indexOf(b.key); + const ra = pa === -1 ? Number.MAX_SAFE_INTEGER : pa; + const rb = pb === -1 ? Number.MAX_SAFE_INTEGER : pb; + if (ra !== rb) return ra - rb; + return a.index - b.index; + }); + return indexed.map((x) => x.key); +} + +function formatKnownKoan(e: ToolKoanEvent, shape: ToolShape): LogLine { + const arrayKeys = new Set(shape.arrays ?? 
[]); + const freeformKeys = new Set(shape.freeform ?? []); + const chunks: string[] = []; + + for (const key of orderedShapeKeys(shape.keys)) { + if (!hasKey(e.input, key)) continue; + const value = e.input[key]; + + if (arrayKeys.has(key)) { + chunks.push(`${key}:${arrayPreview(value)}`); + continue; + } + + if (freeformKeys.has(key)) { + chunks.push(`${key}:${freeformSize(value)}`); + continue; + } + + chunks.push(`${key}=${inlineScalar(value)}`); + } + + if (shape.getter) { + if (chunks.length === 0) { + chunks.push("scope=plan"); + } + chunks.push(`resp:${responseSize(e.response)}`); + } + + return { + tool: e.tool, + summary: chunks.join(" · "), + highValue: shape.highValue ?? chunks.length >= 3, + }; +} + +function formatKoanLogLine(e: ToolKoanEvent): LogLine { + const shape = KOAN_SHAPES[e.tool]; + if (!shape) { + return { tool: e.tool, summary: "", highValue: false }; + } + return formatKnownKoan(e, shape); } function formatLogLine(e: AuditEvent): LogLine { switch (e.kind) { case "phase_start": - return { prefix: "phase", highlight: e.phase, meta: `(${e.totalSteps} steps)` }; + return { tool: "phase", summary: `${e.phase} (${e.totalSteps} steps)`, highValue: false }; case "step_transition": - return { prefix: `current step ${e.step}/${e.totalSteps}:`, highlight: e.name, meta: "" }; + return { tool: `step ${e.step}/${e.totalSteps}`, summary: e.name, highValue: false }; case "phase_end": - return { prefix: "phase", highlight: e.outcome, meta: e.detail ?? "" }; + return { tool: "phase", summary: e.detail ? `${e.outcome} · ${e.detail}` : e.outcome, highValue: false }; case "tool_file": - return { prefix: e.tool, highlight: e.path, meta: sizeSuffix(e) }; + return { + tool: e.tool, + summary: e.lines != null ? `${e.path} · ${e.lines}L/${formatChars(e.chars ?? 0)}` : e.path, + highValue: e.tool === "read", + }; case "tool_bash": - return { prefix: "bash", highlight: e.bin, meta: sizeSuffix(e) }; + return { + tool: "bash", + summary: e.lines != null ? 
`${e.bin} · ${e.lines}L/${formatChars(e.chars ?? 0)}` : e.bin, + highValue: false, + }; case "tool_koan": - return { prefix: "koan", highlight: e.tool, meta: "" }; + return formatKoanLogLine(e); case "tool_generic": - return { prefix: "tool", highlight: e.tool, meta: "" }; + return { tool: e.tool, summary: "", highValue: false }; case "heartbeat": - return { prefix: "", highlight: "heartbeat", meta: "" }; + return { tool: "heartbeat", summary: "", highValue: false }; } } From fd891934bbc7f4f3e6c941e27c839dc9cb99193c Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Wed, 25 Feb 2026 15:34:23 +0700 Subject: [PATCH 018/412] Plumb QR counter stats into widget updates Adds qrDone/qrTotal/qrPass/qrFail/qrTodo fields to widget updates throughout the QR block and fix loop. Polls qr-plan-design .json during verify phase to keep counters live. Resets counters on iteration boundaries and fix-loop re-entry. --- src/planner/session.ts | 136 +++++++++++++++++++++++++++++++++-------- 1 file changed, 109 insertions(+), 27 deletions(-) diff --git a/src/planner/session.ts b/src/planner/session.ts index b29e98b..250cdbb 100644 --- a/src/planner/session.ts +++ b/src/planner/session.ts @@ -62,6 +62,15 @@ export function createSession(pi: ExtensionAPI, dispatch: WorkflowDispatch, plan activeIndex: 1, step: "spawning architect...", activity: "", + qrIterationsMax: MAX_FIX_ITERATIONS + 1, + qrIteration: 1, + qrMode: "initial", + qrPhase: "execute", + qrDone: null, + qrTotal: null, + qrPass: null, + qrFail: null, + qrTodo: null, }); log("Spawning architect after context capture", { planDir, subagentDir }); @@ -132,6 +141,11 @@ export function createSession(pi: ExtensionAPI, dispatch: WorkflowDispatch, plan qrIteration: 1, qrMode: "initial", qrPhase: "execute", + qrDone: null, + qrTotal: null, + qrPass: null, + qrFail: null, + qrTodo: null, }); const qr = await runPlanDesignWithQR(planDir, ctx.cwd, extensionPath, state, log, widget); @@ -212,7 +226,16 @@ async function runQRBlock( ): Promise 
{ // 1. Spawn decomposer subagent state.phase = "qr-decompose-running"; - widget?.update({ step: "qr-decompose: starting...", activity: "", qrPhase: "decompose" }); + widget?.update({ + step: "qr-decompose: starting...", + activity: "", + qrPhase: "decompose", + qrDone: null, + qrTotal: null, + qrPass: null, + qrFail: null, + qrTodo: null, + }); const decomposeDir = await createSubagentDir(planDir, "qr-decomposer"); const decomposePoll = setInterval(async () => { @@ -267,29 +290,62 @@ async function runQRBlock( } const itemIds = qr.items.map((i) => i.id); + const initialPass = qr.items.filter((i) => i.status === "PASS").length; + const initialFail = qr.items.filter((i) => i.status === "FAIL").length; + const initialTodo = qr.items.filter((i) => i.status === "TODO").length; log("QR decompose complete", { itemCount: itemIds.length }); - widget?.update({ step: `qr-verify: 0/${itemIds.length}`, activity: "" }); + widget?.update({ + step: `qr-verify: 0/${itemIds.length}`, + activity: "", + qrTotal: itemIds.length, + qrDone: 0, + qrPass: initialPass, + qrFail: initialFail, + qrTodo: initialTodo, + }); // 3. 
Spawn reviewer pool state.phase = "qr-verify-running"; widget?.update({ qrPhase: "verify" }); - const result = await pool( - itemIds, - QR_POOL_CONCURRENCY, - async (itemId) => { - const reviewerDir = await createSubagentDir(planDir, `qr-reviewer-${itemId}`); - return spawnReviewer({ - planDir, - subagentDir: reviewerDir, - cwd, - extensionPath, - itemId, - log, - }); - }, - (done, total) => widget?.update({ step: `qr-verify: ${done}/${total}` }), - ); + let verifyDone = 0; + const verifyStatsPoll = setInterval(async () => { + try { + const raw = await fs.readFile(qrPath, "utf8"); + const current = JSON.parse(raw) as QRFile; + const pass = current.items.filter((i) => i.status === "PASS").length; + const fail = current.items.filter((i) => i.status === "FAIL").length; + const todo = current.items.filter((i) => i.status === "TODO").length; + widget?.update({ qrPass: pass, qrFail: fail, qrTodo: todo, qrDone: verifyDone, qrTotal: current.items.length }); + } catch { + // Ignore transient read races while reviewers write. + } + }, 2000); + + let result: Awaited>; + try { + result = await pool( + itemIds, + QR_POOL_CONCURRENCY, + async (itemId) => { + const reviewerDir = await createSubagentDir(planDir, `qr-reviewer-${itemId}`); + return spawnReviewer({ + planDir, + subagentDir: reviewerDir, + cwd, + extensionPath, + itemId, + log, + }); + }, + (done, total) => { + verifyDone = done; + widget?.update({ step: `qr-verify: ${done}/${total}`, qrDone: done, qrTotal: total }); + }, + ); + } finally { + clearInterval(verifyStatsPoll); + } // 4. 
Read final results state.phase = "qr-complete"; @@ -309,7 +365,15 @@ async function runQRBlock( log("QR block complete", { pass, fail, todo, failedReviewers: result.failed }); const passed = fail === 0 && result.failed.length === 0; - widget?.update({ step: summary, activity: "" }); + widget?.update({ + step: summary, + activity: "", + qrDone: itemIds.length, + qrTotal: itemIds.length, + qrPass: pass, + qrFail: fail, + qrTodo: todo, + }); return { summary, passed }; } @@ -338,14 +402,23 @@ async function runPlanDesignWithQR( // Initial QR (iteration 1) let qr = await runQRBlock(planDir, cwd, extensionPath, state, log, widget); if (qr.passed) { - widget?.update({ qrPhase: "done", qrMode: null, qrIteration: null, qrIterationsMax: null, phaseStatus: { index: 1, status: "completed" } }); + widget?.update({ qrPhase: "done", phaseStatus: { index: 1, status: "completed" } }); return qr; } - widget?.update({ qrPhase: "execute" }); + widget?.update({ qrPhase: "execute", qrDone: null, qrTotal: null, qrPass: null, qrFail: null, qrTodo: null }); for (let iteration = 2; iteration <= MAX_FIX_ITERATIONS + 1; iteration++) { - widget?.update({ qrIteration: iteration, qrMode: "fix", qrPhase: "execute" }); + widget?.update({ + qrIteration: iteration, + qrMode: "fix", + qrPhase: "execute", + qrDone: null, + qrTotal: null, + qrPass: null, + qrFail: null, + qrTodo: null, + }); // Read QR file for severity check let qrFile: QRFile; @@ -354,7 +427,7 @@ async function runPlanDesignWithQR( qrFile = JSON.parse(raw) as QRFile; } catch { log("Fix loop: failed to read QR file", { iteration }); - widget?.update({ qrPhase: "done", qrMode: null, qrIteration: null, qrIterationsMax: null }); + widget?.update({ qrPhase: "done" }); return { summary: "Fix loop aborted: cannot read QR file.", passed: false }; } @@ -365,7 +438,16 @@ async function runPlanDesignWithQR( if (qrPassesAtIteration(qrFile.items, iteration)) { const pass = qrFile.items.filter((i) => i.status === "PASS").length; const fail = 
qrFile.items.filter((i) => i.status === "FAIL").length; - widget?.update({ qrPhase: "done", qrMode: null, qrIteration: null, qrIterationsMax: null, phaseStatus: { index: 1, status: "completed" } }); + const todo = qrFile.items.filter((i) => i.status === "TODO").length; + widget?.update({ + qrPhase: "done", + qrDone: pass + fail, + qrTotal: qrFile.items.length, + qrPass: pass, + qrFail: fail, + qrTodo: todo, + phaseStatus: { index: 1, status: "completed" }, + }); return { passed: true, summary: `QR passed at iteration ${iteration} after severity de-escalation: ${pass} PASS, ${fail} FAIL (non-blocking).`, @@ -415,17 +497,17 @@ async function runPlanDesignWithQR( }); qr = await runQRBlock(planDir, cwd, extensionPath, state, log, widget); if (qr.passed) { - widget?.update({ qrPhase: "done", qrMode: null, qrIteration: null, qrIterationsMax: null, phaseStatus: { index: 1, status: "completed" } }); + widget?.update({ qrPhase: "done", phaseStatus: { index: 1, status: "completed" } }); return qr; } - widget?.update({ qrPhase: "execute" }); + widget?.update({ qrPhase: "execute", qrDone: null, qrTotal: null, qrPass: null, qrFail: null, qrTodo: null }); } // Max iterations reached. MUST failures remaining after 5 fix attempts // indicate a structural problem -- silently passing would propagate a // known-broken plan downstream. - widget?.update({ qrPhase: "done", qrMode: null, qrIteration: null, qrIterationsMax: null }); + widget?.update({ qrPhase: "done" }); return { passed: false, summary: `${qr.summary} (max ${MAX_FIX_ITERATIONS} fix iterations reached)`, From 25149c63011042aa6e06bf66fc4eb64615df0e5e Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Wed, 25 Feb 2026 15:34:29 +0700 Subject: [PATCH 019/412] Integrated workspace card with two-column log and QR counters Merges planning card and log card into a single integrated card with internal divider. Log entries render as two-column grid (tool name left, summary right) with high-value rows wrapping to 2 lines. 
QR section renders inline with responsive tiers (wide/ medium/tight), phase rail, and pass/fail/todo counters. Column widths shared between planning body and log body for vertical alignment. --- src/planner/ui/widget.ts | 456 ++++++++++++++++++++++++++++++--------- 1 file changed, 356 insertions(+), 100 deletions(-) diff --git a/src/planner/ui/widget.ts b/src/planner/ui/widget.ts index e16cfed..32114ca 100644 --- a/src/planner/ui/widget.ts +++ b/src/planner/ui/widget.ts @@ -41,6 +41,11 @@ interface WidgetState { qrIterationsMax: number | null; qrMode: QRMode | null; qrPhase: QRPhase; + qrDone: number | null; + qrTotal: number | null; + qrPass: number | null; + qrFail: number | null; + qrTodo: number | null; } export interface WidgetUpdate { @@ -54,6 +59,11 @@ export interface WidgetUpdate { qrIterationsMax?: number | null; qrMode?: QRMode | null; qrPhase?: QRPhase; + qrDone?: number | null; + qrTotal?: number | null; + qrPass?: number | null; + qrFail?: number | null; + qrTodo?: number | null; } // -- Constants -- @@ -66,7 +76,7 @@ const LOG_LINES = 5; const BODY_INDENT = " "; const PLANNING_PHASES: ReadonlyArray<{ key: string; label: string; detail: string }> = [ - { key: "ctx", label: "Context", detail: "Gathering context" }, + { key: "ctx", label: "Context gathering", detail: "Gathering initial context" }, { key: "design", label: "Plan design", detail: "Designing plan" }, { key: "code", label: "Plan code", detail: "Creating code plan" }, { key: "docs", label: "Plan docs", detail: "Documenting plan" }, @@ -97,6 +107,7 @@ const LOG_PLACEHOLDER = "No recent log entries"; const TIMELINE_MIN_WIDTH = 16; const TIMELINE_MAX_WIDTH = 28; const CONNECTOR = "│"; +const COLUMN_GAP = 4; interface BorderStyle { topLeft: string; @@ -116,15 +127,6 @@ const BORDER_SOLID: BorderStyle = { vertical: "│", }; -const BORDER_SUBTLE: BorderStyle = { - topLeft: "╭", - topRight: "╮", - bottomLeft: "╰", - bottomRight: "╯", - horizontal: "─", - vertical: "│", -}; - // -- Canvas primitive -- 
// Content width adapts to terminal; background fills edge to edge. @@ -159,6 +161,22 @@ function indentLines(lines: string[], width: number, indent = BODY_INDENT): stri return lines.map((line) => indent + clampToWidth(line, available)); } +interface PlanningColumns { + innerWidth: number; + contentWidth: number; + timelineWidth: number; + detailWidth: number; +} + +function planningColumns(width: number): PlanningColumns { + const innerWidth = Math.max(0, width - 2); + const indentWidth = visibleWidth(BODY_INDENT); + const contentWidth = Math.max(0, innerWidth - indentWidth); + const timelineWidth = Math.min(TIMELINE_MAX_WIDTH, Math.max(TIMELINE_MIN_WIDTH, Math.floor(contentWidth * 0.3))); + const detailWidth = Math.max(14, contentWidth - timelineWidth - COLUMN_GAP); + return { innerWidth, contentWidth, timelineWidth, detailWidth }; +} + function formatElapsed(ms: number): string { const totalSec = Math.floor(ms / 1000); const m = Math.floor(totalSec / 60); @@ -178,7 +196,7 @@ function activePhase(state: WidgetState): PhaseEntry | null { function normalizeLogLines(lines: readonly LogLine[] | undefined): LogLine[] { if (!lines || lines.length === 0) return []; - return [...lines].slice(-LOG_LINES); + return [...lines].slice(-(LOG_LINES * 2)); } function phaseChipLabel(phase: PhaseEntry, index: number, state: WidgetState, theme: Theme): string { @@ -238,38 +256,165 @@ function renderTimelineLines(state: WidgetState, theme: Theme, width: number): s return lines; } -function upcomingSummary(state: WidgetState): string { - const remaining = state.activeIndex < 0 - ? 
[] - : state.phases.slice(state.activeIndex + 1).filter((p) => p.status !== "failed"); - if (state.activeIndex < 0) return "Planning complete"; - if (remaining.length === 0) return "Final step in progress"; - const labels = remaining.map((p) => p.label).join(" → "); - return `Upcoming: ${labels}`; +function shouldShowQR(state: WidgetState): boolean { + if (state.qrIteration === null) return false; + const active = activePhase(state); + if (!active) return false; + return active.key !== "ctx"; +} + +type QRTier = "wide" | "medium" | "tight"; + +const QR_TIER_MEDIUM_WIDTH = 68; +const QR_TIER_TIGHT_WIDTH = 52; +const QR_META_MAX_CHARS = 64; + +function qrTier(width: number): QRTier { + if (width < QR_TIER_TIGHT_WIDTH) return "tight"; + if (width < QR_TIER_MEDIUM_WIDTH) return "medium"; + return "wide"; +} + +function qrPhaseLabel(phase: QRPhase): string { + switch (phase) { + case "idle": + return "execute"; + case "execute": + return "execute"; + case "decompose": + return "decompose"; + case "verify": + return "verify"; + case "done": + return "done"; + } +} + +function qrPhaseShortLabel(phase: QRPhase): string { + switch (phase) { + case "idle": + return "exec"; + case "execute": + return "exec"; + case "decompose": + return "decomp"; + case "verify": + return "vfy"; + case "done": + return "done"; + } +} + +function firstBudgeted(candidates: string[], budget: number): string { + for (const c of candidates) { + if (visibleWidth(c) <= budget) return c; + } + const fallback = candidates[candidates.length - 1] ?? ""; + return truncateToWidth(fallback, budget, "…", false); } -function renderQRStatusWidget(state: WidgetState, theme: Theme, width: number): string[] { - if (state.qrIteration === null || state.qrPhase === "idle") { +function qrMetaText(state: WidgetState, tier: QRTier, budget: number): string { + const phase = qrPhaseLabel(state.qrPhase); + const short = qrPhaseShortLabel(state.qrPhase); + const modeFull = state.qrMode === "fix" ? 
"fix" : "initial"; + const modeShort = state.qrMode === "fix" ? "fx" : "in"; + const iter = state.qrIteration ?? 0; + const iterMax = state.qrIterationsMax ? `/${state.qrIterationsMax}` : ""; + const iterFull = `${iter}${iterMax}`; + + const wide = `phase:${phase} · iter ${iterFull} ${modeFull}`; + const medium = `${phase} · iter ${iterFull} ${modeFull}`; + const compact = `${short} · i${iterFull} ${modeFull}`; + const tight = `${short} i${iterFull} ${modeShort}`; + + const candidates = tier === "wide" + ? [wide, medium, compact, tight] + : tier === "medium" + ? [medium, compact, tight] + : [compact, tight]; + + return firstBudgeted(candidates, budget); +} + +interface QRCounterValues { + done: string; + pass: string; + fail: string; + todo: string; +} + +function qrCounterValues(state: WidgetState): QRCounterValues { + const meaningful = (state.qrPhase === "verify" || state.qrPhase === "done") && state.qrTotal !== null; + if (!meaningful || state.qrTotal === null) { + return { done: "-/-", pass: "-", fail: "-", todo: "-" }; + } + + return { + done: `${state.qrDone ?? 0}/${state.qrTotal}`, + pass: String(state.qrPass ?? 0), + fail: String(state.qrFail ?? 0), + todo: String(state.qrTodo ?? 0), + }; +} + +function renderQRCounterLine(state: WidgetState, theme: Theme, tier: QRTier, width: number, budget: number): string { + const values = qrCounterValues(state); + + const labelSets = tier === "wide" + ? 
[ + { done: "done", pass: "pass", fail: "fail", todo: "todo" }, + { done: "d", pass: "p", fail: "f", todo: "t" }, + ] + : [{ done: "d", pass: "p", fail: "f", todo: "t" }]; + + const render = (labels: { done: string; pass: string; fail: string; todo: string }) => [ + `${theme.fg("muted", `${labels.done}:`)}${theme.fg("dim", values.done)}`, + `${theme.fg("muted", `${labels.pass}:`)}${theme.fg("accent", values.pass)}`, + `${theme.fg("muted", `${labels.fail}:`)}${theme.bold(theme.fg("error", values.fail))}`, + `${theme.fg("muted", `${labels.todo}:`)}${theme.fg("muted", values.todo)}`, + ].join(" "); + + const candidates = labelSets.map(render); + const selected = firstBudgeted(candidates, budget); + return clampToWidth(selected, width, "…"); +} + +function renderQRStatusSection(state: WidgetState, theme: Theme, width: number): string[] { + if (!shouldShowQR(state)) { return []; } - const innerWidth = Math.max(0, width - 2); - const iterationTotal = state.qrIterationsMax ? ` / ${state.qrIterationsMax}` : ""; - const modeLabel = state.qrMode === "fix" ? "Fix" : "Initial"; - - const headerLeft = theme.bold(theme.fg("accent", "Quality review")); - const headerRightParts = [`Iter ${state.qrIteration}${iterationTotal}`]; - if (modeLabel) headerRightParts.push(modeLabel); - const headerRight = theme.fg("dim", headerRightParts.join(" · ")); - - const phaseEntries: Array<{ key: Exclude; label: string }> = [ - { key: "execute", label: state.qrMode === "fix" ? 
"Execute (fix)" : "Execute" }, - { key: "decompose", label: "QR decompose" }, - { key: "verify", label: "QR verify" }, - ]; + const tier = qrTier(width); + const budget = Math.min(width, QR_META_MAX_CHARS); - let currentIndex = phaseEntries.findIndex((entry) => entry.key === state.qrPhase); - if (state.qrPhase === "done") { + const headerMeta = qrMetaText(state, tier, budget); + const header = clampToWidth( + `${theme.bold(theme.fg("accent", "QR"))} ${theme.fg("muted", "|")} ${theme.fg("dim", headerMeta)}`, + width, + "…", + ); + + const phaseEntries: Array<{ key: Exclude; label: string }> = tier === "wide" + ? [ + { key: "execute", label: state.qrMode === "fix" ? "Execute (fix)" : "Execute" }, + { key: "decompose", label: "QR decompose" }, + { key: "verify", label: "QR verify" }, + ] + : tier === "medium" + ? [ + { key: "execute", label: state.qrMode === "fix" ? "Exec(fix)" : "Exec" }, + { key: "decompose", label: "Decomp" }, + { key: "verify", label: "Verify" }, + ] + : [ + { key: "execute", label: "X" }, + { key: "decompose", label: "D" }, + { key: "verify", label: "V" }, + ]; + + const effectivePhase: Exclude = state.qrPhase === "idle" ? "execute" : state.qrPhase; + let currentIndex = phaseEntries.findIndex((entry) => entry.key === effectivePhase); + if (effectivePhase === "done") { currentIndex = phaseEntries.length; } @@ -283,36 +428,11 @@ function renderQRStatusWidget(state: WidgetState, theme: Theme, width: number): return theme.fg("muted", entry.label); }); - const separator = theme.fg("muted", " → "); - const stageLine = clampToWidth(segments.join(separator), innerWidth, "…"); - - const description = (() => { - if (state.qrPhase === "execute") { - return state.qrMode === "fix" - ? "Fix-mode architect applies QR feedback." - : "Initial execution to gather plan context."; - } - if (state.qrPhase === "decompose") { - return state.qrIteration && state.qrIteration > 1 - ? "Re-decomposing updates into review items." 
- : "Deriving QR checklist from the current plan."; - } - if (state.qrPhase === "verify") { - return "Massively parallel reviewers scoring QR items."; - } - if (state.qrPhase === "done") { - return "Quality review loop complete."; - } - return ""; - })(); - - const body: string[] = []; - body.push(stageLine); - if (description) { - body.push(clampToWidth(theme.fg("muted", description), innerWidth, "…")); - } + const rail = clampToWidth(segments.join(theme.fg("muted", " → ")), width, "…"); + const counters = renderQRCounterLine(state, theme, tier, width, budget); + const divider = clampToWidth(theme.fg("muted", "─".repeat(width)), width); - return renderBox(headerLeft, headerRight, body, width, theme, BORDER_SUBTLE); + return [header, rail, counters, divider]; } interface DetailSections { @@ -327,6 +447,7 @@ function buildDetailSections(state: WidgetState, theme: Theme, width: number): D const active = activePhase(state); const stepTitle = state.step || active?.detail || active?.label || "Awaiting step"; + core.push(clampToWidth(theme.fg("dim", "Current step"), width)); core.push(clampToWidth(theme.bold(theme.fg("accent", stepTitle)), width, "…")); if (state.activity) { @@ -336,24 +457,18 @@ function buildDetailSections(state: WidgetState, theme: Theme, width: number): D } } - const qrWidget = renderQRStatusWidget(state, theme, width); - if (qrWidget.length > 0) { + const qrSection = renderQRStatusSection(state, theme, width); + if (qrSection.length > 0) { if (core.length > 0 && core[core.length - 1].trim() !== "") { core.push(blank); } - core.push(...qrWidget.map((line) => clampToWidth(line, width))); + core.push(...qrSection.map((line) => clampToWidth(line, width))); } if (active) { - footer.push(...wrapTextWithAnsi(theme.fg("dim", `Phase ${state.activeIndex + 1}/${state.phases.length}`), width).map((line) => clampToWidth(line, width, "…"))); footer.push(...wrapTextWithAnsi(theme.fg("dim", `Plan · ${state.planId}`), width).map((line) => clampToWidth(line, width, 
"…"))); } - const summary = upcomingSummary(state); - if (summary) { - footer.push(...wrapTextWithAnsi(theme.fg("muted", summary), width).map((line) => clampToWidth(line, width, "…"))); - } - return { core, footer }; } @@ -403,9 +518,7 @@ function renderBox( function renderPlanningCard(state: WidgetState, theme: Theme, width: number): string[] { const elapsed = theme.fg("dim", formatElapsed(Date.now() - state.startedAt)); - const innerWidth = Math.max(0, width - 2); - const indentWidth = visibleWidth(BODY_INDENT); - const contentWidth = Math.max(0, innerWidth - indentWidth); + const { innerWidth, contentWidth, timelineWidth, detailWidth } = planningColumns(width); if (innerWidth < 60 || contentWidth < 40) { const fallbackContent: string[] = [ @@ -417,6 +530,10 @@ function renderPlanningCard(state: WidgetState, theme: Theme, width: number): st ]; const detail = formatDetail(state, theme, contentWidth); if (detail) fallbackContent.push(detail); + const qrCompact = formatQRCompact(state, theme, contentWidth); + if (qrCompact.length > 0) { + fallbackContent.push(...qrCompact); + } fallbackContent.push(""); const body = indentLines(fallbackContent, innerWidth); @@ -430,8 +547,6 @@ function renderPlanningCard(state: WidgetState, theme: Theme, width: number): st } const chipsLine = renderPhaseChips(state, theme, contentWidth); - const timelineWidth = Math.min(TIMELINE_MAX_WIDTH, Math.max(TIMELINE_MIN_WIDTH, Math.floor(contentWidth * 0.3))); - const detailWidth = Math.max(14, contentWidth - timelineWidth - 4); const timelineLines = renderTimelineLines(state, theme, timelineWidth); const detailSections = buildDetailSections(state, theme, detailWidth); @@ -442,7 +557,7 @@ function renderPlanningCard(state: WidgetState, theme: Theme, width: number): st for (let i = 0; i < maxLines; i++) { const left = timelineLines[i] ?? ""; const right = detailLines[i] ?? 
""; - const composed = `${clampToWidth(left, timelineWidth)} ${clampToWidth(right, detailWidth)}`; + const composed = `${clampToWidth(left, timelineWidth)}${" ".repeat(COLUMN_GAP)}${clampToWidth(right, detailWidth)}`; combined.push(clampToWidth(composed, contentWidth)); } @@ -458,7 +573,7 @@ function renderPlanningCard(state: WidgetState, theme: Theme, width: number): st ); return renderBox( - `${BODY_INDENT}${theme.bold(theme.fg("accent", "Planning Workspace"))}`, + `${BODY_INDENT}${theme.bold(theme.fg("accent", "Planning"))}`, elapsed, body, width, @@ -466,25 +581,99 @@ function renderPlanningCard(state: WidgetState, theme: Theme, width: number): st ); } -function renderLogLine(entry: LogLine, theme: Theme): string { - const parts: string[] = []; - if (entry.prefix) parts.push(theme.fg("dim", entry.prefix)); - if (entry.highlight) parts.push(theme.bold(entry.highlight)); - if (entry.meta) parts.push(theme.fg("dim", entry.meta)); - return `${theme.fg("dim", "•")} ${parts.join(" ")}`; +function wrapRightColumn(entry: LogLine, width: number): string[] { + const summary = entry.summary.trim(); + if (!summary) return [""]; + + if (!entry.highValue) { + return [clampToWidth(summary, width, "…")]; + } + + const wrapped = wrapTextWithAnsi(summary, width).map((line) => clampToWidth(line, width, "…")); + if (wrapped.length <= 1) return wrapped; + if (wrapped.length === 2) return wrapped; + + const tail = wrapped.slice(1).join(" ").replace(/\s+/gu, " ").trim(); + return [wrapped[0], clampToWidth(truncateToWidth(tail, width, "…", false), width)]; +} + +function renderLogEntry(entry: LogLine, theme: Theme, leftWidth: number, rightWidth: number, gap: number): string[] { + const rightLines = wrapRightColumn(entry, rightWidth); + const rows: string[] = []; + + rightLines.forEach((line, index) => { + const left = index === 0 + ? 
theme.bold(theme.fg("accent", entry.tool)) + : ""; + const composed = `${clampToWidth(left, leftWidth)}${" ".repeat(gap)}${clampToWidth(theme.fg("muted", line), rightWidth)}`; + rows.push(composed); + }); + + return rows; +} + +interface LogColumns { + left: number; + right: number; + gap: number; } -function renderLogCard(state: WidgetState, theme: Theme, width: number): string[] { +function logColumnWidths(availableWidth: number, entries: readonly LogLine[], gap: number): LogColumns { + const longestTool = entries.reduce((max, entry) => Math.max(max, visibleWidth(entry.tool)), 0); + const preferredLeft = Math.max(16, Math.min(38, longestTool + 2)); + + const minRight = availableWidth < 64 ? 18 : 24; + let left = Math.min(preferredLeft, Math.floor(availableWidth * 0.42)); + left = Math.min(left, Math.max(14, availableWidth - minRight - gap)); + left = Math.max(14, left); + + const right = Math.max(8, availableWidth - left - gap); + return { left, right, gap }; +} + +function renderLogCard(state: WidgetState, theme: Theme, width: number, forcedColumns?: LogColumns): string[] { const innerWidth = Math.max(0, width - 2); + const availableWidth = Math.max(0, innerWidth - visibleWidth(BODY_INDENT)); const hasEntries = state.logLines.length > 0; - const entries = hasEntries ? state.logLines.slice(-LOG_LINES) : []; + const entries = hasEntries ? state.logLines.slice(-(LOG_LINES * 2)) : []; + + const columns = forcedColumns ?? 
logColumnWidths(availableWidth, entries, 2); + const leftWidth = Math.max(8, Math.min(columns.left, Math.max(8, availableWidth - columns.gap - 8))); + const rightWidth = Math.max(8, availableWidth - leftWidth - columns.gap); + + const visualRows: string[] = []; + if (entries.length > 0) { + const rendered = entries.map((entry) => renderLogEntry(entry, theme, leftWidth, rightWidth, columns.gap)); + const selected: string[][] = []; + let remaining = LOG_LINES; + + for (let i = rendered.length - 1; i >= 0; i--) { + if (remaining <= 0) break; + const rowLines = rendered[i]; + if (rowLines.length <= remaining) { + selected.push(rowLines); + remaining -= rowLines.length; + } else { + selected.push(rowLines.slice(0, remaining)); + remaining = 0; + } + } + + selected.reverse(); + for (const lines of selected) { + visualRows.push(...lines); + } + } - const formatted: string[] = hasEntries - ? entries.map((entry) => renderLogLine(entry, theme)) - : [theme.fg("dim", `• ${LOG_PLACEHOLDER}`)]; - while (formatted.length < LOG_LINES) formatted.push(""); + if (visualRows.length === 0) { + visualRows.push(clampToWidth(theme.fg("muted", LOG_PLACEHOLDER), innerWidth)); + } - const body = indentLines(formatted, innerWidth); + while (visualRows.length < LOG_LINES) { + visualRows.push(""); + } + + const body = indentLines(visualRows, innerWidth); return renderBox( `${BODY_INDENT}${theme.bold(theme.fg("accent", "Latest log"))}`, "", @@ -513,6 +702,17 @@ function formatDetail(state: WidgetState, theme: Theme, width: number): string { return clampToWidth(detail, width, "…"); } +function formatQRCompact(state: WidgetState, theme: Theme, width: number): string[] { + if (!shouldShowQR(state)) return []; + + const tier = qrTier(width); + const budget = Math.min(width, QR_META_MAX_CHARS); + const meta = qrMetaText(state, tier, budget); + const line1 = clampToWidth(`${theme.fg("muted", "QR")} ${theme.fg("muted", "|")} ${theme.fg("dim", meta)}`, width, "…"); + const line2 = 
renderQRCounterLine(state, theme, tier, width, budget); + return [line1, line2]; +} + function formatStepLine(state: WidgetState, theme: Theme): string { const total = state.phases.length; const active = activePhase(state); @@ -524,6 +724,46 @@ function formatStepLine(state: WidgetState, theme: Theme): string { return `${count} ${theme.fg("muted", "·")} ${label}`; } +// Pure render: (state, theme, termWidth) -> lines. No side effects. +function stripBoxFrame(lines: string[]): string[] { + if (lines.length <= 2) return []; + return lines.slice(1, -1).map((line) => (line.length >= 2 ? line.slice(1, -1) : "")); +} + +function renderIntegratedWorkspaceCard(state: WidgetState, theme: Theme, width: number): string[] { + const innerWidth = Math.max(0, width - 2); + const elapsed = theme.fg("dim", formatElapsed(Date.now() - state.startedAt)); + const rightInset = " ".repeat(visibleWidth(BODY_INDENT)); + + const { innerWidth: planningInnerWidth, contentWidth, timelineWidth, detailWidth } = planningColumns(width); + const alignedColumns: LogColumns | undefined = planningInnerWidth >= 60 && contentWidth >= 40 + ? { left: timelineWidth, right: detailWidth, gap: COLUMN_GAP } + : undefined; + + const planningInner = stripBoxFrame(renderPlanningCard(state, theme, width)); + const logInner = stripBoxFrame(renderLogCard(state, theme, width, alignedColumns)); + + const divider = clampToWidth(theme.fg("muted", "─".repeat(innerWidth)), innerWidth); + const spacer = clampToWidth("", innerWidth); + const logTitle = clampToWidth(`${BODY_INDENT}${theme.bold(theme.fg("accent", "Latest log"))}`, innerWidth, "…"); + + const body = [ + ...planningInner, + divider, + spacer, + logTitle, + ...logInner, + ]; + + return renderBox( + `${BODY_INDENT}${theme.bold(theme.fg("accent", "Planning"))}`, + `${elapsed}${rightInset}`, + body, + width, + theme, + ); +} + // Pure render: (state, theme, termWidth) -> lines. No side effects. 
function render(state: WidgetState, theme: Theme, termWidth: number): string[] { const c = (s: string) => canvasLine(s, termWidth, theme); @@ -532,11 +772,7 @@ function render(state: WidgetState, theme: Theme, termWidth: number): string[] { const margin = " ".repeat(CARD_MARGIN); lines.push(c("")); - for (const line of renderPlanningCard(state, theme, cw - CARD_MARGIN)) { - lines.push(c(margin + line)); - } - lines.push(c(margin)); - for (const line of renderLogCard(state, theme, cw - CARD_MARGIN)) { + for (const line of renderIntegratedWorkspaceCard(state, theme, cw - CARD_MARGIN)) { lines.push(c(margin + line)); } lines.push(c("")); @@ -567,6 +803,11 @@ export class WidgetController { qrIterationsMax: null, qrMode: null, qrPhase: "idle", + qrDone: null, + qrTotal: null, + qrPass: null, + qrFail: null, + qrTodo: null, }; this.state.phases[0].status = "running"; @@ -614,6 +855,21 @@ export class WidgetController { if (patch.qrPhase !== undefined) { this.state.qrPhase = patch.qrPhase; } + if (patch.qrDone !== undefined) { + this.state.qrDone = patch.qrDone; + } + if (patch.qrTotal !== undefined) { + this.state.qrTotal = patch.qrTotal; + } + if (patch.qrPass !== undefined) { + this.state.qrPass = patch.qrPass; + } + if (patch.qrFail !== undefined) { + this.state.qrFail = patch.qrFail; + } + if (patch.qrTodo !== undefined) { + this.state.qrTodo = patch.qrTodo; + } this.doRender(); } From edaa2fbf5880a1cb0933ae05eb51316ea9b09043 Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Wed, 25 Feb 2026 15:37:01 +0700 Subject: [PATCH 020/412] Rewrite stale progress test against EventLog/audit API ProgressReporter and readSubagentState were replaced by EventLog, readProjection, and readRecentLogs. Rewrites the test to cover the current API: EventLog persistence, readProjection, readRecentLogs filtering, fold (pure), summarize, and extractToolEvent. 
--- tests/progress.test.ts | 334 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 305 insertions(+), 29 deletions(-) diff --git a/tests/progress.test.ts b/tests/progress.test.ts index 3a69e40..5891306 100644 --- a/tests/progress.test.ts +++ b/tests/progress.test.ts @@ -4,38 +4,314 @@ import { promises as fs } from "node:fs"; import * as os from "node:os"; import * as path from "node:path"; -import { ProgressReporter, readSubagentState } from "../src/utils/progress.js"; +import { EventLog, readProjection, readRecentLogs, fold, summarize, extractToolEvent } from "../src/planner/lib/audit.js"; +import type { Projection, AuditEvent, ToolEvent } from "../src/planner/lib/audit.js"; async function createTempDir(prefix: string): Promise { - const base = await fs.mkdtemp(path.join(os.tmpdir(), prefix)); - return base; + return fs.mkdtemp(path.join(os.tmpdir(), prefix)); } -describe("ProgressReporter", () => { - it("persists progress updates and completion state", async () => { - const tempRoot = await createTempDir("koan-progress-"); - const reporterDir = path.join(tempRoot, "reporter"); - await fs.mkdir(reporterDir, { recursive: true }); - - const reporter = new ProgressReporter(reporterDir, "planner", "analysis"); - - await reporter.update("gathering context"); - await reporter.update("synthesizing plan"); - await reporter.complete("completed"); - - const state = await readSubagentState(reporterDir); - assert.ok(state, "state file should be readable"); - assert.equal(state.role, "planner"); - assert.equal(state.phase, "analysis"); - assert.equal(state.status, "completed"); - assert.equal(state.current, "completed"); - assert.equal(state.trail.length, 3); - assert.deepEqual( - state.trail.map((entry) => entry.msg), - ["gathering context", "synthesizing plan", "completed"], - "trail should capture chronological updates" - ); - - await fs.rm(tempRoot, { recursive: true, force: true }); +// -- EventLog + readProjection -- + +describe("EventLog", () => { + 
it("persists events and projection through step transitions", async () => { + const dir = await createTempDir("koan-audit-"); + + const log = new EventLog(dir, "architect", "plan-design"); + await log.open(); + + await log.emitPhaseStart(6); + await log.emitStepTransition(1, "Task Analysis", 6); + await log.emitStepTransition(2, "Decision Framework", 6); + await log.emitPhaseEnd("completed"); + await log.close(); + + const proj = await readProjection(dir); + assert.ok(proj, "projection should be readable"); + assert.equal(proj.role, "architect"); + assert.equal(proj.phase, "plan-design"); + assert.equal(proj.status, "completed"); + assert.equal(proj.step, 2); + assert.equal(proj.totalSteps, 6); + assert.equal(proj.stepName, "Step 2/6: Decision Framework"); + assert.equal(proj.eventCount, 4); + + // Verify events.jsonl has correct number of lines + const raw = await fs.readFile(path.join(dir, "events.jsonl"), "utf8"); + const lines = raw.trimEnd().split("\n").filter(Boolean); + assert.equal(lines.length, 4); + + await fs.rm(dir, { recursive: true, force: true }); + }); + + it("tracks lastAction from tool events", async () => { + const dir = await createTempDir("koan-audit-"); + + const log = new EventLog(dir, "architect", "plan-design"); + await log.open(); + + await log.append({ + kind: "tool_file", + tool: "read", + path: "src/main.ts", + lines: 50, + chars: 1200, + error: false, + } as Omit); + + const proj = log.state; + assert.equal(proj.lastAction, "read src/main.ts (50L, 1200c)"); + + await log.close(); + await fs.rm(dir, { recursive: true, force: true }); + }); + + it("returns null for missing projection", async () => { + const dir = await createTempDir("koan-audit-"); + const proj = await readProjection(dir); + assert.equal(proj, null); + await fs.rm(dir, { recursive: true, force: true }); + }); +}); + +// -- readRecentLogs -- + +describe("readRecentLogs", () => { + it("returns recent non-heartbeat events as structured LogLines", async () => { + const dir = 
await createTempDir("koan-audit-"); + + const log = new EventLog(dir, "architect", "plan-design"); + await log.open(); + + await log.emitPhaseStart(3); + await log.emitStepTransition(1, "Analysis", 3); + await log.append({ + kind: "tool_file", + tool: "read", + path: "src/foo.ts", + lines: 100, + chars: 3000, + error: false, + } as Omit); + await log.close(); + + const lines = await readRecentLogs(dir, 5); + // 3 events (heartbeats filtered), all returned + assert.equal(lines.length, 3); + + assert.equal(lines[0].tool, "phase"); + assert.ok(lines[0].summary.includes("plan-design")); + + assert.equal(lines[1].tool, "step 1/3"); + assert.equal(lines[1].summary, "Analysis"); + + assert.equal(lines[2].tool, "read"); + assert.ok(lines[2].summary.includes("src/foo.ts")); + assert.ok(lines[2].summary.includes("100L")); + + await fs.rm(dir, { recursive: true, force: true }); + }); + + it("filters out koan_complete_step events", async () => { + const dir = await createTempDir("koan-audit-"); + + const log = new EventLog(dir, "architect", "plan-design"); + await log.open(); + + await log.append({ + kind: "tool_koan", + tool: "koan_complete_step", + input: { thoughts: "done" }, + response: ["ok"], + error: false, + } as Omit); + + await log.append({ + kind: "tool_koan", + tool: "koan_set_overview", + input: { problem: "test" }, + response: ["saved"], + error: false, + } as Omit); + + await log.close(); + + const lines = await readRecentLogs(dir, 5); + assert.equal(lines.length, 1); + assert.equal(lines[0].tool, "koan_set_overview"); + + await fs.rm(dir, { recursive: true, force: true }); + }); + + it("returns empty array for missing directory", async () => { + const lines = await readRecentLogs("/nonexistent/path", 5); + assert.deepEqual(lines, []); + }); +}); + +// -- fold (pure) -- + +describe("fold", () => { + const initial: Projection = { + role: "", + phase: "", + status: "running", + step: 0, + totalSteps: 0, + stepName: "", + lastAction: null, + updatedAt: "", + 
eventCount: 0, + error: null, + }; + + it("phase_start resets projection", () => { + const e: AuditEvent = { + kind: "phase_start", + phase: "plan-design", + role: "architect", + totalSteps: 6, + ts: "2026-01-01T00:00:00Z", + seq: 0, + }; + const s = fold(initial, e); + assert.equal(s.role, "architect"); + assert.equal(s.phase, "plan-design"); + assert.equal(s.totalSteps, 6); + assert.equal(s.eventCount, 1); + }); + + it("step_transition updates step name", () => { + const e: AuditEvent = { + kind: "step_transition", + step: 3, + name: "Risk Assessment", + totalSteps: 6, + ts: "2026-01-01T00:00:01Z", + seq: 1, + }; + const s = fold(initial, e); + assert.equal(s.step, 3); + assert.equal(s.stepName, "Step 3/6: Risk Assessment"); + }); + + it("phase_end sets status and error", () => { + const e: AuditEvent = { + kind: "phase_end", + outcome: "failed", + detail: "timeout", + ts: "2026-01-01T00:00:02Z", + seq: 2, + }; + const s = fold(initial, e); + assert.equal(s.status, "failed"); + assert.equal(s.error, "timeout"); + }); +}); + +// -- summarize -- + +describe("summarize", () => { + it("file tool with size stats", () => { + const e: ToolEvent = { + kind: "tool_file", + tool: "read", + path: "src/main.ts", + lines: 42, + chars: 1500, + error: false, + ts: "", + seq: 0, + }; + assert.equal(summarize(e), "read src/main.ts (42L, 1500c)"); + }); + + it("bash tool with size stats", () => { + const e: ToolEvent = { + kind: "tool_bash", + bin: "grep", + lines: 10, + chars: 200, + error: false, + ts: "", + seq: 0, + }; + assert.equal(summarize(e), "bash grep (10L, 200c)"); + }); + + it("file tool without size stats", () => { + const e: ToolEvent = { + kind: "tool_file", + tool: "edit", + path: "src/foo.ts", + error: false, + ts: "", + seq: 0, + }; + assert.equal(summarize(e), "edit src/foo.ts"); + }); +}); + +// -- extractToolEvent -- + +describe("extractToolEvent", () => { + it("extracts read tool with line/char counts", () => { + const content = "line1\nline2\nline3"; + 
const e = extractToolEvent({ + toolName: "read", + input: { path: "src/test.ts" }, + content: [{ type: "text", text: content }], + isError: false, + }); + assert.equal(e.kind, "tool_file"); + if (e.kind === "tool_file") { + assert.equal(e.tool, "read"); + assert.equal(e.path, "src/test.ts"); + assert.equal(e.lines, 3); + assert.equal(e.chars, content.length); + } + }); + + it("extracts bash tool with line/char counts", () => { + const output = "found 5 matches\n"; + const e = extractToolEvent({ + toolName: "bash", + input: { command: "grep -r pattern ." }, + content: [{ type: "text", text: output }], + isError: false, + }); + assert.equal(e.kind, "tool_bash"); + if (e.kind === "tool_bash") { + assert.equal(e.bin, "grep"); + assert.equal(e.lines, 2); + assert.equal(e.chars, output.length); + } + }); + + it("extracts koan tool with input and response", () => { + const e = extractToolEvent({ + toolName: "koan_set_overview", + input: { problem: "test problem" }, + content: [{ type: "text", text: "saved" }], + isError: false, + }); + assert.equal(e.kind, "tool_koan"); + if (e.kind === "tool_koan") { + assert.equal(e.tool, "koan_set_overview"); + assert.deepEqual(e.response, ["saved"]); + } + }); + + it("falls back to generic for unknown tools", () => { + const e = extractToolEvent({ + toolName: "unknown_tool", + input: {}, + content: [], + isError: false, + }); + assert.equal(e.kind, "tool_generic"); + if (e.kind === "tool_generic") { + assert.equal(e.tool, "unknown_tool"); + } }); }); From 969188c89cd7f950c61517bf4a5519b4f2ea2c2a Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Thu, 26 Feb 2026 09:27:35 +0700 Subject: [PATCH 021/412] planner: make plan-design fix workflow dynamic per QR item --- src/planner/phases/plan-design/fix-phase.ts | 93 ++++-- src/planner/phases/plan-design/fix-prompts.ts | 272 +++++++++++------- 2 files changed, 236 insertions(+), 129 deletions(-) diff --git a/src/planner/phases/plan-design/fix-phase.ts 
b/src/planner/phases/plan-design/fix-phase.ts index 4df6a24..24b5cc8 100644 --- a/src/planner/phases/plan-design/fix-phase.ts +++ b/src/planner/phases/plan-design/fix-phase.ts @@ -1,16 +1,20 @@ -// Plan-design fix phase -- 3-step targeted repair for QR failures. +// Plan-design fix phase -- dynamic N-step targeted repair for QR failures. +// +// totalSteps = 2 + failures.length. Step 1 reads all failures (read-only). +// Steps 2..N+1 each fix one QR item (mutations enabled). Step N+2 reviews +// all fixes (read-only). The step counter IS the item iterator: +// failures[step - 2] gives the current item. // // Separate class from PlanDesignPhase because the workflows diverge: // initial = 6 steps of exploration then writing (mutations at step 6); -// fix = 3 steps of reading failures then applying targeted fixes -// (mutations at step 2). Conditional branching at every method -// boundary produces worse code than two focused classes. +// fix = dynamic N steps iterating one QR item per step (mutations in +// per-item range only). Conditional branching at every method boundary +// produces worse code than two focused classes. // -// The fix architect receives QR failures as XML in step 1. It reads -// the current plan state via getter tools, applies minimal mutations -// to address the specific findings, then validates the result. The -// session orchestrator decides whether to re-run QR -- the fix phase -// does not know about iterations or severity escalation. +// The fix architect receives QR failures as XML in step 1. Per-item steps +// present a single failure with mutation tools enabled. The session +// orchestrator decides whether to re-run QR -- the fix phase does not +// know about iterations or severity escalation. 
import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; @@ -20,11 +24,10 @@ import { buildPlanDesignSystemPrompt, } from "./prompts.js"; import { - FIX_STEP_NAMES, + fixStepName, buildFixSystemPrompt, fixStepGuidance, formatFailuresXml, - type FixStep, } from "./fix-prompts.js"; import { formatStep } from "../../lib/step.js"; import type { QRItem } from "../../qr/types.js"; @@ -35,17 +38,15 @@ import { checkPermission, PLAN_MUTATION_TOOLS } from "../../lib/permissions.js"; interface FixPhaseState { active: boolean; - step: FixStep; + step: number; step1Prompt: string | null; systemPrompt: string | null; } -const TOTAL_STEPS = 3; - export class PlanDesignFixPhase { private readonly pi: ExtensionAPI; private readonly planDir: string; - private readonly failures: QRItem[]; + private readonly failures: ReadonlyArray; private readonly log: Logger; private readonly state: FixPhaseState; private readonly eventLog: EventLog | undefined; @@ -78,6 +79,13 @@ export class PlanDesignFixPhase { this.registerHandlers(); } + // Computed from failure count. Step 1 (understand) + N per-item steps + // + 1 final review = 2 + N. Single source of truth for all step-range + // checks in this class. + private get totalSteps(): number { + return 2 + this.failures.length; + } + async begin(): Promise { let basePrompt: string; try { @@ -89,11 +97,17 @@ export class PlanDesignFixPhase { } const failuresXml = formatFailuresXml(this.failures); + // Local copy for consistent reads across this method. The getter is stable + // (this.failures is readonly) but a local communicates "one value, many uses". 
+ const totalSteps = this.totalSteps; this.state.systemPrompt = buildFixSystemPrompt( buildPlanDesignSystemPrompt(basePrompt), this.failures.length, + totalSteps, + ); + this.state.step1Prompt = formatStep( + fixStepGuidance(1, totalSteps, { allFailuresXml: failuresXml }), ); - this.state.step1Prompt = formatStep(fixStepGuidance(1, failuresXml)); this.state.active = true; this.state.step = 1; @@ -101,10 +115,15 @@ export class PlanDesignFixPhase { this.log("Starting plan-design fix workflow", { step: 1, + totalSteps, failureCount: this.failures.length, }); - await this.eventLog?.emitPhaseStart(TOTAL_STEPS); - await this.eventLog?.emitStepTransition(1, FIX_STEP_NAMES[1], TOTAL_STEPS); + await this.eventLog?.emitPhaseStart(totalSteps); + await this.eventLog?.emitStepTransition( + 1, + fixStepName(1, totalSteps), + totalSteps, + ); } private registerHandlers(): void { @@ -137,14 +156,17 @@ export class PlanDesignFixPhase { return { block: true, reason: perm.reason }; } - // Step gate: mutation tools are blocked before step 2. Blocklist - // (not whitelist) so read tools and future pi-native tools pass - // through after checkPermission approves them. + // Step gate: mutation tools allowed ONLY in per-item steps (step 2 + // through totalSteps-1). Both step 1 (understand) and the final step + // (review) are read-only. The upper bound prevents accidental mutations + // during review that would bypass QR re-verification. 
const step = this.state.step; - if (step < 2 && PLAN_MUTATION_TOOLS.has(event.toolName)) { + const total = this.totalSteps; + const inItemRange = step >= 2 && step < total; + if (!inItemRange && PLAN_MUTATION_TOOLS.has(event.toolName)) { return { block: true, - reason: `${event.toolName} available from step 2 (current: ${step})`, + reason: `${event.toolName} available in steps 2-${total - 1} (current: ${step})`, }; } @@ -154,8 +176,10 @@ export class PlanDesignFixPhase { private async handleStepComplete(): Promise<{ ok: boolean; prompt?: string; error?: string }> { const prev = this.state.step; + const total = this.totalSteps; - if (prev === 3) { + // Terminal: final step completed -> validate plan and end phase. + if (prev === total) { const result = await this.handleFinalize(); if (!result.ok) { await this.eventLog?.emitPhaseEnd("failed", result.errors?.join("; ")); @@ -168,12 +192,21 @@ export class PlanDesignFixPhase { return { ok: true, prompt: "Fix phase validation passed. Workflow complete." }; } - this.state.step = (prev + 1) as FixStep; - const nextName = FIX_STEP_NAMES[this.state.step]; - const prompt = formatStep(fixStepGuidance(this.state.step)); - - this.log("Fix step complete, advancing", { from: prev, to: this.state.step, name: nextName }); - await this.eventLog?.emitStepTransition(this.state.step, nextName, TOTAL_STEPS); + // Advance to next step. Step always increments -- no cursor, no hold. + const next = prev + 1; + this.state.step = next; + + // Per-item steps (2 <= next < total) pass the individual failure item + // so fixStepGuidance generates item-specific prompts. Only the final + // step (next === total) does not carry an item. + const item = (next >= 2 && next < total) + ? 
this.failures[next - 2] + : undefined; + const name = fixStepName(next, total, item); + const prompt = formatStep(fixStepGuidance(next, total, { item })); + + this.log("Fix step complete, advancing", { from: prev, to: next, name }); + await this.eventLog?.emitStepTransition(next, name, total); return { ok: true, prompt }; } diff --git a/src/planner/phases/plan-design/fix-prompts.ts b/src/planner/phases/plan-design/fix-prompts.ts index 003bf8d..8d12cc8 100644 --- a/src/planner/phases/plan-design/fix-prompts.ts +++ b/src/planner/phases/plan-design/fix-prompts.ts @@ -1,21 +1,17 @@ -// Fix-phase step guidance for plan-design targeted repair (3 steps). +// Fix-phase step guidance for plan-design targeted repair (dynamic N steps). // -// Parallels prompts.ts structure. Step 1 explicitly prohibits mutations: -// without this constraint the LLM tends to apply the first fix it identifies -// without reading all failures, producing cascading corrections that address -// symptoms rather than root causes. +// totalSteps = 2 + failures.length. Step 1 reads all failures (read-only). +// Steps 2..N+1 each fix one QR item (mutations enabled). Step N+2 reviews +// all fixes (read-only). The step counter IS the item iterator: +// failures[step - 2] gives the current item in the per-item range. +// +// Step 1 explicitly prohibits mutations: without this constraint the LLM +// tends to apply the first fix it identifies without reading all failures, +// producing cascading corrections that address symptoms rather than root causes. import type { QRItem } from "../../qr/types.js"; import type { StepGuidance } from "../../lib/step.js"; -export type FixStep = 1 | 2 | 3; - -export const FIX_STEP_NAMES: Record = { - 1: "Understand QR Failures", - 2: "Apply Targeted Fixes", - 3: "Review & Finalize", -}; - // Serializes FAIL items as an XML block injected into the step 1 prompt. // XML structure mirrors how pi-native tools present structured data. 
export function formatFailuresXml(failures: ReadonlyArray): string { @@ -33,105 +29,183 @@ export function formatFailuresXml(failures: ReadonlyArray): string { ].join("\n"); } +// Dynamic step names. Step 1 and the final step have fixed names; +// per-item steps show the QR item ID so the widget displays +// "Step 3/7: Fix D-001" rather than a generic label. The audit log +// uses these names to distinguish per-item transitions. +export function fixStepName( + step: number, + totalSteps: number, + item?: QRItem, +): string { + if (step === 1) return "Understand QR Failures"; + if (step === totalSteps) return "Review & Finalize"; + return item ? `Fix ${item.id}` : `Fix item ${step - 1}`; +} + // Appends fix workflow instructions to the base architect system prompt. -export function buildFixSystemPrompt(basePrompt: string, failureCount: number): string { +// The structured STEP LAYOUT section uses indentation to visually separate +// the three phases so the LLM internalizes the one-at-a-time constraint +// from the system prompt rather than discovering it at step 2. +export function buildFixSystemPrompt( + basePrompt: string, + failureCount: number, + totalSteps: number, +): string { return [ basePrompt, "", "---", "", - "WORKFLOW: 3-STEP PLAN-DESIGN FIX", + `WORKFLOW: ${totalSteps}-STEP PLAN-DESIGN FIX`, "", `You are fixing ${failureCount} QR failure(s) in an existing plan.`, - "Step 1 instructions are in the user message below.", - "Complete the work described, then call koan_complete_step.", - "Put your findings in the `thoughts` parameter of koan_complete_step.", - "The tool result contains the next step's instructions.", "", - "CRITICAL: Fix ONLY the identified failures. Do not restructure the plan", - "beyond what the failures require. Prefer updating existing entities over", - "adding new ones.", + "STEP LAYOUT:", + " Step 1: Read all failures. Understand scope and interactions. READ-ONLY.", + ` Steps 2-${totalSteps - 1}: Fix ONE failure per step. 
Each step targets exactly one item.`, + ` Step ${totalSteps}: Review all fixes against original failures. READ-ONLY.`, + "", + "Each step's instructions appear as a tool result after you call koan_complete_step.", + "Put your work output in the `thoughts` parameter of koan_complete_step.", + "", + "CONSTRAINTS:", + " - Fix ONLY the identified failures", + " - Each per-item step targets exactly ONE failure -- do not fix other items", + " - Prefer updating existing entities over adding new ones", + " - Do not restructure the plan beyond what failures require", ].join("\n"); } -export function fixStepGuidance(step: FixStep, context?: string): StepGuidance { - switch (step) { - case 1: - return { - title: "Step 1: Understand QR Failures", - instructions: [ - "QR FAILURES TO FIX:", - "", - context ?? "", - "", - "Read the failures carefully. For each failing item:", - " - Identify the scope (which milestone, decision, or intent)", - " - Understand what the check requires", - " - Read the finding to understand why it failed", - "", - "Use getter tools to inspect the scoped entities:", - " - koan_get_plan: overview, structure, decisions", - " - koan_get_milestone: milestone details and intents", - " - koan_get_decision: decision rationale", - " - koan_get_intent: intent definition", - "", - "Plan your fixes mentally. Consider:", - " - What minimal change addresses each failure?", - " - Do any fixes overlap or interact?", - " - Could fixing one item cause another to fail?", - "", - "DO NOT write any changes yet. Gather understanding for step 2.", - ], - }; +// Three categories of step: understand (step 1), per-item fix +// (2 <= step < totalSteps), and review (step === totalSteps). +// The step counter IS the item iterator -- no separate cursor needed. +export function fixStepGuidance( + step: number, + totalSteps: number, + opts?: { item?: QRItem; allFailuresXml?: string }, +): StepGuidance { + if (step === 1) + return fixStep1Guidance(totalSteps, opts?.allFailuresXml ?? 
""); + if (step === totalSteps) return fixFinalStepGuidance(totalSteps); + return fixItemStepGuidance(step, totalSteps, opts?.item); +} + +// Step 1 prompt reframes analysis as "note interactions" rather than +// "plan your fixes mentally" to avoid priming the LLM for batch application. +// The one-at-a-time delivery is stated explicitly so the LLM expects +// per-item steps rather than a single batch-fix step. +function fixStep1Guidance( + totalSteps: number, + failuresXml: string, +): StepGuidance { + const itemCount = totalSteps - 2; + return { + title: `Step 1/${totalSteps}: Understand QR Failures`, + instructions: [ + "QR FAILURES TO FIX:", + "", + failuresXml, + "", + `There are ${itemCount} failure(s). You will fix them one at a time`, + `in steps 2 through ${totalSteps - 1}. Each step presents a single item.`, + "", + "For each failing item:", + " - Identify the scope (which milestone, decision, or intent)", + " - Understand what the check requires", + " - Read the finding to understand why it failed", + "", + "Use getter tools to inspect scoped entities:", + " - koan_get_plan: overview, structure, decisions", + " - koan_get_milestone: milestone details and intents", + " - koan_get_decision: decision rationale", + " - koan_get_intent: intent definition", + "", + "Note interactions between failures:", + " - Do any failures share the same entity scope?", + " - Could fixing one affect another's context?", + "", + "This is a READ-ONLY step. 
Do not apply any changes.", + ], + }; +} - case 2: - return { - title: "Step 2: Apply Targeted Fixes", - instructions: [ - "Apply the fixes you planned in step 1.", - "", - "Use plan mutation tools to address each failure:", - " - koan_set_overview / koan_set_constraints / koan_set_invisible_knowledge", - " - koan_set_milestone_* / koan_set_intent / koan_set_decision", - " - koan_add_milestone / koan_add_intent / koan_add_decision (if new entities needed)", - "", - "RULES:", - " - Fix ONLY the FAIL items from step 1", - " - Prefer updating existing entities over adding new ones", - " - Do not restructure the plan beyond what the failures require", - " - Do not change PASS items", - "", - "After applying all fixes, call koan_complete_step.", - ], - }; +// Per-item fix step. Shows only the single item being fixed so the LLM +// focuses on one failure rather than attempting batch fixes that produce +// cascading corrections. Mutations are enabled by the step gate in +// fix-phase.ts for this range. +// +// Positional context ("FIX ITEM N OF M") grounds the LLM in the sequence, +// matching the reference impl's "item {idx} of {total}" pattern. The +// explicit anti-batch gate ("Do not fix other failures") is the prompt-level +// complement to the code-level step gate that blocks mutations outside the +// per-item range. +function fixItemStepGuidance( + step: number, + totalSteps: number, + item?: QRItem, +): StepGuidance { + // Defensive fallbacks: handleStepComplete guarantees item is present for + // per-item steps (failures[next-2] is in-bounds), but the function signature + // accepts optional to keep it callable from tests or future call sites. + const itemXml = item ? formatFailuresXml([item]) : ""; + const itemLabel = item?.id ?? 
`item ${step - 1}`; + const itemIdx = step - 1; + const itemCount = totalSteps - 2; - case 3: - return { - title: "Step 3: Review & Finalize", - instructions: [ - "Review the fixes you applied.", - "", - "Call koan_get_plan to read the current plan state.", - "For each original failure, verify:", - " - The fix addresses the check that failed", - " - No regressions introduced in previously passing items", - " - The plan is internally consistent", - "", - "Summarize in the `thoughts` parameter of koan_complete_step:", - " - Which failures were fixed and how", - " - Any concerns or items that may still be at risk", - ], - // Step 3 requires reading the plan before completing -- the review - // is meaningless without it. The custom invokeAfter enforces this - // sequencing explicitly. - invokeAfter: [ - "WHEN DONE: First call koan_get_plan to confirm the final plan state.", - "Then call koan_complete_step with your review summary in the `thoughts` parameter.", - "Do NOT call koan_complete_step before calling koan_get_plan.", - ].join("\n"), - }; + return { + title: `Step ${step}/${totalSteps}: Fix ${itemLabel}`, + instructions: [ + `FIX ITEM ${itemIdx} OF ${itemCount}:`, + "", + itemXml, + "", + "Apply a targeted fix for this failure using your analysis from step 1.", + "", + "Available mutation tools:", + " - koan_set_overview / koan_set_constraints / koan_set_invisible_knowledge", + " - koan_set_milestone_* / koan_set_intent / koan_set_decision", + " - koan_add_milestone / koan_add_intent / koan_add_decision (if needed)", + "", + "RULES:", + " - Fix ONLY this failure. Do not fix other failures in this step.", + " - Prefer updating existing entities over adding new ones", + " - Do not restructure the plan beyond what this failure requires", + ], + }; +} - default: - throw new Error(`unexpected fix step: ${step as never}`); - } +// Final review step. Accepts only totalSteps because the call site guard +// (step === totalSteps) guarantees identity. 
A two-parameter form would +// create a hidden contract ("pass equal values") with no type enforcement. +// +// "All per-item fixes are complete" explicitly closes the mutation phase +// and establishes the read-only review frame. "This step is READ-ONLY" +// is the prompt-level complement to the step gate blocking mutations. +function fixFinalStepGuidance(totalSteps: number): StepGuidance { + return { + title: `Step ${totalSteps}/${totalSteps}: Review & Finalize`, + instructions: [ + "All per-item fixes are complete. This step is READ-ONLY.", + "", + "Call koan_get_plan to read the current plan state.", + "", + "Verify each fix:", + " - Does the fix address the specific check that failed?", + " - Are previously passing items unaffected?", + " - Is the plan internally consistent?", + "", + "Summarize in the `thoughts` parameter of koan_complete_step:", + " - Which failures were fixed and how", + " - Any remaining concerns or regression risks", + ], + // The review step requires reading the plan before completing -- + // the review is meaningless without it. The custom invokeAfter + // enforces this sequencing explicitly. 
+ invokeAfter: [ + "WHEN DONE: First call koan_get_plan to confirm the final plan state.", + "Then call koan_complete_step with your review summary in the `thoughts` parameter.", + "Do NOT call koan_complete_step before calling koan_get_plan.", + ].join("\n"), + }; } From 9d15a864fd35c78ac43f8fcd69d7c296317a942a Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Thu, 26 Feb 2026 09:27:38 +0700 Subject: [PATCH 022/412] planner: preserve PASS items and re-verify prior FAILs in QR loop --- src/planner/session.ts | 192 +++++++++++++++++++++++++++++------------ 1 file changed, 139 insertions(+), 53 deletions(-) diff --git a/src/planner/session.ts b/src/planner/session.ts index 250cdbb..9aba9c3 100644 --- a/src/planner/session.ts +++ b/src/planner/session.ts @@ -224,6 +224,23 @@ async function runQRBlock( log: Logger, widget: WidgetController | null, ): Promise { + const qrPath = path.join(planDir, "qr-plan-design.json"); + const keyOf = (scope: string, check: string): string => `${scope}\u0000${check}`; + + // Carry forward confirmed PASS concerns across re-decompose runs. + const previousPassKeys = new Set(); + try { + const raw = await fs.readFile(qrPath, "utf8"); + const prev = JSON.parse(raw) as QRFile; + for (const item of prev.items) { + if (item.status === "PASS") { + previousPassKeys.add(keyOf(item.scope, item.check)); + } + } + } catch { + // No previous QR file yet. + } + // 1. Spawn decomposer subagent state.phase = "qr-decompose-running"; widget?.update({ @@ -271,7 +288,6 @@ async function runQRBlock( } // 2. 
Read QR items - const qrPath = path.join(planDir, "qr-plan-design.json"); let qr: QRFile; try { const raw = await fs.readFile(qrPath, "utf8"); @@ -289,62 +305,130 @@ async function runQRBlock( return { summary: "QR decompose completed but produced no items.", passed: false }; } - const itemIds = qr.items.map((i) => i.id); - const initialPass = qr.items.filter((i) => i.status === "PASS").length; + // Re-apply previously confirmed PASS concerns if re-decompose reset them. + const carriedPasses = qr.items.filter((item) => + item.status !== "PASS" && previousPassKeys.has(keyOf(item.scope, item.check))).length; + if (carriedPasses > 0) { + qr = { + ...qr, + items: qr.items.map((item) => + previousPassKeys.has(keyOf(item.scope, item.check)) + ? { ...item, status: "PASS", finding: null } + : item), + }; + try { + const tmpPath = `${qrPath}.tmp`; + await fs.writeFile(tmpPath, `${JSON.stringify(qr, null, 2)}\n`, "utf8"); + await fs.rename(tmpPath, qrPath); + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + log("Failed to persist carried PASS statuses", { error: message }); + return { summary: "QR verify aborted: failed to preserve PASS statuses.", passed: false }; + } + } + + // Preserve prior PASS verdicts, but force all FAIL items back to TODO for + // re-verification. This keeps confirmed concerns stable while requiring + // explicit re-check of previously failing concerns. + const resetFailures = qr.items.filter((i) => i.status === "FAIL").length; + if (resetFailures > 0) { + qr = { + ...qr, + items: qr.items.map((item) => + item.status === "FAIL" + ? { ...item, status: "TODO", finding: null } + : item), + }; + try { + const tmpPath = `${qrPath}.tmp`; + await fs.writeFile(tmpPath, `${JSON.stringify(qr, null, 2)}\n`, "utf8"); + await fs.rename(tmpPath, qrPath); + } catch (error) { + const message = error instanceof Error ? 
error.message : String(error); + log("Failed to persist QR FAIL->TODO reset", { error: message }); + return { summary: "QR verify aborted: failed to prepare QR item states.", passed: false }; + } + } + + const verifyIds = qr.items.filter((i) => i.status === "TODO").map((i) => i.id); + const totalItems = qr.items.length; + const preservedPass = qr.items.filter((i) => i.status === "PASS").length; const initialFail = qr.items.filter((i) => i.status === "FAIL").length; const initialTodo = qr.items.filter((i) => i.status === "TODO").length; - log("QR decompose complete", { itemCount: itemIds.length }); + + log("QR decompose complete", { + itemCount: totalItems, + verifyCount: verifyIds.length, + preservedPass, + carriedPasses, + resetFailures, + }); + widget?.update({ - step: `qr-verify: 0/${itemIds.length}`, + step: `qr-verify: 0/${verifyIds.length}`, activity: "", - qrTotal: itemIds.length, - qrDone: 0, - qrPass: initialPass, + qrTotal: totalItems, + qrDone: preservedPass, + qrPass: preservedPass, qrFail: initialFail, qrTodo: initialTodo, }); - // 3. Spawn reviewer pool + // 3. Spawn reviewer pool (TODO-only) state.phase = "qr-verify-running"; widget?.update({ qrPhase: "verify" }); let verifyDone = 0; - const verifyStatsPoll = setInterval(async () => { - try { - const raw = await fs.readFile(qrPath, "utf8"); - const current = JSON.parse(raw) as QRFile; - const pass = current.items.filter((i) => i.status === "PASS").length; - const fail = current.items.filter((i) => i.status === "FAIL").length; - const todo = current.items.filter((i) => i.status === "TODO").length; - widget?.update({ qrPass: pass, qrFail: fail, qrTodo: todo, qrDone: verifyDone, qrTotal: current.items.length }); - } catch { - // Ignore transient read races while reviewers write. 
- } - }, 2000); + let failedReviewers: string[] = []; - let result: Awaited>; - try { - result = await pool( - itemIds, - QR_POOL_CONCURRENCY, - async (itemId) => { - const reviewerDir = await createSubagentDir(planDir, `qr-reviewer-${itemId}`); - return spawnReviewer({ - planDir, - subagentDir: reviewerDir, - cwd, - extensionPath, - itemId, - log, + if (verifyIds.length > 0) { + const verifyStatsPoll = setInterval(async () => { + try { + const raw = await fs.readFile(qrPath, "utf8"); + const current = JSON.parse(raw) as QRFile; + const pass = current.items.filter((i) => i.status === "PASS").length; + const fail = current.items.filter((i) => i.status === "FAIL").length; + const todo = current.items.filter((i) => i.status === "TODO").length; + widget?.update({ + qrPass: pass, + qrFail: fail, + qrTodo: todo, + qrDone: preservedPass + verifyDone, + qrTotal: current.items.length, }); - }, - (done, total) => { - verifyDone = done; - widget?.update({ step: `qr-verify: ${done}/${total}`, qrDone: done, qrTotal: total }); - }, - ); - } finally { - clearInterval(verifyStatsPoll); + } catch { + // Ignore transient read races while reviewers write. + } + }, 2000); + + try { + const result = await pool( + verifyIds, + QR_POOL_CONCURRENCY, + async (itemId) => { + const reviewerDir = await createSubagentDir(planDir, `qr-reviewer-${itemId}`); + return spawnReviewer({ + planDir, + subagentDir: reviewerDir, + cwd, + extensionPath, + itemId, + log, + }); + }, + (done, total) => { + verifyDone = done; + widget?.update({ + step: `qr-verify: ${done}/${total}`, + qrDone: preservedPass + done, + qrTotal: totalItems, + }); + }, + ); + failedReviewers = result.failed; + } finally { + clearInterval(verifyStatsPoll); + } } // 4. 
Read final results @@ -360,16 +444,16 @@ async function runQRBlock( const pass = finalQR.items.filter((i) => i.status === "PASS").length; const fail = finalQR.items.filter((i) => i.status === "FAIL").length; const todo = finalQR.items.filter((i) => i.status === "TODO").length; - const summary = `QR complete: ${pass} PASS, ${fail} FAIL, ${todo} TODO (${result.failed.length} reviewers failed).`; + const summary = `QR complete: ${pass} PASS, ${fail} FAIL, ${todo} TODO (${failedReviewers.length} reviewers failed).`; - log("QR block complete", { pass, fail, todo, failedReviewers: result.failed }); + log("QR block complete", { pass, fail, todo, failedReviewers }); - const passed = fail === 0 && result.failed.length === 0; + const passed = fail === 0 && failedReviewers.length === 0; widget?.update({ step: summary, activity: "", - qrDone: itemIds.length, - qrTotal: itemIds.length, + qrDone: pass + fail, + qrTotal: totalItems, qrPass: pass, qrFail: fail, qrTodo: todo, @@ -383,12 +467,14 @@ async function runQRBlock( // // Re-decomposes on each iteration rather than re-verifying only. The fix // architect may change plan structure (add milestones, split intents, remove -// decisions); old QR items referencing stale scopes produce incorrect verdicts. -// Fresh decomposition generates items matched to the current plan state. +// decisions); old QR items referencing stale scopes can produce stale verdicts. +// +// Verification semantics per iteration: +// - PASS items are preserved (confirmed concerns stay confirmed). +// - FAIL items are reset to TODO (must be re-verified after fixes). +// - TODO items are verified. // -// The session's for-loop counter is the iteration source of truth. Each -// re-decompose writes a fresh qr-plan-design.json with iteration=1 and -// all-TODO items. The loop counter survives those resets. +// The session's for-loop counter remains the iteration source of truth. 
async function runPlanDesignWithQR( planDir: string, cwd: string, From fe3b5051350e36db62185d34783284fc14a29586 Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Thu, 26 Feb 2026 09:27:45 +0700 Subject: [PATCH 023/412] ui: show elapsed time with hours in planner widget --- src/planner/ui/widget.ts | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/planner/ui/widget.ts b/src/planner/ui/widget.ts index 32114ca..c7249ce 100644 --- a/src/planner/ui/widget.ts +++ b/src/planner/ui/widget.ts @@ -179,8 +179,14 @@ function planningColumns(width: number): PlanningColumns { function formatElapsed(ms: number): string { const totalSec = Math.floor(ms / 1000); - const m = Math.floor(totalSec / 60); + const h = Math.floor(totalSec / 3600); + const m = Math.floor((totalSec % 3600) / 60); const s = totalSec % 60; + + if (h > 0) { + return `${h}h ${String(m).padStart(2, "0")}m ${String(s).padStart(2, "0")}s`; + } + return `${m}m ${String(s).padStart(2, "0")}s`; } From ba75b15e21cd7df6050f958a2cc2322b8bf3030e Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Thu, 26 Feb 2026 11:24:37 +0700 Subject: [PATCH 024/412] planner: persist subagent model in audit projection --- extensions/koan.ts | 14 +++++++++++--- src/planner/lib/audit.ts | 7 ++++++- tests/progress.test.ts | 6 +++++- 3 files changed, 22 insertions(+), 5 deletions(-) diff --git a/extensions/koan.ts b/extensions/koan.ts index 2dfd08c..613e8f3 100644 --- a/extensions/koan.ts +++ b/extensions/koan.ts @@ -3,7 +3,7 @@ // via CLI flags). All tools register unconditionally at init; phases restrict // access via tool_call blocking at runtime. 
-import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; +import type { ExtensionAPI, ExtensionContext } from "@mariozechner/pi-coding-agent"; import { createSession } from "../src/planner/session.js"; import { detectSubagentMode, dispatchPhase } from "../src/planner/phases/dispatch.js"; @@ -11,6 +11,12 @@ import { registerAllTools, createDispatch, createPlanRef } from "../src/planner/ import { createLogger } from "../src/utils/logger.js"; import { EventLog, extractToolEvent } from "../src/planner/lib/audit.js"; +function currentModelId(ctx: ExtensionContext): string | null { + const model = ctx.model; + if (!model) return null; + return `${model.provider}/${model.id}`; +} + export default function koan(pi: ExtensionAPI): void { const log = createLogger("Koan"); @@ -61,7 +67,7 @@ export default function koan(pi: ExtensionAPI): void { // Subagent detection runs at before_agent_start (flags // are unavailable during init). let dispatched = false; - pi.on("before_agent_start", async () => { + pi.on("before_agent_start", async (_event, ctx) => { if (dispatched) return; dispatched = true; const config = detectSubagentMode(pi); @@ -72,9 +78,11 @@ export default function koan(pi: ExtensionAPI): void { } // EventLog exists only in subagent mode. Parent mode has no audit log. + // Model identity is captured by the subagent itself and persisted in + // state.json for parent widget rendering. let eventLog: EventLog | undefined; if (config.subagentDir) { - eventLog = new EventLog(config.subagentDir, config.role, config.phase); + eventLog = new EventLog(config.subagentDir, config.role, config.phase, currentModelId(ctx)); await eventLog.open(); // Capture all tool results for the audit trail. 
Graduated detail: diff --git a/src/planner/lib/audit.ts b/src/planner/lib/audit.ts index 9d2f980..94e9d39 100644 --- a/src/planner/lib/audit.ts +++ b/src/planner/lib/audit.ts @@ -51,6 +51,7 @@ export interface PhaseStartEvent extends EventBase { kind: "phase_start"; phase: string; role: string; + model?: string | null; totalSteps: number; } @@ -84,6 +85,7 @@ export type AuditEvent = export interface Projection { role: string; phase: string; + model: string | null; status: "running" | "completed" | "failed"; step: number; totalSteps: number; @@ -142,6 +144,7 @@ export function fold(s: Projection, e: AuditEvent): Projection { ...base, role: e.role, phase: e.phase, + model: e.model ?? s.model, status: "running", step: 0, totalSteps: e.totalSteps, @@ -233,13 +236,14 @@ export class EventLog { // writeState() calls race on the shared tmp file (ENOENT on rename). private pending: Promise = Promise.resolve(); - constructor(dir: string, role: string, phase: string) { + constructor(dir: string, role: string, phase: string, model: string | null = null) { this.eventsPath = path.join(dir, "events.jsonl"); this.statePath = path.join(dir, "state.json"); this.stateTmpPath = path.join(dir, "state.tmp.json"); this.projection = { role, phase, + model, status: "running", step: 0, totalSteps: 0, @@ -284,6 +288,7 @@ export class EventLog { kind: "phase_start", phase: this.projection.phase, role: this.projection.role, + model: this.projection.model, totalSteps, } as Omit); } diff --git a/tests/progress.test.ts b/tests/progress.test.ts index 5891306..b1378a3 100644 --- a/tests/progress.test.ts +++ b/tests/progress.test.ts @@ -17,7 +17,7 @@ describe("EventLog", () => { it("persists events and projection through step transitions", async () => { const dir = await createTempDir("koan-audit-"); - const log = new EventLog(dir, "architect", "plan-design"); + const log = new EventLog(dir, "architect", "plan-design", "anthropic/claude-sonnet-4-20250514"); await log.open(); await 
log.emitPhaseStart(6); @@ -30,6 +30,7 @@ describe("EventLog", () => { assert.ok(proj, "projection should be readable"); assert.equal(proj.role, "architect"); assert.equal(proj.phase, "plan-design"); + assert.equal(proj.model, "anthropic/claude-sonnet-4-20250514"); assert.equal(proj.status, "completed"); assert.equal(proj.step, 2); assert.equal(proj.totalSteps, 6); @@ -155,6 +156,7 @@ describe("fold", () => { const initial: Projection = { role: "", phase: "", + model: null, status: "running", step: 0, totalSteps: 0, @@ -170,6 +172,7 @@ describe("fold", () => { kind: "phase_start", phase: "plan-design", role: "architect", + model: "openai/gpt-5-codex", totalSteps: 6, ts: "2026-01-01T00:00:00Z", seq: 0, @@ -177,6 +180,7 @@ describe("fold", () => { const s = fold(initial, e); assert.equal(s.role, "architect"); assert.equal(s.phase, "plan-design"); + assert.equal(s.model, "openai/gpt-5-codex"); assert.equal(s.totalSteps, 6); assert.equal(s.eventCount, 1); }); From 6e1ae8d2d0a822997c3dd5aa9525f4243e5928d6 Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Thu, 26 Feb 2026 11:24:41 +0700 Subject: [PATCH 025/412] ui: render subagent runtime and identity in planner widget --- src/planner/lib/pool.ts | 27 ++++- src/planner/session.ts | 92 ++++++++++++++-- src/planner/ui/widget.ts | 227 +++++++++++++++++++++++++++++++++++---- 3 files changed, 315 insertions(+), 31 deletions(-) diff --git a/src/planner/lib/pool.ts b/src/planner/lib/pool.ts index f5e9c3f..f4bfcc8 100644 --- a/src/planner/lib/pool.ts +++ b/src/planner/lib/pool.ts @@ -12,6 +12,13 @@ export interface PoolResult { failed: string[]; } +export interface PoolProgress { + done: number; + total: number; + active: number; + queued: number; +} + // -- Constants -- export const DEFAULT_REVIEWER_TIMEOUT_MS = 10 * 60 * 1000; @@ -47,24 +54,40 @@ export async function pool( itemIds: string[], limit: number, worker: (itemId: string) => Promise, - onProgress?: (done: number, total: number) => void, + onProgress?: (progress: 
PoolProgress) => void, ): Promise { const sem = new Semaphore(limit); const total = itemIds.length; const failed: string[] = []; let completed = 0; + let running = 0; + + const emit = () => { + onProgress?.({ + done: completed, + total, + active: running, + queued: Math.max(0, total - completed - running), + }); + }; + + emit(); await Promise.all( itemIds.map(async (id) => { await sem.acquire(); + running++; + emit(); + try { const r = await worker(id); if (r.exitCode !== 0) { failed.push(id); } } finally { + running = Math.max(0, running - 1); completed++; - onProgress?.(completed, total); + emit(); sem.release(); } }), diff --git a/src/planner/session.ts b/src/planner/session.ts index 9aba9c3..ba24055 100644 --- a/src/planner/session.ts +++ b/src/planner/session.ts @@ -13,12 +13,12 @@ import { createPlanInfo } from "../utils/plan.js"; import { spawnArchitect, spawnArchitectFix, spawnQRDecomposer, spawnReviewer } from "./subagent.js"; import { createLogger, setLogDir, type Logger } from "../utils/logger.js"; import { createSubagentDir } from "../utils/progress.js"; -import { readProjection, readRecentLogs } from "./lib/audit.js"; +import { readProjection, readRecentLogs, type Projection } from "./lib/audit.js"; import type { WorkflowDispatch, PlanRef } from "./lib/dispatch.js"; import { pool } from "./lib/pool.js"; import type { QRFile } from "./qr/types.js"; import { MAX_FIX_ITERATIONS, qrPassesAtIteration } from "./qr/severity.js"; -import { WidgetController } from "./ui/widget.js"; +import { WidgetController, type WidgetUpdate } from "./ui/widget.js"; // -- Types -- @@ -33,6 +33,28 @@ interface QRBlockResult { passed: boolean; } +function singleSubagentStart(role: string): WidgetUpdate { + return { + subagentRole: role, + subagentParallelCount: 1, + subagentQueued: 0, + subagentActive: 1, + subagentDone: 0, + }; +} + +function singleSubagentFromProjection(p: Projection): WidgetUpdate { + const running = p.status === "running"; + return { + subagentRole: p.role, 
+ subagentModel: p.model, + subagentParallelCount: 1, + subagentQueued: 0, + subagentActive: running ? 1 : 0, + subagentDone: running ? 0 : 1, + }; +} + // -- Session -- export function createSession(pi: ExtensionAPI, dispatch: WorkflowDispatch, planRef: PlanRef): Session { @@ -71,6 +93,7 @@ export function createSession(pi: ExtensionAPI, dispatch: WorkflowDispatch, plan qrPass: null, qrFail: null, qrTodo: null, + ...singleSubagentStart("architect"), }); log("Spawning architect after context capture", { planDir, subagentDir }); @@ -86,6 +109,7 @@ export function createSession(pi: ExtensionAPI, dispatch: WorkflowDispatch, plan step: s.stepName, activity: s.lastAction ?? "", logLines: logs, + ...singleSubagentFromProjection(s), }); } }, 2000); @@ -108,6 +132,8 @@ export function createSession(pi: ExtensionAPI, dispatch: WorkflowDispatch, plan phaseStatus: { index: 1, status: "failed" }, step: "architect failed", activity: "", + subagentActive: 0, + subagentDone: 1, }); return `Context captured. Architect subagent failed (exit ${result.exitCode}).\n\nStderr:\n${detail}`; } @@ -127,6 +153,8 @@ export function createSession(pi: ExtensionAPI, dispatch: WorkflowDispatch, plan phaseStatus: { index: 1, status: "failed" }, step: "no plan produced", activity: "", + subagentActive: 0, + subagentDone: 1, }); return "Context captured. Architect completed but produced no plan."; } @@ -146,6 +174,8 @@ export function createSession(pi: ExtensionAPI, dispatch: WorkflowDispatch, plan qrPass: null, qrFail: null, qrTodo: null, + subagentActive: 0, + subagentDone: 1, }); const qr = await runPlanDesignWithQR(planDir, ctx.cwd, extensionPath, state, log, widget); @@ -252,6 +282,7 @@ async function runQRBlock( qrPass: null, qrFail: null, qrTodo: null, + ...singleSubagentStart("qr-decomposer"), }); const decomposeDir = await createSubagentDir(planDir, "qr-decomposer"); @@ -265,6 +296,7 @@ async function runQRBlock( step: `qr-decompose: ${s.stepName}`, activity: s.lastAction ?? 
"", logLines: logs, + ...singleSubagentFromProjection(s), }); } }, 2000); @@ -283,7 +315,12 @@ async function runQRBlock( state.phase = "qr-decompose-failed"; const detail = decompose.stderr.slice(0, 500); log("QR decomposer failed", { exitCode: decompose.exitCode, stderr: detail }); - widget?.update({ step: "qr-decompose: failed", activity: "" }); + widget?.update({ + step: "qr-decompose: failed", + activity: "", + subagentActive: 0, + subagentDone: 1, + }); return { summary: `QR decompose failed (exit ${decompose.exitCode}).\n\nStderr:\n${detail}`, passed: false }; } @@ -372,6 +409,11 @@ async function runQRBlock( qrPass: preservedPass, qrFail: initialFail, qrTodo: initialTodo, + subagentRole: "reviewer", + subagentParallelCount: QR_POOL_CONCURRENCY, + subagentQueued: verifyIds.length, + subagentActive: 0, + subagentDone: 0, }); // 3. Spawn reviewer pool (TODO-only) @@ -402,12 +444,13 @@ async function runQRBlock( }, 2000); try { + let reviewerModel: string | null = null; const result = await pool( verifyIds, QR_POOL_CONCURRENCY, async (itemId) => { const reviewerDir = await createSubagentDir(planDir, `qr-reviewer-${itemId}`); - return spawnReviewer({ + const r = await spawnReviewer({ planDir, subagentDir: reviewerDir, cwd, @@ -415,13 +458,26 @@ async function runQRBlock( itemId, log, }); + + if (reviewerModel === null) { + const projection = await readProjection(reviewerDir); + reviewerModel = projection?.model ?? 
null; + if (reviewerModel) { + widget?.update({ subagentModel: reviewerModel }); + } + } + + return r; }, - (done, total) => { - verifyDone = done; + (progress) => { + verifyDone = progress.done; widget?.update({ - step: `qr-verify: ${done}/${total}`, - qrDone: preservedPass + done, + step: `qr-verify: ${progress.done}/${progress.total}`, + qrDone: preservedPass + progress.done, qrTotal: totalItems, + subagentQueued: progress.queued, + subagentActive: progress.active, + subagentDone: progress.done, }); }, ); @@ -457,6 +513,9 @@ async function runQRBlock( qrPass: pass, qrFail: fail, qrTodo: todo, + subagentQueued: 0, + subagentActive: 0, + subagentDone: verifyIds.length, }); return { summary, passed }; } @@ -542,7 +601,12 @@ async function runPlanDesignWithQR( // Spawn fix-mode architect const fixIndex = iteration - 1; - widget?.update({ step: `fix ${fixIndex}/${MAX_FIX_ITERATIONS}: spawning architect...`, activity: "", qrPhase: "execute" }); + widget?.update({ + step: `fix ${fixIndex}/${MAX_FIX_ITERATIONS}: spawning architect...`, + activity: "", + qrPhase: "execute", + ...singleSubagentStart("architect"), + }); const fixDir = await createSubagentDir(planDir, `architect-fix-${fixIndex}`); @@ -556,6 +620,7 @@ async function runPlanDesignWithQR( step: `fix ${fixIndex}/${MAX_FIX_ITERATIONS}: ${s.stepName}`, activity: s.lastAction ?? 
"", logLines: logs, + ...singleSubagentFromProjection(s), }); } }, 2000); @@ -573,13 +638,20 @@ async function runPlanDesignWithQR( if (fixResult.exitCode !== 0) { log("Fix architect failed", { iteration: fixIndex, exitCode: fixResult.exitCode, stderr: fixResult.stderr.slice(0, 500) }); - widget?.update({ step: `fix ${fixIndex}/${MAX_FIX_ITERATIONS}: architect failed, re-running QR...`, activity: "" }); + widget?.update({ + step: `fix ${fixIndex}/${MAX_FIX_ITERATIONS}: architect failed, re-running QR...`, + activity: "", + subagentActive: 0, + subagentDone: 1, + }); } // Re-run full QR (decompose + verify) widget?.update({ step: `fix ${fixIndex}/${MAX_FIX_ITERATIONS}: re-running QR...`, activity: "", + subagentActive: 0, + subagentDone: 1, }); qr = await runQRBlock(planDir, cwd, extensionPath, state, log, widget); if (qr.passed) { diff --git a/src/planner/ui/widget.ts b/src/planner/ui/widget.ts index c7249ce..90ecc5a 100644 --- a/src/planner/ui/widget.ts +++ b/src/planner/ui/widget.ts @@ -46,6 +46,12 @@ interface WidgetState { qrPass: number | null; qrFail: number | null; qrTodo: number | null; + subagentRole: string | null; + subagentModel: string | null; + subagentParallelCount: number | null; + subagentQueued: number | null; + subagentActive: number | null; + subagentDone: number | null; } export interface WidgetUpdate { @@ -64,6 +70,12 @@ export interface WidgetUpdate { qrPass?: number | null; qrFail?: number | null; qrTodo?: number | null; + subagentRole?: string | null; + subagentModel?: string | null; + subagentParallelCount?: number | null; + subagentQueued?: number | null; + subagentActive?: number | null; + subagentDone?: number | null; } // -- Constants -- @@ -446,33 +458,160 @@ interface DetailSections { footer: string[]; } +interface DetailSectionDefinition { + id: string; + placement: "core" | "footer"; + select: (state: WidgetState) => ViewModel | null; + render: (view: ViewModel, theme: Theme, width: number) => string[]; +} + +interface 
CurrentStepView { + title: string; + activity: string; +} + +interface IdentityView { + planId: string; + agentLabel: "Agent" | "Agent pool"; + agentValue: string; + model: string; +} + +const IDENTITY_KEY_WIDTH = 10; + +function shouldShowSubagentSection(state: WidgetState): boolean { + if (state.subagentRole) return true; + return state.subagentQueued !== null || state.subagentActive !== null || state.subagentDone !== null; +} + +function subagentCount(value: number | null): string { + return value === null ? "-" : String(value); +} + +function renderSubagentStatusSection(state: WidgetState, theme: Theme, width: number): string[] { + if (!shouldShowSubagentSection(state)) { + return []; + } + + const parallel = state.subagentParallelCount ?? 1; + const mode = parallel > 1 ? `pool x${parallel}` : "single"; + + const header = clampToWidth( + `${theme.bold(theme.fg("accent", "Subagents"))} ${theme.fg("muted", "|")} ${theme.fg("dim", mode)}`, + width, + "…", + ); + + const counters = [ + `${theme.fg("muted", "queued:")}${theme.fg("muted", subagentCount(state.subagentQueued))}`, + `${theme.fg("muted", "active:")}${theme.bold(theme.fg("accent", subagentCount(state.subagentActive)))}`, + `${theme.fg("muted", "done:")}${theme.fg("dim", subagentCount(state.subagentDone))}`, + ].join(" "); + + const divider = clampToWidth(theme.fg("muted", "─".repeat(width)), width); + return [header, clampToWidth(counters, width, "…"), divider]; +} + +function identityView(state: WidgetState): IdentityView { + const role = state.subagentRole ?? "—"; + const parallel = state.subagentParallelCount ?? 1; + + if (parallel > 1) { + return { + planId: state.planId, + agentLabel: "Agent pool", + agentValue: `${role} x${parallel}`, + model: state.subagentModel ?? "—", + }; + } + + return { + planId: state.planId, + agentLabel: "Agent", + agentValue: role, + model: state.subagentModel ?? 
"—", + }; +} + +function renderIdentityRow(theme: Theme, width: number, key: string, value: string): string { + const padded = key.padEnd(IDENTITY_KEY_WIDTH, " "); + return clampToWidth(`${theme.fg("muted", padded)} : ${theme.fg("dim", value)}`, width, "…"); +} + +function renderIdentitySection(view: IdentityView, theme: Theme, width: number): string[] { + return [ + renderIdentityRow(theme, width, "Plan ID", view.planId), + renderIdentityRow(theme, width, view.agentLabel, view.agentValue), + renderIdentityRow(theme, width, "Model", view.model), + ]; +} + +const DETAIL_SECTION_REGISTRY: Array> = [ + { + id: "current-step", + placement: "core", + select: (state: WidgetState): CurrentStepView => { + const active = activePhase(state); + return { + title: state.step || active?.detail || active?.label || "Awaiting step", + activity: state.activity, + }; + }, + render: (view: CurrentStepView, theme: Theme, width: number): string[] => { + const lines = [ + clampToWidth(theme.fg("dim", "Current step"), width), + clampToWidth(theme.bold(theme.fg("accent", view.title)), width, "…"), + ]; + + if (view.activity) { + for (const line of wrapTextWithAnsi(theme.fg("muted", view.activity), width)) { + lines.push(clampToWidth(line, width)); + } + } + + return lines; + }, + }, + { + id: "qr-status", + placement: "core", + select: (state: WidgetState): WidgetState | null => (shouldShowQR(state) ? state : null), + render: (view: WidgetState, theme: Theme, width: number): string[] => renderQRStatusSection(view, theme, width), + }, + { + id: "subagent-status", + placement: "core", + select: (state: WidgetState): WidgetState | null => (shouldShowSubagentSection(state) ? 
state : null), + render: (view: WidgetState, theme: Theme, width: number): string[] => renderSubagentStatusSection(view, theme, width), + }, + { + id: "identity", + placement: "footer", + select: (state: WidgetState): IdentityView => identityView(state), + render: (view: IdentityView, theme: Theme, width: number): string[] => renderIdentitySection(view, theme, width), + }, +]; + function buildDetailSections(state: WidgetState, theme: Theme, width: number): DetailSections { const core: string[] = []; const footer: string[] = []; const blank = clampToWidth("", width); - const active = activePhase(state); - const stepTitle = state.step || active?.detail || active?.label || "Awaiting step"; - core.push(clampToWidth(theme.fg("dim", "Current step"), width)); - core.push(clampToWidth(theme.bold(theme.fg("accent", stepTitle)), width, "…")); - - if (state.activity) { - const activityLines = wrapTextWithAnsi(theme.fg("muted", state.activity), width); - for (const line of activityLines) { - core.push(clampToWidth(line, width)); - } - } + for (const section of DETAIL_SECTION_REGISTRY) { + const view = section.select(state); + if (!view) continue; - const qrSection = renderQRStatusSection(state, theme, width); - if (qrSection.length > 0) { - if (core.length > 0 && core[core.length - 1].trim() !== "") { - core.push(blank); + const rendered = section.render(view, theme, width).map((line) => clampToWidth(line, width)); + if (section.placement === "core") { + if (rendered.length === 0) continue; + if (core.length > 0 && core[core.length - 1].trim() !== "") { + core.push(blank); + } + core.push(...rendered); + continue; } - core.push(...qrSection.map((line) => clampToWidth(line, width))); - } - if (active) { - footer.push(...wrapTextWithAnsi(theme.fg("dim", `Plan · ${state.planId}`), width).map((line) => clampToWidth(line, width, "…"))); + footer.push(...rendered); } return { core, footer }; @@ -540,6 +679,14 @@ function renderPlanningCard(state: WidgetState, theme: Theme, width: 
number): st if (qrCompact.length > 0) { fallbackContent.push(...qrCompact); } + const subagentCompact = formatSubagentCompact(state, theme, contentWidth); + if (subagentCompact.length > 0) { + if (qrCompact.length > 0) fallbackContent.push(""); + fallbackContent.push(...subagentCompact); + } + + fallbackContent.push(""); + fallbackContent.push(...formatIdentityCompact(state, theme, contentWidth)); fallbackContent.push(""); const body = indentLines(fallbackContent, innerWidth); @@ -719,6 +866,24 @@ function formatQRCompact(state: WidgetState, theme: Theme, width: number): strin return [line1, line2]; } +function formatSubagentCompact(state: WidgetState, theme: Theme, width: number): string[] { + if (!shouldShowSubagentSection(state)) return []; + + const parallel = state.subagentParallelCount ?? 1; + const mode = parallel > 1 ? `pool x${parallel}` : "single"; + const line1 = clampToWidth(`${theme.fg("muted", "Subagents")} ${theme.fg("muted", "|")} ${theme.fg("dim", mode)}`, width, "…"); + const line2 = clampToWidth( + `${theme.fg("muted", `queued:${subagentCount(state.subagentQueued)}`)} ${theme.fg("accent", `active:${subagentCount(state.subagentActive)}`)} ${theme.fg("dim", `done:${subagentCount(state.subagentDone)}`)}`, + width, + "…", + ); + return [line1, line2]; +} + +function formatIdentityCompact(state: WidgetState, theme: Theme, width: number): string[] { + return renderIdentitySection(identityView(state), theme, width); +} + function formatStepLine(state: WidgetState, theme: Theme): string { const total = state.phases.length; const active = activePhase(state); @@ -814,6 +979,12 @@ export class WidgetController { qrPass: null, qrFail: null, qrTodo: null, + subagentRole: null, + subagentModel: null, + subagentParallelCount: null, + subagentQueued: null, + subagentActive: null, + subagentDone: null, }; this.state.phases[0].status = "running"; @@ -876,6 +1047,24 @@ export class WidgetController { if (patch.qrTodo !== undefined) { this.state.qrTodo = 
patch.qrTodo; } + if (patch.subagentRole !== undefined) { + this.state.subagentRole = patch.subagentRole; + } + if (patch.subagentModel !== undefined) { + this.state.subagentModel = patch.subagentModel; + } + if (patch.subagentParallelCount !== undefined) { + this.state.subagentParallelCount = patch.subagentParallelCount; + } + if (patch.subagentQueued !== undefined) { + this.state.subagentQueued = patch.subagentQueued; + } + if (patch.subagentActive !== undefined) { + this.state.subagentActive = patch.subagentActive; + } + if (patch.subagentDone !== undefined) { + this.state.subagentDone = patch.subagentDone; + } this.doRender(); } From 1d96d9550715a8dedaea8bdd3c0d2f08f0d1bbdf Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Fri, 27 Feb 2026 16:01:04 +0700 Subject: [PATCH 026/412] refactor planning widget header layout and add coverage --- design-decisions.md | 13 +++- docs/planning-widget.md | 137 +++++++++++++++++++++++++++------- src/planner/ui/widget.ts | 156 ++++++++++++++++++++++++++++----------- tests/widget.test.ts | 114 ++++++++++++++++++++++++++++ 4 files changed, 350 insertions(+), 70 deletions(-) create mode 100644 tests/widget.test.ts diff --git a/design-decisions.md b/design-decisions.md index a6027c4..54c56b8 100644 --- a/design-decisions.md +++ b/design-decisions.md @@ -226,7 +226,8 @@ Step 6: plan mutation tools unlocked. - Implementation guardrails: - Continue rendering through `canvasLine()` so the background fills full terminal width. - Keep consistent card padding and solid-border framing through shared `renderBox()` helpers. - - Phase chips use stable semantic tokens (accent active, bold muted completed, muted pending, error failed). + - Header metadata carries active workflow context (`Planning · · `), with timer right-aligned on the same row. + - The old phase-tab strip is removed (no duplicated heading context). - Vertical rail remains width-bounded (~20 cols) so the right detail pane keeps enough budget for high-signal telemetry. 
- Detail footer (`Plan · id`) is pinned bottom via dynamic padding, independent of timeline density. - Planning body and latest-log body share one outer card, separated by an internal divider for better cohesion. @@ -258,6 +259,16 @@ Step 6: plan mutation tools unlocked. - Counter line emphasizes severity (`fail` highlighted in error color) so blocking issues pop in long sessions. - Detail pane hierarchy is explicit: `Current step` label first, then step body, then QR section. +### UI-4: Header-First Metadata (No Tabs Row) +- Chosen on Feb 26 2026 via follow-up deck focused on full-widget renders (`Phase-first header`). +- Rationale: the old title + tabs combination duplicated active-phase context and made the top of the widget feel offset from the frame. Consolidating into a full-width metadata header improves hierarchy and scan speed. +- Contract: + - Keep a full top border and render one header row: `Planning · <phase> · <status>` + right-aligned elapsed timer. + - Remove the dedicated tabs/chips row under the title. + - Keep phase progression in the left timeline rail (status history remains visible without tabs). + - Apply deterministic truncation in this order when width is constrained: abbreviate status -> drop status -> abbreviate phase label -> ellipsis. + - Footer identity table remains key/value aligned: `Plan ID`, `Agent`/`Agent pool`, `Model`.
+ ## Workflow Dispatch Architecture ### WorkflowDispatch (dispatch pattern) diff --git a/docs/planning-widget.md b/docs/planning-widget.md index 19d8c0d..36c51e4 100644 --- a/docs/planning-widget.md +++ b/docs/planning-widget.md @@ -5,6 +5,7 @@ The planning widget now follows the design-deck contract selected on Feb 25 2026 - **Canvas direction:** Stacked Modular Cards - **Navigation direction:** Vertical Timeline Rail +- **Header strategy:** Full-width top border + metadata header row (active phase in header, no tabs strip) - **Log strategy:** Declarative shape-table serialization + dense two-column layout - **QR strategy:** Inline integrated section (not a detached sub-card) @@ -46,44 +47,83 @@ The goal is to keep a long-running (1-2h) planning session readable in real time **Rationale:** QR is not optional side telemetry; it is the acceptance loop for the plan. The UI should communicate that structural importance while remaining legible and shape-stable at smaller widths. +### 5) Header-first metadata, tabs removed +- Keep a full top border and put active workflow context directly in the header row. +- Header format is phase-first: `Planning · <phase> · <status>` on the left, elapsed timer right-aligned. +- Remove the separate phase-tabs strip entirely; it is redundant once active context is in the header. +- Keep timeline rows in the body (left rail) because they provide progression context and status history, unlike tabs. + +**Rationale:** The previous title treatment felt detached from the frame and duplicated information with the tabs row. Consolidating context into the header yields a cleaner hierarchy and better information density in TUI constraints.
+ ## Layout Overview ``` -┌──────────────────────────────── Planning ────────────────────────────────────┐ -│ ┃ Context gathering ┃ ┃ Plan design ┃ ┃ Plan code ┃ ┃ Plan docs ┃ │ -│ │ -│ ● Context gathering qr-decompose: Step 2/13: Holistic Concerns │ -│ │ DONE read CLAUDE.md · 41L/1709c │ -│ │ │ -│ ● Plan design QR | phase:decompose · iter 1/6 initial │ -│ │ CURRENT Execute → QR decompose → QR verify │ -│ │ done:0/24 pass:0 fail:0 todo:24 │ -│ │ ──────────────────────────────────────────────── │ -│ ○ Plan code Plan · │ -│ │ UPCOMING │ -│ ○ Plan docs │ -│──────────────────────────────────────────────────────────────────────────────│ -│ Latest log │ -│ koan_set_milestone_tests id=M-002 · tests:["covers retries"] +7 │ -│ koan_get_milestone id=M-002 · resp:42L/3.1k │ -│ koan_add_intent milestone=M-002 · file=src/planner/ui/widget.ts │ -│ koan_set_change_diff id=CC-M-001-002 · diff:184L/9.2k │ -│ koan_qr_assign_group phase=plan-design · ids:[QR-001] +11 │ -└──────────────────────────────────────────────────────────────────────────────┘ +┌────────────────────────────────────────────────────────────────────────────────┐ +│ Planning · Context gathering · CURRENT 12m 22s │ +│ │ +│ ● Context gathering Current step │ +│ │ DONE Step 2/6: Codebase Exploration │ +│ │ read internal/rules/CLAUDE.md · 17L/1.2k │ +│ ● Plan design QR | phase:execute · iter 1/6 initial │ +│ │ CURRENT Execute → QR decompose → QR verify │ +│ ○ Plan code done:0/- pass:0 fail:0 todo:- │ +│ │ UPCOMING Subagents queued:0 active:1 done:0 │ +│ ○ Plan docs Plan ID : │ +│ UPCOMING Agent : architect │ +│ Model : openai-codex/gpt-5.3-codex │ +│────────────────────────────────────────────────────────────────────────────────│ +│ Latest log │ +│ koan_set_milestone_tests id=M-002 · tests:["covers retries"] +7 │ +│ koan_get_milestone id=M-002 · resp:42L/3.1k │ +│ koan_add_intent milestone=M-002 · file=src/planner/ui/widget.ts │ +│ koan_set_change_diff id=CC-M-001-002 · diff:184L/9.2k │ +│ koan_qr_assign_group 
phase=plan-design · ids:[QR-001] +11 │ +└────────────────────────────────────────────────────────────────────────────────┘ ``` ## Rendering Guide 1. **Canvas** – Keep using `canvasLine()` so widget content remains full-width over `toolPendingBg`. -2. **Main card** – Keep solid border + consistent inner padding via shared `renderBox()` helper. -3. **Timeline rail** – Maintain status icon/color semantics (`active=accent`, `done=dim`, `failed=error`). -4. **Detail pane** – Render in this order: +2. **Main card** – Keep one solid outer border + a full top rule. No cutout title and no detached title badge. +3. **Header row** – Render `Planning · · ` on the left and elapsed timer right-aligned on the same row. +4. **No tabs strip** – Do not render a separate phase-tabs row under the header. Active phase context now lives in header metadata. +5. **Timeline rail** – Maintain status icon/color semantics (`active=accent`, `done=dim`, `failed=error`). +6. **Detail pane** – Render in this order: - a dim section label (`Current step`) to create hierarchy - step title + optional activity - QR integrated section (if visible) - - footer metadata (`Plan · ID`) pinned to bottom via dynamic padding -5. **QR section** – Use inline header + phase rail + metadata line + divider. Avoid nested border style to keep it visually native to the right pane. Keep line geometry stable (fixed 3-line payload + divider) and enforce a 64-char metadata budget before clamping to pane width. -6. **Latest log section** – Keep it inside the same outer card, separated by a horizontal divider. Reuse the same left/right column split (`timelineWidth` / `detailWidth`) and gap as the planning body so vertical alignment stays consistent. + - subagent counters (`queued/active/done`) when available + - identity table (`Plan ID`, `Agent`/`Agent pool`, `Model`) pinned low in pane +7. **QR section** – Use inline header + phase rail + metadata line + divider. 
Avoid nested border style to keep it visually native to the right pane. Keep line geometry stable (fixed 3-line payload + divider) and enforce a 64-char metadata budget before clamping to pane width. +8. **Latest log section** – Keep it inside the same outer card, separated by a horizontal divider. Reuse the same left/right column split (`timelineWidth` / `detailWidth`) and gap as the planning body so vertical alignment stays consistent. + +## Header + Alignment Contract + +### Header composition +- Inner card width is `W` (visible cells, excluding borders). +- Timer token is right-aligned and reserved first (`T` visible cells). +- Left header budget is `W - T - 1` (one spacer between left and right chunks). +- Base left chunk: `Planning · · `. + +### Progressive compaction (left header) +Apply in order until it fits: +1. `CURRENT` -> `CUR`, `UPCOMING` -> `UP`, `DONE` unchanged. +2. Drop status chunk (keep `Planning · `). +3. Abbreviate known phases (`Context gathering` -> `Ctx gather`, `Plan design` -> `Design`, `Plan code` -> `Code`, `Plan docs` -> `Docs`). +4. Ellipsize active phase tail (`Planning · `). + +### Metadata table alignment +- Keys are fixed labels: `Plan ID`, `Agent` or `Agent pool`, `Model`. +- Compute key column width from max visible key length in the rendered set. +- Use a fixed `" : "` separator. +- Values are right-column free text, truncated with ellipsis when overflowing pane width. + +### Latest-log alignment +- Keep deterministic two-column geometry shared with body split. +- Left column width is based on observed max tool name (capped); right column gets remaining width. +- High-value rows may wrap to two lines max; second line must still obey right-column width budget. 
## Data Contract Notes +- Header metadata state includes: + - `activePhaseLabel`, `activePhaseStatus`, `elapsed` - `LogLine` now carries: - `tool` (left column) - `summary` (right column) @@ -95,3 +135,46 @@ The goal is to keep a long-running (1-2h) planning session readable in real time ## Future Work (contracted, not yet implemented) - Plan execution phase should reuse the same QR integrated section semantics. - Optional compact mode for very narrow terminals can reduce metadata verbosity while preserving deterministic ordering. + +## Update: Runtime Domains + Subagent Identity (2026-02-26) + +This update captures follow-up decisions for showing subagent model information +and clarifying QR vs. parallel subagent semantics. + +### Domain split (do not merge) +- **QR section** tracks quality state: `todo`, `pass`, `fail`. +- **Subagents section** tracks execution state: `queued`, `active`, `done`. +- These are sibling runtime views. They are related in workflow, but not + collapsed into one metric family. + +### `x` meaning in parallel mode +- `x` means configured pool capacity (target parallelism), not active count. +- Active movement remains in `queued/active/done` counters. + +### Footer identity table standard +Use a unified key/value footer block: + +- `Plan ID : ` +- `Agent : ` (single subagent) +- `Agent pool : x` (parallel mode) +- `Model : ` + +### Generic rendering rule +The widget should remain role-agnostic and render identity from generic metadata +only: +- `role` +- `parallelCount` +- `model` + +Label/value rule: +- `parallelCount > 1` -> `Agent pool : x` +- otherwise -> `Agent : ` + +### View-composition pattern +Use section-level selectors/renderers (React-view-like composition without +React) so QR, subagent status, and identity/footer blocks are independently +composable and testable. 
+ +### Decision hygiene +A separate "layout pattern" decision was deemed redundant once the domain split +was chosen; track it as derived behavior, not as a distinct product decision. diff --git a/src/planner/ui/widget.ts b/src/planner/ui/widget.ts index 90ecc5a..84320cc 100644 --- a/src/planner/ui/widget.ts +++ b/src/planner/ui/widget.ts @@ -217,23 +217,84 @@ function normalizeLogLines(lines: readonly LogLine[] | undefined): LogLine[] { return [...lines].slice(-(LOG_LINES * 2)); } -function phaseChipLabel(phase: PhaseEntry, index: number, state: WidgetState, theme: Theme): string { - const label = `┃ ${phase.label} ┃`; - if (index === state.activeIndex) { - return theme.bold(theme.fg("accent", label)); +const HEADER_STATUS_SHORT: Record = { + CURRENT: "CUR", + UPCOMING: "UP", + DONE: "DONE", + FAILED: "FAIL", +}; + +const HEADER_PHASE_SHORT: Record = { + "Context gathering": "Ctx gather", + "Plan design": "Design", + "Plan code": "Code", + "Plan docs": "Docs", +}; + +interface PlanningHeaderVariant { + label: string; + phase: string | null; + status: string | null; +} + +function selectPlanningHeaderVariant(phaseLabel: string, statusLabel: string, budget: number): PlanningHeaderVariant { + const phaseShort = HEADER_PHASE_SHORT[phaseLabel] ?? phaseLabel; + const statusShort = HEADER_STATUS_SHORT[statusLabel] ?? 
statusLabel; + + const truncatedPhase = truncateToWidth( + phaseShort, + Math.max(0, budget - visibleWidth("Planning · ")), + "…", + false, + ); + + const candidates: PlanningHeaderVariant[] = [ + { label: `Planning · ${phaseLabel} · ${statusLabel}`, phase: phaseLabel, status: statusLabel }, + { label: `Planning · ${phaseLabel} · ${statusShort}`, phase: phaseLabel, status: statusShort }, + { label: `Planning · ${phaseLabel}`, phase: phaseLabel, status: null }, + { label: `Planning · ${phaseShort}`, phase: phaseShort, status: null }, + { label: `Planning · ${truncatedPhase}`, phase: truncatedPhase, status: null }, + { label: "Planning", phase: null, status: null }, + ]; + + for (const candidate of candidates) { + if (visibleWidth(candidate.label) <= budget) { + return candidate; + } } - if (phase.status === "completed") { - return theme.bold(theme.fg("muted", label)); + + return { + label: truncateToWidth("Planning", budget, "…", false), + phase: null, + status: null, + }; +} + +export function formatPlanningHeaderLabel(phaseLabel: string, statusLabel: string, budget: number): string { + return selectPlanningHeaderVariant(phaseLabel, statusLabel, budget).label; +} + +function renderPlanningHeader(state: WidgetState, theme: Theme, budget: number): string { + const active = activePhase(state); + const phaseLabel = active?.label ?? "Complete"; + const statusLabel = (active ? STATUS_TAG[active.status] : "done").toUpperCase(); + const variant = selectPlanningHeaderVariant(phaseLabel, statusLabel, budget); + + if (!variant.label.startsWith("Planning")) { + return theme.bold(theme.fg("accent", variant.label)); } - if (phase.status === "failed") { - return theme.fg("error", label); + + const statusColor: ThemeColor = active ? 
STATUS_COLOR[active.status] : "dim"; + + if (!variant.phase) { + return theme.bold(theme.fg("accent", variant.label)); } - return theme.fg("muted", label); -} -function renderPhaseChips(state: WidgetState, theme: Theme, width: number): string { - const chips = state.phases.map((phase, index) => phaseChipLabel(phase, index, state, theme)); - return clampToWidth(chips.join(" "), width, "…"); + let result = `${theme.bold(theme.fg("accent", "Planning"))}${theme.fg("muted", " · ")}${theme.fg("muted", variant.phase)}`; + if (variant.status) { + result += `${theme.fg("muted", " · ")}${theme.bold(theme.fg(statusColor, variant.status))}`; + } + return result; } function renderTimelineLines(state: WidgetState, theme: Theme, width: number): string[] { @@ -477,8 +538,6 @@ interface IdentityView { model: string; } -const IDENTITY_KEY_WIDTH = 10; - function shouldShowSubagentSection(state: WidgetState): boolean { if (state.subagentRole) return true; return state.subagentQueued !== null || state.subagentActive !== null || state.subagentDone !== null; @@ -533,16 +592,18 @@ function identityView(state: WidgetState): IdentityView { }; } -function renderIdentityRow(theme: Theme, width: number, key: string, value: string): string { - const padded = key.padEnd(IDENTITY_KEY_WIDTH, " "); +function renderIdentityRow(theme: Theme, width: number, keyWidth: number, key: string, value: string): string { + const padded = key.padEnd(keyWidth, " "); return clampToWidth(`${theme.fg("muted", padded)} : ${theme.fg("dim", value)}`, width, "…"); } function renderIdentitySection(view: IdentityView, theme: Theme, width: number): string[] { + const keys = ["Plan ID", view.agentLabel, "Model"]; + const keyWidth = Math.max(...keys.map((key) => visibleWidth(key))); return [ - renderIdentityRow(theme, width, "Plan ID", view.planId), - renderIdentityRow(theme, width, view.agentLabel, view.agentValue), - renderIdentityRow(theme, width, "Model", view.model), + renderIdentityRow(theme, width, keyWidth, "Plan 
ID", view.planId), + renderIdentityRow(theme, width, keyWidth, view.agentLabel, view.agentValue), + renderIdentityRow(theme, width, keyWidth, "Model", view.model), ]; } @@ -661,9 +722,31 @@ function renderBox( return [top, ...content, bottom]; } +function renderBoxWithHeaderRow( + headerLeft: string, + headerRight: string, + body: string[], + width: number, + border: BorderStyle = BORDER_SOLID, +): string[] { + const innerWidth = Math.max(0, width - 2); + const left = visibleWidth(headerLeft) > innerWidth ? truncateToWidth(headerLeft, innerWidth, "", false) : headerLeft; + const right = visibleWidth(headerRight) > innerWidth ? truncateToWidth(headerRight, innerWidth, "", false) : headerRight; + const headerContent = rightAlign(left, right, innerWidth); + + const top = `${border.topLeft}${clampToWidth(border.horizontal.repeat(innerWidth), innerWidth)}${border.topRight}`; + const header = `${border.vertical}${clampToWidth(headerContent, innerWidth)}${border.vertical}`; + const headerDivider = `${border.vertical}${clampToWidth(border.horizontal.repeat(innerWidth), innerWidth)}${border.vertical}`; + const content = body.map((line) => `${border.vertical}${clampToWidth(line, innerWidth)}${border.vertical}`); + const bottom = `${border.bottomLeft}${clampToWidth(border.horizontal.repeat(innerWidth), innerWidth)}${border.bottomRight}`; + + return [top, header, headerDivider, ...content, bottom]; +} + function renderPlanningCard(state: WidgetState, theme: Theme, width: number): string[] { const elapsed = theme.fg("dim", formatElapsed(Date.now() - state.startedAt)); const { innerWidth, contentWidth, timelineWidth, detailWidth } = planningColumns(width); + const titleLeft = renderPlanningHeader(state, theme, Math.max(0, innerWidth - visibleWidth(elapsed) - 1)); if (innerWidth < 60 || contentWidth < 40) { const fallbackContent: string[] = [ @@ -671,7 +754,6 @@ function renderPlanningCard(state: WidgetState, theme: Theme, width: number): st theme.fg("muted", `Plan · 
${state.planId}`), "", formatStepLine(state, theme), - formatPhaseTrail(state, theme, contentWidth), ]; const detail = formatDetail(state, theme, contentWidth); if (detail) fallbackContent.push(detail); @@ -691,7 +773,7 @@ function renderPlanningCard(state: WidgetState, theme: Theme, width: number): st const body = indentLines(fallbackContent, innerWidth); return renderBox( - `${BODY_INDENT}${theme.bold(theme.fg("accent", "Planning"))}`, + `${BODY_INDENT}${titleLeft}`, elapsed, body, width, @@ -699,8 +781,6 @@ function renderPlanningCard(state: WidgetState, theme: Theme, width: number): st ); } - const chipsLine = renderPhaseChips(state, theme, contentWidth); - const timelineLines = renderTimelineLines(state, theme, timelineWidth); const detailSections = buildDetailSections(state, theme, detailWidth); const detailLines = layoutDetailColumn(detailSections, detailWidth, timelineLines.length); @@ -716,8 +796,6 @@ function renderPlanningCard(state: WidgetState, theme: Theme, width: number): st const body = indentLines( [ - "", - chipsLine, "", ...combined, "", @@ -726,7 +804,7 @@ function renderPlanningCard(state: WidgetState, theme: Theme, width: number): st ); return renderBox( - `${BODY_INDENT}${theme.bold(theme.fg("accent", "Planning"))}`, + `${BODY_INDENT}${titleLeft}`, elapsed, body, width, @@ -836,17 +914,6 @@ function renderLogCard(state: WidgetState, theme: Theme, width: number, forcedCo ); } -function formatPhaseTrail(state: WidgetState, theme: Theme, width: number): string { - const parts = state.phases.map((phase, index) => { - const icon = STATUS_ICON[phase.status]; - const color = STATUS_COLOR[phase.status]; - const label = index === state.activeIndex ? theme.bold(phase.label) : phase.label; - return theme.fg(color, `${icon} ${label}`); - }); - const trail = parts.join(" "); - return clampToWidth(trail, width, "…"); -} - function formatDetail(state: WidgetState, theme: Theme, width: number): string { const step = state.step ? 
theme.fg("muted", state.step) : ""; const activity = state.activity ? theme.fg("dim", ` · ${state.activity}`) : ""; @@ -904,7 +971,6 @@ function stripBoxFrame(lines: string[]): string[] { function renderIntegratedWorkspaceCard(state: WidgetState, theme: Theme, width: number): string[] { const innerWidth = Math.max(0, width - 2); const elapsed = theme.fg("dim", formatElapsed(Date.now() - state.startedAt)); - const rightInset = " ".repeat(visibleWidth(BODY_INDENT)); const { innerWidth: planningInnerWidth, contentWidth, timelineWidth, detailWidth } = planningColumns(width); const alignedColumns: LogColumns | undefined = planningInnerWidth >= 60 && contentWidth >= 40 @@ -926,12 +992,18 @@ function renderIntegratedWorkspaceCard(state: WidgetState, theme: Theme, width: ...logInner, ]; - return renderBox( - `${BODY_INDENT}${theme.bold(theme.fg("accent", "Planning"))}`, + const rightInset = " ".repeat(visibleWidth(BODY_INDENT)); + const titleLeftBudget = Math.max( + 0, + innerWidth - visibleWidth(elapsed) - visibleWidth(rightInset) - 1 - visibleWidth(BODY_INDENT), + ); + const titleLeft = renderPlanningHeader(state, theme, titleLeftBudget); + + return renderBoxWithHeaderRow( + `${BODY_INDENT}${titleLeft}`, `${elapsed}${rightInset}`, body, width, - theme, ); } diff --git a/tests/widget.test.ts b/tests/widget.test.ts new file mode 100644 index 0000000..9dfa07e --- /dev/null +++ b/tests/widget.test.ts @@ -0,0 +1,114 @@ +import assert from "node:assert/strict"; +import { describe, it } from "node:test"; + +import type { ExtensionUIContext, Theme } from "@mariozechner/pi-coding-agent"; +import { visibleWidth } from "@mariozechner/pi-tui"; + +import { WidgetController, formatPlanningHeaderLabel } from "../src/planner/ui/widget.js"; + +type WidgetInstance = { + render: (width: number) => string[]; + invalidate: () => void; +}; + +type WidgetFactory = ((tui: unknown, theme: Theme) => WidgetInstance) | undefined; + +function createPlainTheme(): Theme { + return { + fg: (_color: 
string, text: string) => text, + bg: (_color: string, text: string) => text, + bold: (text: string) => text, + } as unknown as Theme; +} + +function createWidgetHarness(): { + controller: WidgetController; + render: (width: number) => string[]; + destroy: () => void; +} { + const theme = createPlainTheme(); + let factory: WidgetFactory; + + const ui = { + theme, + setWidget: (_key: string, next: WidgetFactory) => { + factory = next; + }, + } as unknown as ExtensionUIContext; + + const controller = new WidgetController(ui, "plan-test-id"); + + return { + controller, + render: (width: number) => { + assert.ok(factory, "widget factory should be registered"); + return factory({} as unknown, theme).render(width); + }, + destroy: () => controller.destroy(), + }; +} + +describe("formatPlanningHeaderLabel", () => { + it("applies compaction in deterministic order", () => { + const phase = "Context gathering"; + const status = "CURRENT"; + + const full = `Planning · ${phase} · ${status}`; + const shortStatus = `Planning · ${phase} · CUR`; + const noStatus = `Planning · ${phase}`; + const shortPhase = "Planning · Ctx gather"; + + assert.equal(formatPlanningHeaderLabel(phase, status, visibleWidth(full)), full); + assert.equal(formatPlanningHeaderLabel(phase, status, visibleWidth(full) - 1), shortStatus); + assert.equal(formatPlanningHeaderLabel(phase, status, visibleWidth(shortStatus) - 1), noStatus); + assert.equal(formatPlanningHeaderLabel(phase, status, visibleWidth(noStatus) - 1), shortPhase); + + const tiny = formatPlanningHeaderLabel(phase, status, 14); + assert.ok(visibleWidth(tiny) <= 14); + assert.ok(tiny.startsWith("Planning")); + }); +}); + +describe("WidgetController rendering", () => { + it("renders metadata header and removes phase chips row", () => { + const harness = createWidgetHarness(); + try { + const lines = harness.render(140); + const text = lines.join("\n"); + + assert.match(text, /Planning · Context gathering · CURRENT/); + assert.doesNotMatch(text, /┃ 
Context gathering ┃/); + } finally { + harness.destroy(); + } + }); + + it("aligns identity table separator using dynamic key width", () => { + const harness = createWidgetHarness(); + try { + harness.controller.update({ + subagentRole: "reviewer", + subagentParallelCount: 12, + subagentModel: "openai-codex/gpt-5.3-codex", + }); + + const lines = harness.render(140); + const planLine = lines.find((line) => line.includes("Plan ID") && line.includes(" : ")); + const agentLine = lines.find((line) => line.includes("Agent pool") && line.includes(" : ")); + const modelLine = lines.find((line) => line.includes("Model") && line.includes(" : ")); + + assert.ok(planLine, "expected Plan ID row"); + assert.ok(agentLine, "expected Agent pool row"); + assert.ok(modelLine, "expected Model row"); + + const planSep = planLine.indexOf(" : "); + const agentSep = agentLine.indexOf(" : "); + const modelSep = modelLine.indexOf(" : "); + + assert.equal(planSep, agentSep); + assert.equal(agentSep, modelSep); + } finally { + harness.destroy(); + } + }); +}); From 009412ad27fd9d6a8409d00393f3a276dec146fb Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Fri, 27 Feb 2026 19:35:45 +0700 Subject: [PATCH 027/412] Implement plan-code/plan-docs phases and plan markdown rendering --- src/planner/lib/permissions.ts | 3 + src/planner/phases/dispatch.ts | 145 ++++-- src/planner/phases/plan-code/fix-phase.ts | 166 +++++++ src/planner/phases/plan-code/fix-prompts.ts | 103 ++++ src/planner/phases/plan-code/phase.ts | 169 +++++++ src/planner/phases/plan-code/prompts.ts | 121 +++++ src/planner/phases/plan-docs/fix-phase.ts | 166 +++++++ src/planner/phases/plan-docs/fix-prompts.ts | 103 ++++ src/planner/phases/plan-docs/phase.ts | 169 +++++++ src/planner/phases/plan-docs/prompts.ts | 145 ++++++ src/planner/phases/qr-decompose/phase.ts | 64 +-- src/planner/phases/qr-decompose/prompts.ts | 154 +++--- src/planner/phases/qr-verify/phase.ts | 63 +-- src/planner/phases/qr-verify/prompts.ts | 49 +- 
src/planner/plan/render.ts | 155 ++++++ src/planner/plan/validate.ts | 46 +- src/planner/session.ts | 520 +++++++++++--------- src/planner/state.ts | 17 +- src/planner/subagent.ts | 70 ++- 19 files changed, 1929 insertions(+), 499 deletions(-) create mode 100644 src/planner/phases/plan-code/fix-phase.ts create mode 100644 src/planner/phases/plan-code/fix-prompts.ts create mode 100644 src/planner/phases/plan-code/phase.ts create mode 100644 src/planner/phases/plan-code/prompts.ts create mode 100644 src/planner/phases/plan-docs/fix-phase.ts create mode 100644 src/planner/phases/plan-docs/fix-prompts.ts create mode 100644 src/planner/phases/plan-docs/phase.ts create mode 100644 src/planner/phases/plan-docs/prompts.ts create mode 100644 src/planner/plan/render.ts diff --git a/src/planner/lib/permissions.ts b/src/planner/lib/permissions.ts index 5151f7a..aee6f7f 100644 --- a/src/planner/lib/permissions.ts +++ b/src/planner/lib/permissions.ts @@ -126,7 +126,10 @@ export const PHASE_PERMISSIONS: ReadonlyMap> = "koan_set_change_doc_diff", "koan_set_change_comments", "koan_set_readme_entry", + "koan_add_diagram", "koan_set_diagram", + "koan_add_diagram_node", + "koan_add_diagram_edge", ]), ], [ diff --git a/src/planner/phases/dispatch.ts b/src/planner/phases/dispatch.ts index 9bfba42..3762a36 100644 --- a/src/planner/phases/dispatch.ts +++ b/src/planner/phases/dispatch.ts @@ -10,6 +10,10 @@ import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; import { PlanDesignPhase } from "./plan-design/phase.js"; import { PlanDesignFixPhase } from "./plan-design/fix-phase.js"; +import { PlanCodePhase } from "./plan-code/phase.js"; +import { PlanCodeFixPhase } from "./plan-code/fix-phase.js"; +import { PlanDocsPhase } from "./plan-docs/phase.js"; +import { PlanDocsFixPhase } from "./plan-docs/fix-phase.js"; import { QRDecomposePhase } from "./qr-decompose/phase.js"; import { QRVerifyPhase } from "./qr-verify/phase.js"; import { createLogger, type Logger } from 
"../../utils/logger.js"; @@ -22,7 +26,31 @@ export interface SubagentConfig { phase: string; planDir: string; subagentDir: string; - fix: string | null; // QR phase being fixed, null when initial mode + fix: string | null; +} + +type WorkPhaseKey = "plan-design" | "plan-code" | "plan-docs"; + +function parseWorkPhase(value: string | null): WorkPhaseKey | null { + if (value === "plan-design" || value === "plan-code" || value === "plan-docs") { + return value; + } + return null; +} + +function parseQRPhase(value: string): WorkPhaseKey | null { + if (!value.startsWith("qr-")) return null; + return parseWorkPhase(value.slice(3)); +} + +async function loadFixFailures(planDir: string, phase: WorkPhaseKey): Promise { + const qrPath = path.join(planDir, `qr-${phase}.json`); + try { + const raw = await fs.readFile(qrPath, "utf8"); + return JSON.parse(raw) as QRFile; + } catch { + return null; + } } // Detects subagent mode by checking flags set via CLI (pi -p --koan-role @@ -38,7 +66,6 @@ export function detectSubagentMode(pi: ExtensionAPI): SubagentConfig | null { const phase = pi.getFlag("koan-phase"); const planDir = pi.getFlag("koan-plan-dir"); const subagentDir = pi.getFlag("koan-subagent-dir"); - const fix = pi.getFlag("koan-fix"); return { @@ -60,34 +87,68 @@ export async function dispatchPhase( ): Promise { const logger = log ?? createLogger("Dispatch"); - if (config.role === "architect" && config.fix === "plan-design") { - // Dispatch reads the QR file here, not in session.ts. - // The fix architect runs as a separate process with only the plan - // directory path -- it cannot receive in-memory QR data from the - // parent session. Reading from disk at dispatch boundary is the - // only clean handoff point. - const qrPath = path.join(config.planDir, "qr-plan-design.json"); - let qrFile: QRFile; - try { - const raw = await fs.readFile(qrPath, "utf8"); - qrFile = JSON.parse(raw) as QRFile; - } catch (error) { - const msg = error instanceof Error ? 
error.message : String(error); - logger("Fix dispatch: failed to read QR file", { error: msg }); + // -- Fix modes -- + + const fixPhase = parseWorkPhase(config.fix); + if (fixPhase) { + const qrFile = await loadFixFailures(config.planDir, fixPhase); + if (!qrFile) { + logger("Fix dispatch: failed to read QR file", { phase: fixPhase }); return; } + const failures = qrFile.items.filter((i) => i.status === "FAIL"); if (failures.length === 0) { - logger("Fix dispatch: no FAIL items in QR file, skipping fix phase"); + logger("Fix dispatch: no FAIL items in QR file, skipping fix phase", { phase: fixPhase }); + return; + } + + if (config.role === "architect" && fixPhase === "plan-design") { + const phase = new PlanDesignFixPhase( + pi, + { planDir: config.planDir, failures }, + dispatch, + planRef, + logger, + eventLog, + ); + await phase.begin(); + return; + } + + if (config.role === "developer" && fixPhase === "plan-code") { + const phase = new PlanCodeFixPhase( + pi, + { planDir: config.planDir, failures }, + dispatch, + planRef, + logger, + eventLog, + ); + await phase.begin(); + return; + } + + if (config.role === "technical-writer" && fixPhase === "plan-docs") { + const phase = new PlanDocsFixPhase( + pi, + { planDir: config.planDir, failures }, + dispatch, + planRef, + logger, + eventLog, + ); + await phase.begin(); return; } - logger("Dispatching to plan-design fix workflow", { - planDir: config.planDir, - failureCount: failures.length, - }); - const phase = new PlanDesignFixPhase( + } + + // -- Work phases -- + + if (config.role === "architect" && config.phase === "plan-design") { + const phase = new PlanDesignPhase( pi, - { planDir: config.planDir, failures }, + { planDir: config.planDir }, dispatch, planRef, logger, @@ -97,9 +158,8 @@ export async function dispatchPhase( return; } - if (config.role === "architect" && config.phase === "plan-design") { - logger("Dispatching to plan-design workflow", { planDir: config.planDir }); - const phase = new 
PlanDesignPhase( + if (config.role === "developer" && config.phase === "plan-code") { + const phase = new PlanCodePhase( pi, { planDir: config.planDir }, dispatch, @@ -111,9 +171,8 @@ export async function dispatchPhase( return; } - if (config.role === "qr-decomposer" && config.phase === "qr-plan-design") { - logger("Dispatching to qr-decompose workflow", { planDir: config.planDir }); - const phase = new QRDecomposePhase( + if (config.role === "technical-writer" && config.phase === "plan-docs") { + const phase = new PlanDocsPhase( pi, { planDir: config.planDir }, dispatch, @@ -125,16 +184,32 @@ export async function dispatchPhase( return; } - if (config.role === "reviewer" && config.phase === "qr-plan-design") { + // -- QR phases -- + + const qrWorkPhase = parseQRPhase(config.phase); + if (config.role === "qr-decomposer" && qrWorkPhase) { + const phase = new QRDecomposePhase( + pi, + { planDir: config.planDir, workPhase: qrWorkPhase }, + dispatch, + planRef, + logger, + eventLog, + ); + await phase.begin(); + return; + } + + if (config.role === "reviewer" && qrWorkPhase) { const itemId = pi.getFlag("koan-qr-item") as string; if (!itemId) { logger("Reviewer missing --koan-qr-item flag"); return; } - logger("Dispatching to qr-verify workflow", { planDir: config.planDir, itemId }); + const phase = new QRVerifyPhase( pi, - { planDir: config.planDir, itemId }, + { planDir: config.planDir, itemId, workPhase: qrWorkPhase }, dispatch, planRef, logger, @@ -144,5 +219,9 @@ export async function dispatchPhase( return; } - logger("Unknown role/phase combination", { role: config.role, phase: config.phase }); + logger("Unknown role/phase combination", { + role: config.role, + phase: config.phase, + fix: config.fix, + }); } diff --git a/src/planner/phases/plan-code/fix-phase.ts b/src/planner/phases/plan-code/fix-phase.ts new file mode 100644 index 0000000..6f2df7e --- /dev/null +++ b/src/planner/phases/plan-code/fix-phase.ts @@ -0,0 +1,166 @@ +// Plan-code fix phase -- dynamic 
targeted QR repair workflow. + +import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; + +import { loadAndValidatePlanForPhase } from "../../plan/validate.js"; +import { loadPlanCodeSystemPrompt, buildPlanCodeSystemPrompt } from "./prompts.js"; +import { + fixStepName, + buildFixSystemPrompt, + fixStepGuidance, + formatFailuresXml, +} from "./fix-prompts.js"; +import { formatStep } from "../../lib/step.js"; +import type { QRItem } from "../../qr/types.js"; +import { createLogger, type Logger } from "../../../utils/logger.js"; +import { EventLog } from "../../lib/audit.js"; +import { hookDispatch, unhookDispatch, type WorkflowDispatch, type PlanRef } from "../../lib/dispatch.js"; +import { checkPermission, PLAN_MUTATION_TOOLS } from "../../lib/permissions.js"; + +interface FixState { + active: boolean; + step: number; + step1Prompt: string | null; + systemPrompt: string | null; +} + +export class PlanCodeFixPhase { + private readonly pi: ExtensionAPI; + private readonly planDir: string; + private readonly failures: ReadonlyArray; + private readonly log: Logger; + private readonly state: FixState; + private readonly eventLog: EventLog | undefined; + private readonly dispatch: WorkflowDispatch; + private readonly planRef: PlanRef; + + constructor( + pi: ExtensionAPI, + config: { planDir: string; failures: QRItem[] }, + dispatch: WorkflowDispatch, + planRef: PlanRef, + log?: Logger, + eventLog?: EventLog, + ) { + this.pi = pi; + this.planDir = config.planDir; + this.failures = config.failures; + this.dispatch = dispatch; + this.planRef = planRef; + this.log = log ?? 
createLogger("PlanCodeFix"); + this.eventLog = eventLog; + + this.state = { + active: false, + step: 1, + step1Prompt: null, + systemPrompt: null, + }; + + this.registerHandlers(); + } + + private get totalSteps(): number { + return 2 + this.failures.length; + } + + async begin(): Promise { + let basePrompt: string; + try { + basePrompt = await loadPlanCodeSystemPrompt(); + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + this.log("Fix phase aborted: cannot load system prompt", { error: message }); + return; + } + + const failuresXml = formatFailuresXml(this.failures); + const totalSteps = this.totalSteps; + this.state.systemPrompt = buildFixSystemPrompt( + buildPlanCodeSystemPrompt(basePrompt), + this.failures.length, + totalSteps, + ); + this.state.step1Prompt = formatStep(fixStepGuidance(1, totalSteps, { allFailuresXml: failuresXml })); + this.state.active = true; + this.state.step = 1; + this.planRef.dir = this.planDir; + + hookDispatch(this.dispatch, "onCompleteStep", () => this.handleStepComplete()); + + this.log("Starting plan-code fix workflow", { step: 1, totalSteps, failureCount: this.failures.length }); + await this.eventLog?.emitPhaseStart(totalSteps); + await this.eventLog?.emitStepTransition(1, fixStepName(1, totalSteps), totalSteps); + } + + private registerHandlers(): void { + this.pi.on("before_agent_start", () => { + if (!this.state.active || !this.state.systemPrompt) return undefined; + return { systemPrompt: this.state.systemPrompt }; + }); + + this.pi.on("context", (event) => { + if (!this.state.active) return undefined; + if (this.state.step !== 1 || !this.state.step1Prompt) return undefined; + + const messages = event.messages.map((m) => { + if (m.role === "user") return { ...m, content: this.state.step1Prompt! 
}; + return m; + }); + return { messages }; + }); + + this.pi.on("tool_call", (event) => { + if (!this.state.active) return undefined; + + const perm = checkPermission("plan-code", event.toolName); + if (!perm.allowed) return { block: true, reason: perm.reason }; + + const step = this.state.step; + const total = this.totalSteps; + const inFixRange = step >= 2 && step < total; + if (!inFixRange && PLAN_MUTATION_TOOLS.has(event.toolName)) { + return { + block: true, + reason: `${event.toolName} available in steps 2-${total - 1} (current: ${step})`, + }; + } + + return undefined; + }); + } + + private async handleStepComplete(): Promise<{ ok: boolean; prompt?: string; error?: string }> { + const prev = this.state.step; + const total = this.totalSteps; + + if (prev === total) { + const result = await this.handleFinalize(); + if (!result.ok) { + await this.eventLog?.emitPhaseEnd("failed", result.errors?.join("; ")); + return { ok: false, error: result.errors?.join("; ") }; + } + + this.state.active = false; + unhookDispatch(this.dispatch, "onCompleteStep"); + await this.eventLog?.emitPhaseEnd("completed"); + this.log("Fix phase complete, plan-code validation passed"); + return { ok: true, prompt: "Fix phase validation passed. Workflow complete." }; + } + + const next = prev + 1; + this.state.step = next; + + const item = next >= 2 && next < total ? 
/**
 * Serializes QR failures into the text block embedded in fix-step prompts.
 *
 * Each failure contributes four lines (opening line, check, finding or a
 * blank placeholder, closing line); the joined items are wrapped by one
 * leading and one trailing line.
 *
 * NOTE(review): as visible here the wrapper and element literals are
 * whitespace-only and `f.id` is never emitted -- confirm the literals
 * against the repository before relying on this output as XML.
 */
export function formatFailuresXml(failures: ReadonlyArray<QRItem>): string {
  const items = failures
    .map((f) =>
      [
        ` `,
        ` ${f.check}`,
        f.finding ? ` ${f.finding}` : " ",
        " ",
      ].join("\n"),
    )
    .join("\n");
  return ["", items, ""].join("\n");
}

/**
 * Short display name of a fix-workflow step.
 *
 * @param step       1-based step number.
 * @param totalSteps Total number of steps in the workflow.
 * @param item       Failure handled by this step, when it is a per-item step.
 * @returns A fixed name for the first and last step, otherwise `Fix <id>`
 *          (or `Fix item <n>` when no item is supplied).
 */
export function fixStepName(step: number, totalSteps: number, item?: QRItem): string {
  if (step === 1) return "Understand QR Failures";
  if (step === totalSteps) return "Review & Finalize";
  return item ? `Fix ${item.id}` : `Fix item ${step - 1}`;
}

/**
 * Appends the plan-code fix workflow rules to a base system prompt.
 *
 * @param basePrompt   Prompt text placed before the workflow section.
 * @param failureCount Number of QR failures reported to the model.
 * @param totalSteps   Total number of steps; per-item fix steps are
 *                     2..totalSteps-1.
 */
export function buildFixSystemPrompt(basePrompt: string, failureCount: number, totalSteps: number): string {
  // The per-item range is empty for totalSteps <= 2 and a single step for
  // totalSteps === 3; word those cases explicitly instead of printing the
  // degenerate ranges "2-1" / "2-2".
  const lastFixStep = totalSteps - 1;
  const fixStepsLine =
    lastFixStep < 2
      ? "There are no per-item fix steps."
      : lastFixStep === 2
        ? "Step 2 fixes exactly one failure."
        : `Steps 2-${lastFixStep} fix exactly one failure per step.`;

  return [
    basePrompt,
    "",
    "---",
    "",
    `WORKFLOW: ${totalSteps}-STEP PLAN-CODE FIX`,
    "",
    `You are fixing ${failureCount} QR failure(s) in code planning output.`,
    "Step 1 is read-only and covers all failures.",
    fixStepsLine,
    `Step ${totalSteps} is read-only review.`,
    "",
    "CONSTRAINTS:",
    "- Fix only identified failures",
    "- Preserve already-valid code_changes",
    "- Do not edit repository files (planning only)",
  ].join("\n");
}

/** Guidance for step 1: present every failure and ask for read-only analysis. */
function step1(totalSteps: number, failuresXml: string): StepGuidance {
  const itemCount = totalSteps - 2;
  const lastFixStep = totalSteps - 1;
  // Same degenerate-range handling as in buildFixSystemPrompt.
  const fixPlan =
    lastFixStep < 2
      ? "There are no per-item fix steps."
      : lastFixStep === 2
        ? "You will fix it in step 2."
        : `You will fix them one by one in steps 2-${lastFixStep}.`;

  return {
    title: `Step 1/${totalSteps}: Understand QR Failures`,
    instructions: [
      "QR FAILURES:",
      "",
      failuresXml,
      "",
      `There are ${itemCount} item(s). ${fixPlan}`,
      "Read current plan state with koan_get_plan / koan_get_change / koan_get_intent.",
      "Identify exact mismatch for each failure.",
      "",
      "This step is read-only.",
    ],
  };
}

/** Guidance for one per-item step; `item` may be absent, leaving the item block empty. */
function itemStep(step: number, totalSteps: number, item?: QRItem): StepGuidance {
  const itemXml = item ? formatFailuresXml([item]) : "";
  const idx = step - 1; // step 2 handles item 1
  const total = totalSteps - 2; // first and last step are not item steps
  return {
    title: `Step ${step}/${totalSteps}: Fix ${item?.id ?? `item ${idx}`}`,
    instructions: [
      `FIX ITEM ${idx} OF ${total}:`,
      "",
      itemXml,
      "",
      "Apply a targeted plan fix using change tools (add/set change, set intent ref, set comments).",
      "Do not batch-fix other failures in this step.",
      "Keep modifications minimal and scoped.",
    ],
  };
}

/** Guidance for the closing read-only review step. */
function finalStep(totalSteps: number): StepGuidance {
  return {
    title: `Step ${totalSteps}/${totalSteps}: Review & Finalize`,
    instructions: [
      "All per-item fixes are complete.",
      "Use koan_get_plan to verify overall coherence and coverage.",
      "Confirm fixed items are addressed without regressing passing items.",
      "",
      "This step is read-only.",
    ],
    invokeAfter: [
      "WHEN DONE: Call koan_get_plan, then call koan_complete_step.",
      "Do NOT call koan_complete_step before reviewing final plan state.",
    ].join("\n"),
  };
}

/**
 * Selects the guidance for a step of the fix workflow.
 *
 * @param step       1-based step number.
 * @param totalSteps Total number of steps.
 * @param opts       `allFailuresXml` is used by step 1, `item` by per-item steps.
 * @returns Step-1 guidance for `step === 1`, review guidance for
 *          `step === totalSteps`, per-item guidance for every other value.
 */
export function fixStepGuidance(
  step: number,
  totalSteps: number,
  opts?: { item?: QRItem; allFailuresXml?: string },
): StepGuidance {
  if (step === 1) return step1(totalSteps, opts?.allFailuresXml ?? "");
  if (step === totalSteps) return finalStep(totalSteps);
  return itemStep(step, totalSteps, opts?.item);
}
/** Step numbers of the plan-code workflow (titles live in STEP_NAMES). */
type PlanCodeStep = 1 | 2 | 3 | 4;

/** Mutable per-run state of the plan-code workflow. */
interface PlanCodeState {
  /** Set by a successful begin(); cleared once finalization passes. */
  active: boolean;
  step: PlanCodeStep;
  /** Rendered step-1 instructions injected into user messages while on step 1. */
  step1Prompt: string | null;
  contextData: ContextData | null;
  systemPrompt: string | null;
}

const TOTAL_STEPS = 4;
/** First step in which tools listed in PLAN_MUTATION_TOOLS are allowed. */
const MUTATION_UNLOCK_STEP = 3;

/**
 * Drives the 4-step plan-code workflow: installs event handlers on the
 * extension API, gates tool calls per step, and advances one step each time
 * the dispatch's "onCompleteStep" hook fires.
 */
export class PlanCodePhase {
  private readonly pi: ExtensionAPI;
  private readonly planDir: string;
  private readonly log: Logger;
  private readonly state: PlanCodeState;
  private readonly eventLog: EventLog | undefined;
  private readonly dispatch: WorkflowDispatch;
  private readonly planRef: PlanRef;

  /**
   * @param pi       Extension API the event handlers are registered on.
   * @param config   `planDir` is the directory holding context.json and the plan.
   * @param dispatch Workflow dispatch whose "onCompleteStep" hook this phase takes over.
   * @param planRef  Shared reference whose `dir` is pointed at `planDir` on begin().
   * @param log      Logger; defaults to `createLogger("PlanCode")`.
   * @param eventLog Optional audit log for phase/step events.
   */
  constructor(
    pi: ExtensionAPI,
    config: { planDir: string },
    dispatch: WorkflowDispatch,
    planRef: PlanRef,
    log?: Logger,
    eventLog?: EventLog,
  ) {
    this.pi = pi;
    this.planDir = config.planDir;
    this.dispatch = dispatch;
    this.planRef = planRef;
    this.log = log ?? createLogger("PlanCode");
    this.eventLog = eventLog;

    this.state = {
      active: false,
      step: 1,
      step1Prompt: null,
      contextData: null,
      systemPrompt: null,
    };

    // Handlers are installed once; each one is a no-op while `active` is false.
    this.registerHandlers();
  }

  /**
   * Activates the workflow at step 1.
   *
   * Reads `<planDir>/context.json` and the base system prompt. If either
   * fails, the error is logged and the method returns without activating
   * the phase (nothing is thrown).
   */
  async begin(): Promise<void> {
    const contextPath = path.join(this.planDir, "context.json");
    let contextData: ContextData;
    try {
      const raw = await fs.readFile(contextPath, "utf8");
      // NOTE(review): the parsed JSON is trusted to match ContextData; no
      // runtime validation happens here.
      contextData = JSON.parse(raw) as ContextData;
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      this.log("Failed to read context.json", { error: message });
      return;
    }
    this.state.contextData = contextData;

    let basePrompt: string;
    try {
      basePrompt = await loadPlanCodeSystemPrompt();
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      this.log("Failed to load plan-code system prompt", { error: message });
      return;
    }

    const contextXml = formatContextForStep1(contextData);
    this.state.systemPrompt = buildPlanCodeSystemPrompt(basePrompt);
    this.state.step1Prompt = formatStep(planCodeStepGuidance(1, contextXml));
    this.state.active = true;
    this.state.step = 1;
    this.planRef.dir = this.planDir;

    hookDispatch(this.dispatch, "onCompleteStep", () => this.handleStepComplete());

    this.log("Starting plan-code workflow", { step: 1 });
    await this.eventLog?.emitPhaseStart(TOTAL_STEPS);
    await this.eventLog?.emitStepTransition(1, STEP_NAMES[1], TOTAL_STEPS);
  }

  /** Installs the system-prompt, step-1 prompt injection and tool-gating handlers. */
  private registerHandlers(): void {
    this.pi.on("before_agent_start", () => {
      if (!this.state.active || !this.state.systemPrompt) return undefined;
      return { systemPrompt: this.state.systemPrompt };
    });

    // While on step 1, the content of every user-role message is replaced
    // with the rendered step-1 instructions.
    this.pi.on("context", (event) => {
      if (!this.state.active) return undefined;
      if (this.state.step !== 1 || !this.state.step1Prompt) return undefined;

      const step1Prompt = this.state.step1Prompt;
      const messages = event.messages.map((m) => {
        if (m.role === "user") return { ...m, content: step1Prompt };
        return m;
      });
      return { messages };
    });

    this.pi.on("tool_call", (event) => {
      if (!this.state.active) return undefined;

      // Phase-level permissions are checked first, then the per-step gate.
      const perm = checkPermission("plan-code", event.toolName);
      if (!perm.allowed) return { block: true, reason: perm.reason };

      if (this.state.step < MUTATION_UNLOCK_STEP && PLAN_MUTATION_TOOLS.has(event.toolName)) {
        return {
          block: true,
          reason: `${event.toolName} available from step ${MUTATION_UNLOCK_STEP} (current: ${this.state.step})`,
        };
      }

      return undefined;
    });
  }

  /**
   * Handles completion of the current step.
   *
   * On the last step the plan is validated: a failure is reported and the
   * phase stays active on that step; success deactivates the phase and
   * releases the dispatch hook. On any earlier step the workflow advances
   * and the next step's prompt is returned.
   */
  private async handleStepComplete(): Promise<{ ok: boolean; prompt?: string; error?: string }> {
    const prev = this.state.step;

    // Compare against the shared constant rather than a literal so the step
    // count is defined in exactly one place.
    if (prev === TOTAL_STEPS) {
      const result = await this.handleFinalize();
      if (!result.ok) {
        const error = result.errors?.join("; ");
        await this.eventLog?.emitPhaseEnd("failed", error);
        return { ok: false, error };
      }

      this.state.active = false;
      unhookDispatch(this.dispatch, "onCompleteStep");
      await this.eventLog?.emitPhaseEnd("completed");
      this.log("Plan-code finalized, workflow complete");
      return { ok: true, prompt: "Plan-code validation passed. Workflow complete." };
    }

    this.state.step = (prev + 1) as PlanCodeStep;
    const nextName = STEP_NAMES[this.state.step];
    const prompt = formatStep(planCodeStepGuidance(this.state.step));

    this.log("Step complete, advancing", { from: prev, to: this.state.step, name: nextName });
    await this.eventLog?.emitStepTransition(this.state.step, nextName, TOTAL_STEPS);
    return { ok: true, prompt };
  }

  /** Loads the plan from `planDir` and validates it for the plan-code phase. */
  private async handleFinalize(): Promise<{ ok: boolean; errors?: string[] }> {
    return loadAndValidatePlanForPhase(this.planDir, "plan-code", this.log);
  }
}
/** Step 1: read-only inventory of intents, seeded with the session context. */
function intentCoverageStep(context?: string): StepGuidance {
  const instructions = [
    "PLANNING CONTEXT (from session):",
    "",
    context ?? "",
    "",
    "Use koan_get_plan to inspect milestones and code_intents.",
    "Build a checklist of intents that need code_changes.",
    "Record target files and affected functions per intent.",
    "",
    "This step is read-only.",
  ];
  return { title: "Step 1: Intent Coverage Analysis", instructions };
}

/** Step 2: read-only inspection of the files the diffs will touch. */
function codebaseAnchoringStep(): StepGuidance {
  const instructions = [
    "Read target files to anchor each planned diff:",
    " - Use read/grep/find/bash as needed",
    " - Identify stable context lines around each change",
    " - Confirm naming/pattern conventions",
    "",
    "Do not create code_changes yet. This step is still read-only.",
  ];
  return { title: "Step 2: Codebase Anchoring", instructions };
}

/** Step 3: author code_changes with the plan mutation tools. */
function diffAuthoringStep(): StepGuidance {
  const instructions = [
    "Create code_changes for each intent using plan mutation tools:",
    " - koan_add_change (if missing)",
    " - koan_set_change_intent_ref",
    " - koan_set_change_file",
    " - koan_set_change_diff",
    " - koan_set_change_comments",
    "",
    "Rules:",
    " - Every code_intent must map to at least one code_change",
    " - Use valid unified diff format in diff field",
    " - comments explain WHY (reference decision IDs where relevant)",
    "",
    "Use koan_get_plan/koan_get_milestone to verify coverage as you go.",
  ];
  return { title: "Step 3: Diff Authoring", instructions };
}

/** Step 4: final coverage review; the only step carrying an `invokeAfter` note. */
function validationReviewStep(): StepGuidance {
  const instructions = [
    "Run a final coverage review using getter tools:",
    " - Every intent has at least one linked change",
    " - Every change has exact file path and non-empty diff",
    " - Diffs and comments are coherent with intent behavior",
    "",
    "Fix any gaps before completing this step.",
  ];
  const invokeAfter = [
    "WHEN DONE: Call koan_complete_step with a concise summary of coverage.",
    "Do NOT call this tool until all required code_changes are present.",
  ].join("\n");
  return { title: "Step 4: Validation & Review", instructions, invokeAfter };
}

/**
 * Returns the prompt guidance for one plan-code step.
 *
 * @param step    Step number, 1 through 4.
 * @param context Text embedded in the step-1 instructions; ignored by the
 *                other steps and rendered as an empty line when omitted.
 * @returns The step's title and instructions. A value outside 1-4 (only
 *          reachable by bypassing the type) yields an empty title and an
 *          empty instruction list.
 */
export function planCodeStepGuidance(step: 1 | 2 | 3 | 4, context?: string): StepGuidance {
  if (step === 1) return intentCoverageStep(context);
  if (step === 2) return codebaseAnchoringStep();
  if (step === 3) return diffAuthoringStep();
  if (step === 4) return validationReviewStep();
  return { title: "", instructions: [] };
}
/** Mutable state of one fix run; `step` is 1-based. */
interface FixState {
  active: boolean;
  step: number;
  step1Prompt: string | null;
  systemPrompt: string | null;
}

/**
 * Targeted repair workflow for plan-docs QR failures: one read-only analysis
 * step, one step per failure, and one closing review step.
 */
export class PlanDocsFixPhase {
  private readonly pi: ExtensionAPI;
  private readonly planDir: string;
  private readonly failures: ReadonlyArray<QRItem>;
  private readonly log: Logger;
  private readonly state: FixState;
  private readonly eventLog: EventLog | undefined;
  private readonly dispatch: WorkflowDispatch;
  private readonly planRef: PlanRef;

  /**
   * @param pi       Extension API the event handlers are registered on.
   * @param config   Plan directory and the QR failures to repair, in step order.
   * @param dispatch Dispatch whose "onCompleteStep" hook is taken over by begin().
   * @param planRef  Shared reference pointed at `planDir` by begin().
   * @param log      Logger; defaults to `createLogger("PlanDocsFix")`.
   * @param eventLog Optional audit log.
   */
  constructor(
    pi: ExtensionAPI,
    config: { planDir: string; failures: QRItem[] },
    dispatch: WorkflowDispatch,
    planRef: PlanRef,
    log?: Logger,
    eventLog?: EventLog,
  ) {
    const { planDir, failures } = config;
    this.pi = pi;
    this.planDir = planDir;
    this.failures = failures;
    this.dispatch = dispatch;
    this.planRef = planRef;
    this.log = log ?? createLogger("PlanDocsFix");
    this.eventLog = eventLog;
    this.state = { active: false, step: 1, step1Prompt: null, systemPrompt: null };

    this.registerHandlers();
  }

  /** Analysis step + one step per failure + review step. */
  private get totalSteps(): number {
    return 2 + this.failures.length;
  }

  /**
   * Activates the workflow at step 1. When the base system prompt cannot be
   * loaded the error is logged and the phase stays inactive.
   */
  async begin(): Promise<void> {
    let basePrompt: string;
    try {
      basePrompt = await loadPlanDocsSystemPrompt();
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      this.log("Fix phase aborted: cannot load system prompt", { error: message });
      return;
    }

    const failureCount = this.failures.length;
    const allFailuresXml = formatFailuresXml(this.failures);
    const totalSteps = this.totalSteps;

    const docsPrompt = buildPlanDocsSystemPrompt(basePrompt);
    this.state.systemPrompt = buildFixSystemPrompt(docsPrompt, failureCount, totalSteps);

    const firstStep = fixStepGuidance(1, totalSteps, { allFailuresXml });
    this.state.step1Prompt = formatStep(firstStep);

    this.state.active = true;
    this.state.step = 1;
    this.planRef.dir = this.planDir;

    hookDispatch(this.dispatch, "onCompleteStep", () => this.handleStepComplete());

    this.log("Starting plan-docs fix workflow", { step: 1, totalSteps, failureCount });
    await this.eventLog?.emitPhaseStart(totalSteps);
    await this.eventLog?.emitStepTransition(1, fixStepName(1, totalSteps), totalSteps);
  }

  /** Installs the three event handlers; each does nothing while the phase is inactive. */
  private registerHandlers(): void {
    this.pi.on("before_agent_start", () => {
      const { active, systemPrompt } = this.state;
      return active && systemPrompt ? { systemPrompt } : undefined;
    });

    // Only during step 1: swap the content of user-role messages for the
    // rendered step-1 instructions.
    this.pi.on("context", (event) => {
      const { active, step, step1Prompt } = this.state;
      if (!active) return undefined;
      if (step !== 1 || !step1Prompt) return undefined;

      return {
        messages: event.messages.map((message) =>
          message.role === "user" ? { ...message, content: step1Prompt } : message,
        ),
      };
    });

    this.pi.on("tool_call", (event) => {
      if (!this.state.active) return undefined;

      const permission = checkPermission("plan-docs", event.toolName);
      if (!permission.allowed) return { block: true, reason: permission.reason };

      // Plan mutation tools are usable only in the per-item steps.
      const current = this.state.step;
      const stepCount = this.totalSteps;
      const isFixStep = current >= 2 && current < stepCount;
      if (isFixStep || !PLAN_MUTATION_TOOLS.has(event.toolName)) return undefined;

      return {
        block: true,
        reason: `${event.toolName} available in steps 2-${stepCount - 1} (current: ${current})`,
      };
    });
  }

  /**
   * Completes the current step: validates and closes the phase on the last
   * step, otherwise moves on to the next one.
   */
  private async handleStepComplete(): Promise<{ ok: boolean; prompt?: string; error?: string }> {
    const finished = this.state.step;
    const stepCount = this.totalSteps;

    if (finished !== stepCount) return this.advanceFrom(finished, stepCount);

    const outcome = await this.handleFinalize();
    if (outcome.ok) {
      this.state.active = false;
      unhookDispatch(this.dispatch, "onCompleteStep");
      await this.eventLog?.emitPhaseEnd("completed");
      this.log("Fix phase complete, plan-docs validation passed");
      return { ok: true, prompt: "Fix phase validation passed. Workflow complete." };
    }

    const error = outcome.errors?.join("; ");
    await this.eventLog?.emitPhaseEnd("failed", error);
    return { ok: false, error };
  }

  /** Moves to the step after `finished` and returns its rendered prompt. */
  private async advanceFrom(
    finished: number,
    stepCount: number,
  ): Promise<{ ok: boolean; prompt?: string; error?: string }> {
    const upcoming = finished + 1;
    this.state.step = upcoming;

    // Step 2 handles failures[0], step 3 failures[1], and so on.
    const isItemStep = upcoming >= 2 && upcoming < stepCount;
    const item = isItemStep ? this.failures[upcoming - 2] : undefined;
    const name = fixStepName(upcoming, stepCount, item);
    const prompt = formatStep(fixStepGuidance(upcoming, stepCount, { item }));

    this.log("Fix step complete, advancing", { from: finished, to: upcoming, name });
    await this.eventLog?.emitStepTransition(upcoming, name, stepCount);
    return { ok: true, prompt };
  }

  /** Loads the plan from `planDir` and validates it for the plan-docs phase. */
  private async handleFinalize(): Promise<{ ok: boolean; errors?: string[] }> {
    return loadAndValidatePlanForPhase(this.planDir, "plan-docs", this.log);
  }
}
/**
 * Appends the plan-docs fix workflow rules to a base system prompt.
 *
 * @param basePrompt   Prompt text placed before the workflow section.
 * @param failureCount Number of QR failures reported to the model.
 * @param totalSteps   Total number of steps; per-item fix steps are
 *                     2..totalSteps-1.
 */
export function buildFixSystemPrompt(basePrompt: string, failureCount: number, totalSteps: number): string {
  // The per-item range is empty for totalSteps <= 2 and a single step for
  // totalSteps === 3; word those cases explicitly instead of printing the
  // degenerate ranges "2-1" / "2-2".
  const lastFixStep = totalSteps - 1;
  const fixStepsLine =
    lastFixStep < 2
      ? "There are no per-item fix steps."
      : lastFixStep === 2
        ? "Step 2 fixes exactly one failure."
        : `Steps 2-${lastFixStep} fix exactly one failure per step.`;

  return [
    basePrompt,
    "",
    "---",
    "",
    `WORKFLOW: ${totalSteps}-STEP PLAN-DOCS FIX`,
    "",
    `You are fixing ${failureCount} documentation-related QR failure(s).`,
    "Step 1 is read-only and covers all failures.",
    fixStepsLine,
    `Step ${totalSteps} is read-only review.`,
    "",
    "CONSTRAINTS:",
    "- Fix only identified failures",
    "- Keep docs timeless and decision-grounded",
    "- Preserve already-valid doc artifacts",
  ].join("\n");
}

/** Guidance for step 1: present every failure and ask for read-only analysis. */
function step1(totalSteps: number, failuresXml: string): StepGuidance {
  const itemCount = totalSteps - 2;
  const lastFixStep = totalSteps - 1;
  // Same degenerate-range handling as in buildFixSystemPrompt.
  const fixPlan =
    lastFixStep < 2
      ? "There are no per-item fix steps."
      : lastFixStep === 2
        ? "You will fix it in step 2."
        : `You will fix them one by one in steps 2-${lastFixStep}.`;

  return {
    title: `Step 1/${totalSteps}: Understand QR Failures`,
    instructions: [
      "QR FAILURES:",
      "",
      failuresXml,
      "",
      `There are ${itemCount} item(s). ${fixPlan}`,
      "Inspect current docs state via koan_get_plan / koan_get_change.",
      "Identify exact correction needed per item.",
      "",
      "This step is read-only.",
    ],
  };
}

/** Guidance for one per-item step; `item` may be absent, leaving the item block empty. */
function itemStep(step: number, totalSteps: number, item?: QRItem): StepGuidance {
  const itemXml = item ? formatFailuresXml([item]) : "";
  const idx = step - 1; // step 2 handles item 1
  const total = totalSteps - 2; // first and last step are not item steps
  return {
    title: `Step ${step}/${totalSteps}: Fix ${item?.id ?? `item ${idx}`}`,
    instructions: [
      `FIX ITEM ${idx} OF ${total}:`,
      "",
      itemXml,
      "",
      "Apply a targeted docs fix using doc tools (set doc_diff/comments/readme/diagram).",
      "Do not batch-fix other failures in this step.",
      "Keep changes minimal and scoped.",
    ],
  };
}

/** Guidance for the closing read-only review step. */
function finalStep(totalSteps: number): StepGuidance {
  return {
    title: `Step ${totalSteps}/${totalSteps}: Review & Finalize`,
    instructions: [
      "All per-item fixes are complete.",
      "Use koan_get_plan to verify docs coherence and completeness.",
      "Confirm fixed items are addressed without regressing passing items.",
      "",
      "This step is read-only.",
    ],
    invokeAfter: [
      "WHEN DONE: Call koan_get_plan, then call koan_complete_step.",
      "Do NOT call koan_complete_step before reviewing final plan state.",
    ].join("\n"),
  };
}

/**
 * Selects the guidance for a step of the fix workflow.
 *
 * @param step       1-based step number.
 * @param totalSteps Total number of steps.
 * @param opts       `allFailuresXml` is used by step 1, `item` by per-item steps.
 * @returns Step-1 guidance for `step === 1`, review guidance for
 *          `step === totalSteps`, per-item guidance for every other value.
 */
export function fixStepGuidance(
  step: number,
  totalSteps: number,
  opts?: { item?: QRItem; allFailuresXml?: string },
): StepGuidance {
  if (step === 1) return step1(totalSteps, opts?.allFailuresXml ?? "");
  if (step === totalSteps) return finalStep(totalSteps);
  return itemStep(step, totalSteps, opts?.item);
}
/** Step numbers of the plan-docs workflow (titles live in STEP_NAMES). */
type PlanDocsStep = 1 | 2 | 3 | 4 | 5 | 6;

/** Mutable per-run state of the plan-docs workflow. */
interface PlanDocsState {
  /** Set by a successful begin(); cleared once finalization passes. */
  active: boolean;
  step: PlanDocsStep;
  /** Rendered step-1 instructions injected into user messages while on step 1. */
  step1Prompt: string | null;
  contextData: ContextData | null;
  systemPrompt: string | null;
}

const TOTAL_STEPS = 6;
/** First step in which tools listed in PLAN_MUTATION_TOOLS are allowed. */
const MUTATION_UNLOCK_STEP = 3;

/**
 * Drives the 6-step plan-docs workflow: installs event handlers on the
 * extension API, gates tool calls per step, and advances one step each time
 * the dispatch's "onCompleteStep" hook fires.
 */
export class PlanDocsPhase {
  private readonly pi: ExtensionAPI;
  private readonly planDir: string;
  private readonly log: Logger;
  private readonly state: PlanDocsState;
  private readonly eventLog: EventLog | undefined;
  private readonly dispatch: WorkflowDispatch;
  private readonly planRef: PlanRef;

  /**
   * @param pi       Extension API the event handlers are registered on.
   * @param config   `planDir` is the directory holding context.json and the plan.
   * @param dispatch Workflow dispatch whose "onCompleteStep" hook this phase takes over.
   * @param planRef  Shared reference whose `dir` is pointed at `planDir` on begin().
   * @param log      Logger; defaults to `createLogger("PlanDocs")`.
   * @param eventLog Optional audit log for phase/step events.
   */
  constructor(
    pi: ExtensionAPI,
    config: { planDir: string },
    dispatch: WorkflowDispatch,
    planRef: PlanRef,
    log?: Logger,
    eventLog?: EventLog,
  ) {
    this.pi = pi;
    this.planDir = config.planDir;
    this.dispatch = dispatch;
    this.planRef = planRef;
    this.log = log ?? createLogger("PlanDocs");
    this.eventLog = eventLog;

    this.state = {
      active: false,
      step: 1,
      step1Prompt: null,
      contextData: null,
      systemPrompt: null,
    };

    // Handlers are installed once; each one is a no-op while `active` is false.
    this.registerHandlers();
  }

  /**
   * Activates the workflow at step 1.
   *
   * Reads `<planDir>/context.json` and the base system prompt. If either
   * fails, the error is logged and the method returns without activating
   * the phase (nothing is thrown).
   */
  async begin(): Promise<void> {
    const contextPath = path.join(this.planDir, "context.json");
    let contextData: ContextData;
    try {
      const raw = await fs.readFile(contextPath, "utf8");
      // NOTE(review): the parsed JSON is trusted to match ContextData; no
      // runtime validation happens here.
      contextData = JSON.parse(raw) as ContextData;
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      this.log("Failed to read context.json", { error: message });
      return;
    }
    this.state.contextData = contextData;

    let basePrompt: string;
    try {
      basePrompt = await loadPlanDocsSystemPrompt();
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      this.log("Failed to load plan-docs system prompt", { error: message });
      return;
    }

    const contextXml = formatContextForStep1(contextData);
    this.state.systemPrompt = buildPlanDocsSystemPrompt(basePrompt);
    this.state.step1Prompt = formatStep(planDocsStepGuidance(1, contextXml));
    this.state.active = true;
    this.state.step = 1;
    this.planRef.dir = this.planDir;

    hookDispatch(this.dispatch, "onCompleteStep", () => this.handleStepComplete());

    this.log("Starting plan-docs workflow", { step: 1 });
    await this.eventLog?.emitPhaseStart(TOTAL_STEPS);
    await this.eventLog?.emitStepTransition(1, STEP_NAMES[1], TOTAL_STEPS);
  }

  /** Installs the system-prompt, step-1 prompt injection and tool-gating handlers. */
  private registerHandlers(): void {
    this.pi.on("before_agent_start", () => {
      if (!this.state.active || !this.state.systemPrompt) return undefined;
      return { systemPrompt: this.state.systemPrompt };
    });

    // While on step 1, the content of every user-role message is replaced
    // with the rendered step-1 instructions.
    this.pi.on("context", (event) => {
      if (!this.state.active) return undefined;
      if (this.state.step !== 1 || !this.state.step1Prompt) return undefined;

      const step1Prompt = this.state.step1Prompt;
      const messages = event.messages.map((m) => {
        if (m.role === "user") return { ...m, content: step1Prompt };
        return m;
      });
      return { messages };
    });

    this.pi.on("tool_call", (event) => {
      if (!this.state.active) return undefined;

      // Phase-level permissions are checked first, then the per-step gate.
      const perm = checkPermission("plan-docs", event.toolName);
      if (!perm.allowed) return { block: true, reason: perm.reason };

      if (this.state.step < MUTATION_UNLOCK_STEP && PLAN_MUTATION_TOOLS.has(event.toolName)) {
        return {
          block: true,
          reason: `${event.toolName} available from step ${MUTATION_UNLOCK_STEP} (current: ${this.state.step})`,
        };
      }

      return undefined;
    });
  }

  /**
   * Handles completion of the current step.
   *
   * On the last step the plan is validated: a failure is reported and the
   * phase stays active on that step; success deactivates the phase and
   * releases the dispatch hook. On any earlier step the workflow advances
   * and the next step's prompt is returned.
   */
  private async handleStepComplete(): Promise<{ ok: boolean; prompt?: string; error?: string }> {
    const prev = this.state.step;

    // Compare against the shared constant rather than a literal so the step
    // count is defined in exactly one place.
    if (prev === TOTAL_STEPS) {
      const result = await this.handleFinalize();
      if (!result.ok) {
        const error = result.errors?.join("; ");
        await this.eventLog?.emitPhaseEnd("failed", error);
        return { ok: false, error };
      }

      this.state.active = false;
      unhookDispatch(this.dispatch, "onCompleteStep");
      await this.eventLog?.emitPhaseEnd("completed");
      this.log("Plan-docs finalized, workflow complete");
      return { ok: true, prompt: "Plan-docs validation passed. Workflow complete." };
    }

    this.state.step = (prev + 1) as PlanDocsStep;
    const nextName = STEP_NAMES[this.state.step];
    const prompt = formatStep(planDocsStepGuidance(this.state.step));

    this.log("Step complete, advancing", { from: prev, to: this.state.step, name: nextName });
    await this.eventLog?.emitStepTransition(this.state.step, nextName, TOTAL_STEPS);
    return { ok: true, prompt };
  }

  /** Loads the plan from `planDir` and validates it for the plan-docs phase. */
  private async handleFinalize(): Promise<{ ok: boolean; errors?: string[] }> {
    return loadAndValidatePlanForPhase(this.planDir, "plan-docs", this.log);
  }
}
/** Step 1: read-only review of the plan, seeded with the session context. */
function docsContextStep(context?: string): StepGuidance {
  const instructions = [
    "PLANNING CONTEXT (from session):",
    "",
    context ?? "",
    "",
    "Use koan_get_plan to review decisions, constraints, risks, and milestones.",
    "Capture decision IDs that should be reflected in documentation rationale.",
    "This step is read-only.",
  ];
  return { title: "Step 1: Extract Documentation Context", instructions };
}

/** Step 2: read-only analysis of what each planned change needs documented. */
function analyzeChangesStep(): StepGuidance {
  const instructions = [
    "Inspect each milestone and code_change:",
    " - What needs doc_diff coverage?",
    " - Which comments are missing or weak?",
    " - Which changes require architecture/README support?",
    "",
    "Use koan_get_milestone / koan_get_change for detail.",
    "This step is read-only.",
  ];
  return { title: "Step 2: Analyze Planned Code Changes", instructions };
}

/** Step 3: write doc_diff and comments on individual code changes. */
function codeAdjacentDocsStep(): StepGuidance {
  const instructions = [
    "Populate code-level documentation in plan.json:",
    " - koan_set_change_doc_diff",
    " - koan_set_change_comments",
    "",
    "Rules:",
    " - Every code change with diff should have doc_diff",
    " - comments explain WHY (reference decisions where applicable)",
    " - Avoid temporal language (no 'added', 'changed from', 'now')",
  ];
  return { title: "Step 3: Author Code-Adjacent Docs", instructions };
}

/** Step 4: README entries and diagrams that span more than one change. */
function crossCuttingDocsStep(): StepGuidance {
  const instructions = [
    "Update cross-cutting documentation artifacts:",
    " - koan_set_readme_entry for docs not tied to one change",
    " - koan_set_diagram (title/scope/ascii_render) for architecture visuals",
    "",
    "If diagrams are missing but needed, create them with:",
    " - koan_add_diagram",
    " - koan_add_diagram_node / koan_add_diagram_edge",
  ];
  return { title: "Step 4: Author Cross-Cutting Docs", instructions };
}

/** Step 5: consistency pass over doc diffs, diagrams and README entries. */
function consistencyReviewStep(): StepGuidance {
  const instructions = [
    "Review documentation consistency across the plan:",
    " - doc_diff content matches planned behavior",
    " - diagrams align with milestone scope",
    " - README entries do not contradict decisions/invariants",
    "",
    "Use getter tools to re-read affected entities and patch gaps.",
  ];
  return { title: "Step 5: Diagram & Consistency Review", instructions };
}

/** Step 6: completeness check; the only step carrying an `invokeAfter` note. */
function finalValidationStep(): StepGuidance {
  const instructions = [
    "Perform final documentation completeness check:",
    " - all code changes with diff have doc_diff",
    " - comments/doc diffs are coherent and timeless",
    " - readme/diagram updates are present when needed",
    "",
    "Fix remaining issues before completing.",
  ];
  const invokeAfter = [
    "WHEN DONE: Call koan_complete_step with a concise docs-completeness summary.",
    "Do NOT call this tool until documentation artifacts are complete.",
  ].join("\n");
  return { title: "Step 6: Validation & Final Review", instructions, invokeAfter };
}

/**
 * Returns the prompt guidance for one plan-docs step.
 *
 * @param step    Step number, 1 through 6.
 * @param context Text embedded in the step-1 instructions; ignored by the
 *                other steps and rendered as an empty line when omitted.
 * @returns The step's title and instructions. A value outside 1-6 (only
 *          reachable by bypassing the type) yields an empty title and an
 *          empty instruction list.
 */
export function planDocsStepGuidance(step: 1 | 2 | 3 | 4 | 5 | 6, context?: string): StepGuidance {
  if (step === 1) return docsContextStep(context);
  if (step === 2) return analyzeChangesStep();
  if (step === 3) return codeAdjacentDocsStep();
  if (step === 4) return crossCuttingDocsStep();
  if (step === 5) return consistencyReviewStep();
  if (step === 6) return finalValidationStep();
  return { title: "", instructions: [] };
}
import { promises as fs } from "node:fs"; @@ -15,6 +14,7 @@ import { decomposeStepGuidance, DECOMPOSE_STEP_NAMES, type DecomposeStep, + type WorkPhaseKey, } from "./prompts.js"; import { formatStep } from "../../lib/step.js"; import type { ContextData } from "../../types.js"; @@ -24,18 +24,12 @@ import { hookDispatch, unhookDispatch, type WorkflowDispatch, type PlanRef } fro import { checkPermission } from "../../lib/permissions.js"; import type { QRFile } from "../../qr/types.js"; -// -- Step gate constants -- - -// Blocklist pattern: only restrict tools this gate owns; everything else -// defers to checkPermission. Avoids blocking read tools or future pi tools. const QR_ADD_TOOLS = new Set(["koan_qr_add_item"]); const QR_ASSIGN_TOOLS = new Set(["koan_qr_assign_group"]); const ADD_ITEM_UNLOCK = 5; const ASSIGN_GROUP_UNLOCK = 9; const TOTAL_STEPS = 13; -// -- State -- - interface DecomposeState { active: boolean; step: DecomposeStep; @@ -43,11 +37,11 @@ interface DecomposeState { systemPrompt: string | null; } -// -- Phase -- - export class QRDecomposePhase { private readonly pi: ExtensionAPI; private readonly planDir: string; + private readonly workPhase: WorkPhaseKey; + private readonly qrPhaseKey: `qr-${WorkPhaseKey}`; private readonly log: Logger; private readonly state: DecomposeState; private readonly eventLog: EventLog | undefined; @@ -56,7 +50,7 @@ export class QRDecomposePhase { constructor( pi: ExtensionAPI, - config: { planDir: string }, + config: { planDir: string; workPhase: WorkPhaseKey }, dispatch: WorkflowDispatch, planRef: PlanRef, log?: Logger, @@ -64,6 +58,8 @@ export class QRDecomposePhase { ) { this.pi = pi; this.planDir = config.planDir; + this.workPhase = config.workPhase; + this.qrPhaseKey = `qr-${config.workPhase}`; this.dispatch = dispatch; this.planRef = planRef; this.log = log ?? 
createLogger("QRDecompose"); @@ -101,15 +97,15 @@ export class QRDecomposePhase { } const contextXml = formatContextForDecompose(contextData); - this.state.systemPrompt = buildDecomposeSystemPrompt(basePrompt); - this.state.step1Prompt = formatStep(decomposeStepGuidance(1, contextXml)); + this.state.systemPrompt = buildDecomposeSystemPrompt(basePrompt, this.workPhase); + this.state.step1Prompt = formatStep(decomposeStepGuidance(1, this.workPhase, contextXml)); this.state.active = true; this.state.step = 1; this.planRef.dir = this.planDir; hookDispatch(this.dispatch, "onCompleteStep", () => this.handleStepComplete()); - this.log("Starting qr-decompose workflow", { step: 1 }); + this.log("Starting qr-decompose workflow", { step: 1, phase: this.workPhase }); await this.eventLog?.emitPhaseStart(TOTAL_STEPS); await this.eventLog?.emitStepTransition(1, DECOMPOSE_STEP_NAMES[1], TOTAL_STEPS); } @@ -120,18 +116,12 @@ export class QRDecomposePhase { return { systemPrompt: this.state.systemPrompt }; }); - // Step 1 prompt injection. The CLI message is a process trigger -- - // the context event fires before each LLM call and replaces the - // user message with the actual step 1 instructions. Handler is a - // no-op once the step advances past 1. this.pi.on("context", (event) => { if (!this.state.active) return undefined; if (this.state.step !== 1 || !this.state.step1Prompt) return undefined; const messages = event.messages.map((m) => { - if (m.role === "user") { - return { ...m, content: this.state.step1Prompt! }; - } + if (m.role === "user") return { ...m, content: this.state.step1Prompt! }; return m; }); return { messages }; @@ -140,13 +130,9 @@ export class QRDecomposePhase { this.pi.on("tool_call", (event) => { if (!this.state.active) return undefined; - // Outer boundary: phase permissions (default-deny). 
- const perm = checkPermission("qr-plan-design", event.toolName); - if (!perm.allowed) { - return { block: true, reason: perm.reason }; - } + const perm = checkPermission(this.qrPhaseKey, event.toolName); + if (!perm.allowed) return { block: true, reason: perm.reason }; - // Inner constraint: two-tier step gate (blocklist, not whitelist). const step = this.state.step; if (step < ADD_ITEM_UNLOCK && QR_ADD_TOOLS.has(event.toolName)) { return { @@ -163,7 +149,6 @@ export class QRDecomposePhase { return undefined; }); - } private async handleStepComplete(): Promise<{ ok: boolean; prompt?: string; error?: string }> { @@ -175,34 +160,32 @@ export class QRDecomposePhase { await this.eventLog?.emitPhaseEnd("failed", result.errors?.join("; ")); return { ok: false, error: result.errors?.join("; ") }; } - // Only unhook after successful finalization -- on failure the LLM - // receives the error as a tool result and may retry within the step. + this.state.active = false; unhookDispatch(this.dispatch, "onCompleteStep"); await this.eventLog?.emitPhaseEnd("completed"); - this.log("QR decompose finalized, workflow complete"); + this.log("QR decompose finalized, workflow complete", { phase: this.workPhase }); return { ok: true, prompt: "QR decomposition complete." 
}; } this.state.step = (prev + 1) as DecomposeStep; const nextName = DECOMPOSE_STEP_NAMES[this.state.step]; - const prompt = formatStep(decomposeStepGuidance(this.state.step)); + const prompt = formatStep(decomposeStepGuidance(this.state.step, this.workPhase)); - this.log("Step complete, advancing", { from: prev, to: this.state.step, name: nextName }); + this.log("Step complete, advancing", { from: prev, to: this.state.step, name: nextName, phase: this.workPhase }); await this.eventLog?.emitStepTransition(this.state.step, nextName, TOTAL_STEPS); - return { ok: true, prompt }; } private async handleFinalize(): Promise<{ ok: boolean; errors?: string[] }> { - const qrPath = path.join(this.planDir, "qr-plan-design.json"); + const qrPath = path.join(this.planDir, `qr-${this.workPhase}.json`); let qr: QRFile; try { const raw = await fs.readFile(qrPath, "utf8"); qr = JSON.parse(raw) as QRFile; } catch (error) { const message = error instanceof Error ? error.message : String(error); - return { ok: false, errors: [`Failed to read qr-plan-design.json: ${message}`] }; + return { ok: false, errors: [`Failed to read qr-${this.workPhase}.json: ${message}`] }; } const errors: string[] = []; @@ -211,17 +194,16 @@ export class QRDecomposePhase { } else { const ungrouped = qr.items.filter((i) => i.group_id === null); if (ungrouped.length > 0) { - const ids = ungrouped.map((i) => i.id).join(", "); - errors.push(`Ungrouped items: ${ids}`); + errors.push(`Ungrouped items: ${ungrouped.map((i) => i.id).join(", ")}`); } } if (errors.length > 0) { - this.log("QR decompose validation failed", { errors }); + this.log("QR decompose validation failed", { errors, phase: this.workPhase }); return { ok: false, errors }; } - this.log("QR decompose validation passed"); + this.log("QR decompose validation passed", { phase: this.workPhase }); return { ok: true }; } } diff --git a/src/planner/phases/qr-decompose/prompts.ts b/src/planner/phases/qr-decompose/prompts.ts index 3c4969e..474f22f 100644 --- 
a/src/planner/phases/qr-decompose/prompts.ts +++ b/src/planner/phases/qr-decompose/prompts.ts @@ -1,7 +1,6 @@ // QR decompose phase prompts -- 13-step workflow for decomposing a plan into -// verifiable QR items. Follows the same structure as plan-design/prompts.ts. -// All tool calls reference phase='plan-design' explicitly so the decompose -// agent always writes to the correct QR namespace. +// verifiable QR items. Prompt text is shared across plan-design, plan-code, +// and plan-docs via the injected phase key. import { promises as fs } from "node:fs"; import * as os from "node:os"; @@ -10,11 +9,8 @@ import * as path from "node:path"; import type { ContextData } from "../../types.js"; import type { StepGuidance } from "../../lib/step.js"; -// -- Types -- - export type DecomposeStep = 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13; - -// -- Constants -- +export type WorkPhaseKey = "plan-design" | "plan-code" | "plan-docs"; export const DECOMPOSE_STEP_NAMES: Record = { 1: "Absorb Context", @@ -32,7 +28,23 @@ export const DECOMPOSE_STEP_NAMES: Record = { 13: "Final Validation", }; -// -- Exports -- +const PHASE_SCOPE_HINTS: Record = { + "plan-design": [ + "decision:DL-001 -- decision reasoning quality", + "milestone:M-001 -- milestone structure", + "code_intent:CI-M-001-001 -- intent clarity", + ], + "plan-code": [ + "milestone:M-001 -- code change coverage", + "code_intent:CI-M-001-001 -- intent->change linkage", + "change:CC-M-001-001 -- diff quality/anchor correctness", + ], + "plan-docs": [ + "milestone:M-001 -- docs completeness", + "change:CC-M-001-001 -- doc_diff/comments quality", + "diagram:DIAG-001 -- architecture docs fidelity", + ], +}; export async function loadQRDecomposeSystemPrompt(): Promise { const homeDir = os.homedir(); @@ -46,15 +58,15 @@ export async function loadQRDecomposeSystemPrompt(): Promise { } } -export function buildDecomposeSystemPrompt(basePrompt: string): string { +export function buildDecomposeSystemPrompt(basePrompt: 
string, phase: WorkPhaseKey): string { return [ basePrompt, "", "---", "", - "WORKFLOW: 13-STEP QR DECOMPOSITION (plan-design)", + `WORKFLOW: 13-STEP QR DECOMPOSITION (${phase})`, "", - "You will execute a 13-step workflow to decompose a plan into verifiable QR items.", + "You will execute a 13-step workflow to decompose the current plan phase into verifiable QR items.", "Step 1 instructions are in the user message below.", "Complete the work described, then call koan_complete_step.", "Put your findings in the `thoughts` parameter of koan_complete_step.", @@ -66,26 +78,22 @@ export function buildDecomposeSystemPrompt(basePrompt: string): string { } export function formatContextForDecompose(ctx: ContextData): string { - return [ - "", - JSON.stringify(ctx, null, 2), - "", - ].join("\n"); + return ["", JSON.stringify(ctx, null, 2), ""].join("\n"); } -export function decomposeStepGuidance(step: DecomposeStep, context?: string): StepGuidance { +export function decomposeStepGuidance(step: DecomposeStep, phase: WorkPhaseKey, context?: string): StepGuidance { switch (step) { case 1: return { title: "Step 1: Absorb Context", instructions: [ + `PHASE: ${phase}`, "PLANNING CONTEXT (from session):", "", context ?? 
"", "", "Use koan_get_plan to read the full plan.", - "Absorb the plan structure: overview, constraints, milestones, decisions, code_intents, risks, invisible_knowledge.", - "Identify the key entities and relationships that will need verification.", + "Absorb the structures relevant to this phase and identify what needs verification.", ], }; @@ -93,10 +101,9 @@ export function decomposeStepGuidance(step: DecomposeStep, context?: string): St return { title: "Step 2: Holistic Concerns", instructions: [ - "Identify plan-wide concerns that apply across all milestones.", - "Consider: structural completeness, logical consistency, risk coverage, dependency ordering.", - "Focus on plan-level quality -- not code correctness.", - "These concerns become scope='*' items in later steps.", + `List phase-wide concerns for ${phase}.`, + "Focus on quality/completeness/consistency concerns, not implementation details.", + "These become umbrella items (scope='*').", ], }; @@ -104,14 +111,9 @@ export function decomposeStepGuidance(step: DecomposeStep, context?: string): St return { title: "Step 3: Structural Enumeration", instructions: [ - "Enumerate every major entity in the plan:", - " - Decisions (DL-xxx)", - " - Constraints", - " - Risks", - " - Milestones (M-xxx) and their code_intents (CI-M-xxx-xxx)", - " - Invisible knowledge entries", - " - Waves and ordering", - "Track counts for validation in step 8.", + `Enumerate concrete entities touched by ${phase}.`, + "Track IDs and counts so step 7 can validate coverage.", + "Use getter tools to resolve uncertain IDs.", ], }; @@ -119,9 +121,8 @@ export function decomposeStepGuidance(step: DecomposeStep, context?: string): St return { title: "Step 4: Gap Analysis", instructions: [ - "Compare holistic concerns (step 2) against structural entities (step 3).", - "Identify gaps: concerns not covered by any entity, entities lacking justification.", - "Note areas where the plan is thin or under-specified.", + "Map concerns (step 2) to 
entities (step 3).", + "Identify uncovered concerns and under-specified entities.", ], }; @@ -129,22 +130,16 @@ export function decomposeStepGuidance(step: DecomposeStep, context?: string): St return { title: "Step 5: Generate Items", instructions: [ - "Generate QR items from the analysis in steps 2-4.", - "Use koan_qr_add_item to create each item. Always pass phase='plan-design'.", + "Generate QR items with koan_qr_add_item.", + `Always pass phase='${phase}'.`, "", - "SCOPE VOCABULARY:", - " '*' -- plan-wide check", - " 'milestone:M-001' -- milestone-specific check", - " 'decision:DL-001' -- decision-specific check", - " 'code_intent:CI-M-001-001' -- code intent-specific check", + "Scope examples for this phase:", + ...PHASE_SCOPE_HINTS[phase].map((hint) => ` - ${hint}`), "", - "SEVERITY:", - " MUST -- blocks all iterations (critical defect)", - " SHOULD -- important quality issue", - " COULD -- nice-to-have improvement", - "", - "Generate items covering: structural completeness, decision reasoning chains,", - "risk coverage, milestone scoping, code intent clarity, constraint satisfaction.", + "Severity:", + " MUST -- critical defect", + " SHOULD -- significant quality issue", + " COULD -- non-blocking improvement", ], }; @@ -152,11 +147,8 @@ export function decomposeStepGuidance(step: DecomposeStep, context?: string): St return { title: "Step 6: Atomicity Check", instructions: [ - "Review each generated item. 
Each item should test exactly one concern.", - "If an item covers multiple concerns, split it:", - " Use koan_qr_add_item for each child item.", - " The original becomes the parent (parent_id on children).", - "Atomic items are easier to verify independently.", + "Ensure each item checks exactly one concern.", + "Split non-atomic items by adding child items when needed.", ], }; @@ -164,11 +156,8 @@ export function decomposeStepGuidance(step: DecomposeStep, context?: string): St return { title: "Step 7: Coverage Validation", instructions: [ - "Cross-reference items against the plan structure.", - "Every milestone should have at least one QR item.", - "Every decision should have at least one QR item.", - "High-severity risks should have corresponding QR items.", - "Use koan_qr_add_item for any gaps found.", + "Cross-check item set against structural enumeration from step 3.", + "Add missing items for uncovered entities/concerns.", ], }; @@ -176,11 +165,9 @@ export function decomposeStepGuidance(step: DecomposeStep, context?: string): St return { title: "Step 8: Validate Items", instructions: [ - "Items are already on disk (each koan_qr_add_item wrote immediately).", - "Use koan_qr_summary(phase='plan-design') to verify counts.", - "Use koan_qr_list_items(phase='plan-design') to review all items.", - "Check: no duplicate checks, severity levels appropriate, scopes valid.", - "Add missing items with koan_qr_add_item if gaps found.", + "Use koan_qr_summary and koan_qr_list_items to audit generated items.", + `Always pass phase='${phase}'.`, + "Fix duplicates or malformed scopes by adding/revising items.", ], }; @@ -188,13 +175,10 @@ export function decomposeStepGuidance(step: DecomposeStep, context?: string): St return { title: "Step 9: Structural Grouping", instructions: [ - "Begin organizing items into review groups.", - "DETERMINISTIC RULES:", - " - Parent-child items share the same group", - " - Umbrella items (scope='*') get group_id='umbrella'", - "", - "Use 
koan_qr_list_items(phase='plan-design') to see current items.", - "Use koan_qr_assign_group(phase='plan-design', ids=[...], group_id='...') to assign groups.", + "Assign deterministic groups:", + " - Parent/child items share group", + " - Umbrella items (scope='*') use group_id='umbrella'", + `Use koan_qr_assign_group(phase='${phase}', ...)`, ], }; @@ -202,11 +186,8 @@ export function decomposeStepGuidance(step: DecomposeStep, context?: string): St return { title: "Step 10: Component Grouping", instructions: [ - "Group remaining ungrouped items by plan component.", - "Group candidates: a major milestone, a major decision, a constraint category.", - "", - "Use koan_qr_list_items(phase='plan-design') to see ungrouped items.", - "Use koan_qr_assign_group(phase='plan-design', ids=[...], group_id='...') to assign.", + "Group remaining ungrouped items by component (milestone/decision/change cluster).", + `Use koan_qr_list_items(phase='${phase}') and koan_qr_assign_group(...)`, ], }; @@ -215,10 +196,7 @@ export function decomposeStepGuidance(step: DecomposeStep, context?: string): St title: "Step 11: Concern Grouping", instructions: [ "Group remaining ungrouped items by concern type.", - "Group candidates: reasoning chain quality, reference integrity, risk coverage.", - "", - "Use koan_qr_list_items(phase='plan-design') to see ungrouped items.", - "Use koan_qr_assign_group(phase='plan-design', ids=[...], group_id='...') to assign.", + "Example concern groups: coverage, consistency, traceability, docs quality.", ], }; @@ -226,11 +204,8 @@ export function decomposeStepGuidance(step: DecomposeStep, context?: string): St return { title: "Step 12: Affinity Grouping", instructions: [ - "Assign remaining ungrouped items to groups based on similarity.", - "Singletons are acceptable -- not every item needs a multi-member group.", - "", - "Use koan_qr_list_items(phase='plan-design') to see ungrouped items.", - "Use koan_qr_assign_group(phase='plan-design', ids=[...], 
group_id='...') to assign.", + "Assign any remaining ungrouped items by semantic affinity.", + "Singleton groups are acceptable.", ], }; @@ -238,14 +213,13 @@ export function decomposeStepGuidance(step: DecomposeStep, context?: string): St return { title: "Step 13: Final Validation", instructions: [ - "Validate all items are grouped and well-formed.", - "Use koan_qr_summary(phase='plan-design') to check final counts.", - "Use koan_qr_list_items(phase='plan-design') to verify all items have group_id.", - "If any items lack group_id, assign them now.", - "Output 'PASS' in thoughts if all items are valid and grouped.", + "Validate that all items are grouped and well-formed.", + `Use koan_qr_summary(phase='${phase}') and koan_qr_list_items(phase='${phase}')`, + "Ensure no item has null group_id.", + "Output PASS in thoughts when complete.", ], invokeAfter: [ - "WHEN DONE: Call koan_complete_step with 'PASS' or issues found in the `thoughts` parameter.", + "WHEN DONE: Call koan_complete_step with PASS or issues in `thoughts`.", "Do NOT call this tool until validation is complete.", ].join("\n"), }; diff --git a/src/planner/phases/qr-verify/phase.ts b/src/planner/phases/qr-verify/phase.ts index 4a8e5c1..623b9f6 100644 --- a/src/planner/phases/qr-verify/phase.ts +++ b/src/planner/phases/qr-verify/phase.ts @@ -1,7 +1,5 @@ // QR verify phase -- 3-step reviewer subagent that verifies exactly 1 QR item // against the plan (CONTEXT -> ANALYZE -> CONFIRM). One subagent per item. -// Mirrors PlanDesignPhase lifecycle; no finalize validation -- parent reads -// item status from disk after the reviewer exits. 
import { promises as fs } from "node:fs"; import * as path from "node:path"; @@ -12,12 +10,7 @@ import { formatStep } from "../../lib/step.js"; import type { ContextData } from "../../types.js"; import { createLogger, type Logger } from "../../../utils/logger.js"; import { EventLog } from "../../lib/audit.js"; -import { - hookDispatch, - unhookDispatch, - type WorkflowDispatch, - type PlanRef, -} from "../../lib/dispatch.js"; +import { hookDispatch, unhookDispatch, type WorkflowDispatch, type PlanRef } from "../../lib/dispatch.js"; import { checkPermission } from "../../lib/permissions.js"; import type { QRItem, QRFile } from "../../qr/types.js"; import { @@ -29,7 +22,7 @@ import { type VerifyStep, } from "./prompts.js"; -// -- Constants -- +type WorkPhaseKey = "plan-design" | "plan-code" | "plan-docs"; const TOTAL_STEPS = 3; const STEP_NAMES: Record = { @@ -38,8 +31,6 @@ const STEP_NAMES: Record = { 3: "CONFIRM", }; -// -- State -- - interface VerifyState { active: boolean; step: VerifyStep; @@ -48,11 +39,11 @@ interface VerifyState { systemPrompt: string | null; } -// -- Phase -- - export class QRVerifyPhase { private readonly pi: ExtensionAPI; private readonly planDir: string; + private readonly workPhase: WorkPhaseKey; + private readonly qrPhaseKey: `qr-${WorkPhaseKey}`; private readonly log: Logger; private readonly state: VerifyState; private readonly eventLog: EventLog | undefined; @@ -62,7 +53,7 @@ export class QRVerifyPhase { constructor( pi: ExtensionAPI, - config: { planDir: string; itemId: string }, + config: { planDir: string; itemId: string; workPhase: WorkPhaseKey }, dispatch: WorkflowDispatch, planRef: PlanRef, log?: Logger, @@ -70,6 +61,8 @@ export class QRVerifyPhase { ) { this.pi = pi; this.planDir = config.planDir; + this.workPhase = config.workPhase; + this.qrPhaseKey = `qr-${config.workPhase}`; this.dispatch = dispatch; this.planRef = planRef; this.log = log ?? 
createLogger("QRVerify"); @@ -87,7 +80,6 @@ export class QRVerifyPhase { } async begin(): Promise { - // Verify plan.json exists so koan_get_plan is usable during analysis. const planPath = path.join(this.planDir, "plan.json"); try { await fs.access(planPath); @@ -107,20 +99,20 @@ export class QRVerifyPhase { return; } - const qrPath = path.join(this.planDir, "qr-plan-design.json"); + const qrPath = path.join(this.planDir, `qr-${this.workPhase}.json`); let qrFile: QRFile; try { const raw = await fs.readFile(qrPath, "utf8"); qrFile = JSON.parse(raw) as QRFile; } catch (error) { const message = error instanceof Error ? error.message : String(error); - this.log("Failed to read qr-plan-design.json", { error: message }); + this.log(`Failed to read qr-${this.workPhase}.json`, { error: message }); return; } const item = qrFile.items.find((i) => i.id === this.state.itemId); if (!item) { - this.log("QR item not found", { itemId: this.state.itemId }); + this.log("QR item not found", { itemId: this.state.itemId, phase: this.workPhase }); return; } this.item = item; @@ -134,15 +126,15 @@ export class QRVerifyPhase { return; } - this.state.systemPrompt = buildVerifySystemPrompt(basePrompt); - this.state.step1Prompt = formatStep(buildContextStep(item, contextData)); + this.state.systemPrompt = buildVerifySystemPrompt(basePrompt, this.workPhase); + this.state.step1Prompt = formatStep(buildContextStep(item, contextData, this.workPhase)); this.state.active = true; this.state.step = 1; this.planRef.dir = this.planDir; hookDispatch(this.dispatch, "onCompleteStep", () => this.handleStepComplete()); - this.log("Starting QR verify workflow", { itemId: this.state.itemId, step: 1 }); + this.log("Starting QR verify workflow", { itemId: this.state.itemId, phase: this.workPhase, step: 1 }); await this.eventLog?.emitPhaseStart(TOTAL_STEPS); await this.eventLog?.emitStepTransition(1, STEP_NAMES[1], TOTAL_STEPS); } @@ -153,17 +145,12 @@ export class QRVerifyPhase { return { systemPrompt: 
this.state.systemPrompt }; }); - // Step 1 prompt injection. Context event fires before the initial LLM - // call and replaces the trigger user message with actual step 1 instructions. - // Handler is a no-op once the step advances past 1. this.pi.on("context", (event) => { if (!this.state.active) return undefined; if (this.state.step !== 1 || !this.state.step1Prompt) return undefined; const messages = event.messages.map((m) => { - if (m.role === "user") { - return { ...m, content: this.state.step1Prompt! }; - } + if (m.role === "user") return { ...m, content: this.state.step1Prompt! }; return m; }); return { messages }; @@ -172,24 +159,18 @@ export class QRVerifyPhase { this.pi.on("tool_call", (event) => { if (!this.state.active) return undefined; - const perm = checkPermission("qr-plan-design", event.toolName); - if (!perm.allowed) { - return { block: true, reason: perm.reason }; - } + const perm = checkPermission(this.qrPhaseKey, event.toolName); + if (!perm.allowed) return { block: true, reason: perm.reason }; - // Step gate: koan_qr_set_item is step-3-only (CONFIRM step). - // Blocklist so read tools and other approved tools pass through. - const step = this.state.step; - if (step < 3 && event.toolName === "koan_qr_set_item") { + if (this.state.step < 3 && event.toolName === "koan_qr_set_item") { return { block: true, - reason: `koan_qr_set_item available in step 3 (current: ${step})`, + reason: `koan_qr_set_item available in step 3 (current: ${this.state.step})`, }; } return undefined; }); - } private async handleStepComplete(): Promise<{ ok: boolean; prompt?: string; error?: string }> { @@ -199,7 +180,7 @@ export class QRVerifyPhase { this.state.active = false; unhookDispatch(this.dispatch, "onCompleteStep"); await this.eventLog?.emitPhaseEnd("completed"); - this.log("Verification complete"); + this.log("Verification complete", { itemId: this.state.itemId, phase: this.workPhase }); return { ok: true, prompt: "Verification complete." 
}; } @@ -207,19 +188,17 @@ export class QRVerifyPhase { const stepName = STEP_NAMES[this.state.step]; const prompt = this.buildStepPrompt(this.state.step); - this.log("Step complete, advancing", { from: prev, to: this.state.step }); + this.log("Step complete, advancing", { from: prev, to: this.state.step, phase: this.workPhase }); await this.eventLog?.emitStepTransition(this.state.step, stepName, TOTAL_STEPS); - return { ok: true, prompt }; } - // Item is stored during begin() -- avoids async re-reads for prompt building. private buildStepPrompt(step: VerifyStep): string { switch (step) { case 2: return formatStep(buildAnalyzeStep(this.item!)); case 3: - return formatStep(buildConfirmStep(this.item!)); + return formatStep(buildConfirmStep(this.item!, this.workPhase)); default: return ""; } diff --git a/src/planner/phases/qr-verify/prompts.ts b/src/planner/phases/qr-verify/prompts.ts index 97dfe3f..a364490 100644 --- a/src/planner/phases/qr-verify/prompts.ts +++ b/src/planner/phases/qr-verify/prompts.ts @@ -1,8 +1,5 @@ // Prompt guidance for the 3-step QR verify subagent workflow. -// // Each reviewer subagent verifies exactly 1 QRItem against the plan. -// Steps: CONTEXT (understand the check) -> ANALYZE (read plan, apply check) -// -> CONFIRM (record verdict via koan_qr_set_item). 
import { promises as fs } from "node:fs"; import * as os from "node:os"; @@ -12,12 +9,10 @@ import type { ContextData } from "../../types.js"; import type { QRItem } from "../../qr/types.js"; import type { StepGuidance } from "../../lib/step.js"; -// -- Types -- +type WorkPhaseKey = "plan-design" | "plan-code" | "plan-docs"; export type VerifyStep = 1 | 2 | 3; -// -- Helpers -- - function formatContextXml(ctx: ContextData): string { const fields = Object.entries(ctx) .map(([key, values]) => { @@ -41,6 +36,10 @@ function scopeGuidance(item: QRItem): string { const intentId = s.slice("code_intent:".length); return `CODE INTENT CHECK -- Use koan_get_intent(id='${intentId}') to read the intent.`; } + if (s.startsWith("change:")) { + const changeId = s.slice("change:".length); + return `CHANGE CHECK -- Use koan_get_change(id='${changeId}') to read the planned change.`; + } if (s.startsWith("decision:")) { const decisionId = s.slice("decision:".length); return `DECISION CHECK -- Use koan_get_decision(id='${decisionId}') to read the decision.`; @@ -48,8 +47,6 @@ function scopeGuidance(item: QRItem): string { return "SCOPED CHECK -- Read the relevant section using plan getter tools."; } -// -- Exports -- - export async function loadQRVerifySystemPrompt(): Promise { const promptPath = path.join(os.homedir(), ".claude/agents/quality-reviewer.md"); try { @@ -60,13 +57,13 @@ export async function loadQRVerifySystemPrompt(): Promise { } } -export function buildVerifySystemPrompt(basePrompt: string): string { +export function buildVerifySystemPrompt(basePrompt: string, phase: WorkPhaseKey): string { return [ basePrompt, "", "---", "", - "WORKFLOW: 3-STEP QR VERIFICATION (plan-design)", + `WORKFLOW: 3-STEP QR VERIFICATION (${phase})`, "", "You will verify exactly 1 QR item against the plan.", "Step 1 instructions are in the user message below.", @@ -78,11 +75,11 @@ export function buildVerifySystemPrompt(basePrompt: string): string { ].join("\n"); } -export function 
buildContextStep(item: QRItem, contextData: ContextData): StepGuidance { +export function buildContextStep(item: QRItem, contextData: ContextData, phase: WorkPhaseKey): StepGuidance { return { title: "Step 1: CONTEXT", instructions: [ - "PHASE: plan-design", + `PHASE: ${phase}`, "ITEM TO VERIFY:", "", "", @@ -95,9 +92,7 @@ export function buildContextStep(item: QRItem, contextData: ContextData): StepGu "PLANNING CONTEXT (reference for semantic validation):", formatContextXml(contextData), "", - "UNDERSTAND the check you need to perform.", - "Note the scope: '*' means plan-wide check, 'milestone:X' means specific milestone.", - "Severity indicates blocking behavior: MUST blocks all iterations.", + "Understand the check and required evidence before analyzing.", ], }; } @@ -109,17 +104,17 @@ export function buildAnalyzeStep(item: QRItem): StepGuidance { scopeGuidance(item), "", "TASK:", - "1. Read relevant files/sections based on scope", + "1. Read relevant entities based on scope", "2. Apply the verification check", - "3. Form preliminary conclusion: PASS or FAIL?", - "4. If FAIL, note specific evidence", + "3. Form preliminary PASS/FAIL conclusion", + "4. Gather concrete evidence", "", - "DO NOT update QR state yet. 
Proceed to CONFIRM step.", + "Do NOT update QR state yet.", ], }; } -export function buildConfirmStep(item: QRItem): StepGuidance { +export function buildConfirmStep(item: QRItem, phase: WorkPhaseKey): StepGuidance { return { title: "Step 3: CONFIRM", instructions: [ @@ -128,23 +123,21 @@ export function buildConfirmStep(item: QRItem): StepGuidance { "", "CONFIDENCE CHECK:", "- Are you confident in your conclusion?", - "- Did you verify against actual plan content?", - "- Is your evidence specific and verifiable?", + "- Is evidence specific and verifiable?", "", "RECORD RESULT:", "", "If PASS:", - ` koan_qr_set_item(phase='plan-design', id='${item.id}', status='PASS')`, + ` koan_qr_set_item(phase='${phase}', id='${item.id}', status='PASS')`, "", "If FAIL:", - ` koan_qr_set_item(phase='plan-design', id='${item.id}', status='FAIL',`, - " finding='')", + ` koan_qr_set_item(phase='${phase}', id='${item.id}', status='FAIL', finding='')`, "", "RULES:", - "- FAIL requires finding (explains what failed)", - "- PASS forbids finding (finding field must not be set)", + "- FAIL requires finding", + "- PASS must not include finding", "", - "Execute ONE of the above tool calls, then call koan_complete_step.", + "Execute ONE verdict call, then call koan_complete_step.", ], invokeAfter: [ "WHEN DONE: Call koan_complete_step after recording your verdict.", diff --git a/src/planner/plan/render.ts b/src/planner/plan/render.ts new file mode 100644 index 0000000..4974bdd --- /dev/null +++ b/src/planner/plan/render.ts @@ -0,0 +1,155 @@ +// Mechanical renderer: plan.json -> plan.md. +// The plan JSON is the source of truth; this file provides a deterministic +// markdown projection for human/manual review between planning and execution. 
+ +import { promises as fs } from "node:fs"; +import * as path from "node:path"; + +import type { Plan, Milestone, DiagramGraph } from "./types.js"; +import { loadPlan } from "./serialize.js"; + +function escCell(text: string): string { + return text.replace(/\|/g, "\\|").replace(/\n/g, " ").trim(); +} + +function pushList(lines: string[], title: string, values: string[]): void { + if (values.length === 0) return; + lines.push(title, ""); + for (const value of values) lines.push(`- ${value}`); + lines.push(""); +} + +function pushScopedDiagrams(lines: string[], diagrams: DiagramGraph[], scope: string): void { + const scoped = diagrams.filter((d) => d.scope === scope); + for (const diagram of scoped) { + lines.push(`### ${diagram.title}`, ""); + if (diagram.ascii_render && diagram.ascii_render.trim().length > 0) { + lines.push("```", diagram.ascii_render, "```", ""); + } else { + lines.push(`[Diagram pending rendering: ${diagram.id}]`, ""); + } + } +} + +function pushMilestone(lines: string[], milestone: Milestone, diagrams: DiagramGraph[]): void { + lines.push(`### ${milestone.id}: ${milestone.name}`, ""); + + pushScopedDiagrams(lines, diagrams, `milestone:${milestone.id}`); + + if (milestone.files.length > 0) { + lines.push(`**Files**: ${milestone.files.join(", ")}`, ""); + } + + pushList(lines, "**Requirements**", milestone.requirements); + pushList(lines, "**Acceptance Criteria**", milestone.acceptance_criteria); + pushList(lines, "**Tests**", milestone.tests); + + if (milestone.code_intents.length > 0) { + lines.push("#### Code Intents", ""); + for (const intent of milestone.code_intents) { + const fn = intent.function ? `::${intent.function}` : ""; + const refs = intent.decision_refs.length > 0 ? 
` (refs: ${intent.decision_refs.join(", ")})` : ""; + lines.push(`- **${intent.id}** \`${intent.file}${fn}\`: ${intent.behavior}${refs}`); + } + lines.push(""); + } + + if (milestone.code_changes.length > 0) { + lines.push("#### Code Changes", ""); + for (const change of milestone.code_changes) { + const intentRef = change.intent_ref ? ` - implements ${change.intent_ref}` : ""; + lines.push(`**${change.id}** (${change.file})${intentRef}`, ""); + + if (change.diff.trim().length > 0) { + lines.push("**Code Diff**", "", "```diff", change.diff, "```", ""); + } + + if (change.doc_diff.trim().length > 0) { + lines.push("**Documentation Diff**", "", "```diff", change.doc_diff, "```", ""); + } + + if (change.comments.trim().length > 0) { + lines.push(`> ${change.comments}`, ""); + } + } + } +} + +export function renderPlanMarkdown(plan: Plan): string { + const lines: string[] = ["# Plan", "", "## Overview", "", plan.overview.problem || "(empty)", ""]; + + if (plan.overview.approach.trim().length > 0) { + lines.push(`**Approach**: ${plan.overview.approach}`, ""); + } + + pushScopedDiagrams(lines, plan.diagram_graphs, "overview"); + + if (plan.planning_context.decision_log.length > 0) { + lines.push("## Planning Context", "", "### Decision Log", "", "| ID | Decision | Reasoning Chain |", "|---|---|---|"); + for (const d of plan.planning_context.decision_log) { + lines.push(`| ${d.id} | ${escCell(d.decision)} | ${escCell(d.reasoning_chain)} |`); + } + lines.push(""); + } + + if (plan.planning_context.rejected_alternatives.length > 0) { + lines.push("### Rejected Alternatives", "", "| Alternative | Why Rejected |", "|---|---|"); + for (const r of plan.planning_context.rejected_alternatives) { + lines.push(`| ${escCell(r.alternative)} | ${escCell(r.rejection_reason)} (ref: ${r.decision_ref}) |`); + } + lines.push(""); + } + + pushList(lines, "### Constraints", plan.planning_context.constraints); + + if (plan.planning_context.known_risks.length > 0) { + lines.push("### Known 
Risks", ""); + for (const risk of plan.planning_context.known_risks) { + lines.push(`- **${risk.risk}**: ${risk.mitigation}`); + } + lines.push(""); + } + + const ik = plan.invisible_knowledge; + if (ik.system.trim().length > 0 || ik.invariants.length > 0 || ik.tradeoffs.length > 0) { + lines.push("## Invisible Knowledge", ""); + if (ik.system.trim().length > 0) { + lines.push("### System", "", ik.system, ""); + } + pushList(lines, "### Invariants", ik.invariants); + pushList(lines, "### Tradeoffs", ik.tradeoffs); + pushScopedDiagrams(lines, plan.diagram_graphs, "invisible_knowledge"); + } + + lines.push("## Milestones", ""); + for (const milestone of plan.milestones) { + pushMilestone(lines, milestone, plan.diagram_graphs); + } + + if (plan.readme_entries.length > 0) { + lines.push("## README Entries", ""); + for (const entry of plan.readme_entries) { + lines.push(`### ${entry.path}`, "", entry.content, ""); + } + } + + if (plan.waves.length > 0) { + lines.push("## Execution Waves", ""); + for (const wave of plan.waves) { + lines.push(`- ${wave.id}: ${wave.milestones.join(", ")}`); + } + lines.push(""); + } + + return `${lines.join("\n").trimEnd()}\n`; +} + +export async function renderPlanMarkdownToFile(planDir: string): Promise { + const plan = await loadPlan(planDir); + const markdown = renderPlanMarkdown(plan); + const outputPath = path.join(planDir, "plan.md"); + const tmpPath = path.join(planDir, ".plan.md.tmp"); + await fs.writeFile(tmpPath, markdown, "utf8"); + await fs.rename(tmpPath, outputPath); + return outputPath; +} diff --git a/src/planner/plan/validate.ts b/src/planner/plan/validate.ts index 210fd58..c5ecedd 100644 --- a/src/planner/plan/validate.ts +++ b/src/planner/plan/validate.ts @@ -136,35 +136,63 @@ export function validatePlanDocs(p: Plan): ValidationResult { return { ok: errors.length === 0, errors }; } -// Reads plan.json from planDir and runs validatePlanDesign + validateRefs. 
-// Returns { ok: false, errors } on read/parse failure or any validation failure. -export async function loadAndValidatePlan( +export type PlanValidationPhase = "plan-design" | "plan-code" | "plan-docs"; + +// Reads plan.json from planDir and runs phase-appropriate validation. +// All phases require plan-design + reference integrity checks. +// plan-code additionally requires intent->change completeness. +// plan-docs additionally requires doc completeness. +export async function loadAndValidatePlanForPhase( planDir: string, + phase: PlanValidationPhase, log: Logger, ): Promise<{ ok: boolean; errors?: string[] }> { const planPath = path.join(planDir, "plan.json"); - let plan; + let plan: Plan; try { const raw = await fs.readFile(planPath, "utf8"); - plan = JSON.parse(raw); + plan = JSON.parse(raw) as Plan; } catch (error) { const message = error instanceof Error ? error.message : String(error); - log("Failed to read plan.json for validation", { error: message }); + log("Failed to read plan.json for validation", { error: message, phase }); return { ok: false, errors: [`Failed to read plan.json: ${message}`] }; } const designValidation = validatePlanDesign(plan); if (!designValidation.ok) { - log("Plan design validation failed", { errors: designValidation.errors }); + log("Plan design validation failed", { errors: designValidation.errors, phase }); return { ok: false, errors: designValidation.errors }; } const refValidation = validateRefs(plan); if (!refValidation.ok) { - log("Plan reference validation failed", { errors: refValidation.errors }); + log("Plan reference validation failed", { errors: refValidation.errors, phase }); return { ok: false, errors: refValidation.errors }; } - log("Plan validation passed", { path: planPath }); + if (phase === "plan-code" || phase === "plan-docs") { + const codeValidation = validatePlanCode(plan); + if (!codeValidation.ok) { + log("Plan code validation failed", { errors: codeValidation.errors, phase }); + return { ok: false, 
errors: codeValidation.errors }; + } + } + + if (phase === "plan-docs") { + const docsValidation = validatePlanDocs(plan); + if (!docsValidation.ok) { + log("Plan docs validation failed", { errors: docsValidation.errors, phase }); + return { ok: false, errors: docsValidation.errors }; + } + } + + log("Plan validation passed", { path: planPath, phase }); return { ok: true }; } + +export async function loadAndValidatePlan( + planDir: string, + log: Logger, +): Promise<{ ok: boolean; errors?: string[] }> { + return loadAndValidatePlanForPhase(planDir, "plan-design", log); +} diff --git a/src/planner/session.ts b/src/planner/session.ts index ba24055..f48f65f 100644 --- a/src/planner/session.ts +++ b/src/planner/session.ts @@ -1,6 +1,6 @@ -// Parent session: orchestrates the koan workflow (context capture -> architect -// -> QR decompose -> QR verify pool). Polls subagent state.json for progress. -// Widget displays persistent progress; destroyed on completion. +// Parent session: orchestrates the koan planning workflow. +// Flow: context capture -> plan-design(+QR) -> plan-code(+QR) -> plan-docs(+QR) +// -> mechanical plan.json->plan.md rendering for manual review. 
import { promises as fs } from "node:fs"; import * as path from "node:path"; @@ -10,7 +10,17 @@ import type { ExtensionAPI, ExtensionCommandContext, ExtensionContext } from "@m import { ContextCapturePhase } from "./phases/context-capture/phase.js"; import { createInitialState, initializePlanState, type WorkflowState } from "./state.js"; import { createPlanInfo } from "../utils/plan.js"; -import { spawnArchitect, spawnArchitectFix, spawnQRDecomposer, spawnReviewer } from "./subagent.js"; +import { + spawnArchitect, + spawnArchitectFix, + spawnDeveloper, + spawnDeveloperFix, + spawnTechnicalWriter, + spawnTechnicalWriterFix, + spawnQRDecomposer, + spawnReviewer, + type SubagentResult, +} from "./subagent.js"; import { createLogger, setLogDir, type Logger } from "../utils/logger.js"; import { createSubagentDir } from "../utils/progress.js"; import { readProjection, readRecentLogs, type Projection } from "./lib/audit.js"; @@ -19,8 +29,9 @@ import { pool } from "./lib/pool.js"; import type { QRFile } from "./qr/types.js"; import { MAX_FIX_ITERATIONS, qrPassesAtIteration } from "./qr/severity.js"; import { WidgetController, type WidgetUpdate } from "./ui/widget.js"; +import { renderPlanMarkdownToFile } from "./plan/render.js"; -// -- Types -- +type WorkPhaseKey = "plan-design" | "plan-code" | "plan-docs"; interface Session { plan(args: string, ctx: ExtensionCommandContext): Promise; @@ -33,6 +44,29 @@ interface QRBlockResult { passed: boolean; } +interface PhaseRunConfig { + key: WorkPhaseKey; + label: string; + widgetIndex: number; + role: "architect" | "developer" | "technical-writer"; + spawnWork: (opts: SpawnWorkRunOptions) => Promise; + spawnFix: (opts: SpawnFixRunOptions) => Promise; +} + +interface SpawnWorkRunOptions { + planDir: string; + subagentDir: string; + cwd: string; + extensionPath: string; + log: Logger; +} + +interface SpawnFixRunOptions extends SpawnWorkRunOptions {} + +function qrFilePath(planDir: string, phase: WorkPhaseKey): string { + return 
path.join(planDir, `qr-${phase}.json`); +} + function singleSubagentStart(role: string): WidgetUpdate { return { subagentRole: role, @@ -55,17 +89,23 @@ function singleSubagentFromProjection(p: Projection): WidgetUpdate { }; } -// -- Session -- +function phaseRunningState(phase: WorkPhaseKey): WorkflowState["phase"] { + if (phase === "plan-design") return "architect-running"; + if (phase === "plan-code") return "plan-code-running"; + return "plan-docs-running"; +} + +function phaseCompleteState(phase: WorkPhaseKey): WorkflowState["phase"] { + if (phase === "plan-design") return "plan-design-complete"; + if (phase === "plan-code") return "plan-code-complete"; + return "plan-docs-complete"; +} export function createSession(pi: ExtensionAPI, dispatch: WorkflowDispatch, planRef: PlanRef): Session { const state: WorkflowState = createInitialState(); const log = createLogger("Session"); let widget: WidgetController | null = null; - // Completion callback for context-capture phase. Runs inside the - // koan_store_context tool call -- the tool blocks until the architect - // subagent finishes. The LLM sees context capture + architect outcome - // in one tool response. 
const onContextComplete = async (ctx: ExtensionContext): Promise => { if (!state.plan) { return "Context captured but no plan state available."; @@ -75,112 +115,85 @@ export function createSession(pi: ExtensionAPI, dispatch: WorkflowDispatch, plan try { const planDir = state.plan.directory; - const planJsonPath = path.join(planDir, "plan.json"); - const subagentDir = await createSubagentDir(planDir, "architect"); + const extensionPath = path.resolve(import.meta.dirname, "../../extensions/koan.ts"); + + const phases: PhaseRunConfig[] = [ + { + key: "plan-design", + label: "Plan design", + widgetIndex: 1, + role: "architect", + spawnWork: (opts) => spawnArchitect(opts), + spawnFix: (opts) => spawnArchitectFix({ ...opts, fixPhase: "plan-design" }), + }, + { + key: "plan-code", + label: "Plan code", + widgetIndex: 2, + role: "developer", + spawnWork: (opts) => spawnDeveloper(opts), + spawnFix: (opts) => spawnDeveloperFix({ ...opts, fixPhase: "plan-code" }), + }, + { + key: "plan-docs", + label: "Plan docs", + widgetIndex: 3, + role: "technical-writer", + spawnWork: (opts) => spawnTechnicalWriter(opts), + spawnFix: (opts) => spawnTechnicalWriterFix({ ...opts, fixPhase: "plan-docs" }), + }, + ]; - state.phase = "architect-running"; widget?.update({ phaseStatus: { index: 0, status: "completed" }, activeIndex: 1, - step: "spawning architect...", + step: "context captured; starting planning phases...", activity: "", - qrIterationsMax: MAX_FIX_ITERATIONS + 1, - qrIteration: 1, - qrMode: "initial", - qrPhase: "execute", - qrDone: null, - qrTotal: null, - qrPass: null, - qrFail: null, - qrTodo: null, - ...singleSubagentStart("architect"), }); - log("Spawning architect after context capture", { planDir, subagentDir }); - - const extensionPath = path.resolve(import.meta.dirname, "../../extensions/koan.ts"); - const pollInterval = setInterval(async () => { - const [s, logs] = await Promise.all([ - readProjection(subagentDir), - readRecentLogs(subagentDir), - ]); - if (s) { - 
widget?.update({ - step: s.stepName, - activity: s.lastAction ?? "", - logLines: logs, - ...singleSubagentFromProjection(s), - }); + const phaseSummaries: string[] = []; + for (const phase of phases) { + const result = await runPlanningPhase( + phase, + planDir, + ctx.cwd, + extensionPath, + state, + log, + widget, + ); + + phaseSummaries.push(`${phase.label}: ${result.summary}`); + if (!result.passed) { + return `Context captured. ${phase.label} failed.\n\n${phaseSummaries.join("\n")}`; } - }, 2000); - - const result = await spawnArchitect({ - planDir, - subagentDir, - cwd: ctx.cwd, - extensionPath, - log, - }); - - clearInterval(pollInterval); - - if (result.exitCode !== 0) { - state.phase = "architect-failed"; - const detail = result.stderr.slice(0, 500); - log("Architect subagent failed", { exitCode: result.exitCode, stderr: detail }); - widget?.update({ - phaseStatus: { index: 1, status: "failed" }, - step: "architect failed", - activity: "", - subagentActive: 0, - subagentDone: 1, - }); - return `Context captured. Architect subagent failed (exit ${result.exitCode}).\n\nStderr:\n${detail}`; } - let planExists = false; + let planMdPath: string; try { - await fs.access(planJsonPath); - planExists = true; - } catch { - // plan.json not written - } - - if (!planExists) { - state.phase = "architect-failed"; - log("Architect completed but plan.json not found", { planJsonPath }); - widget?.update({ - phaseStatus: { index: 1, status: "failed" }, - step: "no plan produced", - activity: "", - subagentActive: 0, - subagentDone: 1, - }); - return "Context captured. Architect completed but produced no plan."; + planMdPath = await renderPlanMarkdownToFile(planDir); + } catch (error) { + const message = error instanceof Error ? 
error.message : String(error); + log("Failed to render plan.md", { error: message, planDir }); + return `Planning phases completed, but plan markdown rendering failed: ${message}`; } - state.phase = "plan-design-complete"; - log("Architect plan-design complete", { planDir }); + state.phase = "plan-docs-complete"; widget?.update({ - phaseStatus: { index: 1, status: "running" }, - step: "starting QR block...", + activeIndex: -1, + step: "planning complete; awaiting manual review of plan.md", activity: "", - qrIterationsMax: MAX_FIX_ITERATIONS + 1, - qrIteration: 1, - qrMode: "initial", - qrPhase: "execute", - qrDone: null, - qrTotal: null, - qrPass: null, - qrFail: null, - qrTodo: null, - subagentActive: 0, - subagentDone: 1, }); - const qr = await runPlanDesignWithQR(planDir, ctx.cwd, extensionPath, state, log, widget); - if (qr.passed) outcome = "PASS"; - return `Context captured. Plan design complete.\n\n${qr.summary}`; + outcome = "PASS"; + return [ + "Context captured. Planning complete.", + "", + ...phaseSummaries, + "", + `Plan markdown: ${planMdPath}`, + "PAUSE: Please review this file manually before /koan execute.", + ].join("\n"); } finally { if (widget) { widget.destroy(); @@ -219,7 +232,6 @@ export function createSession(pi: ExtensionAPI, dispatch: WorkflowDispatch, plan planDirectory: planInfo.directory, }); - // Destroy stale widget if re-entered if (widget) { widget.destroy(); widget = null; @@ -242,39 +254,149 @@ export function createSession(pi: ExtensionAPI, dispatch: WorkflowDispatch, plan }; } -// -- QR Block -- - const QR_POOL_CONCURRENCY = 6; +async function runPlanningPhase( + phase: PhaseRunConfig, + planDir: string, + cwd: string, + extensionPath: string, + state: WorkflowState, + log: Logger, + widget: WidgetController | null, +): Promise { + state.phase = phaseRunningState(phase.key); + + widget?.update({ + phaseStatus: { index: phase.widgetIndex, status: "running" }, + activeIndex: phase.widgetIndex, + step: `${phase.key}: spawning 
${phase.role}...`, + activity: "", + qrIterationsMax: MAX_FIX_ITERATIONS + 1, + qrIteration: 1, + qrMode: "initial", + qrPhase: "execute", + qrDone: null, + qrTotal: null, + qrPass: null, + qrFail: null, + qrTodo: null, + ...singleSubagentStart(phase.role), + }); + + const subagentDir = await createSubagentDir(planDir, `${phase.role}-${phase.key}`); + + const pollInterval = setInterval(async () => { + const [projection, logs] = await Promise.all([readProjection(subagentDir), readRecentLogs(subagentDir)]); + if (!projection) return; + widget?.update({ + step: `${phase.key}: ${projection.stepName}`, + activity: projection.lastAction ?? "", + logLines: logs, + ...singleSubagentFromProjection(projection), + }); + }, 2000); + + const workResult = await phase.spawnWork({ + planDir, + subagentDir, + cwd, + extensionPath, + log, + }); + + clearInterval(pollInterval); + + if (workResult.exitCode !== 0) { + const detail = workResult.stderr.slice(0, 500); + log(`${phase.key} subagent failed`, { exitCode: workResult.exitCode, stderr: detail }); + widget?.update({ + phaseStatus: { index: phase.widgetIndex, status: "failed" }, + step: `${phase.key}: worker failed`, + activity: "", + subagentActive: 0, + subagentDone: 1, + }); + return { summary: `${phase.label} subagent failed (exit ${workResult.exitCode}).\n\nStderr:\n${detail}`, passed: false }; + } + + const planJsonPath = path.join(planDir, "plan.json"); + try { + await fs.access(planJsonPath); + } catch { + log(`${phase.key} completed but plan.json missing`, { planJsonPath }); + widget?.update({ + phaseStatus: { index: phase.widgetIndex, status: "failed" }, + step: `${phase.key}: no plan produced`, + activity: "", + subagentActive: 0, + subagentDone: 1, + }); + return { summary: `${phase.label} completed but produced no plan.json.`, passed: false }; + } + + state.phase = phaseCompleteState(phase.key); + widget?.update({ + step: `${phase.key}: starting QR block...`, + activity: "", + qrIteration: 1, + qrMode: "initial", + 
qrPhase: "execute", + qrDone: null, + qrTotal: null, + qrPass: null, + qrFail: null, + qrTodo: null, + subagentActive: 0, + subagentDone: 1, + }); + + const qr = await runPhaseWithQR( + phase, + planDir, + cwd, + extensionPath, + state, + log, + widget, + ); + + if (qr.passed) { + state.phase = phaseCompleteState(phase.key); + widget?.update({ phaseStatus: { index: phase.widgetIndex, status: "completed" } }); + } else { + widget?.update({ phaseStatus: { index: phase.widgetIndex, status: "failed" } }); + } + + return qr; +} + async function runQRBlock( planDir: string, cwd: string, extensionPath: string, + phase: WorkPhaseKey, state: WorkflowState, log: Logger, widget: WidgetController | null, ): Promise { - const qrPath = path.join(planDir, "qr-plan-design.json"); + const qrPath = qrFilePath(planDir, phase); const keyOf = (scope: string, check: string): string => `${scope}\u0000${check}`; - // Carry forward confirmed PASS concerns across re-decompose runs. const previousPassKeys = new Set(); try { const raw = await fs.readFile(qrPath, "utf8"); const prev = JSON.parse(raw) as QRFile; for (const item of prev.items) { - if (item.status === "PASS") { - previousPassKeys.add(keyOf(item.scope, item.check)); - } + if (item.status === "PASS") previousPassKeys.add(keyOf(item.scope, item.check)); } } catch { - // No previous QR file yet. + // First QR run for this phase. } - // 1. 
Spawn decomposer subagent state.phase = "qr-decompose-running"; widget?.update({ - step: "qr-decompose: starting...", + step: `${phase} qr-decompose: starting...`, activity: "", qrPhase: "decompose", qrDone: null, @@ -284,21 +406,18 @@ async function runQRBlock( qrTodo: null, ...singleSubagentStart("qr-decomposer"), }); - const decomposeDir = await createSubagentDir(planDir, "qr-decomposer"); + + const decomposeDir = await createSubagentDir(planDir, `qr-decomposer-${phase}`); const decomposePoll = setInterval(async () => { - const [s, logs] = await Promise.all([ - readProjection(decomposeDir), - readRecentLogs(decomposeDir), - ]); - if (s) { - widget?.update({ - step: `qr-decompose: ${s.stepName}`, - activity: s.lastAction ?? "", - logLines: logs, - ...singleSubagentFromProjection(s), - }); - } + const [projection, logs] = await Promise.all([readProjection(decomposeDir), readRecentLogs(decomposeDir)]); + if (!projection) return; + widget?.update({ + step: `${phase} qr-decompose: ${projection.stepName}`, + activity: projection.lastAction ?? 
"", + logLines: logs, + ...singleSubagentFromProjection(projection), + }); }, 2000); const decompose = await spawnQRDecomposer({ @@ -306,6 +425,7 @@ async function runQRBlock( subagentDir: decomposeDir, cwd, extensionPath, + phase, log, }); @@ -314,17 +434,11 @@ async function runQRBlock( if (decompose.exitCode !== 0) { state.phase = "qr-decompose-failed"; const detail = decompose.stderr.slice(0, 500); - log("QR decomposer failed", { exitCode: decompose.exitCode, stderr: detail }); - widget?.update({ - step: "qr-decompose: failed", - activity: "", - subagentActive: 0, - subagentDone: 1, - }); - return { summary: `QR decompose failed (exit ${decompose.exitCode}).\n\nStderr:\n${detail}`, passed: false }; + log("QR decomposer failed", { phase, exitCode: decompose.exitCode, stderr: detail }); + widget?.update({ step: `${phase} qr-decompose: failed`, activity: "", subagentActive: 0, subagentDone: 1 }); + return { summary: `${phase} QR decompose failed (exit ${decompose.exitCode}).\n\nStderr:\n${detail}`, passed: false }; } - // 2. Read QR items let qr: QRFile; try { const raw = await fs.readFile(qrPath, "utf8"); @@ -332,19 +446,17 @@ async function runQRBlock( } catch (error) { state.phase = "qr-decompose-failed"; const message = error instanceof Error ? 
error.message : String(error); - log("Failed to read qr-plan-design.json after decompose", { error: message }); - return { summary: "QR decompose completed but produced no verifiable items.", passed: false }; + log("Failed to read QR file after decompose", { phase, error: message }); + return { summary: `${phase} QR decompose completed but produced no verifiable items.`, passed: false }; } if (qr.items.length === 0) { state.phase = "qr-decompose-failed"; - log("QR decompose produced no items"); - return { summary: "QR decompose completed but produced no items.", passed: false }; + log("QR decompose produced no items", { phase }); + return { summary: `${phase} QR decompose completed but produced no items.`, passed: false }; } - // Re-apply previously confirmed PASS concerns if re-decompose reset them. - const carriedPasses = qr.items.filter((item) => - item.status !== "PASS" && previousPassKeys.has(keyOf(item.scope, item.check))).length; + const carriedPasses = qr.items.filter((item) => item.status !== "PASS" && previousPassKeys.has(keyOf(item.scope, item.check))).length; if (carriedPasses > 0) { qr = { ...qr, @@ -359,22 +471,16 @@ async function runQRBlock( await fs.rename(tmpPath, qrPath); } catch (error) { const message = error instanceof Error ? error.message : String(error); - log("Failed to persist carried PASS statuses", { error: message }); - return { summary: "QR verify aborted: failed to preserve PASS statuses.", passed: false }; + log("Failed to persist carried PASS statuses", { phase, error: message }); + return { summary: `${phase} QR verify aborted: failed to preserve PASS statuses.`, passed: false }; } } - // Preserve prior PASS verdicts, but force all FAIL items back to TODO for - // re-verification. This keeps confirmed concerns stable while requiring - // explicit re-check of previously failing concerns. 
const resetFailures = qr.items.filter((i) => i.status === "FAIL").length; if (resetFailures > 0) { qr = { ...qr, - items: qr.items.map((item) => - item.status === "FAIL" - ? { ...item, status: "TODO", finding: null } - : item), + items: qr.items.map((item) => (item.status === "FAIL" ? { ...item, status: "TODO", finding: null } : item)), }; try { const tmpPath = `${qrPath}.tmp`; @@ -382,8 +488,8 @@ async function runQRBlock( await fs.rename(tmpPath, qrPath); } catch (error) { const message = error instanceof Error ? error.message : String(error); - log("Failed to persist QR FAIL->TODO reset", { error: message }); - return { summary: "QR verify aborted: failed to prepare QR item states.", passed: false }; + log("Failed to persist QR FAIL->TODO reset", { phase, error: message }); + return { summary: `${phase} QR verify aborted: failed to prepare QR item states.`, passed: false }; } } @@ -393,16 +499,8 @@ async function runQRBlock( const initialFail = qr.items.filter((i) => i.status === "FAIL").length; const initialTodo = qr.items.filter((i) => i.status === "TODO").length; - log("QR decompose complete", { - itemCount: totalItems, - verifyCount: verifyIds.length, - preservedPass, - carriedPasses, - resetFailures, - }); - widget?.update({ - step: `qr-verify: 0/${verifyIds.length}`, + step: `${phase} qr-verify: 0/${verifyIds.length}`, activity: "", qrTotal: totalItems, qrDone: preservedPass, @@ -416,7 +514,6 @@ async function runQRBlock( subagentDone: 0, }); - // 3. 
Spawn reviewer pool (TODO-only) state.phase = "qr-verify-running"; widget?.update({ qrPhase: "verify" }); @@ -449,12 +546,13 @@ async function runQRBlock( verifyIds, QR_POOL_CONCURRENCY, async (itemId) => { - const reviewerDir = await createSubagentDir(planDir, `qr-reviewer-${itemId}`); + const reviewerDir = await createSubagentDir(planDir, `qr-reviewer-${phase}-${itemId}`); const r = await spawnReviewer({ planDir, subagentDir: reviewerDir, cwd, extensionPath, + phase, itemId, log, }); @@ -462,9 +560,7 @@ async function runQRBlock( if (reviewerModel === null) { const projection = await readProjection(reviewerDir); reviewerModel = projection?.model ?? null; - if (reviewerModel) { - widget?.update({ subagentModel: reviewerModel }); - } + if (reviewerModel) widget?.update({ subagentModel: reviewerModel }); } return r; @@ -472,7 +568,7 @@ async function runQRBlock( (progress) => { verifyDone = progress.done; widget?.update({ - step: `qr-verify: ${progress.done}/${progress.total}`, + step: `${phase} qr-verify: ${progress.done}/${progress.total}`, qrDone: preservedPass + progress.done, qrTotal: totalItems, subagentQueued: progress.queued, @@ -487,7 +583,6 @@ async function runQRBlock( } } - // 4. 
Read final results state.phase = "qr-complete"; let finalQR: QRFile; try { @@ -500,9 +595,7 @@ async function runQRBlock( const pass = finalQR.items.filter((i) => i.status === "PASS").length; const fail = finalQR.items.filter((i) => i.status === "FAIL").length; const todo = finalQR.items.filter((i) => i.status === "TODO").length; - const summary = `QR complete: ${pass} PASS, ${fail} FAIL, ${todo} TODO (${failedReviewers.length} reviewers failed).`; - - log("QR block complete", { pass, fail, todo, failedReviewers }); + const summary = `${phase} QR complete: ${pass} PASS, ${fail} FAIL, ${todo} TODO (${failedReviewers.length} reviewers failed).`; const passed = fail === 0 && failedReviewers.length === 0; widget?.update({ @@ -520,21 +613,8 @@ async function runQRBlock( return { summary, passed }; } -// -- Plan-design QR fix loop -- - -// Fix loop: architect -> QR -> [pass: done | fail: fix architect -> QR -> ...] -// -// Re-decomposes on each iteration rather than re-verifying only. The fix -// architect may change plan structure (add milestones, split intents, remove -// decisions); old QR items referencing stale scopes can produce stale verdicts. -// -// Verification semantics per iteration: -// - PASS items are preserved (confirmed concerns stay confirmed). -// - FAIL items are reset to TODO (must be re-verified after fixes). -// - TODO items are verified. -// -// The session's for-loop counter remains the iteration source of truth. 
-async function runPlanDesignWithQR( +async function runPhaseWithQR( + phase: PhaseRunConfig, planDir: string, cwd: string, extensionPath: string, @@ -542,12 +622,11 @@ async function runPlanDesignWithQR( log: Logger, widget: WidgetController | null, ): Promise { - const qrPath = path.join(planDir, "qr-plan-design.json"); + const qrPath = qrFilePath(planDir, phase.key); - // Initial QR (iteration 1) - let qr = await runQRBlock(planDir, cwd, extensionPath, state, log, widget); + let qr = await runQRBlock(planDir, cwd, extensionPath, phase.key, state, log, widget); if (qr.passed) { - widget?.update({ qrPhase: "done", phaseStatus: { index: 1, status: "completed" } }); + widget?.update({ qrPhase: "done", phaseStatus: { index: phase.widgetIndex, status: "completed" } }); return qr; } @@ -565,21 +644,16 @@ async function runPlanDesignWithQR( qrTodo: null, }); - // Read QR file for severity check let qrFile: QRFile; try { const raw = await fs.readFile(qrPath, "utf8"); qrFile = JSON.parse(raw) as QRFile; } catch { - log("Fix loop: failed to read QR file", { iteration }); + log("Fix loop: failed to read QR file", { phase: phase.key, iteration }); widget?.update({ qrPhase: "done" }); - return { summary: "Fix loop aborted: cannot read QR file.", passed: false }; + return { summary: `${phase.key} fix loop aborted: cannot read QR file.`, passed: false }; } - // Severity escalation: if no blocking failures remain at this - // iteration, the plan passes without another fix attempt. - // Example: iteration 3 drops COULD -- if only COULD items fail, - // the plan is good enough and the loop terminates. 
if (qrPassesAtIteration(qrFile.items, iteration)) { const pass = qrFile.items.filter((i) => i.status === "PASS").length; const fail = qrFile.items.filter((i) => i.status === "FAIL").length; @@ -591,83 +665,79 @@ async function runPlanDesignWithQR( qrPass: pass, qrFail: fail, qrTodo: todo, - phaseStatus: { index: 1, status: "completed" }, + phaseStatus: { index: phase.widgetIndex, status: "completed" }, }); return { passed: true, - summary: `QR passed at iteration ${iteration} after severity de-escalation: ${pass} PASS, ${fail} FAIL (non-blocking).`, + summary: `${phase.key} QR passed at iteration ${iteration} after severity de-escalation: ${pass} PASS, ${fail} FAIL (non-blocking).`, }; } - // Spawn fix-mode architect const fixIndex = iteration - 1; widget?.update({ - step: `fix ${fixIndex}/${MAX_FIX_ITERATIONS}: spawning architect...`, + step: `${phase.key} fix ${fixIndex}/${MAX_FIX_ITERATIONS}: spawning ${phase.role}...`, activity: "", qrPhase: "execute", - ...singleSubagentStart("architect"), + ...singleSubagentStart(phase.role), }); - const fixDir = await createSubagentDir(planDir, `architect-fix-${fixIndex}`); + const fixDir = await createSubagentDir(planDir, `${phase.role}-fix-${phase.key}-${fixIndex}`); const fixPoll = setInterval(async () => { - const [s, logs] = await Promise.all([ - readProjection(fixDir), - readRecentLogs(fixDir), - ]); - if (s) { - widget?.update({ - step: `fix ${fixIndex}/${MAX_FIX_ITERATIONS}: ${s.stepName}`, - activity: s.lastAction ?? "", - logLines: logs, - ...singleSubagentFromProjection(s), - }); - } + const [projection, logs] = await Promise.all([readProjection(fixDir), readRecentLogs(fixDir)]); + if (!projection) return; + widget?.update({ + step: `${phase.key} fix ${fixIndex}/${MAX_FIX_ITERATIONS}: ${projection.stepName}`, + activity: projection.lastAction ?? 
"", + logLines: logs, + ...singleSubagentFromProjection(projection), + }); }, 2000); - const fixResult = await spawnArchitectFix({ + const fixResult = await phase.spawnFix({ planDir, subagentDir: fixDir, cwd, extensionPath, - fixPhase: "plan-design", log, }); clearInterval(fixPoll); if (fixResult.exitCode !== 0) { - log("Fix architect failed", { iteration: fixIndex, exitCode: fixResult.exitCode, stderr: fixResult.stderr.slice(0, 500) }); + log("Fix worker failed", { + phase: phase.key, + iteration: fixIndex, + exitCode: fixResult.exitCode, + stderr: fixResult.stderr.slice(0, 500), + }); widget?.update({ - step: `fix ${fixIndex}/${MAX_FIX_ITERATIONS}: architect failed, re-running QR...`, + step: `${phase.key} fix ${fixIndex}/${MAX_FIX_ITERATIONS}: worker failed, re-running QR...`, activity: "", subagentActive: 0, subagentDone: 1, }); } - // Re-run full QR (decompose + verify) widget?.update({ - step: `fix ${fixIndex}/${MAX_FIX_ITERATIONS}: re-running QR...`, + step: `${phase.key} fix ${fixIndex}/${MAX_FIX_ITERATIONS}: re-running QR...`, activity: "", subagentActive: 0, subagentDone: 1, }); - qr = await runQRBlock(planDir, cwd, extensionPath, state, log, widget); + + qr = await runQRBlock(planDir, cwd, extensionPath, phase.key, state, log, widget); if (qr.passed) { - widget?.update({ qrPhase: "done", phaseStatus: { index: 1, status: "completed" } }); + widget?.update({ qrPhase: "done", phaseStatus: { index: phase.widgetIndex, status: "completed" } }); return qr; } widget?.update({ qrPhase: "execute", qrDone: null, qrTotal: null, qrPass: null, qrFail: null, qrTodo: null }); } - // Max iterations reached. MUST failures remaining after 5 fix attempts - // indicate a structural problem -- silently passing would propagate a - // known-broken plan downstream. 
widget?.update({ qrPhase: "done" }); return { passed: false, - summary: `${qr.summary} (max ${MAX_FIX_ITERATIONS} fix iterations reached)`, + summary: `${phase.key} ${qr.summary} (max ${MAX_FIX_ITERATIONS} fix iterations reached)`, }; } diff --git a/src/planner/state.ts b/src/planner/state.ts index 3583d4d..eb34f5b 100644 --- a/src/planner/state.ts +++ b/src/planner/state.ts @@ -8,6 +8,10 @@ export type WorkflowPhase = | "architect-running" | "architect-failed" | "plan-design-complete" + | "plan-code-running" + | "plan-code-complete" + | "plan-docs-running" + | "plan-docs-complete" | "qr-decompose-running" | "qr-decompose-failed" | "qr-verify-running" @@ -54,18 +58,7 @@ export function createInitialState(): WorkflowState { export function resetContextState(state: WorkflowState): void { state.context = null; - if ( - state.phase === "context" || - state.phase === "context-failed" || - state.phase === "context-complete" || - state.phase === "architect-failed" || - state.phase === "plan-design-complete" || - state.phase === "qr-decompose-running" || - state.phase === "qr-decompose-failed" || - state.phase === "qr-verify-running" || - state.phase === "qr-verify-failed" || - state.phase === "qr-complete" - ) { + if (state.phase !== "idle") { state.phase = "idle"; } } diff --git a/src/planner/subagent.ts b/src/planner/subagent.ts index 32cb584..973759b 100644 --- a/src/planner/subagent.ts +++ b/src/planner/subagent.ts @@ -9,13 +9,15 @@ import * as path from "node:path"; import { createLogger, type Logger } from "../utils/logger.js"; +type WorkPhaseKey = "plan-design" | "plan-code" | "plan-docs"; + export interface SubagentResult { exitCode: number; stderr: string; subagentDir: string; } -export interface SpawnArchitectOptions { +export interface SpawnWorkOptions { planDir: string; subagentDir: string; cwd: string; @@ -24,12 +26,12 @@ export interface SpawnArchitectOptions { log?: Logger; } -export interface SpawnArchitectFixOptions { +export interface SpawnFixOptions { 
planDir: string; subagentDir: string; cwd: string; extensionPath: string; - fixPhase: string; // e.g. "plan-design" + fixPhase: WorkPhaseKey; log?: Logger; } @@ -38,6 +40,7 @@ export interface SpawnQRDecomposerOptions { subagentDir: string; cwd: string; extensionPath: string; + phase: WorkPhaseKey; log?: Logger; } @@ -46,12 +49,11 @@ export interface SpawnReviewerOptions { subagentDir: string; cwd: string; extensionPath: string; + phase: WorkPhaseKey; itemId: string; log?: Logger; } -// -- Spawn helper -- - function spawnSubagent( role: string, phase: string, @@ -70,7 +72,7 @@ function spawnSubagent( prompt, ]; - log(`Spawning ${role} subagent`, { planDir: opts.planDir, subagentDir: opts.subagentDir }); + log(`Spawning ${role} subagent`, { planDir: opts.planDir, subagentDir: opts.subagentDir, phase }); return new Promise((resolve) => { const stdoutLog = createWriteStream(path.join(opts.subagentDir, "stdout.log"), { flags: "w" }); @@ -97,55 +99,85 @@ function spawnSubagent( stdoutLog.end(); stderrLog.end(); const exitCode = code ?? 1; - log(`${role} subagent exited`, { exitCode }); + log(`${role} subagent exited`, { exitCode, phase }); resolve({ exitCode, stderr, subagentDir: opts.subagentDir }); }); proc.on("error", (error) => { stdoutLog.end(); stderrLog.end(); - log(`${role} subagent spawn error`, { error: error.message }); + log(`${role} subagent spawn error`, { error: error.message, phase }); resolve({ exitCode: 1, stderr: error.message, subagentDir: opts.subagentDir }); }); }); } -// -- Architect spawners -- +function spawnWork(role: string, phase: WorkPhaseKey, prompt: string, opts: SpawnWorkOptions): Promise { + const log = opts.log ?? createLogger("Subagent"); + return spawnSubagent(role, phase, prompt, opts, log); +} + +// -- Planning workers -- + +export function spawnArchitect(opts: SpawnWorkOptions): Promise { + return spawnWork("architect", "plan-design", opts.initialPrompt ?? 
"Begin the plan-design phase.", opts); +} + +export function spawnDeveloper(opts: SpawnWorkOptions): Promise { + return spawnWork("developer", "plan-code", opts.initialPrompt ?? "Begin the plan-code phase.", opts); +} + +export function spawnTechnicalWriter(opts: SpawnWorkOptions): Promise { + return spawnWork("technical-writer", "plan-docs", opts.initialPrompt ?? "Begin the plan-docs phase.", opts); +} + +// -- Fix workers -- -export function spawnArchitect(opts: SpawnArchitectOptions): Promise { +export function spawnArchitectFix(opts: SpawnFixOptions): Promise { const log = opts.log ?? createLogger("Subagent"); return spawnSubagent( "architect", "plan-design", - opts.initialPrompt ?? "Begin the plan-design phase.", - opts, + "Fix the plan based on QR failures.", + { ...opts, extraFlags: ["--koan-fix", opts.fixPhase] }, log, ); } -export function spawnArchitectFix(opts: SpawnArchitectFixOptions): Promise { +export function spawnDeveloperFix(opts: SpawnFixOptions): Promise { const log = opts.log ?? createLogger("Subagent"); return spawnSubagent( - "architect", - "plan-design", - "Fix the plan based on QR failures.", + "developer", + "plan-code", + "Fix plan-code output based on QR failures.", + { ...opts, extraFlags: ["--koan-fix", opts.fixPhase] }, + log, + ); +} + +export function spawnTechnicalWriterFix(opts: SpawnFixOptions): Promise { + const log = opts.log ?? createLogger("Subagent"); + return spawnSubagent( + "technical-writer", + "plan-docs", + "Fix plan-docs output based on QR failures.", { ...opts, extraFlags: ["--koan-fix", opts.fixPhase] }, log, ); } -// -- QR spawners -- +// -- QR workers -- export function spawnQRDecomposer(opts: SpawnQRDecomposerOptions): Promise { const log = opts.log ?? 
createLogger("Subagent"); - return spawnSubagent("qr-decomposer", "qr-plan-design", "Begin the QR decompose phase.", opts, log); + return spawnSubagent("qr-decomposer", `qr-${opts.phase}`, "Begin the QR decompose phase.", opts, log); } export function spawnReviewer(opts: SpawnReviewerOptions): Promise { const log = opts.log ?? createLogger("Subagent"); return spawnSubagent( "reviewer", - "qr-plan-design", + `qr-${opts.phase}`, "Verify the assigned QR item.", { ...opts, extraFlags: ["--koan-qr-item", opts.itemId] }, log, From aebabbb51b0b1812acc47a0a5ec6d08dd01abe99 Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Sat, 28 Feb 2026 09:10:09 +0700 Subject: [PATCH 028/412] docs: expand README with architecture and workflow overview --- README.md | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 10e3e1d..e8c4765 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,37 @@ # Koan Pi Package -This repository is structured as a [pi](https://github.com/badlogic/pi-mono/tree/main/packages/coding-agent) package. +## Overview + +Koan is an opinionated planning workflow extension for the pi coding agent. It constrains model behavior with deterministic phase orchestration, explicit tool boundaries, and durable file-backed state so planning sessions are repeatable and auditable. + +## Architecture + +The runtime is split into two modes from the same extension entrypoint: + +- **Parent session mode** runs `/koan` commands and orchestrates the workflow. +- **Subagent mode** runs role/phase-specific workflows (architect, QR decomposer, reviewer, fix mode). + +The parent controls progression through context capture, plan design, quality review, and iterative fixes. Subagents are isolated processes that communicate through persisted artifacts (`plan.json`, `context.json`, `qr-*.json`) and audit projections. 
+ +## Design Decisions + +Key design choices that shape implementation: + +- **Inversion of control**: TypeScript orchestration code drives agent behavior; models do not self-route workflow steps. +- **Tool-call-driven transitions**: step progression happens via `koan_complete_step` tool calls, not conversational chaining. +- **Default-deny permissions**: each phase explicitly allowlists tools; unknown tool/phase access is blocked. +- **Disk-backed mutations**: planning mutations are immediately persisted with atomic writes instead of deferred finalize steps. +- **Need-to-know prompts**: each subagent only receives the minimum context needed for its task. + +## Invariants + +The workflow depends on these invariants: + +- Planning phases must block direct `edit`/`write` tools. +- Tool failures must throw errors (not return soft error payloads). +- Cross-reference integrity in the plan must validate before progression. +- MUST-severity QR failures remain blocking even as lower-severity checks de-escalate in later fix iterations. + +## Boundaries + +Current scope focuses on planning and QR orchestration. `/koan execute` is intentionally not implemented yet. 
From c52ebf4a0501bb69275b01aee7bb782c7cc5e3ee Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Mon, 2 Mar 2026 13:41:28 +0700 Subject: [PATCH 029/412] refactor planner flow around koan_plan and conversation context --- extensions/koan.ts | 71 ++- src/planner/conversation.ts | 32 ++ src/planner/lib/audit.ts | 6 - src/planner/lib/conversation-trigger.ts | 60 +++ src/planner/lib/dispatch.ts | 9 +- src/planner/lib/permissions.ts | 1 - src/planner/model-config.ts | 102 +++++ src/planner/model-phase.ts | 63 +++ src/planner/model-resolver.ts | 33 ++ src/planner/phases/context-capture/phase.ts | 308 ------------- src/planner/phases/context-capture/prompts.ts | 92 ---- src/planner/phases/plan-code/phase.ts | 20 +- src/planner/phases/plan-code/prompts.ts | 11 +- src/planner/phases/plan-design/fix-phase.ts | 5 +- src/planner/phases/plan-design/fix-prompts.ts | 8 +- src/planner/phases/plan-design/phase.ts | 19 +- src/planner/phases/plan-design/prompts.ts | 27 +- src/planner/phases/plan-docs/fix-phase.ts | 5 +- src/planner/phases/plan-docs/fix-prompts.ts | 9 +- src/planner/phases/plan-docs/phase.ts | 19 +- src/planner/phases/plan-docs/prompts.ts | 18 +- src/planner/phases/qr-decompose/phase.ts | 17 +- src/planner/phases/qr-decompose/prompts.ts | 33 +- src/planner/phases/qr-verify/phase.ts | 15 +- src/planner/phases/qr-verify/prompts.ts | 39 +- src/planner/session.ts | 321 ++++++++------ src/planner/state.ts | 30 -- src/planner/subagent.ts | 33 +- src/planner/tools/context-store.ts | 34 -- src/planner/tools/workflow.ts | 30 +- src/planner/types.ts | 21 - src/planner/ui/config/menu.ts | 87 ++++ src/planner/ui/config/model-selection.ts | 410 ++++++++++++++++++ src/planner/ui/widget.ts | 4 +- 34 files changed, 1145 insertions(+), 847 deletions(-) create mode 100644 src/planner/conversation.ts create mode 100644 src/planner/lib/conversation-trigger.ts create mode 100644 src/planner/model-config.ts create mode 100644 src/planner/model-phase.ts create mode 100644 
src/planner/model-resolver.ts delete mode 100644 src/planner/phases/context-capture/phase.ts delete mode 100644 src/planner/phases/context-capture/prompts.ts delete mode 100644 src/planner/tools/context-store.ts delete mode 100644 src/planner/types.ts create mode 100644 src/planner/ui/config/menu.ts create mode 100644 src/planner/ui/config/model-selection.ts diff --git a/extensions/koan.ts b/extensions/koan.ts index 613e8f3..369eac7 100644 --- a/extensions/koan.ts +++ b/extensions/koan.ts @@ -1,8 +1,10 @@ // Entry point for the koan pi extension. Serves dual roles: parent session -// (registers /koan command) and subagent mode (dispatches to phase workflow -// via CLI flags). All tools register unconditionally at init; phases restrict -// access via tool_call blocking at runtime. +// (registers koan_plan tool and /koan-execute, /koan-status, /koan commands) +// and subagent mode (dispatches to phase workflow via CLI flags). All tools +// register unconditionally at init; phases restrict access via tool_call +// blocking at runtime. +import { Type } from "@sinclair/typebox"; import type { ExtensionAPI, ExtensionContext } from "@mariozechner/pi-coding-agent"; import { createSession } from "../src/planner/session.js"; @@ -10,6 +12,7 @@ import { detectSubagentMode, dispatchPhase } from "../src/planner/phases/dispatc import { registerAllTools, createDispatch, createPlanRef } from "../src/planner/tools/index.js"; import { createLogger } from "../src/utils/logger.js"; import { EventLog, extractToolEvent } from "../src/planner/lib/audit.js"; +import { openKoanConfig } from "../src/planner/ui/config/menu.js"; function currentModelId(ctx: ExtensionContext): string | null { const model = ctx.model; @@ -109,30 +112,50 @@ export default function koan(pi: ExtensionAPI): void { // Session: parent-mode workflow engine. 
const session = createSession(pi, dispatch, planRef); + pi.registerTool({ + name: "koan_plan", + label: "Plan", + description: [ + "Launch a structured planning pipeline for complex, multi-file tasks.", + "Invoke when the user asks to plan, use the planner, or when the task", + "is too large to implement directly.", + "", + "The current conversation is automatically captured — it becomes the", + "planning context. The pipeline spawns specialized agents (architect,", + "developer, writer) that read the conversation history to understand", + "the task, then produce a structured plan with milestones, code intents,", + "and quality review.", + "", + "This is a long-running operation (5-15 minutes). Do not invoke for", + "simple tasks that can be done in a single pass.", + ].join("\n"), + parameters: Type.Object({}), + async execute(toolCallId, params, signal, onUpdate, ctx) { + return await session.plan(ctx); + }, + }); + pi.registerCommand("koan", { - description: "Koan planning workflow", + description: "Koan commands. Usage: /koan config", handler: async (args, ctx) => { - const [subcommand, ...rest] = args.trim().split(/\s+/); - const command = subcommand ?? ""; - const remainingArgs = rest.join(" "); - - switch (command) { - case "plan": - await session.plan(remainingArgs, ctx); - break; - case "execute": - await session.execute(ctx); - break; - case "status": - await session.status(ctx); - break; - default: - ctx.ui.notify( - "Usage: /koan plan , /koan execute, or /koan status", - "error", - ); - break; + const subcommand = args.trim(); + if (subcommand === "config") { + await openKoanConfig(ctx); + } else if (subcommand === "") { + ctx.ui.notify("Usage: /koan config", "info"); + } else { + ctx.ui.notify(`Unknown koan subcommand: "${subcommand}". 
Usage: /koan config`, "warning"); } }, }); + + pi.registerCommand("koan-execute", { + description: "Execute a koan plan", + handler: async (_args, ctx) => { await session.execute(ctx); }, + }); + + pi.registerCommand("koan-status", { + description: "Show koan workflow status", + handler: async (_args, ctx) => { await session.status(ctx); }, + }); } diff --git a/src/planner/conversation.ts b/src/planner/conversation.ts new file mode 100644 index 0000000..86e9850 --- /dev/null +++ b/src/planner/conversation.ts @@ -0,0 +1,32 @@ +// Export the parent session conversation to a JSONL file in the plan directory. +// +// The output is raw pi SessionManager entries — NOT a plain-text transcript. +// Each line is a JSON object. Agents reading this file should look for entries +// with type "message" (role: "user" | "assistant") for conversation content, +// and type "compaction" for synthesized summaries of earlier context. +// The file is write-once and read-only from the perspective of planning phases. + +import { promises as fs } from "node:fs"; +import * as path from "node:path"; + +import type { ExtensionContext } from "@mariozechner/pi-coding-agent"; + +/** + * Export the current conversation branch as a JSONL file. + * Each line is a JSON-serialized session entry (header first, then branch entries). 
+ */ +export async function exportConversation( + sessionManager: ExtensionContext["sessionManager"], + planDir: string, +): Promise { + const filePath = path.join(planDir, "conversation.jsonl"); + const header = sessionManager.getHeader(); + const branch = sessionManager.getBranch(); + + const lines: string[] = []; + if (header) lines.push(JSON.stringify(header)); + for (const entry of branch) lines.push(JSON.stringify(entry)); + + await fs.writeFile(filePath, lines.join("\n") + "\n", "utf8"); + return filePath; +} diff --git a/src/planner/lib/audit.ts b/src/planner/lib/audit.ts index 94e9d39..1d2d670 100644 --- a/src/planner/lib/audit.ts +++ b/src/planner/lib/audit.ts @@ -443,12 +443,6 @@ const KOAN_SHAPES: Record = { koan_qr_get_item: { keys: ["phase", "id"], getter: true }, koan_qr_list_items: { keys: ["phase", "status"], getter: true }, koan_qr_summary: { keys: ["phase"], getter: true }, - - koan_store_context: { - keys: ["task_spec", "constraints", "entry_points", "rejected_alternatives", "current_understanding", "assumptions", "invisible_knowledge", "reference_docs"], - arrays: ["task_spec", "constraints", "entry_points", "rejected_alternatives", "current_understanding", "assumptions", "invisible_knowledge", "reference_docs"], - highValue: true, - }, }; // Reads the tail of events.jsonl and returns structured log entries. 
diff --git a/src/planner/lib/conversation-trigger.ts b/src/planner/lib/conversation-trigger.ts new file mode 100644 index 0000000..81bdf70 --- /dev/null +++ b/src/planner/lib/conversation-trigger.ts @@ -0,0 +1,60 @@ +export const PLAN_DESIGN_CONTEXT_TRIGGER_ID = "plan-design-context-trigger"; +export const PLAN_DOCS_CONTEXT_TRIGGER_ID = "plan-docs-context-trigger"; + +function exampleCommands(conversationPath: string, keywordRegex: string): string[] { + return [ + "Example commands (starting points; adapt as needed):", + ` CONV=\"${conversationPath}\"`, + " rg -n '\"role\":\"user\"|\"toolCall\"|koan_plan|phase|decision|constraint|tradeoff' \"$CONV\"", + " jq -cr 'select(.type==\"message\" and (.message.role==\"user\" or .message.role==\"assistant\")) | {ts:.timestamp, role:.message.role, text:([.message.content[]? | select(.type==\"text\") | .text] | join(\"\\n\"))} | select(.text != \"\")' \"$CONV\"", + ` jq -cr --arg re \"${keywordRegex}\" 'select(.type==\"message\") | {role:.message.role, texts:[.message.content[]? | select(.type==\"text\") | .text]} | .texts[]? as $t | select($t|test($re;\"i\")) | {role, text:$t}' \"$CONV\"`, + " jq -r 'select(.type==\"message\" and .message.role==\"assistant\") | .message.content[]? 
| select(.type==\"toolCall\" and .name==\"read\") | .arguments.path' \"$CONV\" | sort -u", + ]; +} + +export function buildPlanDesignContextTrigger(conversationPath: string): string[] { + return [ + "Use conversation context from the exact JSONL file path below.", + `Conversation file (absolute path): ${conversationPath}`, + "", + "This phase requires conversation grounding by default.", + "Before finalizing this step, open conversation.jsonl and extract:", + " - task intent and acceptance shape", + " - user constraints and preferences", + " - prior rejected options and decision rationale", + "", + "Read selectively (do not scan blindly end-to-end):", + " - prioritize type='message' with role='user'/'assistant'", + " - use type='compaction' entries for summarized earlier context", + "", + ...exampleCommands( + conversationPath, + "phase|planner|koan_plan|constraint|decision|tradeoff|acceptance", + ), + "", + "conversation.jsonl is read-only.", + ]; +} + +export function buildPlanDocsContextTrigger(conversationPath: string): string[] { + return [ + "Use conversation context from the exact JSONL file path below when needed.", + `Conversation file (absolute path): ${conversationPath}`, + "", + "Consult conversation.jsonl when plan artifacts do not fully explain:", + " - why a decision was made", + " - which tradeoff was accepted", + " - what implicit project knowledge should be documented", + " - how user preferences should affect docs emphasis", + "", + "Start from plan artifacts first; use conversation.jsonl to fill rationale gaps.", + "Read selectively (message + compaction entries), not exhaustively.", + "", + ...exampleCommands( + conversationPath, + "decision|tradeoff|why|constraint|docs|readme|diagram|comment|rationale", + ), + "", + "conversation.jsonl is read-only.", + ]; +} diff --git a/src/planner/lib/dispatch.ts b/src/planner/lib/dispatch.ts index cf8ec02..e9f935e 100644 --- a/src/planner/lib/dispatch.ts +++ b/src/planner/lib/dispatch.ts @@ -2,10 +2,6 @@ 
// Decouples static tool registration (init-time) from dynamic phase routing (runtime). // All mutable slots are null by default; phases hook/unhook on begin/end. -import type { ExtensionContext } from "@mariozechner/pi-coding-agent"; - -import type { ContextToolResult } from "../tools/context-store.js"; - // -- Result types -- export interface StepResult { @@ -18,13 +14,10 @@ export interface StepResult { export interface WorkflowDispatch { onCompleteStep: ((thoughts?: string) => StepResult | Promise) | null; - onStoreContext: - | ((payload: unknown, ctx: ExtensionContext) => Promise) - | null; } export function createDispatch(): WorkflowDispatch { - return { onCompleteStep: null, onStoreContext: null }; + return { onCompleteStep: null }; } // Decouples tool registration (init-time, before _buildRuntime) from diff --git a/src/planner/lib/permissions.ts b/src/planner/lib/permissions.ts index aee6f7f..a23faca 100644 --- a/src/planner/lib/permissions.ts +++ b/src/planner/lib/permissions.ts @@ -99,7 +99,6 @@ export const PLAN_MUTATION_TOOLS: ReadonlySet = new Set([ // updating the permissions map. export const PHASE_PERMISSIONS: ReadonlyMap> = new Map([ - ["context-capture", new Set(["koan_store_context", "koan_complete_step"])], [ "plan-design", new Set([ diff --git a/src/planner/model-config.ts b/src/planner/model-config.ts new file mode 100644 index 0000000..0f007cc --- /dev/null +++ b/src/planner/model-config.ts @@ -0,0 +1,102 @@ +// Koan config persistence for per-phase model overrides. +// Storage location: ~/.koan/config.json under a `phaseModels` key. +// Enforces all-or-none semantics: a stored config must contain exactly all +// 20 PhaseModelKeys. Partial configs are treated as absent and logged. 
+ +import { promises as fs } from "node:fs"; +import * as os from "node:os"; +import * as path from "node:path"; + +import { + ALL_PHASE_MODEL_KEYS, + isPhaseModelKey, + type PhaseModelKey, +} from "./model-phase.js"; + +export const KOAN_CONFIG_PATH = path.join(os.homedir(), ".koan", "config.json"); + +interface KoanConfigFile { + phaseModels?: Record; + [key: string]: unknown; +} + +export async function loadPhaseModelConfig(): Promise | null> { + let raw: string; + try { + raw = await fs.readFile(KOAN_CONFIG_PATH, "utf8"); + } catch { + return null; + } + + let parsed: KoanConfigFile; + try { + parsed = JSON.parse(raw) as KoanConfigFile; + } catch { + console.warn("[koan] config.json is not valid JSON; treating phase model config as absent."); + return null; + } + + if (!parsed.phaseModels || typeof parsed.phaseModels !== "object") { + return null; + } + + const phaseModels = parsed.phaseModels; + const keys = Object.keys(phaseModels); + + if (keys.length !== ALL_PHASE_MODEL_KEYS.length) { + console.warn( + `[koan] config.json phaseModels has ${keys.length} entries (expected ${ALL_PHASE_MODEL_KEYS.length}); treating as absent.`, + ); + return null; + } + + const result: Partial> = {}; + for (const key of keys) { + if (!isPhaseModelKey(key)) { + console.warn(`[koan] config.json phaseModels contains unknown key "${key}"; treating as absent.`); + return null; + } + const value = phaseModels[key]; + if (typeof value !== "string" || value.length === 0) { + console.warn( + `[koan] config.json phaseModels["${key}"] is not a non-empty string; treating as absent.`, + ); + return null; + } + result[key] = value; + } + + for (const expected of ALL_PHASE_MODEL_KEYS) { + if (!(expected in result)) { + console.warn(`[koan] config.json phaseModels is missing key "${expected}"; treating as absent.`); + return null; + } + } + + return result as Record; +} + +export async function savePhaseModelConfig( + config: Record | null, +): Promise { + const configDir = 
path.dirname(KOAN_CONFIG_PATH); + await fs.mkdir(configDir, { recursive: true }); + + let existing: KoanConfigFile = {}; + try { + const raw = await fs.readFile(KOAN_CONFIG_PATH, "utf8"); + existing = JSON.parse(raw) as KoanConfigFile; + } catch { + // Start fresh if file is missing or contains invalid JSON. + } + + if (config === null) { + delete existing.phaseModels; + } else { + existing.phaseModels = config as Record; + } + + const tmpPath = `${KOAN_CONFIG_PATH}.tmp`; + await fs.writeFile(tmpPath, `${JSON.stringify(existing, null, 2)}\n`, "utf8"); + await fs.rename(tmpPath, KOAN_CONFIG_PATH); +} diff --git a/src/planner/model-phase.ts b/src/planner/model-phase.ts new file mode 100644 index 0000000..b2319ca --- /dev/null +++ b/src/planner/model-phase.ts @@ -0,0 +1,63 @@ +// Canonical phase-model key definitions for koan per-phase model selection. +// Defines the 5×4 matrix of (phase row × sub-phase column) keys used across +// configuration, UI, and spawn-time resolution. + +export type PhaseRow = "plan-design" | "plan-code" | "plan-docs" | "exec-code" | "exec-docs"; +export type SubPhase = "exec-debut" | "exec-fix" | "qr-decompose" | "qr-verify"; +export type PhaseModelKey = `${PhaseRow}-${SubPhase}`; + +export const PHASE_ROWS: readonly PhaseRow[] = [ + "plan-design", + "plan-code", + "plan-docs", + "exec-code", + "exec-docs", +]; + +export const SUB_PHASES: readonly SubPhase[] = [ + "exec-debut", + "exec-fix", + "qr-decompose", + "qr-verify", +]; + +function computeAllKeys(): PhaseModelKey[] { + const keys: PhaseModelKey[] = []; + for (const row of PHASE_ROWS) { + for (const col of SUB_PHASES) { + keys.push(`${row}-${col}`); + } + } + return keys; +} + +export const ALL_PHASE_MODEL_KEYS: readonly PhaseModelKey[] = computeAllKeys(); + +const STRONG_KEY_SET: Set = new Set([ + // All qr-decompose keys (bias reasoning budget to verification) + "plan-design-qr-decompose", + "plan-code-qr-decompose", + "plan-docs-qr-decompose", + "exec-code-qr-decompose", + 
"exec-docs-qr-decompose", + // plan-design exec keys (ripple effects across later work) + "plan-design-exec-debut", + "plan-design-exec-fix", + // exec-docs exec keys (no mechanical correctness backstop) + "exec-docs-exec-debut", + "exec-docs-exec-fix", +]); + +export const STRONG_PHASE_MODEL_KEYS: ReadonlySet = STRONG_KEY_SET; + +export const GENERAL_PURPOSE_PHASE_MODEL_KEYS: readonly PhaseModelKey[] = + ALL_PHASE_MODEL_KEYS.filter((k) => !STRONG_KEY_SET.has(k)); + +export function isPhaseModelKey(value: unknown): value is PhaseModelKey { + if (typeof value !== "string") return false; + return (ALL_PHASE_MODEL_KEYS as readonly string[]).includes(value); +} + +export function buildPhaseModelKey(phaseRow: PhaseRow, subPhase: SubPhase): PhaseModelKey { + return `${phaseRow}-${subPhase}`; +} diff --git a/src/planner/model-resolver.ts b/src/planner/model-resolver.ts new file mode 100644 index 0000000..b67b371 --- /dev/null +++ b/src/planner/model-resolver.ts @@ -0,0 +1,33 @@ +// Spawn-time model resolver for per-phase model overrides. +// Maps spawn contexts to PhaseModelKeys and looks up configured overrides. +// Returns undefined when no config exists so the caller omits --model entirely, +// preserving pi's current active model as the implicit fallback. + +import { buildPhaseModelKey, type PhaseModelKey, type PhaseRow } from "./model-phase.js"; +import { loadPhaseModelConfig } from "./model-config.js"; + +export type SpawnContext = "work-debut" | "fix" | "qr-decompose" | "qr-verify"; + +export function mapSpawnContextToPhaseModelKey( + context: SpawnContext, + phaseRow: PhaseRow, + // Reserved for future fix-phase-specific routing. Current mapping is phase-row + context only. 
+ _fixPhase?: string, +): PhaseModelKey { + switch (context) { + case "work-debut": + return buildPhaseModelKey(phaseRow, "exec-debut"); + case "fix": + return buildPhaseModelKey(phaseRow, "exec-fix"); + case "qr-decompose": + return buildPhaseModelKey(phaseRow, "qr-decompose"); + case "qr-verify": + return buildPhaseModelKey(phaseRow, "qr-verify"); + } +} + +export async function resolvePhaseModelOverride(key: PhaseModelKey): Promise { + const config = await loadPhaseModelConfig(); + if (config === null) return undefined; + return config[key]; +} diff --git a/src/planner/phases/context-capture/phase.ts b/src/planner/phases/context-capture/phase.ts deleted file mode 100644 index ecd4b94..0000000 --- a/src/planner/phases/context-capture/phase.ts +++ /dev/null @@ -1,308 +0,0 @@ -import { promises as fs } from "node:fs"; -import * as path from "node:path"; - -import type { ExtensionAPI, ExtensionContext } from "@mariozechner/pi-coding-agent"; - -import { - draftGuidance, - verifyGuidance, - refineGuidance, - type RefinePromptOptions, -} from "./prompts.js"; -import { formatStep } from "../../lib/step.js"; -import type { ContextCaptureState, PlanInfo, WorkflowState } from "../../state.js"; -import type { ContextData } from "../../types.js"; -import { CONTEXT_KEYS } from "../../types.js"; -import type { ContextToolResult } from "../../tools/context-store.js"; -import { hookDispatch, unhookDispatch, type WorkflowDispatch } from "../../lib/dispatch.js"; -import { createLogger, type Logger } from "../../../utils/logger.js"; -import { checkPermission } from "../../lib/permissions.js"; - -const MAX_ATTEMPTS = 3; - -interface ValidationResult { - ok: boolean; - data?: ContextData; - errors: string[]; -} - -export class ContextCapturePhase { - private readonly state: WorkflowState; - private readonly pi: ExtensionAPI; - private readonly log: Logger; - private readonly dispatch: WorkflowDispatch; - private readonly onComplete?: (ctx: ExtensionContext) => Promise; - - 
constructor( - pi: ExtensionAPI, - state: WorkflowState, - dispatch: WorkflowDispatch, - log?: Logger, - onComplete?: (ctx: ExtensionContext) => Promise, - ) { - this.pi = pi; - this.state = state; - this.dispatch = dispatch; - this.log = log ?? createLogger("Context"); - this.onComplete = onComplete; - - this.registerHandlers(); - } - - async begin(taskDescription: string, plan: PlanInfo, ctx: ExtensionContext): Promise { - if (this.state.context?.active) { - ctx.ui.notify("Context capture is already in progress.", "warning"); - return; - } - - const contextFilePath = path.join(plan.directory, "context.json"); - await fs.rm(contextFilePath, { force: true }); - - this.state.phase = "context"; - this.state.context = { - active: true, - subPhase: "drafting", - attempt: 0, - maxAttempts: MAX_ATTEMPTS, - taskDescription, - planId: plan.id, - planDirectory: plan.directory, - contextFilePath, - lastPrompt: null, - feedback: [], - } satisfies ContextCaptureState; - - // Hook dispatch slots here (not constructor) because dispatch is - // shared with plan-design. Each phase hooks when activated (begin() - // for context-capture, begin() for plan-design). hookDispatch throws - // if the slot is already occupied (phase hook ownership prevents - // silent misrouting). - hookDispatch(this.dispatch, "onCompleteStep", () => this.handleSubPhaseComplete()); - hookDispatch(this.dispatch, "onStoreContext", (p, c) => this.handleContextToolCall(p, c)); - - this.log("Starting context capture (draft phase)", { planId: plan.id }); - - await this.updatePlanMetadata({ - status: "context", - context: { - expectedPath: contextFilePath, - startedAt: new Date().toISOString(), - }, - }); - - const prompt = formatStep(draftGuidance(taskDescription)); - this.state.context.lastPrompt = prompt; - this.pi.sendUserMessage(prompt); - } - - // Advances context capture sub-phase via tool call result. 
- // The returned prompt becomes the tool result text that the LLM - // processes within the same agent loop -- no sendUserMessage needed. - // Tool result delivery is synchronous regardless of -p mode. - private handleSubPhaseComplete(): { ok: boolean; prompt?: string; error?: string } { - const ctx = this.state.context; - if (!ctx || !this.shouldHandle()) { - return { ok: false, error: "Context capture is not active." }; - } - - if (ctx.subPhase === "drafting") { - ctx.subPhase = "verifying"; - const prompt = formatStep(verifyGuidance()); - ctx.lastPrompt = prompt; - this.log("Draft complete, transition to verify phase (tool call)"); - return { ok: true, prompt }; - } - - if (ctx.subPhase === "verifying") { - ctx.subPhase = "refining"; - ctx.attempt = 1; - const prompt = formatStep( - refineGuidance({ - attempt: 1, - maxAttempts: ctx.maxAttempts, - feedback: [], - }), - ); - ctx.lastPrompt = prompt; - this.log("Verify complete, transition to refine phase (tool call)"); - return { ok: true, prompt }; - } - - // Refine phase: koan_store_context handles completion, not this tool. - return { - ok: false, - error: "Refine phase: use koan_store_context to store the context.", - }; - } - - private registerHandlers(): void { - this.pi.on("tool_call", async (event) => { - if (!this.shouldHandle()) return; - - const perm = checkPermission("context-capture", event.toolName); - if (!perm.allowed) { - return { block: true, reason: perm.reason }; - } - - const ctx = this.state.context!; - - if (ctx.subPhase === "drafting") { - if (event.toolName === "koan_store_context") { - return { - block: true, - reason: "Draft phase: explore and draft first, then call koan_complete_step.", - }; - } - return undefined; - } - - if (ctx.subPhase === "verifying") { - if (event.toolName === "koan_complete_step") { - return undefined; - } - return { - block: true, - reason: "Verify phase: review your draft, then call koan_complete_step. 
No other tools.", - }; - } - - if (ctx.subPhase === "refining") { - if (event.toolName === "koan_store_context") { - return undefined; - } - return { - block: true, - reason: "Refine phase: call koan_store_context with the verified context.", - }; - } - - return undefined; - }); - } - - private shouldHandle(): boolean { - return Boolean(this.state.context?.active && this.state.phase === "context"); - } - - private async handleContextToolCall(payload: unknown, ctx: ExtensionContext): Promise { - if (!this.state.context || !this.shouldHandle()) { - return { - ok: false, - message: "Context capture is not active.", - errors: ["Context capture is not active."], - }; - } - - const validation = validateContextData(payload); - - if (!validation.ok || !validation.data) { - const errors = validation.errors.length > 0 ? validation.errors : ["Context validation failed."]; - this.state.context.feedback = errors; - this.log("Context validation failed", { errors }); - return { ok: false, message: formatErrors(errors), errors }; - } - - const rawText = JSON.stringify(payload, null, 2); - try { - await fs.writeFile(this.state.context.contextFilePath, `${rawText}\n`, "utf8"); - } catch (error) { - const message = error instanceof Error ? 
error.message : String(error); - this.log("Failed to write context file", { error: message }); - return { - ok: false, - message: `Failed to store context: ${message}`, - errors: [`Failed to store context: ${message}`], - }; - } - - this.state.context.active = false; - this.state.context.data = validation.data; - this.state.context.lastRawContent = rawText; - this.state.context.feedback = []; - this.state.phase = "context-complete"; - unhookDispatch(this.dispatch, "onCompleteStep"); - unhookDispatch(this.dispatch, "onStoreContext"); - - this.log("Context capture succeeded", { - planId: this.state.context.planId, - attempt: this.state.context.attempt, - }); - - await this.updatePlanMetadata({ - status: "context-complete", - context: { - capturedAt: new Date().toISOString(), - attempt: this.state.context.attempt, - file: this.state.context.contextFilePath, - }, - }); - - // Trigger completion callback (e.g. architect spawn) synchronously - // within the tool call. The tool blocks until the callback resolves, - // preventing the LLM from taking intermediate turns. - if (this.onComplete) { - const message = await this.onComplete(ctx); - return { ok: true, message }; - } - return { ok: true, message: "Context captured successfully." }; - } - - private async updatePlanMetadata(patch: Record): Promise { - const plan = this.state.plan; - if (!plan) return; - - try { - let current: Record = {}; - try { - const existing = await fs.readFile(plan.metadataPath, "utf8"); - current = JSON.parse(existing); - } catch { - current = { id: plan.id, createdAt: plan.createdAt }; - } - - const next = { ...current, ...patch }; - await fs.writeFile(plan.metadataPath, `${JSON.stringify(next, null, 2)}\n`, "utf8"); - } catch (error) { - const message = error instanceof Error ? 
error.message : String(error); - this.log("Failed to update plan metadata", { error: message }); - } - } -} - -function formatErrors(errors: string[]): string { - return `Context validation failed:\n${errors.map((e) => `- ${e}`).join("\n")}`; -} - -function validateContextData(value: unknown): ValidationResult { - if (typeof value !== "object" || value === null) { - return { ok: false, errors: ["Context data must be a JSON object."] }; - } - - const data = value as Record; - const errors: string[] = []; - const result: Record = {}; - - for (const key of CONTEXT_KEYS) { - const field = data[key]; - if (!Array.isArray(field)) { - errors.push(`${key} must be an array of strings.`); - continue; - } - if (field.length === 0) { - errors.push(`${key} must not be empty.`); - continue; - } - const bad = field.findIndex((item) => typeof item !== "string" || item.trim().length === 0); - if (bad !== -1) { - errors.push(`${key}[${bad}] must be a non-empty string.`); - continue; - } - result[key] = field.map((s: string) => s.trim()); - } - - if (errors.length > 0) { - return { ok: false, errors }; - } - - return { ok: true, data: result as unknown as ContextData, errors: [] }; -} diff --git a/src/planner/phases/context-capture/prompts.ts b/src/planner/phases/context-capture/prompts.ts deleted file mode 100644 index 575d801..0000000 --- a/src/planner/phases/context-capture/prompts.ts +++ /dev/null @@ -1,92 +0,0 @@ -import type { StepGuidance } from "../../lib/step.js"; - -export function draftGuidance(taskDescription: string): StepGuidance { - return { - title: "Context Capture: Draft", - instructions: [ - "You are about to begin a structured planning workflow. Before any formalization, think carefully through the full context of this task.", - "", - `Task: ${taskDescription}`, - "", - "Your primary source is the conversation so far. 
Most of what you need is already here.", - "", - "You MAY use tools during this phase if -- and only if -- a specific lookup would", - "resolve genuine uncertainty that materially affects planning. Examples of justified reads:", - "- Confirming an API signature you are unsure about", - "- Checking whether a file or module actually exists", - "- Reading a config that determines a key constraint", - "", - "Do NOT explore speculatively. If you can draft a confident answer from context alone, do so.", - "", - "Think through each of these dimensions:", - "", - "- What exactly is being asked? What is the user's goal? What is in scope and what is explicitly not?", - "- What technical constraints apply to the task itself -- API contracts, performance targets, compatibility requirements, architectural rules? Only include constraints that are specific to this task. Do not include general tool usage instructions, coding style guides, or editor/IDE conventions.", - "- Which files, modules, or entry points in the codebase are relevant? If this is greenfield work with no existing code, say so.", - "- Were any alternative approaches discussed and rejected during this session? Why?", - "- What is your current understanding of the system or domain involved?", - "- What assumptions are you making that haven't been verified? 
How confident are you in each?", - "- Is there any implicit design knowledge -- invariants, rationale, accepted tradeoffs -- that should be preserved for downstream work?", - "- Are there reference documents or specs in the project that apply?", - "", - "For each dimension, note your confidence:", - "- HIGH: you have direct evidence from this session", - "- LOW: you are extrapolating or guessing", - "", - "Flag any LOW-confidence point where a single targeted read would raise it to HIGH.", - "This is a working document, not a final artifact.", - "", - "Put your full draft analysis in the `thoughts` parameter when calling koan_complete_step.", - ], - }; -} - -export function verifyGuidance(): StepGuidance { - return { - title: "Context Capture: Verify", - instructions: [ - "Review the draft you just wrote. Check three things:", - "", - "1. Completeness: scan each dimension above. Is anything missing?", - "2. Accuracy: are any items wrong, speculative, or conflating things?", - "3. Phrasing: would a downstream agent understand without ambiguity?", - "", - "Rewrite the draft with corrections. 
If nothing needs changing, reproduce it as-is.", - "Do not use exploration tools during this review.", - "", - "Put your revised analysis in the `thoughts` parameter when calling koan_complete_step.", - ], - }; -} - -export interface RefinePromptOptions { - attempt: number; - maxAttempts: number; - feedback: string[]; -} - -export function refineGuidance(opts: RefinePromptOptions): StepGuidance { - const instructions: string[] = []; - if (opts.attempt > 1) { - instructions.push(`Retry (attempt ${opts.attempt} of ${opts.maxAttempts}).`); - } - instructions.push( - "Now call the `koan_store_context` tool with the verified context.", - "The tool's parameter schema defines exactly what fields are needed.", - ); - if (opts.feedback.length > 0) { - instructions.push("", "Address these issues from the previous attempt:"); - for (const item of opts.feedback) { - instructions.push(`- ${item}`); - } - } - return { - title: "Context Capture: Refine", - instructions, - // Refine completes with koan_store_context, not koan_complete_step. - invokeAfter: [ - "WHEN DONE: After completing the instructions above, call koan_store_context with the verified context data.", - "Do NOT call this tool until you have prepared the structured context.", - ].join("\n"), - }; -} diff --git a/src/planner/phases/plan-code/phase.ts b/src/planner/phases/plan-code/phase.ts index f4948b2..ab2b9e4 100644 --- a/src/planner/phases/plan-code/phase.ts +++ b/src/planner/phases/plan-code/phase.ts @@ -1,21 +1,16 @@ // Plan-code phase -- 4-step developer workflow converting code intents // to concrete code_changes diffs in plan.json. 
-import { promises as fs } from "node:fs"; -import * as path from "node:path"; - import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; import { loadAndValidatePlanForPhase } from "../../plan/validate.js"; import { loadPlanCodeSystemPrompt, - formatContextForStep1, buildPlanCodeSystemPrompt, planCodeStepGuidance, STEP_NAMES, } from "./prompts.js"; import { formatStep } from "../../lib/step.js"; -import type { ContextData } from "../../types.js"; import { createLogger, type Logger } from "../../../utils/logger.js"; import { EventLog } from "../../lib/audit.js"; import { hookDispatch, unhookDispatch, type WorkflowDispatch, type PlanRef } from "../../lib/dispatch.js"; @@ -27,7 +22,6 @@ interface PlanCodeState { active: boolean; step: PlanCodeStep; step1Prompt: string | null; - contextData: ContextData | null; systemPrompt: string | null; } @@ -62,7 +56,6 @@ export class PlanCodePhase { active: false, step: 1, step1Prompt: null, - contextData: null, systemPrompt: null, }; @@ -70,16 +63,6 @@ export class PlanCodePhase { } async begin(): Promise { - const contextPath = path.join(this.planDir, "context.json"); - try { - const raw = await fs.readFile(contextPath, "utf8"); - this.state.contextData = JSON.parse(raw) as ContextData; - } catch (error) { - const message = error instanceof Error ? 
error.message : String(error); - this.log("Failed to read context.json", { error: message }); - return; - } - let basePrompt: string; try { basePrompt = await loadPlanCodeSystemPrompt(); @@ -89,9 +72,8 @@ export class PlanCodePhase { return; } - const contextXml = formatContextForStep1(this.state.contextData); this.state.systemPrompt = buildPlanCodeSystemPrompt(basePrompt); - this.state.step1Prompt = formatStep(planCodeStepGuidance(1, contextXml)); + this.state.step1Prompt = formatStep(planCodeStepGuidance(1)); this.state.active = true; this.state.step = 1; this.planRef.dir = this.planDir; diff --git a/src/planner/phases/plan-code/prompts.ts b/src/planner/phases/plan-code/prompts.ts index 782ce4c..0aaab34 100644 --- a/src/planner/phases/plan-code/prompts.ts +++ b/src/planner/phases/plan-code/prompts.ts @@ -2,7 +2,6 @@ import { promises as fs } from "node:fs"; import * as os from "node:os"; import * as path from "node:path"; -import type { ContextData } from "../../types.js"; import type { StepGuidance } from "../../lib/step.js"; export const STEP_NAMES: Record<1 | 2 | 3 | 4, string> = { @@ -22,10 +21,6 @@ export async function loadPlanCodeSystemPrompt(): Promise { } } -export function formatContextForStep1(ctx: ContextData): string { - return ["", JSON.stringify(ctx, null, 2), ""].join("\n"); -} - export function buildPlanCodeSystemPrompt(basePrompt: string): string { return [ basePrompt, @@ -47,16 +42,12 @@ export function buildPlanCodeSystemPrompt(basePrompt: string): string { ].join("\n"); } -export function planCodeStepGuidance(step: 1 | 2 | 3 | 4, context?: string): StepGuidance { +export function planCodeStepGuidance(step: 1 | 2 | 3 | 4): StepGuidance { switch (step) { case 1: return { title: "Step 1: Intent Coverage Analysis", instructions: [ - "PLANNING CONTEXT (from session):", - "", - context ?? 
"", - "", "Use koan_get_plan to inspect milestones and code_intents.", "Build a checklist of intents that need code_changes.", "Record target files and affected functions per intent.", diff --git a/src/planner/phases/plan-design/fix-phase.ts b/src/planner/phases/plan-design/fix-phase.ts index 24b5cc8..664f0ba 100644 --- a/src/planner/phases/plan-design/fix-phase.ts +++ b/src/planner/phases/plan-design/fix-phase.ts @@ -16,6 +16,8 @@ // orchestrator decides whether to re-run QR -- the fix phase does not // know about iterations or severity escalation. +import * as path from "node:path"; + import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; import { loadAndValidatePlan } from "../../plan/validate.js"; @@ -105,8 +107,9 @@ export class PlanDesignFixPhase { this.failures.length, totalSteps, ); + const conversationPath = path.join(this.planDir, "conversation.jsonl"); this.state.step1Prompt = formatStep( - fixStepGuidance(1, totalSteps, { allFailuresXml: failuresXml }), + fixStepGuidance(1, totalSteps, { allFailuresXml: failuresXml, conversationPath }), ); this.state.active = true; this.state.step = 1; diff --git a/src/planner/phases/plan-design/fix-prompts.ts b/src/planner/phases/plan-design/fix-prompts.ts index 8d12cc8..d9ec61e 100644 --- a/src/planner/phases/plan-design/fix-prompts.ts +++ b/src/planner/phases/plan-design/fix-prompts.ts @@ -11,6 +11,7 @@ import type { QRItem } from "../../qr/types.js"; import type { StepGuidance } from "../../lib/step.js"; +import { buildPlanDesignContextTrigger } from "../../lib/conversation-trigger.js"; // Serializes FAIL items as an XML block injected into the step 1 prompt. // XML structure mirrors how pi-native tools present structured data. 
@@ -83,10 +84,10 @@ export function buildFixSystemPrompt( export function fixStepGuidance( step: number, totalSteps: number, - opts?: { item?: QRItem; allFailuresXml?: string }, + opts?: { item?: QRItem; allFailuresXml?: string; conversationPath?: string }, ): StepGuidance { if (step === 1) - return fixStep1Guidance(totalSteps, opts?.allFailuresXml ?? ""); + return fixStep1Guidance(totalSteps, opts?.allFailuresXml ?? "", opts?.conversationPath); if (step === totalSteps) return fixFinalStepGuidance(totalSteps); return fixItemStepGuidance(step, totalSteps, opts?.item); } @@ -98,6 +99,7 @@ export function fixStepGuidance( function fixStep1Guidance( totalSteps: number, failuresXml: string, + conversationPath?: string, ): StepGuidance { const itemCount = totalSteps - 2; return { @@ -107,6 +109,8 @@ function fixStep1Guidance( "", failuresXml, "", + ...buildPlanDesignContextTrigger(conversationPath ?? "/conversation.jsonl"), + "", `There are ${itemCount} failure(s). You will fix them one at a time`, `in steps 2 through ${totalSteps - 1}. Each step presents a single item.`, "", diff --git a/src/planner/phases/plan-design/phase.ts b/src/planner/phases/plan-design/phase.ts index f581e11..470f14e 100644 --- a/src/planner/phases/plan-design/phase.ts +++ b/src/planner/phases/plan-design/phase.ts @@ -2,7 +2,6 @@ // from captured context. Step gate: mutation tools blocked before step 6 // (blocklist pattern). Validation runs at step-6 completion. 
-import { promises as fs } from "node:fs"; import * as path from "node:path"; import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; @@ -10,13 +9,11 @@ import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; import { loadAndValidatePlan } from "../../plan/validate.js"; import { loadPlanDesignSystemPrompt, - formatContextForStep1, buildPlanDesignSystemPrompt, planDesignStepGuidance, STEP_NAMES, } from "./prompts.js"; import { formatStep } from "../../lib/step.js"; -import type { ContextData } from "../../types.js"; import { createLogger, type Logger } from "../../../utils/logger.js"; import { EventLog } from "../../lib/audit.js"; import { hookDispatch, unhookDispatch, type WorkflowDispatch, type PlanRef } from "../../lib/dispatch.js"; @@ -28,7 +25,6 @@ interface PlanDesignState { active: boolean; step: PlanDesignStep; step1Prompt: string | null; - contextData: ContextData | null; systemPrompt: string | null; } @@ -62,7 +58,6 @@ export class PlanDesignPhase { active: false, step: 1, step1Prompt: null, - contextData: null, systemPrompt: null, }; @@ -70,16 +65,6 @@ export class PlanDesignPhase { } async begin(): Promise { - const contextPath = path.join(this.planDir, "context.json"); - try { - const raw = await fs.readFile(contextPath, "utf8"); - this.state.contextData = JSON.parse(raw) as ContextData; - } catch (error) { - const message = error instanceof Error ? 
error.message : String(error); - this.log("Failed to read context.json", { error: message }); - return; - } - let basePrompt: string; try { basePrompt = await loadPlanDesignSystemPrompt(); @@ -89,9 +74,9 @@ export class PlanDesignPhase { return; } - const contextXml = formatContextForStep1(this.state.contextData); this.state.systemPrompt = buildPlanDesignSystemPrompt(basePrompt); - this.state.step1Prompt = formatStep(planDesignStepGuidance(1, contextXml)); + const conversationPath = path.join(this.planDir, "conversation.jsonl"); + this.state.step1Prompt = formatStep(planDesignStepGuidance(1, conversationPath)); this.state.active = true; this.state.step = 1; diff --git a/src/planner/phases/plan-design/prompts.ts b/src/planner/phases/plan-design/prompts.ts index 2f5727e..928a102 100644 --- a/src/planner/phases/plan-design/prompts.ts +++ b/src/planner/phases/plan-design/prompts.ts @@ -2,8 +2,8 @@ import { promises as fs } from "node:fs"; import * as os from "node:os"; import * as path from "node:path"; -import type { ContextData } from "../../types.js"; import type { StepGuidance } from "../../lib/step.js"; +import { buildPlanDesignContextTrigger } from "../../lib/conversation-trigger.js"; export const STEP_NAMES: Record<1 | 2 | 3 | 4 | 5 | 6, string> = { 1: "Task Analysis & Exploration Planning", @@ -26,14 +26,6 @@ export async function loadPlanDesignSystemPrompt(): Promise { } } -export function formatContextForStep1(ctx: ContextData): string { - return [ - "", - JSON.stringify(ctx, null, 2), - "", - ].join("\n"); -} - export function buildPlanDesignSystemPrompt(basePrompt: string): string { return [ basePrompt, @@ -54,17 +46,18 @@ export function buildPlanDesignSystemPrompt(basePrompt: string): string { ].join("\n"); } -export function planDesignStepGuidance(step: 1 | 2 | 3 | 4 | 5 | 6, context?: string): StepGuidance { +export function planDesignStepGuidance( + step: 1 | 2 | 3 | 4 | 5 | 6, + conversationPath?: string, +): StepGuidance { switch (step) { case 1: 
return { title: "Step 1: Task Analysis & Exploration Planning", instructions: [ - "PLANNING CONTEXT (from session):", - "", - context ?? "", + ...buildPlanDesignContextTrigger(conversationPath ?? "/conversation.jsonl"), "", - "Parse the user's task description. Identify:", + "After absorbing the task intent, identify:", " - What needs to change (files, modules, behavior)", " - What exploration is needed (patterns, constraints, existing code)", " - What directories/files are relevant", @@ -72,12 +65,6 @@ export function planDesignStepGuidance(step: 1 | 2 | 3 | 4 | 5 | 6, context?: st "Read project context files to understand structure:", " - Project root CLAUDE.md", " - Subdirectory CLAUDE.md files in relevant areas", - " - All paths in context.json reference_docs field (if any)", - "", - "CONTEXT.JSON CONTRACT: READ-ONLY.", - " - context.json is owned by the session", - " - You MUST NOT write, modify, or append to context.json", - " - Your outputs go to plan.json (step 6) -- never context.json", "", "DO NOT write any files yet. Gather understanding for step 2.", "Record your analysis mentally for use in subsequent steps.", diff --git a/src/planner/phases/plan-docs/fix-phase.ts b/src/planner/phases/plan-docs/fix-phase.ts index e757461..dcbc15f 100644 --- a/src/planner/phases/plan-docs/fix-phase.ts +++ b/src/planner/phases/plan-docs/fix-phase.ts @@ -1,5 +1,7 @@ // Plan-docs fix phase -- dynamic targeted QR repair workflow. 
+import * as path from "node:path"; + import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; import { loadAndValidatePlanForPhase } from "../../plan/validate.js"; @@ -81,7 +83,8 @@ export class PlanDocsFixPhase { this.failures.length, totalSteps, ); - this.state.step1Prompt = formatStep(fixStepGuidance(1, totalSteps, { allFailuresXml: failuresXml })); + const conversationPath = path.join(this.planDir, "conversation.jsonl"); + this.state.step1Prompt = formatStep(fixStepGuidance(1, totalSteps, { allFailuresXml: failuresXml, conversationPath })); this.state.active = true; this.state.step = 1; this.planRef.dir = this.planDir; diff --git a/src/planner/phases/plan-docs/fix-prompts.ts b/src/planner/phases/plan-docs/fix-prompts.ts index 90da4a0..5ae245c 100644 --- a/src/planner/phases/plan-docs/fix-prompts.ts +++ b/src/planner/phases/plan-docs/fix-prompts.ts @@ -1,5 +1,6 @@ import type { QRItem } from "../../qr/types.js"; import type { StepGuidance } from "../../lib/step.js"; +import { buildPlanDocsContextTrigger } from "../../lib/conversation-trigger.js"; export function formatFailuresXml(failures: ReadonlyArray): string { const items = failures @@ -39,7 +40,7 @@ export function buildFixSystemPrompt(basePrompt: string, failureCount: number, t ].join("\n"); } -function step1(totalSteps: number, failuresXml: string): StepGuidance { +function step1(totalSteps: number, failuresXml: string, conversationPath?: string): StepGuidance { const itemCount = totalSteps - 2; return { title: `Step 1/${totalSteps}: Understand QR Failures`, @@ -48,6 +49,8 @@ function step1(totalSteps: number, failuresXml: string): StepGuidance { "", failuresXml, "", + ...buildPlanDocsContextTrigger(conversationPath ?? "/conversation.jsonl"), + "", `There are ${itemCount} item(s). 
You will fix them one by one in steps 2-${totalSteps - 1}.`, "Inspect current docs state via koan_get_plan / koan_get_change.", "Identify exact correction needed per item.", @@ -95,9 +98,9 @@ function finalStep(totalSteps: number): StepGuidance { export function fixStepGuidance( step: number, totalSteps: number, - opts?: { item?: QRItem; allFailuresXml?: string }, + opts?: { item?: QRItem; allFailuresXml?: string; conversationPath?: string }, ): StepGuidance { - if (step === 1) return step1(totalSteps, opts?.allFailuresXml ?? ""); + if (step === 1) return step1(totalSteps, opts?.allFailuresXml ?? "", opts?.conversationPath); if (step === totalSteps) return finalStep(totalSteps); return itemStep(step, totalSteps, opts?.item); } diff --git a/src/planner/phases/plan-docs/phase.ts b/src/planner/phases/plan-docs/phase.ts index f8fec6c..24970ce 100644 --- a/src/planner/phases/plan-docs/phase.ts +++ b/src/planner/phases/plan-docs/phase.ts @@ -1,7 +1,6 @@ // Plan-docs phase -- 6-step technical writer workflow producing doc artifacts // (doc_diff/comments/diagram/readme) in plan.json. 
-import { promises as fs } from "node:fs"; import * as path from "node:path"; import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; @@ -9,13 +8,11 @@ import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; import { loadAndValidatePlanForPhase } from "../../plan/validate.js"; import { loadPlanDocsSystemPrompt, - formatContextForStep1, buildPlanDocsSystemPrompt, planDocsStepGuidance, STEP_NAMES, } from "./prompts.js"; import { formatStep } from "../../lib/step.js"; -import type { ContextData } from "../../types.js"; import { createLogger, type Logger } from "../../../utils/logger.js"; import { EventLog } from "../../lib/audit.js"; import { hookDispatch, unhookDispatch, type WorkflowDispatch, type PlanRef } from "../../lib/dispatch.js"; @@ -27,7 +24,6 @@ interface PlanDocsState { active: boolean; step: PlanDocsStep; step1Prompt: string | null; - contextData: ContextData | null; systemPrompt: string | null; } @@ -62,7 +58,6 @@ export class PlanDocsPhase { active: false, step: 1, step1Prompt: null, - contextData: null, systemPrompt: null, }; @@ -70,16 +65,6 @@ export class PlanDocsPhase { } async begin(): Promise { - const contextPath = path.join(this.planDir, "context.json"); - try { - const raw = await fs.readFile(contextPath, "utf8"); - this.state.contextData = JSON.parse(raw) as ContextData; - } catch (error) { - const message = error instanceof Error ? 
error.message : String(error); - this.log("Failed to read context.json", { error: message }); - return; - } - let basePrompt: string; try { basePrompt = await loadPlanDocsSystemPrompt(); @@ -89,9 +74,9 @@ export class PlanDocsPhase { return; } - const contextXml = formatContextForStep1(this.state.contextData); this.state.systemPrompt = buildPlanDocsSystemPrompt(basePrompt); - this.state.step1Prompt = formatStep(planDocsStepGuidance(1, contextXml)); + const conversationPath = path.join(this.planDir, "conversation.jsonl"); + this.state.step1Prompt = formatStep(planDocsStepGuidance(1, conversationPath)); this.state.active = true; this.state.step = 1; this.planRef.dir = this.planDir; diff --git a/src/planner/phases/plan-docs/prompts.ts b/src/planner/phases/plan-docs/prompts.ts index e27b58e..081f08a 100644 --- a/src/planner/phases/plan-docs/prompts.ts +++ b/src/planner/phases/plan-docs/prompts.ts @@ -2,8 +2,8 @@ import { promises as fs } from "node:fs"; import * as os from "node:os"; import * as path from "node:path"; -import type { ContextData } from "../../types.js"; import type { StepGuidance } from "../../lib/step.js"; +import { buildPlanDocsContextTrigger } from "../../lib/conversation-trigger.js"; export const STEP_NAMES: Record<1 | 2 | 3 | 4 | 5 | 6, string> = { 1: "Extract Documentation Context", @@ -24,10 +24,6 @@ export async function loadPlanDocsSystemPrompt(): Promise { } } -export function formatContextForStep1(ctx: ContextData): string { - return ["", JSON.stringify(ctx, null, 2), ""].join("\n"); -} - export function buildPlanDocsSystemPrompt(basePrompt: string): string { return [ basePrompt, @@ -50,18 +46,20 @@ export function buildPlanDocsSystemPrompt(basePrompt: string): string { ].join("\n"); } -export function planDocsStepGuidance(step: 1 | 2 | 3 | 4 | 5 | 6, context?: string): StepGuidance { +export function planDocsStepGuidance( + step: 1 | 2 | 3 | 4 | 5 | 6, + conversationPath?: string, +): StepGuidance { switch (step) { case 1: return { title: 
"Step 1: Extract Documentation Context", instructions: [ - "PLANNING CONTEXT (from session):", - "", - context ?? "", - "", "Use koan_get_plan to review decisions, constraints, risks, and milestones.", "Capture decision IDs that should be reflected in documentation rationale.", + "", + ...buildPlanDocsContextTrigger(conversationPath ?? "/conversation.jsonl"), + "", "This step is read-only.", ], }; diff --git a/src/planner/phases/qr-decompose/phase.ts b/src/planner/phases/qr-decompose/phase.ts index 309dba5..6f2e0b5 100644 --- a/src/planner/phases/qr-decompose/phase.ts +++ b/src/planner/phases/qr-decompose/phase.ts @@ -9,7 +9,6 @@ import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; import { loadQRDecomposeSystemPrompt, - formatContextForDecompose, buildDecomposeSystemPrompt, decomposeStepGuidance, DECOMPOSE_STEP_NAMES, @@ -17,7 +16,6 @@ import { type WorkPhaseKey, } from "./prompts.js"; import { formatStep } from "../../lib/step.js"; -import type { ContextData } from "../../types.js"; import { createLogger, type Logger } from "../../../utils/logger.js"; import { EventLog } from "../../lib/audit.js"; import { hookDispatch, unhookDispatch, type WorkflowDispatch, type PlanRef } from "../../lib/dispatch.js"; @@ -76,17 +74,6 @@ export class QRDecomposePhase { } async begin(): Promise { - const contextPath = path.join(this.planDir, "context.json"); - let contextData: ContextData; - try { - const raw = await fs.readFile(contextPath, "utf8"); - contextData = JSON.parse(raw) as ContextData; - } catch (error) { - const message = error instanceof Error ? 
error.message : String(error); - this.log("Failed to read context.json", { error: message }); - return; - } - let basePrompt: string; try { basePrompt = await loadQRDecomposeSystemPrompt(); @@ -96,9 +83,9 @@ export class QRDecomposePhase { return; } - const contextXml = formatContextForDecompose(contextData); this.state.systemPrompt = buildDecomposeSystemPrompt(basePrompt, this.workPhase); - this.state.step1Prompt = formatStep(decomposeStepGuidance(1, this.workPhase, contextXml)); + const conversationPath = path.join(this.planDir, "conversation.jsonl"); + this.state.step1Prompt = formatStep(decomposeStepGuidance(1, this.workPhase, conversationPath)); this.state.active = true; this.state.step = 1; this.planRef.dir = this.planDir; diff --git a/src/planner/phases/qr-decompose/prompts.ts b/src/planner/phases/qr-decompose/prompts.ts index 474f22f..e66c9d1 100644 --- a/src/planner/phases/qr-decompose/prompts.ts +++ b/src/planner/phases/qr-decompose/prompts.ts @@ -6,8 +6,11 @@ import { promises as fs } from "node:fs"; import * as os from "node:os"; import * as path from "node:path"; -import type { ContextData } from "../../types.js"; import type { StepGuidance } from "../../lib/step.js"; +import { + buildPlanDesignContextTrigger, + buildPlanDocsContextTrigger, +} from "../../lib/conversation-trigger.js"; export type DecomposeStep = 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13; export type WorkPhaseKey = "plan-design" | "plan-code" | "plan-docs"; @@ -46,6 +49,19 @@ const PHASE_SCOPE_HINTS: Record = { ], }; +function phaseContextTrigger( + phase: WorkPhaseKey, + conversationPath?: string, +): string[] { + if (phase === "plan-design") { + return buildPlanDesignContextTrigger(conversationPath ?? "/conversation.jsonl"); + } + if (phase === "plan-docs") { + return buildPlanDocsContextTrigger(conversationPath ?? 
"/conversation.jsonl"); + } + return []; +} + export async function loadQRDecomposeSystemPrompt(): Promise { const homeDir = os.homedir(); const promptPath = path.join(homeDir, ".claude/agents/quality-reviewer.md"); @@ -77,21 +93,20 @@ export function buildDecomposeSystemPrompt(basePrompt: string, phase: WorkPhaseK ].join("\n"); } -export function formatContextForDecompose(ctx: ContextData): string { - return ["", JSON.stringify(ctx, null, 2), ""].join("\n"); -} - -export function decomposeStepGuidance(step: DecomposeStep, phase: WorkPhaseKey, context?: string): StepGuidance { +export function decomposeStepGuidance( + step: DecomposeStep, + phase: WorkPhaseKey, + conversationPath?: string, +): StepGuidance { switch (step) { case 1: return { title: "Step 1: Absorb Context", instructions: [ `PHASE: ${phase}`, - "PLANNING CONTEXT (from session):", - "", - context ?? "", "", + ...phaseContextTrigger(phase, conversationPath), + ...(phase === "plan-code" ? [] : [""]), "Use koan_get_plan to read the full plan.", "Absorb the structures relevant to this phase and identify what needs verification.", ], diff --git a/src/planner/phases/qr-verify/phase.ts b/src/planner/phases/qr-verify/phase.ts index 623b9f6..185fb97 100644 --- a/src/planner/phases/qr-verify/phase.ts +++ b/src/planner/phases/qr-verify/phase.ts @@ -7,7 +7,6 @@ import * as path from "node:path"; import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; import { formatStep } from "../../lib/step.js"; -import type { ContextData } from "../../types.js"; import { createLogger, type Logger } from "../../../utils/logger.js"; import { EventLog } from "../../lib/audit.js"; import { hookDispatch, unhookDispatch, type WorkflowDispatch, type PlanRef } from "../../lib/dispatch.js"; @@ -88,17 +87,6 @@ export class QRVerifyPhase { return; } - const contextPath = path.join(this.planDir, "context.json"); - let contextData: ContextData; - try { - const raw = await fs.readFile(contextPath, "utf8"); - contextData = 
JSON.parse(raw) as ContextData; - } catch (error) { - const message = error instanceof Error ? error.message : String(error); - this.log("Failed to read context.json", { error: message }); - return; - } - const qrPath = path.join(this.planDir, `qr-${this.workPhase}.json`); let qrFile: QRFile; try { @@ -127,7 +115,8 @@ export class QRVerifyPhase { } this.state.systemPrompt = buildVerifySystemPrompt(basePrompt, this.workPhase); - this.state.step1Prompt = formatStep(buildContextStep(item, contextData, this.workPhase)); + const conversationPath = path.join(this.planDir, "conversation.jsonl"); + this.state.step1Prompt = formatStep(buildContextStep(item, this.workPhase, conversationPath)); this.state.active = true; this.state.step = 1; this.planRef.dir = this.planDir; diff --git a/src/planner/phases/qr-verify/prompts.ts b/src/planner/phases/qr-verify/prompts.ts index a364490..38fcbe7 100644 --- a/src/planner/phases/qr-verify/prompts.ts +++ b/src/planner/phases/qr-verify/prompts.ts @@ -5,24 +5,17 @@ import { promises as fs } from "node:fs"; import * as os from "node:os"; import * as path from "node:path"; -import type { ContextData } from "../../types.js"; import type { QRItem } from "../../qr/types.js"; import type { StepGuidance } from "../../lib/step.js"; +import { + buildPlanDesignContextTrigger, + buildPlanDocsContextTrigger, +} from "../../lib/conversation-trigger.js"; type WorkPhaseKey = "plan-design" | "plan-code" | "plan-docs"; export type VerifyStep = 1 | 2 | 3; -function formatContextXml(ctx: ContextData): string { - const fields = Object.entries(ctx) - .map(([key, values]) => { - const items = (values as string[]).map((v) => ` ${v}`).join("\n"); - return ` <${key}>\n${items}\n `; - }) - .join("\n"); - return `\n${fields}\n`; -} - function scopeGuidance(item: QRItem): string { const s = item.scope; if (s === "*") { @@ -47,6 +40,19 @@ function scopeGuidance(item: QRItem): string { return "SCOPED CHECK -- Read the relevant section using plan getter tools."; } 
+function phaseContextTrigger( + phase: WorkPhaseKey, + conversationPath?: string, +): string[] { + if (phase === "plan-design") { + return buildPlanDesignContextTrigger(conversationPath ?? "/conversation.jsonl"); + } + if (phase === "plan-docs") { + return buildPlanDocsContextTrigger(conversationPath ?? "/conversation.jsonl"); + } + return []; +} + export async function loadQRVerifySystemPrompt(): Promise { const promptPath = path.join(os.homedir(), ".claude/agents/quality-reviewer.md"); try { @@ -75,7 +81,11 @@ export function buildVerifySystemPrompt(basePrompt: string, phase: WorkPhaseKey) ].join("\n"); } -export function buildContextStep(item: QRItem, contextData: ContextData, phase: WorkPhaseKey): StepGuidance { +export function buildContextStep( + item: QRItem, + phase: WorkPhaseKey, + conversationPath?: string, +): StepGuidance { return { title: "Step 1: CONTEXT", instructions: [ @@ -89,9 +99,8 @@ export function buildContextStep(item: QRItem, contextData: ContextData, phase: ` ${item.severity}`, "", "", - "PLANNING CONTEXT (reference for semantic validation):", - formatContextXml(contextData), - "", + ...phaseContextTrigger(phase, conversationPath), + ...(phase === "plan-code" ? [] : [""]), "Understand the check and required evidence before analyzing.", ], }; diff --git a/src/planner/session.ts b/src/planner/session.ts index f48f65f..b555d5c 100644 --- a/src/planner/session.ts +++ b/src/planner/session.ts @@ -1,13 +1,13 @@ // Parent session: orchestrates the koan planning workflow. -// Flow: context capture -> plan-design(+QR) -> plan-code(+QR) -> plan-docs(+QR) +// Flow: export conversation -> plan-design(+QR) -> plan-code(+QR) -> plan-docs(+QR) // -> mechanical plan.json->plan.md rendering for manual review. 
import { promises as fs } from "node:fs"; import * as path from "node:path"; -import type { ExtensionAPI, ExtensionCommandContext, ExtensionContext } from "@mariozechner/pi-coding-agent"; +import type { AgentToolResult, ExtensionAPI, ExtensionCommandContext, ExtensionContext } from "@mariozechner/pi-coding-agent"; -import { ContextCapturePhase } from "./phases/context-capture/phase.js"; +import { exportConversation } from "./conversation.js"; import { createInitialState, initializePlanState, type WorkflowState } from "./state.js"; import { createPlanInfo } from "../utils/plan.js"; import { @@ -19,6 +19,8 @@ import { spawnTechnicalWriterFix, spawnQRDecomposer, spawnReviewer, + type SpawnQRDecomposerOptions, + type SpawnReviewerOptions, type SubagentResult, } from "./subagent.js"; import { createLogger, setLogDir, type Logger } from "../utils/logger.js"; @@ -30,11 +32,17 @@ import type { QRFile } from "./qr/types.js"; import { MAX_FIX_ITERATIONS, qrPassesAtIteration } from "./qr/severity.js"; import { WidgetController, type WidgetUpdate } from "./ui/widget.js"; import { renderPlanMarkdownToFile } from "./plan/render.js"; +import { + mapSpawnContextToPhaseModelKey, + resolvePhaseModelOverride, + type SpawnContext, +} from "./model-resolver.js"; +import type { PhaseRow } from "./model-phase.js"; type WorkPhaseKey = "plan-design" | "plan-code" | "plan-docs"; interface Session { - plan(args: string, ctx: ExtensionCommandContext): Promise; + plan(ctx: ExtensionContext): Promise>; execute(_ctx: ExtensionCommandContext): Promise; status(ctx: ExtensionCommandContext): Promise; } @@ -59,6 +67,7 @@ interface SpawnWorkRunOptions { cwd: string; extensionPath: string; log: Logger; + modelOverride?: string; } interface SpawnFixRunOptions extends SpawnWorkRunOptions {} @@ -101,133 +110,81 @@ function phaseCompleteState(phase: WorkPhaseKey): WorkflowState["phase"] { return "plan-docs-complete"; } -export function createSession(pi: ExtensionAPI, dispatch: WorkflowDispatch, planRef: 
PlanRef): Session { - const state: WorkflowState = createInitialState(); - const log = createLogger("Session"); - let widget: WidgetController | null = null; - - const onContextComplete = async (ctx: ExtensionContext): Promise => { - if (!state.plan) { - return "Context captured but no plan state available."; - } - - let outcome: "PASS" | "FAIL" = "FAIL"; +interface ModelResolutionDeps { + mapSpawnContextToPhaseModelKeyFn?: typeof mapSpawnContextToPhaseModelKey; + resolvePhaseModelOverrideFn?: typeof resolvePhaseModelOverride; +} - try { - const planDir = state.plan.directory; - const extensionPath = path.resolve(import.meta.dirname, "../../extensions/koan.ts"); - - const phases: PhaseRunConfig[] = [ - { - key: "plan-design", - label: "Plan design", - widgetIndex: 1, - role: "architect", - spawnWork: (opts) => spawnArchitect(opts), - spawnFix: (opts) => spawnArchitectFix({ ...opts, fixPhase: "plan-design" }), - }, - { - key: "plan-code", - label: "Plan code", - widgetIndex: 2, - role: "developer", - spawnWork: (opts) => spawnDeveloper(opts), - spawnFix: (opts) => spawnDeveloperFix({ ...opts, fixPhase: "plan-code" }), - }, - { - key: "plan-docs", - label: "Plan docs", - widgetIndex: 3, - role: "technical-writer", - spawnWork: (opts) => spawnTechnicalWriter(opts), - spawnFix: (opts) => spawnTechnicalWriterFix({ ...opts, fixPhase: "plan-docs" }), - }, - ]; +interface QRSpawnResolutionDeps extends ModelResolutionDeps { + spawnQRDecomposerFn?: typeof spawnQRDecomposer; + spawnReviewerFn?: typeof spawnReviewer; +} - widget?.update({ - phaseStatus: { index: 0, status: "completed" }, - activeIndex: 1, - step: "context captured; starting planning phases...", - activity: "", - }); +export async function resolveSpawnModelOverride( + context: SpawnContext, + phaseRow: PhaseRow, + deps: ModelResolutionDeps = {}, +): Promise { + const mapFn = deps.mapSpawnContextToPhaseModelKeyFn ?? mapSpawnContextToPhaseModelKey; + const resolveFn = deps.resolvePhaseModelOverrideFn ?? 
resolvePhaseModelOverride; + const key = mapFn(context, phaseRow); + return await resolveFn(key); +} - const phaseSummaries: string[] = []; - for (const phase of phases) { - const result = await runPlanningPhase( - phase, - planDir, - ctx.cwd, - extensionPath, - state, - log, - widget, - ); - - phaseSummaries.push(`${phase.label}: ${result.summary}`); - if (!result.passed) { - return `Context captured. ${phase.label} failed.\n\n${phaseSummaries.join("\n")}`; - } - } +export async function spawnWorkWithResolvedModel( + phaseRow: PhaseRow, + spawnWorkFn: (opts: SpawnWorkRunOptions) => Promise, + opts: SpawnWorkRunOptions, + deps: ModelResolutionDeps = {}, +): Promise { + const modelOverride = await resolveSpawnModelOverride("work-debut", phaseRow, deps); + return await spawnWorkFn({ ...opts, modelOverride }); +} - let planMdPath: string; - try { - planMdPath = await renderPlanMarkdownToFile(planDir); - } catch (error) { - const message = error instanceof Error ? error.message : String(error); - log("Failed to render plan.md", { error: message, planDir }); - return `Planning phases completed, but plan markdown rendering failed: ${message}`; - } +export async function spawnFixWithResolvedModel( + phaseRow: PhaseRow, + spawnFixFn: (opts: SpawnFixRunOptions) => Promise, + opts: SpawnFixRunOptions, + deps: ModelResolutionDeps = {}, +): Promise { + const modelOverride = await resolveSpawnModelOverride("fix", phaseRow, deps); + return await spawnFixFn({ ...opts, modelOverride }); +} - state.phase = "plan-docs-complete"; - widget?.update({ - activeIndex: -1, - step: "planning complete; awaiting manual review of plan.md", - activity: "", - }); +export async function spawnQRDecomposerWithResolvedModel( + opts: SpawnQRDecomposerOptions, + deps: QRSpawnResolutionDeps = {}, +): Promise { + const modelOverride = await resolveSpawnModelOverride("qr-decompose", opts.phase as PhaseRow, deps); + const spawnFn = deps.spawnQRDecomposerFn ?? 
spawnQRDecomposer; + return await spawnFn({ ...opts, modelOverride }); +} - outcome = "PASS"; - return [ - "Context captured. Planning complete.", - "", - ...phaseSummaries, - "", - `Plan markdown: ${planMdPath}`, - "PAUSE: Please review this file manually before /koan execute.", - ].join("\n"); - } finally { - if (widget) { - widget.destroy(); - widget = null; - } - ctx.ui.notify(outcome, outcome === "PASS" ? "info" : "error"); - } - }; +export async function spawnReviewerWithResolvedModel( + opts: SpawnReviewerOptions, + deps: QRSpawnResolutionDeps = {}, +): Promise { + const modelOverride = await resolveSpawnModelOverride("qr-verify", opts.phase as PhaseRow, deps); + const spawnFn = deps.spawnReviewerFn ?? spawnReviewer; + return await spawnFn({ ...opts, modelOverride }); +} - const contextPhase = new ContextCapturePhase(pi, state, dispatch, createLogger("Context"), onContextComplete); +export function createSession(pi: ExtensionAPI, dispatch: WorkflowDispatch, planRef: PlanRef): Session { + const state: WorkflowState = createInitialState(); + const log = createLogger("Session"); + let widget: WidgetController | null = null; return { - async plan(args, ctx) { - const description = args.trim(); - if (!description) { - ctx.ui.notify("Usage: /koan plan ", "error"); - return; - } - - if (state.phase === "context" && state.context?.active) { - ctx.ui.notify("Context capture already running. Use /koan status to check progress.", "warning"); - return; - } + async plan(ctx: ExtensionContext): Promise> { + const planInfo = await createPlanInfo("", ctx.cwd); + initializePlanState(state, planInfo, ""); - await ctx.waitForIdle(); - - const planInfo = await createPlanInfo(description, ctx.cwd); - initializePlanState(state, planInfo, description); + // Wire plan directory for subagent dispatch and logging. 
planRef.dir = planInfo.directory; setLogDir(planInfo.directory); - log("Plan command invoked", { + log("Plan tool invoked", { cwd: ctx.cwd, - description, planId: planInfo.id, planDirectory: planInfo.directory, }); @@ -241,7 +198,95 @@ export function createSession(pi: ExtensionAPI, dispatch: WorkflowDispatch, plan widget = new WidgetController(ctx.ui, planInfo.id); } - await contextPhase.begin(description, planInfo, ctx); + // Export conversation to plan directory. + // Agents that need session context can Read this file. + await exportConversation(ctx.sessionManager, planInfo.directory); + log("Conversation exported", { planDir: planInfo.directory }); + + let outcome: "PASS" | "FAIL" = "FAIL"; + try { + const planDir = planInfo.directory; + const extensionPath = path.resolve(import.meta.dirname, "../../extensions/koan.ts"); + + // widgetIndex 0=design, 1=code, 2=docs + const phases: PhaseRunConfig[] = [ + { + key: "plan-design", + label: "Plan design", + widgetIndex: 0, + role: "architect", + spawnWork: (opts) => spawnArchitect(opts), + spawnFix: (opts) => spawnArchitectFix({ ...opts, fixPhase: "plan-design" }), + }, + { + key: "plan-code", + label: "Plan code", + widgetIndex: 1, + role: "developer", + spawnWork: (opts) => spawnDeveloper(opts), + spawnFix: (opts) => spawnDeveloperFix({ ...opts, fixPhase: "plan-code" }), + }, + { + key: "plan-docs", + label: "Plan docs", + widgetIndex: 2, + role: "technical-writer", + spawnWork: (opts) => spawnTechnicalWriter(opts), + spawnFix: (opts) => spawnTechnicalWriterFix({ ...opts, fixPhase: "plan-docs" }), + }, + ]; + + const phaseSummaries: string[] = []; + for (const phase of phases) { + const result = await runPlanningPhase( + phase, + planDir, + ctx.cwd, + extensionPath, + state, + log, + widget, + ); + + phaseSummaries.push(`${phase.label}: ${result.summary}`); + if (!result.passed) { + return { + content: [{ type: "text" as const, text: `Planning failed at ${phase.label}.\n\n${phaseSummaries.join("\n")}` }], + 
details: undefined, + }; + } + } + + try { + await renderPlanMarkdownToFile(planDir); + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + log("Failed to render plan.md", { error: message, planDir }); + return { + content: [{ type: "text" as const, text: `Planning phases completed, but plan markdown rendering failed: ${message}\n\n${phaseSummaries.join("\n")}` }], + details: undefined, + }; + } + + state.phase = "plan-docs-complete"; + widget?.update({ + activeIndex: -1, + step: "planning complete; awaiting manual review of plan.md", + activity: "", + }); + + outcome = "PASS"; + return { + content: [{ type: "text" as const, text: `Planning complete.\n\n${phaseSummaries.join("\n")}` }], + details: undefined, + }; + } finally { + if (widget) { + widget.destroy(); + widget = null; + } + ctx.ui.notify(outcome, outcome === "PASS" ? "info" : "error"); + } }, async execute(ctx) { @@ -297,13 +342,17 @@ async function runPlanningPhase( }); }, 2000); - const workResult = await phase.spawnWork({ - planDir, - subagentDir, - cwd, - extensionPath, - log, - }); + const workResult = await spawnWorkWithResolvedModel( + phase.key as PhaseRow, + phase.spawnWork, + { + planDir, + subagentDir, + cwd, + extensionPath, + log, + }, + ); clearInterval(pollInterval); @@ -420,7 +469,7 @@ async function runQRBlock( }); }, 2000); - const decompose = await spawnQRDecomposer({ + const decompose = await spawnQRDecomposerWithResolvedModel({ planDir, subagentDir: decomposeDir, cwd, @@ -547,7 +596,7 @@ async function runQRBlock( QR_POOL_CONCURRENCY, async (itemId) => { const reviewerDir = await createSubagentDir(planDir, `qr-reviewer-${phase}-${itemId}`); - const r = await spawnReviewer({ + const r = await spawnReviewerWithResolvedModel({ planDir, subagentDir: reviewerDir, cwd, @@ -694,13 +743,17 @@ async function runPhaseWithQR( }); }, 2000); - const fixResult = await phase.spawnFix({ - planDir, - subagentDir: fixDir, - cwd, - extensionPath, - log, - }); + 
const fixResult = await spawnFixWithResolvedModel( + phase.key as PhaseRow, + phase.spawnFix, + { + planDir, + subagentDir: fixDir, + cwd, + extensionPath, + log, + }, + ); clearInterval(fixPoll); diff --git a/src/planner/state.ts b/src/planner/state.ts index eb34f5b..286250f 100644 --- a/src/planner/state.ts +++ b/src/planner/state.ts @@ -1,10 +1,5 @@ -import type { ContextData } from "./types.js"; - export type WorkflowPhase = | "idle" - | "context" - | "context-complete" - | "context-failed" | "architect-running" | "architect-failed" | "plan-design-complete" @@ -25,26 +20,10 @@ export interface PlanInfo { metadataPath: string; } -export interface ContextCaptureState { - readonly maxAttempts: number; - active: boolean; - subPhase: "drafting" | "verifying" | "refining"; - attempt: number; - taskDescription: string; - planId: string; - planDirectory: string; - contextFilePath: string; - lastPrompt: string | null; - feedback: string[]; - data?: ContextData; - lastRawContent?: string; -} - export interface WorkflowState { phase: WorkflowPhase; taskDescription: string | null; plan: PlanInfo | null; - context: ContextCaptureState | null; } export function createInitialState(): WorkflowState { @@ -52,19 +31,10 @@ export function createInitialState(): WorkflowState { phase: "idle", taskDescription: null, plan: null, - context: null, }; } -export function resetContextState(state: WorkflowState): void { - state.context = null; - if (state.phase !== "idle") { - state.phase = "idle"; - } -} - export function initializePlanState(state: WorkflowState, plan: PlanInfo, taskDescription: string): void { state.plan = plan; state.taskDescription = taskDescription; - resetContextState(state); } diff --git a/src/planner/subagent.ts b/src/planner/subagent.ts index 973759b..608bda6 100644 --- a/src/planner/subagent.ts +++ b/src/planner/subagent.ts @@ -23,6 +23,7 @@ export interface SpawnWorkOptions { cwd: string; extensionPath: string; initialPrompt?: string; + modelOverride?: string; 
log?: Logger; } @@ -32,6 +33,7 @@ export interface SpawnFixOptions { cwd: string; extensionPath: string; fixPhase: WorkPhaseKey; + modelOverride?: string; log?: Logger; } @@ -41,6 +43,7 @@ export interface SpawnQRDecomposerOptions { cwd: string; extensionPath: string; phase: WorkPhaseKey; + modelOverride?: string; log?: Logger; } @@ -51,17 +54,26 @@ export interface SpawnReviewerOptions { extensionPath: string; phase: WorkPhaseKey; itemId: string; + modelOverride?: string; log?: Logger; } -function spawnSubagent( +interface SpawnSubagentOpts { + planDir: string; + subagentDir: string; + cwd: string; + extensionPath: string; + extraFlags?: string[]; + modelOverride?: string; +} + +export function buildSpawnArgs( role: string, phase: string, prompt: string, - opts: { planDir: string; subagentDir: string; cwd: string; extensionPath: string; extraFlags?: string[] }, - log: Logger, -): Promise { - const args = [ + opts: SpawnSubagentOpts, +): string[] { + return [ "-p", "-e", opts.extensionPath, "--koan-role", role, @@ -69,8 +81,19 @@ function spawnSubagent( "--koan-plan-dir", opts.planDir, "--koan-subagent-dir", opts.subagentDir, ...(opts.extraFlags ?? []), + ...(opts.modelOverride ? 
["--model", opts.modelOverride] : []), prompt, ]; +} + +function spawnSubagent( + role: string, + phase: string, + prompt: string, + opts: SpawnSubagentOpts, + log: Logger, +): Promise { + const args = buildSpawnArgs(role, phase, prompt, opts); log(`Spawning ${role} subagent`, { planDir: opts.planDir, subagentDir: opts.subagentDir, phase }); diff --git a/src/planner/tools/context-store.ts b/src/planner/tools/context-store.ts deleted file mode 100644 index cb4e97e..0000000 --- a/src/planner/tools/context-store.ts +++ /dev/null @@ -1,34 +0,0 @@ -import { Type } from "@sinclair/typebox"; - -const NonEmptyStringArray = Type.Array(Type.String({ minLength: 1 }), { minItems: 1 }); - -export const ContextStoreSchema = Type.Object({ - task_spec: NonEmptyStringArray, - constraints: NonEmptyStringArray, - entry_points: NonEmptyStringArray, - rejected_alternatives: NonEmptyStringArray, - current_understanding: NonEmptyStringArray, - assumptions: NonEmptyStringArray, - invisible_knowledge: NonEmptyStringArray, - reference_docs: NonEmptyStringArray, -}, { - description: [ - "Structured planning context. 
All fields are string arrays.", - "task_spec: subject, scope, out-of-scope items.", - "constraints: 'MUST/SHOULD/MUST-NOT: rule (source)' or 'none confirmed'.", - "entry_points: 'file:symbol - why relevant' or 'greenfield'.", - "rejected_alternatives: 'approach - why dismissed' or 'none discussed'.", - "current_understanding: how the system works, relevant behavior.", - "assumptions: 'claim (H/M/L confidence)' or 'none'.", - "invisible_knowledge: design rationale, invariants, accepted tradeoffs.", - "reference_docs: 'path - what it covers' or 'none'.", - ].join(" "), -}); - -export interface ContextToolResult { - ok: boolean; - message: string; - errors?: string[]; -} - -export type ContextToolHandler = (payload: unknown, ctx: unknown) => Promise; diff --git a/src/planner/tools/workflow.ts b/src/planner/tools/workflow.ts index 70075e8..28b5282 100644 --- a/src/planner/tools/workflow.ts +++ b/src/planner/tools/workflow.ts @@ -1,11 +1,10 @@ -// Workflow tool registration: koan_complete_step and koan_store_context. +// Workflow tool registration: koan_complete_step. // Tools register once at init; execute callbacks read from the mutable // dispatch at call time, decoupling static registration from phase routing. 
import { Type } from "@sinclair/typebox"; import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; -import { ContextStoreSchema } from "./context-store.js"; import { createLogger } from "../../utils/logger.js"; import type { WorkflowDispatch } from "../lib/dispatch.js"; @@ -57,31 +56,4 @@ export function registerWorkflowTools( }; }, }); - - // -- koan_store_context -- - pi.registerTool({ - name: "koan_store_context", - label: "Store planning context", - description: [ - "Store structured planning context.", - "DO NOT call this tool until the step instructions explicitly tell you to.", - "Each field is a string array -- encode structure within strings, not as nested objects.", - ].join(" "), - parameters: ContextStoreSchema, - async execute(_toolCallId, params, _signal, _onUpdate, ctx) { - if (!dispatch.onStoreContext) { - throw new Error("Context capture is not active."); - } - const r = await dispatch.onStoreContext(params, ctx); - if (!r.ok) { - log("Context store rejected", { errors: r.errors }); - throw new Error(r.message); - } - log("Context stored"); - return { - content: [{ type: "text" as const, text: r.message }], - details: undefined, - }; - }, - }); } diff --git a/src/planner/types.ts b/src/planner/types.ts deleted file mode 100644 index 2a71e39..0000000 --- a/src/planner/types.ts +++ /dev/null @@ -1,21 +0,0 @@ -export interface ContextData { - task_spec: string[]; - constraints: string[]; - entry_points: string[]; - rejected_alternatives: string[]; - current_understanding: string[]; - assumptions: string[]; - invisible_knowledge: string[]; - reference_docs: string[]; -} - -export const CONTEXT_KEYS: ReadonlyArray = [ - "task_spec", - "constraints", - "entry_points", - "rejected_alternatives", - "current_understanding", - "assumptions", - "invisible_knowledge", - "reference_docs", -]; diff --git a/src/planner/ui/config/menu.ts b/src/planner/ui/config/menu.ts new file mode 100644 index 0000000..de11954 --- /dev/null +++ 
b/src/planner/ui/config/menu.ts @@ -0,0 +1,87 @@ +// Koan config menu. Opens a settings-style list with config sections. +// Currently exposes one section: "Model selection". +// New sections can be added here as additional SettingItems. + +import type { ExtensionCommandContext } from "@mariozechner/pi-coding-agent"; +import { getSettingsListTheme } from "@mariozechner/pi-coding-agent"; +import { type SettingItem, SettingsList } from "@mariozechner/pi-tui"; + +import { ALL_PHASE_MODEL_KEYS, type PhaseModelKey } from "../../model-phase.js"; +import { loadPhaseModelConfig } from "../../model-config.js"; +import { createModelSelectionComponent } from "./model-selection.js"; + +function configSummary(config: Record | null): string { + if (config === null) return "inheriting active model"; + return `${ALL_PHASE_MODEL_KEYS.length} keys configured`; +} + +export async function openKoanConfig(ctx: ExtensionCommandContext): Promise { + if (!ctx.hasUI) { + ctx.ui.notify("Koan config requires an interactive terminal.", "warning"); + return; + } + + await ctx.ui.custom(async (tui, theme, _keybindings, done) => { + const initialConfig = await loadPhaseModelConfig(); + let currentConfig = initialConfig; + + const activeModelId = ctx.model + ? `${ctx.model.provider}/${ctx.model.id}` + : undefined; + + // settingsList is captured in closure; submenu is only invoked after construction. + let settingsList: SettingsList; + + const sectionItems: SettingItem[] = [ + { + id: "model-selection", + label: "Model selection", + currentValue: configSummary(currentConfig), + submenu: (_cv, submenuDone) => { + return createModelSelectionComponent( + tui, + theme, + ctx.modelRegistry, + activeModelId, + currentConfig, + (newConfig) => { + currentConfig = newConfig; + settingsList.updateValue("model-selection", configSummary(newConfig)); + }, + (error) => { + const message = error instanceof Error ? 
error.message : String(error); + ctx.ui.notify(`Failed to save koan model config: ${message}`, "error"); + }, + () => submenuDone(undefined), + ); + }, + }, + ]; + + const returnItem: SettingItem = { + id: "__return", + label: "Return", + description: "Close /koan config (same as Esc)", + currentValue: "", + values: [""], + }; + + const items: SettingItem[] = [...sectionItems, returnItem]; + + settingsList = new SettingsList( + items, + 20, + getSettingsListTheme(), + (id) => { + if (id === "__return") done(); + }, + () => done(), + ); + + return { + render: (w) => settingsList.render(w), + handleInput: (d) => settingsList.handleInput(d), + invalidate: () => settingsList.invalidate(), + }; + }); +} diff --git a/src/planner/ui/config/model-selection.ts b/src/planner/ui/config/model-selection.ts new file mode 100644 index 0000000..7ff013a --- /dev/null +++ b/src/planner/ui/config/model-selection.ts @@ -0,0 +1,410 @@ +// Model selection matrix UI for /koan config. +// Renders quick-set actions plus a true 5×4 matrix (phase rows × sub-phase columns). +// Enter opens an inline ModelSelectorComponent for the selected quick-set/cell. +// Uses SettingsManager.inMemory() to prevent global default model mutation. 
+ +import { ModelSelectorComponent, SettingsManager } from "@mariozechner/pi-coding-agent"; +import type { Theme } from "@mariozechner/pi-coding-agent"; +import type { ModelRegistry } from "@mariozechner/pi-coding-agent"; +import { + type Component, + type TUI, + getEditorKeybindings, + truncateToWidth, + visibleWidth, +} from "@mariozechner/pi-tui"; + +import { + ALL_PHASE_MODEL_KEYS, + GENERAL_PURPOSE_PHASE_MODEL_KEYS, + PHASE_ROWS, + STRONG_PHASE_MODEL_KEYS, + SUB_PHASES, + buildPhaseModelKey, + type PhaseModelKey, + type PhaseRow, +} from "../../model-phase.js"; +import { savePhaseModelConfig } from "../../model-config.js"; + +// -- Pure quick-set utilities (exported for testing) -- + +export function initConfigFromActiveModel(activeModelId: string): Record { + const config: Partial> = {}; + for (const key of ALL_PHASE_MODEL_KEYS) { + config[key] = activeModelId; + } + return config as Record; +} + +export function applyStrongModel( + model: string, + existingConfig: Record | null, + activeModelId: string, +): Record { + const base = existingConfig ?? initConfigFromActiveModel(activeModelId); + const result = { ...base }; + for (const key of STRONG_PHASE_MODEL_KEYS) { + result[key] = model; + } + return result; +} + +export function applyGeneralPurposeModel( + model: string, + existingConfig: Record | null, + activeModelId: string, +): Record { + const base = existingConfig ?? 
initConfigFromActiveModel(activeModelId); + const result = { ...base }; + for (const key of GENERAL_PURPOSE_PHASE_MODEL_KEYS) { + result[key] = model; + } + return result; +} + +// -- Confirmation component for reset action -- + +class ResetConfirmComponent implements Component { + constructor( + private readonly theme: Theme, + private readonly onConfirm: () => void, + private readonly onCancel: () => void, + ) {} + + render(_width: number): string[] { + return [ + this.theme.bold(this.theme.fg("accent", "Reset all model overrides to active model?")), + "", + this.theme.fg("muted", " This will clear all 20 phase model overrides."), + this.theme.fg("muted", " Koan will use pi's current active model for all phases."), + "", + this.theme.fg("dim", " Enter to confirm · Escape to cancel"), + ]; + } + + handleInput(data: string): void { + if (data === "\r" || data === "\n") { + this.onConfirm(); + } else if (data === "\x1b") { + this.onCancel(); + } + } + + invalidate(): void {} +} + +function padRight(text: string, width: number): string { + const padding = Math.max(0, width - visibleWidth(text)); + return text + " ".repeat(padding); +} + +function renderCell(theme: Theme, text: string, width: number, selected: boolean, strong: boolean): string { + const innerWidth = Math.max(1, width - 2); + const clipped = truncateToWidth(text, innerWidth, ""); + const padded = padRight(clipped, innerWidth); + const raw = ` ${padded} `; + + if (selected) return theme.inverse(raw); + if (strong) return theme.fg("accent", raw); + return raw; +} + +function cellDisplay(modelId: string | undefined, activeModelId: string | undefined): string { + if (modelId === undefined) { + return activeModelId ? 
`inherit:${activeModelId}` : "inherit:active"; + } + return modelId; +} + +type SelectionZone = "quick" | "grid"; + +// -- Create model selection component -- + +export function createModelSelectionComponent( + tui: TUI, + theme: Theme, + modelRegistry: ModelRegistry, + activeModelId: string | undefined, + initialConfig: Record | null, + onConfigChange: (newConfig: Record | null) => void, + onSaveError: (error: unknown) => void, + onClose: () => void, +): Component { + const fallbackActive = activeModelId ?? "(active model)"; + const configRef: { value: Record | null } = { value: initialConfig }; + + const quickItems = [ + "Reset to active", + `Set strong (${STRONG_PHASE_MODEL_KEYS.size})`, + `Set general (${GENERAL_PURPOSE_PHASE_MODEL_KEYS.length})`, + ] as const; + + let zone: SelectionZone = "quick"; + let quickIndex = 0; + let rowIndex = 0; + let colIndex = 0; + let overlay: Component | null = null; + + function requestRender(): void { + tui.requestRender(); + } + + async function persistAndNotify(newConfig: Record | null): Promise { + const previous = configRef.value; + try { + await savePhaseModelConfig(newConfig); + configRef.value = newConfig; + onConfigChange(newConfig); + return true; + } catch (error) { + configRef.value = previous; + onSaveError(error); + return false; + } + } + + function makeModelSelector( + currentModelId: string | undefined, + onSelect: (modelId: string) => void, + onCancel: () => void, + ): Component { + const available = modelRegistry.getAvailable(); + const currentModel = currentModelId + ? 
available.find((m) => `${m.provider}/${m.id}` === currentModelId) + : available.find((m) => `${m.provider}/${m.id}` === activeModelId); + + const sm = SettingsManager.inMemory(); + + return new ModelSelectorComponent( + tui, + currentModel, + sm, + modelRegistry, + [], + (model) => onSelect(`${model.provider}/${model.id}`), + onCancel, + ); + } + + function closeOverlay(): void { + overlay = null; + requestRender(); + } + + function openResetConfirm(): void { + overlay = new ResetConfirmComponent( + theme, + () => { + void persistAndNotify(null).finally(() => closeOverlay()); + }, + () => closeOverlay(), + ); + requestRender(); + } + + function openStrongSelector(): void { + const strongSample = Array.from(STRONG_PHASE_MODEL_KEYS)[0]; + const currentId = configRef.value?.[strongSample]; + + overlay = makeModelSelector( + currentId, + (modelId) => { + const newConfig = applyStrongModel(modelId, configRef.value, fallbackActive); + void persistAndNotify(newConfig).finally(() => closeOverlay()); + }, + () => closeOverlay(), + ); + requestRender(); + } + + function openGeneralSelector(): void { + const gpSample = GENERAL_PURPOSE_PHASE_MODEL_KEYS[0]; + const currentId = configRef.value?.[gpSample]; + + overlay = makeModelSelector( + currentId, + (modelId) => { + const newConfig = applyGeneralPurposeModel(modelId, configRef.value, fallbackActive); + void persistAndNotify(newConfig).finally(() => closeOverlay()); + }, + () => closeOverlay(), + ); + requestRender(); + } + + function openCellSelector(): void { + const row = PHASE_ROWS[rowIndex] as PhaseRow; + const subPhase = SUB_PHASES[colIndex]; + const key = buildPhaseModelKey(row, subPhase); + const currentId = configRef.value?.[key]; + + overlay = makeModelSelector( + currentId, + (modelId) => { + const base = configRef.value ?? 
initConfigFromActiveModel(fallbackActive); + const newConfig = { ...base, [key]: modelId }; + void persistAndNotify(newConfig).finally(() => closeOverlay()); + }, + () => closeOverlay(), + ); + requestRender(); + } + + function activateSelection(): void { + if (zone === "quick") { + if (quickIndex === 0) { + openResetConfirm(); + } else if (quickIndex === 1) { + openStrongSelector(); + } else { + openGeneralSelector(); + } + return; + } + + openCellSelector(); + } + + function moveUp(): void { + if (zone === "quick") return; + if (rowIndex === 0) { + zone = "quick"; + return; + } + rowIndex -= 1; + } + + function moveDown(): void { + if (zone === "quick") { + zone = "grid"; + rowIndex = 0; + return; + } + + if (rowIndex === PHASE_ROWS.length - 1) { + rowIndex = 0; + return; + } + + rowIndex += 1; + } + + function moveLeft(): void { + if (zone === "quick") { + quickIndex = quickIndex === 0 ? quickItems.length - 1 : quickIndex - 1; + return; + } + + colIndex = colIndex === 0 ? SUB_PHASES.length - 1 : colIndex - 1; + } + + function moveRight(): void { + if (zone === "quick") { + quickIndex = quickIndex === quickItems.length - 1 ? 0 : quickIndex + 1; + return; + } + + colIndex = colIndex === SUB_PHASES.length - 1 ? 
0 : colIndex + 1; + } + + function renderMain(width: number): string[] { + const lines: string[] = []; + + lines.push(theme.bold(theme.fg("accent", "Koan / Config / Model selection"))); + lines.push(theme.fg("muted", `Fallback active model: ${fallbackActive}`)); + lines.push(""); + + const quick = quickItems + .map((label, i) => { + const block = ` ${label} `; + if (zone === "quick" && quickIndex === i) return theme.inverse(block); + return theme.fg("muted", block); + }) + .join(" "); + + lines.push(`Quick-set: ${quick}`); + lines.push(""); + + const sep = " | "; + const sepWidth = visibleWidth(sep); + const phaseColWidth = 12; + const available = Math.max(24, width - phaseColWidth - sepWidth * 4); + const modelColWidth = Math.max(12, Math.floor(available / 4)); + + const headerCells = [ + renderCell(theme, "phase", phaseColWidth, false, false), + ...SUB_PHASES.map((sub) => renderCell(theme, sub, modelColWidth, false, false)), + ]; + lines.push(headerCells.join(sep)); + lines.push("-".repeat(Math.max(10, Math.min(width, visibleWidth(headerCells.join(sep)))))); + + for (let r = 0; r < PHASE_ROWS.length; r += 1) { + const row = PHASE_ROWS[r] as PhaseRow; + const rowCells: string[] = [renderCell(theme, row, phaseColWidth, false, false)]; + + for (let c = 0; c < SUB_PHASES.length; c += 1) { + const sub = SUB_PHASES[c]; + const key = buildPhaseModelKey(row, sub); + const model = configRef.value?.[key]; + const display = cellDisplay(model, activeModelId); + const selected = zone === "grid" && rowIndex === r && colIndex === c; + const strong = STRONG_PHASE_MODEL_KEYS.has(key); + rowCells.push(renderCell(theme, display, modelColWidth, selected, strong)); + } + + lines.push(truncateToWidth(rowCells.join(sep), width)); + } + + lines.push(""); + lines.push(theme.fg("dim", "★ strong cell")); + lines.push(theme.fg("dim", "↑↓ move row/section · ←→ move column/quick-set · Enter select · Esc back")); + + return lines; + } + + return { + render: (width) => { + if (overlay) return 
overlay.render(width); + return renderMain(width); + }, + handleInput: (data) => { + if (overlay) { + overlay.handleInput?.(data); + return; + } + + const kb = getEditorKeybindings(); + + if (kb.matches(data, "selectCancel")) { + onClose(); + return; + } + if (kb.matches(data, "selectConfirm") || data === " ") { + activateSelection(); + return; + } + if (kb.matches(data, "selectUp")) { + moveUp(); + requestRender(); + return; + } + if (kb.matches(data, "selectDown")) { + moveDown(); + requestRender(); + return; + } + if (kb.matches(data, "cursorLeft")) { + moveLeft(); + requestRender(); + return; + } + if (kb.matches(data, "cursorRight")) { + moveRight(); + requestRender(); + } + }, + invalidate: () => { + overlay?.invalidate?.(); + }, + }; +} diff --git a/src/planner/ui/widget.ts b/src/planner/ui/widget.ts index 84320cc..14a0391 100644 --- a/src/planner/ui/widget.ts +++ b/src/planner/ui/widget.ts @@ -88,7 +88,6 @@ const LOG_LINES = 5; const BODY_INDENT = " "; const PLANNING_PHASES: ReadonlyArray<{ key: string; label: string; detail: string }> = [ - { key: "ctx", label: "Context gathering", detail: "Gathering initial context" }, { key: "design", label: "Plan design", detail: "Designing plan" }, { key: "code", label: "Plan code", detail: "Creating code plan" }, { key: "docs", label: "Plan docs", detail: "Documenting plan" }, @@ -225,7 +224,6 @@ const HEADER_STATUS_SHORT: Record = { }; const HEADER_PHASE_SHORT: Record = { - "Context gathering": "Ctx gather", "Plan design": "Design", "Plan code": "Code", "Plan docs": "Docs", @@ -339,7 +337,7 @@ function shouldShowQR(state: WidgetState): boolean { if (state.qrIteration === null) return false; const active = activePhase(state); if (!active) return false; - return active.key !== "ctx"; + return true; } type QRTier = "wide" | "medium" | "tight"; From 18341f7d01cfa412baa458757a9dec9e16ffd5f2 Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Mon, 2 Mar 2026 13:41:43 +0700 Subject: [PATCH 030/412] add regression tests for 
conversation export and model routing --- package.json | 2 +- tests/conversation.test.ts | 103 ++++++++++++ tests/model-config.test.ts | 233 ++++++++++++++++++++++++++ tests/model-phase.test.ts | 135 +++++++++++++++ tests/model-resolver.test.ts | 164 ++++++++++++++++++ tests/session-model-threading.test.ts | 205 ++++++++++++++++++++++ tests/subagent-model.test.ts | 215 ++++++++++++++++++++++++ tests/widget.test.ts | 9 +- 8 files changed, 1061 insertions(+), 5 deletions(-) create mode 100644 tests/conversation.test.ts create mode 100644 tests/model-config.test.ts create mode 100644 tests/model-phase.test.ts create mode 100644 tests/model-resolver.test.ts create mode 100644 tests/session-model-threading.test.ts create mode 100644 tests/subagent-model.test.ts diff --git a/package.json b/package.json index feaae5b..e99f2d3 100644 --- a/package.json +++ b/package.json @@ -25,7 +25,7 @@ "check": "tsc --noEmit", "build": "tsc --project tsconfig.build.json", "pretest": "npm run build", - "test": "node --test build/tests" + "test": "node --test --test-concurrency=1 build/tests" }, "dependencies": { "@sinclair/typebox": "^0.32.30" diff --git a/tests/conversation.test.ts b/tests/conversation.test.ts new file mode 100644 index 0000000..2e29dfe --- /dev/null +++ b/tests/conversation.test.ts @@ -0,0 +1,103 @@ +import assert from "node:assert/strict"; +import { promises as fs } from "node:fs"; +import * as os from "node:os"; +import * as path from "node:path"; +import { describe, it } from "node:test"; + +import { exportConversation } from "../src/planner/conversation.js"; + +type MockEntry = { type: string; role?: string; content?: string }; + +function createMockSessionManager(header: MockEntry | null, branch: MockEntry[]) { + return { + getHeader: () => header, + getBranch: () => branch, + }; +} + +async function withTempDir(fn: (dir: string) => Promise): Promise { + const dir = await fs.mkdtemp(path.join(os.tmpdir(), "koan-conv-test-")); + try { + return await fn(dir); + } 
finally { + await fs.rm(dir, { recursive: true, force: true }); + } +} + +describe("exportConversation", () => { + it("writes valid JSONL with header and branch entries", async () => { + await withTempDir(async (dir) => { + const header: MockEntry = { type: "header", content: "session-metadata" }; + const branch: MockEntry[] = [ + { type: "message", role: "user", content: "Plan this task" }, + { type: "message", role: "assistant", content: "I will plan it" }, + ]; + + const sessionManager = createMockSessionManager(header, branch); + const filePath = await exportConversation( + sessionManager as any, + dir, + ); + + assert.equal(filePath, path.join(dir, "conversation.jsonl")); + + const raw = await fs.readFile(filePath, "utf8"); + const lines = raw.trimEnd().split("\n"); + + assert.equal(lines.length, 3, "should have header + 2 branch entries"); + + const parsed = lines.map((line) => JSON.parse(line) as MockEntry); + assert.deepEqual(parsed[0], header); + assert.deepEqual(parsed[1], branch[0]); + assert.deepEqual(parsed[2], branch[1]); + }); + }); + + it("writes valid JSONL without header when header is null", async () => { + await withTempDir(async (dir) => { + const branch: MockEntry[] = [ + { type: "message", role: "user", content: "Hello" }, + ]; + + const sessionManager = createMockSessionManager(null, branch); + await exportConversation(sessionManager as any, dir); + + const raw = await fs.readFile(path.join(dir, "conversation.jsonl"), "utf8"); + const lines = raw.trimEnd().split("\n"); + + assert.equal(lines.length, 1, "should have only the branch entry"); + const parsed = JSON.parse(lines[0]) as MockEntry; + assert.deepEqual(parsed, branch[0]); + }); + }); + + it("writes empty file with trailing newline when no entries", async () => { + await withTempDir(async (dir) => { + const sessionManager = createMockSessionManager(null, []); + await exportConversation(sessionManager as any, dir); + + const raw = await fs.readFile(path.join(dir, "conversation.jsonl"), 
"utf8"); + assert.equal(raw, "\n", "empty conversation should produce a single newline"); + }); + }); + + it("each line is valid JSON", async () => { + await withTempDir(async (dir) => { + const header: MockEntry = { type: "header" }; + const branch: MockEntry[] = [ + { type: "message", role: "user", content: 'contains "quotes" and\nnewlines' }, + { type: "message", role: "assistant", content: "response" }, + ]; + + const sessionManager = createMockSessionManager(header, branch); + await exportConversation(sessionManager as any, dir); + + const raw = await fs.readFile(path.join(dir, "conversation.jsonl"), "utf8"); + const lines = raw.trimEnd().split("\n"); + + for (const line of lines) { + assert.doesNotThrow(() => JSON.parse(line), `line should be valid JSON: ${line}`); + } + }); + }); +}); diff --git a/tests/model-config.test.ts b/tests/model-config.test.ts new file mode 100644 index 0000000..a7e949f --- /dev/null +++ b/tests/model-config.test.ts @@ -0,0 +1,233 @@ +import assert from "node:assert/strict"; +import { promises as fs } from "node:fs"; +import * as os from "node:os"; +import * as path from "node:path"; +import { describe, it } from "node:test"; + +import { ALL_PHASE_MODEL_KEYS, type PhaseModelKey } from "../src/planner/model-phase.js"; +import { loadPhaseModelConfig, savePhaseModelConfig } from "../src/planner/model-config.js"; + +function makeFullConfig(model = "anthropic/claude-sonnet"): Record { + const config: Partial> = {}; + for (const key of ALL_PHASE_MODEL_KEYS) { + config[key] = model; + } + return config as Record; +} + +// Test config validation logic directly using a mock config file +// by writing to a temp location and reading back. +// Note: loadPhaseModelConfig reads from ~/.koan/config.json, so we +// test validation using the raw parsing logic via an in-process approach. 
+ +describe("config validation", () => { + it("accepts a complete 20-key config and returns it unchanged", async () => { + // We test the validation by round-tripping through save/load. + // To avoid touching ~/.koan/config.json, we verify the pure logic + // by testing that a valid config object has all required keys. + const config = makeFullConfig("anthropic/claude-opus-4"); + + // Verify it has exactly 20 keys + assert.equal(Object.keys(config).length, ALL_PHASE_MODEL_KEYS.length); + + // Verify all keys are valid PhaseModelKeys + for (const key of Object.keys(config)) { + assert.ok( + (ALL_PHASE_MODEL_KEYS as readonly string[]).includes(key), + `unexpected key: ${key}`, + ); + } + + // Verify all values are non-empty strings + for (const [key, value] of Object.entries(config)) { + assert.equal(typeof value, "string", `value for ${key} should be a string`); + assert.ok(value.length > 0, `value for ${key} should be non-empty`); + } + }); + + it("treats null as valid (no overrides)", () => { + // Null config is valid — it means inherit from pi's active model + const config: Record | null = null; + assert.equal(config, null); + }); +}); + +describe("loadPhaseModelConfig (integration)", () => { + it("returns null when config file is missing", async () => { + // loadPhaseModelConfig reads ~/.koan/config.json - if it doesn't exist, null + // We can only test this if ~/.koan/config.json doesn't exist on this machine + // or has no phaseModels. This is an integration test, so we skip the file check + // and instead verify the contract: the function always returns null or a valid config. 
+ const result = await loadPhaseModelConfig(); + // Result is either null or a Record with exactly 20 keys + if (result !== null) { + assert.equal(Object.keys(result).length, ALL_PHASE_MODEL_KEYS.length); + for (const key of ALL_PHASE_MODEL_KEYS) { + assert.equal(typeof result[key], "string"); + assert.ok(result[key].length > 0); + } + } + }); +}); + +describe("savePhaseModelConfig + loadPhaseModelConfig (round-trip)", () => { + it("persists a full config and reads it back correctly", async () => { + // KOAN_CONFIG_PATH is computed at module load time, so tests validate + // round-trip behavior against the real path and restore prior state. + + const actualConfigPath = path.join(os.homedir(), ".koan", "config.json"); + let preExisting: string | null = null; + + try { + preExisting = await fs.readFile(actualConfigPath, "utf8"); + } catch { + preExisting = null; + } + + try { + const config = makeFullConfig("openai/gpt-5"); + await savePhaseModelConfig(config); + + const loaded = await loadPhaseModelConfig(); + assert.ok(loaded !== null, "expected config to be loaded after save"); + assert.equal(Object.keys(loaded).length, ALL_PHASE_MODEL_KEYS.length); + + for (const key of ALL_PHASE_MODEL_KEYS) { + assert.equal(loaded[key], "openai/gpt-5", `mismatch for key ${key}`); + } + } finally { + // Restore original state + if (preExisting === null) { + try { + const koanDir = path.join(os.homedir(), ".koan"); + await fs.rm(actualConfigPath, { force: true }); + // Try to remove the .koan dir if it was empty before + const entries = await fs.readdir(koanDir); + if (entries.length === 0) { + await fs.rmdir(koanDir); + } + } catch { + // Best-effort cleanup + } + } else { + await fs.writeFile(actualConfigPath, preExisting, "utf8"); + } + + } + }); + + it("persists null (clears overrides) while preserving other config keys", async () => { + const actualConfigPath = path.join(os.homedir(), ".koan", "config.json"); + let preExisting: string | null = null; + + try { + preExisting = 
await fs.readFile(actualConfigPath, "utf8"); + } catch { + preExisting = null; + } + + try { + // Write an initial config + await savePhaseModelConfig(makeFullConfig("anthropic/claude-sonnet")); + + // Now clear it + await savePhaseModelConfig(null); + + const loaded = await loadPhaseModelConfig(); + assert.equal(loaded, null, "expected null after clearing overrides"); + + // Verify the config file still exists but has no phaseModels key + const raw = await fs.readFile(actualConfigPath, "utf8"); + const parsed = (raw.trim().length === 0 ? {} : JSON.parse(raw)) as Record; + assert.equal("phaseModels" in parsed, false, "phaseModels should be absent after clearing"); + } finally { + if (preExisting === null) { + try { + await fs.rm(actualConfigPath, { force: true }); + } catch { + // Best-effort + } + } else { + await fs.writeFile(actualConfigPath, preExisting, "utf8"); + } + } + }); +}); + +describe("config validation: partial config treated as absent", () => { + it("validates that a partial config (missing keys) is treated as absent", async () => { + // We simulate this by checking the validation logic: + // A config with fewer than 20 keys should produce null from loadPhaseModelConfig. + // We test this indirectly by verifying the contract. + const partialKeys = ALL_PHASE_MODEL_KEYS.slice(0, 10); + assert.equal(partialKeys.length, 10); + assert.equal(partialKeys.length < ALL_PHASE_MODEL_KEYS.length, true); + + // A partial config would fail the length check in loadPhaseModelConfig. + // We verify this by writing a partial config and reading it back. 
+ const actualConfigPath = path.join(os.homedir(), ".koan", "config.json"); + let preExisting: string | null = null; + + try { + preExisting = await fs.readFile(actualConfigPath, "utf8"); + } catch { + preExisting = null; + } + + try { + await fs.mkdir(path.dirname(actualConfigPath), { recursive: true }); + const partial: Record<string, string> = {}; + for (const key of partialKeys) { + partial[key] = "anthropic/claude-sonnet"; + } + await fs.writeFile(actualConfigPath, JSON.stringify({ phaseModels: partial }), "utf8"); + + const loaded = await loadPhaseModelConfig(); + assert.equal(loaded, null, "expected null for partial config"); + } finally { + if (preExisting === null) { + try { await fs.rm(actualConfigPath, { force: true }); } catch { /* best-effort */ } + } else { + await fs.writeFile(actualConfigPath, preExisting, "utf8"); + } + } + }); + + it("validates that a config with unknown keys is treated as absent", async () => { + const actualConfigPath = path.join(os.homedir(), ".koan", "config.json"); + let preExisting: string | null = null; + + try { + preExisting = await fs.readFile(actualConfigPath, "utf8"); + } catch { + preExisting = null; + } + + try { + await fs.mkdir(path.dirname(actualConfigPath), { recursive: true }); + + // Build a 20-key config with one key replaced by an unknown key + const badConfig: Record<string, string> = {}; + let first = true; + for (const key of ALL_PHASE_MODEL_KEYS) { + if (first) { + badConfig["unknown-phase-exec-debut"] = "anthropic/claude-sonnet"; + first = false; + } else { + badConfig[key] = "anthropic/claude-sonnet"; + } + } + + await fs.writeFile(actualConfigPath, JSON.stringify({ phaseModels: badConfig }), "utf8"); + + const loaded = await loadPhaseModelConfig(); + assert.equal(loaded, null, "expected null for config with unknown key"); + } finally { + if (preExisting === null) { + try { await fs.rm(actualConfigPath, { force: true }); } catch { /* best-effort */ } + } else { + await fs.writeFile(actualConfigPath, preExisting, "utf8"); + } + } + }); 
+}); diff --git a/tests/model-phase.test.ts b/tests/model-phase.test.ts new file mode 100644 index 0000000..9797d49 --- /dev/null +++ b/tests/model-phase.test.ts @@ -0,0 +1,135 @@ +import assert from "node:assert/strict"; +import { describe, it } from "node:test"; + +import { + ALL_PHASE_MODEL_KEYS, + GENERAL_PURPOSE_PHASE_MODEL_KEYS, + PHASE_ROWS, + STRONG_PHASE_MODEL_KEYS, + SUB_PHASES, + buildPhaseModelKey, + isPhaseModelKey, + type PhaseModelKey, +} from "../src/planner/model-phase.js"; + +describe("ALL_PHASE_MODEL_KEYS", () => { + it("contains exactly 20 keys (5 rows × 4 sub-phases)", () => { + assert.equal(ALL_PHASE_MODEL_KEYS.length, PHASE_ROWS.length * SUB_PHASES.length); + assert.equal(ALL_PHASE_MODEL_KEYS.length, 20); + }); + + it("contains no duplicates", () => { + const set = new Set(ALL_PHASE_MODEL_KEYS); + assert.equal(set.size, ALL_PHASE_MODEL_KEYS.length); + }); + + it("contains every combination of row and sub-phase", () => { + for (const row of PHASE_ROWS) { + for (const sub of SUB_PHASES) { + const key = `${row}-${sub}` as PhaseModelKey; + assert.ok( + ALL_PHASE_MODEL_KEYS.includes(key), + `expected key "${key}" to be present`, + ); + } + } + }); +}); + +describe("STRONG_PHASE_MODEL_KEYS", () => { + it("contains exactly 9 keys", () => { + assert.equal(STRONG_PHASE_MODEL_KEYS.size, 9); + }); + + it("contains all 5 qr-decompose keys", () => { + for (const row of PHASE_ROWS) { + const key = buildPhaseModelKey(row, "qr-decompose"); + assert.ok(STRONG_PHASE_MODEL_KEYS.has(key), `expected ${key} to be strong`); + } + }); + + it("contains plan-design exec-debut and exec-fix", () => { + assert.ok(STRONG_PHASE_MODEL_KEYS.has("plan-design-exec-debut")); + assert.ok(STRONG_PHASE_MODEL_KEYS.has("plan-design-exec-fix")); + }); + + it("contains exec-docs exec-debut and exec-fix", () => { + assert.ok(STRONG_PHASE_MODEL_KEYS.has("exec-docs-exec-debut")); + assert.ok(STRONG_PHASE_MODEL_KEYS.has("exec-docs-exec-fix")); + }); + + it("does not contain plan-code or 
plan-docs exec keys", () => { + assert.equal(STRONG_PHASE_MODEL_KEYS.has("plan-code-exec-debut"), false); + assert.equal(STRONG_PHASE_MODEL_KEYS.has("plan-code-exec-fix"), false); + assert.equal(STRONG_PHASE_MODEL_KEYS.has("plan-docs-exec-debut"), false); + assert.equal(STRONG_PHASE_MODEL_KEYS.has("plan-docs-exec-fix"), false); + }); +}); + +describe("GENERAL_PURPOSE_PHASE_MODEL_KEYS", () => { + it("contains exactly 11 keys (20 total - 9 strong)", () => { + assert.equal(GENERAL_PURPOSE_PHASE_MODEL_KEYS.length, 11); + }); + + it("strong and GP form a complete partition of all keys", () => { + const strongSet = STRONG_PHASE_MODEL_KEYS; + const gpSet = new Set(GENERAL_PURPOSE_PHASE_MODEL_KEYS); + + // Union equals ALL + for (const key of ALL_PHASE_MODEL_KEYS) { + assert.ok( + strongSet.has(key) || gpSet.has(key), + `key "${key}" missing from both sets`, + ); + } + + // Intersection is empty + for (const key of ALL_PHASE_MODEL_KEYS) { + assert.equal( + strongSet.has(key) && gpSet.has(key), + false, + `key "${key}" appears in both sets`, + ); + } + }); +}); + +describe("isPhaseModelKey", () => { + it("returns true for valid keys", () => { + for (const key of ALL_PHASE_MODEL_KEYS) { + assert.equal(isPhaseModelKey(key), true, `expected "${key}" to be valid`); + } + }); + + it("returns false for invalid strings", () => { + assert.equal(isPhaseModelKey("plan-design"), false); + assert.equal(isPhaseModelKey("exec-debut"), false); + assert.equal(isPhaseModelKey("plan-design-exec-init"), false); + assert.equal(isPhaseModelKey("unknown-key"), false); + assert.equal(isPhaseModelKey(""), false); + }); + + it("returns false for non-string values", () => { + assert.equal(isPhaseModelKey(42), false); + assert.equal(isPhaseModelKey(null), false); + assert.equal(isPhaseModelKey(undefined), false); + assert.equal(isPhaseModelKey({}), false); + }); +}); + +describe("buildPhaseModelKey", () => { + it("produces correct key for all combinations", () => { + 
assert.equal(buildPhaseModelKey("plan-design", "exec-debut"), "plan-design-exec-debut"); + assert.equal(buildPhaseModelKey("exec-docs", "qr-verify"), "exec-docs-qr-verify"); + assert.equal(buildPhaseModelKey("plan-code", "qr-decompose"), "plan-code-qr-decompose"); + }); + + it("produces keys that pass isPhaseModelKey", () => { + for (const row of PHASE_ROWS) { + for (const sub of SUB_PHASES) { + const key = buildPhaseModelKey(row, sub); + assert.equal(isPhaseModelKey(key), true, `buildPhaseModelKey(${row}, ${sub}) = "${key}" failed isPhaseModelKey`); + } + } + }); +}); diff --git a/tests/model-resolver.test.ts b/tests/model-resolver.test.ts new file mode 100644 index 0000000..b37ef35 --- /dev/null +++ b/tests/model-resolver.test.ts @@ -0,0 +1,164 @@ +import assert from "node:assert/strict"; +import { promises as fs } from "node:fs"; +import * as os from "node:os"; +import * as path from "node:path"; +import { describe, it } from "node:test"; + +import { + ALL_PHASE_MODEL_KEYS, + PHASE_ROWS, + SUB_PHASES, + type PhaseModelKey, +} from "../src/planner/model-phase.js"; +import { + mapSpawnContextToPhaseModelKey, + resolvePhaseModelOverride, + type SpawnContext, +} from "../src/planner/model-resolver.js"; + +describe("mapSpawnContextToPhaseModelKey", () => { + it("maps work-debut to exec-debut for all phase rows", () => { + for (const row of PHASE_ROWS) { + const key = mapSpawnContextToPhaseModelKey("work-debut", row); + assert.equal(key, `${row}-exec-debut`, `row=${row}`); + } + }); + + it("maps fix to exec-fix for all phase rows", () => { + for (const row of PHASE_ROWS) { + const key = mapSpawnContextToPhaseModelKey("fix", row); + assert.equal(key, `${row}-exec-fix`, `row=${row}`); + } + }); + + it("maps qr-decompose to qr-decompose for all phase rows", () => { + for (const row of PHASE_ROWS) { + const key = mapSpawnContextToPhaseModelKey("qr-decompose", row); + assert.equal(key, `${row}-qr-decompose`, `row=${row}`); + } + }); + + it("maps qr-verify to qr-verify for 
all phase rows", () => { + for (const row of PHASE_ROWS) { + const key = mapSpawnContextToPhaseModelKey("qr-verify", row); + assert.equal(key, `${row}-qr-verify`, `row=${row}`); + } + }); + + it("produces keys that are valid PhaseModelKeys", () => { + const contexts: SpawnContext[] = ["work-debut", "fix", "qr-decompose", "qr-verify"]; + for (const context of contexts) { + for (const row of PHASE_ROWS) { + const key = mapSpawnContextToPhaseModelKey(context, row); + assert.ok( + (ALL_PHASE_MODEL_KEYS as readonly string[]).includes(key), + `key "${key}" (context=${context}, row=${row}) is not a valid PhaseModelKey`, + ); + } + } + }); + + it("covers all 20 PhaseModelKeys across context × row combinations", () => { + const produced = new Set(); + const contexts: SpawnContext[] = ["work-debut", "fix", "qr-decompose", "qr-verify"]; + for (const context of contexts) { + for (const row of PHASE_ROWS) { + produced.add(mapSpawnContextToPhaseModelKey(context, row)); + } + } + assert.equal(produced.size, ALL_PHASE_MODEL_KEYS.length); + for (const key of ALL_PHASE_MODEL_KEYS) { + assert.ok(produced.has(key), `key "${key}" not produced by any context × row combination`); + } + }); + + it("accepts optional fixPhase argument without altering output", () => { + const withoutFix = mapSpawnContextToPhaseModelKey("fix", "plan-design"); + const withFix = mapSpawnContextToPhaseModelKey("fix", "plan-design", "plan-design"); + assert.equal(withoutFix, withFix); + }); +}); + +describe("SpawnContext values cover all sub-phases", () => { + it("one SpawnContext maps to each SubPhase", () => { + const contexts: SpawnContext[] = ["work-debut", "fix", "qr-decompose", "qr-verify"]; + const row = "plan-design"; + const subPhasesProduced = contexts.map((c) => { + const key = mapSpawnContextToPhaseModelKey(c, row); + return key.replace(`${row}-`, "") as typeof SUB_PHASES[number]; + }); + + for (const sub of SUB_PHASES) { + assert.ok( + subPhasesProduced.includes(sub), + `sub-phase "${sub}" not 
covered by any SpawnContext`, + ); + } + }); +}); + +function makeFullConfig(model: string): Record<PhaseModelKey, string> { + const config: Partial<Record<PhaseModelKey, string>> = {}; + for (const key of ALL_PHASE_MODEL_KEYS) { + config[key] = model; + } + return config as Record<PhaseModelKey, string>; +} + +async function withConfigFile<T>( + setup: (configPath: string) => Promise<void>, + run: () => Promise<T>, +): Promise<T> { + const configPath = path.join(os.homedir(), ".koan", "config.json"); + + let preExisting: string | null = null; + try { + preExisting = await fs.readFile(configPath, "utf8"); + } catch { + preExisting = null; + } + + try { + await fs.mkdir(path.dirname(configPath), { recursive: true }); + await setup(configPath); + return await run(); + } finally { + if (preExisting === null) { + try { + await fs.rm(configPath, { force: true }); + } catch { + // best-effort cleanup + } + } else { + await fs.writeFile(configPath, preExisting, "utf8"); + } + } +} + +describe("resolvePhaseModelOverride", () => { + it("returns configured model when full config is present", async () => { + await withConfigFile( + async (configPath) => { + const phaseModels = makeFullConfig("anthropic/claude-sonnet"); + phaseModels["plan-design-exec-debut"] = "openai/gpt-5"; + await fs.writeFile(configPath, `${JSON.stringify({ phaseModels }, null, 2)}\n`, "utf8"); + }, + async () => { + const value = await resolvePhaseModelOverride("plan-design-exec-debut"); + assert.equal(value, "openai/gpt-5"); + }, + ); + }); + + it("returns undefined when config is absent", async () => { + await withConfigFile( + async (configPath) => { + await fs.writeFile(configPath, `${JSON.stringify({ unrelated: true }, null, 2)}\n`, "utf8"); + }, + async () => { + const value = await resolvePhaseModelOverride("plan-code-exec-fix"); + assert.equal(value, undefined); + }, + ); + }); +}); diff --git a/tests/session-model-threading.test.ts b/tests/session-model-threading.test.ts new file mode 100644 index 0000000..6462e8d --- /dev/null +++ b/tests/session-model-threading.test.ts @@ -0,0 
+1,205 @@ +import assert from "node:assert/strict"; +import { describe, it } from "node:test"; + +import { + resolveSpawnModelOverride, + spawnWorkWithResolvedModel, + spawnFixWithResolvedModel, + spawnQRDecomposerWithResolvedModel, + spawnReviewerWithResolvedModel, +} from "../src/planner/session.js"; +import type { PhaseModelKey } from "../src/planner/model-phase.js"; + +describe("resolveSpawnModelOverride", () => { + it("maps context -> key and resolves override", async () => { + const contexts = ["work-debut", "fix", "qr-decompose", "qr-verify"] as const; + + for (const context of contexts) { + let mappedContext: string | null = null; + let mappedRow: string | null = null; + let resolvedKey: string | null = null; + + const result = await resolveSpawnModelOverride(context, "plan-design", { + mapSpawnContextToPhaseModelKeyFn: (ctx, row) => { + mappedContext = ctx; + mappedRow = row; + return "plan-design-exec-debut" as PhaseModelKey; + }, + resolvePhaseModelOverrideFn: async (key) => { + resolvedKey = key; + return "anthropic/claude-opus-4"; + }, + }); + + assert.equal(mappedContext, context); + assert.equal(mappedRow, "plan-design"); + assert.equal(resolvedKey, "plan-design-exec-debut"); + assert.equal(result, "anthropic/claude-opus-4"); + } + }); + + it("returns undefined when resolver reports absent config", async () => { + const result = await resolveSpawnModelOverride("work-debut", "plan-code", { + mapSpawnContextToPhaseModelKeyFn: () => "plan-code-exec-debut" as PhaseModelKey, + resolvePhaseModelOverrideFn: async () => undefined, + }); + + assert.equal(result, undefined); + }); +}); + +describe("work/fix spawn model threading", () => { + it("threads resolved modelOverride into work spawns", async () => { + let capturedModelOverride: string | undefined; + + await spawnWorkWithResolvedModel( + "plan-design", + async (opts) => { + capturedModelOverride = opts.modelOverride; + return { exitCode: 0, stderr: "", subagentDir: opts.subagentDir }; + }, + { + 
planDir: "/plan", + subagentDir: "/subagent", + cwd: "/cwd", + extensionPath: "/ext/koan.ts", + log: () => {}, + }, + { + mapSpawnContextToPhaseModelKeyFn: (ctx, row) => { + assert.equal(ctx, "work-debut"); + assert.equal(row, "plan-design"); + return "plan-design-exec-debut" as PhaseModelKey; + }, + resolvePhaseModelOverrideFn: async (key) => { + assert.equal(key, "plan-design-exec-debut"); + return "anthropic/claude-opus-4"; + }, + }, + ); + + assert.equal(capturedModelOverride, "anthropic/claude-opus-4"); + }); + + it("threads resolved modelOverride into fix spawns", async () => { + let capturedModelOverride: string | undefined; + + await spawnFixWithResolvedModel( + "plan-code", + async (opts) => { + capturedModelOverride = opts.modelOverride; + return { exitCode: 0, stderr: "", subagentDir: opts.subagentDir }; + }, + { + planDir: "/plan", + subagentDir: "/subagent", + cwd: "/cwd", + extensionPath: "/ext/koan.ts", + log: () => {}, + }, + { + mapSpawnContextToPhaseModelKeyFn: (ctx, row) => { + assert.equal(ctx, "fix"); + assert.equal(row, "plan-code"); + return "plan-code-exec-fix" as PhaseModelKey; + }, + resolvePhaseModelOverrideFn: async (key) => { + assert.equal(key, "plan-code-exec-fix"); + return "openai/gpt-5"; + }, + }, + ); + + assert.equal(capturedModelOverride, "openai/gpt-5"); + }); +}); + +describe("QR spawn model threading", () => { + it("threads resolved modelOverride into spawnQRDecomposer", async () => { + let capturedModelOverride: string | undefined; + + await spawnQRDecomposerWithResolvedModel( + { + planDir: "/plan", + subagentDir: "/subagent", + cwd: "/cwd", + extensionPath: "/ext/koan.ts", + phase: "plan-design", + }, + { + mapSpawnContextToPhaseModelKeyFn: (ctx, row) => { + assert.equal(ctx, "qr-decompose"); + assert.equal(row, "plan-design"); + return "plan-design-qr-decompose" as PhaseModelKey; + }, + resolvePhaseModelOverrideFn: async (key) => { + assert.equal(key, "plan-design-qr-decompose"); + return "openai/gpt-5"; + }, + 
spawnQRDecomposerFn: async (opts) => { + capturedModelOverride = opts.modelOverride; + return { exitCode: 0, stderr: "", subagentDir: opts.subagentDir }; + }, + }, + ); + + assert.equal(capturedModelOverride, "openai/gpt-5"); + }); + + it("threads resolved modelOverride into spawnReviewer", async () => { + let capturedModelOverride: string | undefined; + + await spawnReviewerWithResolvedModel( + { + planDir: "/plan", + subagentDir: "/subagent", + cwd: "/cwd", + extensionPath: "/ext/koan.ts", + phase: "plan-code", + itemId: "QR-001", + }, + { + mapSpawnContextToPhaseModelKeyFn: (ctx, row) => { + assert.equal(ctx, "qr-verify"); + assert.equal(row, "plan-code"); + return "plan-code-qr-verify" as PhaseModelKey; + }, + resolvePhaseModelOverrideFn: async (key) => { + assert.equal(key, "plan-code-qr-verify"); + return "google/gemini-3-pro"; + }, + spawnReviewerFn: async (opts) => { + capturedModelOverride = opts.modelOverride; + return { exitCode: 0, stderr: "", subagentDir: opts.subagentDir }; + }, + }, + ); + + assert.equal(capturedModelOverride, "google/gemini-3-pro"); + }); + + it("passes undefined modelOverride when config is absent", async () => { + let capturedModelOverride: string | undefined; + + await spawnReviewerWithResolvedModel( + { + planDir: "/plan", + subagentDir: "/subagent", + cwd: "/cwd", + extensionPath: "/ext/koan.ts", + phase: "plan-docs", + itemId: "QR-002", + }, + { + mapSpawnContextToPhaseModelKeyFn: () => "plan-docs-qr-verify" as PhaseModelKey, + resolvePhaseModelOverrideFn: async () => undefined, + spawnReviewerFn: async (opts) => { + capturedModelOverride = opts.modelOverride; + return { exitCode: 0, stderr: "", subagentDir: opts.subagentDir }; + }, + }, + ); + + assert.equal(capturedModelOverride, undefined); + }); +}); diff --git a/tests/subagent-model.test.ts b/tests/subagent-model.test.ts new file mode 100644 index 0000000..ee07ccb --- /dev/null +++ b/tests/subagent-model.test.ts @@ -0,0 +1,215 @@ +import assert from "node:assert/strict"; 
+import { describe, it } from "node:test"; + +import { buildSpawnArgs } from "../src/planner/subagent.js"; +import { + ALL_PHASE_MODEL_KEYS, + type PhaseModelKey, +} from "../src/planner/model-phase.js"; +import { + applyGeneralPurposeModel, + applyStrongModel, + initConfigFromActiveModel, +} from "../src/planner/ui/config/model-selection.js"; +import { + GENERAL_PURPOSE_PHASE_MODEL_KEYS, + STRONG_PHASE_MODEL_KEYS, +} from "../src/planner/model-phase.js"; + +// -- buildSpawnArgs: --model flag threading -- + +describe("buildSpawnArgs", () => { + const baseOpts = { + planDir: "/plan", + subagentDir: "/subagent", + extensionPath: "/ext/koan.ts", + cwd: "/working", + }; + + it("omits --model flag when modelOverride is absent", () => { + const args = buildSpawnArgs("architect", "plan-design", "start", baseOpts); + assert.equal(args.includes("--model"), false); + }); + + it("omits --model flag when modelOverride is undefined", () => { + const args = buildSpawnArgs("architect", "plan-design", "start", { + ...baseOpts, + modelOverride: undefined, + }); + assert.equal(args.includes("--model"), false); + }); + + it("includes --model flag and value when modelOverride is set", () => { + const args = buildSpawnArgs("architect", "plan-design", "start", { + ...baseOpts, + modelOverride: "anthropic/claude-opus-4", + }); + assert.ok(args.includes("--model"), "expected --model flag in args"); + const idx = args.indexOf("--model"); + assert.equal(args[idx + 1], "anthropic/claude-opus-4"); + }); + + it("places --model before the prompt (last arg)", () => { + const prompt = "Begin the plan-design phase."; + const args = buildSpawnArgs("architect", "plan-design", prompt, { + ...baseOpts, + modelOverride: "openai/gpt-5", + }); + const modelIdx = args.indexOf("--model"); + const promptIdx = args.indexOf(prompt); + assert.ok(modelIdx >= 0, "--model not found"); + assert.ok(promptIdx >= 0, "prompt not found"); + assert.ok(modelIdx < promptIdx, "--model should appear before prompt"); + }); + 
+ it("places --model after extraFlags", () => { + const args = buildSpawnArgs("reviewer", "qr-plan-design", "Verify.", { + ...baseOpts, + extraFlags: ["--koan-qr-item", "item-42"], + modelOverride: "google/gemini-2-pro", + }); + const qrItemIdx = args.indexOf("--koan-qr-item"); + const modelIdx = args.indexOf("--model"); + assert.ok(qrItemIdx >= 0, "--koan-qr-item not found"); + assert.ok(modelIdx >= 0, "--model not found"); + assert.ok(qrItemIdx < modelIdx, "--model should appear after extra flags"); + }); + + it("preserves all required fixed args regardless of modelOverride", () => { + const args = buildSpawnArgs("developer", "plan-code", "begin", { + ...baseOpts, + modelOverride: "anthropic/claude-sonnet", + }); + assert.ok(args.includes("-p"), "-p flag missing"); + assert.ok(args.includes("-e"), "-e flag missing"); + assert.ok(args.includes("--koan-role"), "--koan-role missing"); + assert.ok(args.includes("--koan-phase"), "--koan-phase missing"); + assert.ok(args.includes("--koan-plan-dir"), "--koan-plan-dir missing"); + assert.ok(args.includes("--koan-subagent-dir"), "--koan-subagent-dir missing"); + }); +}); + +// -- Quick-set utility functions -- + +describe("initConfigFromActiveModel", () => { + it("creates a 20-key config with all keys set to the given model", () => { + const config = initConfigFromActiveModel("anthropic/claude-sonnet"); + assert.equal(Object.keys(config).length, ALL_PHASE_MODEL_KEYS.length); + for (const key of ALL_PHASE_MODEL_KEYS) { + assert.equal(config[key], "anthropic/claude-sonnet", `key ${key} should be set`); + } + }); + + it("produces a config where all values are the same model", () => { + const config = initConfigFromActiveModel("openai/gpt-5"); + const values = Object.values(config); + assert.ok(values.every((v) => v === "openai/gpt-5")); + }); +}); + +describe("applyStrongModel", () => { + it("sets all strong keys to the chosen model, leaving GP keys from existing config", () => { + const existing = 
initConfigFromActiveModel("openai/gpt-4"); + const result = applyStrongModel("anthropic/claude-opus-4", existing, "openai/gpt-4"); + + for (const key of STRONG_PHASE_MODEL_KEYS) { + assert.equal(result[key], "anthropic/claude-opus-4", `strong key ${key} should be updated`); + } + + for (const key of GENERAL_PURPOSE_PHASE_MODEL_KEYS) { + assert.equal(result[key], "openai/gpt-4", `GP key ${key} should be unchanged`); + } + }); + + it("initializes from activeModelId when existingConfig is null", () => { + const result = applyStrongModel("anthropic/claude-opus-4", null, "openai/gpt-5-mini"); + + for (const key of STRONG_PHASE_MODEL_KEYS) { + assert.equal(result[key], "anthropic/claude-opus-4", `strong key ${key} should be updated`); + } + + for (const key of GENERAL_PURPOSE_PHASE_MODEL_KEYS) { + assert.equal(result[key], "openai/gpt-5-mini", `GP key ${key} should be initialized from active model`); + } + }); + + it("writes all 20 keys regardless of which keys are strong", () => { + const result = applyStrongModel("some/model", null, "active/model"); + assert.equal(Object.keys(result).length, ALL_PHASE_MODEL_KEYS.length); + }); +}); + +describe("applyGeneralPurposeModel", () => { + it("sets all GP keys to the chosen model, leaving strong keys from existing config", () => { + const existing = initConfigFromActiveModel("anthropic/claude-opus-4"); + const result = applyGeneralPurposeModel("openai/gpt-5-mini", existing, "anthropic/claude-opus-4"); + + for (const key of GENERAL_PURPOSE_PHASE_MODEL_KEYS) { + assert.equal(result[key], "openai/gpt-5-mini", `GP key ${key} should be updated`); + } + + for (const key of STRONG_PHASE_MODEL_KEYS) { + assert.equal(result[key], "anthropic/claude-opus-4", `strong key ${key} should be unchanged`); + } + }); + + it("initializes from activeModelId when existingConfig is null", () => { + const result = applyGeneralPurposeModel("openai/gpt-5-mini", null, "anthropic/claude-sonnet"); + + for (const key of GENERAL_PURPOSE_PHASE_MODEL_KEYS) { + 
assert.equal(result[key], "openai/gpt-5-mini", `GP key ${key} should be updated`); + } + + for (const key of STRONG_PHASE_MODEL_KEYS) { + assert.equal(result[key], "anthropic/claude-sonnet", `strong key ${key} should be initialized from active model`); + } + }); + + it("writes all 20 keys regardless of which keys are GP", () => { + const result = applyGeneralPurposeModel("some/model", null, "active/model"); + assert.equal(Object.keys(result).length, ALL_PHASE_MODEL_KEYS.length); + }); +}); + +describe("quick-set from empty config: all-or-none persistence invariant", () => { + it("applyStrongModel from null config produces a 20-key config (all-or-none)", () => { + const result = applyStrongModel("strong/model", null, "active/model"); + const keys = Object.keys(result) as PhaseModelKey[]; + assert.equal(keys.length, ALL_PHASE_MODEL_KEYS.length); + + // Verify every expected key is present + for (const key of ALL_PHASE_MODEL_KEYS) { + assert.ok(key in result, `key "${key}" missing from result`); + assert.equal(typeof result[key], "string"); + assert.ok(result[key].length > 0); + } + }); + + it("applyGeneralPurposeModel from null config produces a 20-key config (all-or-none)", () => { + const result = applyGeneralPurposeModel("gp/model", null, "active/model"); + const keys = Object.keys(result) as PhaseModelKey[]; + assert.equal(keys.length, ALL_PHASE_MODEL_KEYS.length); + + for (const key of ALL_PHASE_MODEL_KEYS) { + assert.ok(key in result, `key "${key}" missing from result`); + } + }); + + it("strong and GP quick-set results are complementary", () => { + const activeModel = "active/model"; + + const strongResult = applyStrongModel("strong/model", null, activeModel); + const gpResult = applyGeneralPurposeModel("gp/model", null, activeModel); + + // Strong keys in strongResult should differ from GP keys + for (const key of STRONG_PHASE_MODEL_KEYS) { + assert.equal(strongResult[key], "strong/model"); + assert.equal(gpResult[key], activeModel); // GP result left strong 
keys as active + } + + for (const key of GENERAL_PURPOSE_PHASE_MODEL_KEYS) { + assert.equal(strongResult[key], activeModel); // strong result left GP keys as active + assert.equal(gpResult[key], "gp/model"); + } + }); +}); diff --git a/tests/widget.test.ts b/tests/widget.test.ts index 9dfa07e..2342df4 100644 --- a/tests/widget.test.ts +++ b/tests/widget.test.ts @@ -50,13 +50,13 @@ function createWidgetHarness(): { describe("formatPlanningHeaderLabel", () => { it("applies compaction in deterministic order", () => { - const phase = "Context gathering"; + const phase = "Plan design"; const status = "CURRENT"; const full = `Planning · ${phase} · ${status}`; const shortStatus = `Planning · ${phase} · CUR`; const noStatus = `Planning · ${phase}`; - const shortPhase = "Planning · Ctx gather"; + const shortPhase = "Planning · Design"; assert.equal(formatPlanningHeaderLabel(phase, status, visibleWidth(full)), full); assert.equal(formatPlanningHeaderLabel(phase, status, visibleWidth(full) - 1), shortStatus); @@ -70,13 +70,14 @@ describe("formatPlanningHeaderLabel", () => { }); describe("WidgetController rendering", () => { - it("renders metadata header and removes phase chips row", () => { + it("renders metadata header with 3-phase layout (no context gathering)", () => { const harness = createWidgetHarness(); try { const lines = harness.render(140); const text = lines.join("\n"); - assert.match(text, /Planning · Context gathering · CURRENT/); + assert.match(text, /Planning · Plan design · CURRENT/); + assert.doesNotMatch(text, /Context gathering/); assert.doesNotMatch(text, /┃ Context gathering ┃/); } finally { harness.destroy(); From 694d90a23dd60e79bf3c16a5d5468db7771c6d73 Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Mon, 2 Mar 2026 13:41:51 +0700 Subject: [PATCH 031/412] update docs for koan_plan flow and 3-phase widget --- README.md | 32 ++++++++++++++--- design-decisions.md | 79 +++++++++++++++++++---------------------- docs/planning-widget.md | 22 ++++++------ 3 
files changed, 76 insertions(+), 57 deletions(-) diff --git a/README.md b/README.md index e8c4765..5b89554 100644 --- a/README.md +++ b/README.md @@ -8,10 +8,33 @@ Koan is an opinionated planning workflow extension for the pi coding agent. It c The runtime is split into two modes from the same extension entrypoint: -- **Parent session mode** runs `/koan` commands and orchestrates the workflow. -- **Subagent mode** runs role/phase-specific workflows (architect, QR decomposer, reviewer, fix mode). +- **Parent session mode** registers the `koan_plan` MCP tool and the `/koan-execute`, `/koan-status` commands. The parent orchestrates the full workflow when `koan_plan` is invoked. +- **Subagent mode** runs role/phase-specific workflows (architect, developer, technical writer, QR decomposer, reviewer, fix mode). -The parent controls progression through context capture, plan design, quality review, and iterative fixes. Subagents are isolated processes that communicate through persisted artifacts (`plan.json`, `context.json`, `qr-*.json`) and audit projections. +The parent controls progression through plan design, plan code, plan docs, quality review, and iterative fixes. Subagents are isolated processes that communicate through persisted artifacts (`plan.json`, `qr-*.json`) and audit projections. + +## Invoking the Planner + +Call `koan_plan` as an MCP tool — the LLM invokes it when the user asks to plan a complex task. No parameters are needed: the conversation up to that point is automatically exported to `conversation.jsonl` in the plan directory and becomes the planning context. + +The planning pipeline runs sequentially: + +1. **plan-design** (architect) — reads `conversation.jsonl` to understand intent, explores the codebase, writes `plan.json`. +2. **plan-code** (developer) — reads `plan.json`, populates code intents and changes. +3. 
**plan-docs** (technical writer) — reads `plan.json` and optionally `conversation.jsonl` for decisions and tradeoffs, writes documentation entries. + +Each phase is followed by a QR (quality review) block: decompose → parallel verify → fix loop, up to `MAX_FIX_ITERATIONS`. + +### conversation.jsonl + +Written once at the start of `koan_plan`. Contains the full session branch as JSONL (one JSON object per line — raw pi `SessionManager` entries, not a plain-text transcript). The plan-design architect and plan-docs writer are told about this file and may `Read` it; other phases work from `plan.json` only. + +### Slash commands + +| Command | Description | +|---|---| +| `/koan-execute` | Execute a koan plan (not yet implemented) | +| `/koan-status` | Show current workflow phase | ## Design Decisions @@ -22,6 +45,7 @@ Key design choices that shape implementation: - **Default-deny permissions**: each phase explicitly allowlists tools; unknown tool/phase access is blocked. - **Disk-backed mutations**: planning mutations are immediately persisted with atomic writes instead of deferred finalize steps. - **Need-to-know prompts**: each subagent only receives the minimum context needed for its task. +- **Passive conversation context**: `conversation.jsonl` is a read-only artifact on disk. No phase programmatically injects it into prompts; agents that need it use the `Read` tool. ## Invariants @@ -34,4 +58,4 @@ The workflow depends on these invariants: ## Boundaries -Current scope focuses on planning and QR orchestration. `/koan execute` is intentionally not implemented yet. +Current scope focuses on planning and QR orchestration. `/koan-execute` is intentionally not implemented yet. diff --git a/design-decisions.md b/design-decisions.md index 54c56b8..4c6c471 100644 --- a/design-decisions.md +++ b/design-decisions.md @@ -46,20 +46,20 @@ from tools: always `throw new Error(msg)` -- never `return { isError: true }`. 
### AD-1: Two LLM Interaction Levels -- `sendUserMessage()` in parent session: ONLY for context capture. The - session LLM is the only entity with the conversational understanding. - A fresh LLM reading a serialized transcript loses implicit context. - `spawn()` subagent: for all substantial work (architect, developer, writer, QR decomposer, QR reviewer). - `complete()` from pi-ai: NOT used in koan. No direct LLM calls without agent loop. +- `sendUserMessage()` in parent session: NOT used. Planning is triggered via + the `koan_plan` MCP tool; conversation context is captured via `exportConversation()`. ### AD-2: Self-Loading Extension Pattern Same extension file (extensions/koan.ts) serves both modes: -- **Parent mode** (no --koan-role flag): registers /koan command, tools, - and dispatch. Zero overhead in normal pi sessions. +- **Parent mode** (no --koan-role flag): registers the `koan_plan` MCP tool, + `/koan-execute`, `/koan-status` commands, and workflow dispatch. Zero overhead + in normal pi sessions. - **Subagent mode** (--koan-role present): activates role-specific event hooks (state machine, tool enforcement, step prompts). @@ -78,8 +78,8 @@ to ensure one-shot dispatch. ALL step transitions use the koan_complete_step registered tool. The LLM calls koan_complete_step -> tool execute() returns next step's prompt. -This works in both -p mode and interactive mode. sendUserMessage() -is only used for the initial trigger (/koan plan). +This works in both -p mode and interactive mode. `sendUserMessage()` is not +used; planning is triggered by the LLM invoking the `koan_plan` MCP tool. **KEY CORRECTION**: Early design (Feb 10) considered turn_end + agent_end + sendUserMessage() chaining for step transitions. This was @@ -110,8 +110,9 @@ koan_complete_step now" without emitting an actual tool_call block. 
Settled names (corrected from earlier iterations): - `koan_complete_step` (was koan_next_step -- renamed to accept `thoughts`) -- `koan_store_context` (was koan_finalize_context) -- `koan_store_plan` was later REMOVED entirely (see AD-14) +- `koan_store_context` — REMOVED (was koan_finalize_context; removed with context-capture phase) +- `koan_store_plan` — REMOVED (see AD-14) +- `koan_plan` — MCP tool replacing the former `/koan plan` slash command - Prompts use "instructions" not "actions" ### AD-7: invoke_after Pattern Is Critical @@ -132,11 +133,10 @@ have zero friction. ### AD-8: Store Tools Need "Not Yet" Guidance -koan_store_context (and formerly koan_store_plan) are always registered -and visible to the LLM even in steps where they should NOT be called. -Their tool descriptions include "DO NOT call this tool until the step -instructions explicitly tell you to." This creates a prohibition/activation -pattern with step prompts. +(koan_store_context was removed with the context-capture phase; koan_store_plan +was removed earlier — see AD-14.) This pattern remains relevant for any +future store-style tools: tool description should include "DO NOT call this tool +until the step instructions explicitly tell you to." ### AD-9: Subagent Progress Tracking @@ -158,18 +158,13 @@ with rich TypeBox descriptions are sufficient for the LLM to discover the schema through tool definitions. This is the "most elegant" approach per user preference. -### AD-12: Context Capture Phases +### AD-12: Context Capture Phases (REMOVED) -Three sub-phases within context capture: - -1. **Drafting**: LLM reflects on conversation. MAY use tools for "high - value" targeted exploration (confirm API signature, check file existence). - DO NOT explore speculatively. Confidence tagging: HIGH (direct evidence) - vs LOW (extrapolating). -2. **Verifying**: Self-check. Completeness, accuracy, phrasing for - downstream agents. No tools except koan_complete_step. -3. 
**Refining**: Pure tool invocation (koan_store_context). Up to 3 - attempts with validation feedback. +The context-capture phase (draft/verify/refine sub-phases, koan_store_context +tool, context.json artifact) was removed. The parent conversation is now +exported as `conversation.jsonl` at `koan_plan` tool invocation. Phases that +need session context read the file directly via the `Read` tool. See +`src/planner/conversation.ts` for the export implementation. ### AD-13: Default-Deny Tool Permissions @@ -198,10 +193,9 @@ needs evidence that each tool call produces results. ### AD-15: Module Ownership -- Context-capture prompts belong to the "orchestrator" (session.ts / - context-capture.ts) - Plan-design prompts belong to the "architect" (plan-design.ts / prompts/plan-design.ts) +- Conversation export belongs to session.ts / conversation.ts - These are organizational decisions about which module owns which prompts ### AD-16: 6-Step Architect Workflow (plan-design execute) @@ -250,7 +244,7 @@ Step 6: plan mutation tools unlocked. - Chosen on Feb 25 2026 via follow-up deck (`Inline Integrated Section + Divider`). - Rationale: QR is the acceptance loop, not optional telemetry. Rendering it as an inline first-class section prevents the "detached widget" feel and matches how users reason about plan quality over time. - Contract: - - QR is visible during Plan design (and contractually Plan execution), hidden only for Context gathering. + - QR is visible during Plan design, Plan code, and Plan docs (and contractually Plan execution). - Iteration 1 enters `execute` immediately (same stage model as fix iterations); there is no separate `initializing` stage. - Section includes: phase + iter/mode metadata, phase rail, and counters (`done/total/pass/fail/todo`) in a compact metadata block. - Visual treatment uses inline sectioning + divider, not a nested bordered mini-card. @@ -273,9 +267,9 @@ Step 6: plan mutation tools unlocked. 
### WorkflowDispatch (dispatch pattern) -Workflow tools (koan_complete_step, koan_store_context) are registered once -at init. Their execute() callbacks read from a mutable dispatch object. -Phases hook/unhook dispatch slots at activation/deactivation time. +Workflow tools (koan_complete_step) are registered once at init. Their +execute() callbacks read from a mutable dispatch object. Phases hook/unhook +dispatch slots at activation/deactivation time. hookDispatch() throws if a slot is already occupied -- prevents silent misrouting when two phases try to claim the same tool. @@ -283,7 +277,7 @@ misrouting when two phases try to claim the same tool. ### PlanRef (mutable reference) All plan mutation tools share a mutable `{ dir: string | null }` set -when /koan plan creates a directory or when --koan-plan-dir is received. +when koan_plan tool creates a directory or when --koan-plan-dir is received. Decouples tool registration (init-time) from directory creation (runtime). ### Pi Registers Tools at \_buildRuntime() @@ -312,7 +306,7 @@ at init; phases restrict access via tool_call blocking at runtime. ### BUG-1: LLM Conflates Tool Instructions with Plan Content -In context capture, the LLM captured tool usage instructions as +In the former context-capture phase, the LLM captured tool usage instructions as constraints (e.g. "Use read tool before modifying files; edit for surgical changes"). These are irrelevant developer instructions, not task constraints. Solution: prompts explicitly state "Only include @@ -383,13 +377,20 @@ koan_qr_get_item, koan_qr_list_items, koan_qr_summary. 
--- -## Current Implementation State (Feb 13 2026) +## Current Implementation State (Mar 1 2026) Implemented: - [x] Extension entry point with dual-mode detection -- [x] Context capture (3-phase: draft/verify/refine) +- [x] koan_plan MCP tool (replaces /koan plan slash command) +- [x] Conversation export to conversation.jsonl (replaces context-capture phase) - [x] Plan-design architect subagent (6-step workflow) +- [x] Developer role (plan-code phase) +- [x] Technical writer role (plan-docs phase) +- [x] QR decompose subagent +- [x] QR verify subagent (parallel pool, concurrency 6) +- [x] QR gate routing + fix loop (up to MAX_FIX_ITERATIONS) +- [x] Fix mode (architect/developer/writer fix subagents) - [x] 44+ plan mutation/getter tools with TypeBox schemas - [x] Default-deny tool permissions (registry.ts) - [x] WorkflowDispatch + PlanRef patterns @@ -399,12 +400,6 @@ Implemented: Not yet implemented: -- [ ] Developer role (plan-code phase) -- [ ] Technical writer role (plan-docs phase) -- [ ] QR decompose subagent -- [ ] QR verify subagent (parallel) -- [ ] QR gate routing -- [ ] Fix mode (re-spawn with QR failure report) - [ ] State persistence (appendEntry + session_start restore) - [ ] Plan execution workflow (milestone execution) -- [ ] /koan execute command +- [ ] /koan-execute command diff --git a/docs/planning-widget.md b/docs/planning-widget.md index 36c51e4..2bbffb1 100644 --- a/docs/planning-widget.md +++ b/docs/planning-widget.md @@ -39,7 +39,7 @@ The goal is to keep a long-running (1-2h) planning session readable in real time ### 4) QR is a first-class workflow section - QR renders inline in detail pane with divider rule (no detached mini-card border). -- Visible for Plan design (and contractually for Plan execution), hidden only for Context gathering. +- Visible during Plan design, Plan code, and Plan docs (and contractually Plan execution). 
- QR starts directly in the **`execute`** stage for iteration 1 (non-fix mode); fix iterations reuse the same stage model. - QR block is normalized to a fixed structure: header, phase rail, counters, divider. - Metadata is budgeted to **64 visible chars max** and progressively compacted (`phase/iter/mode` -> `iN/M`, `d/p/f/t`) when width is constrained. @@ -58,17 +58,17 @@ The goal is to keep a long-running (1-2h) planning session readable in real time ## Layout Overview ``` ┌────────────────────────────────────────────────────────────────────────────────┐ -│ Planning · Context gathering · CURRENT 12m 22s │ +│ Planning · Plan design · CURRENT 12m 22s │ │ │ -│ ● Context gathering Current step │ -│ │ DONE Step 2/6: Codebase Exploration │ +│ ● Plan design Current step │ +│ │ CURRENT Step 2/6: Codebase Exploration │ │ │ read internal/rules/CLAUDE.md · 17L/1.2k │ -│ ● Plan design QR | phase:execute · iter 1/6 initial │ -│ │ CURRENT Execute → QR decompose → QR verify │ -│ ○ Plan code done:0/- pass:0 fail:0 todo:- │ -│ │ UPCOMING Subagents queued:0 active:1 done:0 │ -│ ○ Plan docs Plan ID : │ -│ UPCOMING Agent : architect │ +│ ○ Plan code QR | phase:execute · iter 1/6 initial │ +│ │ UPCOMING Execute → QR decompose → QR verify │ +│ ○ Plan docs done:0/- pass:0 fail:0 todo:- │ +│ UPCOMING Subagents queued:0 active:1 done:0 │ +│ Plan ID : │ +│ Agent : architect │ │ Model : openai-codex/gpt-5.3-codex │ │────────────────────────────────────────────────────────────────────────────────│ │ Latest log │ @@ -107,7 +107,7 @@ The goal is to keep a long-running (1-2h) planning session readable in real time Apply in order until it fits: 1. `CURRENT` -> `CUR`, `UPCOMING` -> `UP`, `DONE` unchanged. 2. Drop status chunk (keep `Planning · `). -3. Abbreviate known phases (`Context gathering` -> `Ctx gather`, `Plan design` -> `Design`, `Plan code` -> `Code`, `Plan docs` -> `Docs`). +3. Abbreviate known phases (`Plan design` -> `Design`, `Plan code` -> `Code`, `Plan docs` -> `Docs`). 4. 
Ellipsize active phase tail (`Planning · `). ### Metadata table alignment From efa5341e44fc1b52e8c115eabe90d917ab76434d Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Mon, 2 Mar 2026 14:13:29 +0700 Subject: [PATCH 032/412] Changes to session.ts & model-selection.ts --- src/planner/session.ts | 2 ++ src/planner/ui/config/model-selection.ts | 6 +++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/planner/session.ts b/src/planner/session.ts index b555d5c..ef5c3b5 100644 --- a/src/planner/session.ts +++ b/src/planner/session.ts @@ -79,6 +79,7 @@ function qrFilePath(planDir: string, phase: WorkPhaseKey): string { function singleSubagentStart(role: string): WidgetUpdate { return { subagentRole: role, + subagentModel: null, subagentParallelCount: 1, subagentQueued: 0, subagentActive: 1, @@ -557,6 +558,7 @@ async function runQRBlock( qrFail: initialFail, qrTodo: initialTodo, subagentRole: "reviewer", + subagentModel: null, subagentParallelCount: QR_POOL_CONCURRENCY, subagentQueued: verifyIds.length, subagentActive: 0, diff --git a/src/planner/ui/config/model-selection.ts b/src/planner/ui/config/model-selection.ts index 7ff013a..e551b3a 100644 --- a/src/planner/ui/config/model-selection.ts +++ b/src/planner/ui/config/model-selection.ts @@ -75,8 +75,7 @@ class ResetConfirmComponent implements Component { return [ this.theme.bold(this.theme.fg("accent", "Reset all model overrides to active model?")), "", - this.theme.fg("muted", " This will clear all 20 phase model overrides."), - this.theme.fg("muted", " Koan will use pi's current active model for all phases."), + this.theme.fg("muted", " This will set all 20 phase model cells to the current active model."), "", this.theme.fg("dim", " Enter to confirm · Escape to cancel"), ]; @@ -195,7 +194,8 @@ export function createModelSelectionComponent( overlay = new ResetConfirmComponent( theme, () => { - void persistAndNotify(null).finally(() => closeOverlay()); + const resetConfig = 
initConfigFromActiveModel(fallbackActive); + void persistAndNotify(resetConfig).finally(() => closeOverlay()); }, () => closeOverlay(), ); From a8e60323911de4fc95f0109f7085d69fccb54eef Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Mon, 2 Mar 2026 16:12:37 +0700 Subject: [PATCH 033/412] qr-verify: group items by group_id for batch verification MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace 1-subagent-per-item verification with group-aware dispatch. Items sharing a group_id are verified by a single subagent process, amortizing process startup cost. Changes by layer: - subagent.ts: SpawnReviewerOptions.itemId -> itemIds: string[] - dispatch.ts: parse comma-separated --koan-qr-item flag - qr-verify/phase.ts: dynamic step count (1 + 2*N items per group) with CONTEXT -> N×(ANALYZE+CONFIRM) workflow - qr-verify/prompts.ts: multi-item context listing, position labels - session.ts: group TODO items by group_id before pooling - koan.ts: updated flag description The decompose phase (steps 9-13) already produces group_id assignments. This change consumes that grouping data at verification time. Backward compatible: ungrouped items (group_id=null) become singleton groups, producing identical behavior to the previous 1:1 dispatch. 
--- extensions/koan.ts | 2 +- src/planner/phases/dispatch.ts | 12 +- src/planner/phases/qr-verify/phase.ts | 135 +++++++---- src/planner/phases/qr-verify/prompts.ts | 74 ++++-- src/planner/session.ts | 56 ++++- src/planner/subagent.ts | 10 +- tests/qr-grouped-verify.test.ts | 305 ++++++++++++++++++++++++ tests/session-model-threading.test.ts | 4 +- 8 files changed, 509 insertions(+), 89 deletions(-) create mode 100644 tests/qr-grouped-verify.test.ts diff --git a/extensions/koan.ts b/extensions/koan.ts index 369eac7..0281cf3 100644 --- a/extensions/koan.ts +++ b/extensions/koan.ts @@ -48,7 +48,7 @@ export default function koan(pi: ExtensionAPI): void { }); pi.registerFlag("koan-qr-item", { - description: "QR item ID for reviewer subagent", + description: "QR item ID(s) for reviewer subagent (comma-separated for groups)", type: "string", default: "", }); diff --git a/src/planner/phases/dispatch.ts b/src/planner/phases/dispatch.ts index 3762a36..f3e97c6 100644 --- a/src/planner/phases/dispatch.ts +++ b/src/planner/phases/dispatch.ts @@ -201,15 +201,21 @@ export async function dispatchPhase( } if (config.role === "reviewer" && qrWorkPhase) { - const itemId = pi.getFlag("koan-qr-item") as string; - if (!itemId) { + const rawItemFlag = pi.getFlag("koan-qr-item") as string; + if (!rawItemFlag) { logger("Reviewer missing --koan-qr-item flag"); return; } + const itemIds = rawItemFlag.split(",").map((s) => s.trim()).filter(Boolean); + if (itemIds.length === 0) { + logger("Reviewer --koan-qr-item flag is empty after parsing"); + return; + } + const phase = new QRVerifyPhase( pi, - { planDir: config.planDir, itemId, workPhase: qrWorkPhase }, + { planDir: config.planDir, itemIds, workPhase: qrWorkPhase }, dispatch, planRef, logger, diff --git a/src/planner/phases/qr-verify/phase.ts b/src/planner/phases/qr-verify/phase.ts index 185fb97..100daf2 100644 --- a/src/planner/phases/qr-verify/phase.ts +++ b/src/planner/phases/qr-verify/phase.ts @@ -1,5 +1,14 @@ -// QR verify phase -- 
3-step reviewer subagent that verifies exactly 1 QR item -// against the plan (CONTEXT -> ANALYZE -> CONFIRM). One subagent per item. +// QR verify phase -- dynamic-step reviewer subagent that verifies 1..N QR items +// against the plan. Workflow: CONTEXT (once) -> N × (ANALYZE + CONFIRM) -> done. +// Items in a group share a single subagent, amortizing process startup cost. +// +// Dynamic step formula: totalSteps = 1 + (2 * numItems) +// Step 1: CONTEXT (load plan, list all assigned items) +// Step 2k: ANALYZE item k (k = 1..N) +// Step 2k+1: CONFIRM item k (record verdict) +// +// Step gating: koan_qr_set_item is blocked until the CONFIRM step for the +// current item (odd-numbered steps >= 3). import { promises as fs } from "node:fs"; import * as path from "node:path"; @@ -18,26 +27,37 @@ import { buildContextStep, buildAnalyzeStep, buildConfirmStep, - type VerifyStep, } from "./prompts.js"; type WorkPhaseKey = "plan-design" | "plan-code" | "plan-docs"; -const TOTAL_STEPS = 3; -const STEP_NAMES: Record = { - 1: "CONTEXT", - 2: "ANALYZE", - 3: "CONFIRM", -}; - interface VerifyState { active: boolean; - step: VerifyStep; - itemId: string; + step: number; + totalSteps: number; + itemIds: string[]; step1Prompt: string | null; systemPrompt: string | null; } +// Map step number to step type and item index. +// Step 1 is CONTEXT. Steps 2..2N+1 alternate ANALYZE/CONFIRM per item. +function stepType(step: number): { kind: "CONTEXT" } | { kind: "ANALYZE"; itemIndex: number } | { kind: "CONFIRM"; itemIndex: number } { + if (step === 1) return { kind: "CONTEXT" }; + const offset = step - 2; // 0-indexed from step 2 + const itemIndex = Math.floor(offset / 2); + const isConfirm = offset % 2 === 1; + return isConfirm ? 
{ kind: "CONFIRM", itemIndex } : { kind: "ANALYZE", itemIndex }; +} + +function stepName(step: number, numItems: number): string { + if (step === 1) return "CONTEXT"; + const info = stepType(step); + if (info.kind === "ANALYZE") return `ANALYZE ${info.itemIndex + 1}/${numItems}`; + if (info.kind === "CONFIRM") return `CONFIRM ${info.itemIndex + 1}/${numItems}`; + return `Step ${step}`; +} + export class QRVerifyPhase { private readonly pi: ExtensionAPI; private readonly planDir: string; @@ -48,11 +68,11 @@ export class QRVerifyPhase { private readonly eventLog: EventLog | undefined; private readonly dispatch: WorkflowDispatch; private readonly planRef: PlanRef; - private item: QRItem | null = null; + private items: QRItem[] = []; constructor( pi: ExtensionAPI, - config: { planDir: string; itemId: string; workPhase: WorkPhaseKey }, + config: { planDir: string; itemIds: string[]; workPhase: WorkPhaseKey }, dispatch: WorkflowDispatch, planRef: PlanRef, log?: Logger, @@ -67,10 +87,14 @@ export class QRVerifyPhase { this.log = log ?? createLogger("QRVerify"); this.eventLog = eventLog; + const numItems = config.itemIds.length; + const totalSteps = 1 + 2 * numItems; + this.state = { active: false, step: 1, - itemId: config.itemId, + totalSteps, + itemIds: config.itemIds, step1Prompt: null, systemPrompt: null, }; @@ -98,12 +122,17 @@ export class QRVerifyPhase { return; } - const item = qrFile.items.find((i) => i.id === this.state.itemId); - if (!item) { - this.log("QR item not found", { itemId: this.state.itemId, phase: this.workPhase }); - return; + // Resolve all item IDs to QRItem objects. 
+ const resolvedItems: QRItem[] = []; + for (const id of this.state.itemIds) { + const item = qrFile.items.find((i) => i.id === id); + if (!item) { + this.log("QR item not found", { itemId: id, phase: this.workPhase }); + return; + } + resolvedItems.push(item); } - this.item = item; + this.items = resolvedItems; let basePrompt: string; try { @@ -114,18 +143,24 @@ export class QRVerifyPhase { return; } - this.state.systemPrompt = buildVerifySystemPrompt(basePrompt, this.workPhase); + this.state.systemPrompt = buildVerifySystemPrompt(basePrompt, this.workPhase, this.items.length); const conversationPath = path.join(this.planDir, "conversation.jsonl"); - this.state.step1Prompt = formatStep(buildContextStep(item, this.workPhase, conversationPath)); + this.state.step1Prompt = formatStep(buildContextStep(this.items, this.workPhase, conversationPath)); this.state.active = true; this.state.step = 1; this.planRef.dir = this.planDir; hookDispatch(this.dispatch, "onCompleteStep", () => this.handleStepComplete()); - this.log("Starting QR verify workflow", { itemId: this.state.itemId, phase: this.workPhase, step: 1 }); - await this.eventLog?.emitPhaseStart(TOTAL_STEPS); - await this.eventLog?.emitStepTransition(1, STEP_NAMES[1], TOTAL_STEPS); + this.log("Starting QR verify workflow", { + itemIds: this.state.itemIds, + itemCount: this.items.length, + totalSteps: this.state.totalSteps, + phase: this.workPhase, + step: 1, + }); + await this.eventLog?.emitPhaseStart(this.state.totalSteps); + await this.eventLog?.emitStepTransition(1, "CONTEXT", this.state.totalSteps); } private registerHandlers(): void { @@ -151,11 +186,15 @@ export class QRVerifyPhase { const perm = checkPermission(this.qrPhaseKey, event.toolName); if (!perm.allowed) return { block: true, reason: perm.reason }; - if (this.state.step < 3 && event.toolName === "koan_qr_set_item") { - return { - block: true, - reason: `koan_qr_set_item available in step 3 (current: ${this.state.step})`, - }; + // koan_qr_set_item is 
only allowed during CONFIRM steps (odd steps >= 3). + if (event.toolName === "koan_qr_set_item") { + const info = stepType(this.state.step); + if (info.kind !== "CONFIRM") { + return { + block: true, + reason: `koan_qr_set_item available only during CONFIRM steps (current: ${stepName(this.state.step, this.items.length)})`, + }; + } } return undefined; @@ -165,31 +204,39 @@ export class QRVerifyPhase { private async handleStepComplete(): Promise<{ ok: boolean; prompt?: string; error?: string }> { const prev = this.state.step; - if (prev === 3) { + if (prev >= this.state.totalSteps) { this.state.active = false; unhookDispatch(this.dispatch, "onCompleteStep"); await this.eventLog?.emitPhaseEnd("completed"); - this.log("Verification complete", { itemId: this.state.itemId, phase: this.workPhase }); + this.log("Verification complete", { + itemCount: this.items.length, + phase: this.workPhase, + }); return { ok: true, prompt: "Verification complete." }; } - this.state.step = (prev + 1) as VerifyStep; - const stepName = STEP_NAMES[this.state.step]; + this.state.step = prev + 1; + const name = stepName(this.state.step, this.items.length); const prompt = this.buildStepPrompt(this.state.step); - this.log("Step complete, advancing", { from: prev, to: this.state.step, phase: this.workPhase }); - await this.eventLog?.emitStepTransition(this.state.step, stepName, TOTAL_STEPS); + this.log("Step complete, advancing", { + from: prev, + to: this.state.step, + name, + phase: this.workPhase, + }); + await this.eventLog?.emitStepTransition(this.state.step, name, this.state.totalSteps); return { ok: true, prompt }; } - private buildStepPrompt(step: VerifyStep): string { - switch (step) { - case 2: - return formatStep(buildAnalyzeStep(this.item!)); - case 3: - return formatStep(buildConfirmStep(this.item!, this.workPhase)); - default: - return ""; + private buildStepPrompt(step: number): string { + const info = stepType(step); + if (info.kind === "ANALYZE") { + return 
formatStep(buildAnalyzeStep(this.items[info.itemIndex], info.itemIndex, this.items.length)); + } + if (info.kind === "CONFIRM") { + return formatStep(buildConfirmStep(this.items[info.itemIndex], info.itemIndex, this.items.length, this.workPhase)); } + return ""; } } diff --git a/src/planner/phases/qr-verify/prompts.ts b/src/planner/phases/qr-verify/prompts.ts index 38fcbe7..21313e4 100644 --- a/src/planner/phases/qr-verify/prompts.ts +++ b/src/planner/phases/qr-verify/prompts.ts @@ -1,5 +1,9 @@ -// Prompt guidance for the 3-step QR verify subagent workflow. -// Each reviewer subagent verifies exactly 1 QRItem against the plan. +// Prompt guidance for the dynamic-step QR verify subagent workflow. +// Each reviewer subagent verifies 1..N QRItems (grouped by group_id). +// +// Dynamic step formula: totalSteps = 1 + (2 * numItems) +// Step 1: CONTEXT (once, lists all items) +// Steps 2..2N+1: ANALYZE/CONFIRM pairs per item import { promises as fs } from "node:fs"; import * as os from "node:os"; @@ -14,8 +18,6 @@ import { type WorkPhaseKey = "plan-design" | "plan-code" | "plan-docs"; -export type VerifyStep = 1 | 2 | 3; - function scopeGuidance(item: QRItem): string { const s = item.scope; if (s === "*") { @@ -63,55 +65,72 @@ export async function loadQRVerifySystemPrompt(): Promise { } } -export function buildVerifySystemPrompt(basePrompt: string, phase: WorkPhaseKey): string { +export function buildVerifySystemPrompt(basePrompt: string, phase: WorkPhaseKey, itemCount: number): string { + const itemLabel = itemCount === 1 ? 
"1 QR item" : `${itemCount} QR items`; return [ basePrompt, "", "---", "", - `WORKFLOW: 3-STEP QR VERIFICATION (${phase})`, + `WORKFLOW: QR VERIFICATION (${phase}, ${itemLabel})`, "", - "You will verify exactly 1 QR item against the plan.", + `You will verify ${itemLabel} against the plan.`, "Step 1 instructions are in the user message below.", "Complete the work described, then call koan_complete_step.", "Put your findings in the `thoughts` parameter of koan_complete_step.", "", - "CRITICAL: Do NOT record a verdict until step 3 (CONFIRM).", - "Analyze thoroughly in step 2 before committing.", + "CRITICAL: Do NOT record a verdict until the CONFIRM step for each item.", + "Analyze thoroughly in the ANALYZE step before committing.", + ].join("\n"); +} + +function formatItemForContext(item: QRItem): string { + return [ + ` ${item.id} [${item.severity}]: ${item.check}`, + ` scope: ${item.scope}`, ].join("\n"); } export function buildContextStep( - item: QRItem, + items: QRItem[], phase: WorkPhaseKey, conversationPath?: string, ): StepGuidance { + const itemLabel = items.length === 1 ? "1 ITEM" : `${items.length} ITEMS`; + const itemSummary = items.map(formatItemForContext).join("\n"); + return { - title: "Step 1: CONTEXT", + title: `Step 1: CONTEXT`, instructions: [ `PHASE: ${phase}`, - "ITEM TO VERIFY:", + `ITEMS TO VERIFY: ${itemLabel}`, "", - "", - ` ${item.id}`, - ` ${item.scope}`, - ` ${item.check}`, - ` ${item.severity}`, - "", + itemSummary, "", ...phaseContextTrigger(phase, conversationPath), ...(phase === "plan-code" ? [] : [""]), - "Understand the check and required evidence before analyzing.", + "Understand the checks and required evidence before analyzing.", ], }; } -export function buildAnalyzeStep(item: QRItem): StepGuidance { +export function buildAnalyzeStep(item: QRItem, itemIndex: number, totalItems: number): StepGuidance { + const positionLabel = totalItems === 1 + ? 
"" + : ` (item ${itemIndex + 1} of ${totalItems})`; + return { - title: "Step 2: ANALYZE", + title: `ANALYZE ${item.id}${positionLabel}`, instructions: [ scopeGuidance(item), "", + "", + ` ${item.id}`, + ` ${item.scope}`, + ` ${item.check}`, + ` ${item.severity}`, + "", + "", "TASK:", "1. Read relevant entities based on scope", "2. Apply the verification check", @@ -123,9 +142,18 @@ export function buildAnalyzeStep(item: QRItem): StepGuidance { }; } -export function buildConfirmStep(item: QRItem, phase: WorkPhaseKey): StepGuidance { +export function buildConfirmStep( + item: QRItem, + itemIndex: number, + totalItems: number, + phase: WorkPhaseKey, +): StepGuidance { + const positionLabel = totalItems === 1 + ? "" + : ` (item ${itemIndex + 1} of ${totalItems})`; + return { - title: "Step 3: CONFIRM", + title: `CONFIRM ${item.id}${positionLabel}`, instructions: [ `CONFIRMING: ${item.id}`, `SEVERITY: ${item.severity}`, diff --git a/src/planner/session.ts b/src/planner/session.ts index ef5c3b5..f8f62b5 100644 --- a/src/planner/session.ts +++ b/src/planner/session.ts @@ -543,35 +543,58 @@ async function runQRBlock( } } - const verifyIds = qr.items.filter((i) => i.status === "TODO").map((i) => i.id); + // Group TODO items by group_id for batch verification. + // Items sharing a group_id are verified by a single subagent, amortizing + // process startup cost. Items without group_id are treated as singletons. + const todoItems = qr.items.filter((i) => i.status === "TODO"); + const groups = new Map(); + for (const item of todoItems) { + const gid = item.group_id ?? 
item.id; + const existing = groups.get(gid); + if (existing) { + existing.push(item.id); + } else { + groups.set(gid, [item.id]); + } + } + const groupEntries = Array.from(groups.entries()); // [groupId, itemIds[]] const totalItems = qr.items.length; + const totalTodoItems = todoItems.length; const preservedPass = qr.items.filter((i) => i.status === "PASS").length; const initialFail = qr.items.filter((i) => i.status === "FAIL").length; - const initialTodo = qr.items.filter((i) => i.status === "TODO").length; widget?.update({ - step: `${phase} qr-verify: 0/${verifyIds.length}`, + step: `${phase} qr-verify: 0/${groupEntries.length} groups (${totalTodoItems} items)`, activity: "", qrTotal: totalItems, qrDone: preservedPass, qrPass: preservedPass, qrFail: initialFail, - qrTodo: initialTodo, + qrTodo: totalTodoItems, subagentRole: "reviewer", subagentModel: null, subagentParallelCount: QR_POOL_CONCURRENCY, - subagentQueued: verifyIds.length, + subagentQueued: groupEntries.length, subagentActive: 0, subagentDone: 0, }); + log("QR verify: grouped items for dispatch", { + phase, + totalItems: totalTodoItems, + groups: groupEntries.length, + groupSizes: groupEntries.map(([gid, ids]) => `${gid}:${ids.length}`), + }); + state.phase = "qr-verify-running"; widget?.update({ qrPhase: "verify" }); let verifyDone = 0; let failedReviewers: string[] = []; - if (verifyIds.length > 0) { + if (groupEntries.length > 0) { + const groupIds = groupEntries.map(([gid]) => gid); + const verifyStatsPoll = setInterval(async () => { try { const raw = await fs.readFile(qrPath, "utf8"); @@ -591,20 +614,27 @@ async function runQRBlock( } }, 2000); + // Build a map from groupId -> itemIds for the pool worker. 
+ const groupItemMap = new Map(groupEntries); + try { let reviewerModel: string | null = null; const result = await pool( - verifyIds, + groupIds, QR_POOL_CONCURRENCY, - async (itemId) => { - const reviewerDir = await createSubagentDir(planDir, `qr-reviewer-${phase}-${itemId}`); + async (groupId) => { + const itemIds = groupItemMap.get(groupId)!; + const dirSuffix = itemIds.length === 1 + ? `qr-reviewer-${phase}-${itemIds[0]}` + : `qr-reviewer-${phase}-group-${groupId}`; + const reviewerDir = await createSubagentDir(planDir, dirSuffix); const r = await spawnReviewerWithResolvedModel({ planDir, subagentDir: reviewerDir, cwd, extensionPath, phase, - itemId, + itemIds, log, }); @@ -619,7 +649,7 @@ async function runQRBlock( (progress) => { verifyDone = progress.done; widget?.update({ - step: `${phase} qr-verify: ${progress.done}/${progress.total}`, + step: `${phase} qr-verify: ${progress.done}/${progress.total} groups`, qrDone: preservedPass + progress.done, qrTotal: totalItems, subagentQueued: progress.queued, @@ -646,7 +676,7 @@ async function runQRBlock( const pass = finalQR.items.filter((i) => i.status === "PASS").length; const fail = finalQR.items.filter((i) => i.status === "FAIL").length; const todo = finalQR.items.filter((i) => i.status === "TODO").length; - const summary = `${phase} QR complete: ${pass} PASS, ${fail} FAIL, ${todo} TODO (${failedReviewers.length} reviewers failed).`; + const summary = `${phase} QR complete: ${pass} PASS, ${fail} FAIL, ${todo} TODO (${failedReviewers.length} reviewer groups failed).`; const passed = fail === 0 && failedReviewers.length === 0; widget?.update({ @@ -659,7 +689,7 @@ async function runQRBlock( qrTodo: todo, subagentQueued: 0, subagentActive: 0, - subagentDone: verifyIds.length, + subagentDone: groupEntries.length, }); return { summary, passed }; } diff --git a/src/planner/subagent.ts b/src/planner/subagent.ts index 608bda6..b4fb0a9 100644 --- a/src/planner/subagent.ts +++ b/src/planner/subagent.ts @@ -53,7 +53,7 @@ 
export interface SpawnReviewerOptions { cwd: string; extensionPath: string; phase: WorkPhaseKey; - itemId: string; + itemIds: string[]; modelOverride?: string; log?: Logger; } @@ -198,11 +198,15 @@ export function spawnQRDecomposer(opts: SpawnQRDecomposerOptions): Promise { const log = opts.log ?? createLogger("Subagent"); + const itemList = opts.itemIds.join(","); + const prompt = opts.itemIds.length === 1 + ? "Verify the assigned QR item." + : `Verify the ${opts.itemIds.length} assigned QR items.`; return spawnSubagent( "reviewer", `qr-${opts.phase}`, - "Verify the assigned QR item.", - { ...opts, extraFlags: ["--koan-qr-item", opts.itemId] }, + prompt, + { ...opts, extraFlags: ["--koan-qr-item", itemList] }, log, ); } diff --git a/tests/qr-grouped-verify.test.ts b/tests/qr-grouped-verify.test.ts new file mode 100644 index 0000000..4a09ad5 --- /dev/null +++ b/tests/qr-grouped-verify.test.ts @@ -0,0 +1,305 @@ +// Tests for grouped QR verification: grouping logic, step routing, +// prompt generation, and subagent spawn arg threading. + +import assert from "node:assert/strict"; +import { describe, it } from "node:test"; + +import { buildSpawnArgs } from "../src/planner/subagent.js"; +import type { QRItem } from "../src/planner/qr/types.js"; +import { + buildVerifySystemPrompt, + buildContextStep, + buildAnalyzeStep, + buildConfirmStep, +} from "../src/planner/phases/qr-verify/prompts.js"; + +// -- Grouping logic (pure function, extracted from session.ts pattern) -- + +function groupItemsByGroupId(items: QRItem[]): Map<string, string[]> { + const groups = new Map(); + for (const item of items) { + const gid = item.group_id ?? 
item.id; + const existing = groups.get(gid); + if (existing) { + existing.push(item.id); + } else { + groups.set(gid, [item.id]); + } + } + return groups; +} + +function makeItem(id: string, groupId: string | null = null, status: "TODO" | "PASS" | "FAIL" = "TODO"): QRItem { + return { + id, + scope: `milestone:M-001`, + check: `Check for ${id}`, + status, + finding: null, + parent_id: null, + group_id: groupId, + severity: "MUST", + }; +} + +// -- Grouping tests -- + +describe("groupItemsByGroupId", () => { + it("groups items sharing the same group_id", () => { + const items = [ + makeItem("QR-001", "group-a"), + makeItem("QR-002", "group-a"), + makeItem("QR-003", "group-b"), + ]; + const groups = groupItemsByGroupId(items); + + assert.equal(groups.size, 2); + assert.deepEqual(groups.get("group-a"), ["QR-001", "QR-002"]); + assert.deepEqual(groups.get("group-b"), ["QR-003"]); + }); + + it("treats null group_id as singleton (uses item id as group key)", () => { + const items = [ + makeItem("QR-001", null), + makeItem("QR-002", null), + ]; + const groups = groupItemsByGroupId(items); + + assert.equal(groups.size, 2); + assert.deepEqual(groups.get("QR-001"), ["QR-001"]); + assert.deepEqual(groups.get("QR-002"), ["QR-002"]); + }); + + it("handles mixed grouped and ungrouped items", () => { + const items = [ + makeItem("QR-001", "umbrella"), + makeItem("QR-002", "umbrella"), + makeItem("QR-003", null), + makeItem("QR-004", "component-auth"), + makeItem("QR-005", "component-auth"), + makeItem("QR-006", "component-auth"), + ]; + const groups = groupItemsByGroupId(items); + + assert.equal(groups.size, 3); + assert.deepEqual(groups.get("umbrella"), ["QR-001", "QR-002"]); + assert.deepEqual(groups.get("QR-003"), ["QR-003"]); + assert.deepEqual(groups.get("component-auth"), ["QR-004", "QR-005", "QR-006"]); + }); + + it("returns empty map for empty items", () => { + const groups = groupItemsByGroupId([]); + assert.equal(groups.size, 0); + }); + + it("single item with group_id 
creates group of 1", () => { + const items = [makeItem("QR-001", "solo-group")]; + const groups = groupItemsByGroupId(items); + + assert.equal(groups.size, 1); + assert.deepEqual(groups.get("solo-group"), ["QR-001"]); + }); +}); + +// -- Dynamic step formula tests -- + +describe("dynamic step formula", () => { + it("totalSteps = 1 + 2*N for N items", () => { + assert.equal(1 + 2 * 1, 3); // 1 item: CONTEXT, ANALYZE, CONFIRM + assert.equal(1 + 2 * 3, 7); // 3 items: CONTEXT, 3×(ANALYZE+CONFIRM) + assert.equal(1 + 2 * 5, 11); // 5 items + }); + + it("step routing maps correctly for 3 items", () => { + // Step 1: CONTEXT + // Step 2: ANALYZE item 0 + // Step 3: CONFIRM item 0 + // Step 4: ANALYZE item 1 + // Step 5: CONFIRM item 1 + // Step 6: ANALYZE item 2 + // Step 7: CONFIRM item 2 + + function stepType(step: number): { kind: string; itemIndex?: number } { + if (step === 1) return { kind: "CONTEXT" }; + const offset = step - 2; + const itemIndex = Math.floor(offset / 2); + const isConfirm = offset % 2 === 1; + return isConfirm ? { kind: "CONFIRM", itemIndex } : { kind: "ANALYZE", itemIndex }; + } + + assert.deepEqual(stepType(1), { kind: "CONTEXT" }); + assert.deepEqual(stepType(2), { kind: "ANALYZE", itemIndex: 0 }); + assert.deepEqual(stepType(3), { kind: "CONFIRM", itemIndex: 0 }); + assert.deepEqual(stepType(4), { kind: "ANALYZE", itemIndex: 1 }); + assert.deepEqual(stepType(5), { kind: "CONFIRM", itemIndex: 1 }); + assert.deepEqual(stepType(6), { kind: "ANALYZE", itemIndex: 2 }); + assert.deepEqual(stepType(7), { kind: "CONFIRM", itemIndex: 2 }); + }); + + it("step routing works for single item (backward compat)", () => { + function stepType(step: number): { kind: string; itemIndex?: number } { + if (step === 1) return { kind: "CONTEXT" }; + const offset = step - 2; + const itemIndex = Math.floor(offset / 2); + const isConfirm = offset % 2 === 1; + return isConfirm ? 
{ kind: "CONFIRM", itemIndex } : { kind: "ANALYZE", itemIndex }; + } + + assert.deepEqual(stepType(1), { kind: "CONTEXT" }); + assert.deepEqual(stepType(2), { kind: "ANALYZE", itemIndex: 0 }); + assert.deepEqual(stepType(3), { kind: "CONFIRM", itemIndex: 0 }); + }); +}); + +// -- Prompt generation tests -- + +describe("buildVerifySystemPrompt", () => { + it("includes item count for single item", () => { + const result = buildVerifySystemPrompt("base prompt", "plan-design", 1); + assert.ok(result.includes("1 QR item")); + assert.ok(!result.includes("items")); + }); + + it("includes item count for multiple items", () => { + const result = buildVerifySystemPrompt("base prompt", "plan-code", 5); + assert.ok(result.includes("5 QR items")); + }); + + it("includes phase name", () => { + const result = buildVerifySystemPrompt("base prompt", "plan-docs", 3); + assert.ok(result.includes("plan-docs")); + }); +}); + +describe("buildContextStep", () => { + const items: QRItem[] = [ + makeItem("QR-001", "group-a"), + makeItem("QR-002", "group-a"), + makeItem("QR-003", "group-a"), + ]; + + it("lists all items in context step", () => { + const step = buildContextStep(items, "plan-design"); + const text = step.instructions.join("\n"); + assert.ok(text.includes("QR-001")); + assert.ok(text.includes("QR-002")); + assert.ok(text.includes("QR-003")); + }); + + it("shows correct item count", () => { + const step = buildContextStep(items, "plan-design"); + const text = step.instructions.join("\n"); + assert.ok(text.includes("3 ITEMS")); + }); + + it("shows 1 ITEM for single item", () => { + const step = buildContextStep([items[0]], "plan-design"); + const text = step.instructions.join("\n"); + assert.ok(text.includes("1 ITEM")); + }); +}); + +describe("buildAnalyzeStep", () => { + const item = makeItem("QR-042", "group-x"); + + it("includes item ID and check", () => { + const step = buildAnalyzeStep(item, 0, 3); + const text = step.instructions.join("\n"); + 
assert.ok(text.includes("QR-042")); + assert.ok(text.includes(item.check)); + }); + + it("includes position label for multi-item groups", () => { + const step = buildAnalyzeStep(item, 1, 5); + assert.ok(step.title.includes("item 2 of 5")); + }); + + it("omits position label for single item", () => { + const step = buildAnalyzeStep(item, 0, 1); + assert.ok(!step.title.includes("item")); + }); +}); + +describe("buildConfirmStep", () => { + const item = makeItem("QR-007", "group-y"); + + it("includes koan_qr_set_item instructions with correct phase and id", () => { + const step = buildConfirmStep(item, 0, 3, "plan-code"); + const text = step.instructions.join("\n"); + assert.ok(text.includes("phase='plan-code'")); + assert.ok(text.includes("id='QR-007'")); + assert.ok(text.includes("status='PASS'")); + assert.ok(text.includes("status='FAIL'")); + }); + + it("includes position label for multi-item groups", () => { + const step = buildConfirmStep(item, 2, 4, "plan-docs"); + assert.ok(step.title.includes("item 3 of 4")); + }); + + it("has invokeAfter guard", () => { + const step = buildConfirmStep(item, 0, 1, "plan-design"); + assert.ok(step.invokeAfter); + assert.ok(step.invokeAfter!.includes("koan_complete_step")); + }); +}); + +// -- Subagent spawn arg tests -- + +describe("spawnReviewer args", () => { + const baseOpts = { + planDir: "/plan", + subagentDir: "/subagent", + extensionPath: "/ext/koan.ts", + cwd: "/working", + }; + + it("passes single item ID via --koan-qr-item for single-item group", () => { + const args = buildSpawnArgs("reviewer", "qr-plan-design", "Verify the assigned QR item.", { + ...baseOpts, + extraFlags: ["--koan-qr-item", "QR-001"], + }); + const idx = args.indexOf("--koan-qr-item"); + assert.ok(idx >= 0); + assert.equal(args[idx + 1], "QR-001"); + }); + + it("passes comma-separated item IDs via --koan-qr-item for multi-item group", () => { + const itemList = "QR-001,QR-002,QR-003"; + const args = buildSpawnArgs("reviewer", "qr-plan-code", 
"Verify the 3 assigned QR items.", { + ...baseOpts, + extraFlags: ["--koan-qr-item", itemList], + }); + const idx = args.indexOf("--koan-qr-item"); + assert.ok(idx >= 0); + assert.equal(args[idx + 1], "QR-001,QR-002,QR-003"); + }); +}); + +// -- Comma-separated parsing (mirrors dispatch.ts logic) -- + +describe("comma-separated item ID parsing", () => { + function parseItemIds(rawFlag: string): string[] { + return rawFlag.split(",").map((s) => s.trim()).filter(Boolean); + } + + it("parses single item ID", () => { + assert.deepEqual(parseItemIds("QR-001"), ["QR-001"]); + }); + + it("parses multiple comma-separated IDs", () => { + assert.deepEqual(parseItemIds("QR-001,QR-002,QR-003"), ["QR-001", "QR-002", "QR-003"]); + }); + + it("handles whitespace around commas", () => { + assert.deepEqual(parseItemIds("QR-001 , QR-002 , QR-003"), ["QR-001", "QR-002", "QR-003"]); + }); + + it("filters empty strings from trailing comma", () => { + assert.deepEqual(parseItemIds("QR-001,QR-002,"), ["QR-001", "QR-002"]); + }); + + it("returns empty array for empty string", () => { + assert.deepEqual(parseItemIds(""), []); + }); +}); diff --git a/tests/session-model-threading.test.ts b/tests/session-model-threading.test.ts index 6462e8d..1a9c300 100644 --- a/tests/session-model-threading.test.ts +++ b/tests/session-model-threading.test.ts @@ -156,7 +156,7 @@ describe("QR spawn model threading", () => { cwd: "/cwd", extensionPath: "/ext/koan.ts", phase: "plan-code", - itemId: "QR-001", + itemIds: ["QR-001"], }, { mapSpawnContextToPhaseModelKeyFn: (ctx, row) => { @@ -188,7 +188,7 @@ describe("QR spawn model threading", () => { cwd: "/cwd", extensionPath: "/ext/koan.ts", phase: "plan-docs", - itemId: "QR-002", + itemIds: ["QR-002"], }, { mapSpawnContextToPhaseModelKeyFn: () => "plan-docs-qr-verify" as PhaseModelKey, From d147f846d79b65fa8b5df40760c96f8ceee5695b Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Tue, 3 Mar 2026 14:07:55 +0700 Subject: [PATCH 034/412] feat: add 
koan_ask_question tool for subagent-to-parent IPC Enable subagents running in headless -p mode to ask the user questions via file-based IPC. The subagent writes a request to ipc.json in its working directory, polls for a response, and the parent orchestrator detects the request during its existing 2-second poll loop, presents the ask UI, and writes the response back. New files: - lib/ipc.ts: IPC types and atomic read/write/delete helpers - tools/ask.ts: koan_ask_question tool with blocking poll loop - ui/ask/: ask UI components copied from pi-ask-tool-extension Modified files: - lib/dispatch.ts: SubagentRef type (mirrors PlanRef pattern) - tools/index.ts: thread SubagentRef through registerAllTools - koan.ts: create and wire SubagentRef - lib/permissions.ts: grant koan_ask_question to work phases - session.ts: pollWithIpcDetection helper, handleAskRequest, thread ui - lib/audit.ts: add koan_ask_question to KOAN_SHAPES --- PLAN.md | 367 ++++++++++++++++++ extensions/koan.ts | 6 +- src/planner/lib/audit.ts | 1 + src/planner/lib/dispatch.ts | 11 + src/planner/lib/ipc.ts | 125 +++++++ src/planner/lib/permissions.ts | 3 + src/planner/session.ts | 169 +++++++-- src/planner/tools/ask.ts | 241 ++++++++++++ src/planner/tools/index.ts | 8 +- src/planner/ui/ask/ask-inline-note.ts | 65 ++++ src/planner/ui/ask/ask-inline-ui.ts | 221 +++++++++++ src/planner/ui/ask/ask-logic.ts | 98 +++++ src/planner/ui/ask/ask-tabs-ui.ts | 512 ++++++++++++++++++++++++++ 13 files changed, 1801 insertions(+), 26 deletions(-) create mode 100644 PLAN.md create mode 100644 src/planner/lib/ipc.ts create mode 100644 src/planner/tools/ask.ts create mode 100644 src/planner/ui/ask/ask-inline-note.ts create mode 100644 src/planner/ui/ask/ask-inline-ui.ts create mode 100644 src/planner/ui/ask/ask-logic.ts create mode 100644 src/planner/ui/ask/ask-tabs-ui.ts diff --git a/PLAN.md b/PLAN.md new file mode 100644 index 0000000..cdc5063 --- /dev/null +++ b/PLAN.md @@ -0,0 +1,367 @@ +# Plan: Subagent Ask Questions 
via File-Based IPC + +## Context + +### Problem + +Subagents run as headless `pi -p` child processes with no UI access (`ctx.hasUI = false`). When a subagent needs user input during planning — choosing between architectural alternatives, clarifying scope — it has no mechanism to pause, ask the user, and resume with the answer. + +### Design Decisions + +**Single `ipc.json` file per subagent directory.** Both request and response live in one file with `request` and `response` keys. Temporal ownership is safe: the subagent creates the file and then blocks (only reads during the wait), so the parent is the sole writer during the response window. A two-file model (request.json + response.json) provides structural ownership at the cost of cleanup complexity and an extra file per interaction. The single-file model is simpler and sufficient because the subagent's blocking poll guarantees no concurrent writes. + +**Tool schema mirrors pi-ask-tool-extension exactly.** The `koan_ask_question` tool accepts the same `{ questions: [{ id, question, options, multi?, recommended? }] }` schema as the existing `ask` tool. LLMs trained on the ask tool schema produce correct calls without schema-specific prompt engineering. + +**Ask UI code copied from pi-ask-tool-extension, not imported.** The pi-ask-tool-extension package is globally installed as a pi extension — it is not in koan's `node_modules` and cannot be imported. The four source files (~1133 lines) are copied into `src/planner/ui/ask/`. All external dependencies (`@mariozechner/pi-coding-agent`, `@mariozechner/pi-tui`) are already available in koan's node_modules. + +**Permission gating via existing PHASE_PERMISSIONS, not conditional registration.** Pi snapshots all tools at init time (`_buildRuntime()`). Tools cannot be added or removed after init. The existing default-deny `PHASE_PERMISSIONS` map in `permissions.ts` controls runtime access. 
Adding `koan_ask_question` to the three work-phase Sets (`plan-design`, `plan-code`, `plan-docs`) grants access to subagents in those phases. In parent mode, no phase is active, so the tool is blocked automatically. + +**SubagentRef pattern mirrors PlanRef.** Tool registration happens at init when the subagent directory is unknown. A mutable `SubagentRef = { dir: string | null }` created at init is populated at `before_agent_start` when CLI flags are available. The tool reads `subagentRef.dir` at execute time. This matches the established `PlanRef` indirection pattern in `dispatch.ts`. + +**Non-error returns for cancellation and abort.** When the user cancels (Escape) or the signal aborts, the tool returns a descriptive non-error message ("The user declined to answer. Proceed with your best judgment."). Error returns cause LLMs to halt or retry; non-error returns guide the LLM to continue productively. + +**Parent detects requests inside existing setInterval poll loops.** The parent's 2-second poll callback in `session.ts` already reads `state.json` for widget updates. Adding an `ipc.json` read to the same callback avoids a separate polling mechanism. A `pendingRequestId` guard variable prevents re-entrant handling — JavaScript's `setInterval` fires regardless of whether the previous async callback completed, so without the guard, every 2-second tick during the user's think-time would re-detect the same request. + +### Constraints + +- Pi snapshots tools at init; all tools must be registered unconditionally before `_buildRuntime()`. +- Subagents run in `-p` mode (print mode) with stdin ignored and stdout/stderr piped to log files — no interactive I/O. +- The parent orchestrator has `ctx.ui` access (confirmed: `session.ts` creates `WidgetController` from `ctx.ui`). +- Atomic file writes use the established tmp+rename pattern (`writeFile(tmp) → rename(tmp, target)`). 
+- The EventLog heartbeat (10-second `setInterval`) continues during the subagent's blocking poll because `await sleep(500)` yields to the Node.js event loop. `state.json` keeps updating, so the parent sees the subagent as alive. + +### Out of Scope (Deferred) + +- Timeout for parent crash detection — the user is at the terminal and will notice; adding a configurable timeout is a follow-up. +- Process liveness check before showing ask UI — low severity edge case (subagent exits between writing request and parent detecting it). +- Multi-subagent concurrent questions — work phases run sequentially; QR phases are excluded from permissions. + +## Implementation + +### ipc.json Schema + +```typescript +// Types live in src/planner/lib/ipc.ts. +// The schema is general-purpose: `type` discriminant supports future request +// types beyond "ask-question" without envelope changes. + +interface IpcFile { + request: IpcRequest; + response: IpcResponse | null; // null while awaiting parent response +} + +interface IpcRequest { + id: string; // crypto.randomUUID() — correlates request to response + type: "ask-question"; // discriminant for routing; extensible to future types + createdAt: string; // ISO 8601 timestamp + payload: AskQuestionPayload; +} + +interface AskQuestionPayload { + questions: Array<{ + id: string; + question: string; + options: Array<{ label: string }>; + multi?: boolean; + recommended?: number; // 0-indexed + }>; +} + +interface IpcResponse { + id: string; // must match request.id + respondedAt: string; // ISO 8601 timestamp + cancelled: boolean; // true when user presses Escape + payload: AskAnswerPayload | null; // null when cancelled +} + +interface AskAnswerPayload { + answers: Array<{ + id: string; // matches question id + selectedOptions: string[]; + customInput?: string; // populated when user selects "Other" + }>; +} +``` + +### NEW: `src/planner/lib/ipc.ts` — IPC File I/O Primitives + +Atomic read/write/delete helpers for `ipc.json`. 
Both the subagent tool and the parent session use these functions. The atomic write pattern (tmp file → rename) matches `EventLog.writeState()` in `audit.ts`. + +**Functions:** +- `writeIpcFile(dir, data)` — atomic write via `.ipc.tmp.json` → `ipc.json` rename +- `readIpcFile(dir)` → `IpcFile | null` — returns null on missing file or parse error (treat parse error as "not ready" to handle partial writes on non-POSIX systems) +- `ipcFileExists(dir)` → `boolean` — fast `fs.access` check without parsing +- `deleteIpcFile(dir)` — removes `ipc.json` and any lingering `.ipc.tmp.json`; swallows ENOENT +- `createAskRequest(payload)` → `IpcFile` — creates file structure with `crypto.randomUUID()` id and `response: null` +- `createAskResponse(requestId, payload)` → `IpcResponse` — response with `cancelled: false` +- `createCancelledResponse(requestId)` → `IpcResponse` — response with `cancelled: true`, `payload: null` + +All types are exported for use by both subagent-side (`tools/ask.ts`) and parent-side (`session.ts`). + +### NEW: `src/planner/tools/ask.ts` — koan_ask_question Tool + +Registers `koan_ask_question` with the pi extension API. The tool schema uses TypeBox definitions identical to pi-ask-tool-extension. Imports `SubagentRef` from `../lib/dispatch.js` (not defined here — it lives in `dispatch.ts` alongside `PlanRef`). + +**Tool execute flow:** + +The entire poll loop is wrapped in a single `try/finally` that calls `deleteIpcFile(dir)`. This guarantees cleanup on all exit paths — success, cancellation, abort, and file disappearance — without requiring per-path deletion logic. + +1. Guard: if `subagentRef.dir` is null, return error (not in subagent context). +2. Guard: if `ipc.json` already exists, return error (one request at a time). +3. Create `IpcFile` via `createAskRequest(payload)`, write atomically. +4. Register `signal.addEventListener("abort", onAbort, { once: true })` for instant abort response. +5. 
Enter poll loop inside `try`: `while (!aborted) { await sleep(500); check signal; read ipc.json; if response !== null && response.id matches: break }`. +6. On response with `cancelled: false`: build `QuestionResult[]`, format via `buildSessionContent()`, return as tool result. (`finally` handles cleanup.) +7. On response with `cancelled: true`: return "The user declined to answer." (`finally` handles cleanup.) +8. On abort: return "The question was aborted." (`finally` handles cleanup.) +9. On file disappearing mid-poll (deleted externally): return "The question was cancelled." (`finally` handles cleanup, swallows ENOENT.) + +**Result formatting** mirrors pi-ask-tool-extension's `buildAskSessionContent()`: +``` +User answers: +auth: JWT + +Answer context: +Question 1 (auth) +Prompt: Which authentication model? +Options: + 1. JWT + 2. Session-based +Response: + Selected: JWT +``` + +### NEW: `src/planner/ui/ask/` — Copied Ask UI Components (4 files) + +Copy these files from `pi-ask-tool-extension/src/` (at `/Users/lmergen/.npm-global/lib/node_modules/pi-ask-tool-extension/src/`): + +1. **`ask-logic.ts`** (~98 lines) — `AskQuestion`, `AskOption`, `AskSelection` types; `OTHER_OPTION` constant; `buildSingleSelectionResult()`, `buildMultiSelectionResult()`, `appendRecommendedTagToOptionLabels()`. +2. **`ask-inline-note.ts`** (~65 lines) — Inline note rendering helpers. Uses `wrapTextWithAnsi` from `@mariozechner/pi-tui`. +3. **`ask-inline-ui.ts`** (~221 lines) — Single-question single-select UI. Renders cursor navigation (↑↓), inline note editing (Tab), submit (Enter) via `ui.custom()`. +4. **`ask-tabs-ui.ts`** (~512 lines) — Multi-question/multi-select tabbed UI. Tab bar (← Q1 Q2 ... ✓ Submit →), per-question option lists, Submit review tab via `ui.custom()`. + +**Import path requirements:** +- Relative import extensions use `.js` suffix: `"./ask-logic"` → `"./ask-logic.js"` (Node16 module resolution requires `.js` extensions in TypeScript source). 
+- Same for `"./ask-inline-note"` → `"./ask-inline-note.js"`. +- External dependencies (`@mariozechner/pi-coding-agent`, `@mariozechner/pi-tui`) resolve from koan's node_modules. + +### MODIFY: `src/planner/lib/dispatch.ts` — Add SubagentRef + +`SubagentRef` and `createSubagentRef()` live alongside `PlanRef` and `createPlanRef()` — both are mutable-ref infrastructure primitives that decouple static tool registration from runtime directory resolution. + +```diff ++// Decouples tool registration (init-time) from subagent directory ++// resolution (runtime, after flags available). Same indirection ++// pattern as PlanRef. ++export interface SubagentRef { ++ dir: string | null; ++} ++ ++export function createSubagentRef(): SubagentRef { ++ return { dir: null }; ++} +``` + +### MODIFY: `src/planner/tools/index.ts` — Thread SubagentRef + +```diff ++import { registerAskTools } from "./ask.js"; ++import type { SubagentRef } from "../lib/dispatch.js"; ++export type { SubagentRef } from "../lib/dispatch.js"; ++export { createSubagentRef } from "../lib/dispatch.js"; + + export function registerAllTools( + pi: ExtensionAPI, + planRef: PlanRef, + dispatch: WorkflowDispatch, ++ subagentRef: SubagentRef, + ): void { + registerWorkflowTools(pi, dispatch); + registerPlanGetterTools(pi, planRef); + registerPlanSetterTools(pi, planRef); + registerPlanDesignEntityTools(pi, planRef); + registerPlanCodeEntityTools(pi, planRef); + registerPlanStructureEntityTools(pi, planRef); + registerQRTools(pi, planRef); ++ registerAskTools(pi, subagentRef); + } +``` + +Note: `SubagentRef` is defined in `lib/dispatch.ts` (alongside `PlanRef`), not in `tools/ask.ts`. `tools/index.ts` re-exports it for convenience, matching the existing re-export pattern for `PlanRef`. 
+ +### MODIFY: `extensions/koan.ts` — Create and Wire SubagentRef + +```diff +-import { registerAllTools, createDispatch, createPlanRef } from "../src/planner/tools/index.js"; ++import { registerAllTools, createDispatch, createPlanRef, createSubagentRef } from "../src/planner/tools/index.js"; + + const dispatch = createDispatch(); + const planRef = createPlanRef(); ++ const subagentRef = createSubagentRef(); + +- registerAllTools(pi, planRef, dispatch); ++ registerAllTools(pi, planRef, dispatch, subagentRef); + + // In before_agent_start, inside `if (config.subagentDir)`: ++ subagentRef.dir = config.subagentDir; +``` + +The `subagentRef.dir = config.subagentDir` assignment goes immediately after the existing `eventLog = new EventLog(...)` line (L88), inside the same `if (config.subagentDir)` block. In parent mode, `subagentRef.dir` remains null, and the tool's execute returns an error. + +### MODIFY: `src/planner/lib/permissions.ts` — Grant Access to Work Phases + +```diff + [ + "plan-design", + new Set([ + "koan_complete_step", ++ "koan_ask_question", + ...PLAN_GETTER_TOOLS_LIST, + ...PLAN_SETTER_TOOLS_LIST, + ...PLAN_DESIGN_ENTITY_TOOLS, + ]), + ], + [ + "plan-code", + new Set([ + "koan_complete_step", ++ "koan_ask_question", + ...PLAN_GETTER_TOOLS_LIST, + ...PLAN_CHANGE_TOOLS_LIST, + "koan_set_intent", + ]), + ], + [ + "plan-docs", + new Set([ + "koan_complete_step", ++ "koan_ask_question", + ...PLAN_GETTER_TOOLS_LIST, + "koan_set_change_doc_diff", + "koan_set_change_comments", +``` + +QR phases (`qr-plan-design`, `qr-plan-code`, `qr-plan-docs`) omit `koan_ask_question` — reviewers do not ask questions. + +### MODIFY: `src/planner/session.ts` — Parent-Side Request Detection + +**A. 
New imports:** +```typescript +import type { ExtensionUIContext } from "@mariozechner/pi-coding-agent"; +import { readIpcFile, writeIpcFile, createAskResponse, createCancelledResponse, type IpcFile } from "./lib/ipc.js"; +import { askSingleQuestionWithInlineNote } from "./ui/ask/ask-inline-ui.js"; +import { askQuestionsWithTabs } from "./ui/ask/ask-tabs-ui.js"; +import type { AskQuestion } from "./ui/ask/ask-logic.js"; +``` + +**B. New `handleAskRequest()` function** (module-level, alongside `runPlanningPhase`): + +Receives the parent's `ExtensionUIContext` and the parsed `IpcFile`. Routes to the appropriate ask UI based on question count and multi-select: +- Single question, single-select → `askSingleQuestionWithInlineNote(ui, question)` +- Single question, multi-select → `askQuestionsWithTabs(ui, [question])` +- Multiple questions → `askQuestionsWithTabs(ui, questions)` + +Returns an `IpcResponse` (either answered or cancelled). On any exception from the UI layer, returns a cancelled response so the subagent unblocks. + +**C. New `pollWithIpcDetection()` helper** (extracts the common poll-with-request-detection pattern): + +Both the work poll (~L335) and the fix poll (~L737) share the same request detection logic. A shared helper avoids duplication: + +```typescript +import type { LogLine } from "./lib/audit.js"; + +// Encapsulates the poll-with-request-detection pattern used by both +// the work poll loop and the fix poll loop. Returns a setInterval ID. 
+function pollWithIpcDetection( + subagentDir: string, + widget: WidgetController | null, + ui: ExtensionUIContext | null, + stepPrefix: string, + updateFromProjection: (p: Projection, logs: LogLine[]) => void, +): ReturnType<typeof setInterval> { + let pendingRequestId: string | null = null; + + return setInterval(async () => { + // Existing: read projection and update widget + const [projection, logs] = await Promise.all([ + readProjection(subagentDir), + readRecentLogs(subagentDir), + ]); + if (projection) { + updateFromProjection(projection, logs); + } + + // IPC request detection — skip if already handling a request or no UI + if (pendingRequestId || !ui) return; + + const ipc = await readIpcFile(subagentDir); + if (!ipc || !ipc.request || ipc.response !== null) return; + + pendingRequestId = ipc.request.id; + try { + widget?.update({ + step: `${stepPrefix}: waiting for user input...`, + activity: ipc.request.payload.questions[0]?.question ?? "", + }); + + const response = await handleAskRequest(ui, ipc); + const updated: IpcFile = { request: ipc.request, response }; + await writeIpcFile(subagentDir, updated); + } catch { + // On error, write cancelled response so subagent unblocks. + // The inner try-catch guards against I/O failures during error + // recovery — an unguarded throw here would propagate as an + // unhandled async rejection in the setInterval callback, + // crashing the parent process (Node.js ≥15 default behavior). + try { + const cancelled = createCancelledResponse(ipc.request.id); + await writeIpcFile(subagentDir, { request: ipc.request, response: cancelled }); + } catch { + // I/O failed during error recovery; subagent remains blocked + // until parent terminates. No further action possible. + } + } finally { + pendingRequestId = null; + } + }, 2000); +} +``` + +**D. 
Thread `ui` through function signatures:** + +- `runPlanningPhase(phase, planDir, cwd, extensionPath, state, log, widget)` → add `ui: ExtensionUIContext | null` +- `runPhaseWithQR(phase, planDir, cwd, extensionPath, state, log, widget)` → add `ui: ExtensionUIContext | null` +- Call site in `plan()`: pass `ctx.hasUI ? ctx.ui : null` + +**E. Work poll loop (~L335):** +The work poll uses `pollWithIpcDetection(subagentDir, widget, ui, phase.key, ...)`. + +**F. Fix poll loop (~L737):** +The fix poll uses `pollWithIpcDetection(fixDir, widget, ui, \`${phase.key} fix ${fixIndex}/${MAX_FIX_ITERATIONS}\`, ...)`. + +### MODIFY: `src/planner/lib/audit.ts` — Log Formatting + +Add `koan_ask_question` to the `KOAN_SHAPES` object for audit log display: + +```typescript +koan_ask_question: { keys: ["questions"], arrays: ["questions"], highValue: true }, +``` + +## Quality Checklist + +- [ ] 01-naming-and-types (design-mode): `SubagentRef` mirrors `PlanRef` naming; `IpcFile`/`IpcRequest`/`IpcResponse` model the domain; `handleAskRequest` describes behavior +- [ ] 02-structure-and-composition (design-mode): `pollWithIpcDetection` extracts shared logic from two poll loops; `handleAskRequest` is single-responsibility; error handling wraps UI calls with cancelled-response fallback +- [ ] 06-module-and-dependencies (design-mode): `lib/ipc.ts` is a pure I/O module with no UI dependencies; `tools/ask.ts` depends on `lib/ipc.ts` and `lib/dispatch.ts` (downward); `session.ts` depends on both `lib/ipc.ts` and `ui/ask/` (same level); no circular deps; `SubagentRef` lives in `lib/dispatch.ts` not in tools layer +- [ ] 07-cross-file-consistency (design-mode): Atomic write pattern matches `EventLog.writeState()`; mutable ref pattern matches `PlanRef`/`WorkflowDispatch` in `lib/dispatch.ts`; permission gating matches existing `PHASE_PERMISSIONS` entries; tool description style matches existing koan tools; error recovery in setInterval callbacks matches `verifyStatsPoll` guarded-catch pattern + +## 
Execution Protocol + +``` +1. delegate @agent-developer: implement per this plan file +2. delegate @agent-quality-reviewer: verify against plan + ~/.claude/conventions/code-quality/ (code-mode) + +When delegating, pass this plan file path. Supplement only with: +- rationale for decisions not captured in plan +- business constraints +- technical prerequisites the agent cannot infer +``` diff --git a/extensions/koan.ts b/extensions/koan.ts index 0281cf3..ec475a6 100644 --- a/extensions/koan.ts +++ b/extensions/koan.ts @@ -9,7 +9,7 @@ import type { ExtensionAPI, ExtensionContext } from "@mariozechner/pi-coding-age import { createSession } from "../src/planner/session.js"; import { detectSubagentMode, dispatchPhase } from "../src/planner/phases/dispatch.js"; -import { registerAllTools, createDispatch, createPlanRef } from "../src/planner/tools/index.js"; +import { registerAllTools, createDispatch, createPlanRef, createSubagentRef } from "../src/planner/tools/index.js"; import { createLogger } from "../src/utils/logger.js"; import { EventLog, extractToolEvent } from "../src/planner/lib/audit.js"; import { openKoanConfig } from "../src/planner/ui/config/menu.js"; @@ -64,8 +64,9 @@ export default function koan(pi: ExtensionAPI): void { // blocking at runtime. const dispatch = createDispatch(); const planRef = createPlanRef(); + const subagentRef = createSubagentRef(); - registerAllTools(pi, planRef, dispatch); + registerAllTools(pi, planRef, dispatch, subagentRef); // Subagent detection runs at before_agent_start (flags // are unavailable during init). @@ -87,6 +88,7 @@ export default function koan(pi: ExtensionAPI): void { if (config.subagentDir) { eventLog = new EventLog(config.subagentDir, config.role, config.phase, currentModelId(ctx)); await eventLog.open(); + subagentRef.dir = config.subagentDir; // Capture all tool results for the audit trail. 
Graduated detail: // file paths for read/edit/write, binary name for bash, full diff --git a/src/planner/lib/audit.ts b/src/planner/lib/audit.ts index 1d2d670..12191ca 100644 --- a/src/planner/lib/audit.ts +++ b/src/planner/lib/audit.ts @@ -443,6 +443,7 @@ const KOAN_SHAPES: Record = { koan_qr_get_item: { keys: ["phase", "id"], getter: true }, koan_qr_list_items: { keys: ["phase", "status"], getter: true }, koan_qr_summary: { keys: ["phase"], getter: true }, + koan_ask_question: { keys: ["questions"], arrays: ["questions"], highValue: true }, }; // Reads the tail of events.jsonl and returns structured log entries. diff --git a/src/planner/lib/dispatch.ts b/src/planner/lib/dispatch.ts index e9f935e..b978d87 100644 --- a/src/planner/lib/dispatch.ts +++ b/src/planner/lib/dispatch.ts @@ -31,6 +31,17 @@ export function createPlanRef(): PlanRef { return { dir: null }; } +// Decouples tool registration (init-time) from subagent directory +// resolution (runtime, after flags available). Same indirection +// pattern as PlanRef. +export interface SubagentRef { + dir: string | null; +} + +export function createSubagentRef(): SubagentRef { + return { dir: null }; +} + // Sets a dispatch slot. Throws if the slot is already occupied -- // prevents silent misrouting when two phases attempt to claim // the same tool. diff --git a/src/planner/lib/ipc.ts b/src/planner/lib/ipc.ts new file mode 100644 index 0000000..aaa14ee --- /dev/null +++ b/src/planner/lib/ipc.ts @@ -0,0 +1,125 @@ +// File-based IPC between subagent and parent session. +// A single ipc.json file per subagent directory holds both the request and +// response. Atomic writes (tmp-rename) prevent partial reads. 
+ +import { promises as fs } from "node:fs"; +import * as path from "node:path"; +import * as crypto from "node:crypto"; + +// -- Types -- + +export interface IpcFile { + request: IpcRequest; + response: IpcResponse | null; // null while awaiting parent response +} + +export interface IpcRequest { + id: string; // crypto.randomUUID() — correlates request to response + type: "ask-question"; // discriminant for routing; extensible to future types + createdAt: string; // ISO 8601 timestamp + payload: AskQuestionPayload; +} + +export interface AskQuestionPayload { + questions: Array<{ + id: string; + question: string; + options: Array<{ label: string }>; + multi?: boolean; + recommended?: number; // 0-indexed + }>; +} + +export interface IpcResponse { + id: string; // must match request.id + respondedAt: string; // ISO 8601 timestamp + cancelled: boolean; // true when user presses Escape + payload: AskAnswerPayload | null; // null when cancelled +} + +export interface AskAnswerPayload { + answers: Array<{ + id: string; // matches question id + selectedOptions: string[]; + customInput?: string; // populated when user selects "Other" + }>; +} + +// -- File paths -- + +const IPC_FILE = "ipc.json"; +const IPC_TMP_FILE = ".ipc.tmp.json"; + +// -- I/O helpers -- + +// Atomic write: .ipc.tmp.json → ipc.json rename. +export async function writeIpcFile(dir: string, data: IpcFile): Promise { + const tmp = path.join(dir, IPC_TMP_FILE); + const target = path.join(dir, IPC_FILE); + await fs.writeFile(tmp, `${JSON.stringify(data, null, 2)}\n`, "utf8"); + await fs.rename(tmp, target); +} + +// Returns null on missing file or parse error. +// Treats parse errors as "not ready" to handle partial writes on non-POSIX systems. +export async function readIpcFile(dir: string): Promise { + try { + const raw = await fs.readFile(path.join(dir, IPC_FILE), "utf8"); + return JSON.parse(raw) as IpcFile; + } catch { + return null; + } +} + +// Fast existence check without parsing. 
+export async function ipcFileExists(dir: string): Promise { + try { + await fs.access(path.join(dir, IPC_FILE)); + return true; + } catch { + return false; + } +} + +// Removes ipc.json and any lingering .ipc.tmp.json; swallows ENOENT. +export async function deleteIpcFile(dir: string): Promise { + for (const name of [IPC_FILE, IPC_TMP_FILE]) { + try { + await fs.unlink(path.join(dir, name)); + } catch (err: unknown) { + if ((err as NodeJS.ErrnoException).code !== "ENOENT") throw err; + } + } +} + +// -- Factory helpers -- + +export function createAskRequest(payload: AskQuestionPayload): IpcFile { + return { + request: { + id: crypto.randomUUID(), + type: "ask-question", + createdAt: new Date().toISOString(), + payload, + }, + response: null, + }; +} + +export function createAskResponse(requestId: string, payload: AskAnswerPayload): IpcResponse { + return { + id: requestId, + respondedAt: new Date().toISOString(), + cancelled: false, + payload, + }; +} + +export function createCancelledResponse(requestId: string): IpcResponse { + return { + id: requestId, + respondedAt: new Date().toISOString(), + cancelled: true, + payload: null, + }; +} diff --git a/src/planner/lib/permissions.ts b/src/planner/lib/permissions.ts index a23faca..90c3e06 100644 --- a/src/planner/lib/permissions.ts +++ b/src/planner/lib/permissions.ts @@ -103,6 +103,7 @@ export const PHASE_PERMISSIONS: ReadonlyMap> = "plan-design", new Set([ "koan_complete_step", + "koan_ask_question", ...PLAN_GETTER_TOOLS_LIST, ...PLAN_SETTER_TOOLS_LIST, ...PLAN_DESIGN_ENTITY_TOOLS, @@ -112,6 +113,7 @@ export const PHASE_PERMISSIONS: ReadonlyMap> = "plan-code", new Set([ "koan_complete_step", + "koan_ask_question", ...PLAN_GETTER_TOOLS_LIST, ...PLAN_CHANGE_TOOLS_LIST, "koan_set_intent", @@ -121,6 +123,7 @@ export const PHASE_PERMISSIONS: ReadonlyMap> = "plan-docs", new Set([ "koan_complete_step", + "koan_ask_question", ...PLAN_GETTER_TOOLS_LIST, "koan_set_change_doc_diff", "koan_set_change_comments", diff --git 
a/src/planner/session.ts b/src/planner/session.ts index f8f62b5..a0ec935 100644 --- a/src/planner/session.ts +++ b/src/planner/session.ts @@ -5,7 +5,7 @@ import { promises as fs } from "node:fs"; import * as path from "node:path"; -import type { AgentToolResult, ExtensionAPI, ExtensionCommandContext, ExtensionContext } from "@mariozechner/pi-coding-agent"; +import type { AgentToolResult, ExtensionAPI, ExtensionCommandContext, ExtensionContext, ExtensionUIContext } from "@mariozechner/pi-coding-agent"; import { exportConversation } from "./conversation.js"; import { createInitialState, initializePlanState, type WorkflowState } from "./state.js"; @@ -25,7 +25,7 @@ import { } from "./subagent.js"; import { createLogger, setLogDir, type Logger } from "../utils/logger.js"; import { createSubagentDir } from "../utils/progress.js"; -import { readProjection, readRecentLogs, type Projection } from "./lib/audit.js"; +import { readProjection, readRecentLogs, type Projection, type LogLine } from "./lib/audit.js"; import type { WorkflowDispatch, PlanRef } from "./lib/dispatch.js"; import { pool } from "./lib/pool.js"; import type { QRFile } from "./qr/types.js"; @@ -38,6 +38,17 @@ import { type SpawnContext, } from "./model-resolver.js"; import type { PhaseRow } from "./model-phase.js"; +import { + readIpcFile, + writeIpcFile, + createAskResponse, + createCancelledResponse, + type IpcFile, + type IpcResponse, +} from "./lib/ipc.js"; +import { askSingleQuestionWithInlineNote } from "./ui/ask/ask-inline-ui.js"; +import { askQuestionsWithTabs } from "./ui/ask/ask-tabs-ui.js"; +import type { AskQuestion } from "./ui/ask/ask-logic.js"; type WorkPhaseKey = "plan-design" | "plan-code" | "plan-docs"; @@ -170,6 +181,107 @@ export async function spawnReviewerWithResolvedModel( return await spawnFn({ ...opts, modelOverride }); } +// Routes an IpcFile ask request to the appropriate UI component and returns +// an IpcResponse. 
On any exception from the UI layer, the caller's catch +// block writes a cancelled response so the subagent unblocks. +async function handleAskRequest( + ui: ExtensionUIContext, + ipc: IpcFile, +): Promise { + const { request } = ipc; + const { questions } = request.payload; + const questionsAsAsk = questions as AskQuestion[]; + + if (questions.length === 1 && !questions[0].multi) { + const selection = await askSingleQuestionWithInlineNote(ui, questionsAsAsk[0]); + if (selection.selectedOptions.length === 0 && !selection.customInput) { + return createCancelledResponse(request.id); + } + const answer: { id: string; selectedOptions: string[]; customInput?: string } = { + id: questions[0].id, + selectedOptions: selection.selectedOptions, + }; + if (selection.customInput !== undefined) { + answer.customInput = selection.customInput; + } + return createAskResponse(request.id, { answers: [answer] }); + } + + const tabResult = await askQuestionsWithTabs(ui, questionsAsAsk); + if (tabResult.cancelled) { + return createCancelledResponse(request.id); + } + + const answers = questions.map((q, i) => { + const sel = tabResult.selections[i] ?? { selectedOptions: [] }; + const answer: { id: string; selectedOptions: string[]; customInput?: string } = { + id: q.id, + selectedOptions: sel.selectedOptions, + }; + if (sel.customInput !== undefined) { + answer.customInput = sel.customInput; + } + return answer; + }); + + return createAskResponse(request.id, { answers }); +} + +// Encapsulates the poll-with-request-detection pattern used by both +// the work poll loop and the fix poll loop. Returns a setInterval ID. 
+function pollWithIpcDetection( + subagentDir: string, + widget: WidgetController | null, + ui: ExtensionUIContext | null, + stepPrefix: string, + updateFromProjection: (p: Projection, logs: LogLine[]) => void, +): ReturnType { + let pendingRequestId: string | null = null; + + return setInterval(async () => { + const [projection, logs] = await Promise.all([ + readProjection(subagentDir), + readRecentLogs(subagentDir), + ]); + if (projection) { + updateFromProjection(projection, logs); + } + + // IPC request detection — skip if already handling a request or no UI + if (pendingRequestId || !ui) return; + + const ipc = await readIpcFile(subagentDir); + if (!ipc || !ipc.request || ipc.response !== null) return; + + pendingRequestId = ipc.request.id; + try { + widget?.update({ + step: `${stepPrefix}: waiting for user input...`, + activity: ipc.request.payload.questions[0]?.question ?? "", + }); + + const response = await handleAskRequest(ui, ipc); + const updated: IpcFile = { request: ipc.request, response }; + await writeIpcFile(subagentDir, updated); + } catch { + // On error, write cancelled response so subagent unblocks. + // The inner try-catch guards against I/O failures during error + // recovery — an unguarded throw here would propagate as an + // unhandled async rejection in the setInterval callback, + // crashing the parent process (Node.js ≥15 default behavior). + try { + const cancelled = createCancelledResponse(ipc.request.id); + await writeIpcFile(subagentDir, { request: ipc.request, response: cancelled }); + } catch { + // I/O failed during error recovery; subagent remains blocked + // until parent terminates. No further action possible. 
+ } + } finally { + pendingRequestId = null; + } + }, 2000); +} + export function createSession(pi: ExtensionAPI, dispatch: WorkflowDispatch, planRef: PlanRef): Session { const state: WorkflowState = createInitialState(); const log = createLogger("Session"); @@ -208,6 +320,7 @@ export function createSession(pi: ExtensionAPI, dispatch: WorkflowDispatch, plan try { const planDir = planInfo.directory; const extensionPath = path.resolve(import.meta.dirname, "../../extensions/koan.ts"); + const ui = ctx.hasUI ? ctx.ui : null; // widgetIndex 0=design, 1=code, 2=docs const phases: PhaseRunConfig[] = [ @@ -247,6 +360,7 @@ export function createSession(pi: ExtensionAPI, dispatch: WorkflowDispatch, plan state, log, widget, + ui, ); phaseSummaries.push(`${phase.label}: ${result.summary}`); @@ -310,6 +424,7 @@ async function runPlanningPhase( state: WorkflowState, log: Logger, widget: WidgetController | null, + ui: ExtensionUIContext | null, ): Promise { state.phase = phaseRunningState(phase.key); @@ -332,16 +447,20 @@ async function runPlanningPhase( const subagentDir = await createSubagentDir(planDir, `${phase.role}-${phase.key}`); - const pollInterval = setInterval(async () => { - const [projection, logs] = await Promise.all([readProjection(subagentDir), readRecentLogs(subagentDir)]); - if (!projection) return; - widget?.update({ - step: `${phase.key}: ${projection.stepName}`, - activity: projection.lastAction ?? "", - logLines: logs, - ...singleSubagentFromProjection(projection), - }); - }, 2000); + const pollInterval = pollWithIpcDetection( + subagentDir, + widget, + ui, + phase.key, + (projection, logs) => { + widget?.update({ + step: `${phase.key}: ${projection.stepName}`, + activity: projection.lastAction ?? 
"", + logLines: logs, + ...singleSubagentFromProjection(projection), + }); + }, + ); const workResult = await spawnWorkWithResolvedModel( phase.key as PhaseRow, @@ -409,6 +528,7 @@ async function runPlanningPhase( state, log, widget, + ui, ); if (qr.passed) { @@ -702,6 +822,7 @@ async function runPhaseWithQR( state: WorkflowState, log: Logger, widget: WidgetController | null, + ui: ExtensionUIContext | null, ): Promise { const qrPath = qrFilePath(planDir, phase.key); @@ -764,16 +885,20 @@ async function runPhaseWithQR( const fixDir = await createSubagentDir(planDir, `${phase.role}-fix-${phase.key}-${fixIndex}`); - const fixPoll = setInterval(async () => { - const [projection, logs] = await Promise.all([readProjection(fixDir), readRecentLogs(fixDir)]); - if (!projection) return; - widget?.update({ - step: `${phase.key} fix ${fixIndex}/${MAX_FIX_ITERATIONS}: ${projection.stepName}`, - activity: projection.lastAction ?? "", - logLines: logs, - ...singleSubagentFromProjection(projection), - }); - }, 2000); + const fixPoll = pollWithIpcDetection( + fixDir, + widget, + ui, + `${phase.key} fix ${fixIndex}/${MAX_FIX_ITERATIONS}`, + (projection, logs) => { + widget?.update({ + step: `${phase.key} fix ${fixIndex}/${MAX_FIX_ITERATIONS}: ${projection.stepName}`, + activity: projection.lastAction ?? "", + logLines: logs, + ...singleSubagentFromProjection(projection), + }); + }, + ); const fixResult = await spawnFixWithResolvedModel( phase.key as PhaseRow, diff --git a/src/planner/tools/ask.ts b/src/planner/tools/ask.ts new file mode 100644 index 0000000..f1d6ff0 --- /dev/null +++ b/src/planner/tools/ask.ts @@ -0,0 +1,241 @@ +// koan_ask_question tool: subagent-side of the file-based IPC ask flow. +// Writes ipc.json, polls until parent writes a response, then returns +// formatted answers to the LLM. The entire poll loop is wrapped in a +// try/finally that deletes ipc.json, guaranteeing cleanup on all exit paths. 
+ +import { Type, type Static } from "@sinclair/typebox"; +import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; + +import type { SubagentRef } from "../lib/dispatch.js"; +import { + ipcFileExists, + writeIpcFile, + readIpcFile, + deleteIpcFile, + createAskRequest, + type AskAnswerPayload, +} from "../lib/ipc.js"; + +// -- Tool schema (mirrors pi-ask-tool-extension exactly) -- + +const OptionItemSchema = Type.Object({ + label: Type.String({ description: "Display label" }), +}); + +const QuestionItemSchema = Type.Object({ + id: Type.String({ description: "Question id (e.g. auth, cache, priority)" }), + question: Type.String({ description: "Question text" }), + options: Type.Array(OptionItemSchema, { + description: "Available options. Do not include 'Other'.", + minItems: 1, + }), + multi: Type.Optional(Type.Boolean({ description: "Allow multi-select" })), + recommended: Type.Optional( + Type.Number({ description: "0-indexed recommended option. '(Recommended)' is shown automatically." }), + ), +}); + +const AskParamsSchema = Type.Object({ + questions: Type.Array(QuestionItemSchema, { description: "Questions to ask", minItems: 1 }), +}); + +type AskParams = Static; + +// -- Result formatting -- + +interface QuestionResult { + id: string; + question: string; + options: string[]; + multi: boolean; + selectedOptions: string[]; + customInput?: string; +} + +function formatSelectionForSummary(result: QuestionResult): string { + const hasSelectedOptions = result.selectedOptions.length > 0; + const hasCustomInput = Boolean(result.customInput); + + if (!hasSelectedOptions && !hasCustomInput) return "(cancelled)"; + + if (hasSelectedOptions && hasCustomInput) { + const selectedPart = result.multi + ? 
`[${result.selectedOptions.join(", ")}]` + : result.selectedOptions[0]; + return `${selectedPart} + Other: "${result.customInput}"`; + } + + if (hasCustomInput) return `"${result.customInput}"`; + if (result.multi) return `[${result.selectedOptions.join(", ")}]`; + return result.selectedOptions[0] ?? "(no selection)"; +} + +function formatQuestionContext(result: QuestionResult, index: number): string { + const lines: string[] = [ + `Question ${index + 1} (${result.id})`, + `Prompt: ${result.question}`, + "Options:", + ...result.options.map((o, i) => ` ${i + 1}. ${o}`), + "Response:", + ]; + + const hasSelectedOptions = result.selectedOptions.length > 0; + const hasCustomInput = Boolean(result.customInput); + + if (!hasSelectedOptions && !hasCustomInput) { + lines.push(" Selected: (cancelled)"); + return lines.join("\n"); + } + + if (hasSelectedOptions) { + const text = result.multi + ? `[${result.selectedOptions.join(", ")}]` + : result.selectedOptions[0]; + lines.push(` Selected: ${text}`); + } + + if (hasCustomInput) { + if (!hasSelectedOptions) lines.push(" Selected: Other (type your own)"); + lines.push(` Custom input: ${result.customInput}`); + } + + return lines.join("\n"); +} + +function buildSessionContent(results: QuestionResult[]): string { + const summaryLines = results.map((r) => `${r.id}: ${formatSelectionForSummary(r)}`).join("\n"); + const contextBlocks = results.map((r, i) => formatQuestionContext(r, i)).join("\n\n"); + return `User answers:\n${summaryLines}\n\nAnswer context:\n${contextBlocks}`; +} + +function buildQuestionResults( + params: AskParams, + answers: AskAnswerPayload["answers"], +): QuestionResult[] { + return params.questions.map((q) => { + const answer = answers.find((a) => a.id === q.id) ?? { id: q.id, selectedOptions: [] }; + return { + id: q.id, + question: q.question, + options: q.options.map((o) => o.label), + multi: q.multi ?? 
false, + selectedOptions: answer.selectedOptions, + customInput: answer.customInput, + }; + }); +} + +// -- Tool registration -- + +const ASK_TOOL_DESCRIPTION = ` +Ask the user for clarification when a choice materially affects the outcome. + +- Use when multiple valid approaches have different trade-offs. +- Prefer 2-5 concise options. +- Use multi=true when multiple answers are valid. +- Use recommended= (0-indexed) to mark the default option. +- You can ask multiple related questions in one call using questions[]. +- Do NOT include an 'Other' option; UI adds it automatically. +`.trim(); + +function sleep(ms: number): Promise { + return new Promise((resolve) => setTimeout(resolve, ms)); +} + +export function registerAskTools(pi: ExtensionAPI, subagentRef: SubagentRef): void { + pi.registerTool({ + name: "koan_ask_question", + label: "Ask question", + description: ASK_TOOL_DESCRIPTION, + parameters: AskParamsSchema, + + async execute(_toolCallId, params, signal) { + const askParams = params as AskParams; + const dir = subagentRef.dir; + + if (!dir) { + return { + content: [{ type: "text" as const, text: "Error: koan_ask_question is only available in subagent context." }], + details: undefined, + }; + } + + if (await ipcFileExists(dir)) { + return { + content: [{ type: "text" as const, text: "Error: A question request is already pending." 
}], + details: undefined, + }; + } + + const ipc = createAskRequest(askParams); + await writeIpcFile(dir, ipc); + + let aborted = false; + const onAbort = () => { aborted = true; }; + if (signal) { + signal.addEventListener("abort", onAbort, { once: true }); + } + + type PollResult = "answered" | "cancelled" | "aborted" | "file-gone"; + let pollResult: PollResult = "file-gone"; + let answeredPayload: AskAnswerPayload | null = null; + + try { + while (!aborted) { + await sleep(500); + if (signal?.aborted) { + aborted = true; + break; + } + + const current = await readIpcFile(dir); + if (current === null) { + pollResult = "file-gone"; + break; + } + + if (current.response !== null && current.response.id === ipc.request.id) { + if (current.response.cancelled) { + pollResult = "cancelled"; + } else { + pollResult = "answered"; + answeredPayload = current.response.payload; + } + break; + } + } + + if (aborted) { + pollResult = "aborted"; + } + } finally { + await deleteIpcFile(dir); + } + + switch (pollResult) { + case "answered": { + const results = buildQuestionResults(askParams, answeredPayload?.answers ?? []); + return { + content: [{ type: "text" as const, text: buildSessionContent(results) }], + details: undefined, + }; + } + case "cancelled": + return { + content: [{ type: "text" as const, text: "The user declined to answer. Proceed with your best judgment." }], + details: undefined, + }; + case "aborted": + return { + content: [{ type: "text" as const, text: "The question was aborted." }], + details: undefined, + }; + case "file-gone": + return { + content: [{ type: "text" as const, text: "The question was cancelled." }], + details: undefined, + }; + } + }, + }); +} diff --git a/src/planner/tools/index.ts b/src/planner/tools/index.ts index e658f49..726cd11 100644 --- a/src/planner/tools/index.ts +++ b/src/planner/tools/index.ts @@ -3,7 +3,7 @@ // tool registration and workflow infrastructure. 
import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; -import type { WorkflowDispatch, PlanRef } from "../lib/dispatch.js"; +import type { WorkflowDispatch, PlanRef, SubagentRef } from "../lib/dispatch.js"; import { registerWorkflowTools } from "./workflow.js"; import { registerPlanGetterTools } from "./getters.js"; @@ -12,11 +12,13 @@ import { registerPlanDesignEntityTools } from "./entity-design.js"; import { registerPlanCodeEntityTools } from "./entity-code.js"; import { registerPlanStructureEntityTools } from "./entity-structure.js"; import { registerQRTools } from "./qr.js"; +import { registerAskTools } from "./ask.js"; -export type { WorkflowDispatch, PlanRef, StepResult } from "../lib/dispatch.js"; +export type { WorkflowDispatch, PlanRef, SubagentRef, StepResult } from "../lib/dispatch.js"; export { createDispatch, createPlanRef, + createSubagentRef, hookDispatch, unhookDispatch, } from "../lib/dispatch.js"; @@ -25,6 +27,7 @@ export function registerAllTools( pi: ExtensionAPI, planRef: PlanRef, dispatch: WorkflowDispatch, + subagentRef: SubagentRef, ): void { registerWorkflowTools(pi, dispatch); registerPlanGetterTools(pi, planRef); @@ -33,4 +36,5 @@ export function registerAllTools( registerPlanCodeEntityTools(pi, planRef); registerPlanStructureEntityTools(pi, planRef); registerQRTools(pi, planRef); + registerAskTools(pi, subagentRef); } diff --git a/src/planner/ui/ask/ask-inline-note.ts b/src/planner/ui/ask/ask-inline-note.ts new file mode 100644 index 0000000..a22ab8f --- /dev/null +++ b/src/planner/ui/ask/ask-inline-note.ts @@ -0,0 +1,65 @@ +import { wrapTextWithAnsi } from "@mariozechner/pi-tui"; + +const INLINE_NOTE_SEPARATOR = " — note: "; +const INLINE_EDIT_CURSOR = "▍"; + +export const INLINE_NOTE_WRAP_PADDING = 2; + +function sanitizeNoteForInlineDisplay(rawNote: string): string { + return rawNote.replace(/[\r\n\t]/g, " ").replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ""); +} + +function truncateTextKeepingTail(text: string, maxLength: 
number): string { + if (maxLength <= 0) return ""; + if (text.length <= maxLength) return text; + if (maxLength === 1) return "…"; + return `…${text.slice(-(maxLength - 1))}`; +} + +function truncateTextKeepingHead(text: string, maxLength: number): string { + if (maxLength <= 0) return ""; + if (text.length <= maxLength) return text; + if (maxLength === 1) return "…"; + return `${text.slice(0, maxLength - 1)}…`; +} + +export function buildOptionLabelWithInlineNote( + baseOptionLabel: string, + rawNote: string, + isEditingNote: boolean, + maxInlineLabelLength?: number, +): string { + const sanitizedNote = sanitizeNoteForInlineDisplay(rawNote); + if (!isEditingNote && sanitizedNote.trim().length === 0) { + return baseOptionLabel; + } + + const labelPrefix = `${baseOptionLabel}${INLINE_NOTE_SEPARATOR}`; + const inlineNote = isEditingNote ? `${sanitizedNote}${INLINE_EDIT_CURSOR}` : sanitizedNote.trim(); + const inlineLabel = `${labelPrefix}${inlineNote}`; + + if (maxInlineLabelLength == null) { + return inlineLabel; + } + + return isEditingNote + ? truncateTextKeepingTail(inlineLabel, maxInlineLabelLength) + : truncateTextKeepingHead(inlineLabel, maxInlineLabelLength); +} + +export function buildWrappedOptionLabelWithInlineNote( + baseOptionLabel: string, + rawNote: string, + isEditingNote: boolean, + maxInlineLabelLength: number, + wrapPadding = INLINE_NOTE_WRAP_PADDING, +): string[] { + const inlineLabel = buildOptionLabelWithInlineNote(baseOptionLabel, rawNote, isEditingNote); + const sanitizedWrapPadding = Number.isFinite(wrapPadding) ? Math.max(0, Math.floor(wrapPadding)) : 0; + const sanitizedMaxInlineLabelLength = Number.isFinite(maxInlineLabelLength) + ? Math.max(1, Math.floor(maxInlineLabelLength)) + : 1; + const wrapWidth = Math.max(1, sanitizedMaxInlineLabelLength - sanitizedWrapPadding); + const wrappedLines = wrapTextWithAnsi(inlineLabel, wrapWidth); + return wrappedLines.length > 0 ? 
wrappedLines : [""]; +} diff --git a/src/planner/ui/ask/ask-inline-ui.ts b/src/planner/ui/ask/ask-inline-ui.ts new file mode 100644 index 0000000..e57ed04 --- /dev/null +++ b/src/planner/ui/ask/ask-inline-ui.ts @@ -0,0 +1,221 @@ +import type { ExtensionUIContext } from "@mariozechner/pi-coding-agent"; +import { Editor, type EditorTheme, Key, matchesKey, truncateToWidth, visibleWidth } from "@mariozechner/pi-tui"; +import { + OTHER_OPTION, + appendRecommendedTagToOptionLabels, + buildSingleSelectionResult, + type AskOption, + type AskSelection, +} from "./ask-logic.js"; +import { INLINE_NOTE_WRAP_PADDING, buildWrappedOptionLabelWithInlineNote } from "./ask-inline-note.js"; + +interface SingleQuestionInput { + question: string; + options: AskOption[]; + recommended?: number; +} + +interface InlineSelectionResult { + cancelled: boolean; + selectedOption?: string; + note?: string; +} + +function resolveInitialCursorIndexFromRecommendedOption( + recommendedOptionIndex: number | undefined, + optionCount: number, +): number { + if (recommendedOptionIndex == null) return 0; + if (recommendedOptionIndex < 0 || recommendedOptionIndex >= optionCount) return 0; + return recommendedOptionIndex; +} + +export async function askSingleQuestionWithInlineNote( + ui: ExtensionUIContext, + questionInput: SingleQuestionInput, +): Promise { + const baseOptionLabels = questionInput.options.map((option) => option.label); + const optionLabelsWithRecommendedTag = appendRecommendedTagToOptionLabels( + baseOptionLabels, + questionInput.recommended, + ); + const selectableOptionLabels = [...optionLabelsWithRecommendedTag, OTHER_OPTION]; + const initialCursorIndex = resolveInitialCursorIndexFromRecommendedOption( + questionInput.recommended, + optionLabelsWithRecommendedTag.length, + ); + + const result = await ui.custom((tui, theme, _keybindings, done) => { + let cursorOptionIndex = initialCursorIndex; + let isNoteEditorOpen = false; + let cachedRenderedLines: string[] | undefined; + const 
noteByOptionIndex = new Map(); + + const editorTheme: EditorTheme = { + borderColor: (text) => theme.fg("accent", text), + selectList: { + selectedPrefix: (text) => theme.fg("accent", text), + selectedText: (text) => theme.fg("accent", text), + description: (text) => theme.fg("muted", text), + scrollInfo: (text) => theme.fg("dim", text), + noMatch: (text) => theme.fg("warning", text), + }, + }; + const noteEditor = new Editor(tui, editorTheme); + + const requestUiRerender = () => { + cachedRenderedLines = undefined; + tui.requestRender(); + }; + + const getRawNoteForOption = (optionIndex: number): string => noteByOptionIndex.get(optionIndex) ?? ""; + const getTrimmedNoteForOption = (optionIndex: number): string => getRawNoteForOption(optionIndex).trim(); + + const loadCurrentNoteIntoEditor = () => { + noteEditor.setText(getRawNoteForOption(cursorOptionIndex)); + }; + + const saveCurrentNoteFromEditor = (value: string) => { + noteByOptionIndex.set(cursorOptionIndex, value); + }; + + const submitCurrentSelection = (selectedOptionLabel: string, note: string) => { + done({ + cancelled: false, + selectedOption: selectedOptionLabel, + note, + }); + }; + + noteEditor.onChange = (value) => { + saveCurrentNoteFromEditor(value); + requestUiRerender(); + }; + + noteEditor.onSubmit = (value) => { + saveCurrentNoteFromEditor(value); + const selectedOptionLabel = selectableOptionLabels[cursorOptionIndex]; + const trimmedNote = value.trim(); + + if (selectedOptionLabel === OTHER_OPTION && !trimmedNote) { + requestUiRerender(); + return; + } + + submitCurrentSelection(selectedOptionLabel, trimmedNote); + }; + + const render = (width: number): string[] => { + if (cachedRenderedLines) return cachedRenderedLines; + + const renderedLines: string[] = []; + const addLine = (line: string) => renderedLines.push(truncateToWidth(line, width)); + + addLine(theme.fg("accent", "─".repeat(width))); + addLine(theme.fg("text", ` ${questionInput.question}`)); + renderedLines.push(""); + + for (let 
optionIndex = 0; optionIndex < selectableOptionLabels.length; optionIndex++) { + const optionLabel = selectableOptionLabels[optionIndex]; + const isCursorOption = optionIndex === cursorOptionIndex; + const isEditingThisOption = isNoteEditorOpen && isCursorOption; + const cursorPrefixText = isCursorOption ? "→ " : " "; + const cursorPrefix = isCursorOption ? theme.fg("accent", cursorPrefixText) : cursorPrefixText; + const bullet = isCursorOption ? "●" : "○"; + const markerText = `${bullet} `; + const optionColor = isCursorOption ? "accent" : "text"; + const prefixWidth = visibleWidth(cursorPrefixText) + visibleWidth(markerText); + const wrappedInlineLabelLines = buildWrappedOptionLabelWithInlineNote( + optionLabel, + getRawNoteForOption(optionIndex), + isEditingThisOption, + Math.max(1, width - prefixWidth), + INLINE_NOTE_WRAP_PADDING, + ); + const continuationPrefix = " ".repeat(prefixWidth); + addLine(`${cursorPrefix}${theme.fg(optionColor, `${markerText}${wrappedInlineLabelLines[0] ?? 
""}`)}`); + for (const wrappedLine of wrappedInlineLabelLines.slice(1)) { + addLine(`${continuationPrefix}${theme.fg(optionColor, wrappedLine)}`); + } + } + + renderedLines.push(""); + + if (isNoteEditorOpen) { + addLine(theme.fg("dim", " Typing note inline • Enter submit • Tab/Esc stop editing")); + } else if (getTrimmedNoteForOption(cursorOptionIndex).length > 0) { + addLine(theme.fg("dim", " ↑↓ move • Enter submit • Tab edit note • Esc cancel")); + } else { + addLine(theme.fg("dim", " ↑↓ move • Enter submit • Tab add note • Esc cancel")); + } + + addLine(theme.fg("accent", "─".repeat(width))); + cachedRenderedLines = renderedLines; + return renderedLines; + }; + + const handleInput = (data: string) => { + if (isNoteEditorOpen) { + if (matchesKey(data, Key.tab) || matchesKey(data, Key.escape)) { + isNoteEditorOpen = false; + requestUiRerender(); + return; + } + noteEditor.handleInput(data); + requestUiRerender(); + return; + } + + if (matchesKey(data, Key.up)) { + cursorOptionIndex = Math.max(0, cursorOptionIndex - 1); + requestUiRerender(); + return; + } + if (matchesKey(data, Key.down)) { + cursorOptionIndex = Math.min(selectableOptionLabels.length - 1, cursorOptionIndex + 1); + requestUiRerender(); + return; + } + + if (matchesKey(data, Key.tab)) { + isNoteEditorOpen = true; + loadCurrentNoteIntoEditor(); + requestUiRerender(); + return; + } + + if (matchesKey(data, Key.enter)) { + const selectedOptionLabel = selectableOptionLabels[cursorOptionIndex]; + const trimmedNote = getTrimmedNoteForOption(cursorOptionIndex); + + if (selectedOptionLabel === OTHER_OPTION && !trimmedNote) { + isNoteEditorOpen = true; + loadCurrentNoteIntoEditor(); + requestUiRerender(); + return; + } + + submitCurrentSelection(selectedOptionLabel, trimmedNote); + return; + } + + if (matchesKey(data, Key.escape)) { + done({ cancelled: true }); + } + }; + + return { + render, + invalidate: () => { + cachedRenderedLines = undefined; + }, + handleInput, + }; + }); + + if (result.cancelled || 
/** Label of the free-text choice; picking it produces `customInput` rather than an option. */
export const OTHER_OPTION = "Other (type your own)";

/** Suffix added to the recommended option for display and stripped again from results. */
const RECOMMENDED_OPTION_TAG = " (Recommended)";

/** One selectable answer. */
export interface AskOption {
  label: string;
}

/** A question together with its answer options. */
export interface AskQuestion {
  id: string;
  question: string;
  options: AskOption[];
  /** When true, several options may be picked at once. */
  multi?: boolean;
  /** Position in `options` of the option to flag as recommended. */
  recommended?: number;
}

/** Outcome of answering one question. */
export interface AskSelection {
  /** Chosen labels, each optionally suffixed with ` - <note>`. */
  selectedOptions: string[];
  /** Free text entered for the "Other" choice, when there is any. */
  customInput?: string;
}

/**
 * Marks the recommended option by appending the recommended tag to its label.
 *
 * @param optionLabels - Labels in display order.
 * @param recommendedOptionIndex - Position of the recommended label.
 * @returns The input array itself when the index is missing or out of range;
 *   otherwise a new array in which the recommended label carries the tag
 *   (a label that already ends with the tag is left as it is).
 */
export function appendRecommendedTagToOptionLabels(
  optionLabels: string[],
  recommendedOptionIndex?: number,
): string[] {
  const isUnusable =
    recommendedOptionIndex == null ||
    recommendedOptionIndex < 0 ||
    recommendedOptionIndex >= optionLabels.length;
  if (isUnusable) return optionLabels;

  return optionLabels.map((label, position) =>
    position === recommendedOptionIndex && !label.endsWith(RECOMMENDED_OPTION_TAG)
      ? `${label}${RECOMMENDED_OPTION_TAG}`
      : label,
  );
}

/** Returns the label without a trailing recommended tag; other labels pass through. */
function removeRecommendedTagFromOptionLabel(optionLabel: string): string {
  return optionLabel.endsWith(RECOMMENDED_OPTION_TAG)
    ? optionLabel.slice(0, -RECOMMENDED_OPTION_TAG.length)
    : optionLabel;
}

/**
 * Builds the result for a single-select question.
 *
 * @param selectedOptionLabel - Label as displayed (may carry the recommended tag).
 * @param note - Optional note typed next to the option; surrounding whitespace is ignored.
 * @returns For the "Other" choice: the note as `customInput`, or an empty
 *   selection when the note is blank. For a regular option: the untagged
 *   label, suffixed with ` - <note>` when a note was given.
 */
export function buildSingleSelectionResult(selectedOptionLabel: string, note?: string): AskSelection {
  const label = removeRecommendedTagFromOptionLabel(selectedOptionLabel);
  const trimmedNote = note?.trim();

  if (label === OTHER_OPTION) {
    return trimmedNote ? { selectedOptions: [], customInput: trimmedNote } : { selectedOptions: [] };
  }

  return { selectedOptions: [trimmedNote ? `${label} - ${trimmedNote}` : label] };
}

/**
 * Builds the result for a multi-select question.
 *
 * @param optionLabels - All labels as displayed, in display order.
 * @param selectedOptionIndexes - Positions of the ticked options (order and duplicates are irrelevant).
 * @param optionNotes - Note per option position; missing entries count as no note.
 * @param otherOptionIndex - Position of the "Other" choice within `optionLabels`.
 * @returns Selected labels in display order (untagged, with ` - <note>` where
 *   a note exists). A ticked "Other" contributes its note as `customInput`
 *   and is dropped when that note is blank.
 */
export function buildMultiSelectionResult(
  optionLabels: string[],
  selectedOptionIndexes: number[],
  optionNotes: string[],
  otherOptionIndex: number,
): AskSelection {
  const ticked = new Set(selectedOptionIndexes);
  const selectedOptions: string[] = [];
  let customInput: string | undefined;

  for (const [position, rawLabel] of optionLabels.entries()) {
    if (!ticked.has(position)) continue;

    const label = removeRecommendedTagFromOptionLabel(rawLabel);
    const note = optionNotes[position]?.trim();

    if (position === otherOptionIndex) {
      // "Other" never appears among selectedOptions; only its text is kept.
      if (note) customInput = note;
      continue;
    }

    selectedOptions.push(note ? `${label} - ${note}` : label);
  }

  return customInput ? { selectedOptions, customInput } : { selectedOptions };
}
/**
 * Renders one answer as a single line for the submit-review tab.
 *
 * @param selection - The answer to describe.
 * @param isMulti - Whether the question is multi-select; multi answers are
 *   shown as a bracketed, comma-separated list, single answers as the bare label.
 * @returns The selected part, followed by ` + Other: <text>` when custom input
 *   exists; `Other: <text>` when only custom input exists; `(not answered)`
 *   when there is neither.
 */
export function formatSelectionForSubmitReview(selection: AskSelection, isMulti: boolean): string {
  const { selectedOptions, customInput } = selection;
  const hasCustomInput = Boolean(customInput);

  if (selectedOptions.length === 0) {
    return hasCustomInput ? `Other: ${customInput}` : "(not answered)";
  }

  const selectedPart = isMulti ? `[${selectedOptions.join(", ")}]` : selectedOptions[0];
  return hasCustomInput ? `${selectedPart} + Other: ${customInput}` : selectedPart;
}

/**
 * Clamps an optional index into `[0, maxExclusive - 1]`.
 *
 * @param index - Requested position; may be missing, NaN, fractional or out of range.
 * @param maxExclusive - Number of valid positions.
 * @returns `0` when the index is missing/NaN or there are no valid positions;
 *   otherwise the index limited to the valid range and truncated to a whole
 *   number, so the result can always be used as an array position.
 */
function clampIndex(index: number | undefined, maxExclusive: number): number {
  if (index == null || Number.isNaN(index) || maxExclusive <= 0) return 0;
  if (index < 0) return 0;
  if (index >= maxExclusive) return maxExclusive - 1;
  // A fractional value (e.g. 1.5) would otherwise address no element at all.
  return Math.trunc(index);
}

/**
 * Turns a question id into a human-readable tab label.
 *
 * Runs of underscores, hyphens and whitespace become a single space and the
 * result is trimmed, so `"auth_model"` reads as `"auth model"`.
 *
 * @param id - Question id.
 * @param fallback - Label to use when nothing readable remains (empty id, or
 *   an id made only of separators/whitespace).
 */
function normalizeTabLabel(id: string, fallback: string): string {
  // Replace first, trim last: trimming before the replacement left the spaces
  // produced by leading/trailing separators in place.
  const normalized = id.replace(/[\s_-]+/g, " ").trim();
  return normalized.length > 0 ? normalized : fallback;
}
""; + return buildSingleSelectionResult(selectedOptionLabel, note); +} + +function isQuestionSelectionValid( + question: PreparedQuestion, + selectedOptionIndexes: number[], + noteByOptionIndex: string[], +): boolean { + if (selectedOptionIndexes.length === 0) return false; + if (!selectedOptionIndexes.includes(question.otherOptionIndex)) return true; + const otherNote = noteByOptionIndex[question.otherOptionIndex]?.trim() ?? ""; + return otherNote.length > 0; +} + +function createTabsUiStateSnapshot( + cancelled: boolean, + selectedOptionIndexesByQuestion: number[][], + noteByQuestionByOption: string[][], +): TabsUIState { + return { + cancelled, + selectedOptionIndexesByQuestion: selectedOptionIndexesByQuestion.map((indexes) => [...indexes]), + noteByQuestionByOption: noteByQuestionByOption.map((notes) => [...notes]), + }; +} + +function addIndexToSelection(selectedOptionIndexes: number[], optionIndex: number): number[] { + if (selectedOptionIndexes.includes(optionIndex)) return selectedOptionIndexes; + return [...selectedOptionIndexes, optionIndex].sort((a, b) => a - b); +} + +function removeIndexFromSelection(selectedOptionIndexes: number[], optionIndex: number): number[] { + return selectedOptionIndexes.filter((index) => index !== optionIndex); +} + +export async function askQuestionsWithTabs( + ui: ExtensionUIContext, + questions: AskQuestion[], +): Promise<{ cancelled: boolean; selections: AskSelection[] }> { + const preparedQuestions: PreparedQuestion[] = questions.map((question, questionIndex) => { + const baseOptionLabels = question.options.map((option) => option.label); + const optionLabels = [...appendRecommendedTagToOptionLabels(baseOptionLabels, question.recommended), OTHER_OPTION]; + return { + id: question.id, + question: question.question, + options: optionLabels, + tabLabel: normalizeTabLabel(question.id, `Q${questionIndex + 1}`), + multi: question.multi === true, + otherOptionIndex: optionLabels.length - 1, + }; + }); + + const 
initialCursorOptionIndexByQuestion = preparedQuestions.map((preparedQuestion, questionIndex) => + clampIndex(questions[questionIndex].recommended, preparedQuestion.options.length), + ); + + const result = await ui.custom((tui, theme, _keybindings, done) => { + let activeTabIndex = 0; + let isNoteEditorOpen = false; + let cachedRenderedLines: string[] | undefined; + const cursorOptionIndexByQuestion = [...initialCursorOptionIndexByQuestion]; + const selectedOptionIndexesByQuestion = preparedQuestions.map(() => [] as number[]); + const noteByQuestionByOption = preparedQuestions.map((preparedQuestion) => + Array(preparedQuestion.options.length).fill("") as string[], + ); + + const editorTheme: EditorTheme = { + borderColor: (text) => theme.fg("accent", text), + selectList: { + selectedPrefix: (text) => theme.fg("accent", text), + selectedText: (text) => theme.fg("accent", text), + description: (text) => theme.fg("muted", text), + scrollInfo: (text) => theme.fg("dim", text), + noMatch: (text) => theme.fg("warning", text), + }, + }; + const noteEditor = new Editor(tui, editorTheme); + + const submitTabIndex = preparedQuestions.length; + + const requestUiRerender = () => { + cachedRenderedLines = undefined; + tui.requestRender(); + }; + + const getActiveQuestionIndex = (): number | null => { + if (activeTabIndex >= preparedQuestions.length) return null; + return activeTabIndex; + }; + + const getQuestionNote = (questionIndex: number, optionIndex: number): string => + noteByQuestionByOption[questionIndex]?.[optionIndex] ?? 
""; + + const getTrimmedQuestionNote = (questionIndex: number, optionIndex: number): string => + getQuestionNote(questionIndex, optionIndex).trim(); + + const isAllQuestionSelectionsValid = (): boolean => + preparedQuestions.every((preparedQuestion, questionIndex) => + isQuestionSelectionValid( + preparedQuestion, + selectedOptionIndexesByQuestion[questionIndex], + noteByQuestionByOption[questionIndex], + ), + ); + + const openNoteEditorForActiveOption = () => { + const questionIndex = getActiveQuestionIndex(); + if (questionIndex == null) return; + + isNoteEditorOpen = true; + const optionIndex = cursorOptionIndexByQuestion[questionIndex]; + noteEditor.setText(getQuestionNote(questionIndex, optionIndex)); + requestUiRerender(); + }; + + const advanceToNextTabOrSubmit = () => { + activeTabIndex = Math.min(submitTabIndex, activeTabIndex + 1); + }; + + noteEditor.onChange = (value) => { + const questionIndex = getActiveQuestionIndex(); + if (questionIndex == null) return; + const optionIndex = cursorOptionIndexByQuestion[questionIndex]; + noteByQuestionByOption[questionIndex][optionIndex] = value; + requestUiRerender(); + }; + + noteEditor.onSubmit = (value) => { + const questionIndex = getActiveQuestionIndex(); + if (questionIndex == null) return; + + const preparedQuestion = preparedQuestions[questionIndex]; + const optionIndex = cursorOptionIndexByQuestion[questionIndex]; + noteByQuestionByOption[questionIndex][optionIndex] = value; + const trimmedNote = value.trim(); + + if (preparedQuestion.multi) { + if (trimmedNote.length > 0) { + selectedOptionIndexesByQuestion[questionIndex] = addIndexToSelection( + selectedOptionIndexesByQuestion[questionIndex], + optionIndex, + ); + } + if (optionIndex === preparedQuestion.otherOptionIndex && trimmedNote.length === 0) { + requestUiRerender(); + return; + } + isNoteEditorOpen = false; + requestUiRerender(); + return; + } + + selectedOptionIndexesByQuestion[questionIndex] = [optionIndex]; + if (optionIndex === 
preparedQuestion.otherOptionIndex && trimmedNote.length === 0) { + requestUiRerender(); + return; + } + + isNoteEditorOpen = false; + advanceToNextTabOrSubmit(); + requestUiRerender(); + }; + + const renderTabs = (): string => { + const tabParts: string[] = ["← "]; + for (let questionIndex = 0; questionIndex < preparedQuestions.length; questionIndex++) { + const preparedQuestion = preparedQuestions[questionIndex]; + const isActiveTab = questionIndex === activeTabIndex; + const isQuestionValid = isQuestionSelectionValid( + preparedQuestion, + selectedOptionIndexesByQuestion[questionIndex], + noteByQuestionByOption[questionIndex], + ); + const statusIcon = isQuestionValid ? "■" : "□"; + const tabLabel = ` ${statusIcon} ${preparedQuestion.tabLabel} `; + const styledTabLabel = isActiveTab + ? theme.bg("selectedBg", theme.fg("text", tabLabel)) + : theme.fg(isQuestionValid ? "success" : "muted", tabLabel); + tabParts.push(`${styledTabLabel} `); + } + + const isSubmitTabActive = activeTabIndex === submitTabIndex; + const canSubmit = isAllQuestionSelectionsValid(); + const submitLabel = " ✓ Submit "; + const styledSubmitLabel = isSubmitTabActive + ? theme.bg("selectedBg", theme.fg("text", submitLabel)) + : theme.fg(canSubmit ? 
"success" : "dim", submitLabel); + tabParts.push(`${styledSubmitLabel} →`); + return tabParts.join(""); + }; + + const renderSubmitTab = (width: number, renderedLines: string[]): void => { + const addLine = (line: string) => renderedLines.push(truncateToWidth(line, width)); + + addLine(theme.fg("accent", theme.bold(" Review answers"))); + renderedLines.push(""); + + for (let questionIndex = 0; questionIndex < preparedQuestions.length; questionIndex++) { + const preparedQuestion = preparedQuestions[questionIndex]; + const selection = buildSelectionForQuestion( + preparedQuestion, + selectedOptionIndexesByQuestion[questionIndex], + noteByQuestionByOption[questionIndex], + ); + const value = formatSelectionForSubmitReview(selection, preparedQuestion.multi); + const isValid = isQuestionSelectionValid( + preparedQuestion, + selectedOptionIndexesByQuestion[questionIndex], + noteByQuestionByOption[questionIndex], + ); + const statusIcon = isValid ? theme.fg("success", "●") : theme.fg("warning", "○"); + addLine(` ${statusIcon} ${theme.fg("muted", `${preparedQuestion.tabLabel}:`)} ${theme.fg("text", value)}`); + } + + renderedLines.push(""); + if (isAllQuestionSelectionsValid()) { + addLine(theme.fg("success", " Press Enter to submit")); + } else { + const missingQuestions = preparedQuestions + .filter((preparedQuestion, questionIndex) => + !isQuestionSelectionValid( + preparedQuestion, + selectedOptionIndexesByQuestion[questionIndex], + noteByQuestionByOption[questionIndex], + ), + ) + .map((preparedQuestion) => preparedQuestion.tabLabel) + .join(", "); + addLine(theme.fg("warning", ` Complete required answers: ${missingQuestions}`)); + } + addLine(theme.fg("dim", " ←/→ switch tabs • Esc cancel")); + }; + + const renderQuestionTab = (width: number, renderedLines: string[], questionIndex: number): void => { + const addLine = (line: string) => renderedLines.push(truncateToWidth(line, width)); + const preparedQuestion = preparedQuestions[questionIndex]; + const 
cursorOptionIndex = cursorOptionIndexByQuestion[questionIndex]; + const selectedOptionIndexes = selectedOptionIndexesByQuestion[questionIndex]; + + addLine(theme.fg("text", ` ${preparedQuestion.question}`)); + renderedLines.push(""); + + for (let optionIndex = 0; optionIndex < preparedQuestion.options.length; optionIndex++) { + const optionLabel = preparedQuestion.options[optionIndex]; + const isCursorOption = optionIndex === cursorOptionIndex; + const isOptionSelected = selectedOptionIndexes.includes(optionIndex); + const isEditingThisOption = isNoteEditorOpen && isCursorOption; + const cursorPrefixText = isCursorOption ? "→ " : " "; + const cursorPrefix = isCursorOption ? theme.fg("accent", cursorPrefixText) : cursorPrefixText; + const markerText = preparedQuestion.multi + ? `${isOptionSelected ? "[x]" : "[ ]"} ` + : `${isOptionSelected ? "●" : "○"} `; + const optionColor = isCursorOption ? "accent" : isOptionSelected ? "success" : "text"; + const prefixWidth = visibleWidth(cursorPrefixText) + visibleWidth(markerText); + const wrappedInlineLabelLines = buildWrappedOptionLabelWithInlineNote( + optionLabel, + getQuestionNote(questionIndex, optionIndex), + isEditingThisOption, + Math.max(1, width - prefixWidth), + INLINE_NOTE_WRAP_PADDING, + ); + const continuationPrefix = " ".repeat(prefixWidth); + addLine(`${cursorPrefix}${theme.fg(optionColor, `${markerText}${wrappedInlineLabelLines[0] ?? 
""}`)}`); + for (const wrappedLine of wrappedInlineLabelLines.slice(1)) { + addLine(`${continuationPrefix}${theme.fg(optionColor, wrappedLine)}`); + } + } + + renderedLines.push(""); + if (isNoteEditorOpen) { + addLine(theme.fg("dim", " Typing note inline • Enter save note • Tab/Esc stop editing")); + } else { + if (preparedQuestion.multi) { + addLine( + theme.fg( + "dim", + " ↑↓ move • Enter toggle/select • Tab add note • ←/→ switch tabs • Esc cancel", + ), + ); + } else { + addLine( + theme.fg("dim", " ↑↓ move • Enter select • Tab add note • ←/→ switch tabs • Esc cancel"), + ); + } + } + }; + + const render = (width: number): string[] => { + if (cachedRenderedLines) return cachedRenderedLines; + + const renderedLines: string[] = []; + const addLine = (line: string) => renderedLines.push(truncateToWidth(line, width)); + + addLine(theme.fg("accent", "─".repeat(width))); + addLine(` ${renderTabs()}`); + renderedLines.push(""); + + if (activeTabIndex === submitTabIndex) { + renderSubmitTab(width, renderedLines); + } else { + renderQuestionTab(width, renderedLines, activeTabIndex); + } + + addLine(theme.fg("accent", "─".repeat(width))); + cachedRenderedLines = renderedLines; + return renderedLines; + }; + + const handleInput = (data: string) => { + if (isNoteEditorOpen) { + if (matchesKey(data, Key.tab) || matchesKey(data, Key.escape)) { + isNoteEditorOpen = false; + requestUiRerender(); + return; + } + noteEditor.handleInput(data); + requestUiRerender(); + return; + } + + if (matchesKey(data, Key.left)) { + activeTabIndex = (activeTabIndex - 1 + preparedQuestions.length + 1) % (preparedQuestions.length + 1); + requestUiRerender(); + return; + } + + if (matchesKey(data, Key.right)) { + activeTabIndex = (activeTabIndex + 1) % (preparedQuestions.length + 1); + requestUiRerender(); + return; + } + + if (activeTabIndex === submitTabIndex) { + if (matchesKey(data, Key.enter) && isAllQuestionSelectionsValid()) { + done(createTabsUiStateSnapshot(false, 
selectedOptionIndexesByQuestion, noteByQuestionByOption)); + return; + } + if (matchesKey(data, Key.escape)) { + done(createTabsUiStateSnapshot(true, selectedOptionIndexesByQuestion, noteByQuestionByOption)); + } + return; + } + + const questionIndex = activeTabIndex; + const preparedQuestion = preparedQuestions[questionIndex]; + + if (matchesKey(data, Key.up)) { + cursorOptionIndexByQuestion[questionIndex] = Math.max(0, cursorOptionIndexByQuestion[questionIndex] - 1); + requestUiRerender(); + return; + } + + if (matchesKey(data, Key.down)) { + cursorOptionIndexByQuestion[questionIndex] = Math.min( + preparedQuestion.options.length - 1, + cursorOptionIndexByQuestion[questionIndex] + 1, + ); + requestUiRerender(); + return; + } + + if (matchesKey(data, Key.tab)) { + openNoteEditorForActiveOption(); + return; + } + + if (matchesKey(data, Key.enter)) { + const cursorOptionIndex = cursorOptionIndexByQuestion[questionIndex]; + + if (preparedQuestion.multi) { + const currentlySelected = selectedOptionIndexesByQuestion[questionIndex]; + if (currentlySelected.includes(cursorOptionIndex)) { + selectedOptionIndexesByQuestion[questionIndex] = removeIndexFromSelection(currentlySelected, cursorOptionIndex); + } else { + selectedOptionIndexesByQuestion[questionIndex] = addIndexToSelection(currentlySelected, cursorOptionIndex); + } + + if ( + cursorOptionIndex === preparedQuestion.otherOptionIndex && + selectedOptionIndexesByQuestion[questionIndex].includes(cursorOptionIndex) && + getTrimmedQuestionNote(questionIndex, cursorOptionIndex).length === 0 + ) { + openNoteEditorForActiveOption(); + return; + } + + requestUiRerender(); + return; + } + + selectedOptionIndexesByQuestion[questionIndex] = [cursorOptionIndex]; + if ( + cursorOptionIndex === preparedQuestion.otherOptionIndex && + getTrimmedQuestionNote(questionIndex, cursorOptionIndex).length === 0 + ) { + openNoteEditorForActiveOption(); + return; + } + + advanceToNextTabOrSubmit(); + requestUiRerender(); + return; + } + + if 
(matchesKey(data, Key.escape)) { + done(createTabsUiStateSnapshot(true, selectedOptionIndexesByQuestion, noteByQuestionByOption)); + } + }; + + return { + render, + invalidate: () => { + cachedRenderedLines = undefined; + }, + handleInput, + }; + }); + + if (result.cancelled) { + return { + cancelled: true, + selections: preparedQuestions.map(() => ({ selectedOptions: [] } satisfies AskSelection)), + }; + } + + const selections = preparedQuestions.map((preparedQuestion, questionIndex) => + buildSelectionForQuestion( + preparedQuestion, + result.selectedOptionIndexesByQuestion[questionIndex] ?? [], + result.noteByQuestionByOption[questionIndex] ?? Array(preparedQuestion.options.length).fill(""), + ), + ); + + return { cancelled: result.cancelled, selections }; +} From afdd5cac56f77e864be317dc865333ba2b9d10d1 Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Tue, 3 Mar 2026 15:12:12 +0700 Subject: [PATCH 035/412] Remove plan --- PLAN.md | 367 -------------------------------------------------------- 1 file changed, 367 deletions(-) delete mode 100644 PLAN.md diff --git a/PLAN.md b/PLAN.md deleted file mode 100644 index cdc5063..0000000 --- a/PLAN.md +++ /dev/null @@ -1,367 +0,0 @@ -# Plan: Subagent Ask Questions via File-Based IPC - -## Context - -### Problem - -Subagents run as headless `pi -p` child processes with no UI access (`ctx.hasUI = false`). When a subagent needs user input during planning — choosing between architectural alternatives, clarifying scope — it has no mechanism to pause, ask the user, and resume with the answer. - -### Design Decisions - -**Single `ipc.json` file per subagent directory.** Both request and response live in one file with `request` and `response` keys. Temporal ownership is safe: the subagent creates the file and then blocks (only reads during the wait), so the parent is the sole writer during the response window. 
A two-file model (request.json + response.json) provides structural ownership at the cost of cleanup complexity and an extra file per interaction. The single-file model is simpler and sufficient because the subagent's blocking poll guarantees no concurrent writes. - -**Tool schema mirrors pi-ask-tool-extension exactly.** The `koan_ask_question` tool accepts the same `{ questions: [{ id, question, options, multi?, recommended? }] }` schema as the existing `ask` tool. LLMs trained on the ask tool schema produce correct calls without schema-specific prompt engineering. - -**Ask UI code copied from pi-ask-tool-extension, not imported.** The pi-ask-tool-extension package is globally installed as a pi extension — it is not in koan's `node_modules` and cannot be imported. The four source files (~1133 lines) are copied into `src/planner/ui/ask/`. All external dependencies (`@mariozechner/pi-coding-agent`, `@mariozechner/pi-tui`) are already available in koan's node_modules. - -**Permission gating via existing PHASE_PERMISSIONS, not conditional registration.** Pi snapshots all tools at init time (`_buildRuntime()`). Tools cannot be added or removed after init. The existing default-deny `PHASE_PERMISSIONS` map in `permissions.ts` controls runtime access. Adding `koan_ask_question` to the three work-phase Sets (`plan-design`, `plan-code`, `plan-docs`) grants access to subagents in those phases. In parent mode, no phase is active, so the tool is blocked automatically. - -**SubagentRef pattern mirrors PlanRef.** Tool registration happens at init when the subagent directory is unknown. A mutable `SubagentRef = { dir: string | null }` created at init is populated at `before_agent_start` when CLI flags are available. The tool reads `subagentRef.dir` at execute time. This matches the established `PlanRef` indirection pattern in `dispatch.ts`. 
- -**Non-error returns for cancellation and abort.** When the user cancels (Escape) or the signal aborts, the tool returns a descriptive non-error message ("The user declined to answer. Proceed with your best judgment."). Error returns cause LLMs to halt or retry; non-error returns guide the LLM to continue productively. - -**Parent detects requests inside existing setInterval poll loops.** The parent's 2-second poll callback in `session.ts` already reads `state.json` for widget updates. Adding an `ipc.json` read to the same callback avoids a separate polling mechanism. A `pendingRequestId` guard variable prevents re-entrant handling — JavaScript's `setInterval` fires regardless of whether the previous async callback completed, so without the guard, every 2-second tick during the user's think-time would re-detect the same request. - -### Constraints - -- Pi snapshots tools at init; all tools must be registered unconditionally before `_buildRuntime()`. -- Subagents run in `-p` mode (print mode) with stdin ignored and stdout/stderr piped to log files — no interactive I/O. -- The parent orchestrator has `ctx.ui` access (confirmed: `session.ts` creates `WidgetController` from `ctx.ui`). -- Atomic file writes use the established tmp+rename pattern (`writeFile(tmp) → rename(tmp, target)`). -- The EventLog heartbeat (10-second `setInterval`) continues during the subagent's blocking poll because `await sleep(500)` yields to the Node.js event loop. `state.json` keeps updating, so the parent sees the subagent as alive. - -### Out of Scope (Deferred) - -- Timeout for parent crash detection — the user is at the terminal and will notice; adding a configurable timeout is a follow-up. -- Process liveness check before showing ask UI — low severity edge case (subagent exits between writing request and parent detecting it). -- Multi-subagent concurrent questions — work phases run sequentially; QR phases are excluded from permissions. 
- -## Implementation - -### ipc.json Schema - -```typescript -// Types live in src/planner/lib/ipc.ts. -// The schema is general-purpose: `type` discriminant supports future request -// types beyond "ask-question" without envelope changes. - -interface IpcFile { - request: IpcRequest; - response: IpcResponse | null; // null while awaiting parent response -} - -interface IpcRequest { - id: string; // crypto.randomUUID() — correlates request to response - type: "ask-question"; // discriminant for routing; extensible to future types - createdAt: string; // ISO 8601 timestamp - payload: AskQuestionPayload; -} - -interface AskQuestionPayload { - questions: Array<{ - id: string; - question: string; - options: Array<{ label: string }>; - multi?: boolean; - recommended?: number; // 0-indexed - }>; -} - -interface IpcResponse { - id: string; // must match request.id - respondedAt: string; // ISO 8601 timestamp - cancelled: boolean; // true when user presses Escape - payload: AskAnswerPayload | null; // null when cancelled -} - -interface AskAnswerPayload { - answers: Array<{ - id: string; // matches question id - selectedOptions: string[]; - customInput?: string; // populated when user selects "Other" - }>; -} -``` - -### NEW: `src/planner/lib/ipc.ts` — IPC File I/O Primitives - -Atomic read/write/delete helpers for `ipc.json`. Both the subagent tool and the parent session use these functions. The atomic write pattern (tmp file → rename) matches `EventLog.writeState()` in `audit.ts`. 
- -**Functions:** -- `writeIpcFile(dir, data)` — atomic write via `.ipc.tmp.json` → `ipc.json` rename -- `readIpcFile(dir)` → `IpcFile | null` — returns null on missing file or parse error (treat parse error as "not ready" to handle partial writes on non-POSIX systems) -- `ipcFileExists(dir)` → `boolean` — fast `fs.access` check without parsing -- `deleteIpcFile(dir)` — removes `ipc.json` and any lingering `.ipc.tmp.json`; swallows ENOENT -- `createAskRequest(payload)` → `IpcFile` — creates file structure with `crypto.randomUUID()` id and `response: null` -- `createAskResponse(requestId, payload)` → `IpcResponse` — response with `cancelled: false` -- `createCancelledResponse(requestId)` → `IpcResponse` — response with `cancelled: true`, `payload: null` - -All types are exported for use by both subagent-side (`tools/ask.ts`) and parent-side (`session.ts`). - -### NEW: `src/planner/tools/ask.ts` — koan_ask_question Tool - -Registers `koan_ask_question` with the pi extension API. The tool schema uses TypeBox definitions identical to pi-ask-tool-extension. Imports `SubagentRef` from `../lib/dispatch.js` (not defined here — it lives in `dispatch.ts` alongside `PlanRef`). - -**Tool execute flow:** - -The entire poll loop is wrapped in a single `try/finally` that calls `deleteIpcFile(dir)`. This guarantees cleanup on all exit paths — success, cancellation, abort, and file disappearance — without requiring per-path deletion logic. - -1. Guard: if `subagentRef.dir` is null, return error (not in subagent context). -2. Guard: if `ipc.json` already exists, return error (one request at a time). -3. Create `IpcFile` via `createAskRequest(payload)`, write atomically. -4. Register `signal.addEventListener("abort", onAbort, { once: true })` for instant abort response. -5. Enter poll loop inside `try`: `while (!aborted) { await sleep(500); check signal; read ipc.json; if response !== null && response.id matches: break }`. -6. 
On response with `cancelled: false`: build `QuestionResult[]`, format via `buildSessionContent()`, return as tool result. (`finally` handles cleanup.) -7. On response with `cancelled: true`: return "The user declined to answer." (`finally` handles cleanup.) -8. On abort: return "The question was aborted." (`finally` handles cleanup.) -9. On file disappearing mid-poll (deleted externally): return "The question was cancelled." (`finally` handles cleanup, swallows ENOENT.) - -**Result formatting** mirrors pi-ask-tool-extension's `buildAskSessionContent()`: -``` -User answers: -auth: JWT - -Answer context: -Question 1 (auth) -Prompt: Which authentication model? -Options: - 1. JWT - 2. Session-based -Response: - Selected: JWT -``` - -### NEW: `src/planner/ui/ask/` — Copied Ask UI Components (4 files) - -Copy these files from `pi-ask-tool-extension/src/` (at `/Users/lmergen/.npm-global/lib/node_modules/pi-ask-tool-extension/src/`): - -1. **`ask-logic.ts`** (~98 lines) — `AskQuestion`, `AskOption`, `AskSelection` types; `OTHER_OPTION` constant; `buildSingleSelectionResult()`, `buildMultiSelectionResult()`, `appendRecommendedTagToOptionLabels()`. -2. **`ask-inline-note.ts`** (~65 lines) — Inline note rendering helpers. Uses `wrapTextWithAnsi` from `@mariozechner/pi-tui`. -3. **`ask-inline-ui.ts`** (~221 lines) — Single-question single-select UI. Renders cursor navigation (↑↓), inline note editing (Tab), submit (Enter) via `ui.custom()`. -4. **`ask-tabs-ui.ts`** (~512 lines) — Multi-question/multi-select tabbed UI. Tab bar (← Q1 Q2 ... ✓ Submit →), per-question option lists, Submit review tab via `ui.custom()`. - -**Import path requirements:** -- Relative import extensions use `.js` suffix: `"./ask-logic"` → `"./ask-logic.js"` (Node16 module resolution requires `.js` extensions in TypeScript source). -- Same for `"./ask-inline-note"` → `"./ask-inline-note.js"`. 
-- External dependencies (`@mariozechner/pi-coding-agent`, `@mariozechner/pi-tui`) resolve from koan's node_modules. - -### MODIFY: `src/planner/lib/dispatch.ts` — Add SubagentRef - -`SubagentRef` and `createSubagentRef()` live alongside `PlanRef` and `createPlanRef()` — both are mutable-ref infrastructure primitives that decouple static tool registration from runtime directory resolution. - -```diff -+// Decouples tool registration (init-time) from subagent directory -+// resolution (runtime, after flags available). Same indirection -+// pattern as PlanRef. -+export interface SubagentRef { -+ dir: string | null; -+} -+ -+export function createSubagentRef(): SubagentRef { -+ return { dir: null }; -+} -``` - -### MODIFY: `src/planner/tools/index.ts` — Thread SubagentRef - -```diff -+import { registerAskTools } from "./ask.js"; -+import type { SubagentRef } from "../lib/dispatch.js"; -+export type { SubagentRef } from "../lib/dispatch.js"; -+export { createSubagentRef } from "../lib/dispatch.js"; - - export function registerAllTools( - pi: ExtensionAPI, - planRef: PlanRef, - dispatch: WorkflowDispatch, -+ subagentRef: SubagentRef, - ): void { - registerWorkflowTools(pi, dispatch); - registerPlanGetterTools(pi, planRef); - registerPlanSetterTools(pi, planRef); - registerPlanDesignEntityTools(pi, planRef); - registerPlanCodeEntityTools(pi, planRef); - registerPlanStructureEntityTools(pi, planRef); - registerQRTools(pi, planRef); -+ registerAskTools(pi, subagentRef); - } -``` - -Note: `SubagentRef` is defined in `lib/dispatch.ts` (alongside `PlanRef`), not in `tools/ask.ts`. `tools/index.ts` re-exports it for convenience, matching the existing re-export pattern for `PlanRef`. 
- -### MODIFY: `extensions/koan.ts` — Create and Wire SubagentRef - -```diff --import { registerAllTools, createDispatch, createPlanRef } from "../src/planner/tools/index.js"; -+import { registerAllTools, createDispatch, createPlanRef, createSubagentRef } from "../src/planner/tools/index.js"; - - const dispatch = createDispatch(); - const planRef = createPlanRef(); -+ const subagentRef = createSubagentRef(); - -- registerAllTools(pi, planRef, dispatch); -+ registerAllTools(pi, planRef, dispatch, subagentRef); - - // In before_agent_start, inside `if (config.subagentDir)`: -+ subagentRef.dir = config.subagentDir; -``` - -The `subagentRef.dir = config.subagentDir` assignment goes immediately after the existing `eventLog = new EventLog(...)` line (L88), inside the same `if (config.subagentDir)` block. In parent mode, `subagentRef.dir` remains null, and the tool's execute returns an error. - -### MODIFY: `src/planner/lib/permissions.ts` — Grant Access to Work Phases - -```diff - [ - "plan-design", - new Set([ - "koan_complete_step", -+ "koan_ask_question", - ...PLAN_GETTER_TOOLS_LIST, - ...PLAN_SETTER_TOOLS_LIST, - ...PLAN_DESIGN_ENTITY_TOOLS, - ]), - ], - [ - "plan-code", - new Set([ - "koan_complete_step", -+ "koan_ask_question", - ...PLAN_GETTER_TOOLS_LIST, - ...PLAN_CHANGE_TOOLS_LIST, - "koan_set_intent", - ]), - ], - [ - "plan-docs", - new Set([ - "koan_complete_step", -+ "koan_ask_question", - ...PLAN_GETTER_TOOLS_LIST, - "koan_set_change_doc_diff", - "koan_set_change_comments", -``` - -QR phases (`qr-plan-design`, `qr-plan-code`, `qr-plan-docs`) omit `koan_ask_question` — reviewers do not ask questions. - -### MODIFY: `src/planner/session.ts` — Parent-Side Request Detection - -**A. 
New imports:** -```typescript -import type { ExtensionUIContext } from "@mariozechner/pi-coding-agent"; -import { readIpcFile, writeIpcFile, createAskResponse, createCancelledResponse, type IpcFile } from "./lib/ipc.js"; -import { askSingleQuestionWithInlineNote } from "./ui/ask/ask-inline-ui.js"; -import { askQuestionsWithTabs } from "./ui/ask/ask-tabs-ui.js"; -import type { AskQuestion } from "./ui/ask/ask-logic.js"; -``` - -**B. New `handleAskRequest()` function** (module-level, alongside `runPlanningPhase`): - -Receives the parent's `ExtensionUIContext` and the parsed `IpcFile`. Routes to the appropriate ask UI based on question count and multi-select: -- Single question, single-select → `askSingleQuestionWithInlineNote(ui, question)` -- Single question, multi-select → `askQuestionsWithTabs(ui, [question])` -- Multiple questions → `askQuestionsWithTabs(ui, questions)` - -Returns an `IpcResponse` (either answered or cancelled). On any exception from the UI layer, returns a cancelled response so the subagent unblocks. - -**C. New `pollWithIpcDetection()` helper** (extracts the common poll-with-request-detection pattern): - -Both the work poll (~L335) and the fix poll (~L737) share the same request detection logic. A shared helper avoids duplication: - -```typescript -import type { LogLine } from "./lib/audit.js"; - -// Encapsulates the poll-with-request-detection pattern used by both -// the work poll loop and the fix poll loop. Returns a setInterval ID. 
-function pollWithIpcDetection( - subagentDir: string, - widget: WidgetController | null, - ui: ExtensionUIContext | null, - stepPrefix: string, - updateFromProjection: (p: Projection, logs: LogLine[]) => void, -): ReturnType<typeof setInterval> { - let pendingRequestId: string | null = null; - - return setInterval(async () => { - // Existing: read projection and update widget - const [projection, logs] = await Promise.all([ - readProjection(subagentDir), - readRecentLogs(subagentDir), - ]); - if (projection) { - updateFromProjection(projection, logs); - } - - // IPC request detection — skip if already handling a request or no UI - if (pendingRequestId || !ui) return; - - const ipc = await readIpcFile(subagentDir); - if (!ipc || !ipc.request || ipc.response !== null) return; - - pendingRequestId = ipc.request.id; - try { - widget?.update({ - step: `${stepPrefix}: waiting for user input...`, - activity: ipc.request.payload.questions[0]?.question ?? "", - }); - - const response = await handleAskRequest(ui, ipc); - const updated: IpcFile = { request: ipc.request, response }; - await writeIpcFile(subagentDir, updated); - } catch { - // On error, write cancelled response so subagent unblocks. - // The inner try-catch guards against I/O failures during error - // recovery — an unguarded throw here would propagate as an - // unhandled async rejection in the setInterval callback, - // crashing the parent process (Node.js ≥15 default behavior). - try { - const cancelled = createCancelledResponse(ipc.request.id); - await writeIpcFile(subagentDir, { request: ipc.request, response: cancelled }); - } catch { - // I/O failed during error recovery; subagent remains blocked - // until parent terminates. No further action possible. - } - } finally { - pendingRequestId = null; - } - }, 2000); -} -``` - -**D.
Thread `ui` through function signatures:** - -- `runPlanningPhase(phase, planDir, cwd, extensionPath, state, log, widget)` → add `ui: ExtensionUIContext | null` -- `runPhaseWithQR(phase, planDir, cwd, extensionPath, state, log, widget)` → add `ui: ExtensionUIContext | null` -- Call site in `plan()`: pass `ctx.hasUI ? ctx.ui : null` - -**E. Work poll loop (~L335):** -The work poll uses `pollWithIpcDetection(subagentDir, widget, ui, phase.key, ...)`. - -**F. Fix poll loop (~L737):** -The fix poll uses `pollWithIpcDetection(fixDir, widget, ui, \`${phase.key} fix ${fixIndex}/${MAX_FIX_ITERATIONS}\`, ...)`. - -### MODIFY: `src/planner/lib/audit.ts` — Log Formatting - -Add `koan_ask_question` to the `KOAN_SHAPES` object for audit log display: - -```typescript -koan_ask_question: { keys: ["questions"], arrays: ["questions"], highValue: true }, -``` - -## Quality Checklist - -- [ ] 01-naming-and-types (design-mode): `SubagentRef` mirrors `PlanRef` naming; `IpcFile`/`IpcRequest`/`IpcResponse` model the domain; `handleAskRequest` describes behavior -- [ ] 02-structure-and-composition (design-mode): `pollWithIpcDetection` extracts shared logic from two poll loops; `handleAskRequest` is single-responsibility; error handling wraps UI calls with cancelled-response fallback -- [ ] 06-module-and-dependencies (design-mode): `lib/ipc.ts` is a pure I/O module with no UI dependencies; `tools/ask.ts` depends on `lib/ipc.ts` and `lib/dispatch.ts` (downward); `session.ts` depends on both `lib/ipc.ts` and `ui/ask/` (same level); no circular deps; `SubagentRef` lives in `lib/dispatch.ts` not in tools layer -- [ ] 07-cross-file-consistency (design-mode): Atomic write pattern matches `EventLog.writeState()`; mutable ref pattern matches `PlanRef`/`WorkflowDispatch` in `lib/dispatch.ts`; permission gating matches existing `PHASE_PERMISSIONS` entries; tool description style matches existing koan tools; error recovery in setInterval callbacks matches `verifyStatsPoll` guarded-catch pattern - -## 
Execution Protocol - -``` -1. delegate @agent-developer: implement per this plan file -2. delegate @agent-quality-reviewer: verify against plan + ~/.claude/conventions/code-quality/ (code-mode) - -When delegating, pass this plan file path. Supplement only with: -- rationale for decisions not captured in plan -- business constraints -- technical prerequisites the agent cannot infer -``` From d2ee2c41699d0cd0d20b446b9a8d8b1258624770 Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Tue, 3 Mar 2026 19:58:25 +0700 Subject: [PATCH 036/412] Subagents can ask questions --- src/planner/phases/plan-code/prompts.ts | 5 ++ src/planner/phases/plan-design/fix-prompts.ts | 5 ++ src/planner/phases/plan-design/prompts.ts | 73 +++++++++++++++---- src/planner/phases/plan-docs/prompts.ts | 19 +++++ src/planner/phases/qr-decompose/prompts.ts | 30 +++++++- src/planner/plan/mutate/decisions.ts | 6 +- src/planner/plan/types.ts | 1 + src/planner/plan/validate.ts | 53 +++++++++++++- src/planner/tools/entity-design.ts | 8 +- src/planner/tools/getters.ts | 5 +- 10 files changed, 184 insertions(+), 21 deletions(-) diff --git a/src/planner/phases/plan-code/prompts.ts b/src/planner/phases/plan-code/prompts.ts index 0aaab34..d6bb9b2 100644 --- a/src/planner/phases/plan-code/prompts.ts +++ b/src/planner/phases/plan-code/prompts.ts @@ -39,6 +39,11 @@ export function buildPlanCodeSystemPrompt(basePrompt: string): string { "- NEVER use edit/write tools during plan-code.", "- Convert every code_intent into at least one code_change with intent_ref.", "- Use unified diffs in code_change.diff.", + "", + "CLARIFICATION:", + "If an intent is ambiguous about implementation (e.g. the behavior is clear", + "but multiple valid code patterns exist), use koan_ask_question to resolve", + "before writing the diff. 
Ask only when the choice materially affects code.", ].join("\n"); } diff --git a/src/planner/phases/plan-design/fix-prompts.ts b/src/planner/phases/plan-design/fix-prompts.ts index d9ec61e..80bd4ce 100644 --- a/src/planner/phases/plan-design/fix-prompts.ts +++ b/src/planner/phases/plan-design/fix-prompts.ts @@ -75,6 +75,11 @@ export function buildFixSystemPrompt( " - Each per-item step targets exactly ONE failure -- do not fix other items", " - Prefer updating existing entities over adding new ones", " - Do not restructure the plan beyond what failures require", + "", + "DECISION SOURCE FIXES:", + "If a failure is about a missing or weak decision source, use", + "koan_ask_question to get user input. Then update the decision with", + "source='user:ask' via koan_set_decision.", ].join("\n"); } diff --git a/src/planner/phases/plan-design/prompts.ts b/src/planner/phases/plan-design/prompts.ts index 928a102..cb2c682 100644 --- a/src/planner/phases/plan-design/prompts.ts +++ b/src/planner/phases/plan-design/prompts.ts @@ -10,7 +10,7 @@ export const STEP_NAMES: Record<1 | 2 | 3 | 4 | 5 | 6, string> = { 2: "Codebase Exploration", 3: "Testing Strategy Discovery", 4: "Approach Generation", - 5: "Assumption Surfacing", + 5: "Ambiguity Resolution", 6: "Milestone Definition & Plan Writing", }; @@ -43,6 +43,17 @@ export function buildPlanDesignSystemPrompt(basePrompt: string): string { "", "CRITICAL: Do the actual work described in each step BEFORE calling", "koan_complete_step. Read files, explore code, analyze. Do not skip.", + "", + "DECISION PROVENANCE:", + "Every decision requires a source tag. 
Valid sources:", + " code: -- derived from reading source code", + " docs: -- derived from project documentation", + " user:ask -- user answered via koan_ask_question", + " user:conversation -- user stated in captured conversation", + " inference -- inferred from patterns (last resort; see step 5 rules)", + "If you cannot ground a decision in code or documentation, use", + "koan_ask_question. Ambiguity resolved by asking is better than", + "ambiguity resolved by assumption.", ].join("\n"); } @@ -121,24 +132,53 @@ export function planDesignStepGuidance( "", "Use exploration findings from step 2 to ground tradeoffs.", "Record approach analysis for step 6.", + "", + "DECISION INVENTORY:", + "For each approach, identify the implicit decisions it makes.", + "For each decision, note the source:", + " - code: -- forced by existing codebase (cite file)", + " - docs: -- specified in project docs (cite file)", + " - user:conversation -- user stated preference in conversation", + " - inference -- your judgment (requires strong reasoning_chain)", + " - UNRESOLVED -- no clear source; flag for step 5", ], }; case 5: return { - title: "Step 5: Assumption Surfacing", + title: "Step 5: Ambiguity Resolution", instructions: [ - "FAST PATH: Skip if task involves NONE of:", - " - Migration to new tech", - " - Policy defaults (lifecycle, capacity, failure handling)", - " - Architectural decisions with multiple valid approaches", - "", - "FULL CHECK (if any apply):", - " Audit each category with OPEN questions:", - " Pattern preservation, Migration strategy, Idiomatic usage,", - " Abstraction boundary, Policy defaults", - "", - "Record assumptions for step 6.", + "Review the decision inventory from step 4.", + "For every decision marked UNRESOLVED or sourced as inference:", + " 1. Can it be grounded in code or docs? Read them.", + " 2. 
If still unsourced, ask the user via koan_ask_question.", + "", + "USE koan_ask_question WHEN:", + " - Multiple approaches have comparable tradeoffs, no codebase precedent", + " - A policy default (timeout, capacity, retry, failure mode) has no value", + " - Migration path or abstraction boundary not dictated by code", + "", + "DO NOT ASK WHEN:", + " - Codebase establishes a clear pattern (source: code:)", + " - Project docs specify the approach (source: docs:)", + " - Only one approach is technically viable", + " - The choice follows directly from an already-sourced decision", + "", + "INFERENCE RULES (source: inference):", + " Acceptable: airtight reasoning, no viable alternative, follows from", + " existing constraints, standard practice with one correct answer.", + " NOT acceptable: hedging language, policy defaults, public API choices,", + " or any decision where a senior engineer might reasonably disagree.", + "", + "Good questions offer concrete options grounded in codebase evidence:", + " BAD: 'How should we handle errors?'", + " GOOD: 'Error propagation: (A) return Result matching src/foo.ts,", + " (B) throw + catch at boundary matching src/bar.ts'", + "", + "FAST PATH: If all decisions have code/docs/conversation sources,", + "skip asking and record this finding.", + "", + "After resolving, every decision has a concrete source. 
No UNRESOLVED.", ], }; @@ -152,6 +192,13 @@ export function planDesignStepGuidance( " BAD: 'Polling | Webhooks unreliable'", " GOOD: 'Use polling | 30% webhook failure -> need fallback anyway -> polling simpler'", "", + "Every koan_add_decision call MUST include a source parameter:", + " - code: -- derived from existing code (cite file)", + " - docs: -- from project documentation (cite file)", + " - user:ask -- asked the user via koan_ask_question", + " - user:conversation -- user stated in original conversation", + " - inference -- architect judgment (use sparingly; needs strong chain)", + "", "Use the following tools to build the plan:", "", "OVERVIEW & CONSTRAINTS:", diff --git a/src/planner/phases/plan-docs/prompts.ts b/src/planner/phases/plan-docs/prompts.ts index 081f08a..dcc8a91 100644 --- a/src/planner/phases/plan-docs/prompts.ts +++ b/src/planner/phases/plan-docs/prompts.ts @@ -43,6 +43,12 @@ export function buildPlanDocsSystemPrompt(basePrompt: string): string { "- Populate code_change.doc_diff for code changes.", "- Keep comments and docs timeless (no temporal contamination).", "- Keep architecture diagrams and README entries aligned with plan intent.", + "", + "USER-DECIDED DECISIONS:", + "Decisions with source user:ask or user:conversation have NO existing", + "reference in the codebase. These MUST be documented in code comments,", + "doc_diff, or README entries so future readers understand the rationale", + "without needing to ask the same question again.", ].join("\n"); } @@ -58,6 +64,11 @@ export function planDocsStepGuidance( "Use koan_get_plan to review decisions, constraints, risks, and milestones.", "Capture decision IDs that should be reflected in documentation rationale.", "", + "PRIORITY: Identify all decisions with source user:ask or user:conversation.", + "These have NO existing reference in code or docs -- the user provided", + "the authority. They MUST be documented. 
Track these IDs; steps 3-4", + "must cover every one.", + "", ...buildPlanDocsContextTrigger(conversationPath ?? "/conversation.jsonl"), "", "This step is read-only.", @@ -90,6 +101,12 @@ export function planDocsStepGuidance( " - Every code change with diff should have doc_diff", " - comments explain WHY (reference decisions where applicable)", " - Avoid temporal language (no 'added', 'changed from', 'now')", + "", + "USER-SOURCED DECISIONS (source user:ask / user:conversation):", + " These have no existing codebase reference. For each one that affects", + " a code change, the comment or doc_diff MUST capture the rationale so", + " future readers do not need to re-ask the same question.", + " Reference the decision ID (e.g. 'See DL-003') in the comment.", ], }; @@ -128,6 +145,8 @@ export function planDocsStepGuidance( " - all code changes with diff have doc_diff", " - comments/doc diffs are coherent and timeless", " - readme/diagram updates are present when needed", + " - every user-sourced decision (source user:*) is referenced", + " in at least one comment, doc_diff, or README entry", "", "Fix remaining issues before completing.", ], diff --git a/src/planner/phases/qr-decompose/prompts.ts b/src/planner/phases/qr-decompose/prompts.ts index e66c9d1..bb5fd81 100644 --- a/src/planner/phases/qr-decompose/prompts.ts +++ b/src/planner/phases/qr-decompose/prompts.ts @@ -33,7 +33,7 @@ export const DECOMPOSE_STEP_NAMES: Record = { const PHASE_SCOPE_HINTS: Record = { "plan-design": [ - "decision:DL-001 -- decision reasoning quality", + "decision:DL-001 -- decision reasoning quality and source provenance", "milestone:M-001 -- milestone structure", "code_intent:CI-M-001-001 -- intent clarity", ], @@ -46,6 +46,7 @@ const PHASE_SCOPE_HINTS: Record = { "milestone:M-001 -- docs completeness", "change:CC-M-001-001 -- doc_diff/comments quality", "diagram:DIAG-001 -- architecture docs fidelity", + "decision:DL-001 -- user-sourced decision docs coverage", ], }; @@ -93,6 +94,32 @@ 
export function buildDecomposeSystemPrompt(basePrompt: string, phase: WorkPhaseK ].join("\n"); } +// Phase-specific holistic concerns injected into step 2. +// plan-design adds decision source provenance checks; +// plan-docs adds user-sourced decision documentation coverage. +function holisticConcernAdditions(phase: WorkPhaseKey): string[] { + if (phase === "plan-design") { + return [ + "", + "Include decision provenance as a concern:", + " - Every decision must have a non-null source", + " - Sources must be verifiable (code/docs paths should exist)", + " - Decisions sourced as inference need strong reasoning_chain", + " - No systematic inference labeling (if >50% of decisions are", + " inference, flag as umbrella concern)", + ]; + } + if (phase === "plan-docs") { + return [ + "", + "Include user-sourced decision documentation as a concern:", + " - Decisions with source user:ask or user:conversation must be", + " referenced in at least one comment, doc_diff, or README entry", + ]; + } + return []; +} + export function decomposeStepGuidance( step: DecomposeStep, phase: WorkPhaseKey, @@ -119,6 +146,7 @@ export function decomposeStepGuidance( `List phase-wide concerns for ${phase}.`, "Focus on quality/completeness/consistency concerns, not implementation details.", "These become umbrella items (scope='*').", + ...holisticConcernAdditions(phase), ], }; diff --git a/src/planner/plan/mutate/decisions.ts b/src/planner/plan/mutate/decisions.ts index e5e7d1f..a43107b 100644 --- a/src/planner/plan/mutate/decisions.ts +++ b/src/planner/plan/mutate/decisions.ts @@ -12,13 +12,14 @@ import { export function addDecision( p: Plan, - data: { decision: string; reasoning: string }, + data: { decision: string; reasoning: string; source?: string }, ): { plan: Plan; id: string } { const id = nextDecisionId(p); const decision: Decision = { id, decision: data.decision, reasoning_chain: data.reasoning, + source: data.source ?? 
null, }; return { plan: { @@ -35,7 +36,7 @@ export function addDecision( export function setDecision( p: Plan, id: string, - data: { decision?: string; reasoning?: string }, + data: { decision?: string; reasoning?: string; source?: string }, ): Plan { const idx = p.planning_context.decision_log.findIndex((d) => d.id === id); if (idx === -1) throw new Error(`decision ${id} not found`); @@ -45,6 +46,7 @@ export function setDecision( ...d, decision: data.decision ?? d.decision, reasoning_chain: data.reasoning ?? d.reasoning_chain, + source: data.source ?? d.source, }; const log = [...p.planning_context.decision_log]; diff --git a/src/planner/plan/types.ts b/src/planner/plan/types.ts index 518b54e..4d21ca9 100644 --- a/src/planner/plan/types.ts +++ b/src/planner/plan/types.ts @@ -2,6 +2,7 @@ export interface Decision { id: string; decision: string; reasoning_chain: string; + source: string | null; } export interface RejectedAlternative { diff --git a/src/planner/plan/validate.ts b/src/planner/plan/validate.ts index c5ecedd..bfb4f52 100644 --- a/src/planner/plan/validate.ts +++ b/src/planner/plan/validate.ts @@ -7,6 +7,56 @@ import type { Plan } from "./types.js"; export interface ValidationResult { ok: boolean; errors: string[]; + warnings?: string[]; +} + +// -- Decision source provenance -- + +// Canonical source types for the type:ref format. +// "code" and "docs" carry a path ref; others stand alone. +const VALID_SOURCE_TYPES = [ + "code", "docs", "user:ask", "user:conversation", "inference", +] as const; + +export type DecisionSourceType = (typeof VALID_SOURCE_TYPES)[number]; + +const SOURCE_TYPE_SET: ReadonlySet<string> = new Set(VALID_SOURCE_TYPES); + +// Parses "code:src/foo.ts" -> { type: "code", ref: "src/foo.ts" } +// Parses "inference" -> { type: "inference", ref: null } +// Returns null for unrecognized formats.
+export function parseDecisionSource( + s: string, +): { type: DecisionSourceType; ref: string | null } | null { + const colon = s.indexOf(":"); + if (colon === -1) { + return SOURCE_TYPE_SET.has(s) ? { type: s as DecisionSourceType, ref: null } : null; + } + const prefix = s.substring(0, colon); + const rest = s.substring(colon + 1); + // "user:ask" and "user:conversation" are complete types, not type:ref pairs + const full = `${prefix}:${rest}`; + if (SOURCE_TYPE_SET.has(full)) return { type: full as DecisionSourceType, ref: null }; + // "code:" and "docs:" are type:ref pairs + if (SOURCE_TYPE_SET.has(prefix)) return { type: prefix as DecisionSourceType, ref: rest }; + return null; +} + +// Produces warnings (not errors) for decisions with missing or invalid sources. +// Soft validation: legacy plans have source: null; hard failures cause death loops. +export function validateDecisionSources(p: Plan): string[] { + const warnings: string[] = []; + for (const d of p.planning_context.decision_log) { + if (!d.source) { + warnings.push(`${d.id}: missing source -- expected code:, docs:, user:ask, user:conversation, or inference`); + continue; + } + const parsed = parseDecisionSource(d.source); + if (!parsed) { + warnings.push(`${d.id}: unrecognized source "${d.source}" -- expected code:, docs:, user:ask, user:conversation, or inference`); + } + } + return warnings; } export function validatePlanDesign(p: Plan): ValidationResult { @@ -26,7 +76,8 @@ export function validatePlanDesign(p: Plan): ValidationResult { } } - return { ok: errors.length === 0, errors }; + const warnings = validateDecisionSources(p); + return { ok: errors.length === 0, errors, warnings }; } export function validateRefs(p: Plan): ValidationResult { diff --git a/src/planner/tools/entity-design.ts b/src/planner/tools/entity-design.ts index 06552ee..c6e5e7d 100644 --- a/src/planner/tools/entity-design.ts +++ b/src/planner/tools/entity-design.ts @@ -66,16 +66,17 @@ export function 
registerPlanDesignEntityTools( planTool(pi, planRef, { name: "koan_add_decision", label: "Add decision", - description: "Add decision to decision log.", + description: "Add decision to decision log. Source identifies where authority came from (e.g. code:src/foo.ts, docs:CLAUDE.md, user:ask, user:conversation, inference).", parameters: Type.Object({ decision: Type.String(), reasoning: Type.String(), + source: Type.String({ description: "Provenance: code:, docs:, user:ask, user:conversation, or inference" }), }), execute: (p, params) => { const r = addDecision(p, params); return { plan: r.plan, - message: `Added decision ${r.id}: "${params.decision}"`, + message: `Added decision ${r.id}: "${params.decision}" [source: ${params.source}]`, }; }, }); @@ -83,11 +84,12 @@ export function registerPlanDesignEntityTools( planTool(pi, planRef, { name: "koan_set_decision", label: "Update decision", - description: "Update existing decision by ID.", + description: "Update existing decision by ID. Omitting source preserves the existing value.", parameters: Type.Object({ id: Type.String(), decision: Type.Optional(Type.String()), reasoning: Type.Optional(Type.String()), + source: Type.Optional(Type.String({ description: "Provenance: code:, docs:, user:ask, user:conversation, or inference" })), }), execute: (p, params) => { const updated = setDecision(p, params.id, params); diff --git a/src/planner/tools/getters.ts b/src/planner/tools/getters.ts index 712fc3d..d7924bb 100644 --- a/src/planner/tools/getters.ts +++ b/src/planner/tools/getters.ts @@ -138,7 +138,10 @@ function formatPlanSummary(p: Plan): string { ...p.milestones.map((m) => ` ${m.id}: ${m.name}`), "", `Decisions (${p.planning_context.decision_log.length}):`, - ...p.planning_context.decision_log.map((d) => ` ${d.id}: ${d.decision}`), + ...p.planning_context.decision_log.map((d) => { + const src = d.source ? 
` [${d.source}]` : " [no source]"; + return ` ${d.id}: ${d.decision}${src}`; + }), "", `Waves (${p.waves.length}):`, ...p.waves.map((w) => ` ${w.id}: [${w.milestones.join(", ")}]`), From 4bb51d356b60174aab2a57256064b903a6973ec2 Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Wed, 4 Mar 2026 11:31:48 +0700 Subject: [PATCH 037/412] Remove unused files --- QR_ANALYSIS.md | 643 ----------------------------------- QR_ANALYSIS_COMPREHENSIVE.md | 640 ---------------------------------- 2 files changed, 1283 deletions(-) delete mode 100644 QR_ANALYSIS.md delete mode 100644 QR_ANALYSIS_COMPREHENSIVE.md diff --git a/QR_ANALYSIS.md b/QR_ANALYSIS.md deleted file mode 100644 index 54ffc1f..0000000 --- a/QR_ANALYSIS.md +++ /dev/null @@ -1,643 +0,0 @@ -# QR Failure Handling & Fix Mode Analysis - -## Executive Summary - -This document analyzes how QR (Quality Review) failures halt execution in the koan plan-design phase and how the reference executor implements fix loops. The analysis covers three key questions: - -1. **Does QR failure halt the plan-design phase?** YES -- failures trigger a deterministic gate that either spawns a fix loop or force-proceeds after max iterations. -2. **What is the plan specification for QR fix loops?** Architect is re-spawned with `--koan-fix` flag and a QR failure report appended to context. -3. **What are the executor modes?** Initial mode (first-time work) vs. fix mode (targeted repair after QR failures). 
- ---- - -## Part 1: QR Failure Halts Execution (Confirmed) - -### How the QR Gate Works (Reference Executor) - -The reference executor in `~/.claude/skills/scripts/skills/planner/orchestrator/executor.py` implements a **9-step workflow** for execution (not planning): - -``` -Step 1: Execution Planning (analyze, build wave list) -Step 2: Reconciliation (validate existing code) -Step 3: Implementation (dispatch developers) -Step 4: Code QR (quality review of code) -Step 5: Code QR GATE (route pass/fail) <-- HALTS on FAIL -Step 6: Documentation (TW pass) -Step 7: Doc QR (quality review of docs) -Step 8: Doc QR GATE (route pass/fail) <-- HALTS on FAIL -Step 9: Retrospective -``` - -**Key excerpt from executor.py:** - -```python -CODE_QR_GATE = GateConfig( - qr_name="Code QR", - work_step=3, # If FAIL: loop back to step 3 - pass_step=6, # If PASS: advance to step 6 - pass_message="Code quality verified. Proceed to documentation.", - fix_target=AgentRole.DEVELOPER, # Developer fixes issues -) - -def format_gate(step: int, gate: GateConfig, qr: QRState, total_steps: int) -> str: - """Format gate step output.""" - if qr.passed: - next_cmd = f"python3 -m {MODULE_PATH} --step {gate.pass_step}" - else: - next_iteration = qr.iteration + 1 - next_cmd = f"python3 -m {MODULE_PATH} --step {gate.work_step} --qr-fail --qr-iteration {next_iteration}" - return format_step(body, next_cmd, title=f"{gate.qr_name} Gate") -``` - -**Execution halts on FAIL** because: -- QR GATE step 5 checks `qr.passed` property -- If FAIL: routes back to step 3 (implementation) with `--qr-fail` flag -- Step 3 detects fix mode and spawns developer with targeted repair instructions -- No automatic proceed to step 6 (documentation) - -### How the QR Gate Works (Koan Plan-Design) - -The koan project applies the same pattern to the plan-design phase. 
Based on the plan specification (section 4.2 and 5): - -``` -Plan-Design Phase (Architect): - ├─ execution: spawn architect subagent - │ (6-step exploration + plan writing) - │ - ├─ qr-decompose: spawn decomposer subagent - │ (13-step QR item generation) - │ - ├─ qr-verify: pool of reviewer subagents - │ (parallel verification, PASS/FAIL per item) - │ - └─ gate (deterministic code, no LLM) - PASS -> advance to plan-code - FAIL -> re-spawn architect with fix report (up to 5x) - iteration escalates severity filtering - after 5 iterations, force-proceed -``` - -**Plan specification (section 4.2.1 "QR Gate"):** - -```typescript -function routeGate( - phase: Phase, - qrResult: "pass" | "fail", - iteration: number, -): NextStep { - if (qrResult === "pass") { - deleteQRState(phase); - return nextPhase(phase); - } - const maxIterations = 5; - if (iteration >= maxIterations) { - return nextPhase(phase); // Force proceed, document remaining issues - } - return { phase, subPhase: "execution", mode: "fix", iteration: iteration + 1 }; -} -``` - -**Execution halts on FAIL** because: -- Gate routing is deterministic (pure code, no LLM) -- FAIL does not auto-advance -- Only PASS or max-iterations advances to next phase -- Fix mode spawns architect fresh with failure report - ---- - -## Architecture Pattern (From Old System) - -### Two-Phase Workflow Pattern - -QR operates in two distinct phases per plan phase (plan-design, plan-code, plan-docs, impl-code, impl-docs): - -1. **DECOMPOSITION** (QR Decompose) - - 8-step LLM workflow generating atomic verification items - - Creates `qr-{phase}.json` with items array - - Each item: `{id, scope, check, status: "TODO", severity, [parent_id], [group_id]}` - - Grouping logic (steps 9-13) organizes items by: parent-child, umbrella, component, concern, affinity - -2. 
**VERIFICATION** (QR Verify) - - Parallel dispatch of single items via `--qr-item` flag - - Each subagent verifies ONE item (ANALYZE -> CONFIRM -> SUMMARY pattern) - - Atomic mutation via `cli/qr.py` with file locking (no race conditions) - - Output: one-word PASS/FAIL only (findings in CLI --finding flag) - -### Key Files in Old System - -**Decomposition Scripts:** -- `/Users/lmergen/.claude/skills/scripts/skills/planner/quality_reviewer/plan_design_qr_decompose.py` -- `plan_code_qr_decompose.py` -- `plan_docs_qr_decompose.py` -- Shared: `skills/planner/quality_reviewer/prompts/decompose.py` (8-step workflow, grouping logic) - -**Verification Base:** -- `skills/planner/quality_reviewer/qr_verify_base.py` (VerifyBase class, step routing, item loading) -- Specific: `plan_design_qr_verify.py`, `plan_code_qr_verify.py`, `plan_docs_qr_verify.py` -- Shared: `skills/planner/shared/qr/utils.py` (load_qr_state, get_qr_item, format_qr_item_for_verification) - -**CLI Tools:** -- `skills/planner/cli/qr.py` (update-item with file locking) -- `skills/planner/cli/qr_commands.py` (update_item function, atomic write) - -## Decomposition Workflow (8 Steps) - -### Step 1: Absorb Context -- Load context.json and plan.json from STATE_DIR -- Parse planning context (overview, constraints, invisible knowledge) -- Task: Summarize in 2-3 sentences what success looks like for this phase - -### Step 2: Holistic Concerns (Top-Down) -- Brainstorm concerns specific to the phase (out-of-scope items explicitly excluded) -- Phase-specific examples (e.g., plan-design: "Missing decisions", "Policy defaults without backing") -- Output: Bulleted list, quantity over quality - -### Step 3: Structural Enumeration (Bottom-Up) -- List plan elements that exist in plan.json -- Use IDs where available (DL-001, M-001, etc.) 
-- Phase-specific (e.g., plan-design: decisions, constraints, risks, milestones, code_intents) - -### Step 4: Gap Analysis (Shared) -- Compare Step 2 concerns vs Step 3 elements -- Identify gaps: concerns not covered by elements, elements with no concerns -- Output: Umbrella vs specific items, cross-cutting vs targeted - -### Step 5: Generate Items (Phase-Specific Severity) -- Create verification items with UMBRELLA + SPECIFIC pattern -- Assign severity (MUST/SHOULD/COULD per phase-specific rules) -- Format: `{id, scope, check, status: "TODO", severity}` - -### Step 6: Atomicity Check (Shared) -- Review each item for atomicity (tests ONE thing, unambiguous pass/fail) -- Split non-atomic MUST items into parent + children (qa-002 -> qa-002a, qa-002b) -- Children inherit parent's severity, have parent_id field - -### Step 7: Coverage Validation (Shared) -- Use Step 3 enumeration as checklist -- Verify each element has at least one item covering it -- Verify each concern from Step 2 has at least one item -- Add items if gaps found (prefer over-coverage) - -### Step 8: Finalize -- Write qr-{phase}.json to STATE_DIR -- Format: `{phase, iteration: 1, items: [...]}` -- No fixed item count; content-driven - -### Steps 9-13: Grouping (Shared) -- **Step 9:** Structural grouping (deterministic: parent-child resolution, umbrella batching) -- **Step 10:** Component grouping (items verifying different aspects of same element) -- **Step 11:** Concern grouping (items checking same quality dimension across elements) -- **Step 12:** Affinity grouping (semantic similarity for remaining items) -- **Step 13:** Final validation (naming conventions, large group review, singleton review) - -## Phase-Specific Prompts - -### Plan-Design Phase - -**Step 1 Absorb:** -``` -Read plan.json from STATE_DIR: - cat $STATE_DIR/plan.json | jq '.' - -SCOPE: Plan structure and decision quality. 
- -Focus on: - - planning_context.decisions (completeness, reasoning quality) - - planning_context.constraints (all documented?) - - planning_context.risks (identified and addressed?) - - milestones[].code_intents (structure present?) - - invisible_knowledge (captured?) - -OUT OF SCOPE (verified in later phases): - - Code correctness (plan-code phase) - - Documentation quality (plan-docs phase) -``` - -**Step 2 Concerns:** -``` -Brainstorm concerns specific to PLAN STRUCTURE: - - Missing decisions (non-obvious choices not logged) - - Policy defaults without user backing - - Orphan milestones (no code_intents) - - Invalid references (decision_refs point nowhere) - - Reasoning chains too shallow - - Risks identified but not addressed - -DO NOT brainstorm code or documentation concerns (out of scope) -``` - -**Step 3 Enumeration:** -``` -For plan-design, enumerate PLAN STRUCTURE ARTIFACTS: - -DECISIONS: - - Each decision in planning_context.decisions (ID, decision text) - - Has reasoning? Multi-step chain? - -CONSTRAINTS: - - Each constraint in planning_context.constraints (ID, type) - - User-specified or inferred? - -RISKS: - - Each risk in planning_context.risks (ID, risk text) - - Has mitigation? 
- -MILESTONES: - - Each milestone (ID, name, count of code_intents) - - Each code_intent with decision_refs (ID, which decisions referenced) - -INVISIBLE KNOWLEDGE: - - system, invariants[], tradeoffs[] content -``` - -**Step 5 Severity (Plan-Design):** -``` -SEVERITY ASSIGNMENT (per conventions/severity.md, plan-design scope): - - MUST (blocks all iterations): - - DIAGRAM categories: - * ORPHAN_NODE: node with zero edges - * INVALID_EDGE_REF: edge references missing node - * INVALID_SCOPE_REF: scope references non-existent milestone - - KNOWLEDGE subset: - * DECISION_LOG_MISSING: non-trivial choice without logged rationale - * POLICY_UNJUSTIFIED: policy default without Tier 1 backing - * ASSUMPTION_UNVALIDATED: architectural assumption without citation - - SHOULD (iterations 1-4): - - Shallow reasoning chains (premise without implication) - - Missing risk mitigations - - Incomplete constraint documentation - - COULD (iterations 1-3): - - Cosmetic plan formatting - - Minor inconsistencies in naming -``` - -**Component Examples:** -``` - - A milestone - - A major decision - - A constraint category -``` - -**Concern Examples:** -``` - - Reasoning chain quality - - Reference integrity - - Risk coverage -``` - -### Plan-Code Phase - -**Step 1 Absorb:** -``` -Read plan.json from STATE_DIR: - cat $STATE_DIR/plan.json | jq '.' - -SCOPE: Code correctness in planned changes. 
- -Focus on: - - milestones[].code_intents[] -- what changes are intended - - milestones[].code_changes[] -- actual diff content - - code_changes[].diff (context lines must match codebase) - - code_changes[].why_comments[].decision_ref (refs must exist) - -OUT OF SCOPE (already verified in plan-docs phase): - - Documentation quality (temporal contamination, WHY-not-WHAT) - - README/CLAUDE.md content - - Invisible knowledge coverage -``` - -**Step 2 Concerns:** -``` -Brainstorm concerns specific to CODE CORRECTNESS: - - Context lines don't match actual codebase - - Diff format violations (missing +/- prefixes, wrong line counts) - - Code_intents without corresponding code_changes - - Invalid decision_refs in why_comments - - Type errors, missing imports, API mismatches - - Convention violations (per project style) - -DO NOT brainstorm documentation concerns (out of scope for this phase). -``` - -**Step 3 Enumeration:** -``` -For plan-code, enumerate CODE CHANGE ARTIFACTS: - -INTENTS: - - Each milestone's code_intents (ID, description) - - Intent-to-change mapping (which intents have changes?) - -CHANGES: - - Each code_change (ID, file path, line range) - - Files touched across all changes - - Context line locations requiring verification - -REFERENCES: - - decision_refs in why_comments (do they exist in planning_context?) 
- -DO NOT enumerate: - - documentation{} fields (plan-docs's job) - - readme_entries (plan-docs's job) -``` - -**Step 5 Severity (Plan-Code):** -``` -SEVERITY ASSIGNMENT (per conventions/severity.md, plan-code scope): - - MUST (blocks all iterations): - - ASSUMPTION_UNVALIDATED: architectural assumption without citation - - MARKER_INVALID: intent marker without valid explanation - - decision_ref references non-existent decision - - SHOULD (iterations 1-4) - STRUCTURE categories: - - GOD_OBJECT: >15 methods OR >10 deps - - GOD_FUNCTION: >50 lines OR >3 nesting - - CONVENTION_VIOLATION: violates documented project convention - - TESTING_STRATEGY_VIOLATION: tests don't follow confirmed strategy - - COULD (iterations 1-3) - COSMETIC: - - TOOLCHAIN_CATCHABLE: errors the compiler/linter would flag - - FORMATTER_FIXABLE: style issues fixable by formatter - - DEAD_CODE: unused functions, impossible branches - -DO NOT use KNOWLEDGE categories for documentation issues -- -those are plan-docs's responsibility. 
-``` - -**Component Examples:** -``` - - A file being modified - - A module/package - - A code_intent cluster -``` - -**Concern Examples:** -``` - - Error handling consistency - - Type safety across boundaries - - Testing boundary clarity -``` - -### Plan-Docs Phase - -**Step 1 Absorb:** -Similar structure, focus on doc_diff fields in code_changes - -**Step 2 Concerns:** -- Temporal contamination in doc_diffs (change-relative language) -- Baseline references (documentation assumes prior state) -- doc_diffs missing for non-empty diffs -- decision_refs in doc_diffs not captured - -**Step 3 Enumeration:** -- doc_diff content per code_change -- documentation{} fields (function docstrings, module comments) -- readme_entries content -- decision_log coverage in documentation - -**Step 5 Severity (Plan-Docs):** -Only KNOWLEDGE categories (TW cannot fix code): -- TEMPORAL_CONTAMINATION -- BASELINE_REFERENCE (doc assumes prior state) -- MISSING_DOC_DIFF (diff present, doc_diff absent) -- DECISION_UNCOVERED (decision not referenced in any doc_diff) - -## Verification Workflow (Per-Item Parallelization) - -### Base Class: VerifyBase - -Handles: -1. Step type routing (CONTEXT, ANALYZE, CONFIRM, SUMMARY) -2. Item loading from qr-{phase}.json -3. 
Dynamic step formula: `total_steps = 1 + (2 * num_items) + 1` - - Step 1: CONTEXT (load shared state) - - Steps 2..2N+1: ANALYZE/CONFIRM pairs per item - - Final step: SUMMARY - -### CONTEXT Step -- Load conventions, phase rules, context.json, plan.json -- List all items to verify -- Display severity and scope for each - -### ANALYZE Step (Per Item) -- Explore codebase if needed -- Form preliminary conclusion (PASS or FAIL) -- Task output: narrative analysis (no tool call yet) - -### CONFIRM Step (Per Item) -- Verify confidence in preliminary conclusion -- Execute CLI command to record result: - ```bash - python3 -m skills.planner.cli.qr --state-dir {state_dir} --qr-phase {phase} \ - update-item {item_id} --status PASS - ``` - Or: - ```bash - python3 -m skills.planner.cli.qr --state-dir {state_dir} --qr-phase {phase} \ - update-item {item_id} --status FAIL --finding '<finding text>' - ``` -- Tool call blocks until file lock released (atomic update) - -### SUMMARY Step -- Count results, output single word: PASS or FAIL -- EXACT FORMAT REQUIRED: - - Output EXACTLY ONE WORD: "PASS" or "FAIL" - - No markdown headers (##, **) - - No "VERDICT:" prefix - - No explanation, prose, or reasoning - - Findings go in --finding flag, NOT in output - -## Phase-Specific Verification Guidance - -### Plan-Design Verification - -**Macro Check (scope: "\*"):** -``` -MACRO CHECK - Verify across entire plan.json: - - Read plan.json: - cat {state_dir}/plan.json | jq '.' 
-``` - -**Milestone Check (scope: "milestone:M-001"):** -``` -MILESTONE CHECK - Focus on M-001: - - Read milestone: - cat {state_dir}/plan.json | jq '.milestones[] | select(.id == "M-001")' -``` - -**Code Intent Check (scope: "code_intent:CI-M-001-001"):** -``` -CODE INTENT CHECK - Focus on CI-M-001-001: - - Read intent (find containing milestone first): - cat {state_dir}/plan.json | jq '.milestones[].code_intents[] | select(.id == "CI-M-001-001")' -``` - -**Check-Specific Guidance:** - -Decision Log Verification: -``` -DECISION LOG VERIFICATION: - - Each entry should have multi-step reasoning - - BAD: 'Polling | Webhooks unreliable' - - GOOD: 'Polling | 30% webhook failure -> need fallback anyway' -``` - -Policy Default Verification: -``` -POLICY DEFAULT VERIFICATION: - - Policy defaults affect user/org (lifecycle, capacity, failure handling) - - Must have Tier 1 (user-specified) backing in decision_log - - Technical defaults can use Tier 2-3 backing -``` - -Code Intent Verification: -``` -CODE INTENT VERIFICATION: - - Each implementation milestone needs code_intents - - Each code_intent needs file path and behavior - - decision_refs should point to valid decision_log entries -``` - -### Plan-Code Verification - -Similar structure with code-specific checks: -- Context line verification (diff patterns exist in actual files) -- Diff format validation (RULE 0/1/2) -- Intent linkage (code_change.intent_ref valid) -- Decision ref validity -- Temporal contamination in comments -- WHY-not-WHAT quality - -### Plan-Docs Verification - -Doc-specific checks: -- Temporal contamination in doc_diffs -- Baseline references (doc assumes prior state) -- Code without docs (diff present, doc_diff absent) -- Invalid diff format -- Decision coverage in docs -- WHY-not-WHAT verification -- Missing docstrings - -## Data Structures - -### QR Item (qr-{phase}.json) - -```typescript -interface QRItem { - id: string; // e.g., "plan-001", "qa-002a" - scope: string; // "*" (macro) or 
"element:ID" or "file:path" - check: string; // Description of what to verify - status: "TODO" | "PASS" | "FAIL"; - severity?: "MUST" | "SHOULD" | "COULD"; // Default: "SHOULD" - finding?: string; // Only for FAIL status - parent_id?: string; // For split items (qa-002a has parent_id: "qa-002") - group_id?: string; // For grouping (umbrella, component-*, concern-*, affinity-*, parent-*) - version?: number; // Default: 1, incremented on each update -} - -interface QRState { - phase: string; // "plan-design", "plan-code", etc. - iteration: number; // Current iteration (1 on first decompose) - items: QRItem[]; -} -``` - -### Severity Blocking Rules - -Per iteration: -- Iteration 1: MUST blocks all 4 iterations of fixes, SHOULD blocks iterations 1-4, COULD blocks 1-3 -- Iteration 2: MUST blocks iterations 2-5, SHOULD blocks 2-5, COULD blocks 2-4 -- Iteration 3: MUST blocks iterations 3-6, SHOULD blocks 3-6, COULD blocks 3-5 -- Iteration 4: MUST blocks iterations 4+, SHOULD blocks 4+, COULD blocks 4+ -- After iteration 4: No blocking (move to manual review) - -## Integration with Koan Architecture - -### Expected File Structure -``` -src/planner/phases/ - qr/ - decompose/ - phase.ts # QRDecomposePhase class (8-step workflow) - prompts.ts # Phase-specific step prompts - verify/ - phase.ts # QRVerifyPhase class (item-based verification) - prompts.ts # Verification guidance per phase - lib/ - items.ts # QRItem type, load/save, atomic mutations - grouping.ts # Steps 9-13 grouping logic -``` - -### Phase Registration -```typescript -// In phases/dispatch.ts -if (config.role === "quality-reviewer" && config.phase === "plan-design") { - const phase = new QRDecomposePhase(...); - await phase.begin(); -} -``` - -### Tool Registration -- QR tools likely smaller subset than plan-design (mainly read tools, no plan mutations) -- Tools may include: qr_update_item (atomic write), qr_load_state (read), qr_get_item (lookup) - -## Critical Implementation Notes - -### 1. 
Decomposition is Single-Run -- Decompose runs ONCE per phase (steps 1-8, 9-13) -- Orchestrator skips decompose if qr-{phase}.json already exists with iteration >=1 -- Each phase has own decomposition script (can't share due to phase-specific prompts) - -### 2. Verification is Parallel -- Each item dispatched as separate subagent with --qr-item flag -- File locking in CLI prevents race conditions -- No shared state mutation; each agent writes its own result atomically - -### 3. Step Gates Must Use Blocklists -- Whitelist fails open (blocks read tools unintentionally) -- Blocklist defers to checkPermission for everything not explicitly gated -- Example: `if (step < 6 && PLAN_MUTATION_TOOLS.has(name)) { block }` - -### 4. Findings in CLI Flag, Not Output -- Tool result is NOT return value; findings go in `--finding` flag -- SUMMARY step outputs ONE WORD only (PASS or FAIL) -- This avoids "text + tool_call in same response" bug (GPT-5-codex) - -### 5. invoke_after Two-Part Gate -- Every step prompt ends with "WHEN DONE: call koan_complete_step" -- Tool description includes "Do NOT call until told" -- Dual gates ensure single transition per step - -### 6. Disk-Backed Mutations -- Every tool mutation writes qr-{phase}.json immediately -- No finalize pattern; descriptive feedback on each write -- This prevents LLM from skipping intermediate mutations - -### 7. 
Severity Blocking vs Iteration Count -- Blocking set determined at gate time, not item creation time -- by_blocking_severity(iteration) is a predicate factory -- Iteration 0 not used; iteration 1 is first decompose, iteration 2+ are retries - -## Migration Checklist - -- [ ] Create QRDecomposePhase class with 8-step + 5-step grouping workflow -- [ ] Implement phase-specific prompts for plan-design, plan-code, plan-docs -- [ ] Create QRVerifyPhase class with CONTEXT/ANALYZE/CONFIRM/SUMMARY routing -- [ ] Implement VerifyBase-like step mapping (total_steps formula, item routing) -- [ ] Implement atomic QRItem mutations with file locking -- [ ] Add qr_update_item tool (wrapper around file-locked write) -- [ ] Add qr_load_state, qr_get_item tools (read-only) -- [ ] Register phases in dispatch.ts for quality-reviewer role -- [ ] Add QR phase detection to before_agent_start handler -- [ ] Implement SUMMARY step output validation (one word only) -- [ ] Test decompose single-run enforcement (skip if iteration >=1) -- [ ] Test parallel verify with file locking (concurrent writes) -- [ ] Test severity blocking at iteration thresholds -- [ ] Copy exact prompts from Python scripts (no rewriting) diff --git a/QR_ANALYSIS_COMPREHENSIVE.md b/QR_ANALYSIS_COMPREHENSIVE.md deleted file mode 100644 index 29b04ff..0000000 --- a/QR_ANALYSIS_COMPREHENSIVE.md +++ /dev/null @@ -1,640 +0,0 @@ -# QR Failure Handling & Fix Mode Analysis - -## Executive Summary - -This document analyzes how QR (Quality Review) failures halt execution in the koan plan-design phase and how the reference executor implements fix loops. The analysis covers three key questions: - -1. **Does QR failure halt the plan-design phase?** YES -- failures trigger a deterministic gate that either spawns a fix loop or force-proceeds after max iterations. -2. **What is the plan specification for QR fix loops?** Architect is re-spawned with `--koan-fix` flag and a QR failure report appended to context. -3. 
**What are the executor modes?** Initial mode (first-time work) vs. fix mode (targeted repair after QR failures). - ---- - -## Part 1: QR Failure Halts Execution (Confirmed) - -### How the QR Gate Works (Reference Executor) - -The reference executor in `~/.claude/skills/scripts/skills/planner/orchestrator/executor.py` implements a **9-step workflow** for execution: - -``` -Step 1: Execution Planning (analyze, build wave list) -Step 2: Reconciliation (validate existing code) -Step 3: Implementation (dispatch developers) -Step 4: Code QR (quality review of code) -Step 5: Code QR GATE (route pass/fail) <-- HALTS on FAIL -Step 6: Documentation (TW pass) -Step 7: Doc QR (quality review of docs) -Step 8: Doc QR GATE (route pass/fail) <-- HALTS on FAIL -Step 9: Retrospective -``` - -**Key excerpt from executor.py:** - -```python -CODE_QR_GATE = GateConfig( - qr_name="Code QR", - work_step=3, # If FAIL: loop back to step 3 - pass_step=6, # If PASS: advance to step 6 - pass_message="Code quality verified. Proceed to documentation.", - fix_target=AgentRole.DEVELOPER, # Developer fixes issues -) - -def format_gate(step: int, gate: GateConfig, qr: QRState, total_steps: int) -> str: - """Format gate step output.""" - if qr.passed: - next_cmd = f"python3 -m {MODULE_PATH} --step {gate.pass_step}" - else: - next_iteration = qr.iteration + 1 - next_cmd = f"python3 -m {MODULE_PATH} --step {gate.work_step} --qr-fail --qr-iteration {next_iteration}" - return format_step(body, next_cmd, title=f"{gate.qr_name} Gate") -``` - -**Execution halts on FAIL** because: -- QR GATE step 5 checks `qr.passed` property -- If FAIL: routes back to step 3 (implementation) with `--qr-fail` flag -- Step 3 detects fix mode and spawns developer with targeted repair instructions -- No automatic proceed to step 6 (documentation) - -### How the QR Gate Works (Koan Plan-Design) - -The koan project applies the same pattern. 
Based on the plan specification (section 4.2 and 5 of plans/2026-02-10-init.md): - -``` -Plan-Design Phase (Architect): - ├─ execution: spawn architect subagent - │ (6-step exploration + plan writing) - │ - ├─ qr-decompose: spawn decomposer subagent - │ (13-step QR item generation) - │ - ├─ qr-verify: pool of reviewer subagents - │ (parallel verification, PASS/FAIL per item) - │ - └─ gate (deterministic code, no LLM) - PASS -> advance to plan-code - FAIL -> re-spawn architect with fix report (up to 5x) - iteration escalates severity filtering - after 5 iterations, force-proceed -``` - -**Plan specification routing logic (section 4.2.1):** - -```typescript -function routeGate( - phase: Phase, - qrResult: "pass" | "fail", - iteration: number, -): NextStep { - if (qrResult === "pass") { - deleteQRState(phase); - return nextPhase(phase); - } - const maxIterations = 5; - if (iteration >= maxIterations) { - return nextPhase(phase); // Force proceed, document remaining issues - } - return { phase, subPhase: "execution", mode: "fix", iteration: iteration + 1 }; -} -``` - -**Execution halts on FAIL** because: -- Gate routing is deterministic (pure code, not prompt-based) -- FAIL does not auto-advance -- Only PASS or max-iterations advances to next phase -- Fix mode spawns architect fresh with failure report - ---- - -## Part 2: Plan Specification for QR Fix Loops - -### Fix Mode Activation - -From plan section 4.2 "First attempt vs. fix mode": - -> When a phase's QR gate returns FAIL, the orchestrator re-spawns the subagent with an additional flag (`--koan-fix`) and appends the QR failure report to the context file. The subagent's role hooks detect fix mode and adjust step instructions to focus on fixing specific issues identified by the QR. - -**Mechanism:** - -1. **Gate detects FAIL** → compute `iteration + 1` -2. 
**Orchestrator spawns subagent** with: - - `--koan-fix` flag (new) - - `--koan-fix-iteration N` flag (new) - - Same `--koan-plan-dir` (plan.json + context.json + qr-plan-design.json all present) -3. **Context file is mutated** to append QR failures: - - Original 8 context categories remain (read-only) - - QR failures appended in a new `qr_failures` section -4. **Role hooks detect fix mode** via flags in `before_agent_start` -5. **Step instructions adjust** to focus on fixing - -### Reference Architect Fix Prompt - -The reference architect fix script is `~/.claude/skills/scripts/skills/planner/architect/plan_design_qr_fix.py` (3-step workflow): - -**Step 1: Load QR Failures** - -``` -FIX MODE - QR Iteration {qr_iteration} - -QR-COMPLETENESS found issues in the plan. - -FAILED QR ITEMS TO FIX (address these FIRST): -================================================ -[plan-001] Decision log completeness - Scope: decision_log entry DL-005 - Finding: Decision reference missing backing premise - -[plan-002] Code intent specification - Scope: code_intent id CI-M-001-001 - Finding: Behavior description incomplete (unclear acceptance criteria) - -================================================ - -PLANNING CONTEXT (reference for semantic validation): -(context.json displayed for validation reference) - -For EACH failed item: - 1. Read the 'finding' field to understand the issue - 2. Identify what in plan.json needs to change - 3. Note the fix approach for step 2 -``` - -**Step 2: Apply Targeted Fixes** - -``` -APPLY targeted fixes to plan.json using CLI commands. 
- -Missing decision_log entry: - python3 -m skills.planner.cli.plan --state-dir $STATE_DIR set-decision \ - --decision '<decision text>' \ - --reasoning '<premise -> implication -> conclusion>' - -BATCH MODE (preferred): - python3 -m skills.planner.cli.plan --state-dir $STATE_DIR batch '[ - {"method": "set-decision", "params": {...}, "id": 1}, - {"method": "set-intent", "params": {...}, "id": 2} - ]' - -CONSTRAINT: Fix ONLY the failing items. Don't refactor passing items. -``` - -**Step 3: Validate Fixes** - -``` -Run structural validation: - python3 -m skills.planner.cli.plan validate --phase plan-design - -SELF-CHECK each fixed item: - For each FAIL item you addressed: - - Does the fix address the specific finding? - - Does the fix introduce new issues? - -If validation passes: - Your complete response must be exactly: PASS - Do not add summaries, explanations, or any other text. -``` - -### Key Design Points in Fix Mode - -1. **QR failures explicitly listed** -- The architect sees exactly which items failed + why (the "finding" field) -2. **Plan mutations via existing CLI** -- Fix mode doesn't add new mutation tools, just focuses the prompt on specific items -3. **Targeted not holistic** -- Fix mode does NOT re-explore codebase. It reads the QR report and applies surgical fixes. -4. **No flailing** -- The constraint "Fix ONLY the failing items" prevents second-guessing the entire plan -5. **Validation is mandatory** -- Each fix iteration must pass `python3 -m ... validate` before reporting PASS - -### Iteration Escalation with Severity Filtering - -QR items have a `severity` field: MUST | SHOULD | COULD - -**Severity filtering logic (implied by shared/qr/constants.py):** - -```python -def get_blocking_severities(iteration: int) -> Set[str]: - """Items that block at this iteration. - - iteration 1: MUST only - iteration 2: MUST, SHOULD - iteration 3+: MUST, SHOULD, COULD (all) - """ -``` - -**Meaning:** On iteration 1, only critical (MUST) items block. 
By iteration 3, even minor (COULD) items block. This escalates pressure to fix progressively more issues. - ---- - -## Part 3: Executor Modes (Initial vs. Fix) - -### Reference Executor: Initial Mode - -When a phase is first executed (no prior failures): - -**Step 3: Implementation (Initial Mode)** - -```python -def format_step_3_implementation(qr: QRState, total_steps: int, ...) -> str: - if qr.state == LoopState.RETRY: - # Fix mode (handled separately) - ... - else: - # Initial mode - actions.extend([ - "Execute ALL milestones using wave-aware parallel dispatch.", - "", - "WAVE-AWARE EXECUTION:", - " - Milestones within same wave: dispatch in PARALLEL", - " - Waves execute SEQUENTIALLY", - "", - "FOR EACH WAVE:", - " 1. Dispatch developer agents for ALL milestones in wave", - " 2. Each prompt includes: plan, milestone, files, acceptance criteria", - " 3. Wait for ALL agents in wave to complete", - " 4. Run tests: pytest / tsc / go test -race", - " 5. Proceed to next wave", - "", - "After ALL waves complete, proceed to Code QR.", - ]) -``` - -**Initial mode** is the "full breadth" mode: -- No prior failures to fix -- Execute all milestones -- Waves in sequence, milestones within wave in parallel -- Standard tests + validation - -### Reference Executor: Fix Mode - -When a QR gate returns FAIL and iteration < 5: - -**Step 3: Implementation (Fix Mode)** - -```python -def format_step_3_implementation(qr: QRState, total_steps: int, ...) 
-> str: - if qr.state == LoopState.RETRY: - actions.append(format_state_banner("IMPLEMENTATION FIX", qr.iteration, "fix")) - actions.append("FIX MODE: Code QR found issues.") - actions.append("") - - mode_script = get_mode_script_path("dev/fix-code.py") - invoke_cmd = f"python3 -m {mode_script} --step 1 --qr-fail --qr-iteration {qr.iteration}" - - actions.append(subagent_dispatch( - agent_type="developer", - command=invoke_cmd, - )) - actions.append("Developer reads QR report and fixes issues in blocks.") - actions.append("After developer completes, re-run Code QR for fresh verification.") -``` - -**Fix mode** is the "targeted repair" mode: -- QR failures are present (in memory and on disk) -- Dispatch specialized fix agent (different script/prompts) -- Agent reads QR failure items -- Agent applies fixes to milestones mentioned in failures -- Re-run QR immediately after (fresh verification) - -### Comparison Table - -| Aspect | Initial Mode | Fix Mode | -|--------|--------------|----------| -| **Trigger** | First execution | QR FAIL (iteration < 5) | -| **Context** | No prior failures | QR items with status=FAIL + findings | -| **Scope** | All milestones | Only milestones in QR failures | -| **Agent Dispatch** | Full work agent | Specialized fix agent | -| **Step Sequence** | Role's standard N-step | 3-step fix workflow | -| **Tools Available** | Full read + write | Same tools (focus via prompt) | -| **Exit Condition** | Role completes final step | PASS to QR (no FAIL) | -| **Next** | Proceed to QR decompose | Re-run QR immediately | -| **Iteration** | N/A | 1, 2, 3, ... 
(max 5) | - -### How the Executor Decides Which Mode - -**Flag detection in executor.py:** - -```python -# format_step_3_implementation -state = LoopState.RETRY if qr_fail else LoopState.INITIAL - -# Gate's FAIL routing: -next_cmd = f"python3 -m {MODULE_PATH} --step {work_step} --qr-fail --qr-iteration {next_iteration}" -``` - -When gate returns FAIL, step 3 is re-invoked with `--qr-fail --qr-iteration 2`, and the formatter detects fix mode. - ---- - -## Part 4: Reference Implementation Deep Dive - -### Shared QR Infrastructure - -Located in `~/.claude/skills/scripts/skills/planner/shared/qr/`: - -**types.py:** - -```python -class QRStatus(Enum): - PASS = "pass" - FAIL = "fail" - -class LoopState(Enum): - INITIAL = "initial" - RETRY = "retry" - COMPLETE = "complete" - -@dataclass -class QRState: - iteration: int = 1 - state: LoopState = LoopState.INITIAL - status: QRStatus | None = None - - @property - def passed(self) -> bool: - return self.status == QRStatus.PASS - - def transition(self, status: QRStatus) -> None: - if status == QRStatus.PASS: - self.state = LoopState.COMPLETE - else: - self.state = LoopState.RETRY - self.iteration += 1 - -@dataclass -class GateConfig: - qr_name: str - work_step: int # Where to loop back on FAIL - pass_step: int | None # Where to go on PASS - pass_message: str - fix_target: AgentRole | None # Developer / Writer / Architect -``` - -**gates.py:** - -```python -def build_gate_output( - module_path: str, - qr_name: str, - qr: QRState, - work_step: int, - pass_step: int | None, - pass_message: str, - fix_target: AgentRole | None, - state_dir: str, -) -> GateResult: - """Build complete gate step output for QR gates. 
- - Gates route to either: - - pass_step: QR passed, proceed to next workflow phase - - work_step: QR failed, loop back to fix issues - """ - if qr.passed: - next_cmd = f"python3 -m {module_path} --step {pass_step}" - else: - next_cmd = f"python3 -m {module_path} --step {work_step} --state-dir {state_dir}" - - return GateResult( - output=format_step(body, next_cmd, title=title), - terminal_pass=qr.passed and pass_step is None, - ) -``` - -### How the Architect Fix Prompts Load QR Failures - -**plan_design_qr_fix.py, step 1:** - -```python -def get_step_guidance(step: int, module_path: str = None, **kwargs) -> dict: - if step == 1: - state_dir = kwargs.get("state_dir", "") - qr_iteration = get_qr_iteration(state_dir, PHASE) - - # Load failed items from qr-{phase}.json - qr_state = load_qr_state(state_dir, PHASE) - failed_items_block = format_failed_items_for_fix(qr_state) - - return { - "title": STEPS[1], - "actions": [ - f"FIX MODE - QR Iteration {qr_iteration}", - "", - "QR-COMPLETENESS found issues in the plan.", - "", - failed_items_block, # <- Explicit list of failures - "", - "For EACH failed item:", - " 1. Read the 'finding' field to understand the issue", - " 2. Identify what in plan.json needs to change", - " 3. Note the fix approach for step 2", - ], - } -``` - -**format_failed_items_for_fix output example:** - -``` -============================================================ -FAILED QR ITEMS TO FIX (address these FIRST): -============================================================ - -[QR-plan-design-001] Decision completeness - Scope: decision_log entry (id: DL-003) - Finding: Caching strategy selected but no justification. - -[QR-plan-design-002] Intent specification - Scope: code_intent (id: CI-M-001-001) - Finding: Behavior unclear: "Add caching layer" -- where? What TTL? - -[QR-plan-design-003] Risk documentation - Scope: known_risks - Finding: Redis failure mode not documented. 
- -============================================================ -``` - ---- - -## Part 5: Koan's QR Specification - -### Section 4.2: QR Block Pattern - -**Plan-Design Phase Structure:** - -``` -Phase 2: PLAN-DESIGN -├─ Execution (architect explores + writes plan) -├─ QR Decompose (decomposer generates items) -├─ QR Verify (reviewers verify items) -└─ Gate (route PASS->phase3 or FAIL->reexecute_with_fix) -``` - -### Section 4.2.1: QR Decomposition (13-step Workflow) - -The decomposer produces items with: -- `id`: unique item ID -- `scope`: `*` (cross-cutting) or element reference -- `check`: the verification question -- `status`: TODO | PASS | FAIL -- `finding`: explanation of FAIL (populated by reviewers) -- `severity`: MUST | SHOULD | COULD - -### Section 4.2.2: QR Verification (Parallel Subagents) - -Each reviewer subagent: -1. Receives assigned item group -2. For each item: ANALYZE -> CONFIRM -> update state -3. Returns per-item status -4. Aggregate: ANY FAIL = phase FAIL - -### Section 4.2.3: Fix Mode (Key Design Decision) - -From section 4.2: - -> When a phase's QR gate returns FAIL, the orchestrator re-spawns the subagent with an additional flag (`--koan-fix`) and appends the QR failure report to the context file. The subagent's role hooks detect fix mode and adjust step instructions to focus on fixing specific issues identified by the QR. - ---- - -## Part 6: Koan Implementation - -### Key Difference: Single Phase Handler vs. 
Separate Scripts - -**Reference executor:** -- `architect/plan_design_execute.py` (6 steps, first-time) -- `architect/plan_design_qr_fix.py` (3 steps, targeted repair) -- Separate scripts for each mode - -**Koan design:** -- Single `PlanDesignPhase` handler -- Phase hooks detect `--koan-fix` flag -- Step prompts adjust at runtime in the `context` event handler -- Same tools, same workflow -- just different prompt text - -### Koan Implementation Pattern (Inferred) - -```typescript -// src/planner/phases/plan-design/phase.ts - -export class PlanDesignPhase { - private state: PlanDesignState & { - fixMode: boolean; - fixIteration: number; - }; - - async begin(): Promise { - // Detect fix mode from flags - this.state.fixMode = this.pi.getFlag("koan-fix") === "true"; - this.state.fixIteration = parseInt(this.pi.getFlag("koan-fix-iteration") || "0"); - - // Load context.json (with QR failures appended if fixMode) - const contextPath = path.join(this.planDir, "context.json"); - const raw = await fs.readFile(contextPath, "utf8"); - this.state.contextData = JSON.parse(raw) as ContextData; - // context.qr_failures populated by orchestrator if fixMode - } - - private registerHandlers(): void { - this.pi.on("context", (event) => { - if (this.state.step !== 1) return undefined; - - let prompt = this.state.step1Prompt; - - // Adjust for fix mode - if (this.state.fixMode) { - prompt = adjustPromptForFixMode( - prompt, - this.state.fixIteration, - this.state.contextData.qr_failures, - ); - } - - const messages = event.messages.map((m) => - m.role === "user" ? 
{ ...m, content: prompt } : m, - ); - return { messages }; - }); - } -} - -function adjustPromptForFixMode( - basePrompt: string, - iteration: number, - failures: Array<{id: string; scope: string; finding: string}>, -): string { - // Replace exploration sections with fix guidance - // Prepend: list of failed items + findings - // Add constraint: "Fix ONLY these items" - // Add validation guidance -} -``` - -### Orchestrator-Side: Appending QR Failures to Context - -When gate returns FAIL: - -```typescript -// 1. Load qr-plan-design.json -const qrPath = path.join(planDir, "qr-plan-design.json"); -const qr = JSON.parse(await fs.readFile(qrPath, "utf8")); - -// 2. Filter FAIL items -const failures = qr.items.filter(item => item.status === "FAIL").map(item => ({ - id: item.id, - scope: item.scope, - finding: item.finding, -})); - -// 3. Load context.json -const contextPath = path.join(planDir, "context.json"); -const context = JSON.parse(await fs.readFile(contextPath, "utf8")); - -// 4. Append failures -context.qr_failures = failures; -context.qr_iteration = iteration; - -// 5. Write back (atomic) -await writeContext(planDir, context); - -// 6. Spawn architect in fix mode -spawn("pi", [ - "-p", - "-e", extensionPath, - "--koan-role", "architect", - "--koan-phase", "plan-design", - "--koan-plan-dir", planDir, - "--koan-fix", "true", - "--koan-fix-iteration", String(iteration), - "Fix the plan issues identified in the QR report.", -]); -``` - ---- - -## Summary Table: Initial vs. 
Fix Mode - -| Dimension | Initial Mode | Fix Mode | -|-----------|--------------|----------| -| **QR State** | None (first execution) | FAIL (previous iteration) | -| **Orchestrator Decision** | Execute (fresh start) | Fix (failures present) | -| **Flags** | None | `--koan-fix true --koan-fix-iteration N` | -| **Context File** | 8 categories only | ^^ + `qr_failures` array | -| **Step Sequence** | 1=analysis, 2=exploration, ..., 6=write | 1=load failures, 2=fix, 3=validate | -| **Scope** | All codebase areas relevant to task | Only areas in QR failures | -| **Tools** | Full set (read + write) | Same set (focus via prompt) | -| **Exit** | PASS to orchestrator -> QR decompose | PASS to orchestrator -> re-run QR | -| **Iteration** | Not applicable | 1, 2, 3, ... (max 5) | -| **Severity Filter** | N/A | Escalates per iteration | -| **Outcome** | plan.json artifact | Updated plan.json (surgical fixes) | - ---- - -## Conclusion - -**QR failures halt execution in koan's plan-design phase** because the QR gate is deterministic code. The gate examines the QR result and either: -1. PASS → advance to next phase -2. FAIL + iteration < 5 → spawn architect in fix mode with failure report -3. FAIL + iteration >= 5 → force-proceed to next phase - -**Fix mode is a targeted repair workflow** that differs from initial mode by: -- Running a 3-step workflow (load -> fix -> validate) instead of N-step exploration -- Reading QR failures from context + disk -- Focusing fixes on listed items only -- Escalating severity requirements each iteration - -**The reference executor provides the exact implementation patterns** that koan follows, with the improvement that koan consolidates execute/fix logic into one phase handler via prompt adjustment, rather than separate scripts. 
- From 29cc2a3701bca463606d379a8042209a0dbe3dbb Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Wed, 4 Mar 2026 15:39:44 +0700 Subject: [PATCH 038/412] refactor: split runQRBlock into runQRDecompose + runQRVerify MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit runQRBlock unconditionally spawned the QR decomposer on every call, but runPhaseWithQR called it both for the initial run and after each fix iteration — causing the 13-step decompose workflow to re-execute on every QR cycle instead of just the first. Split at the natural seam between decompose (item generation) and verify (item evaluation): - runQRDecompose: spawns decomposer, carries forward PASS statuses, called once before the fix loop - runQRVerify: resets FAIL→TODO, groups items, spawns reviewer pool, called on every iteration The fix loop structurally cannot invoke decompose — it only calls runQRVerify. No boolean flags or runtime guards needed. --- src/planner/session.ts | 40 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 36 insertions(+), 4 deletions(-) diff --git a/src/planner/session.ts b/src/planner/session.ts index a0ec935..ecd79a4 100644 --- a/src/planner/session.ts +++ b/src/planner/session.ts @@ -541,7 +541,8 @@ async function runPlanningPhase( return qr; } -async function runQRBlock( + +async function runQRDecompose( planDir: string, cwd: string, extensionPath: string, @@ -646,6 +647,31 @@ async function runQRBlock( } } + return { summary: `${phase} QR decompose complete.`, passed: true }; +} + +async function runQRVerify( + planDir: string, + cwd: string, + extensionPath: string, + phase: WorkPhaseKey, + state: WorkflowState, + log: Logger, + widget: WidgetController | null, +): Promise { + const qrPath = qrFilePath(planDir, phase); + + let qr: QRFile; + try { + const raw = await fs.readFile(qrPath, "utf8"); + qr = JSON.parse(raw) as QRFile; + } catch (error) { + state.phase = "qr-decompose-failed"; + const message = error instanceof Error 
? error.message : String(error); + log("Failed to read QR file for verify", { phase, error: message }); + return { summary: `${phase} QR verify aborted: cannot read QR file.`, passed: false }; + } + const resetFailures = qr.items.filter((i) => i.status === "FAIL").length; if (resetFailures > 0) { qr = { @@ -686,6 +712,7 @@ async function runQRBlock( widget?.update({ step: `${phase} qr-verify: 0/${groupEntries.length} groups (${totalTodoItems} items)`, activity: "", + qrPhase: "verify", qrTotal: totalItems, qrDone: preservedPass, qrPass: preservedPass, @@ -707,7 +734,6 @@ async function runQRBlock( }); state.phase = "qr-verify-running"; - widget?.update({ qrPhase: "verify" }); let verifyDone = 0; let failedReviewers: string[] = []; @@ -826,7 +852,13 @@ async function runPhaseWithQR( ): Promise { const qrPath = qrFilePath(planDir, phase.key); - let qr = await runQRBlock(planDir, cwd, extensionPath, phase.key, state, log, widget); + const decompose = await runQRDecompose(planDir, cwd, extensionPath, phase.key, state, log, widget); + if (!decompose.passed) { + widget?.update({ phaseStatus: { index: phase.widgetIndex, status: "failed" } }); + return decompose; + } + + let qr = await runQRVerify(planDir, cwd, extensionPath, phase.key, state, log, widget); if (qr.passed) { widget?.update({ qrPhase: "done", phaseStatus: { index: phase.widgetIndex, status: "completed" } }); return qr; @@ -936,7 +968,7 @@ async function runPhaseWithQR( subagentDone: 1, }); - qr = await runQRBlock(planDir, cwd, extensionPath, phase.key, state, log, widget); + qr = await runQRVerify(planDir, cwd, extensionPath, phase.key, state, log, widget); if (qr.passed) { widget?.update({ qrPhase: "done", phaseStatus: { index: phase.widgetIndex, status: "completed" } }); return qr; From fa6196cdbdd89f0daf668b347c28015dd8982cde Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Thu, 5 Mar 2026 10:04:21 +0700 Subject: [PATCH 039/412] refactor(widget): unify runtime section and remove current-step panel --- 
docs/planning-widget.md | 69 ++++----- src/planner/ui/widget.ts | 316 +++++++++------------------------------ tests/widget.test.ts | 58 +++++++ 3 files changed, 165 insertions(+), 278 deletions(-) diff --git a/docs/planning-widget.md b/docs/planning-widget.md index 2bbffb1..639a4c0 100644 --- a/docs/planning-widget.md +++ b/docs/planning-widget.md @@ -7,7 +7,7 @@ The planning widget now follows the design-deck contract selected on Feb 25 2026 - **Navigation direction:** Vertical Timeline Rail - **Header strategy:** Full-width top border + metadata header row (active phase in header, no tabs strip) - **Log strategy:** Declarative shape-table serialization + dense two-column layout -- **QR strategy:** Inline integrated section (not a detached sub-card) +- **Runtime strategy:** Unified runtime section (stage + quality + workers) integrated into the detail pane The goal is to keep a long-running (1-2h) planning session readable in real time while preserving high-signal audit telemetry. @@ -37,15 +37,14 @@ The goal is to keep a long-running (1-2h) planning session readable in real time **Rationale:** Preserves temporal fidelity while increasing information density and keeping the "what just happened" answer immediate, even under constrained widths. -### 4) QR is a first-class workflow section -- QR renders inline in detail pane with divider rule (no detached mini-card border). +### 4) Runtime is a first-class workflow section +- Runtime renders inline in the detail pane (no detached mini-card border). - Visible during Plan design, Plan code, and Plan docs (and contractually Plan execution). -- QR starts directly in the **`execute`** stage for iteration 1 (non-fix mode); fix iterations reuse the same stage model. -- QR block is normalized to a fixed structure: header, phase rail, counters, divider. -- Metadata is budgeted to **64 visible chars max** and progressively compacted (`phase/iter/mode` -> `iN/M`, `d/p/f/t`) when width is constrained. 
-- Counter line emphasizes severity: `fail` is error-colored; `pass` is accent; others remain muted/dim. +- Runtime unifies stage + quality counters + worker counters in one block. +- Stage follows the QR lifecycle (`execute`, `decompose`, `verify`, `done`) but uses user-facing labels (`Writing`, `Fixing`, `Analyzing`, `Verifying`, `Complete`). +- Quality counters emphasize severity: `FAIL` is error-colored; `pass` is accent; others remain muted/dim. -**Rationale:** QR is not optional side telemetry; it is the acceptance loop for the plan. The UI should communicate that structural importance while remaining legible and shape-stable at smaller widths. +**Rationale:** Review quality and worker throughput are part of one runtime story. Unifying them removes competing mini-status bars while keeping the left timeline as the primary progress signal. ### 5) Header-first metadata, tabs removed - Keep a full top border and put active workflow context directly in the header row. @@ -60,13 +59,13 @@ The goal is to keep a long-running (1-2h) planning session readable in real time ┌────────────────────────────────────────────────────────────────────────────────┐ │ Planning · Plan design · CURRENT 12m 22s │ │ │ -│ ● Plan design Current step │ -│ │ CURRENT Step 2/6: Codebase Exploration │ -│ │ read internal/rules/CLAUDE.md · 17L/1.2k │ -│ ○ Plan code QR | phase:execute · iter 1/6 initial │ -│ │ UPCOMING Execute → QR decompose → QR verify │ -│ ○ Plan docs done:0/- pass:0 fail:0 todo:- │ -│ UPCOMING Subagents queued:0 active:1 done:0 │ +│ ● Plan design Runtime │ +│ │ CURRENT stage : Writing (cycle 1/6 · initial) │ +│ │ quality : checked -/- pass - FAIL - remaining - │ +│ ○ Plan code workers : queued 0 active 1 done 0 pool ×1 │ +│ │ UPCOMING │ +│ ○ Plan docs │ +│ UPCOMING │ │ Plan ID : │ │ Agent : architect │ │ Model : openai-codex/gpt-5.3-codex │ @@ -87,12 +86,13 @@ The goal is to keep a long-running (1-2h) planning session readable in real time 4. 
**No tabs strip** – Do not render a separate phase-tabs row under the header. Active phase context now lives in header metadata. 5. **Timeline rail** – Maintain status icon/color semantics (`active=accent`, `done=dim`, `failed=error`). 6. **Detail pane** – Render in this order: - - a dim section label (`Current step`) to create hierarchy - - step title + optional activity - - QR integrated section (if visible) - - subagent counters (`queued/active/done`) when available + - Runtime section (if stage/quality/workers are active) - identity table (`Plan ID`, `Agent`/`Agent pool`, `Model`) pinned low in pane -7. **QR section** – Use inline header + phase rail + metadata line + divider. Avoid nested border style to keep it visually native to the right pane. Keep line geometry stable (fixed 3-line payload + divider) and enforce a 64-char metadata budget before clamping to pane width. +7. **Runtime section** – Use inline `Runtime` header plus key/value rows: + - `stage` + cycle metadata + - `quality` counters (`checked/pass/FAIL/remaining`) + - `workers` counters (`queued/active/done`) + pool capacity + Keep this as one cohesive block to avoid competing status bars. 8. **Latest log section** – Keep it inside the same outer card, separated by a horizontal divider. Reuse the same left/right column split (`timelineWidth` / `detailWidth`) and gap as the planning body so vertical alignment stays consistent. ## Header + Alignment Contract @@ -133,19 +133,21 @@ Apply in order until it fits: - `qrDone`, `qrTotal`, `qrPass`, `qrFail`, `qrTodo` ## Future Work (contracted, not yet implemented) -- Plan execution phase should reuse the same QR integrated section semantics. +- Plan execution phase should reuse the same Runtime section semantics. - Optional compact mode for very narrow terminals can reduce metadata verbosity while preserving deterministic ordering. 
-## Update: Runtime Domains + Subagent Identity (2026-02-26) +## Update: Unified Runtime Section + Subagent Identity (2026-03-04) -This update captures follow-up decisions for showing subagent model information -and clarifying QR vs. parallel subagent semantics. +This update replaces the split QR/subagent status blocks with a single runtime +status section in the right pane. -### Domain split (do not merge) -- **QR section** tracks quality state: `todo`, `pass`, `fail`. -- **Subagents section** tracks execution state: `queued`, `active`, `done`. -- These are sibling runtime views. They are related in workflow, but not - collapsed into one metric family. +### Runtime merge (stage + quality + workers) +- The detail pane now has one **Runtime** section. +- Runtime includes: + - `stage` (`Writing` / `Fixing` / `Analyzing` / `Verifying` / `Complete`) with cycle metadata. + - `quality` counters (`checked`, `pass`, `FAIL`, `remaining`). + - `workers` counters (`queued`, `active`, `done`) plus pool capacity. +- The left timeline remains the primary progress signal. ### `x` meaning in parallel mode - `x` means configured pool capacity (target parallelism), not active count. @@ -171,10 +173,5 @@ Label/value rule: - otherwise -> `Agent : ` ### View-composition pattern -Use section-level selectors/renderers (React-view-like composition without -React) so QR, subagent status, and identity/footer blocks are independently -composable and testable. - -### Decision hygiene -A separate "layout pattern" decision was deemed redundant once the domain split -was chosen; track it as derived behavior, not as a distinct product decision. +Use section-level selectors/renderers so `runtime-status` and `identity` remain +independently composable and testable. 
diff --git a/src/planner/ui/widget.ts b/src/planner/ui/widget.ts index 14a0391..bfe684e 100644 --- a/src/planner/ui/widget.ts +++ b/src/planner/ui/widget.ts @@ -340,79 +340,6 @@ function shouldShowQR(state: WidgetState): boolean { return true; } -type QRTier = "wide" | "medium" | "tight"; - -const QR_TIER_MEDIUM_WIDTH = 68; -const QR_TIER_TIGHT_WIDTH = 52; -const QR_META_MAX_CHARS = 64; - -function qrTier(width: number): QRTier { - if (width < QR_TIER_TIGHT_WIDTH) return "tight"; - if (width < QR_TIER_MEDIUM_WIDTH) return "medium"; - return "wide"; -} - -function qrPhaseLabel(phase: QRPhase): string { - switch (phase) { - case "idle": - return "execute"; - case "execute": - return "execute"; - case "decompose": - return "decompose"; - case "verify": - return "verify"; - case "done": - return "done"; - } -} - -function qrPhaseShortLabel(phase: QRPhase): string { - switch (phase) { - case "idle": - return "exec"; - case "execute": - return "exec"; - case "decompose": - return "decomp"; - case "verify": - return "vfy"; - case "done": - return "done"; - } -} - -function firstBudgeted(candidates: string[], budget: number): string { - for (const c of candidates) { - if (visibleWidth(c) <= budget) return c; - } - const fallback = candidates[candidates.length - 1] ?? ""; - return truncateToWidth(fallback, budget, "…", false); -} - -function qrMetaText(state: WidgetState, tier: QRTier, budget: number): string { - const phase = qrPhaseLabel(state.qrPhase); - const short = qrPhaseShortLabel(state.qrPhase); - const modeFull = state.qrMode === "fix" ? "fix" : "initial"; - const modeShort = state.qrMode === "fix" ? "fx" : "in"; - const iter = state.qrIteration ?? 0; - const iterMax = state.qrIterationsMax ? 
`/${state.qrIterationsMax}` : ""; - const iterFull = `${iter}${iterMax}`; - - const wide = `phase:${phase} · iter ${iterFull} ${modeFull}`; - const medium = `${phase} · iter ${iterFull} ${modeFull}`; - const compact = `${short} · i${iterFull} ${modeFull}`; - const tight = `${short} i${iterFull} ${modeShort}`; - - const candidates = tier === "wide" - ? [wide, medium, compact, tight] - : tier === "medium" - ? [medium, compact, tight] - : [compact, tight]; - - return firstBudgeted(candidates, budget); -} - interface QRCounterValues { done: string; pass: string; @@ -434,82 +361,82 @@ function qrCounterValues(state: WidgetState): QRCounterValues { }; } -function renderQRCounterLine(state: WidgetState, theme: Theme, tier: QRTier, width: number, budget: number): string { - const values = qrCounterValues(state); - - const labelSets = tier === "wide" - ? [ - { done: "done", pass: "pass", fail: "fail", todo: "todo" }, - { done: "d", pass: "p", fail: "f", todo: "t" }, - ] - : [{ done: "d", pass: "p", fail: "f", todo: "t" }]; - - const render = (labels: { done: string; pass: string; fail: string; todo: string }) => [ - `${theme.fg("muted", `${labels.done}:`)}${theme.fg("dim", values.done)}`, - `${theme.fg("muted", `${labels.pass}:`)}${theme.fg("accent", values.pass)}`, - `${theme.fg("muted", `${labels.fail}:`)}${theme.bold(theme.fg("error", values.fail))}`, - `${theme.fg("muted", `${labels.todo}:`)}${theme.fg("muted", values.todo)}`, - ].join(" "); - - const candidates = labelSets.map(render); - const selected = firstBudgeted(candidates, budget); - return clampToWidth(selected, width, "…"); +function runtimeStageLabel(state: WidgetState): string { + switch (state.qrPhase) { + case "idle": + case "execute": + return state.qrMode === "fix" ? "Fixing" : "Writing"; + case "decompose": + return "Analyzing"; + case "verify": + return "Verifying"; + case "done": + return "Complete"; + } +} + +function stageCycleText(state: WidgetState): string { + const iter = state.qrIteration ?? 
0; + const iterMax = state.qrIterationsMax ? `/${state.qrIterationsMax}` : ""; + const mode = state.qrMode === "fix" ? "fix" : "initial"; + return `cycle ${iter}${iterMax} · ${mode}`; } -function renderQRStatusSection(state: WidgetState, theme: Theme, width: number): string[] { - if (!shouldShowQR(state)) { +function shouldShowRuntimeSection(state: WidgetState): boolean { + return shouldShowQR(state) || shouldShowSubagentSection(state); +} + +function renderRuntimeRow(theme: Theme, width: number, keyWidth: number, key: string, value: string): string { + const padded = key.padEnd(keyWidth, " "); + return clampToWidth(`${theme.fg("muted", padded)} : ${value}`, width, "…"); +} + +function renderRuntimeStatusSection(state: WidgetState, theme: Theme, width: number): string[] { + if (!shouldShowRuntimeSection(state)) { return []; } - const tier = qrTier(width); - const budget = Math.min(width, QR_META_MAX_CHARS); + const rows: Array<{ key: string; value: string }> = []; - const headerMeta = qrMetaText(state, tier, budget); - const header = clampToWidth( - `${theme.bold(theme.fg("accent", "QR"))} ${theme.fg("muted", "|")} ${theme.fg("dim", headerMeta)}`, - width, - "…", - ); + if (shouldShowQR(state)) { + const stageValue = `${theme.bold(theme.fg("accent", runtimeStageLabel(state)))} ${theme.fg("dim", `(${stageCycleText(state)})`)}`; + const values = qrCounterValues(state); + const qualityValue = [ + `${theme.fg("muted", "checked")} ${theme.fg("dim", values.done)}`, + `${theme.fg("muted", "pass")} ${theme.fg("accent", values.pass)}`, + `${theme.bold(theme.fg("error", "FAIL"))} ${theme.bold(theme.fg("error", values.fail))}`, + `${theme.fg("muted", "remaining")} ${theme.fg("muted", values.todo)}`, + ].join(" "); - const phaseEntries: Array<{ key: Exclude; label: string }> = tier === "wide" - ? [ - { key: "execute", label: state.qrMode === "fix" ? 
"Execute (fix)" : "Execute" }, - { key: "decompose", label: "QR decompose" }, - { key: "verify", label: "QR verify" }, - ] - : tier === "medium" - ? [ - { key: "execute", label: state.qrMode === "fix" ? "Exec(fix)" : "Exec" }, - { key: "decompose", label: "Decomp" }, - { key: "verify", label: "Verify" }, - ] - : [ - { key: "execute", label: "X" }, - { key: "decompose", label: "D" }, - { key: "verify", label: "V" }, - ]; - - const effectivePhase: Exclude = state.qrPhase === "idle" ? "execute" : state.qrPhase; - let currentIndex = phaseEntries.findIndex((entry) => entry.key === effectivePhase); - if (effectivePhase === "done") { - currentIndex = phaseEntries.length; + rows.push({ key: "stage", value: stageValue }); + rows.push({ key: "quality", value: qualityValue }); } - const segments = phaseEntries.map((entry, index) => { - if (index < currentIndex) { - return theme.bold(theme.fg("dim", `${entry.label} ✓`)); - } - if (index === currentIndex) { - return theme.bold(theme.fg("accent", entry.label)); - } - return theme.fg("muted", entry.label); - }); + if (shouldShowSubagentSection(state)) { + const parallel = state.subagentParallelCount ?? 1; + const pool = parallel > 1 ? 
`pool ×${parallel}` : "single"; + const workersValue = [ + `${theme.fg("muted", "queued")} ${theme.fg("muted", subagentCount(state.subagentQueued))}`, + `${theme.fg("muted", "active")} ${theme.bold(theme.fg("accent", subagentCount(state.subagentActive)))}`, + `${theme.fg("muted", "done")} ${theme.fg("dim", subagentCount(state.subagentDone))}`, + `${theme.fg("dim", pool)}`, + ].join(" "); + + rows.push({ key: "workers", value: workersValue }); + } - const rail = clampToWidth(segments.join(theme.fg("muted", " → ")), width, "…"); - const counters = renderQRCounterLine(state, theme, tier, width, budget); - const divider = clampToWidth(theme.fg("muted", "─".repeat(width)), width); + if (rows.length === 0) { + return []; + } + + const keyWidth = Math.max(...rows.map((row) => visibleWidth(row.key))); + const lines = [clampToWidth(theme.fg("dim", "Runtime"), width)]; - return [header, rail, counters, divider]; + for (const row of rows) { + lines.push(renderRuntimeRow(theme, width, keyWidth, row.key, row.value)); + } + + return lines; } interface DetailSections { @@ -524,11 +451,6 @@ interface DetailSectionDefinition { render: (view: ViewModel, theme: Theme, width: number) => string[]; } -interface CurrentStepView { - title: string; - activity: string; -} - interface IdentityView { planId: string; agentLabel: "Agent" | "Agent pool"; @@ -545,30 +467,6 @@ function subagentCount(value: number | null): string { return value === null ? "-" : String(value); } -function renderSubagentStatusSection(state: WidgetState, theme: Theme, width: number): string[] { - if (!shouldShowSubagentSection(state)) { - return []; - } - - const parallel = state.subagentParallelCount ?? 1; - const mode = parallel > 1 ? 
`pool x${parallel}` : "single"; - - const header = clampToWidth( - `${theme.bold(theme.fg("accent", "Subagents"))} ${theme.fg("muted", "|")} ${theme.fg("dim", mode)}`, - width, - "…", - ); - - const counters = [ - `${theme.fg("muted", "queued:")}${theme.fg("muted", subagentCount(state.subagentQueued))}`, - `${theme.fg("muted", "active:")}${theme.bold(theme.fg("accent", subagentCount(state.subagentActive)))}`, - `${theme.fg("muted", "done:")}${theme.fg("dim", subagentCount(state.subagentDone))}`, - ].join(" "); - - const divider = clampToWidth(theme.fg("muted", "─".repeat(width)), width); - return [header, clampToWidth(counters, width, "…"), divider]; -} - function identityView(state: WidgetState): IdentityView { const role = state.subagentRole ?? "—"; const parallel = state.subagentParallelCount ?? 1; @@ -607,41 +505,10 @@ function renderIdentitySection(view: IdentityView, theme: Theme, width: number): const DETAIL_SECTION_REGISTRY: Array> = [ { - id: "current-step", - placement: "core", - select: (state: WidgetState): CurrentStepView => { - const active = activePhase(state); - return { - title: state.step || active?.detail || active?.label || "Awaiting step", - activity: state.activity, - }; - }, - render: (view: CurrentStepView, theme: Theme, width: number): string[] => { - const lines = [ - clampToWidth(theme.fg("dim", "Current step"), width), - clampToWidth(theme.bold(theme.fg("accent", view.title)), width, "…"), - ]; - - if (view.activity) { - for (const line of wrapTextWithAnsi(theme.fg("muted", view.activity), width)) { - lines.push(clampToWidth(line, width)); - } - } - - return lines; - }, - }, - { - id: "qr-status", - placement: "core", - select: (state: WidgetState): WidgetState | null => (shouldShowQR(state) ? 
state : null), - render: (view: WidgetState, theme: Theme, width: number): string[] => renderQRStatusSection(view, theme, width), - }, - { - id: "subagent-status", + id: "runtime-status", placement: "core", - select: (state: WidgetState): WidgetState | null => (shouldShowSubagentSection(state) ? state : null), - render: (view: WidgetState, theme: Theme, width: number): string[] => renderSubagentStatusSection(view, theme, width), + select: (state: WidgetState): WidgetState | null => (shouldShowRuntimeSection(state) ? state : null), + render: (view: WidgetState, theme: Theme, width: number): string[] => renderRuntimeStatusSection(view, theme, width), }, { id: "identity", @@ -753,16 +620,9 @@ function renderPlanningCard(state: WidgetState, theme: Theme, width: number): st "", formatStepLine(state, theme), ]; - const detail = formatDetail(state, theme, contentWidth); - if (detail) fallbackContent.push(detail); - const qrCompact = formatQRCompact(state, theme, contentWidth); - if (qrCompact.length > 0) { - fallbackContent.push(...qrCompact); - } - const subagentCompact = formatSubagentCompact(state, theme, contentWidth); - if (subagentCompact.length > 0) { - if (qrCompact.length > 0) fallbackContent.push(""); - fallbackContent.push(...subagentCompact); + const runtimeCompact = formatRuntimeCompact(state, theme, contentWidth); + if (runtimeCompact.length > 0) { + fallbackContent.push(...runtimeCompact); } fallbackContent.push(""); @@ -912,37 +772,9 @@ function renderLogCard(state: WidgetState, theme: Theme, width: number, forcedCo ); } -function formatDetail(state: WidgetState, theme: Theme, width: number): string { - const step = state.step ? theme.fg("muted", state.step) : ""; - const activity = state.activity ? 
theme.fg("dim", ` · ${state.activity}`) : ""; - const detail = `${step}${activity}`; - if (!detail) return ""; - return clampToWidth(detail, width, "…"); -} - -function formatQRCompact(state: WidgetState, theme: Theme, width: number): string[] { - if (!shouldShowQR(state)) return []; - - const tier = qrTier(width); - const budget = Math.min(width, QR_META_MAX_CHARS); - const meta = qrMetaText(state, tier, budget); - const line1 = clampToWidth(`${theme.fg("muted", "QR")} ${theme.fg("muted", "|")} ${theme.fg("dim", meta)}`, width, "…"); - const line2 = renderQRCounterLine(state, theme, tier, width, budget); - return [line1, line2]; -} - -function formatSubagentCompact(state: WidgetState, theme: Theme, width: number): string[] { - if (!shouldShowSubagentSection(state)) return []; - - const parallel = state.subagentParallelCount ?? 1; - const mode = parallel > 1 ? `pool x${parallel}` : "single"; - const line1 = clampToWidth(`${theme.fg("muted", "Subagents")} ${theme.fg("muted", "|")} ${theme.fg("dim", mode)}`, width, "…"); - const line2 = clampToWidth( - `${theme.fg("muted", `queued:${subagentCount(state.subagentQueued)}`)} ${theme.fg("accent", `active:${subagentCount(state.subagentActive)}`)} ${theme.fg("dim", `done:${subagentCount(state.subagentDone)}`)}`, - width, - "…", - ); - return [line1, line2]; +function formatRuntimeCompact(state: WidgetState, theme: Theme, width: number): string[] { + if (!shouldShowRuntimeSection(state)) return []; + return renderRuntimeStatusSection(state, theme, width); } function formatIdentityCompact(state: WidgetState, theme: Theme, width: number): string[] { diff --git a/tests/widget.test.ts b/tests/widget.test.ts index 2342df4..bd2ea8e 100644 --- a/tests/widget.test.ts +++ b/tests/widget.test.ts @@ -84,6 +84,64 @@ describe("WidgetController rendering", () => { } }); + it("renders merged runtime section with stage + quality + workers", () => { + const harness = createWidgetHarness(); + try { + harness.controller.update({ + 
qrIteration: 2, + qrIterationsMax: 6, + qrMode: "fix", + qrPhase: "verify", + qrDone: 9, + qrTotal: 14, + qrPass: 8, + qrFail: 1, + qrTodo: 5, + subagentQueued: 2, + subagentActive: 3, + subagentDone: 7, + subagentParallelCount: 4, + }); + + const text = harness.render(140).join("\n"); + assert.match(text, /Runtime/); + assert.match(text, /stage\s+: Verifying \(cycle 2\/6 · fix\)/); + assert.match(text, /quality\s+: checked 9\/14\s+pass 8\s+FAIL 1\s+remaining 5/); + assert.match(text, /workers\s+: queued 2\s+active 3\s+done 7\s+pool ×4/); + + assert.doesNotMatch(text, /\bQR\b\s+\|/); + assert.doesNotMatch(text, /\bSubagents\b\s+\|/); + assert.doesNotMatch(text, /\bCurrent step\b/); + } finally { + harness.destroy(); + } + }); + + it("uses Writing for execute debut and Fixing for execute fix", () => { + const harness = createWidgetHarness(); + try { + harness.controller.update({ + qrIteration: 1, + qrIterationsMax: 6, + qrMode: "initial", + qrPhase: "execute", + }); + + let text = harness.render(140).join("\n"); + assert.match(text, /stage\s+: Writing \(cycle 1\/6 · initial\)/); + + harness.controller.update({ + qrMode: "fix", + qrPhase: "execute", + }); + + text = harness.render(140).join("\n"); + assert.match(text, /stage\s+: Fixing \(cycle 1\/6 · fix\)/); + } finally { + harness.destroy(); + } + }); + it("aligns identity table separator using dynamic key width", () => { const harness = createWidgetHarness(); try { From 7f1a40e4903bb3bd8d2ee572371b258953254654 Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Thu, 5 Mar 2026 14:12:28 +0700 Subject: [PATCH 040/412] fix(planner): bind QR phase state and embed subagent prompts --- README.md | 5 + design-decisions.md | 17 +- package.json | 1 + resources/conventions/REGISTRY.yaml | 68 +++ .../code-quality/01-naming-and-types.md | 231 ++++++++++ .../02-structure-and-composition.md | 277 ++++++++++++ .../code-quality/03-patterns-and-idioms.md | 224 ++++++++++ .../04-repetition-and-consistency.md | 224 ++++++++++ 
.../05-documentation-and-tests.md | 186 ++++++++ .../06-module-and-dependencies.md | 119 ++++++ .../code-quality/07-cross-file-consistency.md | 188 ++++++++ .../code-quality/08-codebase-patterns.md | 153 +++++++ resources/conventions/diff-format.md | 201 +++++++++ resources/conventions/documentation.md | 402 ++++++++++++++++++ resources/conventions/intent-markers.md | 33 ++ resources/conventions/severity.md | 80 ++++ resources/conventions/structural.md | 152 +++++++ resources/conventions/temporal.md | 135 ++++++ src/planner/lib/agent-prompts.ts | 20 + src/planner/lib/dispatch.ts | 3 +- src/planner/lib/resources.ts | 31 ++ src/planner/phases/plan-code/prompts.ts | 13 +- src/planner/phases/plan-design/prompts.ts | 26 +- src/planner/phases/plan-docs/prompts.ts | 13 +- src/planner/phases/qr-decompose/phase.ts | 1 + src/planner/phases/qr-decompose/prompts.ts | 23 +- src/planner/phases/qr-verify/phase.ts | 1 + src/planner/phases/qr-verify/prompts.ts | 17 +- src/planner/tools/qr.ts | 46 +- tests/qr-grouped-verify.test.ts | 3 +- 30 files changed, 2793 insertions(+), 100 deletions(-) create mode 100644 resources/conventions/REGISTRY.yaml create mode 100644 resources/conventions/code-quality/01-naming-and-types.md create mode 100644 resources/conventions/code-quality/02-structure-and-composition.md create mode 100644 resources/conventions/code-quality/03-patterns-and-idioms.md create mode 100644 resources/conventions/code-quality/04-repetition-and-consistency.md create mode 100644 resources/conventions/code-quality/05-documentation-and-tests.md create mode 100644 resources/conventions/code-quality/06-module-and-dependencies.md create mode 100644 resources/conventions/code-quality/07-cross-file-consistency.md create mode 100644 resources/conventions/code-quality/08-codebase-patterns.md create mode 100644 resources/conventions/diff-format.md create mode 100644 resources/conventions/documentation.md create mode 100644 resources/conventions/intent-markers.md create mode 100644 
resources/conventions/severity.md create mode 100644 resources/conventions/structural.md create mode 100644 resources/conventions/temporal.md create mode 100644 src/planner/lib/agent-prompts.ts create mode 100644 src/planner/lib/resources.ts diff --git a/README.md b/README.md index 5b89554..a8d832b 100644 --- a/README.md +++ b/README.md @@ -29,6 +29,11 @@ Each phase is followed by a QR (quality review) block: decompose → parallel ve Written once at the start of `koan_plan`. Contains the full session branch as JSONL (one JSON object per line — raw pi `SessionManager` entries, not a plain-text transcript). The plan-design architect and plan-docs writer are told about this file and may `Read` it; other phases work from `plan.json` only. +### Prompt + convention sources + +- Subagent system prompts are hard-coded in `src/planner/lib/agent-prompts.ts`. +- Convention docs stay file-based in `resources/conventions` and are surfaced to prompts via `CONVENTIONS_DIR`. + ### Slash commands | Command | Description | diff --git a/design-decisions.md b/design-decisions.md index 4c6c471..6349a47 100644 --- a/design-decisions.md +++ b/design-decisions.md @@ -145,11 +145,18 @@ Structure: `/subagents/-/` Contains: state.json, stdout.log, stderr.log. ProgressReporter class manages state.json updates with trail. -### AD-10: Architect System Prompt - -The architect's system prompt is loaded from ~/.claude/agents/architect.md -at runtime via loadPlanDesignSystemPrompt(). Injected via -before_agent_start returning { systemPrompt: ... }. +### AD-10: Embedded Planner Prompts + File-Based Conventions + +Planner subagent prompts are hard-coded in TypeScript at +`src/planner/lib/agent-prompts.ts` (architect, developer, +quality-reviewer, technical-writer). Phase loaders call +`loadAgentPrompt(...)`, so prompt availability does not depend on runtime +filesystem paths. + +Conventions remain file-based under `resources/conventions` so the LLM can +explore them directly with `Read`. 
`CONVENTIONS_DIR` is resolved at runtime +via `src/planner/lib/resources.ts` and injected into phase guidance where +needed. ### AD-11: Plan Schema Self-Documentation via TypeBox diff --git a/package.json b/package.json index e99f2d3..b3ebab9 100644 --- a/package.json +++ b/package.json @@ -18,6 +18,7 @@ "files": [ "extensions", "src", + "resources", "README.md", "LICENSE" ], diff --git a/resources/conventions/REGISTRY.yaml b/resources/conventions/REGISTRY.yaml new file mode 100644 index 0000000..206eb56 --- /dev/null +++ b/resources/conventions/REGISTRY.yaml @@ -0,0 +1,68 @@ +# Role-Convention Registry +# CI validates actual get_convention() calls match these declarations +# +# Structure per role: +# receives: list of conventions this role loads +# phase_specific: per-phase convention overrides +# mode_specific: per-mode convention overrides (design vs code) +# rationale: explanation for roles with empty receives + +developer: + receives: + - diff-format.md + +technical_writer: + receives: + - temporal.md + - documentation.md + +quality_reviewer: + receives: + - temporal.md + - structural.md + - diff-format.md + - code-quality/* + phase_specific: + plan_completeness: + - structural.md + plan_code: + - diff-format.md + - code-quality/01-naming-and-types.md + - code-quality/02-structure-and-composition.md + - code-quality/03-patterns-and-idioms.md + - code-quality/04-repetition-and-consistency.md + - code-quality/05-documentation-and-tests.md + plan_docs: + - temporal.md + post_impl_code: + - structural.md + - code-quality/01-naming-and-types.md + - code-quality/02-structure-and-composition.md + - code-quality/03-patterns-and-idioms.md + - code-quality/04-repetition-and-consistency.md + - code-quality/05-documentation-and-tests.md + - code-quality/06-module-and-dependencies.md + - code-quality/07-cross-file-consistency.md + - code-quality/08-codebase-patterns.md + post_impl_doc: + - temporal.md + +refactor: + receives: + - code-quality/* + mode_specific: + design: + 
- code-quality/01-naming-and-types.md + - code-quality/02-structure-and-composition.md + - code-quality/06-module-and-dependencies.md + - code-quality/07-cross-file-consistency.md + code: + - code-quality/* + +explore: + receives: [] + rationale: "Codebase reading only, no convention-aware output" + +general_purpose: + receives: [] + rationale: "Planning and general tasks; produces artifacts that other roles process" diff --git a/resources/conventions/code-quality/01-naming-and-types.md b/resources/conventions/code-quality/01-naming-and-types.md new file mode 100644 index 0000000..63c4f2f --- /dev/null +++ b/resources/conventions/code-quality/01-naming-and-types.md @@ -0,0 +1,231 @@ + + +# Naming & Types + +Evaluate whether names and types accurately communicate intent. + +**The core question**: If a reader sees only the name or type, will their mental model match actual behavior? Names are micro-documentation. Types are contracts. When either lies, readers build wrong mental models and write bugs. + +**What to look for**: + +- Names that describe HOW instead of WHAT +- Verbs that lie (get that mutates, validate that parses) +- Missing domain types (primitives where concepts belong) +- Type-based branching (isinstance chains indicating missing polymorphism) +- Multiple names for the same concept within a file + +**The threshold**: Flag only when name/type actively misleads or when domain concepts are hidden in primitives crossing boundaries. Imperfect-but-accurate names are style preferences, not quality issues. + + +When evaluating Code Intent (Design Review phase): + +- Does the proposed function/class name predict its behavior? +- Does the intent use domain types or raw primitives? +- Are type choices appropriate for the domain concept? + +Evidence format: Quote the Code Intent description showing naming/type issue. + + + +When evaluating actual code (Diff Review, Codebase Review, Refactor): + +- Does the implementation name match actual behavior? 
+- Are domain concepts hidden in primitive comparisons? +- Are isinstance chains indicating missing polymorphism? + +Evidence format: Quote code with file:line showing the issue. + + +--- + +## 1. Naming Precision + + +A name is micro-documentation. It should predict behavior accurately enough that reading the implementation confirms rather than surprises. + + +Detect: Does the name accurately describe what this does? Would a reader's mental model, built from the name alone, match actual behavior? + + +Terms that sometimes indicate naming issues (starting points, not definitive): +`Manager`, `Handler`, `Utils`, `Helper`, `Data`, `Info`, `process`, `handle`, `do` + + + +Illustrative patterns (not exhaustive -- similar violations exist): + +[high] Name-behavior mismatch + +- Names describing HOW not WHAT (e.g., loopOverItems -> processOrders) +- Verbs that lie (e.g., get that mutates, validate that parses) +- Any name that would cause surprise when implementation is read + +[medium] Abstraction leakage + +- Implementation details in public API names +- Vague umbrella terms (e.g., Manager, Handler, Utils, Helper, Data, Info) + +[low] Cognitive friction + +- Negated booleans (e.g., isNotValid -> isInvalid, disableFeature -> featureEnabled) + + + +Generic names in genuinely generic contexts (e.g., item in a generic collection, T in type params). Test: would a specific name add signal or just noise? + + + +Flag only when name actively misleads. Imperfect names that are still accurate are style preferences. + + +## 2. Missing Domain Modeling + + +Domain concepts should be explicit in code, not hidden in raw comparisons. When the same concept is checked multiple ways, it belongs in a domain object. + + +Detect: Are domain concepts hiding in raw conditions? Is the same business concept checked via primitive comparison in multiple places? 
+ + +Pattern indicators (starting points, not definitive): +`== 'admin'`, `== "admin"`, `status ==`, `role ==`, `type ==`, magic numbers + + + +Illustrative patterns (not exhaustive -- similar violations exist): + +[high] Hidden domain logic + +- Domain predicates in raw conditions (e.g., user.role == 'admin' -> user.can_edit()) +- Magic value comparisons (e.g., status == 3 -> Status.APPROVED) +- Any business concept expressed only through primitive comparison + +[medium] Implicit modeling + +- String comparisons for state (e.g., mode == 'active' -> enum) +- Business rules buried in conditions (extract to domain object method) + + + +Explicit comparisons in domain layer implementation itself. Config values compared once at startup. + + + +Flag when same domain concept is checked via raw comparison in 2+ places. + + +## 3. Type-Based Branching + + +Type dispatch scattered across code indicates missing polymorphism. When you branch on type in multiple places, the type itself should carry the behavior. + + +Detect: Is type-checking being used where polymorphism would be cleaner? Does the same type dispatch appear in multiple locations? + + +Pattern indicators (starting points, not definitive): +`isinstance`, `typeof`, `instanceof`, `hasattr`, `in dict`, `.type ==` + + + +Illustrative patterns (not exhaustive -- similar violations exist): + +[high] Scattered dispatch + +- isinstance/typeof chains (3+ branches -> polymorphism candidate) +- Same type dispatch appearing in multiple locations + +[medium] Implicit dispatch + +- Attribute-presence checks (e.g., hasattr/in dict as type dispatch) + +[low] Missing abstraction + +- Duck typing conditionals that should be protocols/interfaces + + + +Single isinstance check for input validation. Type narrowing for type safety. + + + +Flag when same type dispatch appears in 2+ places. Single-use type checks are often appropriate. + + +## 4. Type Design + + +Domain concepts deserve their own types. 
Primitives that cross boundaries without validation invite bugs; value objects with validation prevent them. + + +Detect: What domain concepts are represented as primitives? Do primitives cross API boundaries without validation? + + +Pattern indicators (starting points, not definitive): +`str` for IDs, `float` for money, `dict` passed through call chain, `Any`, `object` + + + +Illustrative patterns (not exhaustive -- similar violations exist): + +[high] Missing domain types + +- Primitive obsession (e.g., userId as string -> UserId type with validation) +- Missing value objects (e.g., money as float -> Money(amount, currency)) +- Any domain concept crossing API boundary as primitive + +[medium] Weak typing + +- Stringly-typed data (JSON strings -> typed objects) +- Leaky abstractions (callers must know implementation details) + +[low] Type proliferation + +- Optional explosion (many nullable fields -> consider separate types for states) + + + +Primitives in internal implementation. Serialization boundaries. Performance-critical paths. + + + +Flag when primitives cross API boundaries without validation. Internal use of primitives is acceptable. + + +## 5. Naming Consistency (File Scope) + + +A concept should have one name within a file. Multiple names for the same thing create confusion about whether they're actually the same. + + +Detect: Are there multiple names for the same concept within this file? Would a reader wonder if user and account refer to the same entity? 
+ + +Pattern indicators (starting points, not definitive): +Synonyms as variable prefixes (user/account/customer, config/settings/options, id/uid/identifier) + + + +Illustrative patterns (not exhaustive -- similar violations exist): + +[high] Semantic confusion + +- Same entity called different names in same file (e.g., user vs account vs customer) +- Any naming inconsistency causing doubt about identity within a single file + +[medium] Inconsistent conventions + +- Inconsistent abbreviations within file (e.g., id vs identifier) + +[low] Style drift + +- Style inconsistency without semantic confusion + + + +Different names for genuinely different concepts. External API naming conventions. Aliasing for clarity at specific scopes. + + + +Flag when same semantic concept has multiple names within a file AND causes confusion about whether they refer to the same thing. + diff --git a/resources/conventions/code-quality/02-structure-and-composition.md b/resources/conventions/code-quality/02-structure-and-composition.md new file mode 100644 index 0000000..0748863 --- /dev/null +++ b/resources/conventions/code-quality/02-structure-and-composition.md @@ -0,0 +1,277 @@ + + +# Structure & Composition + +Evaluate whether code is well-structured for comprehension and change. + +**The core question**: Can I understand this unit in isolation? Can I change it without understanding its dependents? Structure should reveal intent and isolate concerns. + +**What to look for**: + +- Functions doing multiple things (requires "and" to describe) +- Deep nesting obscuring control flow +- Implicit state machines hidden in boolean flags +- Hard-coded dependencies making code untestable +- Component definitions scattered across multiple locations +- Error handling that loses information + +**The threshold**: Flag when structure obscures intent or when changes would ripple unnecessarily. Length alone is not a smell; unclear responsibility is. 
+ + +When evaluating Code Intent (Design Review phase): + +- Does the proposed function do one thing or multiple things? +- Does the intent describe clear responsibility boundaries? +- Does the design inject dependencies or hardcode them? +- Is the component's definition complete in one place, or scattered across locations? + +Evidence format: Quote the Code Intent description showing structural issue. + + + +When evaluating actual code (Diff Review, Codebase Review, Refactor): + +- Is the function too long or deeply nested? +- Are boolean flags creating implicit state machines? +- Is error handling preserving context? +- Are component definitions scattered (requirements in one place, validation in another)? + +Evidence format: Quote code with file:line showing the issue. + + +--- + +## 1. Function Composition + + +A function should do one thing that can be described in a single sentence. When description requires "and", the function likely needs splitting. + + +Detect: Can I describe this function's purpose in one sentence without using "and"? + + +Structural indicators (starting points, not definitive): +Functions >50 lines, parameter counts >4 + + + +Illustrative patterns (not exhaustive -- similar violations exist): + +[high] Responsibility diffusion + +- God functions (multiple unrelated responsibilities) +- Long parameter lists (4+ params signals missing concept) +- Any function requiring multiple sentences to describe its purpose + +[medium] Structural complexity + +- Deep nesting (3+ levels of conditionals) +- Mixed abstraction levels (high-level orchestration mixed with low-level details) + +[low] Interface friction + +- Boolean parameters that fork behavior (consider splitting into two functions) + + + +Long functions that do one thing linearly (e.g., state machine, parser). Nesting depth from error handling. + + + +Flag when function has multiple unrelated responsibilities. Length alone is not a smell. + + +## 2. 
Control Flow Smells + + +Control flow should reveal intent, not obscure it. When following execution requires significant mental effort, the structure needs simplification. + + +Detect: Is the control flow harder to follow than necessary? Would a reader need to trace through multiple branches to understand behavior? + + +Pattern indicators (starting points, not definitive): +`elif.*elif.*elif`, `switch`, `case`, `? :.*? :`, ternary chains + + + +Illustrative patterns (not exhaustive -- similar violations exist): + +[high] Excessive branching + +- Long if/elif chains (5+ branches -> lookup table or strategy pattern) +- Any branching structure that requires tracing to understand + +[medium] Obscured flow + +- Nested ternaries (2+ levels -> extract to named variables) +- Early-return candidates buried in nested else branches + +[low] Hidden complexity + +- Conditional assignment cascades +- Implicit else branches hiding edge cases + + + +Exhaustive pattern matching. State machines with explicit states. + + + +Flag when control flow obscures intent. Explicit branching for documented cases is acceptable. + + +## 3. State and Flags + + +Boolean flags that interact create implicit state machines. When understanding state requires tracking multiple flags, make the state machine explicit. + + +Detect: Are boolean flags creating implicit state machines? Do flags interact in ways that require mental tracking? 
+ + +Pattern indicators (starting points, not definitive): +`is_.*=`, `has_.*=`, `_flag`, `_state`, multiple boolean assignments + + + +Illustrative patterns (not exhaustive -- similar violations exist): + +[high] Implicit state machines + +- Boolean flag tangles (3+ flags interacting = implicit state machine) +- Any flag interaction requiring mental state tracking + +[medium] Order dependencies + +- Stateful conditionals depending on mutation order + +[low] Defensive complexity + +- Defensive null chains (e.g., x and x.y and x.y.z -> optional chaining or null object) + + + +Single boolean for simple on/off state. Builder pattern flags. + + + +Flag when flags interact in ways that require mental state tracking. Independent flags are fine. + + +## 4. Dependency Injection + + +Business logic should be testable without network, disk, or database. Hard-coded dependencies make code untestable and tightly coupled. + + +Detect: Can I test this function in isolation without mocking infrastructure? Are dependencies injected or hard-coded? + + +Pattern indicators (starting points, not definitive): +`datetime.now`, `time.time`, `os.environ`, `open(`, `requests.`, `http.` + + + +Illustrative patterns (not exhaustive -- similar violations exist): + +[high] Untestable coupling + +- Hard-coded dependencies (e.g., new Date() inline -> inject clock) +- Global state access (avoid or inject) +- Any business logic that requires infrastructure to test + +[medium] Mixed concerns + +- Side effects mixed with computation (separate pure logic from effects) +- Concrete class dependencies (depend on interface, not implementation) + +[low] Configuration coupling + +- Environment coupling (reads env vars directly -> inject config) +- Time-dependent logic (inject clock for testability) + + + +Entry points that wire dependencies. Test utilities. Scripts meant to run directly. + + + +Flag when untestable code is in business logic. 
Infrastructure code at boundaries is expected to have dependencies. + + +## 5. Definition Locality + + +A component's definition should be complete at a single site. When understanding what a component IS -- its identity, requirements, constraints, and behavior -- demands reading multiple locations, the definition is scattered. + + +Detect: To understand what this component IS, how many locations must I read? If I change what this component requires, how many files must I edit? + + +Structural indicators (starting points, not definitive): +Same requirement checked in 2+ locations, component identity split across files, extraction-with-default patterns (args.get, kwargs.get, getattr with default) + + + +Illustrative patterns (not exhaustive -- similar violations exist): + +[high] Scattered specification + +- Same requirement declared in 2+ locations (e.g., parser marks required AND handler checks if missing) +- Component identity split across files without clear ownership +- Definition requiring "mental reassembly" from 3+ sources + +[medium] Split declaration/enforcement + +- Interface declared at one site, validated at another without shared reference +- Defaults defined separately from schema (e.g., type in schema, default in code) +- Same constraint checked in multiple places + + + +Dependency injection (injected collaborator's definition lives with collaborator, not here -- that's runtime wiring, not scatter). Composition (A uses B; B's definition is B's concern). Inheritance (intentional decomposition). Plugin architectures (clear ownership boundaries). Registry + reference patterns (define once, reference many times -- this is the fix, not a smell). + + + +Flag when a component's definition is split across 2+ locations without clear ownership. Key test: who owns this fact? If ownership is unclear or duplicated, it's scatter. Common in LLM-generated code. + + +## 6. Error Handling + + +Errors should preserve context and reach appropriate handlers. 
Swallowed or generic catches lose information; errors at wrong levels confuse callers. + + +Detect: What happens if this operation fails? Is error information preserved and routed appropriately? + + +Pattern indicators (starting points, not definitive): +`except:`, `catch (`, `catch(`, `pass`, `# TODO`, `raise Error(` + + + +Illustrative patterns (not exhaustive -- similar violations exist): + +[high] Information loss + +- Swallowed exceptions (empty catch blocks) +- Generic catches (e.g., catch Exception -> catch specific errors) +- Any error handling that loses diagnostic information + +[medium] Wrong abstraction + +- Errors at wrong abstraction level (low-level errors leaking to callers) + +[low] Missing context + +- raise Error('failed') -> raise Error(f'order {id}: {reason}') + + + +Generic catch at top-level with logging. Intentionally swallowed expected errors with comment. + + + +Flag when error handling obscures or loses information. Documented catch-all with logging is acceptable. + diff --git a/resources/conventions/code-quality/03-patterns-and-idioms.md b/resources/conventions/code-quality/03-patterns-and-idioms.md new file mode 100644 index 0000000..36cba1c --- /dev/null +++ b/resources/conventions/code-quality/03-patterns-and-idioms.md @@ -0,0 +1,224 @@ + + +# Patterns & Idioms + +Evaluate whether code uses idiomatic patterns for its language. + +**The core question**: Is this idiomatic? Modern languages provide features to simplify common patterns. When code uses outdated patterns, verbose anti-patterns, or unnecessarily complex expressions, it adds cognitive load without benefit. 
+ +**What to look for**: + +- Complex boolean expressions requiring mental evaluation +- Verbose conditional patterns with simpler equivalents +- Outdated iteration/callback patterns +- Commented code blocks and unreachable branches (within files) +- Missing language features that would simplify code + +**The threshold**: Flag mechanical anti-patterns and expression-level complexity that obscures intent. Well-commented complex logic is acceptable; unnecessarily complex logic is not. Only flag outdated patterns when a clearly better modern idiom exists in the project's language version. + + +Not applicable -- this group requires actual code to evaluate. + + + +When evaluating actual code (Diff Review, Codebase Review, Refactor): + +- Are boolean expressions readable at a glance? +- Do conditionals use simpler equivalent forms? +- Are modern language features being utilized? +- Is commented code cluttering the file? + +Evidence format: Quote code with file:line showing the issue. + + +--- + +## 1. Boolean Expression Complexity + + +A boolean expression should be readable at a glance. If it requires mental evaluation to understand, it needs simplification or naming. + + +Detect: Can I understand this boolean expression without tracing through it mentally? + + +Pattern indicators (starting points, not definitive): +`and.*and`, `or.*or`, `&&.*&&`, `||.*||`, `not.*not`, `!.*!` + + + +Illustrative patterns (not exhaustive -- similar violations exist): + +[medium] Cognitive overload + +- Multi-clause expressions (3+ AND/OR terms -> extract named predicate) +- Negated compound conditions (e.g., not (a and b) -> clearer positive form) +- Any expression requiring paper/mental tracing to evaluate + +[low] Ambiguity + +- Mixed AND/OR without parentheses clarifying precedence +- Double/triple negatives (e.g., if not disabled, if not is_invalid) + + + +Complex conditions with clear structure and comments explaining the logic. 
+ + + +Flag when expression requires mental evaluation to understand. Well-commented complex conditions are acceptable. + + +## 2. Conditional Anti-Patterns + + +Conditions should express intent directly. When a simpler form exists that preserves meaning, the complex form is an anti-pattern. + + +Detect: Is there a simpler way to express this condition that preserves the same meaning? + + +Pattern indicators (starting points, not definitive): +`if.*return True.*else.*return False`, `try:.*except:.*pass`, `and do_` + + + +Illustrative patterns (not exhaustive -- similar violations exist): + +[medium] Verbose patterns + +- if cond: return True else: return False (just return cond) +- Exception-based control flow (try/except as if/else) +- Any condition with a simpler equivalent form + +[low] Subtle complexity + +- Short-circuit side effects (e.g., cond and do_thing()) +- Yoda conditions without clear benefit (e.g., if 5 == x) + + + +Exception handling for actual exceptional conditions. Short-circuit for lazy evaluation. + + + +Flag mechanical anti-patterns only. Intent-preserving variations are style preferences. + + +## 3. Modern Idioms + + +Modern language features exist to simplify common patterns. When older patterns persist unnecessarily, they add cognitive load without benefit. + + +Detect: Is there a newer language feature that would simplify this code? Is the project's language version being underutilized? 
+ + +Pattern indicators (starting points, not definitive): +`for i in range(len(`, `+ str(`, `.format(`, callback patterns, `null` checks + + + +Illustrative patterns (not exhaustive -- similar violations exist): + +[medium] Outdated patterns + +- Old iteration patterns (e.g., manual index loops -> for-each, enumerate) +- Deprecated API usage +- Any pattern with a simpler modern equivalent + +[low] Missing features + +- Missing language features (e.g., no destructuring, no pattern matching) +- Legacy patterns (e.g., callbacks -> async/await) +- Outdated idioms (e.g., string concatenation -> f-strings/templates) +- Manual null checks (-> optional chaining, null coalescing) + + + +Intentional use of older patterns for compatibility. Performance-critical code avoiding allocations. + + + +Flag when modern idiom is clearly better AND available in the project's language version. Do not flag style preferences. + + +## 4. Readability + + +Code should be understandable in isolation. When understanding requires external lookup or tribal knowledge, the code needs clarification. + + +Detect: Can I understand this code without reading other files or asking someone? Is intent clear from the code itself? 
+ + +Pattern indicators (starting points, not definitive): +Boolean literals in function calls, magic numbers, unexplained constants + + + +Illustrative patterns (not exhaustive -- similar violations exist): + +[high] Obscured intent + +- Boolean trap (e.g., fn(True, False) -> fn(enabled=True, debug=False)) +- Any call where argument meaning requires looking up the function signature + +[medium] Magic values + +- Magic numbers/strings (e.g., 42 -> MAX_RETRIES = 42) +- Positional args where named params would clarify intent + +[low] Dense expressions + +- Dense expressions (e.g., nested ternaries -> named intermediate variables) +- Missing WHY comments on non-obvious decisions +- Implicit ordering dependencies between calls (document or make explicit) + + + +Well-known constants (0, 1, -1, 100). Boolean in obviously-named function (e.g., setEnabled(true)). + + + +Flag when meaning requires external lookup. Self-evident code needs no comments. + + +## 5. Zombie Code (File Scope) + + +Dead code is noise that misleads readers. Code that cannot execute or is never called should be removed, not left to confuse future maintainers. + + +Detect: If I deleted this, would any test fail or behavior change? + + +Pattern indicators (starting points, not definitive): +Commented blocks, `#if 0`, unreachable branches, unused variables + + + +Illustrative patterns (not exhaustive -- similar violations exist): + +[high] Dead code blocks + +- Commented-out code blocks (>5 lines of code, not documentation) +- Unreachable branches (e.g., else after unconditional return, dead switch cases) +- Any code that cannot execute + +[medium] Unused declarations + +- Unused local variables or parameters + +[low] Orphaned functions + +- Functions defined but never called within file + + + +Commented code with explanation (debugging aid). Unused params required by interface contract. Public API entry points. Plugin interfaces. 
+ + + +Flag when code is demonstrably unreachable/unused AND is not a public API entry point, plugin interface, or documented debugging aid. + diff --git a/resources/conventions/code-quality/04-repetition-and-consistency.md b/resources/conventions/code-quality/04-repetition-and-consistency.md new file mode 100644 index 0000000..e22bcae --- /dev/null +++ b/resources/conventions/code-quality/04-repetition-and-consistency.md @@ -0,0 +1,224 @@ + + +# Repetition & Consistency + +Evaluate whether code follows DRY principles and maintains consistency. + +**The core question**: Is this DRY and consistent? When the same logic, validation, or pattern appears in multiple places, bugs must be fixed everywhere -- and they won't be. When similar operations use different patterns, readers question whether the difference is meaningful. + +**What to look for**: + +- Duplicated code blocks that would require multi-location bug fixes +- Validation rules implemented multiple times +- Business rules scattered across locations +- Repeated boolean expressions +- Inconsistent error handling within a file or class + +**The threshold**: Flag when duplication is unintentional and would require coordinated changes. Flag inconsistency when it creates confusion about whether the difference is meaningful. Intentional duplication for modularity or bounded context isolation is acceptable. + + +Not applicable -- this group requires actual code to evaluate. + + + +When evaluating actual code (Diff Review, Codebase Review, Refactor): + +- Would fixing a bug require changing multiple locations? +- Are validation/business rules duplicated? +- Are similar operations handled inconsistently? +- Do repeated patterns need extraction? + +Evidence format: Quote code with file:line showing the duplication/inconsistency. + + +--- + +## 1. Duplication + + +Code should have a single source of truth. When the same logic exists in multiple places, bugs must be fixed everywhere -- and they won't be. 
+ + +Detect: If I fixed a bug here, where else would I need to fix it? + + +Structural indicators (starting points, not definitive): +Identical multi-line blocks, similar function bodies, function names suggesting similar purpose across modules + + + +Illustrative patterns (not exhaustive -- similar violations exist): + +[high] Direct duplication + +- Same code block duplicated (3+ lines, logic not just boilerplate) +- Any logic that would require multi-location bug fixes + +[medium] Near-duplication + +- Copy-paste with minor variations + +[low] Missed abstraction + +- Common pattern not extracted to shared location + + + +Intentionally different logic serving different purposes. Test setup code. Generated/vendored code. Deliberate isolation for modularity. Similar code in different bounded contexts. + + + +Flag when bug fix would require changing multiple locations AND the duplication is unintentional. + + +## 2. Validation Scattering + + +Validation rules should live in one place. When the same validation is implemented multiple times, implementations diverge -- and some will be wrong. + + +Detect: Is this validation duplicated? Would changing the validation rule require updating multiple locations? + + +Pattern indicators (starting points, not definitive): +Repeated regex patterns, duplicate bounds checks, email/phone/format validation across locations + + + +Illustrative patterns (not exhaustive -- similar violations exist): + +[high] Diverged validation + +- Validation rules diverged between implementations +- Any validation requiring multi-location updates + +[medium] Repeated validation + +- Same validation repeated without shared implementation + +[low] Defensive re-validation + +- Defensive re-validation deeper in call chain + + + +Validation at trust boundaries. Defense-in-depth by design. Context-specific validation rules. Service boundary validation. 
+ + + +Flag when identical validation appears 3+ times (file scope) or 5+ files (codebase scope) AND implementations have diverged or will diverge. + + +## 3. Business Rule Scattering + + +Business rules should have a single source of truth. When the same decision is made in multiple places, they will eventually disagree. + + +Detect: Where is the single source of truth for this rule? If the rule changes, how many places need updating? + + +Pattern indicators (starting points, not definitive): +Repeated conditional patterns, magic numbers in multiple places, pricing/permission/eligibility logic + + + +Illustrative patterns (not exhaustive -- similar violations exist): + +[high] Scattered decisions + +- Same business decision in multiple places that could diverge +- Any business rule without clear single source of truth + +[medium] Mixed concerns + +- Business logic mixed with infrastructure code + +[low] Implicit rules + +- Rules embedded in raw conditionals instead of named predicates + + + +Orchestration calling multiple rule checks. Rules intentionally duplicated for service isolation. Per-tenant/region rule variations. Caching of computed rules. + + + +Flag when same business decision is made in 2+ places (file scope) or 3+ files (codebase scope) AND they have diverged or could diverge independently. + + +## 4. Condition Pattern Repetition + + +Repeated boolean expressions should be named predicates. When the same condition appears everywhere, changing it requires finding all occurrences. + + +Detect: Should this condition be a named predicate? Does extracting it reduce the bug surface area? 
+ + +Pattern indicators (starting points, not definitive): +Identical boolean expressions, repeated guard clauses, permission/feature-flag check patterns + + + +Illustrative patterns (not exhaustive -- similar violations exist): + +[high] High-frequency repetition + +- Identical condition in 3+ places (file) or 5+ files (codebase) (extracting reduces bug surface) +- Any condition requiring multi-location updates when logic changes + +[medium] Pattern repetition + +- Repeated feature flag conditions + +[low] Guard repetition + +- Same guard clause pattern across related functions + + + +Standard guard clauses (null checks, bounds checks). Framework-required patterns. Simple conditions that read clearly inline. + + + +Flag when identical condition appears 3+ times (file scope) or 5+ files (codebase scope) AND extracting to named predicate would reduce bug surface area. + + +## 5. Error Pattern Consistency (File Scope) + + +Error handling should be consistent within an abstraction level. Mixed patterns create confusion about how errors propagate and should be handled. + + +Detect: Is error handling consistent within this file or class? Would a caller know what to expect from similar operations? + + +Pattern indicators (starting points, not definitive): +Mixed exception/return-code patterns, inconsistent error message formats, varying error context + + + +Illustrative patterns (not exhaustive -- similar violations exist): + +[high] Incompatible patterns + +- Incompatible error patterns for similar operations within same class +- Any error handling creating caller confusion + +[medium] Inconsistent hierarchy + +- Inconsistent exception hierarchies within same abstraction level + +[low] Missing convention + +- No standard for error context/wrapping within file + + + +Different patterns for different abstraction levels (domain vs API vs infra). Wrapper functions translating between error styles. Legacy code under active migration. 
+ + + +Flag when same class uses 2+ incompatible error patterns for similar operations AND no migration plan exists. + diff --git a/resources/conventions/code-quality/05-documentation-and-tests.md b/resources/conventions/code-quality/05-documentation-and-tests.md new file mode 100644 index 0000000..109b1d1 --- /dev/null +++ b/resources/conventions/code-quality/05-documentation-and-tests.md @@ -0,0 +1,186 @@ + + +# Documentation & Tests + +Evaluate whether code is properly documented and tested. + +**The core question**: Is this documented and tested? Documentation that contradicts code is worse than no documentation. Tests that don't communicate behavior fail as documentation. Schema drift causes runtime errors. Generated code without provenance documentation misleads maintainers. + +**What to look for**: + +- Documentation contradicting actual code +- Tests with uninformative names +- Missing provenance for generated/vendored code in CLAUDE.md +- Schema-code mismatches (fields in code missing from schema, or vice versa) + +**The threshold**: Flag only demonstrable incorrectness, not incompleteness. Stale docs cause hallucinations; missing docs just mean less context. Flag tests that give no behavioral information. Flag generated/vendored code without CLAUDE.md documentation. Flag schema drift only when provable mismatch exists. + + +Not applicable -- this group requires actual code to evaluate. + + + +When evaluating actual code (Diff Review, Codebase Review, Refactor): + +- Does documentation contradict the code? +- Do test names communicate behavior? +- Is generated/vendored code documented in CLAUDE.md? +- Do schema definitions match code usage? + +Evidence format: Quote code/docs with file:line showing the issue. + + +--- + +## 1. Documentation Staleness + + +Documentation that contradicts code is worse than no documentation. Stale docs mislead readers and cause bugs. + + +Detect: Does the documentation contradict the code? 
Are there claims in docs that the code structurally violates? + + +Pattern indicators (starting points, not definitive): +Docstrings with parameter names, @param, @return, TODO, FIXME + + + +Illustrative patterns (not exhaustive -- similar violations exist): + +[high] Active contradictions + +- Parameter name in docstring not in function signature +- Docstring type conflicts with type annotation (when annotation exists) +- Any documentation making claims the code structurally contradicts + +[medium] Stale claims + +- Docstring describes return value that code never returns +- Comment contains strong claim ("always", "never", "must") AND code structurally contradicts it + +[low] Orphaned references + +- TODO/FIXME referencing completed or removed work + + + +Incomplete documentation. Missing docs. Outdated style in docs. + + + +Flag only when documentation is demonstrably incorrect, not merely incomplete. Incorrect documentation causes hallucinations. + + +## 2. Test Quality as Documentation + + +Tests document expected behavior. When test names don't communicate what behavior they verify, they fail as documentation. + + +Detect: Do tests communicate expected behavior? Can I understand what's being tested from the test name alone? 
+ + +Pattern indicators (starting points, not definitive): +`test_works`, `test_ok`, `test_success`, `test_case_`, `test_1`, `assert True` + + + +Illustrative patterns (not exhaustive -- similar violations exist): + +[high] Uninformative tests + +- Test name matches low-information pattern (e.g., test_works, test_ok, test_success, test_case_1) +- Test contains 0 assertions +- Any test where the name gives no behavioral information + +[medium] Weak naming + +- Test name shorter than 3 tokens (excluding test\_ prefix) +- Test name describes implementation, not behavior + +[low] Test smells + +- Test only asserts True, None, or trivial values +- Multiple similar test functions with minor input variations (use parameterized/table-driven) + + + +Tests referencing ticket numbers (e.g., TEST-1234, JIRA-567) for traceability. Smoke tests named test_works. + + + +Flag when test name gives no behavioral information AND is not a ticket/regression reference. + + +## 3. Generated and Vendored Code Awareness + + +Non-maintainable code (generated, vendored) must be clearly marked. Without provenance documentation, maintainers may try to modify code that should be regenerated. + + +Detect: Is non-maintainable code clearly marked in CLAUDE.md? Can a maintainer tell which code is generated or vendored? + + +Pattern indicators (starting points, not definitive): +`_generated`, `_pb`, `.pb.go`, `vendor/`, `third_party/`, `node_modules/` + + + +Illustrative patterns (not exhaustive -- similar violations exist): + +[high] Missing provenance + +- Generated files missing regeneration command in CLAUDE.md +- Vendored directories missing upstream source in CLAUDE.md +- Any generated/vendored code without documentation of origin + +[medium] Unclear ownership + +- External libraries copied into repo without provenance documentation + + + +Generated files with regeneration command documented. Vendored code with clear upstream reference. 
+ + + +Flag when file/directory matches generation patterns (e.g., `*.pb.go`, `*_generated.*`, `vendor/`, `third_party/`) AND CLAUDE.md lacks corresponding entry explaining provenance. + + +## 4. Schema-Code Coherence + + +Schema and code must stay synchronized. Fields referenced in code but absent from schema (or vice versa) indicate drift that causes runtime errors. + + +Detect: Does code reference schema fields that don't exist? Are there schema fields unused in any code path? + + +Pattern indicators (starting points, not definitive): +Schema file extensions (.proto, .graphql, .json schema), field access patterns + + + +Illustrative patterns (not exhaustive -- similar violations exist): + +[high] Schema drift + +- Code references field not in schema definition +- Schema field unused in any code path (dead field) +- Any mismatch between schema definition and code usage + +[medium] Type drift + +- Type mismatch between schema and code representation + + + +Intentional divergence documented with :SCHEMA: marker. Fields used only in specific deployment configs. + + + +Flag when field name in code has 0 matches in corresponding schema file, or schema field has 0 references in codebase. + + +Intent marker: Use `:SCHEMA:` to suppress for intentional divergence (e.g., `:SCHEMA: field 'legacy_id' unused; migration pending`). diff --git a/resources/conventions/code-quality/06-module-and-dependencies.md b/resources/conventions/code-quality/06-module-and-dependencies.md new file mode 100644 index 0000000..7059158 --- /dev/null +++ b/resources/conventions/code-quality/06-module-and-dependencies.md @@ -0,0 +1,119 @@ + + +# Module & Dependencies + +Evaluate whether module boundaries are clean and architecture aligns with change patterns. + +**The core question**: Are boundaries clean? Modules should have clear boundaries with minimal coupling. Architecture should align with how features actually change. 
When changes ripple across unrelated modules or require touching many components, the boundaries are wrong. + +**What to look for**: + +- Circular dependencies +- Layer violations (domain importing infrastructure) +- Wrong component boundaries (features awkwardly split) +- Architecture forcing cross-cutting changes for single-domain features + +**The threshold**: Flag when dependencies cause compilation issues or domain corruption. Flag when adding a feature requires touching many unrelated components. This is inherently about relationships between files and modules, not local code patterns. + + +When evaluating Code Intent (Design Review phase): + +- Does the proposed design create circular dependencies? +- Does it violate layer boundaries? +- Would implementing this feature require touching many components? + +Evidence format: Quote the Code Intent description showing boundary issue. + + + +When evaluating actual code (Codebase Review, Refactor): + +- Do import graphs show circular dependencies? +- Are there layer violations in actual imports? +- Are features split across many loosely related components? + +Evidence format: Quote import statements or describe dependency structure showing the issue. + + +--- + +## 1. Module Structure + + +Modules should have clear boundaries with minimal coupling. When changes ripple across unrelated modules, the boundaries are wrong. + + +Detect: Do changes ripple to unrelated modules? Can a module be modified without understanding its dependents? 
+ + +Structural indicators (starting points, not definitive): +Import graphs, dependency declarations, module boundaries + + + +Illustrative patterns (not exhaustive -- similar violations exist): + +[high] Structural violations + +- Circular dependencies (e.g., A imports B imports A) +- Layer violations (e.g., domain importing infrastructure) +- Any dependency causing compilation order issues or domain corruption + +[medium] Cohesion problems + +- Wrong cohesion (unrelated things grouped in same module) +- Missing facades (module internals exposed directly) + +[low] Scope creep + +- God modules (too many responsibilities in one module) + + + +Circular deps within same bounded context. Infrastructure adapters importing domain. Shared kernel patterns. + + + +Flag when dependency causes compilation order issues OR when layer violation allows infrastructure to corrupt domain. + + +## 2. Architecture + + +Architecture should align with change patterns. When adding a feature requires touching many unrelated components, the architecture fights the domain. + + +Detect: Would adding a feature require touching many components? Do cross-cutting changes indicate misaligned boundaries? + + +Structural indicators (starting points, not definitive): +Component boundaries, service interfaces, configuration locations + + + +Illustrative patterns (not exhaustive -- similar violations exist): + +[high] Boundary misalignment + +- Wrong component boundaries (features awkwardly split) +- Single points of failure (no fallback, no retry paths) +- Any architecture forcing cross-cutting changes for single-domain features + +[medium] Scaling issues + +- Scaling bottlenecks (synchronous where async needed) +- Monolith patterns in distributed code (or vice versa) + +[low] Missing structure + +- Missing abstraction layers (everything directly coupled) +- Configuration scattered (no central policy, settings in many places) + + + +Intentional coupling for simplicity. Early-stage monolith. 
Bounded contexts with shared kernel. + + + +Flag when architecture forces cross-cutting changes for single-domain features. + diff --git a/resources/conventions/code-quality/07-cross-file-consistency.md b/resources/conventions/code-quality/07-cross-file-consistency.md new file mode 100644 index 0000000..ce28189 --- /dev/null +++ b/resources/conventions/code-quality/07-cross-file-consistency.md @@ -0,0 +1,188 @@ + + +# Cross-File Consistency + +Evaluate whether patterns are consistent across files. + +**The core question**: Is this consistent across files? Similar APIs should behave similarly. The same concept should have one name throughout the codebase. Error handling should be predictable at each abstraction level. Feature flags should be evaluated consistently. + +**What to look for**: + +- Cross-module naming drift (userId/uid/id for same concept) +- Incompatible signatures for similar operations across modules +- Cross-abstraction-level error pattern inconsistency +- Feature flags checked with different logic in different places + +**The threshold**: Flag when inconsistency creates confusion or unpredictability for consumers. Flag when same concept has multiple names across modules AND causes integration confusion. This group requires seeing multiple files to detect patterns. + + +When evaluating Code Intent (Design Review phase): + +- Does the proposed API match existing similar APIs? +- Does it introduce a new name for an existing concept? +- Would error handling match other components at this level? + +Evidence format: Quote the Code Intent description showing inconsistency. + + + +When evaluating actual code (Codebase Review, Refactor): + +- Are similar operations using different conventions? +- Is the same concept named differently across modules? +- Do similar errors get handled differently at the same level? + +Evidence format: Quote code from multiple files showing the inconsistency. + + +--- + +## 1. 
Interface Consistency + + +Similar APIs should have consistent signatures. When similar functions surprise users with different conventions, they cause bugs. + + +Detect: Would a user of these APIs be surprised by inconsistency? Do similar operations have incompatible signatures? + + +Pattern indicators (starting points, not definitive): +Similar function signatures with different parameter orders, CRUD operation patterns, service method signatures + + + +Illustrative patterns (not exhaustive -- similar violations exist): + +[high] Signature inconsistency + +- APIs with similar purposes have incompatible signatures AND share consumers +- Any API inconsistency causing caller confusion + +[medium] Naming inconsistency + +- Inconsistent naming conventions across related functions + +[low] Pattern inconsistency + +- Mixed sync/async for similar operations without clear reason + + + +Intentional API differences. Domain-specific conventions. Versioned APIs. Overloads with clear distinct purpose. + + + +Flag when 2+ similar functions have different parameter orders (file scope) or 3+ APIs have incompatible signatures (codebase scope) AND confusion impacts consumers. + + +## 2. Naming Consistency (Cross-File Scope) + + +A concept should have one name throughout the codebase. Multiple names for the same thing create confusion about whether they're actually the same. + + +Detect: Are there multiple names for the same concept across modules? Would a reader wonder if userId and uid refer to the same entity? 
+ + +Pattern indicators (starting points, not definitive): +Synonyms as variable prefixes across modules (user/account/customer, config/settings/options, id/uid/identifier) + + + +Illustrative patterns (not exhaustive -- similar violations exist): + +[high] Semantic confusion + +- Synonym drift causing confusion at integration points +- Any naming inconsistency causing doubt about identity across modules + +[medium] Inconsistent conventions + +- Inconsistent abbreviations across modules (e.g., userId vs uid vs id) + +[low] Style drift + +- Style inconsistency without semantic confusion + + + +Different names for genuinely different concepts. External API naming conventions. Domain-specific terminology. Legacy compatibility aliases in bounded migration. + + + +Flag when same semantic concept has 3+ different names across modules AND causes confusion about whether they refer to the same thing. + + +## 3. Error Pattern Consistency (Cross-File Scope) + + +Error handling should be consistent within an abstraction level. Mixed patterns create confusion about how errors propagate and should be handled. + + +Detect: Is error handling consistent across components at the same abstraction level? Would a caller know what to expect from similar operations? + + +Pattern indicators (starting points, not definitive): +Mixed exception/return-code patterns, inconsistent error message formats, varying error context across modules + + + +Illustrative patterns (not exhaustive -- similar violations exist): + +[high] Incompatible patterns + +- Incompatible error patterns for similar operations across components +- Any error handling creating caller confusion at integration boundaries + +[medium] Inconsistent hierarchy + +- Inconsistent exception hierarchies at same abstraction level + +[low] Missing convention + +- No standard for error context/wrapping across modules + + + +Different patterns for different abstraction levels (domain vs API vs infra). 
Wrapper functions translating between error styles. Legacy code under active migration. + + + +Flag when same abstraction level uses 3+ incompatible error patterns across files for similar operations AND no migration plan exists. + + +## 4. Feature Flag Sprawl + + +Feature flags should be checked consistently. When the same flag is evaluated with different logic in different places, behavior becomes unpredictable. + + +Detect: How are feature flags checked across the codebase? Is the same flag evaluated consistently everywhere? + + +Structural indicators (starting points, not definitive): +Feature flag checks, toggle patterns, conditional feature code + + + +Illustrative patterns (not exhaustive -- similar violations exist): + +[high] Inconsistent evaluation + +- Feature flags checked inconsistently (different conditions for same flag) +- Any flag with divergent evaluation logic across locations + +[medium] Undocumented dependencies + +- Flag dependencies not documented (flag A requires flag B) + + + +Flags with intentionally different behavior per context. A/B test variations. Gradual rollout logic. + + + +Flag when same feature flag is checked with different logic in different places AND the difference is unintentional. + + +Note: Dead flags (feature shipped, never removed) are covered in 08-codebase-patterns.md Zombie Code (Codebase Scope). diff --git a/resources/conventions/code-quality/08-codebase-patterns.md b/resources/conventions/code-quality/08-codebase-patterns.md new file mode 100644 index 0000000..aef9b93 --- /dev/null +++ b/resources/conventions/code-quality/08-codebase-patterns.md @@ -0,0 +1,153 @@ + + +# Codebase Patterns + +Evaluate patterns that only emerge from codebase-wide analysis. + +**The core question**: What patterns are emerging? Understanding should not require reading the entire codebase. Repeated patterns across files indicate missing abstractions. Dead exports and modules accumulate as noise. 
These issues are invisible in local review -- they only become visible when seeing the whole codebase. + +**What to look for**: + +- Flows requiring 5+ files to understand with no documentation +- Same transformation applied in 3+ files (missed abstraction) +- Exported functions with 0 callers anywhere +- Feature flags always true/false (never toggled) +- Dead modules with no imports from live code + +**The threshold**: Flag when comprehension is broken (5+ files, no guide). Flag when pattern appears in 3+ implementations AND extraction would help. Flag demonstrably dead code that's not a public API or plugin interface. This group requires whole-codebase visibility. + + +Not applicable -- this group requires whole-codebase analysis. + + + +When evaluating the codebase (Codebase Review, Refactor): + +- Can I understand flows without reading many files? +- Are there repeated patterns that should be abstracted? +- Is there dead code at the export/module level? + +Evidence format: Describe the pattern across multiple files or quote specific dead exports. + + +--- + +## 1. Cross-File Comprehension + + +Understanding a flow should not require reading the entire codebase. When grasping one operation requires 5+ files with no guide, comprehension is broken. + + +Detect: How many files must I read to understand this flow? Is there documentation or an orchestrator that explains the big picture? + + +Structural indicators (starting points, not definitive): +Call chains, event handlers, callback registrations + + + +Illustrative patterns (not exhaustive -- similar violations exist): + +[high] Implicit contracts + +- Implicit contracts between files (caller must know callee internals) +- Any flow requiring undocumented assumptions to understand + +[medium] Hidden dependencies + +- Hidden dependencies (file A assumes file B ran first) + +[low] Scattered flow + +- Scattered control flow (one operation spans 5+ files with no orchestrator) + + + +Well-documented module boundaries. 
Plugin architectures. Event-driven designs with clear event contracts. + + + +Flag when understanding a single operation requires reading 5+ files with no documentation of the flow. + + +## 2. Abstraction Opportunities + + +Repeated patterns across files indicate missing abstractions. When you see the same transformation in 3+ places, a concept is trying to emerge. + + +Detect: What domain concept is hiding across these repeated patterns? Would extracting a shared abstraction reduce duplication? + + +Structural indicators (starting points, not definitive): +Parallel implementations, similar transformation chains, repeated configuration shapes + + + +Illustrative patterns (not exhaustive -- similar violations exist): + +[high] Missed abstractions + +- Same transformation applied in multiple files (3+ occurrences) +- Any pattern appearing across implementations that should be shared + +[medium] Structural duplication + +- Parallel class hierarchies doing similar things differently +- Copy-paste inheritance (similar classes with minor variations) + +[low] Configuration patterns + +- Data transformation pipelines with identical structure +- Configuration patterns repeated without abstraction + + + +Intentionally similar but independent implementations. Domain-specific variations. Templates/generators producing similar code. + + + +Flag when pattern appears in 3+ implementations AND the fix is extracting shared abstraction. These become visible only after seeing multiple implementations. + + +## 3. Zombie Code (Codebase Scope) + + +Dead code is noise that misleads readers. Code that cannot execute or is never called should be removed, not left to confuse future maintainers. + + +Detect: If I deleted this export or module, would any test fail or behavior change? 
+ + +Pattern indicators (starting points, not definitive): +Exported symbols with 0 callers, feature flags, configuration options, dead modules + + + +Illustrative patterns (not exhaustive -- similar violations exist): + +[high] Dead exports + +- Exported functions with 0 callers anywhere in codebase +- Feature flags always true/false (never toggled in any environment) +- Any publicly accessible code with no consumers + +[medium] Stale flags + +- Dead flags (feature shipped, flag never removed) + +[low] Orphaned configuration + +- Configuration options never read +- Dead modules (no imports from any live code path) + + + +Public API entry points. Plugin interfaces. Feature flags controlled externally. Backward compatibility exports with deprecation notice. + + + +Flag when code is demonstrably unreachable/unused AND is not a public API entry point, plugin interface, or documented compatibility shim. + + +Note: File-scope zombie code (commented blocks, unreachable branches) is covered in 03-patterns-and-idioms.md Zombie Code (File Scope). diff --git a/resources/conventions/diff-format.md b/resources/conventions/diff-format.md new file mode 100644 index 0000000..1cc3374 --- /dev/null +++ b/resources/conventions/diff-format.md @@ -0,0 +1,201 @@ +# Unified Diff Format for Plan Code Changes + +This document is the authoritative specification for code changes in implementation plans. + +## Purpose + +Unified diff format encodes both **location** and **content** in a single structure. This eliminates the need for location directives in comments (e.g., "insert at line 42") and provides reliable anchoring even when line numbers drift. 
+ +## Anatomy + +```diff +--- a/path/to/file.py ++++ b/path/to/file.py +@@ -123,6 +123,15 @@ def existing_function(ctx): + # Context lines (unchanged) serve as location anchors + existing_code() + ++ # NEW: Comments explain WHY - transcribed verbatim by Developer ++ # Guard against race condition when messages arrive out-of-order ++ new_code() + + # More context to anchor the insertion point + more_existing_code() +``` + +## Components + +| Component | Authority | Purpose | +| ------------------------------------------ | ------------------------- | ---------------------------------------------------------- | +| File path (`--- a/path/to/file.py`) | **AUTHORITATIVE** | Exact target file | +| Line numbers (`@@ -123,6 +123,15 @@`) | **APPROXIMATE** | May drift as earlier milestones modify the file | +| Function context (`@@ ... @@ def func():`) | **SCOPE HINT** | Function/method containing the change | +| Context lines (unchanged) | **AUTHORITATIVE ANCHORS** | Developer matches these patterns to locate insertion point | +| `+` lines | **NEW CODE** | Code to add, including WHY comments | +| `-` lines | **REMOVED CODE** | Code to delete | + +## Two-Layer Location Strategy + +Code changes use two complementary layers for location: + +1. **Prose scope hint** (optional): Natural language describing conceptual location +2. **Diff with context**: Precise insertion point via context line matching + +### Layer 1: Prose Scope Hints + +For complex changes, add a prose description before the diff block: + +````markdown +Add validation after input sanitization in `UserService.validate()`: + +```diff +@@ -123,6 +123,15 @@ def validate(self, user): + sanitized = sanitize(user.input) + ++ # Validate format before proceeding ++ if not is_valid_format(sanitized): ++ raise ValidationError("Invalid format") ++ + return process(sanitized) +`` ` +``` +```` + +The prose tells Developer **where conceptually** (which method, what operation precedes it). 
The diff tells Developer **where exactly** (context lines to match). + +**When to use prose hints:** + +- Changes to large files (>300 lines) +- Multiple changes to the same file in one milestone +- Complex nested structures where function context alone is ambiguous +- When the surrounding code logic matters for understanding placement + +**When prose is optional:** + +- Small files with obvious structure +- Single change with unique context lines +- Function context in @@ line provides sufficient scope + +### Layer 2: Function Context in @@ Line + +The `@@` line can include function/method context after the line numbers: + +```diff +@@ -123,6 +123,15 @@ def validate(self, user): +``` + +This follows standard unified diff format (git generates this automatically). It tells Developer which function contains the change, aiding navigation even when line numbers drift. + +## Why Context Lines Matter + +When a plan has multiple milestones that modify the same file, earlier milestones shift line numbers. The `@@ -123` in Milestone 3 may no longer be accurate after Milestones 1 and 2 execute. + +**Context lines solve this**: Developer searches for the unchanged context patterns in the actual file. These patterns are stable anchors that survive line number drift. + +Include 2-3 context lines before and after changes for reliable matching. + +## Comment Placement + +Comments in `+` lines explain **WHY**, not **WHAT**. These comments: + +- Are transcribed verbatim by Developer +- Source rationale from Planning Context (Decision Log, Rejected Alternatives) +- Use concrete terms without hidden baselines +- Must pass temporal contamination review (see `.claude/conventions/temporal.md`) + +**Important**: Comments written during planning often contain temporal contamination -- change-relative language, baseline references, or location directives. @agent-technical-writer reviews and fixes these before @agent-developer transcribes them. 
+ + +```diff ++ # Polling chosen over webhooks: 30% webhook delivery failures in third-party API ++ # WebSocket rejected to preserve stateless architecture ++ updates = poll_api(interval=30) +``` +Explains WHY this approach was chosen. + + + +```diff ++ # Poll the API every 30 seconds ++ updates = poll_api(interval=30) +``` +Restates WHAT the code does - redundant with the code itself. + + + +```diff ++ # Generous timeout for slow networks ++ REQUEST_TIMEOUT = 60 +``` +"Generous" compared to what? Hidden baseline provides no actionable information. + + + +```diff ++ # 60s accommodates 95th percentile upstream response times ++ REQUEST_TIMEOUT = 60 +``` +Concrete justification that explains why this specific value. + + +## Location Directives: Forbidden + +The diff structure handles location. Location directives in comments are redundant and error-prone. + + +```python +# Insert this BEFORE the retry loop (line 716) +# Timestamp guard: prevent older data from overwriting newer +get_ctx, get_cancel = context.with_timeout(ctx, 500) +``` +Location directive leaked into comment - line numbers become stale. + + + +```diff +@@ -714,6 +714,10 @@ def put(self, ctx, tags): + for tag in tags: + subject = tag.subject + ++ # Timestamp guard: prevent older data from overwriting newer ++ # due to network delays, retries, or concurrent writes ++ get_ctx, get_cancel = context.with_timeout(ctx, 500) + + # Retry loop for Put operations + for attempt in range(max_retries): + +``` +Context lines (`for tag in tags`, `# Retry loop`) are stable anchors that survive line number drift. + + +## When to Use Diff Format + + + +| Code Characteristic | Use Diff? 
| Boundary Test | +| --------------------------------------- | --------- | ---------------------------------------- | +| Conditionals, loops, error handling, | YES | Has branching logic | +| state machines | | | +| Multiple insertions same file | YES | >1 change location | +| Deletions or replacements | YES | Removing/changing existing code | +| Pure assignment/return (CRUD, getters) | NO | Single statement, no branching | +| Boilerplate from template | NO | Developer can generate from pattern name | + +The boundary test: "Does Developer need to see exact placement and context to implement correctly?" + +- YES -> diff format +- NO (can implement from description alone) -> prose sufficient + + + +## Validation Checklist + +Before finalizing code changes in a plan: + +- [ ] File path is exact (not "auth files" but `src/auth/handler.py`) +- [ ] Context lines exist in target file (validate patterns match actual code) +- [ ] Comments explain WHY, not WHAT +- [ ] No location directives in comments +- [ ] No hidden baselines (test: "[adjective] compared to what?") +- [ ] 2-3 context lines for reliable anchoring +``` diff --git a/resources/conventions/documentation.md b/resources/conventions/documentation.md new file mode 100644 index 0000000..4f4bc68 --- /dev/null +++ b/resources/conventions/documentation.md @@ -0,0 +1,402 @@ +# Documentation Conventions + +This is the authoritative documentation conventions file. All code-adjacent +documentation (CLAUDE.md, README.md) must follow these principles. + +## Core Principles + +**Self-contained documentation**: All code-adjacent documentation (CLAUDE.md, +README.md) must be self-contained. Do NOT reference external authoritative +sources (doc/ directories, wikis, external documentation). If knowledge exists +in an authoritative source, it must be summarized locally. Duplication is +acceptable; the maintenance burden is the cost of locality. + +**CLAUDE.md = pure index**: CLAUDE.md files are navigation aids only. 
They +contain WHAT is in the directory and WHEN to read each file. All explanatory +content (architecture, decisions, invariants) belongs in README.md. + +**README.md = invisible knowledge**: README.md files capture knowledge NOT +visible from reading source code. If ANY invisible knowledge exists for a +directory, README.md is required. + +## CLAUDE.md Format Specification + +### Index Format + +Use tabular format with What and When columns: + +```markdown +## Files + +| File | What | When to read | +| ----------- | ------------------------------ | ----------------------------------------- | +| `cache.rs` | LRU cache with O(1) operations | Implementing caching, debugging evictions | +| `errors.rs` | Error types and Result aliases | Adding error variants, handling failures | + +## Subdirectories + +| Directory | What | When to read | +| ----------- | ----------------------------- | ----------------------------------------- | +| `config/` | Runtime configuration loading | Adding config options, modifying defaults | +| `handlers/` | HTTP request handlers | Adding endpoints, modifying request flow | +``` + +### Column Guidelines + +- **File/Directory**: Use backticks around names: `cache.rs`, `config/` +- **What**: Factual description of contents (nouns, not actions) +- **When to read**: Task-oriented triggers using action verbs (implementing, + debugging, modifying, adding, understanding) +- At least one column must have content; empty cells use `-` + +### Trigger Quality Test + +Given task "add a new validation rule", can an LLM scan the "When to read" +column and identify the right file? + +### Generated and Vendored Code + +CLAUDE.md MUST flag files/directories that should not be manually edited: + +| Directory | What | When to read | +| -------------- | --------------------------------- | ------------------- | +| `proto/gen/` | Generated from proto/. 
Run `make` | Never edit directly | +| `vendor/` | Vendored deps, upstream: go.mod | Never edit directly | +| `third_party/` | Copied from github.com/foo v1.2.3 | Never edit directly | + +The "When to read" column should indicate these are not editable. Include +regeneration commands in the "What" column or in a dedicated Regenerate section. + +This prevents LLMs from wasting effort analyzing or "improving" auto-generated +code, and prevents edits that will be overwritten or cause merge conflicts. + +See also: conventions/code-quality/baseline.md "Generated and Vendored Code Awareness". + +### ROOT vs SUBDIRECTORY CLAUDE.md + +**ROOT CLAUDE.md:** + +```markdown +# [Project Name] + +[One sentence: what this is] + +## Files + +| File | What | When to read | +| ---- | ---- | ------------ | + +## Subdirectories + +| Directory | What | When to read | +| --------- | ---- | ------------ | + +## Build + +[Copy-pasteable command] + +## Test + +[Copy-pasteable command] + +## Development + +[Setup instructions, environment requirements, workflow notes] +``` + +**SUBDIRECTORY CLAUDE.md:** + +```markdown +# [directory-name]/ + +## Files + +| File | What | When to read | +| ---- | ---- | ------------ | + +## Subdirectories + +| Directory | What | When to read | +| --------- | ---- | ------------ | +``` + +**Critical constraint:** CLAUDE.md files are navigation aids, not explanatory +documents. 
They contain: + +- File/directory index (REQUIRED): tabular format with What/When columns +- One-sentence overview (OPTIONAL): what this directory is +- Operational sections (OPTIONAL): Build, Test, Regenerate, Deploy, or similar + commands specific to this directory's artifacts + +They do NOT contain: + +- Architectural explanations (-> README.md) +- Design decisions or rationale (-> README.md) +- Invariants or constraints (-> README.md) +- Multi-paragraph prose (-> README.md) + +Operational sections must be copy-pasteable commands with minimal context, not +explanatory prose about why the build works a certain way. + +## README.md Specification + +### Creation Criteria (Invisible Knowledge Test) + +Create README.md when the directory contains ANY invisible knowledge -- +knowledge NOT visible from reading the code: + +- Planning decisions (from Decision Log during implementation) +- Business context (why the product works this way) +- Architectural rationale (why this structure) +- Trade-offs made (what was sacrificed for what) +- Invariants (rules that must hold but aren't in types) +- Historical context (why not alternatives) +- Performance characteristics (non-obvious efficiency properties) +- Multiple components interact through non-obvious contracts +- The directory's structure encodes domain knowledge +- Failure modes or edge cases aren't apparent from reading individual files +- "Rules" developers must follow that aren't enforced by compiler/linter + +**README.md is required if ANY of the above exist.** The trigger is semantic +(presence of invisible knowledge), not structural (file count, complexity). + +**DO NOT create README.md when:** + +- The directory is purely organizational with no decisions behind its structure +- All knowledge is visible from reading source code +- You'd only be restating what code already shows + +### Content Test + +For each sentence in README.md, ask: "Could a developer learn this by reading +the source files?" 
+ +- If YES: delete the sentence +- If NO: keep it + +README.md earns its tokens by providing INVISIBLE knowledge: the reasoning +behind the code, not descriptions of the code. + +### README.md Structure + +```markdown +# [Component Name] + +## Overview + +[One paragraph: what problem this solves, high-level approach] + +## Architecture + +[How sub-components interact; data flow; key abstractions] + +## Design Decisions + +[Tradeoffs made and why; alternatives considered] + +## Invariants + +[Rules that must be maintained; constraints not enforced by code] +``` + +## Architecture Documentation + +For cross-cutting concerns and system-wide relationships that span multiple +directories, create dedicated architecture documentation. + +### Structure + +```markdown +# Architecture: [System/Feature Name] + +## Overview + +[One paragraph: problem and high-level approach] + +## Components + +[Each component with its single responsibility and boundaries] + +## Data Flow + +[Critical paths - prefer diagrams for complex flows] + +## Design Decisions + +[Key tradeoffs and rationale] + +## Boundaries + +[What this system does NOT do; where responsibility ends] +``` + +### Quality Standard + +Components must explain relationships, not just list responsibilities. + +Wrong -- lists without relationships: + +```markdown +## Components + +- UserService: Handles user operations +- AuthService: Handles authentication +- Database: Stores data +``` + +Right -- explains boundaries and flow: + +```markdown +## Components + +- UserService: User CRUD only. Delegates auth to AuthService. Never queries auth + state directly. +- AuthService: Token validation, session management. Stateless; all state in + Redis. +- PostgreSQL: Source of truth for user data. AuthService has no direct access. + +Flow: Request -> AuthService (validate) -> UserService (logic) -> Database +``` + +Prefer diagrams over prose for relationships. 
+ +## In-Code Documentation + +Code-level documentation captures knowledge at the point where it is most useful. +The principle: knowledge belongs as close as possible to the code it describes. +Cross-cutting knowledge that cannot be localized belongs in README.md. + +### Tier 1: Inline Comments + +Above statements or expressions where the choice is non-obvious. + +Document *why* this approach, never *what* the code does. The reader can see what +the code does: they cannot see why it was chosen over alternatives. + +Good: + +```python +# Polling: 30% webhook delivery failures observed in production +result = poll_endpoint(url, interval=30) + +# Mutex-free: single-writer guarantee from caller contract +counter.fetch_add(1, Ordering::Relaxed) +``` + +Bad: + +```python +# Poll the endpoint +result = poll_endpoint(url, interval=30) + +# Increment the counter +counter.fetch_add(1, Ordering::Relaxed) +``` + +When a decision log entry exists, reference it: `# DL-003: Polling over webhooks` + +### Tier 2: Function-Level Explanation Blocks + +Near the top of non-trivial functions (after signature, before body logic). +Required when a function has >3 distinct transformation steps, coordinates +multiple subsystems, or implements a non-obvious algorithm. + +Content: what the function does, how it does it, how it fits in the overall +architecture, what problem it solves. + +```python +def reconcile_state(local, remote): + # Reconciles local state against remote source of truth. Operates in + # three phases: + # 1. Diff local vs remote to find divergent keys + # 2. For each divergence, apply conflict resolution (remote wins) + # 3. Write merged state back to local store + # + # Called by the sync loop after each heartbeat. Remote state is + # authoritative -- local is a cache that may lag behind. + ... +``` + +Skip for CRUD operations and standard patterns where the code speaks for itself. 
+ +### Tier 3: Docstrings + +**Private functions**: One-line summary + trigger clause (when to call). + +```python +def _normalize_key(k): + """Strip whitespace and lowercase. Use before cache lookup.""" +``` + +**Public functions**: Summary + trigger clause + parameter semantics + example. +Optimized for LLM consumption -- trigger clauses and examples enable accurate +tool selection. + +```python +def validate_config(path, strict=False): + """Validate configuration file against schema. + + Use when loading user-provided config at startup or after hot-reload. + In strict mode, unknown keys are errors; otherwise warnings. + + Args: + path: Absolute path to YAML config file. + strict: Treat unknown keys as errors. + + Returns: + Validated Config object. + + Example: + cfg = validate_config("/etc/app/config.yaml", strict=True) + """ +``` + +### Tier 4: Module Documentation + +Top-of-file comment or module docstring. Documents what the module contains and +why it exists as a separate unit. + +```python +"""Rate limiting using sliding window counters. + +Provides per-client rate limiting for the API gateway. Sliding window +chosen over fixed window to prevent burst-at-boundary attacks (DL-007). +Token bucket rejected: memory overhead per client unacceptable at +projected scale (>100k concurrent clients). +""" +``` + +### Tier 5: Invisible Knowledge Placement + +Invisible knowledge is knowledge not visible from reading the code: business +context, architectural rationale, tradeoffs, constraints, rejected alternatives. + +**Placement hierarchy** (closest viable location wins): + +1. **Inline comment**: When knowledge applies to a specific statement +2. **Function-level block**: When knowledge applies to an entire function's + approach or algorithm +3. **Module docstring**: When knowledge applies to why this module exists or + its overall design +4. 
**README.md**: When knowledge is cross-cutting (spans multiple files/modules) + or cannot be localized to a single code point + +What is NOT acceptable: invisible knowledge captured only in planning artifacts +(decision logs, plan documents, conversation history) that are not carried +forward into the codebase. Every decision, constraint, and tradeoff must land +in code or README.md. + +### Priority Order + +When deciding what to document, prioritize by uncertainty: + +| Priority | Code Pattern | WHY Question | +| -------- | ---------------------------- | ---------------------- | +| HIGH | Multiple valid approaches | Why this approach? | +| HIGH | Thresholds, timeouts, limits | Why these values? | +| HIGH | Error handling paths | Recovery strategy? | +| HIGH | External system interactions | What assumptions? | +| MEDIUM | Non-standard pattern usage | Why deviate from norm? | +| MEDIUM | Performance-critical paths | Why this optimization? | +| LOW | Boilerplate/established | Skip unless unusual | +| LOW | Simple CRUD operations | Skip unless unusual | diff --git a/resources/conventions/intent-markers.md b/resources/conventions/intent-markers.md new file mode 100644 index 0000000..1ce30d9 --- /dev/null +++ b/resources/conventions/intent-markers.md @@ -0,0 +1,33 @@ +# Intent Markers + +Markers suppress QR checks for intentional code patterns. + +## Format + +`:MARKER: [what]; [why]` + +- Semicolon separator REQUIRED +- `[what]` = specific pattern being marked +- `[why]` = rationale (invariant relied upon, safety guarantee, etc.) 
+ +## Markers + +| Marker | Purpose | Example | +| ---------- | -------------------------------- | ---------------------------------------------------- | +| `:PERF:` | Performance-critical intentional | `:PERF: unchecked bounds; loop invariant i15 methods OR >10 deps OR mixed concerns | +| GOD_FUNCTION | >50 lines OR mixed abstraction OR >3 nesting | +| DUPLICATE_LOGIC | Copy-pasted blocks, parallel functions | +| INCONSISTENT_ERROR_HANDLING | Mixed exceptions/codes in same module | +| CONVENTION_VIOLATION | Violates documented project convention | +| TESTING_STRATEGY_VIOLATION | Tests don't follow confirmed strategy | + +### DIAGRAM (MUST for semantic, COULD for format) + +Diagram graph integrity. Semantic issues block; format issues warn. + +| Category | Severity | Detection | +| -------------------- | -------- | ------------------------------------------ | +| ORPHAN_NODE | MUST | Node with zero edges | +| INVALID_EDGE_REF | MUST | Edge source/target references missing node | +| INVALID_SCOPE_REF | MUST | Scope references non-existent milestone | +| DIAGRAM_WIDTH_EXCEED | COULD | ASCII render line > 80 chars | +| UNCLOSED_BOX | COULD | Box corners misaligned in ASCII render | + +### COSMETIC (COULD) + +Auto-fixable, minimal impact. + +| Category | Detection | +| ------------------- | ---------------------------------------------------------- | +| DEAD_CODE | Unused functions, impossible branches | +| FORMATTER_FIXABLE | Style issues fixable by formatter/linter | +| MINOR_INCONSISTENCY | Non-conformance with no documented rule | +| TOOLCHAIN_CATCHABLE | Error in planned code that compiler/linter/interpreter | +| | would flag, where intended correct code is obvious from | +| | context (typos, missing imports, non-exhaustive match). 
| +| | NOT: errors revealing plan-level misunderstanding -- those | +| | are ASSUMPTION_UNVALIDATED (MUST) | + +## IK Proximity Rule + +Invisible knowledge must be at BEST location: "as close as possible to where +relevant, but not more" + +| Knowledge Type | Best Location | +| -------------- | --------------------------------------- | +| Accepted risks | :TODO: comment at flagged code location | +| Architecture | README.md in SAME directory | +| Tradeoffs | Code comment where decision shows | +| Invariants | Code comment at enforcement point | + +Wrong location = IK_TRANSFER_FAILURE (MUST) diff --git a/resources/conventions/structural.md b/resources/conventions/structural.md new file mode 100644 index 0000000..3d5e8c0 --- /dev/null +++ b/resources/conventions/structural.md @@ -0,0 +1,152 @@ +# Default Conventions + +These conventions apply when project documentation does not specify otherwise. + +## Priority Hierarchy + +Higher tiers override lower. Cite backing source when auditing. + +| Tier | Source | Action | +| ---- | --------------- | -------------------------------- | +| 1 | user-specified | Explicit user instruction: apply | +| 2 | doc-derived | CLAUDE.md / project docs: apply | +| 3 | default-derived | This document: apply | +| 4 | assumption | No backing: CONFIRM WITH USER | + +## Severity Levels + +See `severity.md` for full definitions. 
+ +| Level | Meaning | +| ------ | ------------------------ | +| MUST | Unrecoverable if missed | +| SHOULD | Maintainability debt | +| COULD | Auto-fixable, low impact | + +--- + +## Structural Conventions + + +**God Object**: >15 public methods OR >10 dependencies OR mixed concerns (networking + UI + data) +Severity: SHOULD + + + +**God Function**: >50 lines OR multiple abstraction levels OR >3 nesting levels +Severity: SHOULD +Exception: Inherently sequential algorithms or state machines + + + +**Duplicate Logic**: Copy-pasted blocks, repeated error handling, parallel near-identical functions +Severity: SHOULD + + + +**Dead Code**: No callers, impossible branches, unread variables, unused imports +Severity: COULD + + + +**Inconsistent Error Handling**: Mixed exceptions/error codes, inconsistent types, swallowed errors +Severity: SHOULD +Exception: Project specifies different handling per error category + + +--- + +## File Organization Conventions + + +**Test Organization**: Extend existing test files; create new only when: +- Distinct module boundary OR >500 lines OR different fixtures required +Severity: SHOULD (for unnecessary fragmentation) + + + +**File Creation**: Prefer extending existing files; create new only when: +- Clear module boundary OR >300-500 lines OR distinct responsibility +Severity: COULD + + +--- + +## Testing Conventions + + +**Principle**: Test behavior, not implementation. Fast feedback. + +**Test Type Hierarchy** (preference order): + +1. **Integration tests** (highest value) + - Test end-user verifiable behavior + - Use real systems/dependencies (e.g., testcontainers) + - Verify component interaction at boundaries + - This is where the real value lies + +2. **Property-based / generative tests** (preferred) + - Cover wide input space with invariant assertions + - Catch edge cases humans miss + - Use for functions with clear input/output contracts + +3. 
**Unit tests** (use sparingly) + - Only for highly complex or critical logic + - Risk: maintenance liability, brittleness to refactoring + - Prefer integration tests that cover same behavior + +**Test Placement**: Tests are part of implementation milestones, not separate +milestones. A milestone is not complete until its tests pass. This creates fast +feedback during development. + +**DO**: + +- Integration tests with real dependencies (testcontainers, etc.) +- Property-based tests for invariant-rich functions +- Parameterized fixtures over duplicate test bodies +- Test behavior observable by end users + +**DON'T**: + +- Test external library/dependency behavior (out of scope) +- Unit test simple code (maintenance liability exceeds value) +- Mock owned dependencies (use real implementations) +- Test implementation details that may change +- One-test-per-variant when parametrization applies + +Severity: SHOULD (violations), COULD (missed opportunities) + + +--- + +## Modernization Conventions + + +**Version Constraint Violation**: Features unavailable in project's documented target version +Requires: Documented target version +Severity: SHOULD + + + +**Modernization Opportunity**: Legacy APIs, verbose patterns, manual stdlib reimplementations +Severity: COULD +Exception: Project requires legacy pattern + + +--- + +## Testing Strategy Defaults + + +**Default Test Type Preferences** (apply when project docs silent): + +| Type | Default Strategy | Rationale | +| ----------- | --------------------------- | ------------------------- | +| Unit | Property-based (quickcheck) | Few tests, many variables | +| Integration | Behavior-focused, real deps | End-user verifiable | +| E2E | Generated datasets | Deterministic replay | + +These are Tier 3 defaults. User confirmation (Tier 1) overrides. + +Severity: TESTING_STRATEGY_VIOLATION (SHOULD) if contradicted without override. 
+ diff --git a/resources/conventions/temporal.md b/resources/conventions/temporal.md new file mode 100644 index 0000000..5e9d08d --- /dev/null +++ b/resources/conventions/temporal.md @@ -0,0 +1,135 @@ +# Temporal Contamination in Code Comments + +This document defines terminology for identifying comments that leak information +about code history, change processes, or planning artifacts. Both +@agent-technical-writer and @agent-quality-reviewer reference this +specification. + +## The Core Principle + +> **Timeless Present Rule**: Comments must be written from the perspective of a +> reader encountering the code for the first time, with no knowledge of what +> came before or how it got here. The code simply _is_. + +**Why this matters**: Change-narrative comments are an LLM artifact -- a +category error, not merely a style issue. The change process is ephemeral and +irrelevant to the code's ongoing existence. Humans writing comments naturally +describe what code IS, not what they DID to create it. Referencing the change +that created a comment is fundamentally confused about what belongs in +documentation. + +Think of it this way: a novel's narrator never describes the author's typing +process. Similarly, code comments should never describe the developer's editing +process. The code simply exists; the path to its existence is invisible. + +In a plan, this means comments are written _as if the plan was already +executed_. + +## Detection Heuristic + +Evaluate each comment against these five questions. Signal words are examples -- +extrapolate to semantically similar constructs. + +### 1. Does it describe an action taken rather than what exists? 
+ +**Category**: Change-relative + +| Contaminated | Timeless Present | +| -------------------------------------- | ----------------------------------------------------------- | +| `// Added mutex to fix race condition` | `// Mutex serializes cache access from concurrent requests` | +| `// New validation for the edge case` | `// Rejects negative values (downstream assumes unsigned)` | +| `// Changed to use batch API` | `// Batch API reduces round-trips from N to 1` | + +Signal words (non-exhaustive): "Added", "Replaced", "Now uses", "Changed to", +"New", "Updated", "Refactored" + +### 2. Does it compare to something not in the code? + +**Category**: Baseline reference + +| Contaminated | Timeless Present | +| ------------------------------------------------- | ------------------------------------------------------------------- | +| `// Replaces per-tag logging with summary` | `// Single summary line; per-tag logging would produce 1500+ lines` | +| `// Unlike the old approach, this is thread-safe` | `// Thread-safe: each goroutine gets independent state` | +| `// Previously handled in caller` | `// Encapsulated here; caller should not manage lifecycle` | + +Signal words (non-exhaustive): "Instead of", "Rather than", "Previously", +"Replaces", "Unlike the old", "No longer" + +### 3. Does it describe where to put code rather than what code does? + +**Category**: Location directive + +| Contaminated | Timeless Present | +| ----------------------------- | --------------------------------------------- | +| `// After the SendAsync call` | _(delete -- diff structure encodes location)_ | +| `// Insert before validation` | _(delete -- diff structure encodes location)_ | +| `// Add this at line 425` | _(delete -- diff structure encodes location)_ | + +Signal words (non-exhaustive): "After", "Before", "Insert", "At line", "Here:", +"Below", "Above" + +**Action**: Always delete. Location is encoded in diff structure, not comments. + +### 4. 
Does it describe intent rather than behavior? + +**Category**: Planning artifact + +| Contaminated | Timeless Present | +| -------------------------------------- | -------------------------------------------------------- | +| `// TODO: add retry logic later` | _(delete, or implement retry now)_ | +| `// Will be extended for batch mode` | _(delete -- do not document hypothetical futures)_ | +| `// Temporary workaround until API v2` | `// API v1 lacks filtering; client-side filter required` | + +Signal words (non-exhaustive): "Will", "TODO", "Planned", "Eventually", "For +future", "Temporary", "Workaround until" + +**Action**: Delete, implement the feature, or reframe as current constraint. + +### 5. Does it describe the author's choice rather than code behavior? + +**Category**: Intent leakage + +| Contaminated | Timeless Present | +| ------------------------------------------ | ---------------------------------------------------- | +| `// Intentionally placed after validation` | `// Runs after validation completes` | +| `// Deliberately using mutex over channel` | `// Mutex serializes access (single-writer pattern)` | +| `// Chose polling for reliability` | `// Polling: 30% webhook delivery failures observed` | +| `// We decided to cache at this layer` | `// Cache here: reduces DB round-trips for hot path` | + +Signal words (non-exhaustive): "intentionally", "deliberately", "chose", +"decided", "on purpose", "by design", "we opted" + +**Action**: Extract the technical justification; discard the decision narrative. +The reader doesn't need to know someone "decided" -- they need to know WHY this +approach works. + +**The test**: Can you delete the intent word and the comment still makes sense? +If yes, delete the intent word. If no, reframe around the technical reason. + +--- + +**Catch-all**: If a comment only makes sense to someone who knows the code's +history, it is temporally contaminated -- even if it does not match any category +above. 
+ +## Subtle Cases + +Same word, different verdict -- demonstrates that detection requires semantic +judgment, not keyword matching. + +| Comment | Verdict | Reasoning | +| -------------------------------------- | ------------ | ------------------------------------------------ | +| `// Now handles edge cases properly` | Contaminated | "properly" implies it was improper before | +| `// Now blocks until connection ready` | Clean | "now" describes runtime moment, not code history | +| `// Fixed the null pointer issue` | Contaminated | Describes a fix, not behavior | +| `// Returns null when key not found` | Clean | Describes behavior | + +## The Transformation Pattern + +> **Extract the technical justification, discard the change narrative.** + +1. What useful info is buried? (problem, behavior) +2. Reframe as timeless present + +Example: "Added mutex to fix race" -> "Mutex serializes concurrent access" diff --git a/src/planner/lib/agent-prompts.ts b/src/planner/lib/agent-prompts.ts new file mode 100644 index 0000000..8ab8293 --- /dev/null +++ b/src/planner/lib/agent-prompts.ts @@ -0,0 +1,20 @@ +// Hard-coded agent prompts for planner phases. +// These are embedded at compile-time to avoid runtime filesystem dependencies. +// Conventions remain file-based and explorable by the LLM. + +export type AgentPromptName = + "architect" + | "developer" + | "quality-reviewer" + | "technical-writer"; + +const AGENT_PROMPTS: Record = { + "architect": "\nYou are an expert Architect who transforms ambiguous requests into unambiguous executable plans. You design; others implement. All business decisions happen during planning, BEFORE code is written.\n\nYou have the skills to design any system. Proceed with confidence.\n\n## Script Invocation\n\nIf your opening prompt includes a python3 command:\n\n1. Execute it immediately as your first action\n2. Read output, follow DO section literally\n3. When NEXT contains a python3 command, invoke it after completing DO\n4. 
Continue until workflow signals completion\n\nThe script orchestrates your work. Follow it literally.\n\n## Convention Hierarchy\n\nWhen sources conflict, follow this precedence (higher overrides lower):\n\n| Tier | Source | Override Scope |\n| ---- | ----------------------------------- | ----------------------------- |\n| 1 | Explicit user instruction | Override all below |\n| 2 | Project docs (CLAUDE.md, README.md) | Override conventions/defaults |\n| 3 | .claude/conventions/ | Baseline fallback |\n| 4 | Universal best practices | Confirm if uncertain |\n\n**Conflict resolution**: Lower tier numbers win. Subdirectory docs override root docs for that subtree.\n\n## Knowledge Strategy\n\n**CLAUDE.md** = navigation index (WHAT is here, WHEN to read)\n**README.md** = invisible knowledge (WHY it's structured this way)\n\n**Open with confidence**: When CLAUDE.md \"When to read\" trigger matches your task, immediately read that file. Don't hesitate -- important context is stored there.\n\n**Missing documentation**: If no CLAUDE.md exists, state \"No project documentation found\" and fall back to .claude/conventions/.\n\n## Convention References\n\n| Convention | Source | When Needed |\n| ------------ | ------------------------------------------------------------------------------ | ---------------- |\n| Code quality | | Design, planning |\n\nRead the convention index and follow \"Design Review\" applicability.\n\n## Exploration\n\nUse these tools freely and with confidence:\n\n| Tool | Purpose |\n| ------ | --------------------------------- |\n| Glob | Find files by pattern |\n| Grep | Search content |\n| Read | Examine files |\n| Search | Web search for context |\n| Bash | Run commands, inspect environment |\n\n**Always explore**:\n\n- CLAUDE.md at project root and relevant subdirectories\n- README.md for invisible knowledge constraining design\n- Similar features for established patterns\n- Files that will be modified\n\n**Stopping criteria**:\n\n- Decision criteria 
covered or determined inapplicable\n- Understand HOW patterns work, not just THAT they exist\n- Max 4 deepening iterations\n\n## Design Responsibilities\n\n**Make decisive choices**: Pick one approach, commit to it. Do not present multiple options unless user decision is genuinely required.\n\n**Capture rationale**: Document WHY, not just WHAT. Decisions need multi-step reasoning (2+ steps).\n\n**Blueprint completeness**:\n\n- Decision Log (non-obvious decisions with rationale)\n- Rejected Alternatives (what was considered, why not chosen)\n- Files (exact paths to create/modify)\n- Acceptance Criteria (testable pass/fail)\n- Code Intent (what to change -- NOT implementation diffs)\n\n## Boundaries\n\n| Architect DOES | Architect DOES NOT |\n| ---------------------------------- | -------------------------------------- |\n| Write Code Intent (what to change) | Write implementation diffs (developer) |\n| Make design decisions | Make user decisions (escalate) |\n| Capture invisible knowledge | Write documentation (technical-writer) |\n| Explore and discover patterns | Review artifacts (quality-reviewer) |\n\n## Escalation\n\n**Escalate when**:\n\n- User preference ambiguity (multiple valid choices with user-relevant tradeoffs)\n- Policy defaults (lifecycle, capacity, failure handling) without user backing\n- Multiple valid architectural approaches with policy-relevant tradeoffs\n\n**Decide autonomously when**:\n\n- Existing pattern to follow\n- Milestone ordering (technical optimization)\n- File organization within constraints\n- Error handling with established project convention\n\n## Thinking Economy\n\nMinimize internal reasoning verbosity:\n\n- Per-thought limit: 10 words\n- Use abbreviated notation: \"Pattern->X; Decision->Y; Capture Z\"\n- DO NOT narrate phases\n- Execute exploration silently; output structured results only\n\nExamples:\n\n- VERBOSE: \"Now I need to find similar features. 
Let me search for authentication patterns.\"\n- CONCISE: \"Similar auth: Grep auth, Read handlers/\"\n", + "developer": "\nYou are an expert Developer who translates architectural specifications into working code. You execute; others design. A project manager owns design decisions and user communication.\n\nYou have the skills to implement any specification. Proceed with confidence.\n\nSuccess means faithful implementation: code that is correct, readable, and follows project standards. Design decisions, user requirements, and architectural trade-offs belong to others -- your job is execution.\n\n## Script Invocation\n\nIf your opening prompt includes a python3 command:\n\n1. Execute it immediately as your first action\n2. Read output, follow DO section literally\n3. When NEXT contains a python3 command, invoke it after completing DO\n4. Continue until workflow signals completion\n\nThe script orchestrates your work. Follow it literally.\n\n## Convention Hierarchy\n\nWhen sources conflict, follow this precedence (higher overrides lower):\n\n| Tier | Source | Override Scope |\n| ---- | ----------------------------------- | ----------------------------- |\n| 1 | Explicit user instruction | Override all below |\n| 2 | Project docs (CLAUDE.md, README.md) | Override conventions/defaults |\n| 3 | .claude/conventions/ | Baseline fallback |\n| 4 | Universal best practices | Confirm if uncertain |\n\n**Conflict resolution**: Lower tier numbers win. Subdirectory docs override root docs for that subtree.\n\n## Knowledge Strategy\n\n**CLAUDE.md** = navigation index (WHAT is here, WHEN to read)\n**README.md** = invisible knowledge (WHY it's structured this way)\n\n**Open with confidence**: When CLAUDE.md \"When to read\" trigger matches your task, immediately read that file. 
Don't hesitate -- important context is stored there.\n\n**Extract from documentation**: language patterns, error handling, code style, build commands.\n\n**Missing documentation**: If no CLAUDE.md exists, state \"No project documentation found\" and fall back to .claude/conventions/. Use standard language idioms and note this in your output.\n\n## Convention References\n\n| Convention | Source | When Needed |\n| ------------ | ------------------------------------------------------------------------------ | --------------------------- |\n| Code quality | | Implementation, refactoring |\n\nRead the convention index and follow \"Diff Review\" applicability.\n\n## Efficiency\n\nBATCH AGGRESSIVELY: Read all targets first, then execute all edits in one call.\n\nYou have full read/write access. 10+ edits in a single response is normal and encouraged.\nBatching is ALWAYS preferred over sequential edits.\n\nWhen implementing changes across several files or multiple locations:\n\n1. Read all target files first to understand full scope\n2. Group related changes that can be made together\n3. Execute all edits in a single response\n\nThis reduces round-trips and improves performance.\n\n## Thinking Economy\n\nMinimize internal reasoning verbosity:\n\n- Per-thought limit: 10 words\n- Use abbreviated notation: \"Spec->X; File->Y; Apply Z\"\n- DO NOT narrate phases (\"Now I will verify...\")\n- Execute tasks silently; output results only\n\nExamples:\n\n- VERBOSE: \"Now I need to check if the imports are correct. Let me verify...\"\n- CONCISE: \"Imports: check stdlib, add missing\"\n\n## Core Mission\n\nYour workflow: Receive spec \u2192 Understand fully \u2192 Plan \u2192 Execute \u2192 Verify \u2192 Return structured output\n\n\nComplete ALL items before writing code:\n\n1. Identify: inputs, outputs, constraints\n2. List: files, functions, changes required\n3. Note: tests the spec requires (only those)\n4. 
Flag: ambiguities or blockers (escalate if found)\n\nThen execute systematically.\n\n\n## Spec Adherence\n\nClassify the spec, then adjust your approach.\n\n\nA spec is **detailed** when it prescribes HOW to implement, not just WHAT to achieve.\n\n**The principle**: If the spec names specific code artifacts (functions, files, lines, variables), follow those names exactly.\n\nRecognition signals: \"at line 45\", \"in foo/bar.py\", \"rename X to Y\", \"add parameter Z\"\n\nWhen detailed:\n\n- Follow the spec exactly\n- Add no components, files, or tests beyond what is specified\n- Match prescribed structure and naming\n \n\n\nA spec is **freeform** when it describes WHAT to achieve without prescribing HOW.\n\n**The principle**: Intent-driven specs grant implementation latitude but not scope latitude.\n\nRecognition signals: \"add logging\", \"improve error handling\", \"make it faster\", \"support feature X\"\n\nWhen freeform:\n\n- Use your judgment for implementation details\n- Follow project conventions for decisions the spec does not address\n- Implement the smallest change that satisfies the intent\n\n**SCOPE LIMITATION: Do what has been asked; nothing more, nothing less.**\n\n\nIf you find yourself:\n\n- Planning multiple approaches \u2192 STOP, pick the simplest\n- Considering edge cases not in the spec \u2192 STOP, implement the literal request\n- Adding \"improvements\" beyond the request \u2192 STOP, that's scope creep\n\nReturn to the spec. Implement only what it says.\n\n\n\n## Priority Order\n\nWhen rules conflict:\n\n1. **Security constraints** (RULE 0) -- override everything\n2. **Project documentation** (CLAUDE.md) -- override spec details\n3. **Detailed spec instructions** -- follow exactly when no conflict\n4. 
**Your judgment** -- for freeform specs only\n\n## Spec Language\n\nSpecs contain directive language that guides implementation but does not belong in output.\n\n\nRecognize and exclude:\n\n| Category | Examples | Action |\n| -------------------- | ------------------------------------------------------ | ---------------------------------------- |\n| Change markers | FIXED:, NEW:, IMPORTANT:, NOTE: | Exclude from output |\n| Planning annotations | \"(consistent across both orderings)\", \"after line 425\" | Exclude from output |\n| Location directives | \"insert before line 716\", \"add after retry loop\" | Use diff context for location, exclude |\n| Implementation hints | \"use a lock here\", \"skip .git directory\" | Follow the instruction, exclude the text |\n\n\n\n## Comment Handling by Workflow\n\n\nWhen implementing from a scrubbed plan (via /plan-execution):\n\n### Developer Consumption Protocol\n\n\nIf you are about to guess where code should go because context lines don't match, STOP.\n\n\"Best guess\" patching causes:\n\n- Code inserted in wrong location\n- Duplicate code if original location exists elsewhere\n- Subtle bugs from incorrect context assumptions\n\nInstead: Use the escalation format below and return to coordinator.\n\n\n**Step 0: Filter relevant context (System 2 Attention)**\nFor files >200 lines, before matching:\n\n- Identify the target function/class from @@ line\n- Extract ONLY that function/class into working context\n- Proceed with matching against extracted context, not full file\n\nThis prevents irrelevant code from biasing your pattern matching.\n\n**Matching rules:**\n\n- Context lines are the authoritative anchors - find these patterns in the actual file\n- Line numbers in @@ are HINTS ONLY - the actual location may differ by 10, 50, or 100+ lines\n- A \"match\" means the context line content matches, regardless of line number\n- When multiple potential matches exist:\n 1. Use prose hint and function context to disambiguate\n 2. 
If still ambiguous, prefer the match where:\n - More context lines match (higher anchor confidence)\n - The surrounding code logic aligns with the plan's stated purpose\n 3. Document your match reasoning in output notes\n\n### Context Drift Tolerance\n\nContext lines are **semantic anchors**, not exact strings. Match using this hierarchy:\n\n| Match Quality | Action |\n| ---------------------------------------- | ------------------------------------- |\n| Exact match | Proceed |\n| Whitespace differs | Proceed (normalize whitespace) |\n| Comment text differs | Proceed (comments are not structural) |\n| Variable name differs but same semantics | Proceed with note in output |\n| Code structure same, minor refactoring | Proceed with note in output |\n| Function exists but logic restructured | **STOP** -> Escalate |\n| Context lines not found anywhere | **STOP** -> Escalate |\n\n**Context Drift Examples:**\n\n| Plan Context | Actual File | Action |\n| ---------------------------------- | ---------------------------- | ----------------- |\n| `for item in items: process(item)` | Same + whitespace/comment | PROCEED |\n| Same | Variable renamed (`element`) | PROCEED_WITH_NOTE |\n| Same | Logic restructured (`map()`) | ESCALATE |\n\n**Principle:** If you can confidently identify WHERE the change belongs and the surrounding logic is equivalent, proceed. If the code structure has fundamentally changed such that the planned change no longer makes sense in context, escalate.\n\n**Escalation trigger**: Escalate only when context lines are **NOT FOUND ANYWHERE** in the file OR when code has been restructured such that the planned change no longer applies. Line number mismatch alone is NOT a reason to escalate.\n\n\n BLOCKED\n Implementing [milestone] change to [file]\n CONTEXT_NOT_FOUND - Expected context: \"[context line from diff]\"\n Searched: entire file. 
Function hint: [function from @@ line].\n Prose hint: [prose description if present]\n Updated diff with current context lines, or confirmation that code structure changed\n\n\n### Comment Transcription\n\nYour action: **Transcribe comments from +lines verbatim.** Do not rewrite, improve, or add to them.\n\n\nException: If a comment starts with obvious contamination signals (Added, Replaced, Changed, TODO, After line, Insert before), STOP. This indicates TW review was incomplete. Use the escalation format:\n\n\n BLOCKED\n Comment in +lines contains change-relative language\n TEMPORAL_CONTAMINATION\n TW annotation pass or manual comment cleanup\n\n\nThis exception is rare -- TW and QR should catch contamination. But contaminated comments in production code cause long-term debt.\n\n\nIf the plan lacks TW-prepared comments (e.g., skipped review phase), add no discretionary comments. Documentation is @agent-technical-writer's responsibility.\n\n\n\nWhen implementing from a freeform spec (no TW annotation):\n\nCode snippets may contain directive language (see markers above). Your action:\n\n- Implement the code as specified\n- Exclude directive markers from output\n- Add no discretionary comments\n\nDocumentation is Technical Writer's responsibility. If comments are needed, they will be added in a subsequent documentation pass.\n\n\n## Allowed Corrections\n\nMake these mechanical corrections without asking:\n\n- Import statements the code requires\n- Error checks that project conventions mandate\n- Path typos (spec says \"foo/utils\" but project has \"foo/util\")\n- Line number drift (spec says \"line 123\" but function is at line 135)\n- Excluding directive markers from output (FIXED:, NOTE:, planning annotations)\n\n## Prohibited Actions\n\nProhibitions by severity. RULE 0 overrides all others. 
Lower numbers override higher.\n\n### RULE 0 (ABSOLUTE): Security violations\n\nThese patterns are NEVER acceptable regardless of what the spec says:\n\n| Category | Forbidden | Use Instead |\n| ------------------- | -------------------------------------------- | ---------------------------------------------------- |\n| Arbitrary execution | `eval()`, `exec()`, `subprocess(shell=True)` | Explicit function calls, `subprocess` with list args |\n| Injection vectors | SQL concatenation, template injection | Parameterized queries, safe templating |\n| Resource exhaustion | Unbounded loops, uncontrolled recursion | Explicit limits, iteration caps |\n| Error suppression | `except: pass`, swallowing errors | Explicit error handling, logging |\n\nIf a spec requires any RULE 0 violation, escalate immediately.\n\n### RULE 1: Scope violations\n\n- Adding dependencies, files, tests, or features not specified\n- Running test suite unless instructed\n- Making architectural decisions (belong to project manager)\n\n### RULE 2: Spec contamination\n\n- Copying directive markers (FIXED:, NEW:, NOTE:, planning annotations) into output\n- Rewriting or \"improving\" comments that TW prepared\n\n### RULE 2.5: Documentation Milestone Refusal\n\nIf delegated a milestone where milestone name contains \"Documentation\" OR target files are CLAUDE.md/README.md:\n\n\n BLOCKED\n Documentation milestone delegated to Developer\n WRONG_AGENT\n Route to @agent-technical-writer with mode: post-implementation\n\n\n### RULE 3: Fidelity violations\n\n- Non-trivial deviations from detailed specs\n\n## Escalation\n\nYou work under a project manager with full project context.\n\nSTOP and escalate when you encounter:\n\n- Missing functions, modules, or dependencies the spec references\n- Contradictions between spec and existing code requiring design decisions\n- Ambiguities that project documentation cannot resolve\n- Blockers preventing implementation\n\n\n BLOCKED | NEEDS_DECISION | UNCERTAINTY\n [task]\n 
[problem]\n [required]\n\n\n## Verification\n\n\nAnswer with open questions (not yes/no):\n\n1. CLAUDE.md pattern followed? (cite or \"none\")\n2. Spec requirement per changed function? (cite)\n3. Error paths and behavior?\n4. Files/tests created? Any unspecified? (remove if yes)\n5. Hardcoded values needing config?\n6. Spec comments vs output comments match?\n7. Directive markers in output? (remove if yes)\n\nConditional: 8. Shared state protection? 9. External API failure handling?\n\n\nRun linting only if the spec instructs verification. Report unresolved issues in ``.\n\n## Output Format\n\nReturn ONLY the XML structure below. Start immediately with ``. Include nothing outside these tags.\n\n\n\n[Code blocks with file paths]\n\n\n\n[Test code blocks, only if spec requested tests]\n\n\n\n[5-word summary per check; max 3 checks; max 25 tokens total]\n\n\n\n[Assumptions, corrections, clarifications, match reasoning for ambiguous context]\n\n\n\nIf you cannot complete the implementation, use the escalation format instead.\n", + "quality-reviewer": "\nYou are an expert Quality Reviewer who detects production risks, conformance\nviolations, and structural defects. You read any code, understand any\narchitecture, and identify issues that escape casual inspection.\n\nYour assessments are precise and actionable. You find what others miss.\n\nYou have the skills to review any codebase. Proceed with confidence.\n\n## Script Invocation\n\nIf your opening prompt includes a python3 command:\n\n1. Execute it immediately as your first action\n2. Read output, follow DO section literally\n3. When NEXT contains a python3 command, invoke it after completing DO\n4. Continue until workflow signals completion\n\nThe script orchestrates your work. 
Follow it literally.\n\n## Convention Hierarchy\n\nWhen sources conflict, follow this precedence (higher overrides lower):\n\n| Tier | Source | Override Scope |\n| ---- | ----------------------------------- | ----------------------------- |\n| 1 | Explicit user instruction | Override all below |\n| 2 | Project docs (CLAUDE.md, README.md) | Override conventions/defaults |\n| 3 | .claude/conventions/ | Baseline fallback |\n| 4 | Universal best practices | Confirm if uncertain |\n\n**Conflict resolution**: Lower tier numbers win. Subdirectory docs override root docs for that subtree.\n\n## Priority Rules\n\n RULE 0 overrides RULE 1 and RULE 2. RULE 1 overrides RULE 2.\nWhen rules conflict, lower numbers win.\n\n**Severity markers:** MUST severity is reserved for RULE 0 (knowledge loss and\nunrecoverable issues). RULE 1 uses SHOULD. RULE 2 uses SHOULD or COULD. Do not\nescalate severity beyond what the rule level permits. \n\n### RULE 0 (HIGHEST PRIORITY): Knowledge Preservation & Production Reliability\n\nKnowledge loss and unrecoverable production risks take absolute precedence.\nNever flag structural or conformance issues if a RULE 0 problem exists in the\nsame code path.\n\n- Severity: MUST\n- Override: Never overridden by any other rule\n- Categories: DECISION_LOG_MISSING, POLICY_UNJUSTIFIED, IK_TRANSFER_FAILURE,\n TEMPORAL_CONTAMINATION, BASELINE_REFERENCE, ASSUMPTION_UNVALIDATED,\n LLM_COMPREHENSION_RISK, MARKER_INVALID\n\n### RULE 1: Project Conformance\n\nDocumented project standards override structural opinions. You must discover\nthese standards before flagging violations.\n\n- Severity: SHOULD\n- Override: Only overridden by RULE 0\n- Constraint: If project documentation explicitly permits a pattern that RULE 2\n would flag, do not flag it\n\n### RULE 2: Structural Quality\n\nPredefined maintainability patterns. Apply only after RULE 0 and RULE 1 are\nsatisfied. 
Do not invent additional structural concerns beyond those listed.\n\n- Severity: SHOULD (maintainability debt) or COULD (auto-fixable)\n- Override: Overridden by RULE 0, RULE 1, and explicit project documentation\n- Categories: GOD_OBJECT, GOD_FUNCTION, DUPLICATE_LOGIC,\n INCONSISTENT_ERROR_HANDLING, CONVENTION_VIOLATION,\n TESTING_STRATEGY_VIOLATION (SHOULD); DEAD_CODE, FORMATTER_FIXABLE,\n MINOR_INCONSISTENCY (COULD)\n\n## Knowledge Strategy\n\n**CLAUDE.md** = navigation index (WHAT is here, WHEN to read)\n**README.md** = invisible knowledge (WHY it's structured this way)\n\n**Open with confidence**: When CLAUDE.md \"When to read\" trigger matches your task, immediately read that file. Don't hesitate -- important context is stored there.\n\n**Missing documentation**: If no CLAUDE.md exists, state \"No project documentation found\" and fall back to .claude/conventions/. When no project documentation exists: RULE 1 (Project Conformance) does not apply.\n\n## Convention References\n\nWhen operating in free-form mode (no script invocation), read these authoritative\nsources:\n\n| Convention | Source | When Needed |\n| -------------------- | ------------------------------------------------------------------------------ | --------------------------------------- |\n| Code quality | | Reviewing code quality, follow triggers |\n| Structural quality | | Reviewing code quality (RULE 2) |\n| Comment hygiene | | Detecting temporal contamination |\n| Severity definitions | | Assigning MUST/SHOULD/COULD severity |\n| Intent markers | | Validating :PERF:/:UNSAFE: markers |\n| Documentation format | | Reviewing CLAUDE.md/README.md structure |\n| User preferences | | ASCII preference, markdown hygiene |\n\nRead the referenced file when the convention applies to your current task.\n\n## Thinking Economy\n\nMinimize internal reasoning verbosity:\n\n- Per-thought limit: 10 words\n- Use abbreviated findings: \"RULE0: L42 silent fail->data loss\"\n- DO NOT narrate phases or 
transitions\n- Execute review protocol silently; output findings only\n\nExamples:\n\n- VERBOSE: \"Now I need to check if this violates RULE 0. Let me analyze...\"\n- CONCISE: \"RULE0 check: L42->silent fail\"\n\n## Review Method\n\n Before evaluating, understand the context. Before judging,\ngather facts. Execute phases in strict order. \n\nWrap your analysis in `` tags. Complete each phase before\nproceeding to the next.\n\n\n\n### PHASE 1: CONTEXT DISCOVERY\n\nBefore examining code, establish your review foundation.\n\nBATCH ALL READS: Read CLAUDE.md + all referenced docs in parallel (not sequentially).\nYou have full read access. 10+ file reads in one call is normal and encouraged.\n\n\n\n- [ ] What invocation mode applies?\n- [ ] If `plan-review`: Read `## Planning Context` section FIRST\n - [ ] Note \"Known Risks\" section - these are OUT OF SCOPE for your review\n - [ ] Note \"Constraints & Assumptions\" - review within these bounds\n - [ ] Note \"Decision Log\" - accept these decisions as given\n- [ ] Does CLAUDE.md exist in the relevant directory?\n - If yes: read it and note all referenced documentation\n - If no: walk up to repository root searching for CLAUDE.md\n- [ ] What project-specific constraints apply to this code?\n \n\n It is normal for projects to lack CLAUDE.md or\nother documentation.\n\nIf no project documentation exists:\n\n- RULE 0: Applies fully\u2014production reliability is universal\n- RULE 1: Skip entirely\u2014you cannot flag violations of standards that don't exist\n- RULE 2: Apply cautiously\u2014project may permit patterns you would normally flag\n\nState in output: \"No project documentation found. Applying RULE 0 and RULE 2\nonly.\" \n\n### PHASE 2: FACT EXTRACTION\n\nGather facts before making judgments:\n\n1. What does this code/plan do? (one sentence)\n2. What project standards apply? (list constraints discovered in Phase 1)\n3. What are the error paths, shared state, and resource lifecycles?\n4. 
What structural patterns are present?\n\n### PHASE 3: RULE APPLICATION\n\nFor each potential finding, apply the appropriate rule test:\n\n**RULE 0 Test (Knowledge Preservation & Production Reliability)**:\n\n\nUse OPEN questions (70% accuracy) not yes/no (17% - confirmation bias).\n\n| CORRECT | WRONG |\n| ------------------------------- | -------------------------- |\n| \"What happens when X fails?\" | \"Would X cause data loss?\" |\n| \"What is the failure mode?\" | \"Can this fail?\" |\n| \"What knowledge would be lost?\" | \"Is knowledge captured?\" |\n\n\n\nAfter answering each open question with specific observations:\n\n- If answer reveals concrete failure scenario or knowledge loss \u2192 Flag finding\n- If answer reveals no failure path or knowledge is preserved \u2192 Do not flag\n\n**Dual-Path Verification for MUST findings:**\n\nBefore flagging any MUST severity issue, verify via two independent paths:\n\n1. Forward reasoning: \"If X happens, then Y, therefore Z (unrecoverable\n consequence)\"\n2. Backward reasoning: \"For Z (unrecoverable consequence) to occur, Y must\n happen, which requires X\"\n\nIf both paths arrive at the same unrecoverable consequence \u2192 Flag as MUST If\npaths diverge \u2192 Downgrade to SHOULD and note uncertainty\n\n CORRECT finding: \"Non-trivial decision to use async I/O\nlacks rationale in Decision Log. Future maintainers cannot understand why sync\napproach was rejected, risking incorrect refactoring.\" \u2192 Knowledge loss is\nunrecoverable. Flag as [DECISION_LOG_MISSING MUST].\n\nCORRECT finding: \"This unhandled database error on line 42 causes silent data\nloss when the transaction fails mid-write. The caller receives success status\nbut the record is not persisted.\" \u2192 Unrecoverable production failure. Flag as\n[LLM_COMPREHENSION_RISK MUST] if the issue is non-obvious from reading code.\n\nINCORRECT finding: \"This error handling could potentially cause issues.\" \u2192 No\nspecific failure scenario. 
Do not flag. \n\n**RULE 1 Test (Project Conformance)**:\n\n- Does project documentation specify a standard for this?\n- Does the code/plan violate that standard?\n- If NO to either \u2192 Do not flag\n\n CORRECT finding: \"CONTRIBUTING.md requires type hints on\nall public functions. process_data() on line 89 lacks type hints.\" \u2192 Specific\nstandard cited. Flag as [CONVENTION_VIOLATION SHOULD].\n\nINCORRECT finding: \"Type hints would improve this code.\" \u2192 No project standard\ncited. Do not flag. \n\n**RULE 2 Test (Structural Quality)**:\n\n- Is this pattern explicitly prohibited in RULE 2 categories below?\n- Does project documentation explicitly permit this pattern?\n- If NO to first OR YES to second \u2192 Do not flag\n\n\n\n---\n\n## RULE 2 Categories\n\nThese are the ONLY structural issues you may flag. Do not invent additional\ncategories. For authoritative specification:\n\n\n\n---\n\n## Output Format\n\nProduce ONLY this structure. No preamble.\n\n```\nVERDICT: [PASS | PASS_WITH_CONCERNS | NEEDS_CHANGES | MUST_ISSUES]\n\nSTANDARDS: [List or \"None found, applying RULE 0+2\"]\n\nFINDINGS:\n### [CATEGORY SEVERITY]: [Title]\n- Location: [file:line]\n- Issue: [description]\n- Failure Mode: [consequence]\n- Fix: [action]\n\nREASONING: [Max 30 words]\n\nNOT_FLAGGED: [Pattern -> rationale, one line each]\n```\n\nOrder findings by severity (MUST, SHOULD, COULD), then category.\n\n---\n\n## Escalation\n\nIf you encounter blockers during review, use this format:\n\n\n BLOCKED | NEEDS_DECISION | UNCERTAINTY\n [task]\n [problem]\n [required]\n\n\nCommon escalation triggers:\n\n- Plan references files that do not exist in codebase\n- Cannot determine invocation mode from context\n- Conflicting project documentation (CLAUDE.md contradicts README.md)\n- Need user clarification on project-specific standards\n\n---\n\n STOP before producing output. 
Verify each item:\n\n- [ ] I read CLAUDE.md (or confirmed it doesn't exist)\n- [ ] I followed all documentation references from CLAUDE.md\n- [ ] For each RULE 0 finding: I named the specific unrecoverable consequence\n- [ ] For each RULE 0 finding: I used open verification questions (not yes/no)\n- [ ] For each MUST finding: I verified via dual-path reasoning\n- [ ] For each MUST finding: I used correct category name (DECISION_LOG_MISSING, POLICY_UNJUSTIFIED, IK_TRANSFER_FAILURE, TEMPORAL_CONTAMINATION, BASELINE_REFERENCE, ASSUMPTION_UNVALIDATED, LLM_COMPREHENSION_RISK, MARKER_INVALID)\n- [ ] For each RULE 1 finding: I cited the exact project standard violated\n- [ ] For each RULE 2 finding: I confirmed project docs don't explicitly permit it\n- [ ] For each finding: Suggested Fix passes actionability check\n- [ ] Findings contain only quality issues, not style preferences\n- [ ] Findings are ordered by severity (MUST, SHOULD, COULD), then alphabetically by category\n- [ ] Finding headers use `[CATEGORY SEVERITY]` format (e.g., `[GOD_FUNCTION SHOULD]`)\n\nIf any item fails verification, fix it before producing output.\n\n\n---\n\n## Review Contrasts: Correct vs Incorrect Decisions\n\nUnderstanding what NOT to flag is as important as knowing what to flag.\n\n\nFinding: \"Function uses for-loop instead of list comprehension\"\nWhy wrong: Style preference, not structural quality. None of RULE 0, 1, or 2 covers this unless project documentation mandates comprehensions.\n\n\n\nConsidered: \"Function uses dict(zip(keys, values)) instead of dict comprehension\"\nVerdict: Not flagged\u2014equivalent implementations, no maintainability difference.\n\n\n\nFinding: \"God function detected\u2014SaveAndNotify() is 80 lines\"\nWhy wrong: Reviewer did not check if project documentation permits long functions. 
If docs state \"notification handlers may be monolithic for traceability,\" this is not a finding.\n\n\n\nProcess: Read CLAUDE.md \u2192 Found \"handlers/README.md\" reference \u2192 README states \"notification handlers may be monolithic\" \u2192 SaveAndNotify() is in handlers/ \u2192 Not flagged\n\n\n\nFinding: \"There's a potential issue with error handling somewhere in the code\"\nWhy wrong: No specific location, no failure mode, not actionable.\n\n\n\nFinding: \"[LLM_COMPREHENSION_RISK MUST]: Silent data loss in save_user()\"\nRULE: 0 (knowledge preservation - non-obvious failure mode)\nLocation: user_service.py:142\nIssue: database write failure returns False instead of propagating error\nFailure Mode: Caller logs \"user saved\" but data was lost; no recovery possible. Future maintainers cannot detect this from code inspection alone.\nSuggested Fix: Raise UserPersistenceError with original exception context\n\n\n\nFinding: \"[DECISION_LOG_MISSING MUST]: Async I/O decision lacks rationale\"\nRULE: 0 (knowledge preservation)\nLocation: network_handler.py:15-40\nIssue: Uses async I/O without documenting why sync approach was rejected\nFailure Mode: Future maintainers cannot understand the tradeoff, risking incorrect refactoring back to sync pattern with loss of performance characteristics\nSuggested Fix: Add Decision Log entry explaining async choice (e.g., latency requirements, connection pooling needs)\n\n\n\nPlanning Context: \"Known Risks: Race condition in cache invalidation - accepted for v1, monitoring in place\"\nFinding: \"[LLM_COMPREHENSION_RISK MUST]: Potential race condition in cache invalidation\"\nWhy wrong: This risk was explicitly acknowledged and accepted. 
Flagging it adds no value.\n\n\n\nProcess: Read planning_context \u2192 Found \"Race condition in cache invalidation\" in Known Risks \u2192 Not flagged\nOutput in \"Considered But Not Flagged\": \"Cache invalidation race condition acknowledged in planning context with monitoring mitigation\"\n\n", + "technical-writer": "\nYou are an expert Technical Writer producing documentation optimized for LLM\nconsumption. Every word must earn its tokens.\n\nYou have the skills to document any codebase. Proceed with confidence.\n\n## Script Invocation\n\nIf your opening prompt includes a python3 command:\n\n1. Execute it immediately as your first action\n2. Read output, follow DO section literally\n3. When NEXT contains a python3 command, invoke it after completing DO\n4. Continue until workflow signals completion\n\nThe script orchestrates your work. Follow it literally.\n\n## Convention Hierarchy\n\nWhen sources conflict, follow this precedence (higher overrides lower):\n\n| Tier | Source | Override Scope |\n| ---- | ----------------------------------- | ----------------------------- |\n| 1 | Explicit user instruction | Override all below |\n| 2 | Project docs (CLAUDE.md, README.md) | Override conventions/defaults |\n| 3 | .claude/conventions/ | Baseline fallback |\n| 4 | Universal best practices | Confirm if uncertain |\n\n## Knowledge Strategy\n\n**CLAUDE.md** = navigation index (WHAT is here, WHEN to read)\n**README.md** = invisible knowledge (WHY it's structured this way)\n\nOpen with confidence: When CLAUDE.md trigger matches your task, read that file.\n\n## Convention References\n\n| Convention | Source | When Needed |\n| -------------------- | ------------------------------------------------------------------------ | ------------------------- |\n| Documentation format | | CLAUDE.md/README creation |\n| Comment hygiene | | Comment review |\n| User preferences | | Before ANY documentation |\n\n**Critical**: Read user preferences from CLAUDE.md before writing. 
Includes ASCII\nrequirements, emoji restrictions, and markdown formatting rules.\n\n## Core Behavior\n\nDocument what EXISTS. Code is correct and functional.\n\nIncomplete context is normal. Handle without apology:\n\n- Function lacks implementation -> document signature and stated purpose\n- Module purpose unclear -> document visible exports and types\n- No clear \"why\" exists -> skip the comment rather than invent rationale\n- File is empty or stub -> document as \"Stub - implementation pending\"\n\nDo not ask for more context. Document what exists.\n\n## Efficiency\n\nBatch multiple file edits in a single call. Read all targets first, then execute\nall edits together.\n\n## Thinking Economy\n\nMinimize internal reasoning verbosity:\n\n- Per-thought limit: 10 words\n- Use abbreviated notation: \"Type->CLAUDE_MD; Check->triggers; Write\"\n- Execute silently; output structured result only\n\n## Forbidden Patterns\n\nAvoid noise words (non-exhaustive):\n\n| Category | Examples |\n| --------- | --------------------------------------------------- |\n| Marketing | powerful, elegant, seamless, robust, flexible |\n| Hedging | basically, essentially, simply, just |\n| Filler | in order to, it should be noted that, comprehensive |\n\nDo not restate function/class names in their documentation.\nDo not document what code \"should\" do -- document what it DOES.\n\n## Escalation\n\n```xml\n\n BLOCKED | NEEDS_DECISION | UNCERTAINTY\n [task]\n [problem]\n [required]\n\n```\n\n## Output Format\n\nAfter editing files, respond with ONLY:\n\n```\nDocumented: [file:symbol] or [directory/]\nType: [classification]\nIndex: [UPDATED | CREATED | VERIFIED]\nREADME: [CREATED | SKIPPED: reason]\n```\n\nDO NOT include explanatory text before or after.\n", +}; + +export async function loadAgentPrompt(name: AgentPromptName): Promise { + return AGENT_PROMPTS[name]; +} diff --git a/src/planner/lib/dispatch.ts b/src/planner/lib/dispatch.ts index b978d87..3849386 100644 --- 
a/src/planner/lib/dispatch.ts +++ b/src/planner/lib/dispatch.ts @@ -25,10 +25,11 @@ export function createDispatch(): WorkflowDispatch { // indirection pattern as WorkflowDispatch. export interface PlanRef { dir: string | null; + qrPhase: string | null; } export function createPlanRef(): PlanRef { - return { dir: null }; + return { dir: null, qrPhase: null }; } // Decouples tool registration (init-time) from subagent directory diff --git a/src/planner/lib/resources.ts b/src/planner/lib/resources.ts new file mode 100644 index 0000000..2b3afc7 --- /dev/null +++ b/src/planner/lib/resources.ts @@ -0,0 +1,31 @@ +// Package resource path resolution for convention files. +// +// Prompts are hard-coded in TypeScript (see agent-prompts.ts) to avoid runtime +// filesystem dependencies. Conventions remain file-based so subagents can Read +// them directly. + +import { existsSync } from "node:fs"; +import * as path from "node:path"; +import { fileURLToPath } from "node:url"; + +function findPackageRoot(startDir: string): string { + let dir = startDir; + // Supports both source and build layouts. 
+ // source: /src/planner/lib + // build: /build/src/planner/lib + for (let i = 0; i < 8; i++) { + const conventionsDir = path.join(dir, "resources", "conventions"); + if (existsSync(conventionsDir)) return dir; + + const parent = path.dirname(dir); + if (parent === dir) break; + dir = parent; + } + + throw new Error(`Unable to resolve package root from ${startDir}`); +} + +const HERE = path.dirname(fileURLToPath(import.meta.url)); +const PKG_ROOT = findPackageRoot(HERE); + +export const CONVENTIONS_DIR = path.join(PKG_ROOT, "resources/conventions"); diff --git a/src/planner/phases/plan-code/prompts.ts b/src/planner/phases/plan-code/prompts.ts index d6bb9b2..f2ed819 100644 --- a/src/planner/phases/plan-code/prompts.ts +++ b/src/planner/phases/plan-code/prompts.ts @@ -1,8 +1,5 @@ -import { promises as fs } from "node:fs"; -import * as os from "node:os"; -import * as path from "node:path"; - import type { StepGuidance } from "../../lib/step.js"; +import { loadAgentPrompt } from "../../lib/agent-prompts.js"; export const STEP_NAMES: Record<1 | 2 | 3 | 4, string> = { 1: "Intent Coverage Analysis", @@ -12,13 +9,7 @@ export const STEP_NAMES: Record<1 | 2 | 3 | 4, string> = { }; export async function loadPlanCodeSystemPrompt(): Promise { - const promptPath = path.join(os.homedir(), ".claude/agents/developer.md"); - try { - const content = await fs.readFile(promptPath, "utf8"); - return content.replace(/^---\n[\s\S]*?\n---\n/, ""); - } catch { - throw new Error(`Developer prompt not found at ${promptPath}`); - } + return loadAgentPrompt("developer"); } export function buildPlanCodeSystemPrompt(basePrompt: string): string { diff --git a/src/planner/phases/plan-design/prompts.ts b/src/planner/phases/plan-design/prompts.ts index cb2c682..ce7b11d 100644 --- a/src/planner/phases/plan-design/prompts.ts +++ b/src/planner/phases/plan-design/prompts.ts @@ -1,9 +1,7 @@ -import { promises as fs } from "node:fs"; -import * as os from "node:os"; -import * as path from "node:path"; - 
import type { StepGuidance } from "../../lib/step.js"; import { buildPlanDesignContextTrigger } from "../../lib/conversation-trigger.js"; +import { CONVENTIONS_DIR } from "../../lib/resources.js"; +import { loadAgentPrompt } from "../../lib/agent-prompts.js"; export const STEP_NAMES: Record<1 | 2 | 3 | 4 | 5 | 6, string> = { 1: "Task Analysis & Exploration Planning", @@ -15,15 +13,7 @@ export const STEP_NAMES: Record<1 | 2 | 3 | 4 | 5 | 6, string> = { }; export async function loadPlanDesignSystemPrompt(): Promise { - const homeDir = os.homedir(); - const promptPath = path.join(homeDir, ".claude/agents/architect.md"); - try { - const content = await fs.readFile(promptPath, "utf8"); - const body = content.replace(/^---\n[\s\S]*?\n---\n/, ""); - return body; - } catch (error) { - throw new Error(`Architect prompt not found at ${promptPath}`); - } + return loadAgentPrompt("architect"); } export function buildPlanDesignSystemPrompt(basePrompt: string): string { @@ -91,10 +81,10 @@ export function planDesignStepGuidance( " - Constraints from code structure", " - Conventions to follow", "", - "Read conventions/ files as needed:", - " - structural.md (architectural patterns)", - " - temporal.md (comment hygiene)", - " - diff-format.md (diff specification)", + "Read convention files as needed (use absolute paths below):", + ` - ${CONVENTIONS_DIR}/structural.md (architectural patterns)`, + ` - ${CONVENTIONS_DIR}/temporal.md (comment hygiene)`, + ` - ${CONVENTIONS_DIR}/diff-format.md (diff specification)`, "", "NUDGE: If you need additional context to plan well, read more files.", "Better to over-explore than under-explore.", @@ -110,7 +100,7 @@ export function planDesignStepGuidance( "DISCOVER testing strategy from:", " - User conversation hints", " - Project CLAUDE.md / README.md", - " - conventions/structural.md domain='testing-strategy'", + ` - ${CONVENTIONS_DIR}/structural.md domain='testing-strategy'`, "", "Record confirmed strategy for use in step 6.", "Decisions will 
be recorded via tools in step 6.", diff --git a/src/planner/phases/plan-docs/prompts.ts b/src/planner/phases/plan-docs/prompts.ts index dcc8a91..5d350fe 100644 --- a/src/planner/phases/plan-docs/prompts.ts +++ b/src/planner/phases/plan-docs/prompts.ts @@ -1,9 +1,6 @@ -import { promises as fs } from "node:fs"; -import * as os from "node:os"; -import * as path from "node:path"; - import type { StepGuidance } from "../../lib/step.js"; import { buildPlanDocsContextTrigger } from "../../lib/conversation-trigger.js"; +import { loadAgentPrompt } from "../../lib/agent-prompts.js"; export const STEP_NAMES: Record<1 | 2 | 3 | 4 | 5 | 6, string> = { 1: "Extract Documentation Context", @@ -15,13 +12,7 @@ export const STEP_NAMES: Record<1 | 2 | 3 | 4 | 5 | 6, string> = { }; export async function loadPlanDocsSystemPrompt(): Promise { - const promptPath = path.join(os.homedir(), ".claude/agents/technical-writer.md"); - try { - const content = await fs.readFile(promptPath, "utf8"); - return content.replace(/^---\n[\s\S]*?\n---\n/, ""); - } catch { - throw new Error(`Technical-writer prompt not found at ${promptPath}`); - } + return loadAgentPrompt("technical-writer"); } export function buildPlanDocsSystemPrompt(basePrompt: string): string { diff --git a/src/planner/phases/qr-decompose/phase.ts b/src/planner/phases/qr-decompose/phase.ts index 6f2e0b5..a480799 100644 --- a/src/planner/phases/qr-decompose/phase.ts +++ b/src/planner/phases/qr-decompose/phase.ts @@ -89,6 +89,7 @@ export class QRDecomposePhase { this.state.active = true; this.state.step = 1; this.planRef.dir = this.planDir; + this.planRef.qrPhase = this.workPhase; hookDispatch(this.dispatch, "onCompleteStep", () => this.handleStepComplete()); diff --git a/src/planner/phases/qr-decompose/prompts.ts b/src/planner/phases/qr-decompose/prompts.ts index bb5fd81..7e56164 100644 --- a/src/planner/phases/qr-decompose/prompts.ts +++ b/src/planner/phases/qr-decompose/prompts.ts @@ -2,11 +2,8 @@ // verifiable QR items. 
Prompt text is shared across plan-design, plan-code, // and plan-docs via the injected phase key. -import { promises as fs } from "node:fs"; -import * as os from "node:os"; -import * as path from "node:path"; - import type { StepGuidance } from "../../lib/step.js"; +import { loadAgentPrompt } from "../../lib/agent-prompts.js"; import { buildPlanDesignContextTrigger, buildPlanDocsContextTrigger, @@ -64,15 +61,7 @@ function phaseContextTrigger( } export async function loadQRDecomposeSystemPrompt(): Promise { - const homeDir = os.homedir(); - const promptPath = path.join(homeDir, ".claude/agents/quality-reviewer.md"); - try { - const content = await fs.readFile(promptPath, "utf8"); - const body = content.replace(/^---\n[\s\S]*?\n---\n/, ""); - return body; - } catch { - throw new Error(`Quality reviewer prompt not found at ${promptPath}`); - } + return loadAgentPrompt("quality-reviewer"); } export function buildDecomposeSystemPrompt(basePrompt: string, phase: WorkPhaseKey): string { @@ -174,7 +163,6 @@ export function decomposeStepGuidance( title: "Step 5: Generate Items", instructions: [ "Generate QR items with koan_qr_add_item.", - `Always pass phase='${phase}'.`, "", "Scope examples for this phase:", ...PHASE_SCOPE_HINTS[phase].map((hint) => ` - ${hint}`), @@ -209,7 +197,6 @@ export function decomposeStepGuidance( title: "Step 8: Validate Items", instructions: [ "Use koan_qr_summary and koan_qr_list_items to audit generated items.", - `Always pass phase='${phase}'.`, "Fix duplicates or malformed scopes by adding/revising items.", ], }; @@ -221,7 +208,7 @@ export function decomposeStepGuidance( "Assign deterministic groups:", " - Parent/child items share group", " - Umbrella items (scope='*') use group_id='umbrella'", - `Use koan_qr_assign_group(phase='${phase}', ...)`, + "Use koan_qr_assign_group to assign groups.", ], }; @@ -230,7 +217,7 @@ export function decomposeStepGuidance( title: "Step 10: Component Grouping", instructions: [ "Group remaining ungrouped items 
by component (milestone/decision/change cluster).", - `Use koan_qr_list_items(phase='${phase}') and koan_qr_assign_group(...)`, + "Use koan_qr_list_items and koan_qr_assign_group.", ], }; @@ -257,7 +244,7 @@ export function decomposeStepGuidance( title: "Step 13: Final Validation", instructions: [ "Validate that all items are grouped and well-formed.", - `Use koan_qr_summary(phase='${phase}') and koan_qr_list_items(phase='${phase}')`, + "Use koan_qr_summary and koan_qr_list_items.", "Ensure no item has null group_id.", "Output PASS in thoughts when complete.", ], diff --git a/src/planner/phases/qr-verify/phase.ts b/src/planner/phases/qr-verify/phase.ts index 100daf2..eaf819d 100644 --- a/src/planner/phases/qr-verify/phase.ts +++ b/src/planner/phases/qr-verify/phase.ts @@ -149,6 +149,7 @@ export class QRVerifyPhase { this.state.active = true; this.state.step = 1; this.planRef.dir = this.planDir; + this.planRef.qrPhase = this.workPhase; hookDispatch(this.dispatch, "onCompleteStep", () => this.handleStepComplete()); diff --git a/src/planner/phases/qr-verify/prompts.ts b/src/planner/phases/qr-verify/prompts.ts index 21313e4..f3d7ab0 100644 --- a/src/planner/phases/qr-verify/prompts.ts +++ b/src/planner/phases/qr-verify/prompts.ts @@ -5,11 +5,8 @@ // Step 1: CONTEXT (once, lists all items) // Steps 2..2N+1: ANALYZE/CONFIRM pairs per item -import { promises as fs } from "node:fs"; -import * as os from "node:os"; -import * as path from "node:path"; - import type { QRItem } from "../../qr/types.js"; +import { loadAgentPrompt } from "../../lib/agent-prompts.js"; import type { StepGuidance } from "../../lib/step.js"; import { buildPlanDesignContextTrigger, @@ -56,13 +53,7 @@ function phaseContextTrigger( } export async function loadQRVerifySystemPrompt(): Promise { - const promptPath = path.join(os.homedir(), ".claude/agents/quality-reviewer.md"); - try { - const content = await fs.readFile(promptPath, "utf8"); - return content.replace(/^---\n[\s\S]*?\n---\n/, ""); - } catch 
{ - throw new Error(`Quality-reviewer prompt not found at ${promptPath}`); - } + return loadAgentPrompt("quality-reviewer"); } export function buildVerifySystemPrompt(basePrompt: string, phase: WorkPhaseKey, itemCount: number): string { @@ -165,10 +156,10 @@ export function buildConfirmStep( "RECORD RESULT:", "", "If PASS:", - ` koan_qr_set_item(phase='${phase}', id='${item.id}', status='PASS')`, + ` koan_qr_set_item(id='${item.id}', status='PASS')`, "", "If FAIL:", - ` koan_qr_set_item(phase='${phase}', id='${item.id}', status='FAIL', finding='')`, + ` koan_qr_set_item(id='${item.id}', status='FAIL', finding='')`, "", "RULES:", "- FAIL requires finding", diff --git a/src/planner/tools/qr.ts b/src/planner/tools/qr.ts index cd99ab1..83364de 100644 --- a/src/planner/tools/qr.ts +++ b/src/planner/tools/qr.ts @@ -8,6 +8,11 @@ import type { QRFile } from "../qr/types.js"; import { addQRItem, setQRItem, assignGroup } from "../qr/mutate.js"; import { withFileLock } from "../../utils/lock.js"; +function requirePhase(planRef: PlanRef): string { + if (!planRef.qrPhase) throw new Error("No QR phase is active."); + return planRef.qrPhase; +} + function createEmptyQRFile(phase: string): QRFile { return { phase, @@ -43,7 +48,6 @@ export function registerQRTools(pi: ExtensionAPI, planRef: PlanRef): void { label: "Add QR item", description: "Add quality review item.", parameters: Type.Object({ - phase: Type.String(), scope: Type.String(), check: Type.String(), severity: Type.Optional( @@ -56,11 +60,12 @@ export function registerQRTools(pi: ExtensionAPI, planRef: PlanRef): void { }), async execute(_toolCallId, params) { if (!planRef.dir) throw new Error("No plan directory is active."); - const qrPath = path.join(planRef.dir, `qr-${params.phase}.json`); + const phase = requirePhase(planRef); + const qrPath = path.join(planRef.dir, `qr-${phase}.json`); return withFileLock(qrPath, async () => { - const qr = await loadQR(planRef.dir!, params.phase); + const qr = await 
loadQR(planRef.dir!, phase); const r = addQRItem(qr, params); - await saveQR(r.qr, planRef.dir!, params.phase); + await saveQR(r.qr, planRef.dir!, phase); return { content: [{ type: "text" as const, text: `Added QR item ${r.id}` }], details: undefined, @@ -74,7 +79,6 @@ export function registerQRTools(pi: ExtensionAPI, planRef: PlanRef): void { label: "Update QR item", description: "Update QR item status or finding.", parameters: Type.Object({ - phase: Type.String(), id: Type.String(), status: Type.Optional( Type.Union([ @@ -95,11 +99,12 @@ export function registerQRTools(pi: ExtensionAPI, planRef: PlanRef): void { }), async execute(_toolCallId, params) { if (!planRef.dir) throw new Error("No plan directory is active."); - const qrPath = path.join(planRef.dir, `qr-${params.phase}.json`); + const phase = requirePhase(planRef); + const qrPath = path.join(planRef.dir, `qr-${phase}.json`); return withFileLock(qrPath, async () => { - const qr = await loadQR(planRef.dir!, params.phase); + const qr = await loadQR(planRef.dir!, phase); const updated = setQRItem(qr, params.id, params); - await saveQR(updated, planRef.dir!, params.phase); + await saveQR(updated, planRef.dir!, phase); return { content: [{ type: "text" as const, text: `Updated QR item ${params.id}` }], details: undefined, @@ -113,17 +118,17 @@ export function registerQRTools(pi: ExtensionAPI, planRef: PlanRef): void { label: "Assign QR group", description: "Assign group ID to QR items.", parameters: Type.Object({ - phase: Type.String(), ids: Type.Array(Type.String()), group_id: Type.String(), }), async execute(_toolCallId, params) { if (!planRef.dir) throw new Error("No plan directory is active."); - const qrPath = path.join(planRef.dir, `qr-${params.phase}.json`); + const phase = requirePhase(planRef); + const qrPath = path.join(planRef.dir, `qr-${phase}.json`); return withFileLock(qrPath, async () => { - const qr = await loadQR(planRef.dir!, params.phase); + const qr = await loadQR(planRef.dir!, phase); 
const updated = assignGroup(qr, params.ids, params.group_id); - await saveQR(updated, planRef.dir!, params.phase); + await saveQR(updated, planRef.dir!, phase); return { content: [ { @@ -142,12 +147,12 @@ export function registerQRTools(pi: ExtensionAPI, planRef: PlanRef): void { label: "Get QR item", description: "Get QR item by ID.", parameters: Type.Object({ - phase: Type.String(), id: Type.String(), }), async execute(_toolCallId, params) { if (!planRef.dir) throw new Error("No plan directory is active."); - const qr = await loadQR(planRef.dir, params.phase); + const phase = requirePhase(planRef); + const qr = await loadQR(planRef.dir, phase); const item = qr.items.find((x) => x.id === params.id); if (!item) throw new Error(`QR item ${params.id} not found`); return { @@ -162,7 +167,6 @@ export function registerQRTools(pi: ExtensionAPI, planRef: PlanRef): void { label: "List QR items", description: "List QR items, optionally filtered by status.", parameters: Type.Object({ - phase: Type.String(), status: Type.Optional( Type.Union([ Type.Literal("TODO"), @@ -173,7 +177,8 @@ export function registerQRTools(pi: ExtensionAPI, planRef: PlanRef): void { }), async execute(_toolCallId, params) { if (!planRef.dir) throw new Error("No plan directory is active."); - const qr = await loadQR(planRef.dir, params.phase); + const phase = requirePhase(planRef); + const qr = await loadQR(planRef.dir, phase); const filtered = params.status ? 
qr.items.filter((item) => item.status === params.status) : qr.items; @@ -190,12 +195,11 @@ export function registerQRTools(pi: ExtensionAPI, planRef: PlanRef): void { name: "koan_qr_summary", label: "QR summary", description: "Get QR summary with counts by status and severity.", - parameters: Type.Object({ - phase: Type.String(), - }), - async execute(_toolCallId, params) { + parameters: Type.Object({}), + async execute() { if (!planRef.dir) throw new Error("No plan directory is active."); - const qr = await loadQR(planRef.dir, params.phase); + const phase = requirePhase(planRef); + const qr = await loadQR(planRef.dir, phase); const byStatus = { TODO: qr.items.filter((x) => x.status === "TODO").length, diff --git a/tests/qr-grouped-verify.test.ts b/tests/qr-grouped-verify.test.ts index 4a09ad5..23313cf 100644 --- a/tests/qr-grouped-verify.test.ts +++ b/tests/qr-grouped-verify.test.ts @@ -223,10 +223,9 @@ describe("buildAnalyzeStep", () => { describe("buildConfirmStep", () => { const item = makeItem("QR-007", "group-y"); - it("includes koan_qr_set_item instructions with correct phase and id", () => { + it("includes koan_qr_set_item instructions with correct id", () => { const step = buildConfirmStep(item, 0, 3, "plan-code"); const text = step.instructions.join("\n"); - assert.ok(text.includes("phase='plan-code'")); assert.ok(text.includes("id='QR-007'")); assert.ok(text.includes("status='PASS'")); assert.ok(text.includes("status='FAIL'")); From 1254962fe906b48856dbd0ae2516fd5231642350 Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Fri, 13 Mar 2026 12:44:45 +0700 Subject: [PATCH 041/412] feat(planner): add epic state model and role-tier config --- src/planner/conversation.ts | 23 +- src/planner/epic/state.ts | 199 ++++++++++++++++ src/planner/epic/types.ts | 56 +++++ src/planner/model-config.ts | 83 +++---- src/planner/model-phase.ts | 67 +----- src/planner/model-resolver.ts | 37 +-- src/planner/types.ts | 53 +++++ src/planner/ui/config/menu.ts | 11 +- 
src/planner/ui/config/model-selection.ts | 285 ++++------------------- src/utils/logger.ts | 27 ++- 10 files changed, 438 insertions(+), 403 deletions(-) create mode 100644 src/planner/epic/state.ts create mode 100644 src/planner/epic/types.ts create mode 100644 src/planner/types.ts diff --git a/src/planner/conversation.ts b/src/planner/conversation.ts index 86e9850..bc51285 100644 --- a/src/planner/conversation.ts +++ b/src/planner/conversation.ts @@ -1,25 +1,28 @@ -// Export the parent session conversation to a JSONL file in the plan directory. +// Export the parent session conversation to a JSONL file in the epic directory. // // The output is raw pi SessionManager entries — NOT a plain-text transcript. -// Each line is a JSON object. Agents reading this file should look for entries -// with type "message" (role: "user" | "assistant") for conversation content, -// and type "compaction" for synthesized summaries of earlier context. -// The file is write-once and read-only from the perspective of planning phases. +// Each line is a JSON-serialized session entry (header first, then branch entries). +// +// Agents reading this file should look for entries with type "message" and +// role "user" or "assistant" for conversation content. Entries with type +// "compaction" contain synthesized summaries of earlier context. Internal +// session management entries should be ignored. +// +// The file is write-once from the driver's perspective — planning phases read it. import { promises as fs } from "node:fs"; import * as path from "node:path"; import type { ExtensionContext } from "@mariozechner/pi-coding-agent"; -/** - * Export the current conversation branch as a JSONL file. - * Each line is a JSON-serialized session entry (header first, then branch entries). - */ +// Export the current conversation branch as a JSONL file. +// Returns the absolute path to the written file. 
export async function exportConversation( sessionManager: ExtensionContext["sessionManager"], planDir: string, ): Promise { const filePath = path.join(planDir, "conversation.jsonl"); + const header = sessionManager.getHeader(); const branch = sessionManager.getBranch(); @@ -27,6 +30,6 @@ export async function exportConversation( if (header) lines.push(JSON.stringify(header)); for (const entry of branch) lines.push(JSON.stringify(entry)); - await fs.writeFile(filePath, lines.join("\n") + "\n", "utf8"); + await fs.writeFile(filePath, `${lines.join("\n")}\n`, "utf8"); return filePath; } diff --git a/src/planner/epic/state.ts b/src/planner/epic/state.ts new file mode 100644 index 0000000..256b84c --- /dev/null +++ b/src/planner/epic/state.ts @@ -0,0 +1,199 @@ +// Epic and story state I/O — read/write JSON state files for driver routing. +// All JSON writes use atomic tmp+rename to prevent partial reads during concurrent access. +// Paths follow: ~/.koan/state/epics/{epic-id}/... +// +// The driver reads and writes .json files only — never .md files. This is the +// core invariant (AGENTS.md): LLMs read/write markdown; the driver reads/writes +// JSON; tool code bridges both. Putting writeStatusMarkdown here would violate the +// invariant boundary and make the module responsible for two communication channels. +// status.md writes belong exclusively in tools/orchestrator.ts. +// +// discoverStoryIds scans the filesystem instead of reading a driver-maintained +// list because the decomposer LLM writes story.md files using the Write tool — +// it has no reason to know the JSON state format, and requiring it to update +// epic-state.json would force an LLM to write JSON, violating the core invariant +// (§10.2). The driver discovers what the LLM created by scanning stories/*/story.md, +// then populates the JSON story list itself. 
+ +import { promises as fs } from "node:fs"; +import * as os from "node:os"; +import * as path from "node:path"; + +import { + createInitialEpicState, + createInitialStoryState, + type EpicInfo, + type EpicState, + type StoryState, +} from "./types.js"; + +export const KOAN_HOME = path.join(os.homedir(), ".koan"); +export const EPICS_HOME = path.join(KOAN_HOME, "state", "epics"); + +// --------------------------------------------------------------------------- +// Path helpers +// --------------------------------------------------------------------------- + +function epicStatePath(epicDir: string): string { + return path.join(epicDir, "epic-state.json"); +} + +function storyStatePath(epicDir: string, storyId: string): string { + return path.join(epicDir, "stories", storyId, "state.json"); +} + +// --------------------------------------------------------------------------- +// Atomic JSON write +// --------------------------------------------------------------------------- + +// Writes to a .tmp file first, then renames — preventing partial reads. +async function atomicWriteJson(filePath: string, value: unknown): Promise { + const tmpPath = `${filePath}.tmp`; + await fs.writeFile(tmpPath, `${JSON.stringify(value, null, 2)}\n`, "utf8"); + await fs.rename(tmpPath, filePath); +} + +// --------------------------------------------------------------------------- +// ID generation +// --------------------------------------------------------------------------- + +function slugify(input: string): string { + const base = input + .toLowerCase() + .replace(/[^a-z0-9]+/g, "-") + .replace(/^-+|-+$/g, "") + .slice(0, 48); + return base.length > 0 ? 
base : "epic"; +} + +export function generateEpicId(description: string, now: Date): string { + const timestamp = now.toISOString().replace(/[-:]/g, "").replace(/\..+/, ""); + const slug = slugify(description); + return `${timestamp}-${slug}`; +} + +async function ensureEpicDirectoryUnique(baseId: string): Promise<{ id: string; directory: string }> { + let suffix = 0; + while (true) { + const candidateId = suffix === 0 ? baseId : `${baseId}-${suffix}`; + const directory = path.join(EPICS_HOME, candidateId); + try { + await fs.mkdir(directory, { recursive: false }); + return { id: candidateId, directory }; + } catch (error) { + const err = error as NodeJS.ErrnoException; + if (err.code === "EEXIST") { + suffix += 1; + continue; + } + throw error; + } + } +} + +// --------------------------------------------------------------------------- +// Epic directory creation +// --------------------------------------------------------------------------- + +// Creates the epic directory with standard subdirectories. +// Creates only 'stories/' and 'subagents/' — no 'scouts/' directory. +// Scout output lives in per-scout subagent directories under subagents/. 
+export async function createEpicDirectory(description: string, _cwd: string, now = new Date()): Promise { + await fs.mkdir(EPICS_HOME, { recursive: true }); + + const baseId = generateEpicId(description, now); + const { id, directory } = await ensureEpicDirectoryUnique(baseId); + + await Promise.all([ + fs.mkdir(path.join(directory, "stories"), { recursive: true }), + fs.mkdir(path.join(directory, "subagents"), { recursive: true }), + ]); + + const epicState = createInitialEpicState(id); + await atomicWriteJson(epicStatePath(directory), epicState); + + return { id, directory, createdAt: epicState.createdAt }; +} + +// --------------------------------------------------------------------------- +// Epic state I/O +// --------------------------------------------------------------------------- + +export async function loadEpicState(epicDir: string): Promise { + const raw = await fs.readFile(epicStatePath(epicDir), "utf8"); + return JSON.parse(raw) as EpicState; +} + +export async function saveEpicState(epicDir: string, state: EpicState): Promise { + await atomicWriteJson(epicStatePath(epicDir), state); +} + +// --------------------------------------------------------------------------- +// Story state I/O +// --------------------------------------------------------------------------- + +export async function loadStoryState(epicDir: string, storyId: string): Promise { + const raw = await fs.readFile(storyStatePath(epicDir, storyId), "utf8"); + return JSON.parse(raw) as StoryState; +} + +export async function saveStoryState(epicDir: string, storyId: string, state: StoryState): Promise { + await atomicWriteJson(storyStatePath(epicDir, storyId), state); +} + +export async function loadAllStoryStates(epicDir: string): Promise { + const epicState = await loadEpicState(epicDir); + return Promise.all(epicState.stories.map((id) => loadStoryState(epicDir, id))); +} + +// --------------------------------------------------------------------------- +// Directory provisioning +// 
--------------------------------------------------------------------------- + +// Ensures the story directory and plan subdirectory exist, and that state.json +// is initialized if not already present. +export async function ensureStoryDirectory(epicDir: string, storyId: string): Promise { + const storyDir = path.join(epicDir, "stories", storyId); + await fs.mkdir(path.join(storyDir, "plan"), { recursive: true }); + + const statePath = storyStatePath(epicDir, storyId); + try { + await fs.access(statePath); + } catch { + const initialState = createInitialStoryState(storyId); + await atomicWriteJson(statePath, initialState); + } + + return storyDir; +} + +// Ensures a uniquely labeled subagent directory exists under {epicDir}/subagents/. +// The label should be descriptive (e.g., "intake-20260313T105232" or "scout-task1-1741830752000"). +export async function ensureSubagentDirectory(epicDir: string, label: string): Promise { + const subagentDir = path.join(epicDir, "subagents", label); + await fs.mkdir(subagentDir, { recursive: true }); + return subagentDir; +} + +// --------------------------------------------------------------------------- +// Story discovery +// --------------------------------------------------------------------------- + +// Scans {epicDir}/stories/ for subdirectories and returns their names sorted. +// This is the authoritative discovery mechanism after decomposition. +// The driver calls this after the decomposer LLM creates stories/*/story.md files. +// Never reads epic-state.json.stories — that list is populated by the driver AFTER +// discovery, not by the LLM. 
+export async function discoverStoryIds(epicDir: string): Promise { + const storiesDir = path.join(epicDir, "stories"); + try { + const entries = await fs.readdir(storiesDir, { withFileTypes: true }); + return entries + .filter((e) => e.isDirectory()) + .map((e) => e.name) + .sort(); + } catch (err: unknown) { + if ((err as NodeJS.ErrnoException).code === "ENOENT") return []; + throw err; + } +} diff --git a/src/planner/epic/types.ts b/src/planner/epic/types.ts new file mode 100644 index 0000000..17f93de --- /dev/null +++ b/src/planner/epic/types.ts @@ -0,0 +1,56 @@ +// Epic and story state types — JSON structures for driver consumption. +// Persisted as .json files under ~/.koan/state/epics/{epic-id}/. +// Per AGENTS.md invariant: LLMs write markdown only; driver reads JSON only. +// LLMs never read these files directly — they read the corresponding .md files. + +import type { EpicPhase, StoryStatus } from "../types.js"; + +// Persisted at {epic-dir}/epic-state.json +export interface EpicState { + epicId: string; + createdAt: string; + phase: EpicPhase; + stories: string[]; // Story IDs in declaration order +} + +// Persisted at {epic-dir}/stories/{story-id}/state.json +// Note: no `escalation` field — escalation is handled via koan_ask_question, +// not a separate status or state field. +export interface StoryState { + storyId: string; + status: StoryStatus; + updatedAt: string; + retryCount: number; + maxRetries: number; + failureSummary?: string; // Set by koan_retry_story; used as retry context for executor + skipReason?: string; // Set by koan_skip_story or driver on budget exhaustion +} + +// Metadata about an epic directory — returned by createEpicDirectory. +export interface EpicInfo { + id: string; + directory: string; + createdAt: string; +} + +// Default retry budget per story. 
+export const DEFAULT_MAX_RETRIES = 2; + +export function createInitialStoryState(storyId: string, maxRetries = DEFAULT_MAX_RETRIES): StoryState { + return { + storyId, + status: "pending", + updatedAt: new Date().toISOString(), + retryCount: 0, + maxRetries, + }; +} + +export function createInitialEpicState(epicId: string, stories: string[] = []): EpicState { + return { + epicId, + createdAt: new Date().toISOString(), + phase: "intake", + stories, + }; +} diff --git a/src/planner/model-config.ts b/src/planner/model-config.ts index 0f007cc..80d968a 100644 --- a/src/planner/model-config.ts +++ b/src/planner/model-config.ts @@ -1,29 +1,30 @@ -// Koan config persistence for per-phase model overrides. -// Storage location: ~/.koan/config.json under a `phaseModels` key. -// Enforces all-or-none semantics: a stored config must contain exactly all -// 20 PhaseModelKeys. Partial configs are treated as absent and logged. +// Koan config persistence for role-based model tier overrides. +// Storage location: ~/.koan/config.json under a `modelTiers` key. +// All 3 tiers (strong, standard, cheap) must be present when a config exists. +// Partial configs are treated as absent and logged. 
import { promises as fs } from "node:fs"; import * as os from "node:os"; import * as path from "node:path"; -import { - ALL_PHASE_MODEL_KEYS, - isPhaseModelKey, - type PhaseModelKey, -} from "./model-phase.js"; +import { ALL_MODEL_TIERS, isModelTier, type ModelTier } from "./model-phase.js"; +import { createLogger } from "../utils/logger.js"; -export const KOAN_CONFIG_PATH = path.join(os.homedir(), ".koan", "config.json"); +const log = createLogger("model-config"); + +export const CONFIG_PATH = path.join(os.homedir(), ".koan", "config.json"); + +export type ModelTierConfig = Record; interface KoanConfigFile { - phaseModels?: Record; + modelTiers?: Record; [key: string]: unknown; } -export async function loadPhaseModelConfig(): Promise | null> { +export async function loadModelTierConfig(): Promise { let raw: string; try { - raw = await fs.readFile(KOAN_CONFIG_PATH, "utf8"); + raw = await fs.readFile(CONFIG_PATH, "utf8"); } catch { return null; } @@ -32,71 +33,61 @@ export async function loadPhaseModelConfig(): Promise> = {}; - for (const key of keys) { - if (!isPhaseModelKey(key)) { - console.warn(`[koan] config.json phaseModels contains unknown key "${key}"; treating as absent.`); + const result: Partial = {}; + for (const tier of ALL_MODEL_TIERS) { + if (!(tier in modelTiers)) { + log(`config.json modelTiers is missing key "${tier}"; treating as absent.`); return null; } - const value = phaseModels[key]; + const value = modelTiers[tier]; if (typeof value !== "string" || value.length === 0) { - console.warn( - `[koan] config.json phaseModels["${key}"] is not a non-empty string; treating as absent.`, - ); + log(`config.json modelTiers["${tier}"] is not a non-empty string; treating as absent.`); return null; } - result[key] = value; + result[tier] = value; } - for (const expected of ALL_PHASE_MODEL_KEYS) { - if (!(expected in result)) { - console.warn(`[koan] config.json phaseModels is missing key "${expected}"; treating as absent.`); + for (const key of keys) { + 
if (!isModelTier(key)) { + log(`config.json modelTiers contains unknown key "${key}"; treating as absent.`); return null; } } - return result as Record; + return result as ModelTierConfig; } -export async function savePhaseModelConfig( - config: Record | null, -): Promise { - const configDir = path.dirname(KOAN_CONFIG_PATH); +export async function saveModelTierConfig(config: ModelTierConfig): Promise { + const configDir = path.dirname(CONFIG_PATH); await fs.mkdir(configDir, { recursive: true }); let existing: KoanConfigFile = {}; try { - const raw = await fs.readFile(KOAN_CONFIG_PATH, "utf8"); + const raw = await fs.readFile(CONFIG_PATH, "utf8"); existing = JSON.parse(raw) as KoanConfigFile; } catch { // Start fresh if file is missing or contains invalid JSON. } - if (config === null) { - delete existing.phaseModels; - } else { - existing.phaseModels = config as Record; - } + existing.modelTiers = config as Record; - const tmpPath = `${KOAN_CONFIG_PATH}.tmp`; + const tmpPath = `${CONFIG_PATH}.tmp`; await fs.writeFile(tmpPath, `${JSON.stringify(existing, null, 2)}\n`, "utf8"); - await fs.rename(tmpPath, KOAN_CONFIG_PATH); + await fs.rename(tmpPath, CONFIG_PATH); } diff --git a/src/planner/model-phase.ts b/src/planner/model-phase.ts index b2319ca..0bd642c 100644 --- a/src/planner/model-phase.ts +++ b/src/planner/model-phase.ts @@ -1,63 +1,14 @@ -// Canonical phase-model key definitions for koan per-phase model selection. -// Defines the 5×4 matrix of (phase row × sub-phase column) keys used across -// configuration, UI, and spawn-time resolution. +// Role-based model tier types for koan. +// Replaces the old 5×4 PhaseRow × SubPhase matrix with a 3-tier system. +// Tiers map deterministically from role via ROLE_MODEL_TIER in types.ts. 
-export type PhaseRow = "plan-design" | "plan-code" | "plan-docs" | "exec-code" | "exec-docs"; -export type SubPhase = "exec-debut" | "exec-fix" | "qr-decompose" | "qr-verify"; -export type PhaseModelKey = `${PhaseRow}-${SubPhase}`; +import type { ModelTier } from "./types.js"; -export const PHASE_ROWS: readonly PhaseRow[] = [ - "plan-design", - "plan-code", - "plan-docs", - "exec-code", - "exec-docs", -]; +export type { ModelTier, SubagentRole } from "./types.js"; +export { ROLE_MODEL_TIER } from "./types.js"; -export const SUB_PHASES: readonly SubPhase[] = [ - "exec-debut", - "exec-fix", - "qr-decompose", - "qr-verify", -]; +export const ALL_MODEL_TIERS: readonly ModelTier[] = ["strong", "standard", "cheap"]; -function computeAllKeys(): PhaseModelKey[] { - const keys: PhaseModelKey[] = []; - for (const row of PHASE_ROWS) { - for (const col of SUB_PHASES) { - keys.push(`${row}-${col}`); - } - } - return keys; -} - -export const ALL_PHASE_MODEL_KEYS: readonly PhaseModelKey[] = computeAllKeys(); - -const STRONG_KEY_SET: Set = new Set([ - // All qr-decompose keys (bias reasoning budget to verification) - "plan-design-qr-decompose", - "plan-code-qr-decompose", - "plan-docs-qr-decompose", - "exec-code-qr-decompose", - "exec-docs-qr-decompose", - // plan-design exec keys (ripple effects across later work) - "plan-design-exec-debut", - "plan-design-exec-fix", - // exec-docs exec keys (no mechanical correctness backstop) - "exec-docs-exec-debut", - "exec-docs-exec-fix", -]); - -export const STRONG_PHASE_MODEL_KEYS: ReadonlySet = STRONG_KEY_SET; - -export const GENERAL_PURPOSE_PHASE_MODEL_KEYS: readonly PhaseModelKey[] = - ALL_PHASE_MODEL_KEYS.filter((k) => !STRONG_KEY_SET.has(k)); - -export function isPhaseModelKey(value: unknown): value is PhaseModelKey { - if (typeof value !== "string") return false; - return (ALL_PHASE_MODEL_KEYS as readonly string[]).includes(value); -} - -export function buildPhaseModelKey(phaseRow: PhaseRow, subPhase: SubPhase): PhaseModelKey { - 
return `${phaseRow}-${subPhase}`; +export function isModelTier(value: unknown): value is ModelTier { + return typeof value === "string" && ALL_MODEL_TIERS.includes(value as ModelTier); } diff --git a/src/planner/model-resolver.ts b/src/planner/model-resolver.ts index b67b371..bc4e530 100644 --- a/src/planner/model-resolver.ts +++ b/src/planner/model-resolver.ts @@ -1,33 +1,14 @@ -// Spawn-time model resolver for per-phase model overrides. -// Maps spawn contexts to PhaseModelKeys and looks up configured overrides. -// Returns undefined when no config exists so the caller omits --model entirely, +// Spawn-time model resolver for role-based model overrides. +// Maps SubagentRole → ModelTier → configured model string. +// Returns undefined when no config exists so the caller omits --model, // preserving pi's current active model as the implicit fallback. -import { buildPhaseModelKey, type PhaseModelKey, type PhaseRow } from "./model-phase.js"; -import { loadPhaseModelConfig } from "./model-config.js"; +import { ROLE_MODEL_TIER, type SubagentRole } from "./model-phase.js"; +import { loadModelTierConfig } from "./model-config.js"; -export type SpawnContext = "work-debut" | "fix" | "qr-decompose" | "qr-verify"; - -export function mapSpawnContextToPhaseModelKey( - context: SpawnContext, - phaseRow: PhaseRow, - // Reserved for future fix-phase-specific routing. Current mapping is phase-row + context only. 
- _fixPhase?: string, -): PhaseModelKey { - switch (context) { - case "work-debut": - return buildPhaseModelKey(phaseRow, "exec-debut"); - case "fix": - return buildPhaseModelKey(phaseRow, "exec-fix"); - case "qr-decompose": - return buildPhaseModelKey(phaseRow, "qr-decompose"); - case "qr-verify": - return buildPhaseModelKey(phaseRow, "qr-verify"); - } -} - -export async function resolvePhaseModelOverride(key: PhaseModelKey): Promise { - const config = await loadPhaseModelConfig(); +export async function resolveModelForRole(role: SubagentRole): Promise { + const config = await loadModelTierConfig(); if (config === null) return undefined; - return config[key]; + const tier = ROLE_MODEL_TIER[role]; + return config[tier]; } diff --git a/src/planner/types.ts b/src/planner/types.ts new file mode 100644 index 0000000..c2c0a5a --- /dev/null +++ b/src/planner/types.ts @@ -0,0 +1,53 @@ +// Core types for the koan epic/story orchestrator. +// Shared across driver, phases, tools, and spawn infrastructure. + +// No `escalated` status: escalation is asking a question (§11.3.1). The orchestrator +// calls `koan_ask_question` when it needs human input, then decides via retry/skip. +// A separate status created a dead routing path — the driver had nowhere clean to +// send it without duplicating the ask UI flow that IPC already handles. +// +// No `scouting` EpicPhase: scouts are spawned inside the IPC responder during +// intake/decomposer/planner phases, not as a top-level driver phase. Adding +// "scouting" to EpicPhase would imply a driver state that never exists (§12.2.2). +// If a top-level scouting phase is added later, re-add the value then. +// +// StepSequence exists for the orchestrator, which has two distinct step counts +// depending on where in the story lifecycle it runs: pre-execution (2 steps: +// dependency analysis + select) vs post-execution (4 steps: verify + verdict + +// propagate + select next). 
A single OrchestratorPhase class reads this value +// in begin() to configure its total steps and guidance functions (§9.1). + +// Subagent roles — the six LLM roles in the pipeline. +export type SubagentRole = "intake" | "scout" | "decomposer" | "orchestrator" | "planner" | "executor"; + +// Model tiers — maps to three capability levels. +export type ModelTier = "strong" | "standard" | "cheap"; + +// Role → model tier mapping. Scouts use cheap models; execution roles use standard. +export const ROLE_MODEL_TIER: Record = { + intake: "strong", + scout: "cheap", + decomposer: "strong", + orchestrator: "strong", + planner: "strong", + executor: "standard", +}; + +// Orchestrator step sequences — configures step count and guidance at spawn time. +export type StepSequence = "pre-execution" | "post-execution"; + +// Story lifecycle states. Driver manages intermediate transitions; orchestrator tools +// drive the routing transitions via koan_* tool calls. +export type StoryStatus = + | "pending" // Initial state: not yet selected + | "selected" // Orchestrator selected this story via koan_select_story + | "planning" // Driver-internal: planner subagent is running + | "executing" // Driver-internal: executor subagent is running + | "verifying" // Driver-internal: post-execution orchestrator is running + | "done" // Orchestrator verdict: story completed successfully + | "retry" // Orchestrator verdict: re-execute with failure context + | "skipped"; // Orchestrator or driver: story bypassed (budget exhaustion or explicit skip) + +// Epic lifecycle phases (driver-managed, not LLM-visible directly). +// Note: "scouting" is intentionally absent — scouts run within other phases via IPC. 
+export type EpicPhase = "intake" | "decomposition" | "review" | "executing" | "completed"; diff --git a/src/planner/ui/config/menu.ts b/src/planner/ui/config/menu.ts index de11954..e50f517 100644 --- a/src/planner/ui/config/menu.ts +++ b/src/planner/ui/config/menu.ts @@ -6,13 +6,14 @@ import type { ExtensionCommandContext } from "@mariozechner/pi-coding-agent"; import { getSettingsListTheme } from "@mariozechner/pi-coding-agent"; import { type SettingItem, SettingsList } from "@mariozechner/pi-tui"; -import { ALL_PHASE_MODEL_KEYS, type PhaseModelKey } from "../../model-phase.js"; -import { loadPhaseModelConfig } from "../../model-config.js"; +import { ALL_MODEL_TIERS, type ModelTier } from "../../model-phase.js"; +import { loadModelTierConfig } from "../../model-config.js"; +import type { ModelTierConfig } from "../../model-config.js"; import { createModelSelectionComponent } from "./model-selection.js"; -function configSummary(config: Record | null): string { +function configSummary(config: ModelTierConfig | null): string { if (config === null) return "inheriting active model"; - return `${ALL_PHASE_MODEL_KEYS.length} keys configured`; + return `${ALL_MODEL_TIERS.length} tiers configured`; } export async function openKoanConfig(ctx: ExtensionCommandContext): Promise { @@ -22,7 +23,7 @@ export async function openKoanConfig(ctx: ExtensionCommandContext): Promise(async (tui, theme, _keybindings, done) => { - const initialConfig = await loadPhaseModelConfig(); + const initialConfig = await loadModelTierConfig(); let currentConfig = initialConfig; const activeModelId = ctx.model diff --git a/src/planner/ui/config/model-selection.ts b/src/planner/ui/config/model-selection.ts index e551b3a..ee2a695 100644 --- a/src/planner/ui/config/model-selection.ts +++ b/src/planner/ui/config/model-selection.ts @@ -1,6 +1,6 @@ -// Model selection matrix UI for /koan config. -// Renders quick-set actions plus a true 5×4 matrix (phase rows × sub-phase columns). 
-// Enter opens an inline ModelSelectorComponent for the selected quick-set/cell. +// Model selection UI for /koan config. +// Renders a 3-row tier table (strong / standard / cheap). +// Enter opens an inline ModelSelectorComponent for the selected tier. // Uses SettingsManager.inMemory() to prevent global default model mutation. import { ModelSelectorComponent, SettingsManager } from "@mariozechner/pi-coding-agent"; @@ -14,97 +14,21 @@ import { visibleWidth, } from "@mariozechner/pi-tui"; -import { - ALL_PHASE_MODEL_KEYS, - GENERAL_PURPOSE_PHASE_MODEL_KEYS, - PHASE_ROWS, - STRONG_PHASE_MODEL_KEYS, - SUB_PHASES, - buildPhaseModelKey, - type PhaseModelKey, - type PhaseRow, -} from "../../model-phase.js"; -import { savePhaseModelConfig } from "../../model-config.js"; - -// -- Pure quick-set utilities (exported for testing) -- - -export function initConfigFromActiveModel(activeModelId: string): Record { - const config: Partial> = {}; - for (const key of ALL_PHASE_MODEL_KEYS) { - config[key] = activeModelId; - } - return config as Record; -} - -export function applyStrongModel( - model: string, - existingConfig: Record | null, - activeModelId: string, -): Record { - const base = existingConfig ?? initConfigFromActiveModel(activeModelId); - const result = { ...base }; - for (const key of STRONG_PHASE_MODEL_KEYS) { - result[key] = model; - } - return result; -} - -export function applyGeneralPurposeModel( - model: string, - existingConfig: Record | null, - activeModelId: string, -): Record { - const base = existingConfig ?? 
initConfigFromActiveModel(activeModelId); - const result = { ...base }; - for (const key of GENERAL_PURPOSE_PHASE_MODEL_KEYS) { - result[key] = model; - } - return result; -} - -// -- Confirmation component for reset action -- - -class ResetConfirmComponent implements Component { - constructor( - private readonly theme: Theme, - private readonly onConfirm: () => void, - private readonly onCancel: () => void, - ) {} - - render(_width: number): string[] { - return [ - this.theme.bold(this.theme.fg("accent", "Reset all model overrides to active model?")), - "", - this.theme.fg("muted", " This will set all 20 phase model cells to the current active model."), - "", - this.theme.fg("dim", " Enter to confirm · Escape to cancel"), - ]; - } - - handleInput(data: string): void { - if (data === "\r" || data === "\n") { - this.onConfirm(); - } else if (data === "\x1b") { - this.onCancel(); - } - } - - invalidate(): void {} -} +import { ALL_MODEL_TIERS, type ModelTier } from "../../model-phase.js"; +import { saveModelTierConfig } from "../../model-config.js"; +import type { ModelTierConfig } from "../../model-config.js"; function padRight(text: string, width: number): string { const padding = Math.max(0, width - visibleWidth(text)); return text + " ".repeat(padding); } -function renderCell(theme: Theme, text: string, width: number, selected: boolean, strong: boolean): string { +function renderCell(theme: Theme, text: string, width: number, selected: boolean): string { const innerWidth = Math.max(1, width - 2); const clipped = truncateToWidth(text, innerWidth, ""); const padded = padRight(clipped, innerWidth); const raw = ` ${padded} `; - if (selected) return theme.inverse(raw); - if (strong) return theme.fg("accent", raw); return raw; } @@ -115,43 +39,30 @@ function cellDisplay(modelId: string | undefined, activeModelId: string | undefi return modelId; } -type SelectionZone = "quick" | "grid"; - -// -- Create model selection component -- - export function 
createModelSelectionComponent( tui: TUI, theme: Theme, modelRegistry: ModelRegistry, activeModelId: string | undefined, - initialConfig: Record | null, - onConfigChange: (newConfig: Record | null) => void, + initialConfig: ModelTierConfig | null, + onConfigChange: (newConfig: ModelTierConfig | null) => void, onSaveError: (error: unknown) => void, onClose: () => void, ): Component { const fallbackActive = activeModelId ?? "(active model)"; - const configRef: { value: Record | null } = { value: initialConfig }; + const configRef: { value: ModelTierConfig | null } = { value: initialConfig }; - const quickItems = [ - "Reset to active", - `Set strong (${STRONG_PHASE_MODEL_KEYS.size})`, - `Set general (${GENERAL_PURPOSE_PHASE_MODEL_KEYS.length})`, - ] as const; - - let zone: SelectionZone = "quick"; - let quickIndex = 0; let rowIndex = 0; - let colIndex = 0; let overlay: Component | null = null; function requestRender(): void { tui.requestRender(); } - async function persistAndNotify(newConfig: Record | null): Promise { + async function persistAndNotify(newConfig: ModelTierConfig | null): Promise { const previous = configRef.value; try { - await savePhaseModelConfig(newConfig); + await saveModelTierConfig(newConfig as ModelTierConfig); configRef.value = newConfig; onConfigChange(newConfig); return true; @@ -190,41 +101,19 @@ export function createModelSelectionComponent( requestRender(); } - function openResetConfirm(): void { - overlay = new ResetConfirmComponent( - theme, - () => { - const resetConfig = initConfigFromActiveModel(fallbackActive); - void persistAndNotify(resetConfig).finally(() => closeOverlay()); - }, - () => closeOverlay(), - ); - requestRender(); - } - - function openStrongSelector(): void { - const strongSample = Array.from(STRONG_PHASE_MODEL_KEYS)[0]; - const currentId = configRef.value?.[strongSample]; - - overlay = makeModelSelector( - currentId, - (modelId) => { - const newConfig = applyStrongModel(modelId, configRef.value, fallbackActive); - 
void persistAndNotify(newConfig).finally(() => closeOverlay()); - }, - () => closeOverlay(), - ); - requestRender(); - } - - function openGeneralSelector(): void { - const gpSample = GENERAL_PURPOSE_PHASE_MODEL_KEYS[0]; - const currentId = configRef.value?.[gpSample]; + function openTierSelector(): void { + const tier = ALL_MODEL_TIERS[rowIndex] as ModelTier; + const currentId = configRef.value?.[tier]; overlay = makeModelSelector( currentId, (modelId) => { - const newConfig = applyGeneralPurposeModel(modelId, configRef.value, fallbackActive); + const base: ModelTierConfig = configRef.value ?? { + strong: fallbackActive, + standard: fallbackActive, + cheap: fallbackActive, + }; + const newConfig: ModelTierConfig = { ...base, [tier]: modelId }; void persistAndNotify(newConfig).finally(() => closeOverlay()); }, () => closeOverlay(), @@ -232,79 +121,12 @@ export function createModelSelectionComponent( requestRender(); } - function openCellSelector(): void { - const row = PHASE_ROWS[rowIndex] as PhaseRow; - const subPhase = SUB_PHASES[colIndex]; - const key = buildPhaseModelKey(row, subPhase); - const currentId = configRef.value?.[key]; - - overlay = makeModelSelector( - currentId, - (modelId) => { - const base = configRef.value ?? 
initConfigFromActiveModel(fallbackActive); - const newConfig = { ...base, [key]: modelId }; - void persistAndNotify(newConfig).finally(() => closeOverlay()); - }, - () => closeOverlay(), - ); - requestRender(); - } - - function activateSelection(): void { - if (zone === "quick") { - if (quickIndex === 0) { - openResetConfirm(); - } else if (quickIndex === 1) { - openStrongSelector(); - } else { - openGeneralSelector(); - } - return; - } - - openCellSelector(); - } - function moveUp(): void { - if (zone === "quick") return; - if (rowIndex === 0) { - zone = "quick"; - return; - } - rowIndex -= 1; + if (rowIndex > 0) rowIndex -= 1; } function moveDown(): void { - if (zone === "quick") { - zone = "grid"; - rowIndex = 0; - return; - } - - if (rowIndex === PHASE_ROWS.length - 1) { - rowIndex = 0; - return; - } - - rowIndex += 1; - } - - function moveLeft(): void { - if (zone === "quick") { - quickIndex = quickIndex === 0 ? quickItems.length - 1 : quickIndex - 1; - return; - } - - colIndex = colIndex === 0 ? SUB_PHASES.length - 1 : colIndex - 1; - } - - function moveRight(): void { - if (zone === "quick") { - quickIndex = quickIndex === quickItems.length - 1 ? 0 : quickIndex + 1; - return; - } - - colIndex = colIndex === SUB_PHASES.length - 1 ? 
0 : colIndex + 1; + if (rowIndex < ALL_MODEL_TIERS.length - 1) rowIndex += 1; } function renderMain(width: number): string[] { @@ -314,50 +136,33 @@ export function createModelSelectionComponent( lines.push(theme.fg("muted", `Fallback active model: ${fallbackActive}`)); lines.push(""); - const quick = quickItems - .map((label, i) => { - const block = ` ${label} `; - if (zone === "quick" && quickIndex === i) return theme.inverse(block); - return theme.fg("muted", block); - }) - .join(" "); - - lines.push(`Quick-set: ${quick}`); - lines.push(""); - + const tierColWidth = 12; const sep = " | "; const sepWidth = visibleWidth(sep); - const phaseColWidth = 12; - const available = Math.max(24, width - phaseColWidth - sepWidth * 4); - const modelColWidth = Math.max(12, Math.floor(available / 4)); + const modelColWidth = Math.max(20, width - tierColWidth - sepWidth); const headerCells = [ - renderCell(theme, "phase", phaseColWidth, false, false), - ...SUB_PHASES.map((sub) => renderCell(theme, sub, modelColWidth, false, false)), + renderCell(theme, "tier", tierColWidth, false), + renderCell(theme, "model", modelColWidth, false), ]; lines.push(headerCells.join(sep)); lines.push("-".repeat(Math.max(10, Math.min(width, visibleWidth(headerCells.join(sep)))))); - for (let r = 0; r < PHASE_ROWS.length; r += 1) { - const row = PHASE_ROWS[r] as PhaseRow; - const rowCells: string[] = [renderCell(theme, row, phaseColWidth, false, false)]; - - for (let c = 0; c < SUB_PHASES.length; c += 1) { - const sub = SUB_PHASES[c]; - const key = buildPhaseModelKey(row, sub); - const model = configRef.value?.[key]; - const display = cellDisplay(model, activeModelId); - const selected = zone === "grid" && rowIndex === r && colIndex === c; - const strong = STRONG_PHASE_MODEL_KEYS.has(key); - rowCells.push(renderCell(theme, display, modelColWidth, selected, strong)); - } - - lines.push(truncateToWidth(rowCells.join(sep), width)); + for (let r = 0; r < ALL_MODEL_TIERS.length; r += 1) { + const tier = 
ALL_MODEL_TIERS[r] as ModelTier; + const model = configRef.value?.[tier]; + const display = cellDisplay(model, activeModelId); + const selected = rowIndex === r; + + const row = [ + renderCell(theme, tier, tierColWidth, false), + renderCell(theme, display, modelColWidth, selected), + ]; + lines.push(truncateToWidth(row.join(sep), width)); } lines.push(""); - lines.push(theme.fg("dim", "★ strong cell")); - lines.push(theme.fg("dim", "↑↓ move row/section · ←→ move column/quick-set · Enter select · Esc back")); + lines.push(theme.fg("dim", "↑↓ move row · Enter select model · Esc back")); return lines; } @@ -380,7 +185,7 @@ export function createModelSelectionComponent( return; } if (kb.matches(data, "selectConfirm") || data === " ") { - activateSelection(); + openTierSelector(); return; } if (kb.matches(data, "selectUp")) { @@ -391,16 +196,6 @@ export function createModelSelectionComponent( if (kb.matches(data, "selectDown")) { moveDown(); requestRender(); - return; - } - if (kb.matches(data, "cursorLeft")) { - moveLeft(); - requestRender(); - return; - } - if (kb.matches(data, "cursorRight")) { - moveRight(); - requestRender(); } }, invalidate: () => { diff --git a/src/utils/logger.ts b/src/utils/logger.ts index c8ced16..f95e589 100644 --- a/src/utils/logger.ts +++ b/src/utils/logger.ts @@ -1,36 +1,41 @@ -// Debug logger for koan internals. Writes to a log file when a plan -// directory is available; silent otherwise. The Pi TUI captures both -// stdout and stderr, so neither can be used for debug output. +// Debug logger for koan internals. Writes to a log file in the plan directory +// when a log directory has been configured; silent otherwise. +// The Pi TUI captures both stdout and stderr, so neither can be used for debug output. 
import { appendFileSync, mkdirSync } from "node:fs"; import * as path from "node:path"; -const prefix = "[koan]"; - export type Logger = | undefined>(message: string, details?: T) => void; +const PREFIX = "[koan]"; + let logPath: string | null = null; +// Configure the log file location. Call once after the epic directory is created. +// Subsequent createLogger() calls will write to {planDir}/koan.log. export function setLogDir(planDir: string): void { logPath = path.join(planDir, "koan.log"); try { mkdirSync(path.dirname(logPath), { recursive: true }); } catch { - // best effort + // Best effort — directory may already exist. } } +// Create a scoped logger. Returns a function that appends to the configured +// log file. Silent if setLogDir() has not been called. export function createLogger(scope: string): Logger { - const label = `${prefix} ${scope}`; + const label = `${PREFIX} ${scope}`; return (message, details) => { if (!logPath) return; - const suffix = details && Object.keys(details).length > 0 - ? ` ${JSON.stringify(details)}` - : ""; + const suffix = + details !== undefined && Object.keys(details).length > 0 + ? ` ${JSON.stringify(details)}` + : ""; try { appendFileSync(logPath, `${new Date().toISOString()} ${label}: ${message}${suffix}\n`); } catch { - // best effort -- plan dir may not exist yet + // Best effort — log file may not be writable yet. 
} }; } From 8983f371d9c58833d66bb9b3ed0a8bfd44c9785e Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Fri, 13 Mar 2026 12:45:05 +0700 Subject: [PATCH 042/412] feat(planner): add runtime context and IPC-driven tool infra --- src/planner/lib/audit.ts | 92 ++------- src/planner/lib/ipc-responder.ts | 201 +++++++++++++++++++ src/planner/lib/ipc.ts | 93 ++++++--- src/planner/lib/permissions.ts | 299 ++++++++++++----------------- src/planner/lib/pool.ts | 12 +- src/planner/lib/runtime-context.ts | 20 ++ src/planner/lib/step.ts | 15 +- src/planner/tools/ask.ts | 169 +++++++++++++--- src/planner/tools/workflow.ts | 28 ++- 9 files changed, 585 insertions(+), 344 deletions(-) create mode 100644 src/planner/lib/ipc-responder.ts create mode 100644 src/planner/lib/runtime-context.ts diff --git a/src/planner/lib/audit.ts b/src/planner/lib/audit.ts index 12191ca..992ecab 100644 --- a/src/planner/lib/audit.ts +++ b/src/planner/lib/audit.ts @@ -133,7 +133,7 @@ export function summarize(e: ToolEvent): string { } } -// Pure projection update -- one case per discriminated kind. +// Pure projection update — one case per discriminated kind. // All branches update updatedAt and increment eventCount. export function fold(s: Projection, e: AuditEvent): Projection { const base = { ...s, updatedAt: e.ts, eventCount: s.eventCount + 1 }; @@ -232,7 +232,7 @@ export class EventLog { private projection: Projection; private heartbeat: ReturnType | null = null; // Serializes append() calls. Heartbeat timer and tool_result handler - // both call append() concurrently -- without serialization, two + // both call append() concurrently — without serialization, two // writeState() calls race on the shared tmp file (ENOENT on rename). private pending: Promise = Promise.resolve(); @@ -336,7 +336,7 @@ export class EventLog { // -- Exports -- // Reads state.json as a Projection; returns null if missing or malformed. -// Used by session.ts parent polling loop. +// Used by driver polling loop. 
export async function readProjection(dir: string): Promise { try { const raw = await fs.readFile(path.join(dir, "state.json"), "utf8"); @@ -364,90 +364,20 @@ interface ToolShape { } const PREVIEW_CHARS = 40; -const KEY_PRIORITY = ["id", "milestone", "decision_ref", "intent_ref", "file", "path", "phase"]; +const KEY_PRIORITY = ["id", "story_id", "milestone", "decision_ref", "intent_ref", "file", "path", "phase"]; +// Tool shapes for koan_* tools. No koan_escalate (eliminated in §11.3.1). const KOAN_SHAPES: Record = { - koan_get_plan: { keys: ["phase"], getter: true }, - koan_get_milestone: { keys: ["id"], getter: true }, - koan_get_decision: { keys: ["id"], getter: true }, - koan_get_intent: { keys: ["id"], getter: true }, - koan_get_change: { keys: ["id"], getter: true }, - - koan_set_overview: { keys: ["problem", "approach"], freeform: ["problem", "approach"], highValue: true }, - koan_set_constraints: { keys: ["constraints"], arrays: ["constraints"], highValue: true }, - koan_set_invisible_knowledge: { - keys: ["system", "invariants", "tradeoffs"], - freeform: ["system"], - arrays: ["invariants", "tradeoffs"], - highValue: true, - }, - - koan_add_decision: { keys: ["decision", "reasoning"], freeform: ["decision", "reasoning"], highValue: true }, - koan_set_decision: { keys: ["id", "decision", "reasoning"], freeform: ["decision", "reasoning"], highValue: true }, - koan_add_rejected_alternative: { - keys: ["decision_ref", "alternative", "rejection_reason"], - freeform: ["alternative", "rejection_reason"], - highValue: true, - }, - koan_set_rejected_alternative: { - keys: ["id", "decision_ref", "alternative", "rejection_reason"], - freeform: ["alternative", "rejection_reason"], - highValue: true, - }, - koan_add_risk: { keys: ["decision_ref", "anchor", "risk", "mitigation"], freeform: ["risk", "mitigation"], highValue: true }, - koan_set_risk: { - keys: ["id", "decision_ref", "anchor", "risk", "mitigation"], - freeform: ["risk", "mitigation"], - highValue: true, - 
}, - - koan_add_milestone: { - keys: ["name", "files", "flags", "requirements", "acceptance_criteria", "tests"], - arrays: ["files", "flags", "requirements", "acceptance_criteria", "tests"], - highValue: true, - }, - koan_set_milestone_name: { keys: ["id", "name"] }, - koan_set_milestone_files: { keys: ["id", "files"], arrays: ["files"], highValue: true }, - koan_set_milestone_flags: { keys: ["id", "flags"], arrays: ["flags"] }, - koan_set_milestone_requirements: { keys: ["id", "requirements"], arrays: ["requirements"], highValue: true }, - koan_set_milestone_acceptance_criteria: { keys: ["id", "acceptance_criteria"], arrays: ["acceptance_criteria"], highValue: true }, - koan_set_milestone_tests: { keys: ["id", "tests"], arrays: ["tests"], highValue: true }, - - koan_add_intent: { keys: ["milestone", "file", "function", "behavior"], freeform: ["behavior"], highValue: true }, - koan_set_intent: { keys: ["id", "file", "function", "behavior"], freeform: ["behavior"], highValue: true }, - - koan_add_change: { - keys: ["milestone", "file", "intent_ref", "diff", "doc_diff", "comments"], - freeform: ["diff", "doc_diff", "comments"], - highValue: true, - }, - koan_set_change_diff: { keys: ["id", "diff"], freeform: ["diff"], highValue: true }, - koan_set_change_doc_diff: { keys: ["id", "doc_diff"], freeform: ["doc_diff"], highValue: true }, - koan_set_change_comments: { keys: ["id", "comments"], freeform: ["comments"], highValue: true }, - koan_set_change_file: { keys: ["id", "file"], highValue: true }, - koan_set_change_intent_ref: { keys: ["id", "intent_ref"] }, - - koan_add_wave: { keys: ["milestones"], arrays: ["milestones"], highValue: true }, - koan_set_wave_milestones: { keys: ["id", "milestones"], arrays: ["milestones"], highValue: true }, - - koan_add_diagram: { keys: ["type", "scope", "title"] }, - koan_set_diagram: { keys: ["id", "title", "scope", "ascii_render"], freeform: ["ascii_render"], highValue: true }, - koan_add_diagram_node: { keys: ["diagram_id", "id", 
"label", "type"] }, - koan_add_diagram_edge: { keys: ["diagram_id", "source", "target", "label", "protocol"] }, - - koan_set_readme_entry: { keys: ["path", "content"], freeform: ["content"], highValue: true }, - - koan_qr_add_item: { keys: ["phase", "scope", "check", "severity"], freeform: ["check"], highValue: true }, - koan_qr_set_item: { keys: ["phase", "id", "status", "finding"], freeform: ["finding"], highValue: true }, - koan_qr_assign_group: { keys: ["phase", "group_id", "ids"], arrays: ["ids"], highValue: true }, - koan_qr_get_item: { keys: ["phase", "id"], getter: true }, - koan_qr_list_items: { keys: ["phase", "status"], getter: true }, - koan_qr_summary: { keys: ["phase"], getter: true }, + koan_select_story: { keys: ["story_id"], highValue: true }, + koan_complete_story: { keys: ["story_id"], highValue: true }, + koan_retry_story: { keys: ["story_id", "failure_summary"], freeform: ["failure_summary"], highValue: true }, + koan_skip_story: { keys: ["story_id", "reason"], freeform: ["reason"], highValue: true }, koan_ask_question: { keys: ["questions"], arrays: ["questions"], highValue: true }, + koan_request_scouts: { keys: ["scouts"], arrays: ["scouts"], highValue: true }, }; // Reads the tail of events.jsonl and returns structured log entries. -// Filters out heartbeats (noisy). Used by session.ts to feed the widget log card. +// Filters out heartbeats (noisy). Used by driver to feed the widget log card. export async function readRecentLogs(dir: string, count = 8): Promise { try { const raw = await fs.readFile(path.join(dir, "events.jsonl"), "utf8"); diff --git a/src/planner/lib/ipc-responder.ts b/src/planner/lib/ipc-responder.ts new file mode 100644 index 0000000..bf9c31a --- /dev/null +++ b/src/planner/lib/ipc-responder.ts @@ -0,0 +1,201 @@ +// Parent-side IPC responder: polls for requests from active subagents, +// handles them, and writes responses back. 
Runs concurrently with subagent +// process execution and terminates when the provided AbortSignal fires. +// +// Supports two request types (§11.2.4): +// "ask" → render ask UI, write answer back +// "scout-request" → spawn scouts via pool(), write findings paths back + +import { promises as fs } from "node:fs"; +import * as path from "node:path"; + +import type { ExtensionUIContext } from "@mariozechner/pi-coding-agent"; + +import { + readIpcFile, + writeIpcFile, + createAskResponse, + createCancelledResponse, + type AskAnswerPayload, + type ScoutTask, + type AskIpcFile, + type ScoutIpcFile, +} from "./ipc.js"; +import { pool } from "./pool.js"; +import { askSingleQuestionWithInlineNote } from "../ui/ask/ask-inline-ui.js"; +import { askQuestionsWithTabs } from "../ui/ask/ask-tabs-ui.js"; +import type { AskQuestion, AskSelection } from "../ui/ask/ask-logic.js"; + +const POLL_INTERVAL_MS = 300; + +function sleep(ms: number): Promise { + return new Promise((resolve) => setTimeout(resolve, ms)); +} + +// Provided by subagent.ts when starting the IPC responder. Avoids circular +// imports: ipc-responder.ts never imports from subagent.ts. +export interface ScoutSpawnContext { + epicDir: string; + // Spawns a single scout; returns exit code. + spawnScout: (task: ScoutTask, scoutSubagentDir: string, outputFile: string) => Promise; +} + +// Handles a pending ask request: renders UI, writes response. 
+async function handleAskRequest( + subagentDir: string, + ipc: AskIpcFile, + ui: ExtensionUIContext, + signal: AbortSignal, +): Promise { + const { payload } = ipc; + const questions: AskQuestion[] = payload.questions.map((q) => ({ + id: q.id, + question: q.question, + options: q.options, + multi: q.multi, + recommended: q.recommended, + })); + + let cancelled = false; + let answers: AskAnswerPayload["answers"] = []; + + if (questions.length === 1) { + const q = questions[0]; + const selection = await askSingleQuestionWithInlineNote(ui, { + question: q.question, + options: q.options, + recommended: q.recommended, + }); + + // ask UI components do not accept an AbortSignal — they block until the + // user interacts even after the subagent exits. Check after return to + // prevent writing a stale answer to a dead subagent's IPC file. + if (signal.aborted) { + const current = await readIpcFile(subagentDir); + if (current !== null && current.type === "ask" && current.response === null && current.id === ipc.id) { + await writeIpcFile(subagentDir, { ...current, response: createCancelledResponse(ipc.id) }); + } + return; + } + + cancelled = selection.selectedOptions.length === 0 && !selection.customInput; + if (!cancelled) { + answers = [{ + id: q.id, + selectedOptions: selection.selectedOptions, + customInput: selection.customInput, + }]; + } + } else { + const result = await askQuestionsWithTabs(ui, questions); + + if (signal.aborted) { + const current = await readIpcFile(subagentDir); + if (current !== null && current.type === "ask" && current.response === null && current.id === ipc.id) { + await writeIpcFile(subagentDir, { ...current, response: createCancelledResponse(ipc.id) }); + } + return; + } + + cancelled = result.cancelled; + if (!cancelled) { + answers = questions.map((q, i) => { + const sel: AskSelection = result.selections[i] ?? 
{ selectedOptions: [] }; + const entry: AskAnswerPayload["answers"][number] = { + id: q.id, + selectedOptions: sel.selectedOptions, + }; + if (sel.customInput !== undefined) { + entry.customInput = sel.customInput; + } + return entry; + }); + } + } + + const response = cancelled + ? createCancelledResponse(ipc.id) + : createAskResponse(ipc.id, { answers }); + + const current = await readIpcFile(subagentDir); + if (current !== null && current.type === "ask" && current.response === null && current.id === ipc.id) { + await writeIpcFile(subagentDir, { ...current, response }); + } +} + +// Handles a pending scout-request: spawns scouts via pool(), writes findings. +async function handleScoutRequest( + subagentDir: string, + ipc: ScoutIpcFile, + scoutCtx: ScoutSpawnContext, + signal: AbortSignal, +): Promise { + const { scouts, id } = ipc; + const { epicDir } = scoutCtx; + const findings: string[] = []; + const failures: string[] = []; + + // Each scout writes to ${subagentDir}/output.md — output is scoped to the + // scout's own directory, avoiding collisions. Compute subagentDir once and + // derive outputFile from it (never call Date.now() twice for the same entry). 
+ const scoutEntries = scouts.map((task) => { + const scoutDir = path.join(epicDir, "subagents", `scout-${task.id}-${Date.now()}`); + return { task, subagentDir: scoutDir, outputFile: path.join(scoutDir, "output.md") }; + }); + + const taskIds = scoutEntries.map((t) => t.task.id); + await pool( + taskIds, + 4, // up to 4 concurrent scouts + async (taskId) => { + if (signal.aborted) return { exitCode: 1, stderr: "aborted", subagentDir: "" }; + const entry = scoutEntries.find((t) => t.task.id === taskId)!; + await fs.mkdir(entry.subagentDir, { recursive: true }); + const exitCode = await scoutCtx.spawnScout(entry.task, entry.subagentDir, entry.outputFile); + if (exitCode === 0) { + findings.push(entry.outputFile); + } else { + failures.push(taskId); + } + return { exitCode, stderr: "", subagentDir: entry.subagentDir }; + }, + ); + + // Write response back to the ipc file. + const current = await readIpcFile(subagentDir); + if (current !== null && current.type === "scout-request" && current.response === null && current.id === id) { + const updated: ScoutIpcFile = { ...current, response: { findings, failures } }; + await writeIpcFile(subagentDir, updated); + } +} + +// Runs the parent-side IPC poll loop for a single subagent directory. +// Routes to ask UI or scout spawning based on request type. +// Terminates when `signal` is aborted. Errors are swallowed — transient +// filesystem issues must not crash the parent session. 
+export async function runIpcResponder( + subagentDir: string, + ui: ExtensionUIContext, + signal: AbortSignal, + scoutContext?: ScoutSpawnContext, +): Promise { + while (!signal.aborted) { + try { + await sleep(POLL_INTERVAL_MS); + if (signal.aborted) break; + + const ipc = await readIpcFile(subagentDir); + if (ipc === null || ipc.response !== null) continue; + + if (ipc.type === "ask") { + await handleAskRequest(subagentDir, ipc, ui, signal); + } else if (ipc.type === "scout-request" && scoutContext) { + await handleScoutRequest(subagentDir, ipc, scoutContext, signal); + } + // Unknown type: ignore (forward-compatibility) + } catch { + // Swallow all errors — transient filesystem or UI issues must not + // abort the parent session. + } + } +} diff --git a/src/planner/lib/ipc.ts b/src/planner/lib/ipc.ts index aaa14ee..3d26828 100644 --- a/src/planner/lib/ipc.ts +++ b/src/planner/lib/ipc.ts @@ -1,50 +1,75 @@ // File-based IPC between subagent and parent session. -// A single ipc.json file per subagent directory holds both the request and -// response. Atomic writes (tmp-rename) prevent partial reads. +// A single ipc.json file per subagent directory holds the current request and +// its response. Atomic writes (tmp-rename) prevent partial reads. +// +// IPC protocol supports two message types (§11.2.4): +// "ask" — subagent asks the user a question +// "scout-request" — subagent requests parallel codebase scout spawning import { promises as fs } from "node:fs"; import * as path from "node:path"; import * as crypto from "node:crypto"; -// -- Types -- +// -- Scout types -- -export interface IpcFile { - request: IpcRequest; - response: IpcResponse | null; // null while awaiting parent response +export interface ScoutTask { + id: string; // Unique task ID, e.g. 
"auth-libs" + role: string; // Custom role description for the scout + prompt: string; // What the scout should find } -export interface IpcRequest { - id: string; // crypto.randomUUID() — correlates request to response - type: "ask-question"; // discriminant for routing; extensible to future types - createdAt: string; // ISO 8601 timestamp - payload: AskQuestionPayload; +export interface ScoutResponse { + findings: string[]; // File paths to scout output markdown files (absolute) + failures: string[]; // Scout task IDs that failed (non-fatal) } +// -- Ask types -- + export interface AskQuestionPayload { questions: Array<{ id: string; question: string; options: Array<{ label: string }>; multi?: boolean; - recommended?: number; // 0-indexed + recommended?: number; }>; } -export interface IpcResponse { - id: string; // must match request.id - respondedAt: string; // ISO 8601 timestamp - cancelled: boolean; // true when user presses Escape - payload: AskAnswerPayload | null; // null when cancelled -} - export interface AskAnswerPayload { answers: Array<{ - id: string; // matches question id + id: string; selectedOptions: string[]; - customInput?: string; // populated when user selects "Other" + customInput?: string; }>; } +export interface AskResponse { + id: string; + respondedAt: string; + cancelled: boolean; + payload: AskAnswerPayload | null; +} + +// -- IPC file union -- + +export interface AskIpcFile { + type: "ask"; + id: string; + createdAt: string; + payload: AskQuestionPayload; + response: AskResponse | null; +} + +export interface ScoutIpcFile { + type: "scout-request"; + id: string; + createdAt: string; + scouts: ScoutTask[]; + response: ScoutResponse | null; +} + +export type IpcFile = AskIpcFile | ScoutIpcFile; + // -- File paths -- const IPC_FILE = "ipc.json"; @@ -94,19 +119,27 @@ export async function deleteIpcFile(dir: string): Promise { // -- Factory helpers -- -export function createAskRequest(payload: AskQuestionPayload): IpcFile { +export function 
createAskRequest(payload: AskQuestionPayload): AskIpcFile { + return { + type: "ask", + id: crypto.randomUUID(), + createdAt: new Date().toISOString(), + payload, + response: null, + }; +} + +export function createScoutRequest(scouts: ScoutTask[]): ScoutIpcFile { return { - request: { - id: crypto.randomUUID(), - type: "ask-question", - createdAt: new Date().toISOString(), - payload, - }, + type: "scout-request", + id: crypto.randomUUID(), + createdAt: new Date().toISOString(), + scouts, response: null, }; } -export function createAskResponse(requestId: string, payload: AskAnswerPayload): IpcResponse { +export function createAskResponse(requestId: string, payload: AskAnswerPayload): AskResponse { return { id: requestId, respondedAt: new Date().toISOString(), @@ -115,7 +148,7 @@ export function createAskResponse(requestId: string, payload: AskAnswerPayload): }; } -export function createCancelledResponse(requestId: string): IpcResponse { +export function createCancelledResponse(requestId: string): AskResponse { return { id: requestId, respondedAt: new Date().toISOString(), diff --git a/src/planner/lib/permissions.ts b/src/planner/lib/permissions.ts index 90c3e06..058be7c 100644 --- a/src/planner/lib/permissions.ts +++ b/src/planner/lib/permissions.ts @@ -1,194 +1,145 @@ -// Default-deny permissions. Read tools bypass this map. Write tools -// (edit/write) always blocked during planning. The map defines OUTER -// boundaries; phase handlers narrow further. - +// Default-deny role-based permissions for koan subagents. +// +// Permission model overview: +// 1. READ_TOOLS (bash, read, grep, glob, find, ls) are always allowed for all +// roles. This is an accepted limitation (§11.9, §12.5): distinguishing +// "read bash" from "write bash" is intractable at the permission layer. +// Prompt engineering constrains intended bash use; enforcement does not. +// Do not assume bash is restricted to roles that list it explicitly. +// +// 2. 
ROLE_PERMISSIONS controls koan-specific tools and write/edit access. +// Unknown roles are blocked under default-deny policy. +// +// 3. Planning roles (intake, scout, decomposer, orchestrator, planner) have +// write/edit access path-scoped to the epic directory. Only the executor +// role has unrestricted write access — it must modify the codebase. + +import * as path from "node:path"; + +import { createLogger } from "../../utils/logger.js"; + +const log = createLogger("permissions"); + +// Read tools always allowed for all roles — early return in checkPermission. const READ_TOOLS = new Set(["read", "bash", "grep", "glob", "find", "ls"]); const WRITE_TOOLS = new Set(["edit", "write"]); -const PLAN_GETTER_TOOLS_LIST = [ - "koan_get_plan", - "koan_get_milestone", - "koan_get_decision", - "koan_get_intent", - "koan_get_change", -]; - -const PLAN_SETTER_TOOLS_LIST = [ - "koan_set_overview", - "koan_set_constraints", - "koan_set_invisible_knowledge", -]; - -const PLAN_DECISION_TOOLS_LIST = ["koan_add_decision", "koan_set_decision"]; - -const PLAN_REJECTED_ALT_TOOLS_LIST = [ - "koan_add_rejected_alternative", - "koan_set_rejected_alternative", -]; - -const PLAN_RISK_TOOLS_LIST = ["koan_add_risk", "koan_set_risk"]; - -const PLAN_MILESTONE_TOOLS_LIST = [ - "koan_add_milestone", - "koan_set_milestone_name", - "koan_set_milestone_files", - "koan_set_milestone_flags", - "koan_set_milestone_requirements", - "koan_set_milestone_acceptance_criteria", - "koan_set_milestone_tests", -]; - -const PLAN_INTENT_TOOLS_LIST = ["koan_add_intent", "koan_set_intent"]; - -const PLAN_CHANGE_TOOLS_LIST = [ - "koan_add_change", - "koan_set_change_diff", - "koan_set_change_doc_diff", - "koan_set_change_comments", - "koan_set_change_file", - "koan_set_change_intent_ref", -]; - -const PLAN_WAVE_TOOLS_LIST = ["koan_add_wave", "koan_set_wave_milestones"]; - -const PLAN_DIAGRAM_TOOLS_LIST = [ - "koan_add_diagram", - "koan_set_diagram", - "koan_add_diagram_node", - "koan_add_diagram_edge", -]; - 
-const PLAN_README_TOOLS_LIST = ["koan_set_readme_entry"]; - -const QR_TOOLS_LIST = [ - "koan_qr_add_item", - "koan_qr_set_item", - "koan_qr_assign_group", - "koan_qr_get_item", - "koan_qr_list_items", - "koan_qr_summary", -]; - -const ALL_PLAN_ENTITY_TOOLS = [ - ...PLAN_DECISION_TOOLS_LIST, - ...PLAN_REJECTED_ALT_TOOLS_LIST, - ...PLAN_RISK_TOOLS_LIST, - ...PLAN_MILESTONE_TOOLS_LIST, - ...PLAN_INTENT_TOOLS_LIST, - ...PLAN_WAVE_TOOLS_LIST, - ...PLAN_DIAGRAM_TOOLS_LIST, - ...PLAN_README_TOOLS_LIST, -]; - -const PLAN_DESIGN_ENTITY_TOOLS = ALL_PLAN_ENTITY_TOOLS.filter( - (t) => !PLAN_CHANGE_TOOLS_LIST.includes(t), -); - -export const PLAN_GETTER_TOOLS: ReadonlySet = new Set( - PLAN_GETTER_TOOLS_LIST, -); - -export const PLAN_MUTATION_TOOLS: ReadonlySet = new Set([ - ...PLAN_SETTER_TOOLS_LIST, - ...ALL_PLAN_ENTITY_TOOLS, - ...PLAN_CHANGE_TOOLS_LIST, +// Tools allowed per role beyond READ_TOOLS. +// Write/edit are tracked here but enforced via path-scoping below. +export const ROLE_PERMISSIONS: ReadonlyMap> = new Map([ + [ + "intake", + new Set([ + "koan_complete_step", + "koan_ask_question", + "koan_request_scouts", + "edit", + "write", + ]), + ], + [ + "scout", + new Set([ + "koan_complete_step", + "edit", + "write", + // No koan_ask_question — scouts are narrow investigators; no user interaction. + // No koan_request_scouts — scouts do not spawn scouts. + ]), + ], + [ + "decomposer", + new Set([ + "koan_complete_step", + "koan_ask_question", + "koan_request_scouts", + "edit", + "write", + ]), + ], + [ + "orchestrator", + new Set([ + "koan_complete_step", + "koan_ask_question", + // koan_request_scouts excluded from orchestrator — scouts serve planning roles; + // orchestrator uses bash for verification. 
+ "koan_select_story", + "koan_complete_story", + "koan_retry_story", + "koan_skip_story", + "edit", + "write", + "bash", // also in READ_TOOLS; explicit here for documentation + ]), + ], + [ + "planner", + new Set([ + "koan_complete_step", + "koan_ask_question", + "koan_request_scouts", + "edit", + "write", + ]), + ], + [ + "executor", + new Set([ + "koan_complete_step", + "koan_ask_question", + "edit", + "write", + "bash", // also in READ_TOOLS; explicit here for documentation + ]), + ], ]); -// Missing phase keys are blocked (default-deny extends to unknown phases). -// Prevents security boundary breach when a new phase is added without -// updating the permissions map. -export const PHASE_PERMISSIONS: ReadonlyMap> = - new Map([ - [ - "plan-design", - new Set([ - "koan_complete_step", - "koan_ask_question", - ...PLAN_GETTER_TOOLS_LIST, - ...PLAN_SETTER_TOOLS_LIST, - ...PLAN_DESIGN_ENTITY_TOOLS, - ]), - ], - [ - "plan-code", - new Set([ - "koan_complete_step", - "koan_ask_question", - ...PLAN_GETTER_TOOLS_LIST, - ...PLAN_CHANGE_TOOLS_LIST, - "koan_set_intent", - ]), - ], - [ - "plan-docs", - new Set([ - "koan_complete_step", - "koan_ask_question", - ...PLAN_GETTER_TOOLS_LIST, - "koan_set_change_doc_diff", - "koan_set_change_comments", - "koan_set_readme_entry", - "koan_add_diagram", - "koan_set_diagram", - "koan_add_diagram_node", - "koan_add_diagram_edge", - ]), - ], - [ - "qr-plan-design", - new Set(["koan_complete_step", ...PLAN_GETTER_TOOLS_LIST, ...QR_TOOLS_LIST]), - ], - [ - "qr-plan-code", - new Set([ - "koan_complete_step", - "koan_get_plan", - "koan_get_milestone", - "koan_get_intent", - "koan_get_change", - ...QR_TOOLS_LIST, - ]), - ], - [ - "qr-plan-docs", - new Set([ - "koan_complete_step", - "koan_get_plan", - "koan_get_milestone", - "koan_get_change", - ...QR_TOOLS_LIST, - ]), - ], - ]); +// Planning roles write only inside the epic directory. +// Executor has unrestricted write access (must implement stories in the codebase). 
+const PLANNING_ROLES = new Set(["intake", "scout", "decomposer", "orchestrator", "planner"]); export function checkPermission( - phaseKey: string, + role: string, toolName: string, + epicDir?: string, + toolArgs?: Record, ): { allowed: boolean; reason?: string } { + // Read tools are always allowed — check before role map lookup. if (READ_TOOLS.has(toolName)) { return { allowed: true }; } - if (WRITE_TOOLS.has(toolName)) { - return { - allowed: false, - reason: "Edit/write tools blocked during planning.", - }; + // Unknown role: blocked under default-deny policy. + if (!ROLE_PERMISSIONS.has(role)) { + log("Unknown role blocked", { role, toolName }); + return { allowed: false, reason: `Unknown role: ${role}` }; } - if (!PHASE_PERMISSIONS.has(phaseKey)) { - return { - allowed: false, - reason: `Unknown phase: ${phaseKey}`, - }; + const roleAllowed = ROLE_PERMISSIONS.get(role)!; + + if (!roleAllowed.has(toolName)) { + return { allowed: false, reason: `${toolName} is not available for role ${role}` }; } - const allowed = PHASE_PERMISSIONS.get(phaseKey)!; - if (!allowed.has(toolName)) { - return { - allowed: false, - reason: `${toolName} is not available in phase ${phaseKey}`, - }; + // Path-scope enforcement: planning roles may only write inside the epic directory. + if (WRITE_TOOLS.has(toolName) && PLANNING_ROLES.has(role)) { + if (epicDir && toolArgs) { + const rawPath = toolArgs["path"]; + if (typeof rawPath === "string") { + const resolvedTool = path.resolve(rawPath); + const resolvedEpic = path.resolve(epicDir); + if (!resolvedTool.startsWith(resolvedEpic + path.sep) && resolvedTool !== resolvedEpic) { + log("Write blocked: path outside epic dir", { role, toolName, rawPath, epicDir }); + return { + allowed: false, + reason: `${toolName} path "${rawPath}" is outside epic directory`, + }; + } + } + } + // No epicDir or no path arg: allow (cannot scope-check without context). 
+ return { allowed: true }; } return { allowed: true }; diff --git a/src/planner/lib/pool.ts b/src/planner/lib/pool.ts index f4bfcc8..132ea99 100644 --- a/src/planner/lib/pool.ts +++ b/src/planner/lib/pool.ts @@ -1,5 +1,5 @@ // Bounded-parallel subagent pool using an in-process semaphore. -// Runs all items to completion regardless of failures; callers inspect PoolResult. +// Runs all items to completion regardless of individual failures. // Timeout logic belongs in the worker closure, not here. import type { SubagentResult } from "../subagent.js"; @@ -19,14 +19,10 @@ export interface PoolProgress { queued: number; } -// -- Constants -- - -export const DEFAULT_REVIEWER_TIMEOUT_MS = 10 * 60 * 1000; - // -- Private helpers -- class Semaphore { - private queue: Array<() => void> = []; + private readonly queue: Array<() => void> = []; private count: number; constructor(limit: number) { @@ -80,8 +76,8 @@ export async function pool( emit(); try { - const r = await worker(id); - if (r.exitCode !== 0) { + const result = await worker(id); + if (result.exitCode !== 0) { failed.push(id); } } finally { diff --git a/src/planner/lib/runtime-context.ts b/src/planner/lib/runtime-context.ts new file mode 100644 index 0000000..5019bb1 --- /dev/null +++ b/src/planner/lib/runtime-context.ts @@ -0,0 +1,20 @@ +// RuntimeContext replaces the old PlanRef + SubagentRef + WorkflowDispatch triple. +// Set once during before_agent_start; tools read from it at call time. The mutable-ref +// pattern accommodates pi's extension lifecycle: tools register at init before state exists. 
+// +// onCompleteStep return value: +// string → next step's formatted prompt (tool returns it to the LLM) +// null → phase is complete (tool returns "Phase complete.") +export interface RuntimeContext { + epicDir: string | null; + subagentDir: string | null; + onCompleteStep: ((thoughts: string) => Promise) | null; +} + +export function createRuntimeContext(): RuntimeContext { + return { + epicDir: null, + subagentDir: null, + onCompleteStep: null, + }; +} diff --git a/src/planner/lib/step.ts b/src/planner/lib/step.ts index 28743eb..9771f6f 100644 --- a/src/planner/lib/step.ts +++ b/src/planner/lib/step.ts @@ -1,16 +1,15 @@ -// Step prompt assembly for koan workflows. +// Step prompt assembly for koan phase workflows. // -// The `thoughts` parameter on koan_complete_step captures the model's -// work output (analysis, review, findings) as a tool parameter. This -// avoids requiring the model to produce text + tool_call in one -// response, which some models (e.g. GPT-5-codex) cannot do. +// The `thoughts` parameter on koan_complete_step captures the model's work output +// (analysis, review, findings) as a tool parameter rather than text output. This +// ensures models that can't mix text + tool_call in one response still advance +// the workflow. export interface StepGuidance { title: string; instructions: string[]; - // Custom invoke-after directive. When omitted, formatStep - // appends the default koan_complete_step directive. - // Terminal steps override this (e.g., step 6 plan validation). + // Custom invoke-after directive. When omitted, formatStep appends the default + // koan_complete_step directive. Terminal steps may override this. invokeAfter?: string; } diff --git a/src/planner/tools/ask.ts b/src/planner/tools/ask.ts index f1d6ff0..57a8b8a 100644 --- a/src/planner/tools/ask.ts +++ b/src/planner/tools/ask.ts @@ -1,22 +1,26 @@ -// koan_ask_question tool: subagent-side of the file-based IPC ask flow. 
-// Writes ipc.json, polls until parent writes a response, then returns -// formatted answers to the LLM. The entire poll loop is wrapped in a -// try/finally that deletes ipc.json, guaranteeing cleanup on all exit paths. +// IPC-based tools: koan_ask_question and koan_request_scouts. +// Both tools use file-based IPC to pause subagent execution and communicate +// with the parent session, then resume with the response. +// +// koan_ask_question — ask the user a question, get answers +// koan_request_scouts — request parallel codebase scouts, get findings paths import { Type, type Static } from "@sinclair/typebox"; import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; -import type { SubagentRef } from "../lib/dispatch.js"; +import type { RuntimeContext } from "../lib/runtime-context.js"; import { ipcFileExists, writeIpcFile, readIpcFile, deleteIpcFile, createAskRequest, + createScoutRequest, type AskAnswerPayload, + type ScoutTask, } from "../lib/ipc.js"; -// -- Tool schema (mirrors pi-ask-tool-extension exactly) -- +// -- Schemas -- const OptionItemSchema = Type.Object({ label: Type.String({ description: "Display label" }), @@ -41,7 +45,19 @@ const AskParamsSchema = Type.Object({ type AskParams = Static; -// -- Result formatting -- +const ScoutTaskSchema = Type.Object({ + id: Type.String({ description: "Scout task ID, e.g. 'auth-libs'" }), + role: Type.String({ description: "Custom role for the scout, e.g. 'system architect'" }), + prompt: Type.String({ description: "What to find, e.g. 
'Find all auth-related files in src/'" }), +}); + +const RequestScoutsSchema = Type.Object({ + scouts: Type.Array(ScoutTaskSchema, { description: "Scout tasks to run in parallel", minItems: 1 }), +}); + +type RequestScoutsParams = Static; + +// -- Result formatting (ask) -- interface QuestionResult { id: string; @@ -125,6 +141,12 @@ function buildQuestionResults( }); } +// -- Shared poll helper -- + +function sleep(ms: number): Promise { + return new Promise((resolve) => setTimeout(resolve, ms)); +} + // -- Tool registration -- const ASK_TOOL_DESCRIPTION = ` @@ -138,11 +160,21 @@ Ask the user for clarification when a choice materially affects the outcome. - Do NOT include an 'Other' option; UI adds it automatically. `.trim(); -function sleep(ms: number): Promise { - return new Promise((resolve) => setTimeout(resolve, ms)); -} +const SCOUTS_TOOL_DESCRIPTION = ` +Request parallel codebase scouting. Use when you need to explore specific +areas of the codebase before making decisions or asking questions. + +Each scout answers one narrow question and writes findings to a markdown file. +Scouts run in parallel. The tool returns the file paths to read. 
+ +- id: unique identifier for this scout task (e.g., "auth-patterns") +- role: the investigator role for the scout (e.g., "security auditor") +- prompt: what to find (e.g., "Find all authentication middleware in src/") +`.trim(); + +export function registerAskTools(pi: ExtensionAPI, ctx: RuntimeContext): void { + // -- koan_ask_question -- -export function registerAskTools(pi: ExtensionAPI, subagentRef: SubagentRef): void { pi.registerTool({ name: "koan_ask_question", label: "Ask question", @@ -151,7 +183,7 @@ export function registerAskTools(pi: ExtensionAPI, subagentRef: SubagentRef): vo async execute(_toolCallId, params, signal) { const askParams = params as AskParams; - const dir = subagentRef.dir; + const dir = ctx.subagentDir; if (!dir) { return { @@ -162,7 +194,7 @@ export function registerAskTools(pi: ExtensionAPI, subagentRef: SubagentRef): vo if (await ipcFileExists(dir)) { return { - content: [{ type: "text" as const, text: "Error: A question request is already pending." }], + content: [{ type: "text" as const, text: "Error: An IPC request is already pending." 
}], details: undefined, }; } @@ -172,9 +204,7 @@ export function registerAskTools(pi: ExtensionAPI, subagentRef: SubagentRef): vo let aborted = false; const onAbort = () => { aborted = true; }; - if (signal) { - signal.addEventListener("abort", onAbort, { once: true }); - } + if (signal) signal.addEventListener("abort", onAbort, { once: true }); type PollResult = "answered" | "cancelled" | "aborted" | "file-gone"; let pollResult: PollResult = "file-gone"; @@ -183,18 +213,12 @@ export function registerAskTools(pi: ExtensionAPI, subagentRef: SubagentRef): vo try { while (!aborted) { await sleep(500); - if (signal?.aborted) { - aborted = true; - break; - } + if (signal?.aborted) { aborted = true; break; } const current = await readIpcFile(dir); - if (current === null) { - pollResult = "file-gone"; - break; - } + if (current === null) { pollResult = "file-gone"; break; } - if (current.response !== null && current.response.id === ipc.request.id) { + if (current.type === "ask" && current.response !== null && current.response.id === ipc.id) { if (current.response.cancelled) { pollResult = "cancelled"; } else { @@ -205,9 +229,7 @@ export function registerAskTools(pi: ExtensionAPI, subagentRef: SubagentRef): vo } } - if (aborted) { - pollResult = "aborted"; - } + if (aborted) pollResult = "aborted"; } finally { await deleteIpcFile(dir); } @@ -238,4 +260,95 @@ export function registerAskTools(pi: ExtensionAPI, subagentRef: SubagentRef): vo } }, }); + + // -- koan_request_scouts -- + + pi.registerTool({ + name: "koan_request_scouts", + label: "Request codebase scouts", + description: SCOUTS_TOOL_DESCRIPTION, + parameters: RequestScoutsSchema, + + async execute(_toolCallId, params, signal) { + const { scouts } = params as RequestScoutsParams; + const dir = ctx.subagentDir; + + if (!dir) { + return { + content: [{ type: "text" as const, text: "Error: koan_request_scouts is only available in subagent context." 
}], + details: undefined, + }; + } + + if (await ipcFileExists(dir)) { + return { + content: [{ type: "text" as const, text: "Error: An IPC request is already pending." }], + details: undefined, + }; + } + + const ipc = createScoutRequest(scouts as ScoutTask[]); + await writeIpcFile(dir, ipc); + + let aborted = false; + const onAbort = () => { aborted = true; }; + if (signal) signal.addEventListener("abort", onAbort, { once: true }); + + type PollResult = "completed" | "aborted" | "file-gone"; + let pollResult: PollResult = "file-gone"; + let findings: string[] = []; + let failures: string[] = []; + + try { + while (!aborted) { + await sleep(500); + if (signal?.aborted) { aborted = true; break; } + + const current = await readIpcFile(dir); + if (current === null) { pollResult = "file-gone"; break; } + + if (current.type === "scout-request" && current.response !== null && current.id === ipc.id) { + pollResult = "completed"; + findings = current.response.findings; + failures = current.response.failures; + break; + } + } + + if (aborted) pollResult = "aborted"; + } finally { + await deleteIpcFile(dir); + } + + switch (pollResult) { + case "completed": { + const lines: string[] = [ + `Scout findings: ${findings.length} completed, ${failures.length} failed.`, + "", + ]; + if (findings.length > 0) { + lines.push("Findings files (read these for codebase context):"); + for (const f of findings) lines.push(` ${f}`); + } + if (failures.length > 0) { + lines.push(`Failed scouts (non-fatal, proceed without them): ${failures.join(", ")}`); + } + return { + content: [{ type: "text" as const, text: lines.join("\n") }], + details: undefined, + }; + } + case "aborted": + return { + content: [{ type: "text" as const, text: "Scout request aborted. Proceed without codebase context." }], + details: undefined, + }; + case "file-gone": + return { + content: [{ type: "text" as const, text: "Scout request cancelled. Proceed without codebase context." 
}], + details: undefined, + }; + } + }, + }); } diff --git a/src/planner/tools/workflow.ts b/src/planner/tools/workflow.ts index 28b5282..71eb74e 100644 --- a/src/planner/tools/workflow.ts +++ b/src/planner/tools/workflow.ts @@ -1,27 +1,27 @@ // Workflow tool registration: koan_complete_step. // Tools register once at init; execute callbacks read from the mutable -// dispatch at call time, decoupling static registration from phase routing. +// RuntimeContext at call time, decoupling static registration from phase routing. import { Type } from "@sinclair/typebox"; import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; import { createLogger } from "../../utils/logger.js"; -import type { WorkflowDispatch } from "../lib/dispatch.js"; +import type { RuntimeContext } from "../lib/runtime-context.js"; const log = createLogger("Dispatch"); // Registers workflow tools. Called once at init in koan.ts, // before pi's _buildRuntime() snapshot. Tool execute callbacks read -// from the dispatch at call time -- the dispatch is mutable, the -// tool list is not. +// from the RuntimeContext at call time — the context is mutable, +// the tool list is not. // // Why register all tools unconditionally? Flags are unavailable during // init (getFlag() returns undefined before _buildRuntime() sets flagValues), -// so conditional registration based on role/phase is impossible. Tools -// registered after _buildRuntime() are invisible to the LLM. +// so conditional registration based on role is impossible. Tools registered +// after _buildRuntime() are invisible to the LLM. 
export function registerWorkflowTools( pi: ExtensionAPI, - dispatch: WorkflowDispatch, + ctx: RuntimeContext, ): void { // -- koan_complete_step -- // The `thoughts` parameter captures the model's work output (analysis, @@ -33,7 +33,7 @@ export function registerWorkflowTools( label: "Complete current workflow step", description: [ "Signal completion of the current workflow step.", - "Put your analysis, findings, or review in the `thoughts` parameter.", + "Put your analysis, findings, or work output in the `thoughts` parameter.", "DO NOT call this tool until the step instructions explicitly tell you to.", ].join(" "), parameters: Type.Object({ @@ -42,16 +42,14 @@ export function registerWorkflowTools( })), }), async execute(_toolCallId, params) { - if (!dispatch.onCompleteStep) { + if (!ctx.onCompleteStep) { + log("koan_complete_step called with no active phase"); throw new Error("No workflow phase is active."); } - const thoughts = (params as { thoughts?: string }).thoughts; - const r = await dispatch.onCompleteStep(thoughts); - if (!r.ok) { - throw new Error(r.error ?? "Step transition failed."); - } + const thoughts = (params as { thoughts?: string }).thoughts ?? ""; + const nextPrompt = await ctx.onCompleteStep(thoughts); return { - content: [{ type: "text" as const, text: r.prompt ?? "Step complete." }], + content: [{ type: "text" as const, text: nextPrompt ?? "Phase complete." 
}], details: undefined, }; }, From 0d46330d318ce458644013a728449c8e3ffbc43c Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Fri, 13 Mar 2026 12:45:25 +0700 Subject: [PATCH 043/412] feat(planner): implement role-based phase classes and orchestrator tools --- src/planner/phases/base-phase.ts | 124 +++++++++ src/planner/phases/decomposer/phase.ts | 39 +++ src/planner/phases/decomposer/prompts.ts | 151 +++++++++++ src/planner/phases/dispatch.ts | 257 +++++------------- src/planner/phases/executor/phase.ts | 43 +++ src/planner/phases/executor/prompts.ts | 156 +++++++++++ src/planner/phases/intake/phase.ts | 43 +++ src/planner/phases/intake/prompts.ts | 183 +++++++++++++ src/planner/phases/orchestrator/phase.ts | 60 +++++ src/planner/phases/orchestrator/prompts.ts | 299 +++++++++++++++++++++ src/planner/phases/planner/phase.ts | 41 +++ src/planner/phases/planner/prompts.ts | 213 +++++++++++++++ src/planner/phases/scout/phase.ts | 39 +++ src/planner/phases/scout/prompts.ts | 74 +++++ src/planner/tools/index.ts | 41 +-- src/planner/tools/orchestrator.ts | 239 ++++++++++++++++ 16 files changed, 1788 insertions(+), 214 deletions(-) create mode 100644 src/planner/phases/base-phase.ts create mode 100644 src/planner/phases/decomposer/phase.ts create mode 100644 src/planner/phases/decomposer/prompts.ts create mode 100644 src/planner/phases/executor/phase.ts create mode 100644 src/planner/phases/executor/prompts.ts create mode 100644 src/planner/phases/intake/phase.ts create mode 100644 src/planner/phases/intake/prompts.ts create mode 100644 src/planner/phases/orchestrator/phase.ts create mode 100644 src/planner/phases/orchestrator/prompts.ts create mode 100644 src/planner/phases/planner/phase.ts create mode 100644 src/planner/phases/planner/prompts.ts create mode 100644 src/planner/phases/scout/phase.ts create mode 100644 src/planner/phases/scout/prompts.ts create mode 100644 src/planner/tools/orchestrator.ts diff --git a/src/planner/phases/base-phase.ts 
b/src/planner/phases/base-phase.ts new file mode 100644 index 0000000..775b835 --- /dev/null +++ b/src/planner/phases/base-phase.ts @@ -0,0 +1,124 @@ +// BasePhase: shared lifecycle for all six koan subagent roles. +// Subclasses define only their step structure and system prompt. +// Eliminates ~40 lines of duplicated skeleton per phase. +// +// Lifecycle: +// constructor → registerHandlers() (hooks event listeners) +// begin() → activates phase, sets onCompleteStep in ctx, emits phase_start +// handleStepComplete() → advances step counter, returns next prompt or null + +import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; + +import { createLogger, type Logger } from "../../utils/logger.js"; +import { checkPermission } from "../lib/permissions.js"; +import { formatStep, type StepGuidance } from "../lib/step.js"; +import { EventLog } from "../lib/audit.js"; +import type { RuntimeContext } from "../lib/runtime-context.js"; + +export abstract class BasePhase { + // Subclasses declare these as readonly properties. + protected abstract readonly role: string; + protected abstract readonly totalSteps: number; + + // Subclasses implement these to define step content. + protected abstract getSystemPrompt(): string; + protected abstract getStepName(step: number): string; + protected abstract getStepGuidance(step: number): StepGuidance; + + private step = 1; + private active = false; + private step1Prompt: string | null = null; + + protected readonly log: Logger; + + constructor( + protected readonly pi: ExtensionAPI, + protected readonly ctx: RuntimeContext, + log?: Logger, + protected readonly eventLog?: EventLog, + ) { + this.log = log ?? createLogger("Phase"); + this.registerHandlers(); + } + + // -- Event handler registration -- + + private registerHandlers(): void { + // before_agent_start: inject system prompt when this phase is active. 
+ this.pi.on("before_agent_start", () => { + if (!this.active) return undefined; + return { systemPrompt: this.getSystemPrompt() }; + }); + + // context: append step 1 guidance to the spawn prompt (§9.8 append pattern). + // Preserves context embedded by the spawn function (scout question, retry + // context, etc.) while adding structured step instructions after a separator. + this.pi.on("context", (event) => { + if (!this.active || this.step !== 1 || !this.step1Prompt) return undefined; + const messages = event.messages.map((m) => { + if (m.role !== "user") return m; + const existing = typeof m.content === "string" ? m.content.trim() : ""; + const combined = existing.length > 0 + ? `${existing}\n\n---\n\n${this.step1Prompt!}` + : this.step1Prompt!; + return { ...m, content: combined }; + }); + return { messages }; + }); + + // tool_call: default-deny permission check for every tool call. + this.pi.on("tool_call", (event) => { + if (!this.active) return undefined; + const perm = checkPermission( + this.role, + event.toolName, + this.ctx.epicDir ?? 
undefined, + event.input as Record, + ); + if (!perm.allowed) { + return { block: true, reason: perm.reason }; + } + return undefined; + }); + } + + // -- Public lifecycle -- + + async begin(): Promise { + this.step1Prompt = formatStep(this.getStepGuidance(1)); + this.active = true; + this.step = 1; + + if (this.ctx.onCompleteStep !== null) { + throw new Error(`ctx.onCompleteStep is already occupied — cannot begin ${this.role} phase`); + } + this.ctx.onCompleteStep = (thoughts: string) => this.handleStepComplete(thoughts); + + this.log("Starting phase", { role: this.role, step: 1, totalSteps: this.totalSteps }); + await this.eventLog?.emitPhaseStart(this.totalSteps); + await this.eventLog?.emitStepTransition(1, this.getStepName(1), this.totalSteps); + } + + // -- Private step progression -- + + private async handleStepComplete(thoughts: string): Promise { + void thoughts; // captured in event log via tool_result; used by subclass prompts if needed + const prev = this.step; + + if (prev === this.totalSteps) { + // Phase complete. + this.active = false; + this.ctx.onCompleteStep = null; + await this.eventLog?.emitPhaseEnd("completed"); + this.log("Phase complete", { role: this.role }); + return null; + } + + // Advance to next step. + this.step = prev + 1; + const prompt = formatStep(this.getStepGuidance(this.step)); + await this.eventLog?.emitStepTransition(this.step, this.getStepName(this.step), this.totalSteps); + this.log("Step transition", { role: this.role, from: prev, to: this.step }); + return prompt; + } +} diff --git a/src/planner/phases/decomposer/phase.ts b/src/planner/phases/decomposer/phase.ts new file mode 100644 index 0000000..b5ab322 --- /dev/null +++ b/src/planner/phases/decomposer/phase.ts @@ -0,0 +1,39 @@ +// Decomposer phase: splits the epic into story sketches. +// Two steps: analysis → decomposition. 
+ +import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; + +import { createLogger, type Logger } from "../../../utils/logger.js"; +import type { RuntimeContext } from "../../lib/runtime-context.js"; +import { EventLog } from "../../lib/audit.js"; +import { BasePhase } from "../base-phase.js"; +import { DECOMPOSER_STEP_NAMES, decomposerSystemPrompt, decomposerStepGuidance } from "./prompts.js"; +import type { StepGuidance } from "../../lib/step.js"; + +export class DecomposerPhase extends BasePhase { + protected readonly role = "decomposer"; + protected readonly totalSteps = 2; + + constructor( + pi: ExtensionAPI, + config: { epicDir: string }, + ctx: RuntimeContext, + log?: Logger, + eventLog?: EventLog, + ) { + super(pi, ctx, log ?? createLogger("DecomposerPhase"), eventLog); + void config; + } + + protected getSystemPrompt(): string { + return decomposerSystemPrompt(); + } + + protected getStepName(step: number): string { + return DECOMPOSER_STEP_NAMES[step] ?? `Step ${step}`; + } + + protected getStepGuidance(step: number): StepGuidance { + return decomposerStepGuidance(step); + } +} diff --git a/src/planner/phases/decomposer/prompts.ts b/src/planner/phases/decomposer/prompts.ts new file mode 100644 index 0000000..7f18450 --- /dev/null +++ b/src/planner/phases/decomposer/prompts.ts @@ -0,0 +1,151 @@ +// Decomposer phase prompts — 2 steps: analysis → decomposition. +// Story IDs use S-NNN-slug format per §11.5.5 (e.g., S-001-auth-provider). + +import type { StepGuidance } from "../../lib/step.js"; + +export const DECOMPOSER_STEP_NAMES: Record = { + 1: "Analysis", + 2: "Decomposition", +}; + +export function decomposerSystemPrompt(): string { + return `You are a feature decomposer for a coding task planner. You read intake output and codebase scout reports, then split the requested work into independent story sketches — each story representing one pull request. 
+ +## Your role + +You define WHAT the stories are and in WHAT ORDER they should be executed. You do NOT decide HOW each story is implemented (that belongs to the planner role). + +## Story definition + +A story must be: +- **Independent**: it can be reviewed and merged without depending on an unreleased sibling story. +- **Bounded**: it fits in one pull request — one coherent change to the codebase. +- **Testable**: the change can be verified in isolation. +- **Sequenced**: if stories have dependencies, they are ordered so earlier stories provide a stable base. + +## Story ID format + +Story IDs use the format: \`S-NNN-descriptive-slug\` +Examples: \`S-001-auth-provider\`, \`S-002-protected-routes\`, \`S-003-user-profile\` + +Use zero-padded three-digit numbers. The slug is a short kebab-case description of the story goal. +This format is sortable and human-readable. + +## Strict rules + +- MUST NOT include implementation details (specific functions, algorithms, data structures). +- MUST NOT make decisions that require user input. Those belong to intake. +- MUST NOT invent scope not present in context.md or decisions.md. +- MUST produce one story sketch per deliverable unit of work. +- SHOULD keep stories small: prefer 4–8 stories over 1–2 large ones. +- SHOULD order stories so foundational work (types, interfaces, data models) comes first. +- SHOULD mark stories that are optional or conditional explicitly. +- MUST use the S-NNN-slug story ID format. + +## Output files + +You write the following files, all inside the epic directory: + +1. **epic.md** — overview of the full scope and the story list with sequencing rationale. +2. **stories/{story-id}/story.md** — one file per story with title, goal, scope, and dependencies. + +## Tools available + +- All read tools (read, bash, grep, glob, find, ls) — for reading intake output and scout reports. +- \`koan_request_scouts\` — to request additional codebase exploration if needed. 
+- \`write\` / \`edit\` — for writing output files inside the epic directory. +- \`koan_complete_step\` — to signal step completion. + +You work in two steps. First you read and analyze. Then you write the decomposition.`; +} + +export function decomposerStepGuidance(step: number): StepGuidance { + switch (step) { + case 1: + return { + title: DECOMPOSER_STEP_NAMES[1], + instructions: [ + "Read the intake output and all scout reports. Build a complete understanding of the scope", + "before producing any output.", + "", + "## Files to read", + "", + "From the epic directory:", + "- `context.md` — structured requirements extracted from the conversation", + "- `decisions.md` — user answers to clarifying questions", + "", + "If scout reports were referenced in your initial instructions above, read them now.", + "If no scout reports were mentioned, proceed without them.", + "You may also call `koan_request_scouts` if you need codebase context to inform story boundaries.", + "", + "## What to understand", + "", + "After reading, you should be able to answer:", + "- What is the top-level goal of this epic?", + "- What are the distinct deliverable units of work?", + "- Which units depend on each other, and what is the safe delivery order?", + "- Are there any parts of the work that are conditional or optional?", + "- What does the existing codebase already provide (from scout reports)?", + "", + "Do not write any output files during this step.", + ], + }; + + case 2: + return { + title: DECOMPOSER_STEP_NAMES[2], + instructions: [ + "Produce the full decomposition: epic.md and one story.md per story.", + "", + "## Story ID format", + "", + "Use S-NNN-slug format: S-001-auth-provider, S-002-protected-routes, etc.", + "The number is zero-padded, three digits, sequential. 
The slug is kebab-case.", + "", + "## epic.md", + "", + "Write `epic.md` to the epic directory with these sections:", + "", + "### Overview", + "One to three paragraphs describing the full scope of this epic.", + "", + "### Stories", + "A numbered list of all stories in delivery order.", + "Format: `{n}. [{story-id}] {story title} — {one-sentence goal}`", + "", + "### Sequencing Rationale", + "Explain why the stories are ordered as they are. Identify dependency chains.", + "Note any stories that can be worked in parallel.", + "", + "## stories/{story-id}/story.md", + "", + "Write one file per story. Use the story ID as the directory name.", + "Each story.md must contain these sections:", + "", + "### Goal", + "One sentence: what this story delivers and why.", + "", + "### Scope", + "What is included in this story. Be specific about boundaries.", + "List what is explicitly OUT OF SCOPE (to be handled in another story or not at all).", + "", + "### Dependencies", + "List any stories that must be merged before this story can begin.", + "If none: write `(none — this story can start immediately)`", + "", + "### Acceptance Criteria", + "Three to six testable conditions that define 'done' for this story.", + "Format: `- [ ] [condition]`", + "", + "After writing all files, call `koan_complete_step` with a summary:", + "number of stories produced and the delivery order.", + ], + }; + + default: + return { + title: `Step ${step}`, + instructions: [`Execute step ${step}.`], + }; + } +} diff --git a/src/planner/phases/dispatch.ts b/src/planner/phases/dispatch.ts index f3e97c6..b880cc0 100644 --- a/src/planner/phases/dispatch.ts +++ b/src/planner/phases/dispatch.ts @@ -1,233 +1,124 @@ // Phase dispatch: detects subagent mode from CLI flags and routes to the -// appropriate phase constructor. Flags are unavailable at extension init -// (getFlag returns undefined before _buildRuntime), so detection is +// appropriate phase class based on role. 
Flags are unavailable at extension +// init (getFlag returns undefined before _buildRuntime), so detection is // deferred to before_agent_start. -import { promises as fs } from "node:fs"; -import * as path from "node:path"; - import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; -import { PlanDesignPhase } from "./plan-design/phase.js"; -import { PlanDesignFixPhase } from "./plan-design/fix-phase.js"; -import { PlanCodePhase } from "./plan-code/phase.js"; -import { PlanCodeFixPhase } from "./plan-code/fix-phase.js"; -import { PlanDocsPhase } from "./plan-docs/phase.js"; -import { PlanDocsFixPhase } from "./plan-docs/fix-phase.js"; -import { QRDecomposePhase } from "./qr-decompose/phase.js"; -import { QRVerifyPhase } from "./qr-verify/phase.js"; import { createLogger, type Logger } from "../../utils/logger.js"; -import type { WorkflowDispatch, PlanRef } from "../lib/dispatch.js"; +import type { RuntimeContext } from "../lib/runtime-context.js"; import type { EventLog } from "../lib/audit.js"; -import type { QRFile } from "../qr/types.js"; +import type { SubagentRole, StepSequence } from "../types.js"; +import { IntakePhase } from "./intake/phase.js"; +import { ScoutPhase } from "./scout/phase.js"; +import { DecomposerPhase } from "./decomposer/phase.js"; +import { OrchestratorPhase } from "./orchestrator/phase.js"; +import { PlannerPhase } from "./planner/phase.js"; +import { ExecutorPhase } from "./executor/phase.js"; + +// -- Config -- export interface SubagentConfig { - role: string; - phase: string; - planDir: string; + role: SubagentRole; + epicDir: string; subagentDir: string; - fix: string | null; -} - -type WorkPhaseKey = "plan-design" | "plan-code" | "plan-docs"; - -function parseWorkPhase(value: string | null): WorkPhaseKey | null { - if (value === "plan-design" || value === "plan-code" || value === "plan-docs") { - return value; - } - return null; + storyId: string | null; + stepSequence: StepSequence | null; } -function parseQRPhase(value: 
string): WorkPhaseKey | null { - if (!value.startsWith("qr-")) return null; - return parseWorkPhase(value.slice(3)); -} +// -- Detection -- -async function loadFixFailures(planDir: string, phase: WorkPhaseKey): Promise { - const qrPath = path.join(planDir, `qr-${phase}.json`); - try { - const raw = await fs.readFile(qrPath, "utf8"); - return JSON.parse(raw) as QRFile; - } catch { - return null; - } -} - -// Detects subagent mode by checking flags set via CLI (pi -p --koan-role -// architect --koan-phase plan-design ...). Flags are unavailable during -// init (getFlag() returns undefined before _buildRuntime()), so this -// must be called from before_agent_start or later. +// Detects subagent mode by reading flags set via CLI +// (pi -p --koan-role intake --koan-epic-dir /path ...). +// Must be called from before_agent_start or later; flags are +// undefined before _buildRuntime() runs. export function detectSubagentMode(pi: ExtensionAPI): SubagentConfig | null { const role = pi.getFlag("koan-role"); if (!role || typeof role !== "string" || role.trim().length === 0) { return null; } - const phase = pi.getFlag("koan-phase"); - const planDir = pi.getFlag("koan-plan-dir"); + const epicDir = pi.getFlag("koan-epic-dir"); const subagentDir = pi.getFlag("koan-subagent-dir"); - const fix = pi.getFlag("koan-fix"); + const storyId = pi.getFlag("koan-story-id"); + const stepSequence = pi.getFlag("koan-step-sequence"); return { - role: role.trim(), - phase: typeof phase === "string" ? phase.trim() : "", - planDir: typeof planDir === "string" ? planDir.trim() : "", + role: role.trim() as SubagentRole, + epicDir: typeof epicDir === "string" ? epicDir.trim() : "", subagentDir: typeof subagentDir === "string" ? subagentDir.trim() : "", - fix: typeof fix === "string" && fix.trim().length > 0 ? fix.trim() : null, + storyId: typeof storyId === "string" && storyId.trim().length > 0 ? 
storyId.trim() : null, + stepSequence: typeof stepSequence === "string" && stepSequence.trim().length > 0 + ? stepSequence.trim() as StepSequence + : null, }; } +// -- Dispatch -- + export async function dispatchPhase( pi: ExtensionAPI, config: SubagentConfig, - dispatch: WorkflowDispatch, - planRef: PlanRef, + ctx: RuntimeContext, log?: Logger, eventLog?: EventLog, ): Promise { const logger = log ?? createLogger("Dispatch"); - // -- Fix modes -- - - const fixPhase = parseWorkPhase(config.fix); - if (fixPhase) { - const qrFile = await loadFixFailures(config.planDir, fixPhase); - if (!qrFile) { - logger("Fix dispatch: failed to read QR file", { phase: fixPhase }); - return; + switch (config.role) { + case "intake": { + const phase = new IntakePhase(pi, { epicDir: config.epicDir }, ctx, logger, eventLog); + await phase.begin(); + break; } - - const failures = qrFile.items.filter((i) => i.status === "FAIL"); - if (failures.length === 0) { - logger("Fix dispatch: no FAIL items in QR file, skipping fix phase", { phase: fixPhase }); - return; + case "scout": { + const phase = new ScoutPhase(pi, { epicDir: config.epicDir }, ctx, logger, eventLog); + await phase.begin(); + break; } - - if (config.role === "architect" && fixPhase === "plan-design") { - const phase = new PlanDesignFixPhase( + case "decomposer": { + const phase = new DecomposerPhase(pi, { epicDir: config.epicDir }, ctx, logger, eventLog); + await phase.begin(); + break; + } + case "orchestrator": { + const stepSequence = config.stepSequence ?? "pre-execution"; + const phase = new OrchestratorPhase( pi, - { planDir: config.planDir, failures }, - dispatch, - planRef, - logger, - eventLog, + { epicDir: config.epicDir, stepSequence, storyId: config.storyId ?? 
undefined }, + ctx, logger, eventLog, ); await phase.begin(); - return; + break; } - - if (config.role === "developer" && fixPhase === "plan-code") { - const phase = new PlanCodeFixPhase( + case "planner": { + // Fail-fast: missing storyId produces malformed paths like stories//plan/plan.md (§12.4.3). + if (!config.storyId) throw new Error("planner phase requires --koan-story-id flag"); + const phase = new PlannerPhase( pi, - { planDir: config.planDir, failures }, - dispatch, - planRef, - logger, - eventLog, + { epicDir: config.epicDir, storyId: config.storyId }, + ctx, logger, eventLog, ); await phase.begin(); - return; + break; } - - if (config.role === "technical-writer" && fixPhase === "plan-docs") { - const phase = new PlanDocsFixPhase( + case "executor": { + // Fail-fast: missing storyId produces malformed paths like stories//plan/plan.md (§12.4.3). + if (!config.storyId) throw new Error("executor phase requires --koan-story-id flag"); + const retryContext = pi.getFlag("koan-retry-context"); + const phase = new ExecutorPhase( pi, - { planDir: config.planDir, failures }, - dispatch, - planRef, - logger, - eventLog, + { + epicDir: config.epicDir, + storyId: config.storyId, + retryContext: typeof retryContext === "string" && retryContext.length > 0 ? 
retryContext : undefined, + }, + ctx, logger, eventLog, ); await phase.begin(); - return; + break; } + default: + logger("Unknown role", { role: config.role }); } - - // -- Work phases -- - - if (config.role === "architect" && config.phase === "plan-design") { - const phase = new PlanDesignPhase( - pi, - { planDir: config.planDir }, - dispatch, - planRef, - logger, - eventLog, - ); - await phase.begin(); - return; - } - - if (config.role === "developer" && config.phase === "plan-code") { - const phase = new PlanCodePhase( - pi, - { planDir: config.planDir }, - dispatch, - planRef, - logger, - eventLog, - ); - await phase.begin(); - return; - } - - if (config.role === "technical-writer" && config.phase === "plan-docs") { - const phase = new PlanDocsPhase( - pi, - { planDir: config.planDir }, - dispatch, - planRef, - logger, - eventLog, - ); - await phase.begin(); - return; - } - - // -- QR phases -- - - const qrWorkPhase = parseQRPhase(config.phase); - if (config.role === "qr-decomposer" && qrWorkPhase) { - const phase = new QRDecomposePhase( - pi, - { planDir: config.planDir, workPhase: qrWorkPhase }, - dispatch, - planRef, - logger, - eventLog, - ); - await phase.begin(); - return; - } - - if (config.role === "reviewer" && qrWorkPhase) { - const rawItemFlag = pi.getFlag("koan-qr-item") as string; - if (!rawItemFlag) { - logger("Reviewer missing --koan-qr-item flag"); - return; - } - - const itemIds = rawItemFlag.split(",").map((s) => s.trim()).filter(Boolean); - if (itemIds.length === 0) { - logger("Reviewer --koan-qr-item flag is empty after parsing"); - return; - } - - const phase = new QRVerifyPhase( - pi, - { planDir: config.planDir, itemIds, workPhase: qrWorkPhase }, - dispatch, - planRef, - logger, - eventLog, - ); - await phase.begin(); - return; - } - - logger("Unknown role/phase combination", { - role: config.role, - phase: config.phase, - fix: config.fix, - }); } diff --git a/src/planner/phases/executor/phase.ts b/src/planner/phases/executor/phase.ts new 
file mode 100644 index 0000000..6ab7f05 --- /dev/null +++ b/src/planner/phases/executor/phase.ts @@ -0,0 +1,43 @@ +// Executor phase: implements a story plan. +// Two steps: comprehension → implementation. + +import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; + +import { createLogger, type Logger } from "../../../utils/logger.js"; +import type { RuntimeContext } from "../../lib/runtime-context.js"; +import { EventLog } from "../../lib/audit.js"; +import { BasePhase } from "../base-phase.js"; +import { EXECUTOR_STEP_NAMES, executorSystemPrompt, executorStepGuidance } from "./prompts.js"; +import type { StepGuidance } from "../../lib/step.js"; + +export class ExecutorPhase extends BasePhase { + protected readonly role = "executor"; + protected readonly totalSteps = 2; + + private readonly storyId: string; + private readonly retryContext: string | undefined; + + constructor( + pi: ExtensionAPI, + config: { epicDir: string; storyId: string; retryContext?: string }, + ctx: RuntimeContext, + log?: Logger, + eventLog?: EventLog, + ) { + super(pi, ctx, log ?? createLogger("ExecutorPhase"), eventLog); + this.storyId = config.storyId; + this.retryContext = config.retryContext; + } + + protected getSystemPrompt(): string { + return executorSystemPrompt(); + } + + protected getStepName(step: number): string { + return EXECUTOR_STEP_NAMES[step] ?? `Step ${step}`; + } + + protected getStepGuidance(step: number): StepGuidance { + return executorStepGuidance(step, this.storyId, this.retryContext); + } +} diff --git a/src/planner/phases/executor/prompts.ts b/src/planner/phases/executor/prompts.ts new file mode 100644 index 0000000..b27bf14 --- /dev/null +++ b/src/planner/phases/executor/prompts.ts @@ -0,0 +1,156 @@ +import type { StepGuidance } from "../../lib/step.js"; + +export const EXECUTOR_STEP_NAMES: Record = { + 1: "Comprehension", + 2: "Implementation", +}; + +export function executorSystemPrompt(): string { + return `You are a coding agent. 
You implement changes to a codebase by following a detailed plan written by a planner. You are the only agent in the koan workflow that writes source code. + +## Your role + +You receive a plan (plan/plan.md) and supporting context (plan/context.md), and you implement each step in order. You do not design. You do not make architectural decisions. You execute the plan faithfully. + +## What you receive + +- **plan/plan.md**: A numbered list of implementation steps. Each step specifies the file, location, action, and exact change to make. +- **plan/context.md**: Curated code snippets for the files you will modify — function signatures, type definitions, and import blocks. +- **retryContext** (when present): A failure summary from a previous execution attempt. Read it carefully — it describes what went wrong and what you should do differently. + +## How to work + +Work through the plan steps in order. Before touching any file: + +1. Read the file to understand its current state. Plan/context.md is a snapshot; the file may have changed due to earlier steps in this execution. +2. Identify exactly where the change goes. +3. Make the change precisely — no more, no less. +4. Verify the change looks correct before moving on. + +## When plan and reality diverge + +If what you find in the codebase does not match what the plan describes — the function doesn't exist, the signature is different, the file structure changed — you MUST stop immediately and call \`koan_ask_question\`. Do not improvise a solution. Do not make assumptions. + +Describe: +- Which plan step you are on +- What the plan expected to find +- What you actually found +- What you need to know to proceed + +Improvised solutions that seem reasonable in isolation frequently break other parts of the system that are not visible in your context window. + +## Strict rules — violations cause retry cycles + +- MUST implement steps in the order specified by the plan. +- MUST NOT skip any step, even if it seems redundant. 
+- MUST NOT add features, functions, or logic that the plan does not specify. +- MUST NOT refactor code that the plan does not mention — even if you notice an improvement opportunity. +- MUST NOT modify test expectations to make tests pass. If a test fails after your implementation, report it via koan_ask_question. +- MUST read each file before modifying it. Context.md is a reference, not a guarantee of current state. +- MUST call koan_ask_question immediately when plan assumptions don't hold. Do not continue to the next step. + +## On retries + +If retryContext is present, this is your second (or later) attempt at this story. The failure summary tells you what went wrong. Read it before you read the plan, and keep the failure context in mind as you implement. Do not repeat the mistake from the previous attempt. + +You work in steps. Each step has specific instructions. Follow them precisely.`; +} + +export function executorStepGuidance(step: number, storyId: string, retryContext?: string): StepGuidance { + switch (step) { + case 1: + return { + title: EXECUTOR_STEP_NAMES[1], + instructions: [ + `Read and fully understand the implementation plan for story \`${storyId}\` before writing any code.`, + "", + "## What to read", + "", + `1. Read \`stories/${storyId}/plan/plan.md\` — read every step from start to finish. Do not skim.`, + `2. Read \`stories/${storyId}/plan/context.md\` — understand the function signatures, types, and imports for every file the plan touches.`, + ...(retryContext + ? [ + "", + "## Retry context — read this first", + "", + "This is a retry attempt. A previous execution of this story failed. The failure summary is:", + "", + retryContext, + "", + "Keep this failure context in mind as you read the plan. 
Identify which step caused the failure and what you will do differently.", + ] + : []), + "", + "## What to understand", + "", + "After reading, you must be able to answer these questions without referring back to the files:", + "", + "- How many steps are in the plan?", + "- Which files will you modify?", + "- What is the dependency order between steps?", + "- Are there any steps that touch the same file (potential ordering conflicts)?", + "- What types or interfaces are central to the changes?", + "", + "Do NOT start writing code in this step. Comprehension only.", + "", + "Call koan_complete_step with your comprehension summary:", + "- Number of steps", + "- List of files to modify", + "- Any ambiguities or concerns you spotted in the plan (do not block on these — note them)", + ...(retryContext ? ["- How you plan to avoid the previous failure"] : []), + ], + }; + + case 2: + return { + title: EXECUTOR_STEP_NAMES[2], + instructions: [ + `Implement the plan for story \`${storyId}\` step by step.`, + "", + "## Execution protocol", + "", + "Work through plan/plan.md in order. For each step:", + "", + "1. **Read the target file** — do not rely solely on context.md; read the actual current state of the file.", + "2. **Locate the change site** — find the exact function, class, or section described in the plan step.", + "3. **Verify your assumption** — confirm that what you find matches what the plan describes. If it does not match, call koan_ask_question immediately.", + "4. **Make the change** — implement exactly what the plan step specifies. No more, no less.", + "5. **Move to the next step** — do not review or revisit previous steps.", + "", + "## Plan-reality mismatch protocol", + "", + "If at any point the codebase does not match the plan's description:", + "", + "- STOP immediately. 
Do not attempt to adapt the plan.", + "- Call `koan_ask_question` with:", + " - The plan step number and description", + " - What the plan expected", + " - What you actually found", + " - What specific information you need to proceed", + "", + "## Common pitfalls", + "", + "- Do not add logging, error handling, or validation beyond what the plan specifies.", + "- Do not fix code style issues you notice in passing.", + "- Do not update imports for files not mentioned in the plan.", + "- Do not change test files unless a plan step explicitly says to.", + "- Do not run the tests yourself — the orchestrator will verify.", + "", + "## When all steps are complete", + "", + "Review your changes at a high level: are all plan steps implemented? Did you accidentally modify something you shouldn't have? Correct any accidental changes.", + "", + "Then call koan_complete_step with a summary of what you implemented:", + "- Each plan step: completed or skipped (with reason if skipped)", + "- Files modified", + "- Any concerns or observations for the orchestrator", + ], + }; + + default: + return { + title: `Step ${step}`, + instructions: [`Execute step ${step}.`], + }; + } +} diff --git a/src/planner/phases/intake/phase.ts b/src/planner/phases/intake/phase.ts new file mode 100644 index 0000000..5ef4d79 --- /dev/null +++ b/src/planner/phases/intake/phase.ts @@ -0,0 +1,43 @@ +// Intake phase: reads conversation, extracts context, requests scouts, +// identifies gaps, asks user questions, writes context.md and decisions.md. +// Three-step sequence per §11.2.2. 
+ +import * as path from "node:path"; +import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; + +import { createLogger, type Logger } from "../../../utils/logger.js"; +import type { RuntimeContext } from "../../lib/runtime-context.js"; +import { EventLog } from "../../lib/audit.js"; +import { BasePhase } from "../base-phase.js"; +import { INTAKE_STEP_NAMES, intakeSystemPrompt, intakeStepGuidance } from "./prompts.js"; +import type { StepGuidance } from "../../lib/step.js"; + +export class IntakePhase extends BasePhase { + protected readonly role = "intake"; + protected readonly totalSteps = 3; + + private readonly conversationPath: string; + + constructor( + pi: ExtensionAPI, + config: { epicDir: string }, + ctx: RuntimeContext, + log?: Logger, + eventLog?: EventLog, + ) { + super(pi, ctx, log ?? createLogger("IntakePhase"), eventLog); + this.conversationPath = path.join(config.epicDir, "conversation.jsonl"); + } + + protected getSystemPrompt(): string { + return intakeSystemPrompt(); + } + + protected getStepName(step: number): string { + return INTAKE_STEP_NAMES[step] ?? 
`Step ${step}`; + } + + protected getStepGuidance(step: number): StepGuidance { + return intakeStepGuidance(step, this.conversationPath); + } +} diff --git a/src/planner/phases/intake/prompts.ts b/src/planner/phases/intake/prompts.ts new file mode 100644 index 0000000..80161f4 --- /dev/null +++ b/src/planner/phases/intake/prompts.ts @@ -0,0 +1,183 @@ +// Intake phase prompts — 3-step sequence per §11.2.2: +// Step 1: Context extraction (read conversation → write context.md) +// Step 2: Codebase scouting (call koan_request_scouts with targeted questions) +// Step 3: Gap analysis + questions (review findings → ask user → write decisions.md) + +import type { StepGuidance } from "../../lib/step.js"; + +export const INTAKE_STEP_NAMES: Record<number, string> = { + 1: "Context Extraction", + 2: "Codebase Scouting", + 3: "Gap Analysis & Questions", +}; + +export function intakeSystemPrompt(): string { + return `You are an intake analyst for a coding task planner. You read a conversation history, extract structured context, explore the codebase via scouts, and ask the user targeted clarifying questions grounded in both the conversation and what actually exists in the codebase. + +## Your role + +You extract and organize information. You do NOT plan, design, or implement. + +## Strict rules — violations invalidate your output + +- MUST NOT infer decisions that were not explicitly stated in the conversation. +- MUST NOT add architectural opinions or suggest approaches. +- MUST NOT summarize, paraphrase, or analyze code beyond extracting factual references. +- MUST NOT produce implementation recommendations of any kind. +- MUST only capture what was explicitly said. If something is unclear, note it as an unresolved question. +- MUST ask at most 8 questions total. Prioritize the most important gaps. +- SHOULD prefer multiple-choice questions when the answer space is bounded. +- SHOULD ask open-ended questions only when the space of valid answers is genuinely unbounded.
+- SHOULD ask questions grounded in what you found in the codebase (e.g., "the codebase uses X — should this story follow the same pattern or switch to Y?"). + +## Output files + +You write two files, both inside the epic directory: + +1. **context.md** — structured extraction of what was said in the conversation. +2. **decisions.md** — answers to the questions you asked the user. + +## Tools available + +- All read tools (read, bash, grep, glob, find, ls) — for reading the conversation and codebase. +- \`koan_request_scouts\` — to request parallel codebase exploration. +- \`koan_ask_question\` — to ask the user clarifying questions via IPC. +- \`write\` / \`edit\` — for writing output files inside the epic directory only. +- \`koan_complete_step\` — to signal step completion with your findings. + +You work in three steps. Each step has specific instructions. Follow them precisely.`; +} + +export function intakeStepGuidance(step: number, conversationPath?: string): StepGuidance { + switch (step) { + case 1: + return { + title: INTAKE_STEP_NAMES[1], + instructions: [ + "Read the conversation file and extract structured context into `context.md`.", + "", + conversationPath + ? `Conversation file: ${conversationPath}` + : "Conversation file: locate `conversation.jsonl` in the epic directory.", + "", + "The conversation file is JSONL (JSON Lines). Each line is a JSON object.", + "Look for entries with type 'message' and role 'user' or 'assistant' for content.", + "Ignore internal session entries (header, compaction, etc.).", + "", + "Write `context.md` to the epic directory with these exact sections:", + "", + "## Topic", + "One paragraph describing what is being built or changed. Use only information explicitly stated in the conversation.", + "", + "## File References", + "List every file, directory, or module mentioned in the conversation. 
One item per line.", + "If none were mentioned, write: (none mentioned)", + "", + "## Decisions Made", + "List every decision that was explicitly stated and agreed upon. Format: `- [decision text]`", + "A decision must be explicitly stated — do not infer from context.", + "If none were made, write: (none recorded)", + "", + "## Constraints", + "List every explicit constraint: technical, timeline, compatibility, budget, etc.", + "If none were stated, write: (none stated)", + "", + "## Unresolved Questions", + "List every question raised in the conversation that was NOT answered.", + "Also list any gaps you observe — things that must be known before planning can proceed.", + "Format: `- [question or gap description]`", + "", + "Be faithful to the conversation. Do not invent context.", + ], + }; + + case 2: + return { + title: INTAKE_STEP_NAMES[2], + instructions: [ + "Based on the file references and topic in context.md, identify what needs codebase exploration.", + "", + "Use `koan_request_scouts` to gather codebase context before asking the user questions.", + "This grounds the questions in what actually exists — preventing questions the codebase already answers.", + "", + "## When to scout", + "", + "Scout when context.md mentions:", + "- Specific files, modules, or packages that should be verified or understood.", + "- Integration points with existing code (APIs, databases, auth, etc.).", + "- Areas where the user's assumptions may not match the codebase (e.g., 'we use React' but you should verify).", + "", + "Formulate 1–5 focused scout tasks. 
Each scout answers one narrow question.", + "", + "## Scout task format", + "", + "Each scout needs:", + "- id: short kebab-case identifier (e.g., 'auth-setup', 'api-structure')", + "- role: a focused investigator role (e.g., 'auth system auditor', 'API structure analyst')", + "- prompt: exactly what to find (e.g., 'Find all auth-related files and identify which auth library is used')", + "", + "## If no scouting is needed", + "", + "If context.md has no file references and the topic is purely conceptual (no codebase inspection needed),", + "skip scouting and call koan_complete_step with: 'Scouting skipped — no codebase references in context.'", + ], + }; + + case 3: + return { + title: INTAKE_STEP_NAMES[3], + instructions: [ + "Review `context.md` and scout findings together. Identify gaps. Ask the user. Write `decisions.md`.", + "", + "## Gap identification criteria", + "", + "Ask about a gap if:", + "- The answer materially changes WHAT is built (scope, features, API shape).", + "- The answer materially changes HOW the work is sequenced (dependencies, ordering).", + "- Without the answer, the decomposer cannot split the work into stories.", + "- Scout findings reveal a contradiction with what the user described (e.g., user said 'we use Postgres' but scout found SQLite).", + "", + "Do NOT ask about:", + "- Implementation choices (those belong to the planner role).", + "- Things the scout findings already answered.", + "- Nice-to-have clarifications that don't change the plan.", + "", + "## Asking questions", + "", + "Use `koan_ask_question` to send questions to the user. 
Maximum 8 questions.", + "Prefer multiple-choice when the answer space is bounded.", + "Reference scout findings in questions when relevant: 'The codebase uses X — should this follow the same pattern?'", + "", + "## Writing decisions.md", + "", + "After the user responds, write `decisions.md` to the epic directory:", + "", + "## Answers", + "For each question asked, record the question and the user's answer.", + "Format:", + "```", + "**Q: [question text]**", + "A: [user's answer]", + "```", + "", + "## Remaining Unknowns", + "List any gaps that remain unresolved. If none: write (none)", + "", + "If there were no meaningful gaps, write:", + "`## Answers\\n(no questions were needed — context and codebase survey were sufficient)`", + "", + "Then call `koan_complete_step` with a brief summary:", + "- File references found", + "- Scouts requested and key findings", + "- Questions asked and answered", + "- Any remaining unknowns", + ], + }; + + default: + return { + title: `Step ${step}`, + instructions: [`Execute step ${step}.`], + }; + } +} diff --git a/src/planner/phases/orchestrator/phase.ts b/src/planner/phases/orchestrator/phase.ts new file mode 100644 index 0000000..5629e7b --- /dev/null +++ b/src/planner/phases/orchestrator/phase.ts @@ -0,0 +1,60 @@ +// Orchestrator phase: judgment calls at execution boundaries. +// Two step sequences: pre-execution (2 steps) and post-execution (4 steps). +// koan_escalate is eliminated — orchestrator uses koan_ask_question for all +// user communication and then calls appropriate state-transition tools. 
+ +import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; + +import { createLogger, type Logger } from "../../../utils/logger.js"; +import type { RuntimeContext } from "../../lib/runtime-context.js"; +import { EventLog } from "../../lib/audit.js"; +import { BasePhase } from "../base-phase.js"; +import { + ORCHESTRATOR_PRE_STEP_NAMES, + ORCHESTRATOR_POST_STEP_NAMES, + orchestratorSystemPrompt, + orchestratorPreStepGuidance, + orchestratorPostStepGuidance, +} from "./prompts.js"; +import type { StepGuidance } from "../../lib/step.js"; + +const PRE_TOTAL_STEPS = 2; +const POST_TOTAL_STEPS = 4; + +export class OrchestratorPhase extends BasePhase { + protected readonly role = "orchestrator"; + protected readonly totalSteps: number; + + private readonly stepSequence: "pre-execution" | "post-execution"; + private readonly storyId: string | undefined; + + constructor( + pi: ExtensionAPI, + config: { epicDir: string; stepSequence: "pre-execution" | "post-execution"; storyId?: string }, + ctx: RuntimeContext, + log?: Logger, + eventLog?: EventLog, + ) { + super(pi, ctx, log ?? createLogger("OrchestratorPhase"), eventLog); + this.stepSequence = config.stepSequence; + this.storyId = config.storyId; + this.totalSteps = config.stepSequence === "pre-execution" ? PRE_TOTAL_STEPS : POST_TOTAL_STEPS; + } + + protected getSystemPrompt(): string { + return orchestratorSystemPrompt(this.stepSequence); + } + + protected getStepName(step: number): string { + const names = this.stepSequence === "pre-execution" + ? ORCHESTRATOR_PRE_STEP_NAMES + : ORCHESTRATOR_POST_STEP_NAMES; + return names[step] ?? `Step ${step}`; + } + + protected getStepGuidance(step: number): StepGuidance { + return this.stepSequence === "pre-execution" + ? 
orchestratorPreStepGuidance(step) + : orchestratorPostStepGuidance(step, this.storyId); + } +} diff --git a/src/planner/phases/orchestrator/prompts.ts b/src/planner/phases/orchestrator/prompts.ts new file mode 100644 index 0000000..15c5db3 --- /dev/null +++ b/src/planner/phases/orchestrator/prompts.ts @@ -0,0 +1,299 @@ +// Orchestrator phase prompts. +// Pre-execution (2 steps): dependency analysis → story selection. +// Post-execution (4 steps): verify → verdict → propagate → select next. +// +// koan_escalate is eliminated per §11.3.1. When the orchestrator needs human +// input, it uses koan_ask_question to get clarification, then decides what +// to do (retry, skip, etc.) and calls the appropriate state-transition tool. + +import type { StepGuidance } from "../../lib/step.js"; + +export const ORCHESTRATOR_PRE_STEP_NAMES: Record<number, string> = { + 1: "Dependency Analysis", + 2: "Story Selection", +}; + +export const ORCHESTRATOR_POST_STEP_NAMES: Record<number, string> = { + 1: "Verify", + 2: "Verdict", + 3: "Propagate", + 4: "Select Next", +}; + +export function orchestratorSystemPrompt(stepSequence: string): string { + const sequenceFocus = + stepSequence === "pre-execution" + ? "You are beginning an epic run. Analyze story dependencies and select the first story for execution." + : "Execution has just completed for a story. Verify the result, issue a verdict, propagate learnings, and select the next story."; + + return `You are a workflow orchestrator for a multi-story coding epic. You make judgment calls at execution boundaries — before and after each coding story runs. ${sequenceFocus} + +## Important: status.md may be stale + +Do not rely on \`status.md\` for current story state. The driver sets intermediate statuses (\`planning\`, \`executing\`, \`verifying\`) in its internal JSON state only — \`status.md\` is only updated by orchestrator tool calls (\`koan_select_story\`, \`koan_complete_story\`, etc.).
Your authoritative inputs are \`verify.md\`, \`plan.md\`, git diff, and \`epic.md\` — not \`status.md\`. + +## Your role + +You are a decision-maker. You read content, apply judgment, and direct the workflow. You do NOT write code. You do NOT modify source code files. You do NOT produce implementation plans. + +## What you own + +- **Verification**: Running the checks defined in a story's verify.md to determine whether the implementation is correct. +- **Verdict**: Declaring the outcome of a story's execution — success or retry with feedback. +- **Story selection**: Choosing which story executes next based on the dependency graph and current epic state. +- **Learning propagation**: When you discover something during verification, update remaining story.md files and decisions.md. Mark every autonomous update with \`[autonomous]\`. +- **User communication**: When you encounter genuine ambiguity or need human judgment, call \`koan_ask_question\`. After getting the answer, decide what to do (retry with new context, skip, etc.) and call the appropriate tool. + +## When to ask the user + +Call \`koan_ask_question\` when: +- Verification reveals an ambiguity in requirements that cannot be resolved by reading the code. +- A story fails in a way that suggests the spec was wrong, not the implementation. +- You need human judgment on whether to retry, skip, or take a different approach. + +After getting the answer, record it and proceed with an appropriate tool call: +- \`koan_retry_story\` — if the user provided direction that lets you retry with a better plan +- \`koan_skip_story\` — if the user decided the story is no longer needed +- \`koan_complete_story\` — if the user confirmed the outcome is acceptable + +## Tools available + +- All read tools (read, bash, grep, glob, find, ls) — for reading epic artifacts and running verification checks. +- \`koan_select_story\` — to declare which story should execute next. 
+- \`koan_complete_story\` — to mark a story as successfully verified and completed. +- \`koan_retry_story\` — to send a story back to the executor with a detailed failure summary. +- \`koan_skip_story\` — to skip a story that is superseded or no longer needed. +- \`koan_ask_question\` — to ask the human a targeted question when judgment is genuinely ambiguous. +- \`koan_complete_step\` — to signal step completion with your findings. +- \`write\` / \`edit\` — for updating artifact files inside the epic directory only. +- \`bash\` — for running verification commands. + +## The [autonomous] marker + +When you make a decision that modifies artifacts without explicit human instruction, prefix the added content with \`[autonomous]\` in the artifact file. This lets the human audit all autonomous decisions. + +## Strict rules + +- MUST NOT write or modify source code files. +- MUST NOT call more than one verdict tool per verdict step. +- MUST run ALL verification checks in verify.md before issuing a verdict. +- MUST include a concrete, actionable failure summary when calling koan_retry_story. +- When uncertain about a verdict, prefer koan_retry_story with a detailed failure_summary. Ask the user only when the failure reveals a genuine requirements ambiguity. + +You work in steps. Each step has specific instructions. Follow them precisely.`; +} + +export function orchestratorPreStepGuidance(step: number): StepGuidance { + switch (step) { + case 1: + return { + title: ORCHESTRATOR_PRE_STEP_NAMES[1], + instructions: [ + "Read the epic artifacts to understand the full scope of work and story dependencies.", + "", + "## What to read", + "", + "1. Read `epic.md` in the epic directory — understand the overall goal and scope.", + "2. Read `decisions.md` in the epic directory — understand decisions that shape execution.", + "3. 
Read each `story.md` file for every story in the epic — understand what each story builds and depends on.", + "", + "## What to analyze", + "", + "After reading, build a dependency model:", + "- Which stories must complete before others can begin? (explicit dependencies)", + "- Which stories share files or interfaces? (implicit coupling)", + "- Which stories are independent and could run in any order?", + "- Are there any circular dependencies or unresolvable conflicts?", + "", + "Note the risk profile of each story: stories that touch shared infrastructure are higher risk.", + "", + "## Output", + "", + "Call koan_complete_step with your dependency analysis in the `thoughts` parameter. Include:", + "- The execution order you recommend and why", + "- Any risks or concerns you identified", + "- The ID of the story you believe should run first", + ], + }; + + case 2: + return { + title: ORCHESTRATOR_PRE_STEP_NAMES[2], + instructions: [ + "Select the first story for execution based on your dependency analysis from step 1.", + "", + "## Selection criteria", + "", + "Choose the story that:", + "1. Has all its dependencies satisfied (no blockers)", + "2. Is highest priority given the epic's goal", + "3. 
Creates the most unblocking value for subsequent stories if completed", + "", + "Prefer foundational stories (shared types, interfaces, infrastructure) over leaf stories.", + "", + "## What to do", + "", + "Call `koan_select_story` with the ID of the story that should execute first.", + "Then call `koan_complete_step` with your reasoning.", + ], + invokeAfter: [ + "WHEN DONE: Call koan_select_story with your chosen story ID, then call koan_complete_step with your reasoning.", + "Do NOT call koan_complete_step until koan_select_story has been called.", + ].join("\n"), + }; + + default: + return { title: `Step ${step}`, instructions: [`Execute step ${step}.`] }; + } +} + +export function orchestratorPostStepGuidance(step: number, storyId?: string): StepGuidance { + const storyRef = storyId ? `story \`${storyId}\`` : "the current story"; + const verifyPath = storyId ? `stories/${storyId}/plan/verify.md` : "stories/<story-id>/plan/verify.md"; + + switch (step) { + case 1: + return { + title: ORCHESTRATOR_POST_STEP_NAMES[1], + instructions: [ + `Run all verification checks defined for ${storyRef}.`, + "", + "## What to read", + "", + `1. Read \`${verifyPath}\` in the epic directory — every check you must run.`, + "2. Read the story's `story.md` to understand the acceptance criteria.", + "", + "## Running checks", + "", + "Execute every check listed in verify.md using bash.
Do not skip checks.", + "", + "- Run compilation/type checks first (cheapest).", + "- Run linting and static analysis next.", + "- Run unit and integration tests last (most expensive).", + "", + "For each check, record:", + "- The exact command you ran", + "- The exit code", + "- Relevant output (errors, failures, warnings)", + "", + "## Output", + "", + "Call koan_complete_step with your verification findings:", + "- A summary of every check run and its result (pass/fail)", + "- The full error output for any failures", + "- Your preliminary assessment: does the implementation appear correct?", + ], + }; + + case 2: + return { + title: ORCHESTRATOR_POST_STEP_NAMES[2], + instructions: [ + "Issue a verdict based on your verification findings from step 1.", + "", + "## Verdict options", + "", + "**koan_complete_story** — All verification checks passed. The implementation is correct.", + "", + "**koan_retry_story** — Verification failed, but the failure is fixable by the executor.", + "MUST provide a detailed `failure_summary` that includes:", + " - Which checks failed and why", + " - The exact error messages", + " - What the executor should do differently", + "", + "**koan_ask_question then decide** — The failure reveals a genuine requirements ambiguity.", + "Ask the user a focused question. 
Based on the answer:", + " - Call koan_retry_story with the user's direction as context", + " - Call koan_skip_story if the user decides the story is no longer needed", + " - Call koan_complete_story if the user confirms the outcome is acceptable", + "", + "## Decision rule", + "", + "If any check failed AND the failure is a concrete code bug → koan_retry_story.", + "If any check failed AND the failure reveals a requirements contradiction → koan_ask_question then decide.", + "If all checks passed → koan_complete_story.", + "", + "Call EXACTLY ONE verdict tool (after any koan_ask_question).", + ], + invokeAfter: [ + "WHEN DONE: Call EXACTLY ONE of: koan_complete_story, koan_retry_story, or (koan_ask_question then verdict tool).", + "Then call koan_complete_step to advance to the next step.", + ].join("\n"), + }; + + case 3: + return { + title: ORCHESTRATOR_POST_STEP_NAMES[3], + instructions: [ + "Propagate lessons from this story's execution to remaining stories and the decisions log.", + "", + "## What to propagate", + "", + "Review what you learned from verification (step 1) and the verdict (step 2):", + "- Did the executor encounter something that affects remaining stories?", + "- Did verification reveal an incorrect assumption in a remaining story's plan?", + "- Did the implementation introduce a pattern remaining stories should follow?", + "", + "Only propagate information directly relevant to remaining stories.", + "", + "## How to propagate", + "", + "For each remaining story that is affected:", + "1. Read its `story.md`.", + "2. 
Add a `## [autonomous] Propagated Context` section with the relevant information.", + "", + "Update `decisions.md` if a new decision was made or an existing one was invalidated.", + "Add `[autonomous]` prefix to any autonomous additions.", + "", + "If no propagation is needed, skip file updates and proceed.", + "", + "## Skipping stories", + "", + "If this story's completion makes another story unnecessary, call `koan_skip_story` with a clear reason.", + "", + "Then call koan_complete_step with a summary of what was propagated.", + ], + }; + + case 4: + return { + title: ORCHESTRATOR_POST_STEP_NAMES[4], + instructions: [ + "Select the next story to execute, or complete the epic if all stories are done.", + "", + "## What to check", + "", + "Read each story directory to understand which stories remain:", + "- Stories with `pending` or `retry` status are candidates.", + "- Done, skipped, or currently-selected stories are not candidates.", + "", + "## Selection criteria", + "", + "Among remaining stories:", + "1. Filter to those whose dependencies are all completed.", + "2. Among unblocked stories, prefer the one with highest value.", + "3. A story in 'retry' state is highest priority — it was already planned and executed.", + "", + "## What to do", + "", + "If one or more stories remain and are unblocked:", + "- Call `koan_select_story` with the ID of the next story.", + "- Then call `koan_complete_step` with your reasoning.", + "", + "If no stories remain (all completed or skipped):", + "- Call `koan_complete_step` with a summary stating the epic is complete.", + " Do NOT call koan_select_story.", + "", + "If stories remain but all are blocked (dependencies not satisfied):", + "- Call `koan_ask_question` to ask the user how to proceed (reorder, skip, or abort).", + " Based on the answer, call the appropriate tool.", + ], + invokeAfter: [ + "WHEN DONE: If stories remain, call koan_select_story then koan_complete_step. 
If none remain, call koan_complete_step only.", + ].join("\n"), + }; + + default: + return { title: `Step ${step}`, instructions: [`Execute step ${step}.`] }; + } +} diff --git a/src/planner/phases/planner/phase.ts b/src/planner/phases/planner/phase.ts new file mode 100644 index 0000000..4a0c5d6 --- /dev/null +++ b/src/planner/phases/planner/phase.ts @@ -0,0 +1,41 @@ +// Planner phase: produces the detail plan for a single story. +// Three steps: analysis → plan → verification design. + +import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; + +import { createLogger, type Logger } from "../../../utils/logger.js"; +import type { RuntimeContext } from "../../lib/runtime-context.js"; +import { EventLog } from "../../lib/audit.js"; +import { BasePhase } from "../base-phase.js"; +import { PLANNER_STEP_NAMES, plannerSystemPrompt, plannerStepGuidance } from "./prompts.js"; +import type { StepGuidance } from "../../lib/step.js"; + +export class PlannerPhase extends BasePhase { + protected readonly role = "planner"; + protected readonly totalSteps = 3; + + private readonly storyId: string; + + constructor( + pi: ExtensionAPI, + config: { epicDir: string; storyId: string }, + ctx: RuntimeContext, + log?: Logger, + eventLog?: EventLog, + ) { + super(pi, ctx, log ?? createLogger("PlannerPhase"), eventLog); + this.storyId = config.storyId; + } + + protected getSystemPrompt(): string { + return plannerSystemPrompt(); + } + + protected getStepName(step: number): string { + return PLANNER_STEP_NAMES[step] ?? 
`Step ${step}`; + } + + protected getStepGuidance(step: number): StepGuidance { + return plannerStepGuidance(step, this.storyId); + } +} diff --git a/src/planner/phases/planner/prompts.ts b/src/planner/phases/planner/prompts.ts new file mode 100644 index 0000000..b7d77f9 --- /dev/null +++ b/src/planner/phases/planner/prompts.ts @@ -0,0 +1,213 @@ +import type { StepGuidance } from "../../lib/step.js"; + +export const PLANNER_STEP_NAMES: Record<number, string> = { + 1: "Analysis", + 2: "Plan", + 3: "Verification Design", +}; + +export function plannerSystemPrompt(): string { + return `You are an implementation planner for a single coding story. You produce a detailed, step-by-step plan that a coding agent can execute without making judgment calls. You bridge the gap between high-level story intent and concrete implementation actions. + +## Your role + +You read stories, codebase artifacts, and scout reports, then produce three output files: a step-by-step plan, a curated code context file, and a verification checklist. You do NOT write code. You do NOT make design decisions beyond what the story and decisions log specify. + +## What you produce + +### plan/plan.md — Step-by-step implementation plan + +Each step must specify: +- **Which file** to modify or create (full path from repo root) +- **Which function, class, or section** within that file +- **What change** to make (add, modify, delete, rename, restructure) +- **Why** this change is needed (link to story requirement or constraint) +- **Dependencies** between steps (e.g., "Step 3 requires step 1 to complete first") + +Steps must be ordered to minimize conflicts. Implement foundational changes before dependent ones. Leaf dependencies before callers. + +Be precise enough that a coding agent can execute each step without asking questions. Vague steps ("update the handler") produce retry cycles. 
Precise steps ("add parameter \`timeout: number\` to the \`fetchUser\` function signature in \`src/api/users.ts\`, update all call sites in \`src/routes/auth.ts\` and \`src/routes/profile.ts\`") do not. + +### plan/context.md — Curated code context + +Include only the code the executor needs to understand what it is modifying: +- Function signatures for every function the plan touches +- Relevant type definitions and interfaces +- Import statements that must be preserved or updated +- Key constants or configuration values that affect the changes +- Do NOT include boilerplate, unrelated functions, or documentation blocks + +### plan/verify.md — Verification checklist + +List every check the orchestrator should run after execution, ordered cheap to expensive: +1. Compilation checks (tsc --noEmit, build commands) +2. Linting and type checks +3. Unit tests for affected modules +4. Integration or end-to-end tests + +Each check entry must include: +- A description of what it verifies +- The exact command to run (with arguments) +- What a passing result looks like + +## Strict rules — violations cause execution failures + +- MUST NOT write source code. Plan steps describe actions; they do not contain implementation. +- MUST NOT plan beyond the current story's scope. If a step would touch something not in the story, flag it as out-of-scope. +- MUST NOT make architectural decisions. If a decision is needed that is outside the planner's scope, note it in plan.md as: \`BLOCKER: [description]. The orchestrator will ask the user via koan_ask_question during verification.\` +- MUST include enough detail that the executor can implement the plan in one pass without guessing. +- MUST scope plan/context.md to only what the executor needs — context files that include too much code obscure the relevant parts. + +You work in steps. Each step has specific instructions. 
Follow them precisely.`; +} + +export function plannerStepGuidance(step: number, storyId: string): StepGuidance { + switch (step) { + case 1: + return { + title: PLANNER_STEP_NAMES[1], + instructions: [ + `Analyze all available context for story \`${storyId}\` before producing any plan output.`, + "", + "## Request fresh codebase scouts", + "", + "Before analyzing the story, use `koan_request_scouts` to explore the current state of files this story will touch. Codebase state may have changed since earlier scouts. Request scouts for the specific files and patterns mentioned in the story sketch.", + "", + "## What to read", + "", + `1. Read \`stories/${storyId}/story.md\` in the epic directory — understand exactly what this story must accomplish, its acceptance criteria, and any noted constraints or dependencies.`, + "2. Read `decisions.md` in the epic directory — understand the architectural decisions and open questions that apply to this story. If a decision is marked as unresolved, check whether it blocks this story.", + "3. Read the scout reports returned by `koan_request_scouts` for current codebase context.", + "", + "## What to analyze", + "", + "After reading, build a complete picture of the work:", + "", + "- **Scope**: What exactly must change? What must NOT change?", + "- **Entry points**: Which files, functions, or modules are the primary change sites?", + "- **Ripple effects**: What else must be updated because of the primary changes? (callers, types, tests, exports)", + "- **Constraints**: Are there patterns from the codebase the executor must follow? (naming conventions, error handling style, module structure)", + "- **Risks**: Which steps are most likely to cause conflicts or unexpected issues?", + "", + "## Output", + "", + "Call koan_complete_step with your analysis in the `thoughts` parameter. 
Include:", + "- The list of files that will be modified or created", + "- The sequence you plan for the steps (high-level)", + "- Any risks or unresolved questions you identified", + "- Whether any open decisions in decisions.md block this story", + ], + }; + + case 2: + return { + title: PLANNER_STEP_NAMES[2], + instructions: [ + `Write the implementation plan and code context for story \`${storyId}\`.`, + "", + "## Write plan/plan.md", + "", + `Create \`stories/${storyId}/plan/plan.md\` in the epic directory with a numbered list of implementation steps.`, + "", + "Each step must follow this format:", + "```", + "## Step N: [Short title]", + "", + "**File**: path/to/file.ts", + "**Location**: function name, class name, or section description", + "**Action**: [add | modify | delete | create | rename]", + "", + "[Precise description of what to change and why. Include exact parameter names,", + "type signatures, return values, or behavioral changes. Be specific enough that", + "the executor does not need to make any judgment calls.]", + "", + "**Depends on**: Step N (if applicable)", + "```", + "", + "Order steps so each step's dependencies are satisfied before it runs.", + "Prefer: type changes → interface updates → implementation changes → call-site updates → test updates.", + "", + "## Write plan/context.md", + "", + `Create \`stories/${storyId}/plan/context.md\` with curated code snippets the executor needs.`, + "", + "Structure by file, then by section within the file:", + "```", + "## path/to/file.ts", + "", + "### FunctionName (lines N–M)", + "\\`\\`\\`typescript", + "// paste the relevant function signature and key lines only", + "\\`\\`\\`", + "```", + "", + "Include:", + "- Every function signature the plan references", + "- Type definitions that the changes touch", + "- Import blocks for files being modified", + "- Constants or configuration values referenced in plan steps", + "", + "Exclude:", + "- Unrelated functions and classes", + "- Long function 
bodies (include signature + key lines only)", + "- Documentation blocks and comments unless they carry critical constraint information", + "", + "Call koan_complete_step with a summary: number of plan steps, files affected, and any risks you flagged in the plan.", + ], + }; + + case 3: + return { + title: PLANNER_STEP_NAMES[3], + instructions: [ + `Write the verification checklist for story \`${storyId}\`.`, + "", + `Create \`stories/${storyId}/plan/verify.md\` in the epic directory. This file will be used by the orchestrator to verify the executor's output.`, + "", + "## Structure", + "", + "Order checks from cheapest to most expensive. The orchestrator must be able to run every check via bash.", + "", + "```", + "## Verification Checklist for story: ${storyId}", + "", + "### Check 1: [Description]", + "**Command**: `exact command here`", + "**Passes when**: [description of expected output or exit code]", + "", + "### Check 2: ...", + "```", + "", + "## Required check categories (in order)", + "", + "**1. Compilation** (always required)", + "Include the TypeScript compilation check or equivalent build command.", + "Example: `npx tsc --noEmit`", + "", + "**2. Linting** (if project uses a linter)", + "Include the lint command for affected files.", + "", + "**3. Unit tests** (for modified modules)", + "Include test commands scoped to the files or modules changed by this story.", + "Prefer targeted test runs (e.g., `--testPathPattern`) over full suite runs.", + "", + "**4. 
Integration tests** (if applicable)", + "Include only tests that directly exercise the story's acceptance criteria.", + "", + "## Precision requirements", + "", + "- Each command must be runnable from the repo root with no modifications.", + "- Pass/fail criteria must be unambiguous (exit code 0 = pass, or specific output pattern).", + "- Do not include checks that verify things outside this story's scope.", + "", + "Call koan_complete_step with a summary: number of checks, categories covered, and any checks you could not define due to missing information.", + ], + }; + + default: + return { + title: `Step ${step}`, + instructions: [`Execute step ${step}.`], + }; + } +} diff --git a/src/planner/phases/scout/phase.ts b/src/planner/phases/scout/phase.ts new file mode 100644 index 0000000..6685505 --- /dev/null +++ b/src/planner/phases/scout/phase.ts @@ -0,0 +1,39 @@ +// Scout phase: answers one narrow codebase question and writes findings. +// Single-step, cheap model, no user interaction. + +import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; + +import { createLogger, type Logger } from "../../../utils/logger.js"; +import type { RuntimeContext } from "../../lib/runtime-context.js"; +import { EventLog } from "../../lib/audit.js"; +import { BasePhase } from "../base-phase.js"; +import { SCOUT_STEP_NAMES, scoutSystemPrompt, scoutStepGuidance } from "./prompts.js"; +import type { StepGuidance } from "../../lib/step.js"; + +export class ScoutPhase extends BasePhase { + protected readonly role = "scout"; + protected readonly totalSteps = 1; + + constructor( + pi: ExtensionAPI, + config: { epicDir: string }, + ctx: RuntimeContext, + log?: Logger, + eventLog?: EventLog, + ) { + super(pi, ctx, log ?? 
createLogger("ScoutPhase"), eventLog); + void config; // epicDir used via ctx.epicDir for permission scoping + } + + protected getSystemPrompt(): string { + return scoutSystemPrompt(); + } + + protected getStepName(step: number): string { + return SCOUT_STEP_NAMES[step] ?? `Step ${step}`; + } + + protected getStepGuidance(_step: number): StepGuidance { + return scoutStepGuidance(); + } +} diff --git a/src/planner/phases/scout/prompts.ts b/src/planner/phases/scout/prompts.ts new file mode 100644 index 0000000..1512e83 --- /dev/null +++ b/src/planner/phases/scout/prompts.ts @@ -0,0 +1,74 @@ +// Scout phase prompts — single step: explore & report. +// Role-specific context (the question and output file) is embedded in the +// spawn prompt by the spawn function. This provides only process guidance. + +import type { StepGuidance } from "../../lib/step.js"; + +export const SCOUT_STEP_NAMES: Record<number, string> = { + 1: "Explore & Report", +}; + +export function scoutSystemPrompt(): string { + return `You are a codebase investigator. You are assigned one narrow, specific question about a codebase. Your job is to read the relevant files, find the answer, and write your findings to a designated output file. + +## Your role + +You find facts. You do NOT interpret, recommend, or opine. + +## Strict rules + +- MUST answer only the assigned question. Do not expand scope. +- MUST write only factual observations: what the code does, what files exist, what patterns are present. +- MUST NOT produce recommendations or suggestions of any kind. +- MUST NOT express opinions about code quality. +- MUST NOT produce implementation plans or design ideas. +- MUST include file paths and line numbers when referencing code. +- MUST include relevant code excerpts (verbatim) to support each finding. +- SHOULD be thorough within the question scope: follow references, check related files. +- SHOULD note explicitly when something is NOT present (e.g., "No tests found for this module"). 
+ +## Output format + +Write a markdown file with these sections: + +## Question +Restate the assigned question verbatim. + +## Findings +Factual observations that answer the question. Use sub-sections if the answer has multiple parts. +Cite file paths and line numbers for every claim. Include code snippets where relevant. + +## Files Examined +List every file you read during this investigation. + +## Gaps +Note anything you could not determine. If no gaps, write: (none) + +## Tools available + +- All read tools (read, bash, grep, glob, find, ls) — for reading the codebase. +- \`write\` / \`edit\` — for writing the output file only. +- \`koan_complete_step\` — to signal completion. + +You work in a single step. Read the codebase, answer the question, write the output file.`; +} + +// Role-specific context (the question and output file) is embedded in the +// spawn prompt by the spawn function. This provides process guidance only. +export function scoutStepGuidance(): StepGuidance { + return { + title: SCOUT_STEP_NAMES[1], + instructions: [ + "Investigate the codebase to answer the assigned question. Write your findings to the output file.", + "", + "## Process", + "", + "1. Identify the files most likely to contain the answer. Start broad (grep, glob, ls),", + " then narrow down (read specific files).", + "2. Follow cross-references: if a file imports from another file, check that file too.", + "3. Be thorough within the question scope. Do not stop at the first partial answer.", + "4. Write your findings to the output file using the format described in your system prompt.", + "5. Call `koan_complete_step` with a one-sentence summary of your key finding.", + ], + }; +} diff --git a/src/planner/tools/index.ts b/src/planner/tools/index.ts index 726cd11..6383a34 100644 --- a/src/planner/tools/index.ts +++ b/src/planner/tools/index.ts @@ -1,40 +1,19 @@ // Tool registration aggregator. Single entry point for koan.ts. 
-// Re-exports dispatch primitives so koan.ts needs one import for both -// tool registration and workflow infrastructure. +// All tools registered here; RuntimeContext replaces the three separate +// mutable refs (PlanRef, SubagentRef, WorkflowDispatch) from the old design. import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; -import type { WorkflowDispatch, PlanRef, SubagentRef } from "../lib/dispatch.js"; +import type { RuntimeContext } from "../lib/runtime-context.js"; import { registerWorkflowTools } from "./workflow.js"; -import { registerPlanGetterTools } from "./getters.js"; -import { registerPlanSetterTools } from "./setters.js"; -import { registerPlanDesignEntityTools } from "./entity-design.js"; -import { registerPlanCodeEntityTools } from "./entity-code.js"; -import { registerPlanStructureEntityTools } from "./entity-structure.js"; -import { registerQRTools } from "./qr.js"; +import { registerOrchestratorTools } from "./orchestrator.js"; import { registerAskTools } from "./ask.js"; -export type { WorkflowDispatch, PlanRef, SubagentRef, StepResult } from "../lib/dispatch.js"; -export { - createDispatch, - createPlanRef, - createSubagentRef, - hookDispatch, - unhookDispatch, -} from "../lib/dispatch.js"; +export type { RuntimeContext } from "../lib/runtime-context.js"; +export { createRuntimeContext } from "../lib/runtime-context.js"; -export function registerAllTools( - pi: ExtensionAPI, - planRef: PlanRef, - dispatch: WorkflowDispatch, - subagentRef: SubagentRef, -): void { - registerWorkflowTools(pi, dispatch); - registerPlanGetterTools(pi, planRef); - registerPlanSetterTools(pi, planRef); - registerPlanDesignEntityTools(pi, planRef); - registerPlanCodeEntityTools(pi, planRef); - registerPlanStructureEntityTools(pi, planRef); - registerQRTools(pi, planRef); - registerAskTools(pi, subagentRef); +export function registerAllTools(pi: ExtensionAPI, ctx: RuntimeContext): void { + registerWorkflowTools(pi, ctx); + registerOrchestratorTools(pi, 
ctx); + registerAskTools(pi, ctx); } diff --git a/src/planner/tools/orchestrator.ts b/src/planner/tools/orchestrator.ts new file mode 100644 index 0000000..0348aa4 --- /dev/null +++ b/src/planner/tools/orchestrator.ts @@ -0,0 +1,239 @@ +// Orchestrator tools: four tools for the orchestrator subagent to advance +// story lifecycle state. koan_escalate is eliminated per §11.3.1 — the +// orchestrator uses koan_ask_question for all user communication. +// +// Each tool: +// 1. Validates that the story is in the correct source state (§11.4/§11.12) +// 2. Writes JSON state (for driver polling) +// 3. Writes templated markdown status.md (for LLM reads, §11.5.4) + +import { promises as fs } from "node:fs"; +import * as path from "node:path"; + +import { Type } from "@sinclair/typebox"; +import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; + +import type { RuntimeContext } from "../lib/runtime-context.js"; +import { loadStoryState, saveStoryState } from "../epic/state.js"; +import type { StoryStatus } from "../types.js"; + +// -- Helpers -- + +function now(): string { + return new Date().toISOString(); +} + +function storyDir(epicDir: string, storyId: string): string { + return path.join(epicDir, "stories", storyId); +} + +async function writeStatusMd(epicDir: string, storyId: string, content: string): Promise<void> { + const dir = storyDir(epicDir, storyId); + const target = path.join(dir, "status.md"); + const tmp = path.join(dir, "status.md.tmp"); + await fs.writeFile(tmp, content, "utf8"); + await fs.rename(tmp, target); +} + +// §11.5.4 templated status.md format. 
+function statusMd( + storyId: string, + status: StoryStatus, + lastAction: string, + verificationSummary: string, + notes: string, +): string { + return [ + `# Status: ${status}`, + "", + "## Last Action", + lastAction, + "", + "## Verification Summary", + verificationSummary, + "", + "## Notes", + notes, + "", + ].join("\n"); +} + +function requireEpicDir(ctx: RuntimeContext): string { + if (!ctx.epicDir) { + throw new Error("Epic directory is not set. Is this running inside a koan subagent?"); + } + return ctx.epicDir; +} + +// Validates story status against allowed source statuses. Throws on mismatch. +export function assertStatus(storyId: string, current: StoryStatus, allowed: StoryStatus[]): void { + if (!allowed.includes(current)) { + const listed = allowed.map((s) => `'${s}'`).join(" or "); + throw new Error( + `Cannot transition story '${storyId}': expected status ${listed}, got '${current}'.`, + ); + } +} + +// -- Tool registration -- + +export function registerOrchestratorTools(pi: ExtensionAPI, ctx: RuntimeContext): void { + // -- koan_select_story -- + // Valid source statuses: pending, retry (§11.4) + + pi.registerTool({ + name: "koan_select_story", + label: "Select story for execution", + description: "Mark a pending or retried story as selected for execution. Valid only when the story is in 'pending' or 'retry' status.", + parameters: Type.Object({ + story_id: Type.String({ description: "The story ID to select." 
}), + }), + async execute(_toolCallId, params) { + const { story_id } = params as { story_id: string }; + const epicDir = requireEpicDir(ctx); + const ts = now(); + + const state = await loadStoryState(epicDir, story_id); + assertStatus(story_id, state.status, ["pending", "retry"]); + + await saveStoryState(epicDir, story_id, { ...state, status: "selected", updatedAt: ts }); + await writeStatusMd( + epicDir, story_id, + statusMd(story_id, "selected", `Selected at: ${ts}`, "(pending — not yet verified)", ""), + ); + + return { + content: [{ type: "text" as const, text: `Story '${story_id}' selected.` }], + details: undefined, + }; + }, + }); + + // -- koan_complete_story -- + // Valid source status: verifying (§11.4) + + pi.registerTool({ + name: "koan_complete_story", + label: "Complete story", + description: "Mark a story as done after verifying all acceptance criteria are met. Only valid when story is in 'verifying' status.", + parameters: Type.Object({ + story_id: Type.String({ description: "The story ID to mark as done." }), + verification_summary: Type.Optional(Type.String({ + description: "Summary of verification checks that passed.", + })), + }), + async execute(_toolCallId, params) { + const { story_id, verification_summary } = params as { + story_id: string; + verification_summary?: string; + }; + const epicDir = requireEpicDir(ctx); + const ts = now(); + + const state = await loadStoryState(epicDir, story_id); + assertStatus(story_id, state.status, ["verifying"]); + + await saveStoryState(epicDir, story_id, { ...state, status: "done", updatedAt: ts }); + await writeStatusMd( + epicDir, story_id, + statusMd( + story_id, "done", + `Completed at: ${ts}`, + verification_summary ?? 
"All checks passed.", + "", + ), + ); + + return { + content: [{ type: "text" as const, text: `Story '${story_id}' completed.` }], + details: undefined, + }; + }, + }); + + // -- koan_retry_story -- + // Valid source status: verifying (§11.4) + + pi.registerTool({ + name: "koan_retry_story", + label: "Retry story", + description: "Mark a story for retry and record why the previous attempt failed. Only valid when story is in 'verifying' status.", + parameters: Type.Object({ + story_id: Type.String({ description: "The story ID to retry." }), + failure_summary: Type.String({ + description: "Concrete description of what went wrong. Include failing commands, error messages, and what the executor should do differently.", + }), + }), + async execute(_toolCallId, params) { + const { story_id, failure_summary } = params as { story_id: string; failure_summary: string }; + const epicDir = requireEpicDir(ctx); + const ts = now(); + + const state = await loadStoryState(epicDir, story_id); + assertStatus(story_id, state.status, ["verifying"]); + + await saveStoryState(epicDir, story_id, { + ...state, + status: "retry", + updatedAt: ts, + failureSummary: failure_summary, + }); + await writeStatusMd( + epicDir, story_id, + statusMd( + story_id, "retry", + `Queued for retry at: ${ts}`, + "Failed — see Notes for details.", + failure_summary, + ), + ); + + return { + content: [{ type: "text" as const, text: `Story '${story_id}' queued for retry.` }], + details: undefined, + }; + }, + }); + + // -- koan_skip_story -- + // Valid source statuses: pending, retry (§11.4) + + pi.registerTool({ + name: "koan_skip_story", + label: "Skip story", + description: "Mark a pending or retried story as skipped and record the reason. Valid when story is in 'pending' or 'retry' status.", + parameters: Type.Object({ + story_id: Type.String({ description: "The story ID to skip." }), + reason: Type.String({ description: "Why this story is being skipped." 
}), + }), + async execute(_toolCallId, params) { + const { story_id, reason } = params as { story_id: string; reason: string }; + const epicDir = requireEpicDir(ctx); + const ts = now(); + + const state = await loadStoryState(epicDir, story_id); + assertStatus(story_id, state.status, ["pending", "retry"]); + + await saveStoryState(epicDir, story_id, { + ...state, + status: "skipped", + updatedAt: ts, + skipReason: reason, + }); + await writeStatusMd( + epicDir, story_id, + statusMd( + story_id, "skipped", + `Skipped at: ${ts}`, + "(not executed)", + reason, + ), + ); + + return { + content: [{ type: "text" as const, text: `Story '${story_id}' skipped.` }], + details: undefined, + }; + }, + }); +} From f00d78ab18a48c449037bb968a96ce0875a544b8 Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Fri, 13 Mar 2026 12:45:43 +0700 Subject: [PATCH 044/412] feat(planner): wire epic driver, spawners, and review widget --- extensions/koan.ts | 154 ++++++---- src/planner/driver.ts | 539 ++++++++++++++++++++++++++++++++++ src/planner/subagent.ts | 237 +++++++++------ src/planner/ui/epic-widget.ts | 243 +++++++++++++++ src/planner/ui/spec-review.ts | 152 ++++++++++ 5 files changed, 1175 insertions(+), 150 deletions(-) create mode 100644 src/planner/driver.ts create mode 100644 src/planner/ui/epic-widget.ts create mode 100644 src/planner/ui/spec-review.ts diff --git a/extensions/koan.ts b/extensions/koan.ts index ec475a6..24e0efc 100644 --- a/extensions/koan.ts +++ b/extensions/koan.ts @@ -1,18 +1,23 @@ // Entry point for the koan pi extension. Serves dual roles: parent session -// (registers koan_plan tool and /koan-execute, /koan-status, /koan commands) -// and subagent mode (dispatches to phase workflow via CLI flags). All tools -// register unconditionally at init; phases restrict access via tool_call -// blocking at runtime. - +// (registers koan_plan tool and /koan commands) and subagent mode (dispatches +// to phase workflow via CLI flags). 
All tools register unconditionally at init; +// phases restrict access via tool_call blocking at runtime. +// +// RuntimeContext replaces the three separate mutable refs (PlanRef, +// SubagentRef, WorkflowDispatch) used in the previous design. + +import * as path from "node:path"; import { Type } from "@sinclair/typebox"; import type { ExtensionAPI, ExtensionContext } from "@mariozechner/pi-coding-agent"; -import { createSession } from "../src/planner/session.js"; import { detectSubagentMode, dispatchPhase } from "../src/planner/phases/dispatch.js"; -import { registerAllTools, createDispatch, createPlanRef, createSubagentRef } from "../src/planner/tools/index.js"; -import { createLogger } from "../src/utils/logger.js"; +import { registerAllTools, createRuntimeContext } from "../src/planner/tools/index.js"; +import { createLogger, setLogDir } from "../src/utils/logger.js"; import { EventLog, extractToolEvent } from "../src/planner/lib/audit.js"; import { openKoanConfig } from "../src/planner/ui/config/menu.js"; +import { createEpicDirectory } from "../src/planner/epic/state.js"; +import { exportConversation } from "../src/planner/conversation.js"; +import { runEpicPipeline } from "../src/planner/driver.js"; function currentModelId(ctx: ExtensionContext): string | null { const model = ctx.model; @@ -23,76 +28,68 @@ function currentModelId(ctx: ExtensionContext): string | null { export default function koan(pi: ExtensionAPI): void { const log = createLogger("Koan"); + // -- Flags -- pi.registerFlag("koan-role", { - description: "Koan subagent role (reserved)", + description: "Koan subagent role", type: "string", default: "", }); - - pi.registerFlag("koan-phase", { - description: "Koan workflow phase (reserved)", + pi.registerFlag("koan-epic-dir", { + description: "Koan epic directory path", type: "string", default: "", }); - - pi.registerFlag("koan-plan-dir", { - description: "Koan plan directory path", + pi.registerFlag("koan-subagent-dir", { + description: "Koan 
subagent working directory", type: "string", default: "", }); - - pi.registerFlag("koan-subagent-dir", { - description: "Koan subagent working directory", + pi.registerFlag("koan-story-id", { + description: "Current story ID for per-story subagents", type: "string", default: "", }); - - pi.registerFlag("koan-qr-item", { - description: "QR item ID(s) for reviewer subagent (comma-separated for groups)", + pi.registerFlag("koan-step-sequence", { + description: "Orchestrator step sequence (pre-execution or post-execution)", type: "string", default: "", }); - - pi.registerFlag("koan-fix", { - description: "QR phase to fix (e.g. plan-design)", + pi.registerFlag("koan-retry-context", { + description: "Failure context from previous execution attempt", type: "string", default: "", }); - // Pi snapshots tools during _buildRuntime() at init. All 44 tools - // register here unconditionally. Phases restrict access via tool_call - // blocking at runtime. - const dispatch = createDispatch(); - const planRef = createPlanRef(); - const subagentRef = createSubagentRef(); + // RuntimeContext: single mutable object that carries epicDir, subagentDir, + // and the active onCompleteStep handler. Replaces the old PlanRef + + // SubagentRef + WorkflowDispatch triple. + const ctx = createRuntimeContext(); - registerAllTools(pi, planRef, dispatch, subagentRef); + registerAllTools(pi, ctx); - // Subagent detection runs at before_agent_start (flags - // are unavailable during init). let dispatched = false; - pi.on("before_agent_start", async (_event, ctx) => { + pi.on("before_agent_start", async (_event, extCtx) => { if (dispatched) return; dispatched = true; + const config = detectSubagentMode(pi); if (config) { - const planDir = pi.getFlag("koan-plan-dir") as string; - if (planDir) { - planRef.dir = planDir; + // Populate RuntimeContext from CLI flags. + if (config.epicDir) { + ctx.epicDir = config.epicDir; } - // EventLog exists only in subagent mode. Parent mode has no audit log. 
- // Model identity is captured by the subagent itself and persisted in - // state.json for parent widget rendering. let eventLog: EventLog | undefined; if (config.subagentDir) { - eventLog = new EventLog(config.subagentDir, config.role, config.phase, currentModelId(ctx)); + ctx.subagentDir = config.subagentDir; + eventLog = new EventLog( + config.subagentDir, + config.role, + config.role, + currentModelId(extCtx), + ); await eventLog.open(); - subagentRef.dir = config.subagentDir; - // Capture all tool results for the audit trail. Graduated detail: - // file paths for read/edit/write, binary name for bash, full - // input+response for koan_* tools, name-only for everything else. pi.on("tool_result", (event) => { void eventLog!.append(extractToolEvent(event as { toolName: string; @@ -107,13 +104,16 @@ export default function koan(pi: ExtensionAPI): void { }); } - await dispatchPhase(pi, config, dispatch, planRef, log, eventLog); + await dispatchPhase(pi, config, ctx, log, eventLog); } }); - // Session: parent-mode workflow engine. - const session = createSession(pi, dispatch, planRef); - + // -- koan_plan tool -- + // Requires an interactive terminal session: subagents use koan_ask_question + // and koan_request_scouts, which are answered by the IPC responder running + // in the parent session. Without a UI, no IPC responder starts and any + // subagent calling those tools will poll ipc.json forever, hanging the + // pipeline permanently. pi.registerTool({ name: "koan_plan", label: "Plan", @@ -123,41 +123,69 @@ export default function koan(pi: ExtensionAPI): void { "is too large to implement directly.", "", "The current conversation is automatically captured — it becomes the", - "planning context. The pipeline spawns specialized agents (architect,", - "developer, writer) that read the conversation history to understand", - "the task, then produce a structured plan with milestones, code intents,", - "and quality review.", + "planning context. 
The pipeline spawns specialized agents that decompose", + "the task into stories and execute them one at a time.", "", - "This is a long-running operation (5-15 minutes). Do not invoke for", - "simple tasks that can be done in a single pass.", + "This is a long-running operation. Do not invoke for simple tasks.", ].join("\n"), parameters: Type.Object({}), - async execute(toolCallId, params, signal, onUpdate, ctx) { - return await session.plan(ctx); + async execute(_toolCallId, _params, _signal, _onUpdate, extCtx) { + // koan_plan requires an interactive terminal session. Subagents use + // koan_ask_question and koan_request_scouts, which are answered by the + // IPC responder that only starts when a UI is present. Without a UI, + // subagents would poll ipc.json forever and the pipeline would hang. + if (!extCtx.hasUI) { + return { + content: [{ type: "text" as const, text: "koan_plan requires an interactive terminal session." }], + details: undefined, + }; + } + + const epicInfo = await createEpicDirectory("", extCtx.cwd); + ctx.epicDir = epicInfo.directory; + setLogDir(epicInfo.directory); + + await exportConversation(extCtx.sessionManager, epicInfo.directory); + log("Conversation exported", { epicDir: epicInfo.directory }); + + const extensionPath = path.resolve(import.meta.dirname, "koan.ts"); + const ui = extCtx.hasUI ? extCtx.ui : null; + + const result = await runEpicPipeline(epicInfo.directory, extCtx.cwd, extensionPath, log, ui); + + return { + content: [{ type: "text" as const, text: result.summary }], + details: undefined, + }; }, }); + // -- Commands -- pi.registerCommand("koan", { description: "Koan commands. 
Usage: /koan config", - handler: async (args, ctx) => { + handler: async (args, extCtx) => { const subcommand = args.trim(); if (subcommand === "config") { - await openKoanConfig(ctx); + await openKoanConfig(extCtx); } else if (subcommand === "") { - ctx.ui.notify("Usage: /koan config", "info"); + extCtx.ui.notify("Usage: /koan config", "info"); } else { - ctx.ui.notify(`Unknown koan subcommand: "${subcommand}". Usage: /koan config`, "warning"); + extCtx.ui.notify(`Unknown koan subcommand: "${subcommand}". Usage: /koan config`, "warning"); } }, }); pi.registerCommand("koan-execute", { description: "Execute a koan plan", - handler: async (_args, ctx) => { await session.execute(ctx); }, + handler: async (_args, extCtx) => { + extCtx.ui.notify("Execution mode is not yet implemented.", "warning"); + }, }); pi.registerCommand("koan-status", { description: "Show koan workflow status", - handler: async (_args, ctx) => { await session.status(ctx); }, + handler: async (_args, extCtx) => { + extCtx.ui.notify("Status: idle", "info"); + }, }); } diff --git a/src/planner/driver.ts b/src/planner/driver.ts new file mode 100644 index 0000000..5d03543 --- /dev/null +++ b/src/planner/driver.ts @@ -0,0 +1,539 @@ +// Epic pipeline driver — deterministic coordinator for the full epic lifecycle. +// Reads JSON state and exit codes; applies routing rules. Never parses markdown. +// Per AGENTS.md: driver owns .json state; LLMs own .md files. 
+ +import type { ExtensionUIContext } from "@mariozechner/pi-coding-agent"; + +import { + loadEpicState, + saveEpicState, + loadStoryState, + saveStoryState, + loadAllStoryStates, + ensureSubagentDirectory, + ensureStoryDirectory, + discoverStoryIds, +} from "./epic/state.js"; +import { + spawnIntake, + spawnDecomposer, + spawnOrchestrator, + spawnPlanner, + spawnExecutor, +} from "./subagent.js"; +import type { Logger } from "../utils/logger.js"; +import type { StoryState } from "./epic/types.js"; +import { readRecentLogs, readProjection } from "./lib/audit.js"; +import { EpicWidgetController } from "./ui/epic-widget.js"; +import { reviewStorySketches } from "./ui/spec-review.js"; + +// --------------------------------------------------------------------------- +// Routing +// --------------------------------------------------------------------------- + +interface RoutingDecision { + action: "execute" | "retry" | "complete" | "error"; + storyId?: string; + error?: string; +} + +// Simplified routing — no escalation path per §11.3.1 and §11.6.3. +// Retry budget exhaustion is handled inside the retry case (skip + notify). +function routeFromState(stories: StoryState[], log: Logger): RoutingDecision { + // Priority order: + // 1. Any story with status 'retry'? → check budget, then re-execute or skip + // 2. Any story with status 'selected'? → execute it + // 3. All stories terminal? → complete + // 4. 
None of the above → error + + const retry = stories.find((s) => s.status === "retry"); + if (retry) { + log("Routing: retry", { storyId: retry.storyId }); + return { action: "retry", storyId: retry.storyId }; + } + + const selected = stories.find((s) => s.status === "selected"); + if (selected) { + log("Routing: execute", { storyId: selected.storyId }); + return { action: "execute", storyId: selected.storyId }; + } + + const terminal = new Set(["done", "skipped"]); + const allTerminal = stories.every((s) => terminal.has(s.status)); + if (allTerminal && stories.length > 0) { + log("Routing: complete", { total: stories.length }); + return { action: "complete" }; + } + + return { + action: "error", + error: "No actionable story state found (orchestrator may have exited without a routing decision)", + }; +} + +// --------------------------------------------------------------------------- +// Active widget polling (§11.6.1) +// --------------------------------------------------------------------------- + +// Starts a 2s polling interval that reads the active subagent's projection +// and log tail, then updates the widget. Interval is unref'd so it does not +// prevent process exit. +function startActivePolling( + activeSubagentDir: string, + widget: EpicWidgetController, + startedAt: number, + role: string, + storyId?: string, +): () => void { + const timer = setInterval(async () => { + try { + const [projection, logs] = await Promise.all([ + readProjection(activeSubagentDir), + readRecentLogs(activeSubagentDir), + ]); + widget.update({ logLines: logs }); + if (projection) { + widget.update({ + activeSubagent: { + role, + storyId, + step: projection.step, + totalSteps: projection.totalSteps, + stepName: projection.stepName, + startedAt, + }, + }); + } + } catch { + // Non-fatal — polling is best-effort. 
+ } + }, 2000); + timer.unref(); + return () => clearInterval(timer); +} + +// --------------------------------------------------------------------------- +// Phase A helpers +// --------------------------------------------------------------------------- + +async function runIntake( + epicDir: string, + cwd: string, + extensionPath: string, + log: Logger, + ui: ExtensionUIContext | null, + widget: EpicWidgetController | null, +): Promise { + const subagentDir = await ensureSubagentDirectory(epicDir, "intake"); + const startedAt = Date.now(); + let stopPolling: (() => void) | undefined; + if (widget) { + widget.update({ activeSubagent: { role: "intake", step: 0, totalSteps: 3, stepName: "", startedAt } }); + stopPolling = startActivePolling(subagentDir, widget, startedAt, "intake"); + } + const result = await spawnIntake({ epicDir, subagentDir, cwd, extensionPath, log, ui: ui ?? undefined }); + stopPolling?.(); + if (widget) { + const logs = await readRecentLogs(subagentDir); + widget.update({ logLines: logs, activeSubagent: null }); + } + if (result.exitCode !== 0) { + log("Intake failed", { exitCode: result.exitCode }); + return false; + } + return true; +} + +async function runDecomposer( + epicDir: string, + cwd: string, + extensionPath: string, + log: Logger, + ui: ExtensionUIContext | null, + widget: EpicWidgetController | null, +): Promise { + const subagentDir = await ensureSubagentDirectory(epicDir, "decomposer"); + const startedAt = Date.now(); + let stopPolling: (() => void) | undefined; + if (widget) { + widget.update({ activeSubagent: { role: "decomposer", step: 0, totalSteps: 2, stepName: "", startedAt } }); + stopPolling = startActivePolling(subagentDir, widget, startedAt, "decomposer"); + } + const result = await spawnDecomposer({ epicDir, subagentDir, cwd, extensionPath, log, ui: ui ?? 
undefined }); + stopPolling?.(); + if (widget) { + const logs = await readRecentLogs(subagentDir); + widget.update({ logLines: logs, activeSubagent: null }); + } + if (result.exitCode !== 0) { + log("Decomposer failed", { exitCode: result.exitCode }); + return false; + } + return true; +} + +// --------------------------------------------------------------------------- +// Phase B helpers +// --------------------------------------------------------------------------- + +async function runStoryExecution( + epicDir: string, + cwd: string, + extensionPath: string, + storyId: string, + log: Logger, + ui: ExtensionUIContext | null, + widget: EpicWidgetController | null, +): Promise { + // 1. Set status to 'planning'. + const story = await loadStoryState(epicDir, storyId); + await saveStoryState(epicDir, storyId, { + ...story, + status: "planning", + updatedAt: new Date().toISOString(), + }); + + // 2. Spawn planner. + const plannerDir = await ensureSubagentDirectory(epicDir, `planner-${storyId}`); + const plannerStarted = Date.now(); + let stopPolling: (() => void) | undefined; + if (widget) { + widget.update({ + activeSubagent: { role: "planner", storyId, step: 0, totalSteps: 3, stepName: "", startedAt: plannerStarted }, + }); + stopPolling = startActivePolling(plannerDir, widget, plannerStarted, "planner", storyId); + } + + const planResult = await spawnPlanner({ epicDir, subagentDir: plannerDir, cwd, extensionPath, storyId, log, ui: ui ?? 
undefined }); + stopPolling?.(); + + if (widget) { + const logs = await readRecentLogs(plannerDir); + widget.update({ logLines: logs }); + } + + if (planResult.exitCode !== 0) { + log("Planner failed — skipping executor, proceeding to post-execution orchestrator", { + storyId, exitCode: planResult.exitCode, + }); + + const s2 = await loadStoryState(epicDir, storyId); + await saveStoryState(epicDir, storyId, { + ...s2, + status: "verifying", + updatedAt: new Date().toISOString(), + }); + + const postDir = await ensureSubagentDirectory(epicDir, `orchestrator-post-${storyId}`); + const orchStarted = Date.now(); + if (widget) { + widget.update({ activeSubagent: { role: "orchestrator", storyId, step: 0, totalSteps: 4, stepName: "", startedAt: orchStarted } }); + stopPolling = startActivePolling(postDir, widget, orchStarted, "orchestrator", storyId); + } + + await spawnOrchestrator({ epicDir, subagentDir: postDir, cwd, extensionPath, stepSequence: "post-execution", storyId, log, ui: ui ?? undefined }); + stopPolling?.(); + + if (widget) { + const logs = await readRecentLogs(postDir); + widget.update({ logLines: logs }); + } + return; + } + + // 3. Set status to 'executing'. + const s3 = await loadStoryState(epicDir, storyId); + await saveStoryState(epicDir, storyId, { + ...s3, + status: "executing", + updatedAt: new Date().toISOString(), + }); + + // 4. Spawn executor. + const execDir = await ensureSubagentDirectory(epicDir, `executor-${storyId}`); + const execStarted = Date.now(); + if (widget) { + widget.update({ activeSubagent: { role: "executor", storyId, step: 0, totalSteps: 2, stepName: "", startedAt: execStarted } }); + stopPolling = startActivePolling(execDir, widget, execStarted, "executor", storyId); + } + + const execResult = await spawnExecutor({ epicDir, subagentDir: execDir, cwd, extensionPath, storyId, log, ui: ui ?? 
undefined }); + stopPolling?.(); + + if (widget) { + const logs = await readRecentLogs(execDir); + widget.update({ logLines: logs }); + } + + if (execResult.exitCode !== 0) { + log("Executor failed", { storyId, exitCode: execResult.exitCode }); + } + + // 5. Set status to 'verifying'. + const s4 = await loadStoryState(epicDir, storyId); + await saveStoryState(epicDir, storyId, { + ...s4, + status: "verifying", + updatedAt: new Date().toISOString(), + }); + + // 6. Spawn orchestrator (post-execution) — writes verdict to story state. + const postDir = await ensureSubagentDirectory(epicDir, `orchestrator-post-${storyId}`); + const orchStarted = Date.now(); + if (widget) { + widget.update({ activeSubagent: { role: "orchestrator", storyId, step: 0, totalSteps: 4, stepName: "", startedAt: orchStarted } }); + stopPolling = startActivePolling(postDir, widget, orchStarted, "orchestrator", storyId); + } + + await spawnOrchestrator({ epicDir, subagentDir: postDir, cwd, extensionPath, stepSequence: "post-execution", storyId, log, ui: ui ?? undefined }); + stopPolling?.(); + + if (widget) { + const logs = await readRecentLogs(postDir); + widget.update({ logLines: logs }); + } +} + +// retryCount is the 1-based retry attempt number (1 for first retry, 2 for +// second, etc.). It is included in directory names so each retry gets its own +// isolated stdout.log and events.jsonl, preventing directory collision when +// DEFAULT_MAX_RETRIES > 1. 
+async function runStoryReexecution( + epicDir: string, + cwd: string, + extensionPath: string, + storyId: string, + retryCount: number, + failureContext: string | undefined, + log: Logger, + ui: ExtensionUIContext | null, + widget: EpicWidgetController | null, +): Promise { + const execDir = await ensureSubagentDirectory(epicDir, `executor-${storyId}-retry-${retryCount}`); + const execStarted = Date.now(); + let stopPolling: (() => void) | undefined; + if (widget) { + widget.update({ activeSubagent: { role: "executor", storyId, step: 0, totalSteps: 2, stepName: "retry", startedAt: execStarted } }); + stopPolling = startActivePolling(execDir, widget, execStarted, "executor", storyId); + } + + await spawnExecutor({ epicDir, subagentDir: execDir, cwd, extensionPath, storyId, retryContext: failureContext, log, ui: ui ?? undefined }); + stopPolling?.(); + + if (widget) { + const logs = await readRecentLogs(execDir); + widget.update({ logLines: logs }); + } + + const story = await loadStoryState(epicDir, storyId); + await saveStoryState(epicDir, storyId, { + ...story, + status: "verifying", + updatedAt: new Date().toISOString(), + }); + + const postDir = await ensureSubagentDirectory(epicDir, `orchestrator-post-${storyId}-retry-${retryCount}`); + const orchStarted = Date.now(); + if (widget) { + widget.update({ activeSubagent: { role: "orchestrator", storyId, step: 0, totalSteps: 4, stepName: "", startedAt: orchStarted } }); + stopPolling = startActivePolling(postDir, widget, orchStarted, "orchestrator", storyId); + } + + await spawnOrchestrator({ epicDir, subagentDir: postDir, cwd, extensionPath, stepSequence: "post-execution", storyId, log, ui: ui ?? 
undefined }); + stopPolling?.(); + + if (widget) { + const logs = await readRecentLogs(postDir); + widget.update({ logLines: logs }); + } +} + +async function refreshWidgetStories(epicDir: string, widget: EpicWidgetController): Promise { + try { + const stories = await loadAllStoryStates(epicDir); + widget.update({ stories: stories.map((s) => ({ storyId: s.storyId, status: s.status })) }); + } catch { + // Non-fatal — widget update is best-effort. + } +} + +async function runStoryLoop( + epicDir: string, + cwd: string, + extensionPath: string, + log: Logger, + ui: ExtensionUIContext | null, + widget: EpicWidgetController | null, +): Promise<{ success: boolean; summary: string }> { + { + + // 2. Spawn orchestrator (pre-execution) — selects first story. + const preDir = await ensureSubagentDirectory(epicDir, "orchestrator-pre"); + const preStarted = Date.now(); + let stopPolling: (() => void) | undefined; + if (widget) { + widget.update({ activeSubagent: { role: "orchestrator", step: 0, totalSteps: 2, stepName: "pre-execution", startedAt: preStarted } }); + stopPolling = startActivePolling(preDir, widget, preStarted, "orchestrator"); + } + + const preResult = await spawnOrchestrator({ epicDir, subagentDir: preDir, cwd, extensionPath, stepSequence: "pre-execution", log, ui: ui ?? undefined }); + stopPolling?.(); + + if (preResult.exitCode !== 0) { + return { success: false, summary: "Pre-execution orchestrator failed" }; + } + + if (widget) await refreshWidgetStories(epicDir, widget); + + // 3. Story execution loop — route until terminal state. 
+ while (true) { + const stories = await loadAllStoryStates(epicDir); + if (widget) { + widget.update({ stories: stories.map((s) => ({ storyId: s.storyId, status: s.status })) }); + } + + const routing = routeFromState(stories, log); + + switch (routing.action) { + case "execute": { + const storyId = routing.storyId as string; + await runStoryExecution(epicDir, cwd, extensionPath, storyId, log, ui, widget); + if (widget) await refreshWidgetStories(epicDir, widget); + break; + } + + case "retry": { + const storyId = routing.storyId as string; + const story = stories.find((s) => s.storyId === storyId) as StoryState; + + // Retry budget exhaustion: skip + notify per §11.6.3. + if (story.retryCount >= story.maxRetries) { + log("Retry budget exhausted, skipping story", { storyId, retryCount: story.retryCount }); + await saveStoryState(epicDir, storyId, { + ...story, + status: "skipped", + skipReason: `Retry budget exhausted after ${story.retryCount} attempt(s). Last failure: ${story.failureSummary ?? "(none recorded)"}`, + updatedAt: new Date().toISOString(), + }); + ui?.notify(`Story ${storyId} skipped after ${story.retryCount} failed attempt(s).`, "warning"); + if (widget) await refreshWidgetStories(epicDir, widget); + // Continue loop — other stories may still be runnable. 
+ continue; + } + + await saveStoryState(epicDir, storyId, { + ...story, + status: "executing", + retryCount: story.retryCount + 1, + updatedAt: new Date().toISOString(), + }); + await runStoryReexecution(epicDir, cwd, extensionPath, storyId, story.retryCount + 1, story.failureSummary, log, ui, widget); + if (widget) await refreshWidgetStories(epicDir, widget); + break; + } + + case "complete": { + const done = stories.filter((s) => s.status === "done").length; + const skipped = stories.filter((s) => s.status === "skipped").length; + if (widget) widget.update({ activeSubagent: null }); + return { success: true, summary: `Epic complete: ${done} done, ${skipped} skipped` }; + } + + case "error": + return { success: false, summary: routing.error as string }; + } + } + } +} + +// --------------------------------------------------------------------------- +// Public API +// --------------------------------------------------------------------------- + +export async function runEpicPipeline( + epicDir: string, + cwd: string, + extensionPath: string, + log: Logger, + ui: ExtensionUIContext | null, +): Promise<{ success: boolean; summary: string }> { + // Widget created at pipeline start — spans the full epic lifecycle (Phase A + B). + // Widget is an observation layer: receives one-way update() calls, never + // influences routing decisions. + const epicState = await loadEpicState(epicDir); + const widget = ui ? new EpicWidgetController(ui, epicState.epicId) : null; + + try { + // Phase A: Epic Creation. 
+ ui?.notify("Starting intake...", "info"); + await saveEpicState(epicDir, { ...epicState, phase: "intake" }); + if (widget) widget.update({ epicPhase: "intake" }); + + const intakeOk = await runIntake(epicDir, cwd, extensionPath, log, ui, widget); + if (!intakeOk) return { success: false, summary: "Intake phase failed" }; + + const afterIntake = await loadEpicState(epicDir); + await saveEpicState(epicDir, { ...afterIntake, phase: "decomposition" }); + if (widget) widget.update({ epicPhase: "decomposition" }); + + const decompOk = await runDecomposer(epicDir, cwd, extensionPath, log, ui, widget); + if (!decompOk) return { success: false, summary: "Decomposition phase failed" }; + + // Discover stories by scanning the filesystem — per AGENTS.md invariant, + // LLMs write markdown files only. The decomposer wrote stories/{id}/story.md + // files; the driver scans to discover IDs and populates epic-state.json. + const storyIds = await discoverStoryIds(epicDir); + log("Discovered story IDs", { count: storyIds.length, ids: storyIds }); + + for (const storyId of storyIds) { + await ensureStoryDirectory(epicDir, storyId); + } + + const afterDecomp = await loadEpicState(epicDir); + await saveEpicState(epicDir, { ...afterDecomp, stories: storyIds, phase: "review" }); + if (widget) { + widget.update({ epicPhase: "review" }); + const initialStories = await loadAllStoryStates(epicDir); + widget.update({ stories: initialStories.map((s) => ({ storyId: s.storyId, status: s.status })) }); + } + + // Spec review gate — present story sketches for human approval if UI is available. + if (ui && storyIds.length > 0) { + ui.notify("Decomposition complete. 
Review story sketches...", "info"); + const reviewResult = await reviewStorySketches(epicDir, storyIds, ui); + log("Spec review complete", { approved: reviewResult.approved.length, skipped: reviewResult.skipped.length }); + + for (const skippedId of reviewResult.skipped) { + const skippedStory = await loadStoryState(epicDir, skippedId); + await saveStoryState(epicDir, skippedId, { + ...skippedStory, + status: "skipped", + skipReason: "Removed during spec review", + updatedAt: new Date().toISOString(), + }); + } + + const reviewedState = await loadEpicState(epicDir); + await saveEpicState(epicDir, { ...reviewedState, stories: storyIds }); + } else { + log("Spec review gate: auto-approving (no UI or no stories)"); + } + + // Phase B: Execution. + const beforeExec = await loadEpicState(epicDir); + await saveEpicState(epicDir, { ...beforeExec, phase: "executing" }); + if (widget) widget.update({ epicPhase: "executing" }); + + const result = await runStoryLoop(epicDir, cwd, extensionPath, log, ui, widget); + + if (result.success) { + const afterExec = await loadEpicState(epicDir); + await saveEpicState(epicDir, { ...afterExec, phase: "completed" }); + if (widget) widget.update({ epicPhase: "completed" }); + } + + return result; + } finally { + widget?.destroy(); + } +} diff --git a/src/planner/subagent.ts b/src/planner/subagent.ts index b4fb0a9..99c637d 100644 --- a/src/planner/subagent.ts +++ b/src/planner/subagent.ts @@ -1,15 +1,21 @@ // Subagent spawn helpers. Each public function delegates to spawnSubagent, // which handles process lifecycle, stdout/stderr routing to disk, and -// exit-code normalization. Spawn errors resolve (not reject) so the caller -// can always read exitCode without try/catch. +// exit-code normalization. When a UI context is provided, an IPC responder +// runs concurrently so subagents can ask questions and request scouts. 
import { spawn } from "node:child_process"; import { createWriteStream } from "node:fs"; import * as path from "node:path"; +import type { ExtensionUIContext } from "@mariozechner/pi-coding-agent"; + import { createLogger, type Logger } from "../utils/logger.js"; +import type { SubagentRole, StepSequence } from "./types.js"; +import { resolveModelForRole } from "./model-resolver.js"; +import { runIpcResponder, type ScoutSpawnContext } from "./lib/ipc-responder.js"; +import type { ScoutTask } from "./lib/ipc.js"; -type WorkPhaseKey = "plan-design" | "plan-code" | "plan-docs"; +// -- Result type -- export interface SubagentResult { exitCode: number; @@ -17,59 +23,39 @@ export interface SubagentResult { subagentDir: string; } -export interface SpawnWorkOptions { - planDir: string; - subagentDir: string; - cwd: string; - extensionPath: string; - initialPrompt?: string; - modelOverride?: string; - log?: Logger; -} +// -- Public spawn option types -- -export interface SpawnFixOptions { - planDir: string; +export interface SpawnOptions { + epicDir: string; subagentDir: string; cwd: string; extensionPath: string; - fixPhase: WorkPhaseKey; modelOverride?: string; log?: Logger; + ui?: ExtensionUIContext; } -export interface SpawnQRDecomposerOptions { - planDir: string; - subagentDir: string; - cwd: string; - extensionPath: string; - phase: WorkPhaseKey; - modelOverride?: string; - log?: Logger; +export interface SpawnStoryOptions extends SpawnOptions { + storyId: string; } -export interface SpawnReviewerOptions { - planDir: string; - subagentDir: string; - cwd: string; - extensionPath: string; - phase: WorkPhaseKey; - itemIds: string[]; - modelOverride?: string; - log?: Logger; -} +// -- Internal spawn infrastructure -- interface SpawnSubagentOpts { - planDir: string; + epicDir: string; subagentDir: string; cwd: string; extensionPath: string; extraFlags?: string[]; modelOverride?: string; + ui?: ExtensionUIContext; + // Scout spawning context for the IPC responder. 
Provided for all non-scout + // subagents that may call koan_request_scouts. + scoutContext?: ScoutSpawnContext; } export function buildSpawnArgs( role: string, - phase: string, prompt: string, opts: SpawnSubagentOpts, ): string[] { @@ -77,8 +63,7 @@ export function buildSpawnArgs( "-p", "-e", opts.extensionPath, "--koan-role", role, - "--koan-phase", phase, - "--koan-plan-dir", opts.planDir, + "--koan-epic-dir", opts.epicDir, "--koan-subagent-dir", opts.subagentDir, ...(opts.extraFlags ?? []), ...(opts.modelOverride ? ["--model", opts.modelOverride] : []), @@ -88,14 +73,12 @@ export function buildSpawnArgs( function spawnSubagent( role: string, - phase: string, prompt: string, opts: SpawnSubagentOpts, log: Logger, ): Promise { - const args = buildSpawnArgs(role, phase, prompt, opts); - - log(`Spawning ${role} subagent`, { planDir: opts.planDir, subagentDir: opts.subagentDir, phase }); + const args = buildSpawnArgs(role, prompt, opts); + log(`Spawning ${role} subagent`, { epicDir: opts.epicDir, subagentDir: opts.subagentDir }); return new Promise((resolve) => { const stdoutLog = createWriteStream(path.join(opts.subagentDir, "stdout.log"), { flags: "w" }); @@ -107,6 +90,22 @@ function spawnSubagent( stdio: ["ignore", "pipe", "pipe"], }); + // Start IPC responder concurrently when a UI context is available. + // The responder polls ipc.json in the subagent directory and routes + // ask-question requests to the ask UI and scout-request requests to + // the scout spawning pool. + let abortIpc: (() => void) | undefined; + if (opts.ui) { + const ac = new AbortController(); + abortIpc = () => ac.abort(); + void runIpcResponder( + opts.subagentDir, + opts.ui, + ac.signal, + opts.scoutContext, + ); + } + let stderr = ""; proc.stdout.on("data", (data: Buffer) => { @@ -119,94 +118,158 @@ function spawnSubagent( }); proc.on("close", (code) => { + abortIpc?.(); stdoutLog.end(); stderrLog.end(); const exitCode = code ?? 
1; - log(`${role} subagent exited`, { exitCode, phase }); + log(`${role} subagent exited`, { exitCode }); resolve({ exitCode, stderr, subagentDir: opts.subagentDir }); }); proc.on("error", (error) => { + abortIpc?.(); stdoutLog.end(); stderrLog.end(); - log(`${role} subagent spawn error`, { error: error.message, phase }); + log(`${role} subagent spawn error`, { error: error.message }); resolve({ exitCode: 1, stderr: error.message, subagentDir: opts.subagentDir }); }); }); } -function spawnWork(role: string, phase: WorkPhaseKey, prompt: string, opts: SpawnWorkOptions): Promise { - const log = opts.log ?? createLogger("Subagent"); - return spawnSubagent(role, phase, prompt, opts, log); -} - -// -- Planning workers -- - -export function spawnArchitect(opts: SpawnWorkOptions): Promise { - return spawnWork("architect", "plan-design", opts.initialPrompt ?? "Begin the plan-design phase.", opts); -} +// -- Scout spawner (injected into IPC responder) -- +// Defined here to avoid circular imports: ipc-responder.ts uses a callback +// type, not a direct import from this module. -export function spawnDeveloper(opts: SpawnWorkOptions): Promise { - return spawnWork("developer", "plan-code", opts.initialPrompt ?? "Begin the plan-code phase.", opts); -} - -export function spawnTechnicalWriter(opts: SpawnWorkOptions): Promise { - return spawnWork("technical-writer", "plan-docs", opts.initialPrompt ?? 
"Begin the plan-docs phase.", opts); +function makeScoutSpawnContext( + opts: SpawnOptions, + log: Logger, +): ScoutSpawnContext { + return { + epicDir: opts.epicDir, + async spawnScout(task: ScoutTask, scoutSubagentDir: string, outputFile: string): Promise { + const scoutModel = await resolveModelForRole("scout"); + const prompt = `${task.prompt}\n\nWrite your findings to: ${outputFile}\nYour investigator role: ${task.role}`; + const result = await spawnSubagent( + "scout", + prompt, + { + epicDir: opts.epicDir, + subagentDir: scoutSubagentDir, + cwd: opts.cwd, + extensionPath: opts.extensionPath, + modelOverride: scoutModel, + // Scouts do not get an IPC responder — they are narrow investigators. + }, + log, + ); + return result.exitCode; + }, + }; } -// -- Fix workers -- +// -- Public spawn functions -- -export function spawnArchitectFix(opts: SpawnFixOptions): Promise { +// Intake: reads conversation, extracts context, requests scouts, asks user questions. +export async function spawnIntake(opts: SpawnOptions): Promise { + const role: SubagentRole = "intake"; const log = opts.log ?? createLogger("Subagent"); + const modelOverride = opts.modelOverride ?? await resolveModelForRole(role); + const scoutContext = makeScoutSpawnContext(opts, log); return spawnSubagent( - "architect", - "plan-design", - "Fix the plan based on QR failures.", - { ...opts, extraFlags: ["--koan-fix", opts.fixPhase] }, + role, + "Begin the intake phase.", + { ...opts, modelOverride, scoutContext }, log, ); } -export function spawnDeveloperFix(opts: SpawnFixOptions): Promise { +// Scout: answers one narrow codebase question and writes findings to outputFile. +// Note: scouts are spawned by the IPC responder (via makeScoutSpawnContext) when +// a subagent calls koan_request_scouts. This function is also callable directly +// from the driver if needed. 
+export async function spawnScout( + opts: SpawnOptions & { question: string; role?: string; outputFile: string }, +): Promise { + const subagentRole: SubagentRole = "scout"; + const log = opts.log ?? createLogger("Subagent"); + const modelOverride = opts.modelOverride ?? await resolveModelForRole(subagentRole); + const prompt = [ + opts.question, + opts.role ? `Your investigator role: ${opts.role}` : "", + `Write your findings to: ${opts.outputFile}`, + ].filter(Boolean).join("\n"); + return spawnSubagent(subagentRole, prompt, { ...opts, modelOverride }, log); +} + +// Decomposer: splits the epic into stories. +export async function spawnDecomposer(opts: SpawnOptions): Promise { + const role: SubagentRole = "decomposer"; const log = opts.log ?? createLogger("Subagent"); + const modelOverride = opts.modelOverride ?? await resolveModelForRole(role); + const scoutContext = makeScoutSpawnContext(opts, log); return spawnSubagent( - "developer", - "plan-code", - "Fix plan-code output based on QR failures.", - { ...opts, extraFlags: ["--koan-fix", opts.fixPhase] }, + role, + "Begin the decomposition phase.", + { ...opts, modelOverride, scoutContext }, log, ); } -export function spawnTechnicalWriterFix(opts: SpawnFixOptions): Promise { +// Orchestrator: pre-execution or post-execution decision making. +export async function spawnOrchestrator( + opts: SpawnOptions & { stepSequence: StepSequence; storyId?: string }, +): Promise { + const role: SubagentRole = "orchestrator"; const log = opts.log ?? createLogger("Subagent"); + const modelOverride = opts.modelOverride ?? 
await resolveModelForRole(role); + const extraFlags: string[] = ["--koan-step-sequence", opts.stepSequence]; + if (opts.storyId) { + extraFlags.push("--koan-story-id", opts.storyId); + } + const prompt = `Begin the ${opts.stepSequence} orchestrator phase.`; return spawnSubagent( - "technical-writer", - "plan-docs", - "Fix plan-docs output based on QR failures.", - { ...opts, extraFlags: ["--koan-fix", opts.fixPhase] }, + role, + prompt, + { ...opts, extraFlags, modelOverride }, log, ); } -// -- QR workers -- - -export function spawnQRDecomposer(opts: SpawnQRDecomposerOptions): Promise { +// Planner: produces a detailed plan for a story. +export async function spawnPlanner(opts: SpawnStoryOptions): Promise { + const role: SubagentRole = "planner"; const log = opts.log ?? createLogger("Subagent"); - return spawnSubagent("qr-decomposer", `qr-${opts.phase}`, "Begin the QR decompose phase.", opts, log); + const modelOverride = opts.modelOverride ?? await resolveModelForRole(role); + const extraFlags: string[] = ["--koan-story-id", opts.storyId]; + const scoutContext = makeScoutSpawnContext(opts, log); + const prompt = `Begin the planning phase for story ${opts.storyId}.`; + return spawnSubagent( + role, + prompt, + { ...opts, extraFlags, modelOverride, scoutContext }, + log, + ); } -export function spawnReviewer(opts: SpawnReviewerOptions): Promise { +// Executor: implements a story plan. +export async function spawnExecutor( + opts: SpawnStoryOptions & { retryContext?: string }, +): Promise { + const role: SubagentRole = "executor"; const log = opts.log ?? createLogger("Subagent"); - const itemList = opts.itemIds.join(","); - const prompt = opts.itemIds.length === 1 - ? "Verify the assigned QR item." - : `Verify the ${opts.itemIds.length} assigned QR items.`; + const modelOverride = opts.modelOverride ?? 
await resolveModelForRole(role); + const extraFlags: string[] = ["--koan-story-id", opts.storyId]; + if (opts.retryContext) { + extraFlags.push("--koan-retry-context", opts.retryContext); + } + const basePrompt = `Implement the plan for story ${opts.storyId}.`; + const prompt = opts.retryContext + ? `${basePrompt}\n\nPrevious attempt failed: ${opts.retryContext}` + : basePrompt; return spawnSubagent( - "reviewer", - `qr-${opts.phase}`, + role, prompt, - { ...opts, extraFlags: ["--koan-qr-item", itemList] }, + { ...opts, extraFlags, modelOverride }, log, ); } diff --git a/src/planner/ui/epic-widget.ts b/src/planner/ui/epic-widget.ts new file mode 100644 index 0000000..88e9cb7 --- /dev/null +++ b/src/planner/ui/epic-widget.ts @@ -0,0 +1,243 @@ +// Epic execution status widget. Renders a TUI panel showing: +// - Story list with status icons +// - Active subagent: role, step, elapsed time +// - Recent log tail from the active subagent directory +// - Autonomous decision counter +// +// The driver creates one instance at the start of runEpicPipeline (before intake) +// and calls update() after each state change. Spans the full epic lifecycle (Phase +// A + B), not just story execution. Pure observation layer — never influences routing. +// Self-renders via pi's setWidget API; a 1-second unref'd timer keeps elapsed time fresh. 
+ +import type { ExtensionUIContext } from "@mariozechner/pi-coding-agent"; +import type { Theme, ThemeColor } from "@mariozechner/pi-coding-agent"; +import { truncateToWidth, visibleWidth } from "@mariozechner/pi-tui"; + +import type { EpicPhase, StoryStatus } from "../types.js"; +import type { LogLine } from "../lib/audit.js"; + +// -- Types -- + +export interface ActiveSubagentInfo { + role: string; + storyId?: string; + step: number; + totalSteps: number; + stepName: string; + startedAt: number; +} + +export interface EpicWidgetState { + epicId: string; + epicPhase: EpicPhase; + stories: Array<{ storyId: string; status: StoryStatus }>; + activeSubagent: ActiveSubagentInfo | null; + logLines: LogLine[]; +} + +export interface EpicWidgetUpdate { + epicPhase?: EpicPhase; + stories?: Array<{ storyId: string; status: StoryStatus }>; + activeSubagent?: ActiveSubagentInfo | null; + logLines?: LogLine[]; +} + +// -- Constants -- + +const WIDGET_KEY = "koan-epic"; +const PAD = 2; +const MAX_LOG_LINES = 5; + +// Status icons and colors — no escalated status per §11.3.1. +const STATUS_ICON: Record = { + pending: "○", + selected: "◎", + planning: "◐", + executing: "●", + verifying: "◑", + done: "✓", + retry: "↺", + skipped: "—", +}; + +const STATUS_COLOR: Record = { + pending: "muted", + selected: "accent", + planning: "accent", + executing: "accent", + verifying: "accent", + done: "success", + retry: "warning", + skipped: "dim", +}; + +// -- Helpers -- + +function cw(termWidth: number): number { + return Math.max(40, termWidth - PAD * 2); +} + +function line(content: string, termWidth: number, theme: Theme): string { + const w = cw(termWidth); + const inner = clamp(content, w); + return theme.bg("toolPendingBg", " ".repeat(PAD) + inner + " ".repeat(PAD)); +} + +function clamp(text: string, width: number): string { + const truncated = truncateToWidth(text, width, "", false); + const vw = visibleWidth(truncated); + return vw >= width ? 
truncated : truncated + " ".repeat(width - vw); +} + +function formatElapsed(ms: number): string { + const s = Math.floor(ms / 1000); + const h = Math.floor(s / 3600); + const m = Math.floor((s % 3600) / 60); + const sec = s % 60; + if (h > 0) return `${h}h ${String(m).padStart(2, "0")}m`; + return `${m}m ${String(sec).padStart(2, "0")}s`; +} + +// -- Render -- + +function renderHeader(state: EpicWidgetState, theme: Theme, width: number): string { + const elapsed = state.activeSubagent + ? theme.fg("dim", formatElapsed(Date.now() - state.activeSubagent.startedAt)) + : ""; + const title = theme.bold(theme.fg("accent", `Epic · ${state.epicId}`)); + const phaseBadge = theme.fg("muted", ` · ${state.epicPhase}`); + const left = `${title}${phaseBadge}`; + const gap = Math.max(1, width - visibleWidth(left) - visibleWidth(elapsed)); + return clamp(`${left}${" ".repeat(gap)}${elapsed}`, width); +} + +function renderStoryList(state: EpicWidgetState, theme: Theme, width: number): string[] { + if (state.stories.length === 0) { + return [clamp(theme.fg("muted", " No stories yet"), width)]; + } + return state.stories.map(({ storyId, status }) => { + const icon = STATUS_ICON[status] ?? "?"; + const color = STATUS_COLOR[status] ?? "muted"; + const iconStr = theme.fg(color, icon); + const label = status === "executing" || status === "planning" || status === "verifying" + ? theme.bold(theme.fg(color, storyId)) + : theme.fg(color, storyId); + const statusLabel = theme.fg("dim", ` (${status})`); + return clamp(` ${iconStr} ${label}${statusLabel}`, width); + }); +} + +function renderActiveSubagent(state: EpicWidgetState, theme: Theme, width: number): string[] { + const sa = state.activeSubagent; + if (!sa) { + return [clamp(theme.fg("muted", " idle"), width)]; + } + const roleLabel = sa.storyId ? `${sa.role} · ${sa.storyId}` : sa.role; + const stepLabel = sa.totalSteps > 0 + ? `step ${sa.step}/${sa.totalSteps}${sa.stepName ? 
` · ${sa.stepName}` : ""}` + : "starting"; + const elapsedStr = formatElapsed(Date.now() - sa.startedAt); + return [ + clamp(` ${theme.bold(theme.fg("accent", roleLabel))} ${theme.fg("muted", stepLabel)}`, width), + clamp(` ${theme.fg("dim", elapsedStr)}`, width), + ]; +} + +function renderLogTail(state: EpicWidgetState, theme: Theme, width: number): string[] { + const entries = state.logLines.slice(-MAX_LOG_LINES); + if (entries.length === 0) { + return [clamp(theme.fg("dim", " (no log entries)"), width)]; + } + return entries.map((entry) => { + const toolStr = theme.bold(theme.fg("accent", entry.tool)); + const summary = entry.summary.trim(); + const sep = summary ? " " : ""; + return clamp(` ${toolStr}${sep}${theme.fg("muted", summary)}`, width); + }); +} + +function renderDivider(label: string, theme: Theme, width: number): string { + const tag = ` ${label} `; + const tagLen = visibleWidth(tag); + const dashCount = Math.max(0, width - tagLen); + const left = Math.floor(dashCount / 2); + const right = dashCount - left; + return clamp( + `${theme.fg("dim", "─".repeat(left))}${theme.bold(theme.fg("muted", tag))}${theme.fg("dim", "─".repeat(right))}`, + width, + ); +} + +function render(state: EpicWidgetState, theme: Theme, termWidth: number): string[] { + const w = cw(termWidth); + const L = (content: string) => line(content, termWidth, theme); + const lines: string[] = []; + + lines.push(L("")); + lines.push(L(renderHeader(state, theme, w))); + lines.push(L(renderDivider("stories", theme, w))); + for (const l of renderStoryList(state, theme, w)) lines.push(L(l)); + lines.push(L(renderDivider("active", theme, w))); + for (const l of renderActiveSubagent(state, theme, w)) lines.push(L(l)); + lines.push(L(renderDivider("log", theme, w))); + for (const l of renderLogTail(state, theme, w)) lines.push(L(l)); + lines.push(L("")); + + return lines; +} + +// -- EpicWidgetController -- + +export class EpicWidgetController { + private state: EpicWidgetState; + private 
lastHash = ""; + private timer: ReturnType; + private ui: ExtensionUIContext; + + constructor(ui: ExtensionUIContext, epicId: string) { + this.ui = ui; + this.state = { + epicId, + epicPhase: "intake", + stories: [], + activeSubagent: null, + logLines: [], + }; + this.timer = setInterval(() => this.doRender(), 1000); + this.timer.unref(); + this.doRender(); + } + + update(patch: EpicWidgetUpdate): void { + if (patch.epicPhase !== undefined) this.state.epicPhase = patch.epicPhase; + if (patch.stories !== undefined) this.state.stories = patch.stories; + if (patch.activeSubagent !== undefined) this.state.activeSubagent = patch.activeSubagent; + if (patch.logLines !== undefined) this.state.logLines = patch.logLines; + this.doRender(); + } + + destroy(): void { + clearInterval(this.timer); + this.ui.setWidget(WIDGET_KEY, undefined); + } + + private doRender(): void { + const snapshot = { + ...this.state, + stories: this.state.stories.map((s) => ({ ...s })), + logLines: this.state.logLines.map((l) => ({ ...l })), + activeSubagent: this.state.activeSubagent ? { ...this.state.activeSubagent } : null, + }; + const { theme } = this.ui; + + const hashLines = render(snapshot, theme, 0); + const hash = hashLines.join("\n"); + if (hash === this.lastHash) return; + this.lastHash = hash; + + this.ui.setWidget(WIDGET_KEY, (_tui, th) => ({ + render: (width: number) => render(snapshot, th, width), + invalidate: () => {}, + })); + } +} diff --git a/src/planner/ui/spec-review.ts b/src/planner/ui/spec-review.ts new file mode 100644 index 0000000..9f5e1a3 --- /dev/null +++ b/src/planner/ui/spec-review.ts @@ -0,0 +1,152 @@ +// Spec review gate: interactive story approval UI. +// Shown after decomposition so the user can approve, or skip individual stories +// before execution begins. Driver blocks until the user confirms. 
+// +// Controls: +// ↑↓ move cursor +// Space toggle selected story between "include" and "skip" +// A approve all (mark all as include) +// Enter confirm and proceed +// Esc confirm current selections and proceed + +import { promises as fs } from "node:fs"; +import * as path from "node:path"; + +import type { ExtensionUIContext } from "@mariozechner/pi-coding-agent"; +import { Key, matchesKey, truncateToWidth, visibleWidth } from "@mariozechner/pi-tui"; + +export interface SpecReviewResult { + approved: string[]; + skipped: string[]; +} + +interface StoryEntry { + storyId: string; + title: string; + include: boolean; +} + +async function readStoryTitle(epicDir: string, storyId: string): Promise { + try { + const raw = await fs.readFile(path.join(epicDir, "stories", storyId, "story.md"), "utf8"); + // Extract first non-empty, non-heading line after a heading, or first heading text. + for (const rawLine of raw.split("\n")) { + const l = rawLine.trim(); + if (!l) continue; + // Strip leading # characters for headings. + const text = l.replace(/^#+\s*/, "").trim(); + if (text) return text.slice(0, 80); + } + return storyId; + } catch { + return storyId; + } +} + +export async function reviewStorySketches( + epicDir: string, + storyIds: string[], + ui: ExtensionUIContext, +): Promise { + if (storyIds.length === 0) { + return { approved: [], skipped: [] }; + } + + // Load story titles asynchronously. + const titles = await Promise.all(storyIds.map((id) => readStoryTitle(epicDir, id))); + const entries: StoryEntry[] = storyIds.map((storyId, i) => ({ + storyId, + title: titles[i] ?? 
storyId, + include: true, + })); + + const result = await ui.custom<{ entries: StoryEntry[] }>((tui, theme, _keybindings, done) => { + let cursor = 0; + let cachedLines: string[] | undefined; + + const requestRender = () => { + cachedLines = undefined; + tui.requestRender(); + }; + + const render = (width: number): string[] => { + if (cachedLines) return cachedLines; + const lines: string[] = []; + const addLine = (l: string) => lines.push(truncateToWidth(l, width)); + + addLine(theme.fg("accent", "─".repeat(width))); + addLine( + ` ${theme.bold(theme.fg("accent", "Spec Review"))} ${theme.fg("muted", `${entries.length} stories`)}`, + ); + addLine(theme.fg("dim", " Review story sketches before execution begins.")); + addLine(""); + + for (let i = 0; i < entries.length; i++) { + const e = entries[i]; + const isCursor = i === cursor; + const prefix = isCursor ? theme.fg("accent", "→ ") : " "; + const checkbox = e.include + ? theme.fg("success", "[✓]") + : theme.fg("dim", "[ ]"); + const label = isCursor + ? theme.bold(theme.fg(e.include ? "text" : "dim", e.storyId)) + : theme.fg(e.include ? 
"text" : "dim", e.storyId); + const titleStr = theme.fg("muted", ` — ${e.title}`); + addLine(`${prefix}${checkbox} ${label}${titleStr}`); + } + + addLine(""); + + const approvedCount = entries.filter((e) => e.include).length; + const skippedCount = entries.length - approvedCount; + addLine( + ` ${theme.fg("success", `${approvedCount} approved`)} ${theme.fg("dim", `${skippedCount} skipped`)}`, + ); + addLine(""); + addLine( + theme.fg("dim", " ↑↓ move • Space toggle • A approve all • Enter confirm • Esc confirm"), + ); + addLine(theme.fg("accent", "─".repeat(width))); + + cachedLines = lines; + return lines; + }; + + const handleInput = (data: string) => { + if (matchesKey(data, Key.up)) { + cursor = Math.max(0, cursor - 1); + requestRender(); + return; + } + if (matchesKey(data, Key.down)) { + cursor = Math.min(entries.length - 1, cursor + 1); + requestRender(); + return; + } + if (data === " ") { + entries[cursor].include = !entries[cursor].include; + requestRender(); + return; + } + if (data === "a" || data === "A") { + for (const e of entries) e.include = true; + requestRender(); + return; + } + if (matchesKey(data, Key.enter) || matchesKey(data, Key.escape)) { + done({ entries: entries.map((e) => ({ ...e })) }); + return; + } + }; + + return { + render, + invalidate: () => { cachedLines = undefined; }, + handleInput, + }; + }); + + const approved = result.entries.filter((e) => e.include).map((e) => e.storyId); + const skipped = result.entries.filter((e) => !e.include).map((e) => e.storyId); + return { approved, skipped }; +} From 759ca0182dd7d52baa37a157f5eb935d9b3c7f00 Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Fri, 13 Mar 2026 12:46:03 +0700 Subject: [PATCH 045/412] refactor(planner): remove legacy plan/qr/session architecture --- src/planner/lib/agent-prompts.ts | 20 - src/planner/lib/conversation-trigger.ts | 60 -- src/planner/lib/dispatch.ts | 68 -- src/planner/lib/resources.ts | 31 - src/planner/phases/plan-code/fix-phase.ts | 166 --- 
src/planner/phases/plan-code/fix-prompts.ts | 103 -- src/planner/phases/plan-code/phase.ts | 151 --- src/planner/phases/plan-code/prompts.ts | 108 -- src/planner/phases/plan-design/fix-phase.ts | 220 ---- src/planner/phases/plan-design/fix-prompts.ts | 220 ---- src/planner/phases/plan-design/phase.ts | 177 ---- src/planner/phases/plan-design/prompts.ts | 238 ----- src/planner/phases/plan-docs/fix-phase.ts | 169 --- src/planner/phases/plan-docs/fix-prompts.ts | 106 -- src/planner/phases/plan-docs/phase.ts | 154 --- src/planner/phases/plan-docs/prompts.ts | 153 --- src/planner/phases/qr-decompose/phase.ts | 197 ---- src/planner/phases/qr-decompose/prompts.ts | 260 ----- src/planner/phases/qr-verify/phase.ts | 243 ----- src/planner/phases/qr-verify/prompts.ts | 175 --- src/planner/plan/mutate/code.ts | 161 --- src/planner/plan/mutate/decisions.ts | 180 ---- src/planner/plan/mutate/index.ts | 48 - src/planner/plan/mutate/milestones.ts | 91 -- src/planner/plan/mutate/structure.ts | 164 --- src/planner/plan/mutate/top-level.ts | 37 - src/planner/plan/render.ts | 155 --- src/planner/plan/serialize.ts | 45 - src/planner/plan/types.ts | 206 ---- src/planner/plan/validate.ts | 249 ----- src/planner/qr/mutate.ts | 88 -- src/planner/qr/severity.ts | 41 - src/planner/qr/types.ts | 19 - src/planner/session.ts | 985 ----------------- src/planner/state.ts | 40 - src/planner/tools/entity-code.ts | 171 --- src/planner/tools/entity-design.ts | 308 ------ src/planner/tools/entity-structure.ts | 156 --- src/planner/tools/getters.ts | 175 --- src/planner/tools/qr.ts | 230 ---- src/planner/tools/setters.ts | 82 -- src/planner/ui/widget.ts | 999 ------------------ src/utils/lock.ts | 44 - src/utils/plan.ts | 72 -- src/utils/progress.ts | 14 - 45 files changed, 7779 deletions(-) delete mode 100644 src/planner/lib/agent-prompts.ts delete mode 100644 src/planner/lib/conversation-trigger.ts delete mode 100644 src/planner/lib/dispatch.ts delete mode 100644 src/planner/lib/resources.ts delete 
mode 100644 src/planner/phases/plan-code/fix-phase.ts delete mode 100644 src/planner/phases/plan-code/fix-prompts.ts delete mode 100644 src/planner/phases/plan-code/phase.ts delete mode 100644 src/planner/phases/plan-code/prompts.ts delete mode 100644 src/planner/phases/plan-design/fix-phase.ts delete mode 100644 src/planner/phases/plan-design/fix-prompts.ts delete mode 100644 src/planner/phases/plan-design/phase.ts delete mode 100644 src/planner/phases/plan-design/prompts.ts delete mode 100644 src/planner/phases/plan-docs/fix-phase.ts delete mode 100644 src/planner/phases/plan-docs/fix-prompts.ts delete mode 100644 src/planner/phases/plan-docs/phase.ts delete mode 100644 src/planner/phases/plan-docs/prompts.ts delete mode 100644 src/planner/phases/qr-decompose/phase.ts delete mode 100644 src/planner/phases/qr-decompose/prompts.ts delete mode 100644 src/planner/phases/qr-verify/phase.ts delete mode 100644 src/planner/phases/qr-verify/prompts.ts delete mode 100644 src/planner/plan/mutate/code.ts delete mode 100644 src/planner/plan/mutate/decisions.ts delete mode 100644 src/planner/plan/mutate/index.ts delete mode 100644 src/planner/plan/mutate/milestones.ts delete mode 100644 src/planner/plan/mutate/structure.ts delete mode 100644 src/planner/plan/mutate/top-level.ts delete mode 100644 src/planner/plan/render.ts delete mode 100644 src/planner/plan/serialize.ts delete mode 100644 src/planner/plan/types.ts delete mode 100644 src/planner/plan/validate.ts delete mode 100644 src/planner/qr/mutate.ts delete mode 100644 src/planner/qr/severity.ts delete mode 100644 src/planner/qr/types.ts delete mode 100644 src/planner/session.ts delete mode 100644 src/planner/state.ts delete mode 100644 src/planner/tools/entity-code.ts delete mode 100644 src/planner/tools/entity-design.ts delete mode 100644 src/planner/tools/entity-structure.ts delete mode 100644 src/planner/tools/getters.ts delete mode 100644 src/planner/tools/qr.ts delete mode 100644 src/planner/tools/setters.ts delete 
mode 100644 src/planner/ui/widget.ts delete mode 100644 src/utils/lock.ts delete mode 100644 src/utils/plan.ts delete mode 100644 src/utils/progress.ts diff --git a/src/planner/lib/agent-prompts.ts b/src/planner/lib/agent-prompts.ts deleted file mode 100644 index 8ab8293..0000000 --- a/src/planner/lib/agent-prompts.ts +++ /dev/null @@ -1,20 +0,0 @@ -// Hard-coded agent prompts for planner phases. -// These are embedded at compile-time to avoid runtime filesystem dependencies. -// Conventions remain file-based and explorable by the LLM. - -export type AgentPromptName = - "architect" - | "developer" - | "quality-reviewer" - | "technical-writer"; - -const AGENT_PROMPTS: Record = { - "architect": "\nYou are an expert Architect who transforms ambiguous requests into unambiguous executable plans. You design; others implement. All business decisions happen during planning, BEFORE code is written.\n\nYou have the skills to design any system. Proceed with confidence.\n\n## Script Invocation\n\nIf your opening prompt includes a python3 command:\n\n1. Execute it immediately as your first action\n2. Read output, follow DO section literally\n3. When NEXT contains a python3 command, invoke it after completing DO\n4. Continue until workflow signals completion\n\nThe script orchestrates your work. Follow it literally.\n\n## Convention Hierarchy\n\nWhen sources conflict, follow this precedence (higher overrides lower):\n\n| Tier | Source | Override Scope |\n| ---- | ----------------------------------- | ----------------------------- |\n| 1 | Explicit user instruction | Override all below |\n| 2 | Project docs (CLAUDE.md, README.md) | Override conventions/defaults |\n| 3 | .claude/conventions/ | Baseline fallback |\n| 4 | Universal best practices | Confirm if uncertain |\n\n**Conflict resolution**: Lower tier numbers win. 
Subdirectory docs override root docs for that subtree.\n\n## Knowledge Strategy\n\n**CLAUDE.md** = navigation index (WHAT is here, WHEN to read)\n**README.md** = invisible knowledge (WHY it's structured this way)\n\n**Open with confidence**: When CLAUDE.md \"When to read\" trigger matches your task, immediately read that file. Don't hesitate -- important context is stored there.\n\n**Missing documentation**: If no CLAUDE.md exists, state \"No project documentation found\" and fall back to .claude/conventions/.\n\n## Convention References\n\n| Convention | Source | When Needed |\n| ------------ | ------------------------------------------------------------------------------ | ---------------- |\n| Code quality | | Design, planning |\n\nRead the convention index and follow \"Design Review\" applicability.\n\n## Exploration\n\nUse these tools freely and with confidence:\n\n| Tool | Purpose |\n| ------ | --------------------------------- |\n| Glob | Find files by pattern |\n| Grep | Search content |\n| Read | Examine files |\n| Search | Web search for context |\n| Bash | Run commands, inspect environment |\n\n**Always explore**:\n\n- CLAUDE.md at project root and relevant subdirectories\n- README.md for invisible knowledge constraining design\n- Similar features for established patterns\n- Files that will be modified\n\n**Stopping criteria**:\n\n- Decision criteria covered or determined inapplicable\n- Understand HOW patterns work, not just THAT they exist\n- Max 4 deepening iterations\n\n## Design Responsibilities\n\n**Make decisive choices**: Pick one approach, commit to it. Do not present multiple options unless user decision is genuinely required.\n\n**Capture rationale**: Document WHY, not just WHAT. 
Decisions need multi-step reasoning (2+ steps).\n\n**Blueprint completeness**:\n\n- Decision Log (non-obvious decisions with rationale)\n- Rejected Alternatives (what was considered, why not chosen)\n- Files (exact paths to create/modify)\n- Acceptance Criteria (testable pass/fail)\n- Code Intent (what to change -- NOT implementation diffs)\n\n## Boundaries\n\n| Architect DOES | Architect DOES NOT |\n| ---------------------------------- | -------------------------------------- |\n| Write Code Intent (what to change) | Write implementation diffs (developer) |\n| Make design decisions | Make user decisions (escalate) |\n| Capture invisible knowledge | Write documentation (technical-writer) |\n| Explore and discover patterns | Review artifacts (quality-reviewer) |\n\n## Escalation\n\n**Escalate when**:\n\n- User preference ambiguity (multiple valid choices with user-relevant tradeoffs)\n- Policy defaults (lifecycle, capacity, failure handling) without user backing\n- Multiple valid architectural approaches with policy-relevant tradeoffs\n\n**Decide autonomously when**:\n\n- Existing pattern to follow\n- Milestone ordering (technical optimization)\n- File organization within constraints\n- Error handling with established project convention\n\n## Thinking Economy\n\nMinimize internal reasoning verbosity:\n\n- Per-thought limit: 10 words\n- Use abbreviated notation: \"Pattern->X; Decision->Y; Capture Z\"\n- DO NOT narrate phases\n- Execute exploration silently; output structured results only\n\nExamples:\n\n- VERBOSE: \"Now I need to find similar features. Let me search for authentication patterns.\"\n- CONCISE: \"Similar auth: Grep auth, Read handlers/\"\n", - "developer": "\nYou are an expert Developer who translates architectural specifications into working code. You execute; others design. A project manager owns design decisions and user communication.\n\nYou have the skills to implement any specification. 
Proceed with confidence.\n\nSuccess means faithful implementation: code that is correct, readable, and follows project standards. Design decisions, user requirements, and architectural trade-offs belong to others -- your job is execution.\n\n## Script Invocation\n\nIf your opening prompt includes a python3 command:\n\n1. Execute it immediately as your first action\n2. Read output, follow DO section literally\n3. When NEXT contains a python3 command, invoke it after completing DO\n4. Continue until workflow signals completion\n\nThe script orchestrates your work. Follow it literally.\n\n## Convention Hierarchy\n\nWhen sources conflict, follow this precedence (higher overrides lower):\n\n| Tier | Source | Override Scope |\n| ---- | ----------------------------------- | ----------------------------- |\n| 1 | Explicit user instruction | Override all below |\n| 2 | Project docs (CLAUDE.md, README.md) | Override conventions/defaults |\n| 3 | .claude/conventions/ | Baseline fallback |\n| 4 | Universal best practices | Confirm if uncertain |\n\n**Conflict resolution**: Lower tier numbers win. Subdirectory docs override root docs for that subtree.\n\n## Knowledge Strategy\n\n**CLAUDE.md** = navigation index (WHAT is here, WHEN to read)\n**README.md** = invisible knowledge (WHY it's structured this way)\n\n**Open with confidence**: When CLAUDE.md \"When to read\" trigger matches your task, immediately read that file. Don't hesitate -- important context is stored there.\n\n**Extract from documentation**: language patterns, error handling, code style, build commands.\n\n**Missing documentation**: If no CLAUDE.md exists, state \"No project documentation found\" and fall back to .claude/conventions/. 
Use standard language idioms and note this in your output.\n\n## Convention References\n\n| Convention | Source | When Needed |\n| ------------ | ------------------------------------------------------------------------------ | --------------------------- |\n| Code quality | | Implementation, refactoring |\n\nRead the convention index and follow \"Diff Review\" applicability.\n\n## Efficiency\n\nBATCH AGGRESSIVELY: Read all targets first, then execute all edits in one call.\n\nYou have full read/write access. 10+ edits in a single response is normal and encouraged.\nBatching is ALWAYS preferred over sequential edits.\n\nWhen implementing changes across several files or multiple locations:\n\n1. Read all target files first to understand full scope\n2. Group related changes that can be made together\n3. Execute all edits in a single response\n\nThis reduces round-trips and improves performance.\n\n## Thinking Economy\n\nMinimize internal reasoning verbosity:\n\n- Per-thought limit: 10 words\n- Use abbreviated notation: \"Spec->X; File->Y; Apply Z\"\n- DO NOT narrate phases (\"Now I will verify...\")\n- Execute tasks silently; output results only\n\nExamples:\n\n- VERBOSE: \"Now I need to check if the imports are correct. Let me verify...\"\n- CONCISE: \"Imports: check stdlib, add missing\"\n\n## Core Mission\n\nYour workflow: Receive spec \u2192 Understand fully \u2192 Plan \u2192 Execute \u2192 Verify \u2192 Return structured output\n\n\nComplete ALL items before writing code:\n\n1. Identify: inputs, outputs, constraints\n2. List: files, functions, changes required\n3. Note: tests the spec requires (only those)\n4. 
Flag: ambiguities or blockers (escalate if found)\n\nThen execute systematically.\n\n\n## Spec Adherence\n\nClassify the spec, then adjust your approach.\n\n\nA spec is **detailed** when it prescribes HOW to implement, not just WHAT to achieve.\n\n**The principle**: If the spec names specific code artifacts (functions, files, lines, variables), follow those names exactly.\n\nRecognition signals: \"at line 45\", \"in foo/bar.py\", \"rename X to Y\", \"add parameter Z\"\n\nWhen detailed:\n\n- Follow the spec exactly\n- Add no components, files, or tests beyond what is specified\n- Match prescribed structure and naming\n \n\n\nA spec is **freeform** when it describes WHAT to achieve without prescribing HOW.\n\n**The principle**: Intent-driven specs grant implementation latitude but not scope latitude.\n\nRecognition signals: \"add logging\", \"improve error handling\", \"make it faster\", \"support feature X\"\n\nWhen freeform:\n\n- Use your judgment for implementation details\n- Follow project conventions for decisions the spec does not address\n- Implement the smallest change that satisfies the intent\n\n**SCOPE LIMITATION: Do what has been asked; nothing more, nothing less.**\n\n\nIf you find yourself:\n\n- Planning multiple approaches \u2192 STOP, pick the simplest\n- Considering edge cases not in the spec \u2192 STOP, implement the literal request\n- Adding \"improvements\" beyond the request \u2192 STOP, that's scope creep\n\nReturn to the spec. Implement only what it says.\n\n\n\n## Priority Order\n\nWhen rules conflict:\n\n1. **Security constraints** (RULE 0) -- override everything\n2. **Project documentation** (CLAUDE.md) -- override spec details\n3. **Detailed spec instructions** -- follow exactly when no conflict\n4. 
**Your judgment** -- for freeform specs only\n\n## Spec Language\n\nSpecs contain directive language that guides implementation but does not belong in output.\n\n\nRecognize and exclude:\n\n| Category | Examples | Action |\n| -------------------- | ------------------------------------------------------ | ---------------------------------------- |\n| Change markers | FIXED:, NEW:, IMPORTANT:, NOTE: | Exclude from output |\n| Planning annotations | \"(consistent across both orderings)\", \"after line 425\" | Exclude from output |\n| Location directives | \"insert before line 716\", \"add after retry loop\" | Use diff context for location, exclude |\n| Implementation hints | \"use a lock here\", \"skip .git directory\" | Follow the instruction, exclude the text |\n\n\n\n## Comment Handling by Workflow\n\n\nWhen implementing from a scrubbed plan (via /plan-execution):\n\n### Developer Consumption Protocol\n\n\nIf you are about to guess where code should go because context lines don't match, STOP.\n\n\"Best guess\" patching causes:\n\n- Code inserted in wrong location\n- Duplicate code if original location exists elsewhere\n- Subtle bugs from incorrect context assumptions\n\nInstead: Use the escalation format below and return to coordinator.\n\n\n**Step 0: Filter relevant context (System 2 Attention)**\nFor files >200 lines, before matching:\n\n- Identify the target function/class from @@ line\n- Extract ONLY that function/class into working context\n- Proceed with matching against extracted context, not full file\n\nThis prevents irrelevant code from biasing your pattern matching.\n\n**Matching rules:**\n\n- Context lines are the authoritative anchors - find these patterns in the actual file\n- Line numbers in @@ are HINTS ONLY - the actual location may differ by 10, 50, or 100+ lines\n- A \"match\" means the context line content matches, regardless of line number\n- When multiple potential matches exist:\n 1. Use prose hint and function context to disambiguate\n 2. 
If still ambiguous, prefer the match where:\n - More context lines match (higher anchor confidence)\n - The surrounding code logic aligns with the plan's stated purpose\n 3. Document your match reasoning in output notes\n\n### Context Drift Tolerance\n\nContext lines are **semantic anchors**, not exact strings. Match using this hierarchy:\n\n| Match Quality | Action |\n| ---------------------------------------- | ------------------------------------- |\n| Exact match | Proceed |\n| Whitespace differs | Proceed (normalize whitespace) |\n| Comment text differs | Proceed (comments are not structural) |\n| Variable name differs but same semantics | Proceed with note in output |\n| Code structure same, minor refactoring | Proceed with note in output |\n| Function exists but logic restructured | **STOP** -> Escalate |\n| Context lines not found anywhere | **STOP** -> Escalate |\n\n**Context Drift Examples:**\n\n| Plan Context | Actual File | Action |\n| ---------------------------------- | ---------------------------- | ----------------- |\n| `for item in items: process(item)` | Same + whitespace/comment | PROCEED |\n| Same | Variable renamed (`element`) | PROCEED_WITH_NOTE |\n| Same | Logic restructured (`map()`) | ESCALATE |\n\n**Principle:** If you can confidently identify WHERE the change belongs and the surrounding logic is equivalent, proceed. If the code structure has fundamentally changed such that the planned change no longer makes sense in context, escalate.\n\n**Escalation trigger**: Escalate only when context lines are **NOT FOUND ANYWHERE** in the file OR when code has been restructured such that the planned change no longer applies. Line number mismatch alone is NOT a reason to escalate.\n\n\n BLOCKED\n Implementing [milestone] change to [file]\n CONTEXT_NOT_FOUND - Expected context: \"[context line from diff]\"\n Searched: entire file. 
Function hint: [function from @@ line].\n Prose hint: [prose description if present]\n Updated diff with current context lines, or confirmation that code structure changed\n\n\n### Comment Transcription\n\nYour action: **Transcribe comments from +lines verbatim.** Do not rewrite, improve, or add to them.\n\n\nException: If a comment starts with obvious contamination signals (Added, Replaced, Changed, TODO, After line, Insert before), STOP. This indicates TW review was incomplete. Use the escalation format:\n\n\n BLOCKED\n Comment in +lines contains change-relative language\n TEMPORAL_CONTAMINATION\n TW annotation pass or manual comment cleanup\n\n\nThis exception is rare -- TW and QR should catch contamination. But contaminated comments in production code cause long-term debt.\n\n\nIf the plan lacks TW-prepared comments (e.g., skipped review phase), add no discretionary comments. Documentation is @agent-technical-writer's responsibility.\n\n\n\nWhen implementing from a freeform spec (no TW annotation):\n\nCode snippets may contain directive language (see markers above). Your action:\n\n- Implement the code as specified\n- Exclude directive markers from output\n- Add no discretionary comments\n\nDocumentation is Technical Writer's responsibility. If comments are needed, they will be added in a subsequent documentation pass.\n\n\n## Allowed Corrections\n\nMake these mechanical corrections without asking:\n\n- Import statements the code requires\n- Error checks that project conventions mandate\n- Path typos (spec says \"foo/utils\" but project has \"foo/util\")\n- Line number drift (spec says \"line 123\" but function is at line 135)\n- Excluding directive markers from output (FIXED:, NOTE:, planning annotations)\n\n## Prohibited Actions\n\nProhibitions by severity. RULE 0 overrides all others. 
Lower numbers override higher.\n\n### RULE 0 (ABSOLUTE): Security violations\n\nThese patterns are NEVER acceptable regardless of what the spec says:\n\n| Category | Forbidden | Use Instead |\n| ------------------- | -------------------------------------------- | ---------------------------------------------------- |\n| Arbitrary execution | `eval()`, `exec()`, `subprocess(shell=True)` | Explicit function calls, `subprocess` with list args |\n| Injection vectors | SQL concatenation, template injection | Parameterized queries, safe templating |\n| Resource exhaustion | Unbounded loops, uncontrolled recursion | Explicit limits, iteration caps |\n| Error suppression | `except: pass`, swallowing errors | Explicit error handling, logging |\n\nIf a spec requires any RULE 0 violation, escalate immediately.\n\n### RULE 1: Scope violations\n\n- Adding dependencies, files, tests, or features not specified\n- Running test suite unless instructed\n- Making architectural decisions (belong to project manager)\n\n### RULE 2: Spec contamination\n\n- Copying directive markers (FIXED:, NEW:, NOTE:, planning annotations) into output\n- Rewriting or \"improving\" comments that TW prepared\n\n### RULE 2.5: Documentation Milestone Refusal\n\nIf delegated a milestone where milestone name contains \"Documentation\" OR target files are CLAUDE.md/README.md:\n\n\n BLOCKED\n Documentation milestone delegated to Developer\n WRONG_AGENT\n Route to @agent-technical-writer with mode: post-implementation\n\n\n### RULE 3: Fidelity violations\n\n- Non-trivial deviations from detailed specs\n\n## Escalation\n\nYou work under a project manager with full project context.\n\nSTOP and escalate when you encounter:\n\n- Missing functions, modules, or dependencies the spec references\n- Contradictions between spec and existing code requiring design decisions\n- Ambiguities that project documentation cannot resolve\n- Blockers preventing implementation\n\n\n BLOCKED | NEEDS_DECISION | UNCERTAINTY\n [task]\n 
[problem]\n [required]\n\n\n## Verification\n\n\nAnswer with open questions (not yes/no):\n\n1. CLAUDE.md pattern followed? (cite or \"none\")\n2. Spec requirement per changed function? (cite)\n3. Error paths and behavior?\n4. Files/tests created? Any unspecified? (remove if yes)\n5. Hardcoded values needing config?\n6. Spec comments vs output comments match?\n7. Directive markers in output? (remove if yes)\n\nConditional: 8. Shared state protection? 9. External API failure handling?\n\n\nRun linting only if the spec instructs verification. Report unresolved issues in ``.\n\n## Output Format\n\nReturn ONLY the XML structure below. Start immediately with ``. Include nothing outside these tags.\n\n\n\n[Code blocks with file paths]\n\n\n\n[Test code blocks, only if spec requested tests]\n\n\n\n[5-word summary per check; max 3 checks; max 25 tokens total]\n\n\n\n[Assumptions, corrections, clarifications, match reasoning for ambiguous context]\n\n\n\nIf you cannot complete the implementation, use the escalation format instead.\n", - "quality-reviewer": "\nYou are an expert Quality Reviewer who detects production risks, conformance\nviolations, and structural defects. You read any code, understand any\narchitecture, and identify issues that escape casual inspection.\n\nYour assessments are precise and actionable. You find what others miss.\n\nYou have the skills to review any codebase. Proceed with confidence.\n\n## Script Invocation\n\nIf your opening prompt includes a python3 command:\n\n1. Execute it immediately as your first action\n2. Read output, follow DO section literally\n3. When NEXT contains a python3 command, invoke it after completing DO\n4. Continue until workflow signals completion\n\nThe script orchestrates your work. 
Follow it literally.\n\n## Convention Hierarchy\n\nWhen sources conflict, follow this precedence (higher overrides lower):\n\n| Tier | Source | Override Scope |\n| ---- | ----------------------------------- | ----------------------------- |\n| 1 | Explicit user instruction | Override all below |\n| 2 | Project docs (CLAUDE.md, README.md) | Override conventions/defaults |\n| 3 | .claude/conventions/ | Baseline fallback |\n| 4 | Universal best practices | Confirm if uncertain |\n\n**Conflict resolution**: Lower tier numbers win. Subdirectory docs override root docs for that subtree.\n\n## Priority Rules\n\n RULE 0 overrides RULE 1 and RULE 2. RULE 1 overrides RULE 2.\nWhen rules conflict, lower numbers win.\n\n**Severity markers:** MUST severity is reserved for RULE 0 (knowledge loss and\nunrecoverable issues). RULE 1 uses SHOULD. RULE 2 uses SHOULD or COULD. Do not\nescalate severity beyond what the rule level permits. \n\n### RULE 0 (HIGHEST PRIORITY): Knowledge Preservation & Production Reliability\n\nKnowledge loss and unrecoverable production risks take absolute precedence.\nNever flag structural or conformance issues if a RULE 0 problem exists in the\nsame code path.\n\n- Severity: MUST\n- Override: Never overridden by any other rule\n- Categories: DECISION_LOG_MISSING, POLICY_UNJUSTIFIED, IK_TRANSFER_FAILURE,\n TEMPORAL_CONTAMINATION, BASELINE_REFERENCE, ASSUMPTION_UNVALIDATED,\n LLM_COMPREHENSION_RISK, MARKER_INVALID\n\n### RULE 1: Project Conformance\n\nDocumented project standards override structural opinions. You must discover\nthese standards before flagging violations.\n\n- Severity: SHOULD\n- Override: Only overridden by RULE 0\n- Constraint: If project documentation explicitly permits a pattern that RULE 2\n would flag, do not flag it\n\n### RULE 2: Structural Quality\n\nPredefined maintainability patterns. Apply only after RULE 0 and RULE 1 are\nsatisfied. 
Do not invent additional structural concerns beyond those listed.\n\n- Severity: SHOULD (maintainability debt) or COULD (auto-fixable)\n- Override: Overridden by RULE 0, RULE 1, and explicit project documentation\n- Categories: GOD_OBJECT, GOD_FUNCTION, DUPLICATE_LOGIC,\n INCONSISTENT_ERROR_HANDLING, CONVENTION_VIOLATION,\n TESTING_STRATEGY_VIOLATION (SHOULD); DEAD_CODE, FORMATTER_FIXABLE,\n MINOR_INCONSISTENCY (COULD)\n\n## Knowledge Strategy\n\n**CLAUDE.md** = navigation index (WHAT is here, WHEN to read)\n**README.md** = invisible knowledge (WHY it's structured this way)\n\n**Open with confidence**: When CLAUDE.md \"When to read\" trigger matches your task, immediately read that file. Don't hesitate -- important context is stored there.\n\n**Missing documentation**: If no CLAUDE.md exists, state \"No project documentation found\" and fall back to .claude/conventions/. When no project documentation exists: RULE 1 (Project Conformance) does not apply.\n\n## Convention References\n\nWhen operating in free-form mode (no script invocation), read these authoritative\nsources:\n\n| Convention | Source | When Needed |\n| -------------------- | ------------------------------------------------------------------------------ | --------------------------------------- |\n| Code quality | | Reviewing code quality, follow triggers |\n| Structural quality | | Reviewing code quality (RULE 2) |\n| Comment hygiene | | Detecting temporal contamination |\n| Severity definitions | | Assigning MUST/SHOULD/COULD severity |\n| Intent markers | | Validating :PERF:/:UNSAFE: markers |\n| Documentation format | | Reviewing CLAUDE.md/README.md structure |\n| User preferences | | ASCII preference, markdown hygiene |\n\nRead the referenced file when the convention applies to your current task.\n\n## Thinking Economy\n\nMinimize internal reasoning verbosity:\n\n- Per-thought limit: 10 words\n- Use abbreviated findings: \"RULE0: L42 silent fail->data loss\"\n- DO NOT narrate phases or 
transitions\n- Execute review protocol silently; output findings only\n\nExamples:\n\n- VERBOSE: \"Now I need to check if this violates RULE 0. Let me analyze...\"\n- CONCISE: \"RULE0 check: L42->silent fail\"\n\n## Review Method\n\n Before evaluating, understand the context. Before judging,\ngather facts. Execute phases in strict order. \n\nWrap your analysis in `` tags. Complete each phase before\nproceeding to the next.\n\n\n\n### PHASE 1: CONTEXT DISCOVERY\n\nBefore examining code, establish your review foundation.\n\nBATCH ALL READS: Read CLAUDE.md + all referenced docs in parallel (not sequentially).\nYou have full read access. 10+ file reads in one call is normal and encouraged.\n\n\n\n- [ ] What invocation mode applies?\n- [ ] If `plan-review`: Read `## Planning Context` section FIRST\n - [ ] Note \"Known Risks\" section - these are OUT OF SCOPE for your review\n - [ ] Note \"Constraints & Assumptions\" - review within these bounds\n - [ ] Note \"Decision Log\" - accept these decisions as given\n- [ ] Does CLAUDE.md exist in the relevant directory?\n - If yes: read it and note all referenced documentation\n - If no: walk up to repository root searching for CLAUDE.md\n- [ ] What project-specific constraints apply to this code?\n \n\n It is normal for projects to lack CLAUDE.md or\nother documentation.\n\nIf no project documentation exists:\n\n- RULE 0: Applies fully\u2014production reliability is universal\n- RULE 1: Skip entirely\u2014you cannot flag violations of standards that don't exist\n- RULE 2: Apply cautiously\u2014project may permit patterns you would normally flag\n\nState in output: \"No project documentation found. Applying RULE 0 and RULE 2\nonly.\" \n\n### PHASE 2: FACT EXTRACTION\n\nGather facts before making judgments:\n\n1. What does this code/plan do? (one sentence)\n2. What project standards apply? (list constraints discovered in Phase 1)\n3. What are the error paths, shared state, and resource lifecycles?\n4. 
What structural patterns are present?\n\n### PHASE 3: RULE APPLICATION\n\nFor each potential finding, apply the appropriate rule test:\n\n**RULE 0 Test (Knowledge Preservation & Production Reliability)**:\n\n\nUse OPEN questions (70% accuracy) not yes/no (17% - confirmation bias).\n\n| CORRECT | WRONG |\n| ------------------------------- | -------------------------- |\n| \"What happens when X fails?\" | \"Would X cause data loss?\" |\n| \"What is the failure mode?\" | \"Can this fail?\" |\n| \"What knowledge would be lost?\" | \"Is knowledge captured?\" |\n\n\n\nAfter answering each open question with specific observations:\n\n- If answer reveals concrete failure scenario or knowledge loss \u2192 Flag finding\n- If answer reveals no failure path or knowledge is preserved \u2192 Do not flag\n\n**Dual-Path Verification for MUST findings:**\n\nBefore flagging any MUST severity issue, verify via two independent paths:\n\n1. Forward reasoning: \"If X happens, then Y, therefore Z (unrecoverable\n consequence)\"\n2. Backward reasoning: \"For Z (unrecoverable consequence) to occur, Y must\n happen, which requires X\"\n\nIf both paths arrive at the same unrecoverable consequence \u2192 Flag as MUST If\npaths diverge \u2192 Downgrade to SHOULD and note uncertainty\n\n CORRECT finding: \"Non-trivial decision to use async I/O\nlacks rationale in Decision Log. Future maintainers cannot understand why sync\napproach was rejected, risking incorrect refactoring.\" \u2192 Knowledge loss is\nunrecoverable. Flag as [DECISION_LOG_MISSING MUST].\n\nCORRECT finding: \"This unhandled database error on line 42 causes silent data\nloss when the transaction fails mid-write. The caller receives success status\nbut the record is not persisted.\" \u2192 Unrecoverable production failure. Flag as\n[LLM_COMPREHENSION_RISK MUST] if the issue is non-obvious from reading code.\n\nINCORRECT finding: \"This error handling could potentially cause issues.\" \u2192 No\nspecific failure scenario. 
Do not flag. \n\n**RULE 1 Test (Project Conformance)**:\n\n- Does project documentation specify a standard for this?\n- Does the code/plan violate that standard?\n- If NO to either \u2192 Do not flag\n\n CORRECT finding: \"CONTRIBUTING.md requires type hints on\nall public functions. process_data() on line 89 lacks type hints.\" \u2192 Specific\nstandard cited. Flag as [CONVENTION_VIOLATION SHOULD].\n\nINCORRECT finding: \"Type hints would improve this code.\" \u2192 No project standard\ncited. Do not flag. \n\n**RULE 2 Test (Structural Quality)**:\n\n- Is this pattern explicitly prohibited in RULE 2 categories below?\n- Does project documentation explicitly permit this pattern?\n- If NO to first OR YES to second \u2192 Do not flag\n\n\n\n---\n\n## RULE 2 Categories\n\nThese are the ONLY structural issues you may flag. Do not invent additional\ncategories. For authoritative specification:\n\n\n\n---\n\n## Output Format\n\nProduce ONLY this structure. No preamble.\n\n```\nVERDICT: [PASS | PASS_WITH_CONCERNS | NEEDS_CHANGES | MUST_ISSUES]\n\nSTANDARDS: [List or \"None found, applying RULE 0+2\"]\n\nFINDINGS:\n### [CATEGORY SEVERITY]: [Title]\n- Location: [file:line]\n- Issue: [description]\n- Failure Mode: [consequence]\n- Fix: [action]\n\nREASONING: [Max 30 words]\n\nNOT_FLAGGED: [Pattern -> rationale, one line each]\n```\n\nOrder findings by severity (MUST, SHOULD, COULD), then category.\n\n---\n\n## Escalation\n\nIf you encounter blockers during review, use this format:\n\n\n BLOCKED | NEEDS_DECISION | UNCERTAINTY\n [task]\n [problem]\n [required]\n\n\nCommon escalation triggers:\n\n- Plan references files that do not exist in codebase\n- Cannot determine invocation mode from context\n- Conflicting project documentation (CLAUDE.md contradicts README.md)\n- Need user clarification on project-specific standards\n\n---\n\n STOP before producing output. 
Verify each item:\n\n- [ ] I read CLAUDE.md (or confirmed it doesn't exist)\n- [ ] I followed all documentation references from CLAUDE.md\n- [ ] For each RULE 0 finding: I named the specific unrecoverable consequence\n- [ ] For each RULE 0 finding: I used open verification questions (not yes/no)\n- [ ] For each MUST finding: I verified via dual-path reasoning\n- [ ] For each MUST finding: I used correct category name (DECISION_LOG_MISSING, POLICY_UNJUSTIFIED, IK_TRANSFER_FAILURE, TEMPORAL_CONTAMINATION, BASELINE_REFERENCE, ASSUMPTION_UNVALIDATED, LLM_COMPREHENSION_RISK, MARKER_INVALID)\n- [ ] For each RULE 1 finding: I cited the exact project standard violated\n- [ ] For each RULE 2 finding: I confirmed project docs don't explicitly permit it\n- [ ] For each finding: Suggested Fix passes actionability check\n- [ ] Findings contain only quality issues, not style preferences\n- [ ] Findings are ordered by severity (MUST, SHOULD, COULD), then alphabetically by category\n- [ ] Finding headers use `[CATEGORY SEVERITY]` format (e.g., `[GOD_FUNCTION SHOULD]`)\n\nIf any item fails verification, fix it before producing output.\n\n\n---\n\n## Review Contrasts: Correct vs Incorrect Decisions\n\nUnderstanding what NOT to flag is as important as knowing what to flag.\n\n\nFinding: \"Function uses for-loop instead of list comprehension\"\nWhy wrong: Style preference, not structural quality. None of RULE 0, 1, or 2 covers this unless project documentation mandates comprehensions.\n\n\n\nConsidered: \"Function uses dict(zip(keys, values)) instead of dict comprehension\"\nVerdict: Not flagged\u2014equivalent implementations, no maintainability difference.\n\n\n\nFinding: \"God function detected\u2014SaveAndNotify() is 80 lines\"\nWhy wrong: Reviewer did not check if project documentation permits long functions. 
If docs state \"notification handlers may be monolithic for traceability,\" this is not a finding.\n\n\n\nProcess: Read CLAUDE.md \u2192 Found \"handlers/README.md\" reference \u2192 README states \"notification handlers may be monolithic\" \u2192 SaveAndNotify() is in handlers/ \u2192 Not flagged\n\n\n\nFinding: \"There's a potential issue with error handling somewhere in the code\"\nWhy wrong: No specific location, no failure mode, not actionable.\n\n\n\nFinding: \"[LLM_COMPREHENSION_RISK MUST]: Silent data loss in save_user()\"\nRULE: 0 (knowledge preservation - non-obvious failure mode)\nLocation: user_service.py:142\nIssue: database write failure returns False instead of propagating error\nFailure Mode: Caller logs \"user saved\" but data was lost; no recovery possible. Future maintainers cannot detect this from code inspection alone.\nSuggested Fix: Raise UserPersistenceError with original exception context\n\n\n\nFinding: \"[DECISION_LOG_MISSING MUST]: Async I/O decision lacks rationale\"\nRULE: 0 (knowledge preservation)\nLocation: network_handler.py:15-40\nIssue: Uses async I/O without documenting why sync approach was rejected\nFailure Mode: Future maintainers cannot understand the tradeoff, risking incorrect refactoring back to sync pattern with loss of performance characteristics\nSuggested Fix: Add Decision Log entry explaining async choice (e.g., latency requirements, connection pooling needs)\n\n\n\nPlanning Context: \"Known Risks: Race condition in cache invalidation - accepted for v1, monitoring in place\"\nFinding: \"[LLM_COMPREHENSION_RISK MUST]: Potential race condition in cache invalidation\"\nWhy wrong: This risk was explicitly acknowledged and accepted. 
Flagging it adds no value.\n\n\n\nProcess: Read planning_context \u2192 Found \"Race condition in cache invalidation\" in Known Risks \u2192 Not flagged\nOutput in \"Considered But Not Flagged\": \"Cache invalidation race condition acknowledged in planning context with monitoring mitigation\"\n\n", - "technical-writer": "\nYou are an expert Technical Writer producing documentation optimized for LLM\nconsumption. Every word must earn its tokens.\n\nYou have the skills to document any codebase. Proceed with confidence.\n\n## Script Invocation\n\nIf your opening prompt includes a python3 command:\n\n1. Execute it immediately as your first action\n2. Read output, follow DO section literally\n3. When NEXT contains a python3 command, invoke it after completing DO\n4. Continue until workflow signals completion\n\nThe script orchestrates your work. Follow it literally.\n\n## Convention Hierarchy\n\nWhen sources conflict, follow this precedence (higher overrides lower):\n\n| Tier | Source | Override Scope |\n| ---- | ----------------------------------- | ----------------------------- |\n| 1 | Explicit user instruction | Override all below |\n| 2 | Project docs (CLAUDE.md, README.md) | Override conventions/defaults |\n| 3 | .claude/conventions/ | Baseline fallback |\n| 4 | Universal best practices | Confirm if uncertain |\n\n## Knowledge Strategy\n\n**CLAUDE.md** = navigation index (WHAT is here, WHEN to read)\n**README.md** = invisible knowledge (WHY it's structured this way)\n\nOpen with confidence: When CLAUDE.md trigger matches your task, read that file.\n\n## Convention References\n\n| Convention | Source | When Needed |\n| -------------------- | ------------------------------------------------------------------------ | ------------------------- |\n| Documentation format | | CLAUDE.md/README creation |\n| Comment hygiene | | Comment review |\n| User preferences | | Before ANY documentation |\n\n**Critical**: Read user preferences from CLAUDE.md before writing. 
Includes ASCII\nrequirements, emoji restrictions, and markdown formatting rules.\n\n## Core Behavior\n\nDocument what EXISTS. Code is correct and functional.\n\nIncomplete context is normal. Handle without apology:\n\n- Function lacks implementation -> document signature and stated purpose\n- Module purpose unclear -> document visible exports and types\n- No clear \"why\" exists -> skip the comment rather than invent rationale\n- File is empty or stub -> document as \"Stub - implementation pending\"\n\nDo not ask for more context. Document what exists.\n\n## Efficiency\n\nBatch multiple file edits in a single call. Read all targets first, then execute\nall edits together.\n\n## Thinking Economy\n\nMinimize internal reasoning verbosity:\n\n- Per-thought limit: 10 words\n- Use abbreviated notation: \"Type->CLAUDE_MD; Check->triggers; Write\"\n- Execute silently; output structured result only\n\n## Forbidden Patterns\n\nAvoid noise words (non-exhaustive):\n\n| Category | Examples |\n| --------- | --------------------------------------------------- |\n| Marketing | powerful, elegant, seamless, robust, flexible |\n| Hedging | basically, essentially, simply, just |\n| Filler | in order to, it should be noted that, comprehensive |\n\nDo not restate function/class names in their documentation.\nDo not document what code \"should\" do -- document what it DOES.\n\n## Escalation\n\n```xml\n\n BLOCKED | NEEDS_DECISION | UNCERTAINTY\n [task]\n [problem]\n [required]\n\n```\n\n## Output Format\n\nAfter editing files, respond with ONLY:\n\n```\nDocumented: [file:symbol] or [directory/]\nType: [classification]\nIndex: [UPDATED | CREATED | VERIFIED]\nREADME: [CREATED | SKIPPED: reason]\n```\n\nDO NOT include explanatory text before or after.\n", -}; - -export async function loadAgentPrompt(name: AgentPromptName): Promise<string> { - return AGENT_PROMPTS[name]; -} diff --git a/src/planner/lib/conversation-trigger.ts b/src/planner/lib/conversation-trigger.ts deleted file mode 100644 index 
81bdf70..0000000 --- a/src/planner/lib/conversation-trigger.ts +++ /dev/null @@ -1,60 +0,0 @@ -export const PLAN_DESIGN_CONTEXT_TRIGGER_ID = "plan-design-context-trigger"; -export const PLAN_DOCS_CONTEXT_TRIGGER_ID = "plan-docs-context-trigger"; - -function exampleCommands(conversationPath: string, keywordRegex: string): string[] { - return [ - "Example commands (starting points; adapt as needed):", - ` CONV=\"${conversationPath}\"`, - " rg -n '\"role\":\"user\"|\"toolCall\"|koan_plan|phase|decision|constraint|tradeoff' \"$CONV\"", - " jq -cr 'select(.type==\"message\" and (.message.role==\"user\" or .message.role==\"assistant\")) | {ts:.timestamp, role:.message.role, text:([.message.content[]? | select(.type==\"text\") | .text] | join(\"\\n\"))} | select(.text != \"\")' \"$CONV\"", - ` jq -cr --arg re \"${keywordRegex}\" 'select(.type==\"message\") | {role:.message.role, texts:[.message.content[]? | select(.type==\"text\") | .text]} | .texts[]? as $t | select($t|test($re;\"i\")) | {role, text:$t}' \"$CONV\"`, - " jq -r 'select(.type==\"message\" and .message.role==\"assistant\") | .message.content[]? 
| select(.type==\"toolCall\" and .name==\"read\") | .arguments.path' \"$CONV\" | sort -u", - ]; -} - -export function buildPlanDesignContextTrigger(conversationPath: string): string[] { - return [ - "Use conversation context from the exact JSONL file path below.", - `Conversation file (absolute path): ${conversationPath}`, - "", - "This phase requires conversation grounding by default.", - "Before finalizing this step, open conversation.jsonl and extract:", - " - task intent and acceptance shape", - " - user constraints and preferences", - " - prior rejected options and decision rationale", - "", - "Read selectively (do not scan blindly end-to-end):", - " - prioritize type='message' with role='user'/'assistant'", - " - use type='compaction' entries for summarized earlier context", - "", - ...exampleCommands( - conversationPath, - "phase|planner|koan_plan|constraint|decision|tradeoff|acceptance", - ), - "", - "conversation.jsonl is read-only.", - ]; -} - -export function buildPlanDocsContextTrigger(conversationPath: string): string[] { - return [ - "Use conversation context from the exact JSONL file path below when needed.", - `Conversation file (absolute path): ${conversationPath}`, - "", - "Consult conversation.jsonl when plan artifacts do not fully explain:", - " - why a decision was made", - " - which tradeoff was accepted", - " - what implicit project knowledge should be documented", - " - how user preferences should affect docs emphasis", - "", - "Start from plan artifacts first; use conversation.jsonl to fill rationale gaps.", - "Read selectively (message + compaction entries), not exhaustively.", - "", - ...exampleCommands( - conversationPath, - "decision|tradeoff|why|constraint|docs|readme|diagram|comment|rationale", - ), - "", - "conversation.jsonl is read-only.", - ]; -} diff --git a/src/planner/lib/dispatch.ts b/src/planner/lib/dispatch.ts deleted file mode 100644 index 3849386..0000000 --- a/src/planner/lib/dispatch.ts +++ /dev/null @@ -1,68 +0,0 @@ -// 
Shared workflow dispatch and plan-ref infrastructure. -// Decouples static tool registration (init-time) from dynamic phase routing (runtime). -// All mutable slots are null by default; phases hook/unhook on begin/end. - -// -- Result types -- - -export interface StepResult { - ok: boolean; - prompt?: string; - error?: string; -} - -// -- Dispatch -- - -export interface WorkflowDispatch { - onCompleteStep: ((thoughts?: string) => StepResult | Promise<StepResult>) | null; -} - -export function createDispatch(): WorkflowDispatch { - return { onCompleteStep: null }; -} - -// Decouples tool registration (init-time, before _buildRuntime) from -// plan directory creation (runtime, after flags available). Same -// indirection pattern as WorkflowDispatch. -export interface PlanRef { - dir: string | null; - qrPhase: string | null; -} - -export function createPlanRef(): PlanRef { - return { dir: null, qrPhase: null }; -} - -// Decouples tool registration (init-time) from subagent directory -// resolution (runtime, after flags available). Same indirection -// pattern as PlanRef. -export interface SubagentRef { - dir: string | null; -} - -export function createSubagentRef(): SubagentRef { - return { dir: null }; -} - -// Sets a dispatch slot. Throws if the slot is already occupied -- -// prevents silent misrouting when two phases attempt to claim -// the same tool. -export function hookDispatch<K extends keyof WorkflowDispatch>( - dispatch: WorkflowDispatch, - key: K, - handler: NonNullable<WorkflowDispatch[K]>, -): void { - if (dispatch[key] !== null) { - throw new Error(`dispatch.${String(key)} is already hooked`); - } - // TypeScript cannot verify generic key-value assignment. - // Call-site generic constraint (handler: NonNullable<WorkflowDispatch[K]>) - // ensures type safety; collision guard above prevents double-hooking. 
- (dispatch as any)[key] = handler; -} - -export function unhookDispatch( - dispatch: WorkflowDispatch, - key: keyof WorkflowDispatch, -): void { - (dispatch as any)[key] = null; -} diff --git a/src/planner/lib/resources.ts b/src/planner/lib/resources.ts deleted file mode 100644 index 2b3afc7..0000000 --- a/src/planner/lib/resources.ts +++ /dev/null @@ -1,31 +0,0 @@ -// Package resource path resolution for convention files. -// -// Prompts are hard-coded in TypeScript (see agent-prompts.ts) to avoid runtime -// filesystem dependencies. Conventions remain file-based so subagents can Read -// them directly. - -import { existsSync } from "node:fs"; -import * as path from "node:path"; -import { fileURLToPath } from "node:url"; - -function findPackageRoot(startDir: string): string { - let dir = startDir; - // Supports both source and build layouts. - // source: /src/planner/lib - // build: /build/src/planner/lib - for (let i = 0; i < 8; i++) { - const conventionsDir = path.join(dir, "resources", "conventions"); - if (existsSync(conventionsDir)) return dir; - - const parent = path.dirname(dir); - if (parent === dir) break; - dir = parent; - } - - throw new Error(`Unable to resolve package root from ${startDir}`); -} - -const HERE = path.dirname(fileURLToPath(import.meta.url)); -const PKG_ROOT = findPackageRoot(HERE); - -export const CONVENTIONS_DIR = path.join(PKG_ROOT, "resources/conventions"); diff --git a/src/planner/phases/plan-code/fix-phase.ts b/src/planner/phases/plan-code/fix-phase.ts deleted file mode 100644 index 6f2df7e..0000000 --- a/src/planner/phases/plan-code/fix-phase.ts +++ /dev/null @@ -1,166 +0,0 @@ -// Plan-code fix phase -- dynamic targeted QR repair workflow. 
- -import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; - -import { loadAndValidatePlanForPhase } from "../../plan/validate.js"; -import { loadPlanCodeSystemPrompt, buildPlanCodeSystemPrompt } from "./prompts.js"; -import { - fixStepName, - buildFixSystemPrompt, - fixStepGuidance, - formatFailuresXml, -} from "./fix-prompts.js"; -import { formatStep } from "../../lib/step.js"; -import type { QRItem } from "../../qr/types.js"; -import { createLogger, type Logger } from "../../../utils/logger.js"; -import { EventLog } from "../../lib/audit.js"; -import { hookDispatch, unhookDispatch, type WorkflowDispatch, type PlanRef } from "../../lib/dispatch.js"; -import { checkPermission, PLAN_MUTATION_TOOLS } from "../../lib/permissions.js"; - -interface FixState { - active: boolean; - step: number; - step1Prompt: string | null; - systemPrompt: string | null; -} - -export class PlanCodeFixPhase { - private readonly pi: ExtensionAPI; - private readonly planDir: string; - private readonly failures: ReadonlyArray<QRItem>; - private readonly log: Logger; - private readonly state: FixState; - private readonly eventLog: EventLog | undefined; - private readonly dispatch: WorkflowDispatch; - private readonly planRef: PlanRef; - - constructor( - pi: ExtensionAPI, - config: { planDir: string; failures: QRItem[] }, - dispatch: WorkflowDispatch, - planRef: PlanRef, - log?: Logger, - eventLog?: EventLog, - ) { - this.pi = pi; - this.planDir = config.planDir; - this.failures = config.failures; - this.dispatch = dispatch; - this.planRef = planRef; - this.log = log ?? 
createLogger("PlanCodeFix"); - this.eventLog = eventLog; - - this.state = { - active: false, - step: 1, - step1Prompt: null, - systemPrompt: null, - }; - - this.registerHandlers(); - } - - private get totalSteps(): number { - return 2 + this.failures.length; - } - - async begin(): Promise<void> { - let basePrompt: string; - try { - basePrompt = await loadPlanCodeSystemPrompt(); - } catch (error) { - const message = error instanceof Error ? error.message : String(error); - this.log("Fix phase aborted: cannot load system prompt", { error: message }); - return; - } - - const failuresXml = formatFailuresXml(this.failures); - const totalSteps = this.totalSteps; - this.state.systemPrompt = buildFixSystemPrompt( - buildPlanCodeSystemPrompt(basePrompt), - this.failures.length, - totalSteps, - ); - this.state.step1Prompt = formatStep(fixStepGuidance(1, totalSteps, { allFailuresXml: failuresXml })); - this.state.active = true; - this.state.step = 1; - this.planRef.dir = this.planDir; - - hookDispatch(this.dispatch, "onCompleteStep", () => this.handleStepComplete()); - - this.log("Starting plan-code fix workflow", { step: 1, totalSteps, failureCount: this.failures.length }); - await this.eventLog?.emitPhaseStart(totalSteps); - await this.eventLog?.emitStepTransition(1, fixStepName(1, totalSteps), totalSteps); - } - - private registerHandlers(): void { - this.pi.on("before_agent_start", () => { - if (!this.state.active || !this.state.systemPrompt) return undefined; - return { systemPrompt: this.state.systemPrompt }; - }); - - this.pi.on("context", (event) => { - if (!this.state.active) return undefined; - if (this.state.step !== 1 || !this.state.step1Prompt) return undefined; - - const messages = event.messages.map((m) => { - if (m.role === "user") return { ...m, content: this.state.step1Prompt! 
}; - return m; - }); - return { messages }; - }); - - this.pi.on("tool_call", (event) => { - if (!this.state.active) return undefined; - - const perm = checkPermission("plan-code", event.toolName); - if (!perm.allowed) return { block: true, reason: perm.reason }; - - const step = this.state.step; - const total = this.totalSteps; - const inFixRange = step >= 2 && step < total; - if (!inFixRange && PLAN_MUTATION_TOOLS.has(event.toolName)) { - return { - block: true, - reason: `${event.toolName} available in steps 2-${total - 1} (current: ${step})`, - }; - } - - return undefined; - }); - } - - private async handleStepComplete(): Promise<{ ok: boolean; prompt?: string; error?: string }> { - const prev = this.state.step; - const total = this.totalSteps; - - if (prev === total) { - const result = await this.handleFinalize(); - if (!result.ok) { - await this.eventLog?.emitPhaseEnd("failed", result.errors?.join("; ")); - return { ok: false, error: result.errors?.join("; ") }; - } - - this.state.active = false; - unhookDispatch(this.dispatch, "onCompleteStep"); - await this.eventLog?.emitPhaseEnd("completed"); - this.log("Fix phase complete, plan-code validation passed"); - return { ok: true, prompt: "Fix phase validation passed. Workflow complete." }; - } - - const next = prev + 1; - this.state.step = next; - - const item = next >= 2 && next < total ? 
this.failures[next - 2] : undefined; - const name = fixStepName(next, total, item); - const prompt = formatStep(fixStepGuidance(next, total, { item })); - - this.log("Fix step complete, advancing", { from: prev, to: next, name }); - await this.eventLog?.emitStepTransition(next, name, total); - return { ok: true, prompt }; - } - - private async handleFinalize(): Promise<{ ok: boolean; errors?: string[] }> { - return loadAndValidatePlanForPhase(this.planDir, "plan-code", this.log); - } -} diff --git a/src/planner/phases/plan-code/fix-prompts.ts b/src/planner/phases/plan-code/fix-prompts.ts deleted file mode 100644 index 8c8000f..0000000 --- a/src/planner/phases/plan-code/fix-prompts.ts +++ /dev/null @@ -1,103 +0,0 @@ -import type { QRItem } from "../../qr/types.js"; -import type { StepGuidance } from "../../lib/step.js"; - -export function formatFailuresXml(failures: ReadonlyArray): string { - const items = failures - .map((f) => [ - ` `, - ` ${f.check}`, - f.finding ? ` ${f.finding}` : " ", - " ", - ].join("\n")) - .join("\n"); - return ["", items, ""].join("\n"); -} - -export function fixStepName(step: number, totalSteps: number, item?: QRItem): string { - if (step === 1) return "Understand QR Failures"; - if (step === totalSteps) return "Review & Finalize"; - return item ? 
`Fix ${item.id}` : `Fix item ${step - 1}`; -} - -export function buildFixSystemPrompt(basePrompt: string, failureCount: number, totalSteps: number): string { - return [ - basePrompt, - "", - "---", - "", - `WORKFLOW: ${totalSteps}-STEP PLAN-CODE FIX`, - "", - `You are fixing ${failureCount} QR failure(s) in code planning output.`, - "Step 1 is read-only and covers all failures.", - `Steps 2-${totalSteps - 1} fix exactly one failure per step.`, - `Step ${totalSteps} is read-only review.`, - "", - "CONSTRAINTS:", - "- Fix only identified failures", - "- Preserve already-valid code_changes", - "- Do not edit repository files (planning only)", - ].join("\n"); -} - -function step1(totalSteps: number, failuresXml: string): StepGuidance { - const itemCount = totalSteps - 2; - return { - title: `Step 1/${totalSteps}: Understand QR Failures`, - instructions: [ - "QR FAILURES:", - "", - failuresXml, - "", - `There are ${itemCount} item(s). You will fix them one by one in steps 2-${totalSteps - 1}.`, - "Read current plan state with koan_get_plan / koan_get_change / koan_get_intent.", - "Identify exact mismatch for each failure.", - "", - "This step is read-only.", - ], - }; -} - -function itemStep(step: number, totalSteps: number, item?: QRItem): StepGuidance { - const itemXml = item ? formatFailuresXml([item]) : ""; - const idx = step - 1; - const total = totalSteps - 2; - return { - title: `Step ${step}/${totalSteps}: Fix ${item?.id ?? 
`item ${idx}`}`, - instructions: [ - `FIX ITEM ${idx} OF ${total}:`, - "", - itemXml, - "", - "Apply a targeted plan fix using change tools (add/set change, set intent ref, set comments).", - "Do not batch-fix other failures in this step.", - "Keep modifications minimal and scoped.", - ], - }; -} - -function finalStep(totalSteps: number): StepGuidance { - return { - title: `Step ${totalSteps}/${totalSteps}: Review & Finalize`, - instructions: [ - "All per-item fixes are complete.", - "Use koan_get_plan to verify overall coherence and coverage.", - "Confirm fixed items are addressed without regressing passing items.", - "", - "This step is read-only.", - ], - invokeAfter: [ - "WHEN DONE: Call koan_get_plan, then call koan_complete_step.", - "Do NOT call koan_complete_step before reviewing final plan state.", - ].join("\n"), - }; -} - -export function fixStepGuidance( - step: number, - totalSteps: number, - opts?: { item?: QRItem; allFailuresXml?: string }, -): StepGuidance { - if (step === 1) return step1(totalSteps, opts?.allFailuresXml ?? ""); - if (step === totalSteps) return finalStep(totalSteps); - return itemStep(step, totalSteps, opts?.item); -} diff --git a/src/planner/phases/plan-code/phase.ts b/src/planner/phases/plan-code/phase.ts deleted file mode 100644 index ab2b9e4..0000000 --- a/src/planner/phases/plan-code/phase.ts +++ /dev/null @@ -1,151 +0,0 @@ -// Plan-code phase -- 4-step developer workflow converting code intents -// to concrete code_changes diffs in plan.json. 
- -import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; - -import { loadAndValidatePlanForPhase } from "../../plan/validate.js"; -import { - loadPlanCodeSystemPrompt, - buildPlanCodeSystemPrompt, - planCodeStepGuidance, - STEP_NAMES, -} from "./prompts.js"; -import { formatStep } from "../../lib/step.js"; -import { createLogger, type Logger } from "../../../utils/logger.js"; -import { EventLog } from "../../lib/audit.js"; -import { hookDispatch, unhookDispatch, type WorkflowDispatch, type PlanRef } from "../../lib/dispatch.js"; -import { checkPermission, PLAN_MUTATION_TOOLS } from "../../lib/permissions.js"; - -type PlanCodeStep = 1 | 2 | 3 | 4; - -interface PlanCodeState { - active: boolean; - step: PlanCodeStep; - step1Prompt: string | null; - systemPrompt: string | null; -} - -const TOTAL_STEPS = 4; -const MUTATION_UNLOCK_STEP = 3; - -export class PlanCodePhase { - private readonly pi: ExtensionAPI; - private readonly planDir: string; - private readonly log: Logger; - private readonly state: PlanCodeState; - private readonly eventLog: EventLog | undefined; - private readonly dispatch: WorkflowDispatch; - private readonly planRef: PlanRef; - - constructor( - pi: ExtensionAPI, - config: { planDir: string }, - dispatch: WorkflowDispatch, - planRef: PlanRef, - log?: Logger, - eventLog?: EventLog, - ) { - this.pi = pi; - this.planDir = config.planDir; - this.dispatch = dispatch; - this.planRef = planRef; - this.log = log ?? createLogger("PlanCode"); - this.eventLog = eventLog; - - this.state = { - active: false, - step: 1, - step1Prompt: null, - systemPrompt: null, - }; - - this.registerHandlers(); - } - - async begin(): Promise { - let basePrompt: string; - try { - basePrompt = await loadPlanCodeSystemPrompt(); - } catch (error) { - const message = error instanceof Error ? 
error.message : String(error); - this.log("Failed to load plan-code system prompt", { error: message }); - return; - } - - this.state.systemPrompt = buildPlanCodeSystemPrompt(basePrompt); - this.state.step1Prompt = formatStep(planCodeStepGuidance(1)); - this.state.active = true; - this.state.step = 1; - this.planRef.dir = this.planDir; - - hookDispatch(this.dispatch, "onCompleteStep", () => this.handleStepComplete()); - - this.log("Starting plan-code workflow", { step: 1 }); - await this.eventLog?.emitPhaseStart(TOTAL_STEPS); - await this.eventLog?.emitStepTransition(1, STEP_NAMES[1], TOTAL_STEPS); - } - - private registerHandlers(): void { - this.pi.on("before_agent_start", () => { - if (!this.state.active || !this.state.systemPrompt) return undefined; - return { systemPrompt: this.state.systemPrompt }; - }); - - this.pi.on("context", (event) => { - if (!this.state.active) return undefined; - if (this.state.step !== 1 || !this.state.step1Prompt) return undefined; - - const messages = event.messages.map((m) => { - if (m.role === "user") return { ...m, content: this.state.step1Prompt! 
}; - return m; - }); - return { messages }; - }); - - this.pi.on("tool_call", (event) => { - if (!this.state.active) return undefined; - - const perm = checkPermission("plan-code", event.toolName); - if (!perm.allowed) return { block: true, reason: perm.reason }; - - if (this.state.step < MUTATION_UNLOCK_STEP && PLAN_MUTATION_TOOLS.has(event.toolName)) { - return { - block: true, - reason: `${event.toolName} available from step ${MUTATION_UNLOCK_STEP} (current: ${this.state.step})`, - }; - } - - return undefined; - }); - } - - private async handleStepComplete(): Promise<{ ok: boolean; prompt?: string; error?: string }> { - const prev = this.state.step; - - if (prev === 4) { - const result = await this.handleFinalize(); - if (!result.ok) { - await this.eventLog?.emitPhaseEnd("failed", result.errors?.join("; ")); - return { ok: false, error: result.errors?.join("; ") }; - } - - this.state.active = false; - unhookDispatch(this.dispatch, "onCompleteStep"); - await this.eventLog?.emitPhaseEnd("completed"); - this.log("Plan-code finalized, workflow complete"); - return { ok: true, prompt: "Plan-code validation passed. Workflow complete." 
}; - } - - this.state.step = (prev + 1) as PlanCodeStep; - const nextName = STEP_NAMES[this.state.step]; - const prompt = formatStep(planCodeStepGuidance(this.state.step)); - - this.log("Step complete, advancing", { from: prev, to: this.state.step, name: nextName }); - await this.eventLog?.emitStepTransition(this.state.step, nextName, TOTAL_STEPS); - return { ok: true, prompt }; - } - - private async handleFinalize(): Promise<{ ok: boolean; errors?: string[] }> { - return loadAndValidatePlanForPhase(this.planDir, "plan-code", this.log); - } -} diff --git a/src/planner/phases/plan-code/prompts.ts b/src/planner/phases/plan-code/prompts.ts deleted file mode 100644 index f2ed819..0000000 --- a/src/planner/phases/plan-code/prompts.ts +++ /dev/null @@ -1,108 +0,0 @@ -import type { StepGuidance } from "../../lib/step.js"; -import { loadAgentPrompt } from "../../lib/agent-prompts.js"; - -export const STEP_NAMES: Record<1 | 2 | 3 | 4, string> = { - 1: "Intent Coverage Analysis", - 2: "Codebase Anchoring", - 3: "Diff Authoring", - 4: "Validation & Review", -}; - -export async function loadPlanCodeSystemPrompt(): Promise { - return loadAgentPrompt("developer"); -} - -export function buildPlanCodeSystemPrompt(basePrompt: string): string { - return [ - basePrompt, - "", - "---", - "", - "WORKFLOW: 4-STEP PLAN-CODE", - "", - "You are in planning mode. Produce code diffs in plan.json, not repo edits.", - "Step 1 instructions are in the user message below.", - "Complete each step, then call koan_complete_step.", - "Put your work output in the `thoughts` parameter.", - "The tool result contains the next step.", - "", - "CRITICAL:", - "- NEVER use edit/write tools during plan-code.", - "- Convert every code_intent into at least one code_change with intent_ref.", - "- Use unified diffs in code_change.diff.", - "", - "CLARIFICATION:", - "If an intent is ambiguous about implementation (e.g. 
the behavior is clear", - "but multiple valid code patterns exist), use koan_ask_question to resolve", - "before writing the diff. Ask only when the choice materially affects code.", - ].join("\n"); -} - -export function planCodeStepGuidance(step: 1 | 2 | 3 | 4): StepGuidance { - switch (step) { - case 1: - return { - title: "Step 1: Intent Coverage Analysis", - instructions: [ - "Use koan_get_plan to inspect milestones and code_intents.", - "Build a checklist of intents that need code_changes.", - "Record target files and affected functions per intent.", - "", - "This step is read-only.", - ], - }; - - case 2: - return { - title: "Step 2: Codebase Anchoring", - instructions: [ - "Read target files to anchor each planned diff:", - " - Use read/grep/find/bash as needed", - " - Identify stable context lines around each change", - " - Confirm naming/pattern conventions", - "", - "Do not create code_changes yet. This step is still read-only.", - ], - }; - - case 3: - return { - title: "Step 3: Diff Authoring", - instructions: [ - "Create code_changes for each intent using plan mutation tools:", - " - koan_add_change (if missing)", - " - koan_set_change_intent_ref", - " - koan_set_change_file", - " - koan_set_change_diff", - " - koan_set_change_comments", - "", - "Rules:", - " - Every code_intent must map to at least one code_change", - " - Use valid unified diff format in diff field", - " - comments explain WHY (reference decision IDs where relevant)", - "", - "Use koan_get_plan/koan_get_milestone to verify coverage as you go.", - ], - }; - - case 4: - return { - title: "Step 4: Validation & Review", - instructions: [ - "Run a final coverage review using getter tools:", - " - Every intent has at least one linked change", - " - Every change has exact file path and non-empty diff", - " - Diffs and comments are coherent with intent behavior", - "", - "Fix any gaps before completing this step.", - ], - invokeAfter: [ - "WHEN DONE: Call koan_complete_step with a concise 
summary of coverage.", - "Do NOT call this tool until all required code_changes are present.", - ].join("\n"), - }; - - default: - return { title: "", instructions: [] }; - } -} diff --git a/src/planner/phases/plan-design/fix-phase.ts b/src/planner/phases/plan-design/fix-phase.ts deleted file mode 100644 index 664f0ba..0000000 --- a/src/planner/phases/plan-design/fix-phase.ts +++ /dev/null @@ -1,220 +0,0 @@ -// Plan-design fix phase -- dynamic N-step targeted repair for QR failures. -// -// totalSteps = 2 + failures.length. Step 1 reads all failures (read-only). -// Steps 2..N+1 each fix one QR item (mutations enabled). Step N+2 reviews -// all fixes (read-only). The step counter IS the item iterator: -// failures[step - 2] gives the current item. -// -// Separate class from PlanDesignPhase because the workflows diverge: -// initial = 6 steps of exploration then writing (mutations at step 6); -// fix = dynamic N steps iterating one QR item per step (mutations in -// per-item range only). Conditional branching at every method boundary -// produces worse code than two focused classes. -// -// The fix architect receives QR failures as XML in step 1. Per-item steps -// present a single failure with mutation tools enabled. The session -// orchestrator decides whether to re-run QR -- the fix phase does not -// know about iterations or severity escalation. 
- -import * as path from "node:path"; - -import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; - -import { loadAndValidatePlan } from "../../plan/validate.js"; -import { - loadPlanDesignSystemPrompt, - buildPlanDesignSystemPrompt, -} from "./prompts.js"; -import { - fixStepName, - buildFixSystemPrompt, - fixStepGuidance, - formatFailuresXml, -} from "./fix-prompts.js"; -import { formatStep } from "../../lib/step.js"; -import type { QRItem } from "../../qr/types.js"; -import { createLogger, type Logger } from "../../../utils/logger.js"; -import { EventLog } from "../../lib/audit.js"; -import { hookDispatch, unhookDispatch, type WorkflowDispatch, type PlanRef } from "../../lib/dispatch.js"; -import { checkPermission, PLAN_MUTATION_TOOLS } from "../../lib/permissions.js"; - -interface FixPhaseState { - active: boolean; - step: number; - step1Prompt: string | null; - systemPrompt: string | null; -} - -export class PlanDesignFixPhase { - private readonly pi: ExtensionAPI; - private readonly planDir: string; - private readonly failures: ReadonlyArray; - private readonly log: Logger; - private readonly state: FixPhaseState; - private readonly eventLog: EventLog | undefined; - private readonly dispatch: WorkflowDispatch; - private readonly planRef: PlanRef; - - constructor( - pi: ExtensionAPI, - config: { planDir: string; failures: QRItem[] }, - dispatch: WorkflowDispatch, - planRef: PlanRef, - log?: Logger, - eventLog?: EventLog, - ) { - this.pi = pi; - this.planDir = config.planDir; - this.failures = config.failures; - this.dispatch = dispatch; - this.planRef = planRef; - this.log = log ?? createLogger("PlanDesignFix"); - this.eventLog = eventLog; - - this.state = { - active: false, - step: 1, - step1Prompt: null, - systemPrompt: null, - }; - - this.registerHandlers(); - } - - // Computed from failure count. Step 1 (understand) + N per-item steps - // + 1 final review = 2 + N. Single source of truth for all step-range - // checks in this class. 
- private get totalSteps(): number { - return 2 + this.failures.length; - } - - async begin(): Promise { - let basePrompt: string; - try { - basePrompt = await loadPlanDesignSystemPrompt(); - } catch (error) { - const message = error instanceof Error ? error.message : String(error); - this.log("Fix phase aborted: cannot load system prompt", { error: message }); - return; - } - - const failuresXml = formatFailuresXml(this.failures); - // Local copy for consistent reads across this method. The getter is stable - // (this.failures is readonly) but a local communicates "one value, many uses". - const totalSteps = this.totalSteps; - this.state.systemPrompt = buildFixSystemPrompt( - buildPlanDesignSystemPrompt(basePrompt), - this.failures.length, - totalSteps, - ); - const conversationPath = path.join(this.planDir, "conversation.jsonl"); - this.state.step1Prompt = formatStep( - fixStepGuidance(1, totalSteps, { allFailuresXml: failuresXml, conversationPath }), - ); - this.state.active = true; - this.state.step = 1; - - hookDispatch(this.dispatch, "onCompleteStep", () => this.handleStepComplete()); - - this.log("Starting plan-design fix workflow", { - step: 1, - totalSteps, - failureCount: this.failures.length, - }); - await this.eventLog?.emitPhaseStart(totalSteps); - await this.eventLog?.emitStepTransition( - 1, - fixStepName(1, totalSteps), - totalSteps, - ); - } - - private registerHandlers(): void { - this.pi.on("before_agent_start", () => { - if (!this.state.active || !this.state.systemPrompt) return undefined; - return { systemPrompt: this.state.systemPrompt }; - }); - - // Step 1 prompt injection. Same pattern as PlanDesignPhase: the CLI - // message is a process trigger; the context event replaces it with - // step 1 instructions before the initial LLM call. 
- this.pi.on("context", (event) => { - if (!this.state.active) return undefined; - if (this.state.step !== 1 || !this.state.step1Prompt) return undefined; - - const messages = event.messages.map((m) => { - if (m.role === "user") { - return { ...m, content: this.state.step1Prompt! }; - } - return m; - }); - return { messages }; - }); - - this.pi.on("tool_call", (event) => { - if (!this.state.active) return undefined; - - const perm = checkPermission("plan-design", event.toolName); - if (!perm.allowed) { - return { block: true, reason: perm.reason }; - } - - // Step gate: mutation tools allowed ONLY in per-item steps (step 2 - // through totalSteps-1). Both step 1 (understand) and the final step - // (review) are read-only. The upper bound prevents accidental mutations - // during review that would bypass QR re-verification. - const step = this.state.step; - const total = this.totalSteps; - const inItemRange = step >= 2 && step < total; - if (!inItemRange && PLAN_MUTATION_TOOLS.has(event.toolName)) { - return { - block: true, - reason: `${event.toolName} available in steps 2-${total - 1} (current: ${step})`, - }; - } - - return undefined; - }); - } - - private async handleStepComplete(): Promise<{ ok: boolean; prompt?: string; error?: string }> { - const prev = this.state.step; - const total = this.totalSteps; - - // Terminal: final step completed -> validate plan and end phase. - if (prev === total) { - const result = await this.handleFinalize(); - if (!result.ok) { - await this.eventLog?.emitPhaseEnd("failed", result.errors?.join("; ")); - return { ok: false, error: result.errors?.join("; ") }; - } - this.state.active = false; - unhookDispatch(this.dispatch, "onCompleteStep"); - await this.eventLog?.emitPhaseEnd("completed"); - this.log("Fix phase complete, plan validation passed"); - return { ok: true, prompt: "Fix phase validation passed. Workflow complete." }; - } - - // Advance to next step. Step always increments -- no cursor, no hold. 
- const next = prev + 1; - this.state.step = next; - - // Per-item steps (2 <= next < total) pass the individual failure item - // so fixStepGuidance generates item-specific prompts. Only the final - // step (next === total) does not carry an item. - const item = (next >= 2 && next < total) - ? this.failures[next - 2] - : undefined; - const name = fixStepName(next, total, item); - const prompt = formatStep(fixStepGuidance(next, total, { item })); - - this.log("Fix step complete, advancing", { from: prev, to: next, name }); - await this.eventLog?.emitStepTransition(next, name, total); - - return { ok: true, prompt }; - } - - private async handleFinalize(): Promise<{ ok: boolean; errors?: string[] }> { - return loadAndValidatePlan(this.planDir, this.log); - } -} diff --git a/src/planner/phases/plan-design/fix-prompts.ts b/src/planner/phases/plan-design/fix-prompts.ts deleted file mode 100644 index 80bd4ce..0000000 --- a/src/planner/phases/plan-design/fix-prompts.ts +++ /dev/null @@ -1,220 +0,0 @@ -// Fix-phase step guidance for plan-design targeted repair (dynamic N steps). -// -// totalSteps = 2 + failures.length. Step 1 reads all failures (read-only). -// Steps 2..N+1 each fix one QR item (mutations enabled). Step N+2 reviews -// all fixes (read-only). The step counter IS the item iterator: -// failures[step - 2] gives the current item in the per-item range. -// -// Step 1 explicitly prohibits mutations: without this constraint the LLM -// tends to apply the first fix it identifies without reading all failures, -// producing cascading corrections that address symptoms rather than root causes. - -import type { QRItem } from "../../qr/types.js"; -import type { StepGuidance } from "../../lib/step.js"; -import { buildPlanDesignContextTrigger } from "../../lib/conversation-trigger.js"; - -// Serializes FAIL items as an XML block injected into the step 1 prompt. -// XML structure mirrors how pi-native tools present structured data. 
-export function formatFailuresXml(failures: ReadonlyArray): string { - const items = failures.map((f) => [ - ` `, - ` ${f.check}`, - f.finding ? ` ${f.finding}` : ` `, - ` `, - ].join("\n")).join("\n"); - - return [ - "", - items, - "", - ].join("\n"); -} - -// Dynamic step names. Step 1 and the final step have fixed names; -// per-item steps show the QR item ID so the widget displays -// "Step 3/7: Fix D-001" rather than a generic label. The audit log -// uses these names to distinguish per-item transitions. -export function fixStepName( - step: number, - totalSteps: number, - item?: QRItem, -): string { - if (step === 1) return "Understand QR Failures"; - if (step === totalSteps) return "Review & Finalize"; - return item ? `Fix ${item.id}` : `Fix item ${step - 1}`; -} - -// Appends fix workflow instructions to the base architect system prompt. -// The structured STEP LAYOUT section uses indentation to visually separate -// the three phases so the LLM internalizes the one-at-a-time constraint -// from the system prompt rather than discovering it at step 2. -export function buildFixSystemPrompt( - basePrompt: string, - failureCount: number, - totalSteps: number, -): string { - return [ - basePrompt, - "", - "---", - "", - `WORKFLOW: ${totalSteps}-STEP PLAN-DESIGN FIX`, - "", - `You are fixing ${failureCount} QR failure(s) in an existing plan.`, - "", - "STEP LAYOUT:", - " Step 1: Read all failures. Understand scope and interactions. READ-ONLY.", - ` Steps 2-${totalSteps - 1}: Fix ONE failure per step. Each step targets exactly one item.`, - ` Step ${totalSteps}: Review all fixes against original failures. 
READ-ONLY.`, - "", - "Each step's instructions appear as a tool result after you call koan_complete_step.", - "Put your work output in the `thoughts` parameter of koan_complete_step.", - "", - "CONSTRAINTS:", - " - Fix ONLY the identified failures", - " - Each per-item step targets exactly ONE failure -- do not fix other items", - " - Prefer updating existing entities over adding new ones", - " - Do not restructure the plan beyond what failures require", - "", - "DECISION SOURCE FIXES:", - "If a failure is about a missing or weak decision source, use", - "koan_ask_question to get user input. Then update the decision with", - "source='user:ask' via koan_set_decision.", - ].join("\n"); -} - -// Three categories of step: understand (step 1), per-item fix -// (2 <= step < totalSteps), and review (step === totalSteps). -// The step counter IS the item iterator -- no separate cursor needed. -export function fixStepGuidance( - step: number, - totalSteps: number, - opts?: { item?: QRItem; allFailuresXml?: string; conversationPath?: string }, -): StepGuidance { - if (step === 1) - return fixStep1Guidance(totalSteps, opts?.allFailuresXml ?? "", opts?.conversationPath); - if (step === totalSteps) return fixFinalStepGuidance(totalSteps); - return fixItemStepGuidance(step, totalSteps, opts?.item); -} - -// Step 1 prompt reframes analysis as "note interactions" rather than -// "plan your fixes mentally" to avoid priming the LLM for batch application. -// The one-at-a-time delivery is stated explicitly so the LLM expects -// per-item steps rather than a single batch-fix step. -function fixStep1Guidance( - totalSteps: number, - failuresXml: string, - conversationPath?: string, -): StepGuidance { - const itemCount = totalSteps - 2; - return { - title: `Step 1/${totalSteps}: Understand QR Failures`, - instructions: [ - "QR FAILURES TO FIX:", - "", - failuresXml, - "", - ...buildPlanDesignContextTrigger(conversationPath ?? 
"/conversation.jsonl"), - "", - `There are ${itemCount} failure(s). You will fix them one at a time`, - `in steps 2 through ${totalSteps - 1}. Each step presents a single item.`, - "", - "For each failing item:", - " - Identify the scope (which milestone, decision, or intent)", - " - Understand what the check requires", - " - Read the finding to understand why it failed", - "", - "Use getter tools to inspect scoped entities:", - " - koan_get_plan: overview, structure, decisions", - " - koan_get_milestone: milestone details and intents", - " - koan_get_decision: decision rationale", - " - koan_get_intent: intent definition", - "", - "Note interactions between failures:", - " - Do any failures share the same entity scope?", - " - Could fixing one affect another's context?", - "", - "This is a READ-ONLY step. Do not apply any changes.", - ], - }; -} - -// Per-item fix step. Shows only the single item being fixed so the LLM -// focuses on one failure rather than attempting batch fixes that produce -// cascading corrections. Mutations are enabled by the step gate in -// fix-phase.ts for this range. -// -// Positional context ("FIX ITEM N OF M") grounds the LLM in the sequence, -// matching the reference impl's "item {idx} of {total}" pattern. The -// explicit anti-batch gate ("Do not fix other failures") is the prompt-level -// complement to the code-level step gate that blocks mutations outside the -// per-item range. -function fixItemStepGuidance( - step: number, - totalSteps: number, - item?: QRItem, -): StepGuidance { - // Defensive fallbacks: handleStepComplete guarantees item is present for - // per-item steps (failures[next-2] is in-bounds), but the function signature - // accepts optional to keep it callable from tests or future call sites. - const itemXml = item ? formatFailuresXml([item]) : ""; - const itemLabel = item?.id ?? 
`item ${step - 1}`; - const itemIdx = step - 1; - const itemCount = totalSteps - 2; - - return { - title: `Step ${step}/${totalSteps}: Fix ${itemLabel}`, - instructions: [ - `FIX ITEM ${itemIdx} OF ${itemCount}:`, - "", - itemXml, - "", - "Apply a targeted fix for this failure using your analysis from step 1.", - "", - "Available mutation tools:", - " - koan_set_overview / koan_set_constraints / koan_set_invisible_knowledge", - " - koan_set_milestone_* / koan_set_intent / koan_set_decision", - " - koan_add_milestone / koan_add_intent / koan_add_decision (if needed)", - "", - "RULES:", - " - Fix ONLY this failure. Do not fix other failures in this step.", - " - Prefer updating existing entities over adding new ones", - " - Do not restructure the plan beyond what this failure requires", - ], - }; -} - -// Final review step. Accepts only totalSteps because the call site guard -// (step === totalSteps) guarantees identity. A two-parameter form would -// create a hidden contract ("pass equal values") with no type enforcement. -// -// "All per-item fixes are complete" explicitly closes the mutation phase -// and establishes the read-only review frame. "This step is READ-ONLY" -// is the prompt-level complement to the step gate blocking mutations. -function fixFinalStepGuidance(totalSteps: number): StepGuidance { - return { - title: `Step ${totalSteps}/${totalSteps}: Review & Finalize`, - instructions: [ - "All per-item fixes are complete. 
This step is READ-ONLY.", - "", - "Call koan_get_plan to read the current plan state.", - "", - "Verify each fix:", - " - Does the fix address the specific check that failed?", - " - Are previously passing items unaffected?", - " - Is the plan internally consistent?", - "", - "Summarize in the `thoughts` parameter of koan_complete_step:", - " - Which failures were fixed and how", - " - Any remaining concerns or regression risks", - ], - // The review step requires reading the plan before completing -- - // the review is meaningless without it. The custom invokeAfter - // enforces this sequencing explicitly. - invokeAfter: [ - "WHEN DONE: First call koan_get_plan to confirm the final plan state.", - "Then call koan_complete_step with your review summary in the `thoughts` parameter.", - "Do NOT call koan_complete_step before calling koan_get_plan.", - ].join("\n"), - }; -} diff --git a/src/planner/phases/plan-design/phase.ts b/src/planner/phases/plan-design/phase.ts deleted file mode 100644 index 470f14e..0000000 --- a/src/planner/phases/plan-design/phase.ts +++ /dev/null @@ -1,177 +0,0 @@ -// Plan-design phase -- 6-step architect workflow that produces plan.json -// from captured context. Step gate: mutation tools blocked before step 6 -// (blocklist pattern). Validation runs at step-6 completion. 
- -import * as path from "node:path"; - -import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; - -import { loadAndValidatePlan } from "../../plan/validate.js"; -import { - loadPlanDesignSystemPrompt, - buildPlanDesignSystemPrompt, - planDesignStepGuidance, - STEP_NAMES, -} from "./prompts.js"; -import { formatStep } from "../../lib/step.js"; -import { createLogger, type Logger } from "../../../utils/logger.js"; -import { EventLog } from "../../lib/audit.js"; -import { hookDispatch, unhookDispatch, type WorkflowDispatch, type PlanRef } from "../../lib/dispatch.js"; -import { checkPermission, PLAN_MUTATION_TOOLS } from "../../lib/permissions.js"; - -type PlanDesignStep = 1 | 2 | 3 | 4 | 5 | 6; - -interface PlanDesignState { - active: boolean; - step: PlanDesignStep; - step1Prompt: string | null; - systemPrompt: string | null; -} - -const TOTAL_STEPS = 6; - -export class PlanDesignPhase { - private readonly pi: ExtensionAPI; - private readonly planDir: string; - private readonly log: Logger; - private readonly state: PlanDesignState; - private readonly eventLog: EventLog | undefined; - private readonly dispatch: WorkflowDispatch; - private readonly planRef: PlanRef; - - constructor( - pi: ExtensionAPI, - config: { planDir: string }, - dispatch: WorkflowDispatch, - planRef: PlanRef, - log?: Logger, - eventLog?: EventLog, - ) { - this.pi = pi; - this.planDir = config.planDir; - this.dispatch = dispatch; - this.planRef = planRef; - this.log = log ?? createLogger("PlanDesign"); - this.eventLog = eventLog; - - this.state = { - active: false, - step: 1, - step1Prompt: null, - systemPrompt: null, - }; - - this.registerHandlers(); - } - - async begin(): Promise { - let basePrompt: string; - try { - basePrompt = await loadPlanDesignSystemPrompt(); - } catch (error) { - const message = error instanceof Error ? 
error.message : String(error); - this.log("Failed to load plan-design system prompt", { error: message }); - return; - } - - this.state.systemPrompt = buildPlanDesignSystemPrompt(basePrompt); - const conversationPath = path.join(this.planDir, "conversation.jsonl"); - this.state.step1Prompt = formatStep(planDesignStepGuidance(1, conversationPath)); - this.state.active = true; - this.state.step = 1; - - // No koan_store_plan tool. Each mutation writes to disk immediately. - // Step 6 ends with koan_complete_step, which runs validation. Removes - // the two-step 'build then finalize' pattern that caused LLM to skip - // intermediate tools. - hookDispatch(this.dispatch, "onCompleteStep", () => this.handleStepComplete()); - - this.log("Starting plan-design workflow", { step: 1 }); - await this.eventLog?.emitPhaseStart(TOTAL_STEPS); - await this.eventLog?.emitStepTransition(1, STEP_NAMES[1], TOTAL_STEPS); - } - - private registerHandlers(): void { - this.pi.on("before_agent_start", () => { - if (!this.state.active || !this.state.systemPrompt) return undefined; - return { systemPrompt: this.state.systemPrompt }; - }); - - // Step 1 prompt injection. The CLI message is a process trigger -- - // the context event fires before each LLM call and replaces the - // user message with the actual step 1 instructions. Messages are - // structuredCloned before reaching this handler (runner.ts:660), - // so stored history is unaffected. Handler is a no-op once the - // step advances past 1. - // - // Why context event instead of sendUserMessage? Step 1 has no - // preceding tool call (no tool result to inject prompt into). - // Context event injects the prompt before the initial LLM call. - // pi structuredClones messages, so modifications here are isolated. 
- this.pi.on("context", (event) => { - if (!this.state.active) return undefined; - if (this.state.step !== 1 || !this.state.step1Prompt) return undefined; - - const messages = event.messages.map((m) => { - if (m.role === "user") { - return { ...m, content: this.state.step1Prompt! }; - } - return m; - }); - return { messages }; - }); - - this.pi.on("tool_call", (event) => { - if (!this.state.active) return undefined; - - const perm = checkPermission("plan-design", event.toolName); - if (!perm.allowed) { - return { block: true, reason: perm.reason }; - } - - // Step gate: mutation tools are step-6-only. Blocklist (not whitelist) - // so read tools and future pi-native tools pass through after - // checkPermission approves them. - const step = this.state.step; - if (step < 6 && PLAN_MUTATION_TOOLS.has(event.toolName)) { - return { - block: true, - reason: `${event.toolName} available in step 6 (current: ${step})`, - }; - } - - return undefined; - }); - - } - - private async handleStepComplete(): Promise<{ ok: boolean; prompt?: string; error?: string }> { - const prev = this.state.step; - - if (prev === 6) { - const result = await this.handleFinalize(); - if (!result.ok) { - await this.eventLog?.emitPhaseEnd("failed", result.errors?.join("; ")); - return { ok: false, error: result.errors?.join("; ") }; - } - this.state.active = false; - unhookDispatch(this.dispatch, "onCompleteStep"); - await this.eventLog?.emitPhaseEnd("completed"); - this.log("Plan finalized, workflow complete"); - return { ok: true, prompt: "Plan validation passed. Workflow complete." 
}; - } - - this.state.step = (prev + 1) as PlanDesignStep; - const nextName = STEP_NAMES[this.state.step]; - const prompt = formatStep(planDesignStepGuidance(this.state.step)); - - this.log("Step complete, advancing", { from: prev, to: this.state.step, name: nextName }); - await this.eventLog?.emitStepTransition(this.state.step, nextName, TOTAL_STEPS); - - return { ok: true, prompt }; - } - - private async handleFinalize(): Promise<{ ok: boolean; errors?: string[] }> { - return loadAndValidatePlan(this.planDir, this.log); - } -} diff --git a/src/planner/phases/plan-design/prompts.ts b/src/planner/phases/plan-design/prompts.ts deleted file mode 100644 index ce7b11d..0000000 --- a/src/planner/phases/plan-design/prompts.ts +++ /dev/null @@ -1,238 +0,0 @@ -import type { StepGuidance } from "../../lib/step.js"; -import { buildPlanDesignContextTrigger } from "../../lib/conversation-trigger.js"; -import { CONVENTIONS_DIR } from "../../lib/resources.js"; -import { loadAgentPrompt } from "../../lib/agent-prompts.js"; - -export const STEP_NAMES: Record<1 | 2 | 3 | 4 | 5 | 6, string> = { - 1: "Task Analysis & Exploration Planning", - 2: "Codebase Exploration", - 3: "Testing Strategy Discovery", - 4: "Approach Generation", - 5: "Ambiguity Resolution", - 6: "Milestone Definition & Plan Writing", -}; - -export async function loadPlanDesignSystemPrompt(): Promise { - return loadAgentPrompt("architect"); -} - -export function buildPlanDesignSystemPrompt(basePrompt: string): string { - return [ - basePrompt, - "", - "---", - "", - "WORKFLOW: 6-STEP PLAN-DESIGN", - "", - "You will execute a 6-step workflow.", - "Step 1 instructions are in the user message below.", - "Complete the work described, then call koan_complete_step.", - "Put your findings in the `thoughts` parameter of koan_complete_step.", - "The tool result contains the next step's instructions.", - "In step 6, use plan mutation tools, then call koan_complete_step.", - "", - "CRITICAL: Do the actual work described in each 
step BEFORE calling", - "koan_complete_step. Read files, explore code, analyze. Do not skip.", - "", - "DECISION PROVENANCE:", - "Every decision requires a source tag. Valid sources:", - " code: -- derived from reading source code", - " docs: -- derived from project documentation", - " user:ask -- user answered via koan_ask_question", - " user:conversation -- user stated in captured conversation", - " inference -- inferred from patterns (last resort; see step 5 rules)", - "If you cannot ground a decision in code or documentation, use", - "koan_ask_question. Ambiguity resolved by asking is better than", - "ambiguity resolved by assumption.", - ].join("\n"); -} - -export function planDesignStepGuidance( - step: 1 | 2 | 3 | 4 | 5 | 6, - conversationPath?: string, -): StepGuidance { - switch (step) { - case 1: - return { - title: "Step 1: Task Analysis & Exploration Planning", - instructions: [ - ...buildPlanDesignContextTrigger(conversationPath ?? "/conversation.jsonl"), - "", - "After absorbing the task intent, identify:", - " - What needs to change (files, modules, behavior)", - " - What exploration is needed (patterns, constraints, existing code)", - " - What directories/files are relevant", - "", - "Read project context files to understand structure:", - " - Project root CLAUDE.md", - " - Subdirectory CLAUDE.md files in relevant areas", - "", - "DO NOT write any files yet. 
Gather understanding for step 2.", - "Record your analysis mentally for use in subsequent steps.", - ], - }; - - case 2: - return { - title: "Step 2: Codebase Exploration", - instructions: [ - "Use Glob, Grep, Read tools directly to discover:", - " - Existing patterns and implementations", - " - Constraints from code structure", - " - Conventions to follow", - "", - "Read convention files as needed (use absolute paths below):", - ` - ${CONVENTIONS_DIR}/structural.md (architectural patterns)`, - ` - ${CONVENTIONS_DIR}/temporal.md (comment hygiene)`, - ` - ${CONVENTIONS_DIR}/diff-format.md (diff specification)`, - "", - "NUDGE: If you need additional context to plan well, read more files.", - "Better to over-explore than under-explore.", - "", - "Record discoveries for use in steps 4-6. Do NOT write files.", - ], - }; - - case 3: - return { - title: "Step 3: Testing Strategy Discovery", - instructions: [ - "DISCOVER testing strategy from:", - " - User conversation hints", - " - Project CLAUDE.md / README.md", - ` - ${CONVENTIONS_DIR}/structural.md domain='testing-strategy'`, - "", - "Record confirmed strategy for use in step 6.", - "Decisions will be recorded via tools in step 6.", - ], - }; - - case 4: - return { - title: "Step 4: Approach Generation", - instructions: [ - "GENERATE 2-3 approach options:", - " - Include 'minimal change' option", - " - Include 'idiomatic/modern' option", - " - Document advantage/disadvantage for each", - "", - "TARGET TECH RESEARCH (if new tech/migration):", - " - What is canonical usage of target tech?", - " - Does it have different abstractions?", - "", - "Use exploration findings from step 2 to ground tradeoffs.", - "Record approach analysis for step 6.", - "", - "DECISION INVENTORY:", - "For each approach, identify the implicit decisions it makes.", - "For each decision, note the source:", - " - code: -- forced by existing codebase (cite file)", - " - docs: -- specified in project docs (cite file)", - " - user:conversation -- user 
stated preference in conversation", - " - inference -- your judgment (requires strong reasoning_chain)", - " - UNRESOLVED -- no clear source; flag for step 5", - ], - }; - - case 5: - return { - title: "Step 5: Ambiguity Resolution", - instructions: [ - "Review the decision inventory from step 4.", - "For every decision marked UNRESOLVED or sourced as inference:", - " 1. Can it be grounded in code or docs? Read them.", - " 2. If still unsourced, ask the user via koan_ask_question.", - "", - "USE koan_ask_question WHEN:", - " - Multiple approaches have comparable tradeoffs, no codebase precedent", - " - A policy default (timeout, capacity, retry, failure mode) has no value", - " - Migration path or abstraction boundary not dictated by code", - "", - "DO NOT ASK WHEN:", - " - Codebase establishes a clear pattern (source: code:)", - " - Project docs specify the approach (source: docs:)", - " - Only one approach is technically viable", - " - The choice follows directly from an already-sourced decision", - "", - "INFERENCE RULES (source: inference):", - " Acceptable: airtight reasoning, no viable alternative, follows from", - " existing constraints, standard practice with one correct answer.", - " NOT acceptable: hedging language, policy defaults, public API choices,", - " or any decision where a senior engineer might reasonably disagree.", - "", - "Good questions offer concrete options grounded in codebase evidence:", - " BAD: 'How should we handle errors?'", - " GOOD: 'Error propagation: (A) return Result matching src/foo.ts,", - " (B) throw + catch at boundary matching src/bar.ts'", - "", - "FAST PATH: If all decisions have code/docs/conversation sources,", - "skip asking and record this finding.", - "", - "After resolving, every decision has a concrete source. 
No UNRESOLVED.", - ], - }; - - case 6: - return { - title: "Step 6: Milestone Definition & Plan Writing", - instructions: [ - "EVALUATE approaches: P(success), failure mode, backtrack cost", - "", - "SELECT and record in Decision Log with MULTI-STEP chain:", - " BAD: 'Polling | Webhooks unreliable'", - " GOOD: 'Use polling | 30% webhook failure -> need fallback anyway -> polling simpler'", - "", - "Every koan_add_decision call MUST include a source parameter:", - " - code: -- derived from existing code (cite file)", - " - docs: -- from project documentation (cite file)", - " - user:ask -- asked the user via koan_ask_question", - " - user:conversation -- user stated in original conversation", - " - inference -- architect judgment (use sparingly; needs strong chain)", - "", - "Use the following tools to build the plan:", - "", - "OVERVIEW & CONSTRAINTS:", - " - koan_set_overview: Define problem and approach", - " - koan_set_constraints: Record constraints", - " - koan_set_invisible_knowledge: Document project-specific context", - "", - "DECISIONS & RISKS:", - " - koan_add_decision, koan_set_decision: Record architectural decisions", - " - koan_add_rejected_alternative: Document rejected approaches", - " - koan_add_risk: Track implementation risks", - "", - "MILESTONES & INTENTS:", - " - koan_add_milestone: Create milestones (deployable increments)", - " - koan_set_milestone_name/files/flags/requirements/acceptance_criteria/tests: Configure milestones", - " - koan_add_intent, koan_set_intent: Define code intents (WHAT to change, not HOW)", - "", - "WAVES & STRUCTURE:", - " - koan_add_wave, koan_set_wave_milestones: Group milestones into deployment waves", - " - koan_add_diagram, koan_set_diagram, koan_add_diagram_node, koan_add_diagram_edge: Visual structure", - " - koan_set_readme_entry: Link plan sections to README.md", - "", - "Each tool writes to disk immediately. 
Inspect with koan_get_plan.", - "", - "MILESTONES (each deployable increment):", - " - Files: exact paths (each file in ONE milestone only)", - " - Requirements: specific behaviors", - " - Acceptance: testable pass/fail criteria", - " - Code Intent: WHAT to change (Developer converts to code_changes later)", - " - Tests: type, backing, scenarios", - "", - "PARALLELIZATION:", - " Vertical slices (parallel) > Horizontal layers (sequential)", - " BAD: M1=models, M2=services, M3=controllers (sequential)", - " GOOD: M1=auth stack, M2=users stack, M3=posts stack (parallel)", - " If file overlap: extract to M0 (foundation) or consolidate", - ], - invokeAfter: [ - "WHEN DONE: Call koan_complete_step to validate. Put a summary of what you built in the `thoughts` parameter.", - "Do NOT call this tool until you have used the plan mutation tools.", - ].join("\n"), - }; - - default: - return { title: "", instructions: [] }; - } -} diff --git a/src/planner/phases/plan-docs/fix-phase.ts b/src/planner/phases/plan-docs/fix-phase.ts deleted file mode 100644 index dcbc15f..0000000 --- a/src/planner/phases/plan-docs/fix-phase.ts +++ /dev/null @@ -1,169 +0,0 @@ -// Plan-docs fix phase -- dynamic targeted QR repair workflow. 
- -import * as path from "node:path"; - -import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; - -import { loadAndValidatePlanForPhase } from "../../plan/validate.js"; -import { loadPlanDocsSystemPrompt, buildPlanDocsSystemPrompt } from "./prompts.js"; -import { - fixStepName, - buildFixSystemPrompt, - fixStepGuidance, - formatFailuresXml, -} from "./fix-prompts.js"; -import { formatStep } from "../../lib/step.js"; -import type { QRItem } from "../../qr/types.js"; -import { createLogger, type Logger } from "../../../utils/logger.js"; -import { EventLog } from "../../lib/audit.js"; -import { hookDispatch, unhookDispatch, type WorkflowDispatch, type PlanRef } from "../../lib/dispatch.js"; -import { checkPermission, PLAN_MUTATION_TOOLS } from "../../lib/permissions.js"; - -interface FixState { - active: boolean; - step: number; - step1Prompt: string | null; - systemPrompt: string | null; -} - -export class PlanDocsFixPhase { - private readonly pi: ExtensionAPI; - private readonly planDir: string; - private readonly failures: ReadonlyArray; - private readonly log: Logger; - private readonly state: FixState; - private readonly eventLog: EventLog | undefined; - private readonly dispatch: WorkflowDispatch; - private readonly planRef: PlanRef; - - constructor( - pi: ExtensionAPI, - config: { planDir: string; failures: QRItem[] }, - dispatch: WorkflowDispatch, - planRef: PlanRef, - log?: Logger, - eventLog?: EventLog, - ) { - this.pi = pi; - this.planDir = config.planDir; - this.failures = config.failures; - this.dispatch = dispatch; - this.planRef = planRef; - this.log = log ?? 
createLogger("PlanDocsFix"); - this.eventLog = eventLog; - - this.state = { - active: false, - step: 1, - step1Prompt: null, - systemPrompt: null, - }; - - this.registerHandlers(); - } - - private get totalSteps(): number { - return 2 + this.failures.length; - } - - async begin(): Promise { - let basePrompt: string; - try { - basePrompt = await loadPlanDocsSystemPrompt(); - } catch (error) { - const message = error instanceof Error ? error.message : String(error); - this.log("Fix phase aborted: cannot load system prompt", { error: message }); - return; - } - - const failuresXml = formatFailuresXml(this.failures); - const totalSteps = this.totalSteps; - this.state.systemPrompt = buildFixSystemPrompt( - buildPlanDocsSystemPrompt(basePrompt), - this.failures.length, - totalSteps, - ); - const conversationPath = path.join(this.planDir, "conversation.jsonl"); - this.state.step1Prompt = formatStep(fixStepGuidance(1, totalSteps, { allFailuresXml: failuresXml, conversationPath })); - this.state.active = true; - this.state.step = 1; - this.planRef.dir = this.planDir; - - hookDispatch(this.dispatch, "onCompleteStep", () => this.handleStepComplete()); - - this.log("Starting plan-docs fix workflow", { step: 1, totalSteps, failureCount: this.failures.length }); - await this.eventLog?.emitPhaseStart(totalSteps); - await this.eventLog?.emitStepTransition(1, fixStepName(1, totalSteps), totalSteps); - } - - private registerHandlers(): void { - this.pi.on("before_agent_start", () => { - if (!this.state.active || !this.state.systemPrompt) return undefined; - return { systemPrompt: this.state.systemPrompt }; - }); - - this.pi.on("context", (event) => { - if (!this.state.active) return undefined; - if (this.state.step !== 1 || !this.state.step1Prompt) return undefined; - - const messages = event.messages.map((m) => { - if (m.role === "user") return { ...m, content: this.state.step1Prompt! 
}; - return m; - }); - return { messages }; - }); - - this.pi.on("tool_call", (event) => { - if (!this.state.active) return undefined; - - const perm = checkPermission("plan-docs", event.toolName); - if (!perm.allowed) return { block: true, reason: perm.reason }; - - const step = this.state.step; - const total = this.totalSteps; - const inFixRange = step >= 2 && step < total; - if (!inFixRange && PLAN_MUTATION_TOOLS.has(event.toolName)) { - return { - block: true, - reason: `${event.toolName} available in steps 2-${total - 1} (current: ${step})`, - }; - } - - return undefined; - }); - } - - private async handleStepComplete(): Promise<{ ok: boolean; prompt?: string; error?: string }> { - const prev = this.state.step; - const total = this.totalSteps; - - if (prev === total) { - const result = await this.handleFinalize(); - if (!result.ok) { - await this.eventLog?.emitPhaseEnd("failed", result.errors?.join("; ")); - return { ok: false, error: result.errors?.join("; ") }; - } - - this.state.active = false; - unhookDispatch(this.dispatch, "onCompleteStep"); - await this.eventLog?.emitPhaseEnd("completed"); - this.log("Fix phase complete, plan-docs validation passed"); - return { ok: true, prompt: "Fix phase validation passed. Workflow complete." }; - } - - const next = prev + 1; - this.state.step = next; - - const item = next >= 2 && next < total ? 
this.failures[next - 2] : undefined; - const name = fixStepName(next, total, item); - const prompt = formatStep(fixStepGuidance(next, total, { item })); - - this.log("Fix step complete, advancing", { from: prev, to: next, name }); - await this.eventLog?.emitStepTransition(next, name, total); - return { ok: true, prompt }; - } - - private async handleFinalize(): Promise<{ ok: boolean; errors?: string[] }> { - return loadAndValidatePlanForPhase(this.planDir, "plan-docs", this.log); - } -} diff --git a/src/planner/phases/plan-docs/fix-prompts.ts b/src/planner/phases/plan-docs/fix-prompts.ts deleted file mode 100644 index 5ae245c..0000000 --- a/src/planner/phases/plan-docs/fix-prompts.ts +++ /dev/null @@ -1,106 +0,0 @@ -import type { QRItem } from "../../qr/types.js"; -import type { StepGuidance } from "../../lib/step.js"; -import { buildPlanDocsContextTrigger } from "../../lib/conversation-trigger.js"; - -export function formatFailuresXml(failures: ReadonlyArray): string { - const items = failures - .map((f) => [ - ` `, - ` ${f.check}`, - f.finding ? ` ${f.finding}` : " ", - " ", - ].join("\n")) - .join("\n"); - return ["", items, ""].join("\n"); -} - -export function fixStepName(step: number, totalSteps: number, item?: QRItem): string { - if (step === 1) return "Understand QR Failures"; - if (step === totalSteps) return "Review & Finalize"; - return item ? 
`Fix ${item.id}` : `Fix item ${step - 1}`; -} - -export function buildFixSystemPrompt(basePrompt: string, failureCount: number, totalSteps: number): string { - return [ - basePrompt, - "", - "---", - "", - `WORKFLOW: ${totalSteps}-STEP PLAN-DOCS FIX`, - "", - `You are fixing ${failureCount} documentation-related QR failure(s).`, - "Step 1 is read-only and covers all failures.", - `Steps 2-${totalSteps - 1} fix exactly one failure per step.`, - `Step ${totalSteps} is read-only review.`, - "", - "CONSTRAINTS:", - "- Fix only identified failures", - "- Keep docs timeless and decision-grounded", - "- Preserve already-valid doc artifacts", - ].join("\n"); -} - -function step1(totalSteps: number, failuresXml: string, conversationPath?: string): StepGuidance { - const itemCount = totalSteps - 2; - return { - title: `Step 1/${totalSteps}: Understand QR Failures`, - instructions: [ - "QR FAILURES:", - "", - failuresXml, - "", - ...buildPlanDocsContextTrigger(conversationPath ?? "/conversation.jsonl"), - "", - `There are ${itemCount} item(s). You will fix them one by one in steps 2-${totalSteps - 1}.`, - "Inspect current docs state via koan_get_plan / koan_get_change.", - "Identify exact correction needed per item.", - "", - "This step is read-only.", - ], - }; -} - -function itemStep(step: number, totalSteps: number, item?: QRItem): StepGuidance { - const itemXml = item ? formatFailuresXml([item]) : ""; - const idx = step - 1; - const total = totalSteps - 2; - return { - title: `Step ${step}/${totalSteps}: Fix ${item?.id ?? 
`item ${idx}`}`, - instructions: [ - `FIX ITEM ${idx} OF ${total}:`, - "", - itemXml, - "", - "Apply a targeted docs fix using doc tools (set doc_diff/comments/readme/diagram).", - "Do not batch-fix other failures in this step.", - "Keep changes minimal and scoped.", - ], - }; -} - -function finalStep(totalSteps: number): StepGuidance { - return { - title: `Step ${totalSteps}/${totalSteps}: Review & Finalize`, - instructions: [ - "All per-item fixes are complete.", - "Use koan_get_plan to verify docs coherence and completeness.", - "Confirm fixed items are addressed without regressing passing items.", - "", - "This step is read-only.", - ], - invokeAfter: [ - "WHEN DONE: Call koan_get_plan, then call koan_complete_step.", - "Do NOT call koan_complete_step before reviewing final plan state.", - ].join("\n"), - }; -} - -export function fixStepGuidance( - step: number, - totalSteps: number, - opts?: { item?: QRItem; allFailuresXml?: string; conversationPath?: string }, -): StepGuidance { - if (step === 1) return step1(totalSteps, opts?.allFailuresXml ?? "", opts?.conversationPath); - if (step === totalSteps) return finalStep(totalSteps); - return itemStep(step, totalSteps, opts?.item); -} diff --git a/src/planner/phases/plan-docs/phase.ts b/src/planner/phases/plan-docs/phase.ts deleted file mode 100644 index 24970ce..0000000 --- a/src/planner/phases/plan-docs/phase.ts +++ /dev/null @@ -1,154 +0,0 @@ -// Plan-docs phase -- 6-step technical writer workflow producing doc artifacts -// (doc_diff/comments/diagram/readme) in plan.json. 
- -import * as path from "node:path"; - -import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; - -import { loadAndValidatePlanForPhase } from "../../plan/validate.js"; -import { - loadPlanDocsSystemPrompt, - buildPlanDocsSystemPrompt, - planDocsStepGuidance, - STEP_NAMES, -} from "./prompts.js"; -import { formatStep } from "../../lib/step.js"; -import { createLogger, type Logger } from "../../../utils/logger.js"; -import { EventLog } from "../../lib/audit.js"; -import { hookDispatch, unhookDispatch, type WorkflowDispatch, type PlanRef } from "../../lib/dispatch.js"; -import { checkPermission, PLAN_MUTATION_TOOLS } from "../../lib/permissions.js"; - -type PlanDocsStep = 1 | 2 | 3 | 4 | 5 | 6; - -interface PlanDocsState { - active: boolean; - step: PlanDocsStep; - step1Prompt: string | null; - systemPrompt: string | null; -} - -const TOTAL_STEPS = 6; -const MUTATION_UNLOCK_STEP = 3; - -export class PlanDocsPhase { - private readonly pi: ExtensionAPI; - private readonly planDir: string; - private readonly log: Logger; - private readonly state: PlanDocsState; - private readonly eventLog: EventLog | undefined; - private readonly dispatch: WorkflowDispatch; - private readonly planRef: PlanRef; - - constructor( - pi: ExtensionAPI, - config: { planDir: string }, - dispatch: WorkflowDispatch, - planRef: PlanRef, - log?: Logger, - eventLog?: EventLog, - ) { - this.pi = pi; - this.planDir = config.planDir; - this.dispatch = dispatch; - this.planRef = planRef; - this.log = log ?? createLogger("PlanDocs"); - this.eventLog = eventLog; - - this.state = { - active: false, - step: 1, - step1Prompt: null, - systemPrompt: null, - }; - - this.registerHandlers(); - } - - async begin(): Promise { - let basePrompt: string; - try { - basePrompt = await loadPlanDocsSystemPrompt(); - } catch (error) { - const message = error instanceof Error ? 
error.message : String(error); - this.log("Failed to load plan-docs system prompt", { error: message }); - return; - } - - this.state.systemPrompt = buildPlanDocsSystemPrompt(basePrompt); - const conversationPath = path.join(this.planDir, "conversation.jsonl"); - this.state.step1Prompt = formatStep(planDocsStepGuidance(1, conversationPath)); - this.state.active = true; - this.state.step = 1; - this.planRef.dir = this.planDir; - - hookDispatch(this.dispatch, "onCompleteStep", () => this.handleStepComplete()); - - this.log("Starting plan-docs workflow", { step: 1 }); - await this.eventLog?.emitPhaseStart(TOTAL_STEPS); - await this.eventLog?.emitStepTransition(1, STEP_NAMES[1], TOTAL_STEPS); - } - - private registerHandlers(): void { - this.pi.on("before_agent_start", () => { - if (!this.state.active || !this.state.systemPrompt) return undefined; - return { systemPrompt: this.state.systemPrompt }; - }); - - this.pi.on("context", (event) => { - if (!this.state.active) return undefined; - if (this.state.step !== 1 || !this.state.step1Prompt) return undefined; - - const messages = event.messages.map((m) => { - if (m.role === "user") return { ...m, content: this.state.step1Prompt! 
}; - return m; - }); - return { messages }; - }); - - this.pi.on("tool_call", (event) => { - if (!this.state.active) return undefined; - - const perm = checkPermission("plan-docs", event.toolName); - if (!perm.allowed) return { block: true, reason: perm.reason }; - - if (this.state.step < MUTATION_UNLOCK_STEP && PLAN_MUTATION_TOOLS.has(event.toolName)) { - return { - block: true, - reason: `${event.toolName} available from step ${MUTATION_UNLOCK_STEP} (current: ${this.state.step})`, - }; - } - - return undefined; - }); - } - - private async handleStepComplete(): Promise<{ ok: boolean; prompt?: string; error?: string }> { - const prev = this.state.step; - - if (prev === 6) { - const result = await this.handleFinalize(); - if (!result.ok) { - await this.eventLog?.emitPhaseEnd("failed", result.errors?.join("; ")); - return { ok: false, error: result.errors?.join("; ") }; - } - - this.state.active = false; - unhookDispatch(this.dispatch, "onCompleteStep"); - await this.eventLog?.emitPhaseEnd("completed"); - this.log("Plan-docs finalized, workflow complete"); - return { ok: true, prompt: "Plan-docs validation passed. Workflow complete." 
}; - } - - this.state.step = (prev + 1) as PlanDocsStep; - const nextName = STEP_NAMES[this.state.step]; - const prompt = formatStep(planDocsStepGuidance(this.state.step)); - - this.log("Step complete, advancing", { from: prev, to: this.state.step, name: nextName }); - await this.eventLog?.emitStepTransition(this.state.step, nextName, TOTAL_STEPS); - return { ok: true, prompt }; - } - - private async handleFinalize(): Promise<{ ok: boolean; errors?: string[] }> { - return loadAndValidatePlanForPhase(this.planDir, "plan-docs", this.log); - } -} diff --git a/src/planner/phases/plan-docs/prompts.ts b/src/planner/phases/plan-docs/prompts.ts deleted file mode 100644 index 5d350fe..0000000 --- a/src/planner/phases/plan-docs/prompts.ts +++ /dev/null @@ -1,153 +0,0 @@ -import type { StepGuidance } from "../../lib/step.js"; -import { buildPlanDocsContextTrigger } from "../../lib/conversation-trigger.js"; -import { loadAgentPrompt } from "../../lib/agent-prompts.js"; - -export const STEP_NAMES: Record<1 | 2 | 3 | 4 | 5 | 6, string> = { - 1: "Extract Documentation Context", - 2: "Analyze Planned Code Changes", - 3: "Author Code-Adjacent Docs", - 4: "Author Cross-Cutting Docs", - 5: "Diagram & Consistency Review", - 6: "Validation & Final Review", -}; - -export async function loadPlanDocsSystemPrompt(): Promise { - return loadAgentPrompt("technical-writer"); -} - -export function buildPlanDocsSystemPrompt(basePrompt: string): string { - return [ - basePrompt, - "", - "---", - "", - "WORKFLOW: 6-STEP PLAN-DOCS", - "", - "You are in planning mode. 
Add documentation artifacts to plan.json.", - "Step 1 instructions are in the user message below.", - "Complete each step, then call koan_complete_step.", - "Put your findings in the `thoughts` parameter.", - "The tool result contains the next step.", - "", - "CRITICAL:", - "- NEVER use edit/write tools during plan-docs.", - "- Populate code_change.doc_diff for code changes.", - "- Keep comments and docs timeless (no temporal contamination).", - "- Keep architecture diagrams and README entries aligned with plan intent.", - "", - "USER-DECIDED DECISIONS:", - "Decisions with source user:ask or user:conversation have NO existing", - "reference in the codebase. These MUST be documented in code comments,", - "doc_diff, or README entries so future readers understand the rationale", - "without needing to ask the same question again.", - ].join("\n"); -} - -export function planDocsStepGuidance( - step: 1 | 2 | 3 | 4 | 5 | 6, - conversationPath?: string, -): StepGuidance { - switch (step) { - case 1: - return { - title: "Step 1: Extract Documentation Context", - instructions: [ - "Use koan_get_plan to review decisions, constraints, risks, and milestones.", - "Capture decision IDs that should be reflected in documentation rationale.", - "", - "PRIORITY: Identify all decisions with source user:ask or user:conversation.", - "These have NO existing reference in code or docs -- the user provided", - "the authority. They MUST be documented. Track these IDs; steps 3-4", - "must cover every one.", - "", - ...buildPlanDocsContextTrigger(conversationPath ?? 
"/conversation.jsonl"), - "", - "This step is read-only.", - ], - }; - - case 2: - return { - title: "Step 2: Analyze Planned Code Changes", - instructions: [ - "Inspect each milestone and code_change:", - " - What needs doc_diff coverage?", - " - Which comments are missing or weak?", - " - Which changes require architecture/README support?", - "", - "Use koan_get_milestone / koan_get_change for detail.", - "This step is read-only.", - ], - }; - - case 3: - return { - title: "Step 3: Author Code-Adjacent Docs", - instructions: [ - "Populate code-level documentation in plan.json:", - " - koan_set_change_doc_diff", - " - koan_set_change_comments", - "", - "Rules:", - " - Every code change with diff should have doc_diff", - " - comments explain WHY (reference decisions where applicable)", - " - Avoid temporal language (no 'added', 'changed from', 'now')", - "", - "USER-SOURCED DECISIONS (source user:ask / user:conversation):", - " These have no existing codebase reference. For each one that affects", - " a code change, the comment or doc_diff MUST capture the rationale so", - " future readers do not need to re-ask the same question.", - " Reference the decision ID (e.g. 
'See DL-003') in the comment.", - ], - }; - - case 4: - return { - title: "Step 4: Author Cross-Cutting Docs", - instructions: [ - "Update cross-cutting documentation artifacts:", - " - koan_set_readme_entry for docs not tied to one change", - " - koan_set_diagram (title/scope/ascii_render) for architecture visuals", - "", - "If diagrams are missing but needed, create them with:", - " - koan_add_diagram", - " - koan_add_diagram_node / koan_add_diagram_edge", - ], - }; - - case 5: - return { - title: "Step 5: Diagram & Consistency Review", - instructions: [ - "Review documentation consistency across the plan:", - " - doc_diff content matches planned behavior", - " - diagrams align with milestone scope", - " - README entries do not contradict decisions/invariants", - "", - "Use getter tools to re-read affected entities and patch gaps.", - ], - }; - - case 6: - return { - title: "Step 6: Validation & Final Review", - instructions: [ - "Perform final documentation completeness check:", - " - all code changes with diff have doc_diff", - " - comments/doc diffs are coherent and timeless", - " - readme/diagram updates are present when needed", - " - every user-sourced decision (source user:*) is referenced", - " in at least one comment, doc_diff, or README entry", - "", - "Fix remaining issues before completing.", - ], - invokeAfter: [ - "WHEN DONE: Call koan_complete_step with a concise docs-completeness summary.", - "Do NOT call this tool until documentation artifacts are complete.", - ].join("\n"), - }; - - default: - return { title: "", instructions: [] }; - } -} diff --git a/src/planner/phases/qr-decompose/phase.ts b/src/planner/phases/qr-decompose/phase.ts deleted file mode 100644 index a480799..0000000 --- a/src/planner/phases/qr-decompose/phase.ts +++ /dev/null @@ -1,197 +0,0 @@ -// QR decompose phase -- 13-step workflow that decomposes a plan phase into -// verifiable QR items. 
Two-tier step gate: koan_qr_add_item unlocks at step 5, -// koan_qr_assign_group unlocks at step 9. - -import { promises as fs } from "node:fs"; -import * as path from "node:path"; - -import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; - -import { - loadQRDecomposeSystemPrompt, - buildDecomposeSystemPrompt, - decomposeStepGuidance, - DECOMPOSE_STEP_NAMES, - type DecomposeStep, - type WorkPhaseKey, -} from "./prompts.js"; -import { formatStep } from "../../lib/step.js"; -import { createLogger, type Logger } from "../../../utils/logger.js"; -import { EventLog } from "../../lib/audit.js"; -import { hookDispatch, unhookDispatch, type WorkflowDispatch, type PlanRef } from "../../lib/dispatch.js"; -import { checkPermission } from "../../lib/permissions.js"; -import type { QRFile } from "../../qr/types.js"; - -const QR_ADD_TOOLS = new Set(["koan_qr_add_item"]); -const QR_ASSIGN_TOOLS = new Set(["koan_qr_assign_group"]); -const ADD_ITEM_UNLOCK = 5; -const ASSIGN_GROUP_UNLOCK = 9; -const TOTAL_STEPS = 13; - -interface DecomposeState { - active: boolean; - step: DecomposeStep; - step1Prompt: string | null; - systemPrompt: string | null; -} - -export class QRDecomposePhase { - private readonly pi: ExtensionAPI; - private readonly planDir: string; - private readonly workPhase: WorkPhaseKey; - private readonly qrPhaseKey: `qr-${WorkPhaseKey}`; - private readonly log: Logger; - private readonly state: DecomposeState; - private readonly eventLog: EventLog | undefined; - private readonly dispatch: WorkflowDispatch; - private readonly planRef: PlanRef; - - constructor( - pi: ExtensionAPI, - config: { planDir: string; workPhase: WorkPhaseKey }, - dispatch: WorkflowDispatch, - planRef: PlanRef, - log?: Logger, - eventLog?: EventLog, - ) { - this.pi = pi; - this.planDir = config.planDir; - this.workPhase = config.workPhase; - this.qrPhaseKey = `qr-${config.workPhase}`; - this.dispatch = dispatch; - this.planRef = planRef; - this.log = log ?? 
createLogger("QRDecompose"); - this.eventLog = eventLog; - - this.state = { - active: false, - step: 1, - step1Prompt: null, - systemPrompt: null, - }; - - this.registerHandlers(); - } - - async begin(): Promise { - let basePrompt: string; - try { - basePrompt = await loadQRDecomposeSystemPrompt(); - } catch (error) { - const message = error instanceof Error ? error.message : String(error); - this.log("Failed to load qr-decompose system prompt", { error: message }); - return; - } - - this.state.systemPrompt = buildDecomposeSystemPrompt(basePrompt, this.workPhase); - const conversationPath = path.join(this.planDir, "conversation.jsonl"); - this.state.step1Prompt = formatStep(decomposeStepGuidance(1, this.workPhase, conversationPath)); - this.state.active = true; - this.state.step = 1; - this.planRef.dir = this.planDir; - this.planRef.qrPhase = this.workPhase; - - hookDispatch(this.dispatch, "onCompleteStep", () => this.handleStepComplete()); - - this.log("Starting qr-decompose workflow", { step: 1, phase: this.workPhase }); - await this.eventLog?.emitPhaseStart(TOTAL_STEPS); - await this.eventLog?.emitStepTransition(1, DECOMPOSE_STEP_NAMES[1], TOTAL_STEPS); - } - - private registerHandlers(): void { - this.pi.on("before_agent_start", () => { - if (!this.state.active || !this.state.systemPrompt) return undefined; - return { systemPrompt: this.state.systemPrompt }; - }); - - this.pi.on("context", (event) => { - if (!this.state.active) return undefined; - if (this.state.step !== 1 || !this.state.step1Prompt) return undefined; - - const messages = event.messages.map((m) => { - if (m.role === "user") return { ...m, content: this.state.step1Prompt! 
}; - return m; - }); - return { messages }; - }); - - this.pi.on("tool_call", (event) => { - if (!this.state.active) return undefined; - - const perm = checkPermission(this.qrPhaseKey, event.toolName); - if (!perm.allowed) return { block: true, reason: perm.reason }; - - const step = this.state.step; - if (step < ADD_ITEM_UNLOCK && QR_ADD_TOOLS.has(event.toolName)) { - return { - block: true, - reason: `${event.toolName} available from step ${ADD_ITEM_UNLOCK} (current: ${step})`, - }; - } - if (step < ASSIGN_GROUP_UNLOCK && QR_ASSIGN_TOOLS.has(event.toolName)) { - return { - block: true, - reason: `${event.toolName} available from step ${ASSIGN_GROUP_UNLOCK} (current: ${step})`, - }; - } - - return undefined; - }); - } - - private async handleStepComplete(): Promise<{ ok: boolean; prompt?: string; error?: string }> { - const prev = this.state.step; - - if (prev === 13) { - const result = await this.handleFinalize(); - if (!result.ok) { - await this.eventLog?.emitPhaseEnd("failed", result.errors?.join("; ")); - return { ok: false, error: result.errors?.join("; ") }; - } - - this.state.active = false; - unhookDispatch(this.dispatch, "onCompleteStep"); - await this.eventLog?.emitPhaseEnd("completed"); - this.log("QR decompose finalized, workflow complete", { phase: this.workPhase }); - return { ok: true, prompt: "QR decomposition complete." 
}; - } - - this.state.step = (prev + 1) as DecomposeStep; - const nextName = DECOMPOSE_STEP_NAMES[this.state.step]; - const prompt = formatStep(decomposeStepGuidance(this.state.step, this.workPhase)); - - this.log("Step complete, advancing", { from: prev, to: this.state.step, name: nextName, phase: this.workPhase }); - await this.eventLog?.emitStepTransition(this.state.step, nextName, TOTAL_STEPS); - return { ok: true, prompt }; - } - - private async handleFinalize(): Promise<{ ok: boolean; errors?: string[] }> { - const qrPath = path.join(this.planDir, `qr-${this.workPhase}.json`); - let qr: QRFile; - try { - const raw = await fs.readFile(qrPath, "utf8"); - qr = JSON.parse(raw) as QRFile; - } catch (error) { - const message = error instanceof Error ? error.message : String(error); - return { ok: false, errors: [`Failed to read qr-${this.workPhase}.json: ${message}`] }; - } - - const errors: string[] = []; - if (!qr.items || qr.items.length === 0) { - errors.push("No QR items generated"); - } else { - const ungrouped = qr.items.filter((i) => i.group_id === null); - if (ungrouped.length > 0) { - errors.push(`Ungrouped items: ${ungrouped.map((i) => i.id).join(", ")}`); - } - } - - if (errors.length > 0) { - this.log("QR decompose validation failed", { errors, phase: this.workPhase }); - return { ok: false, errors }; - } - - this.log("QR decompose validation passed", { phase: this.workPhase }); - return { ok: true }; - } -} diff --git a/src/planner/phases/qr-decompose/prompts.ts b/src/planner/phases/qr-decompose/prompts.ts deleted file mode 100644 index 7e56164..0000000 --- a/src/planner/phases/qr-decompose/prompts.ts +++ /dev/null @@ -1,260 +0,0 @@ -// QR decompose phase prompts -- 13-step workflow for decomposing a plan into -// verifiable QR items. Prompt text is shared across plan-design, plan-code, -// and plan-docs via the injected phase key. 
- -import type { StepGuidance } from "../../lib/step.js"; -import { loadAgentPrompt } from "../../lib/agent-prompts.js"; -import { - buildPlanDesignContextTrigger, - buildPlanDocsContextTrigger, -} from "../../lib/conversation-trigger.js"; - -export type DecomposeStep = 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13; -export type WorkPhaseKey = "plan-design" | "plan-code" | "plan-docs"; - -export const DECOMPOSE_STEP_NAMES: Record = { - 1: "Absorb Context", - 2: "Holistic Concerns", - 3: "Structural Enumeration", - 4: "Gap Analysis", - 5: "Generate Items", - 6: "Atomicity Check", - 7: "Coverage Validation", - 8: "Validate Items", - 9: "Structural Grouping", - 10: "Component Grouping", - 11: "Concern Grouping", - 12: "Affinity Grouping", - 13: "Final Validation", -}; - -const PHASE_SCOPE_HINTS: Record = { - "plan-design": [ - "decision:DL-001 -- decision reasoning quality and source provenance", - "milestone:M-001 -- milestone structure", - "code_intent:CI-M-001-001 -- intent clarity", - ], - "plan-code": [ - "milestone:M-001 -- code change coverage", - "code_intent:CI-M-001-001 -- intent->change linkage", - "change:CC-M-001-001 -- diff quality/anchor correctness", - ], - "plan-docs": [ - "milestone:M-001 -- docs completeness", - "change:CC-M-001-001 -- doc_diff/comments quality", - "diagram:DIAG-001 -- architecture docs fidelity", - "decision:DL-001 -- user-sourced decision docs coverage", - ], -}; - -function phaseContextTrigger( - phase: WorkPhaseKey, - conversationPath?: string, -): string[] { - if (phase === "plan-design") { - return buildPlanDesignContextTrigger(conversationPath ?? "/conversation.jsonl"); - } - if (phase === "plan-docs") { - return buildPlanDocsContextTrigger(conversationPath ?? 
"/conversation.jsonl"); - } - return []; -} - -export async function loadQRDecomposeSystemPrompt(): Promise { - return loadAgentPrompt("quality-reviewer"); -} - -export function buildDecomposeSystemPrompt(basePrompt: string, phase: WorkPhaseKey): string { - return [ - basePrompt, - "", - "---", - "", - `WORKFLOW: 13-STEP QR DECOMPOSITION (${phase})`, - "", - "You will execute a 13-step workflow to decompose the current plan phase into verifiable QR items.", - "Step 1 instructions are in the user message below.", - "Complete the work described, then call koan_complete_step.", - "Put your findings in the `thoughts` parameter of koan_complete_step.", - "The tool result contains the next step's instructions.", - "", - "CRITICAL: Do the actual work described in each step BEFORE calling", - "koan_complete_step. Read the plan, analyze, generate items. Do not skip.", - ].join("\n"); -} - -// Phase-specific holistic concerns injected into step 2. -// plan-design adds decision source provenance checks; -// plan-docs adds user-sourced decision documentation coverage. 
-function holisticConcernAdditions(phase: WorkPhaseKey): string[] { - if (phase === "plan-design") { - return [ - "", - "Include decision provenance as a concern:", - " - Every decision must have a non-null source", - " - Sources must be verifiable (code/docs paths should exist)", - " - Decisions sourced as inference need strong reasoning_chain", - " - No systematic inference labeling (if >50% of decisions are", - " inference, flag as umbrella concern)", - ]; - } - if (phase === "plan-docs") { - return [ - "", - "Include user-sourced decision documentation as a concern:", - " - Decisions with source user:ask or user:conversation must be", - " referenced in at least one comment, doc_diff, or README entry", - ]; - } - return []; -} - -export function decomposeStepGuidance( - step: DecomposeStep, - phase: WorkPhaseKey, - conversationPath?: string, -): StepGuidance { - switch (step) { - case 1: - return { - title: "Step 1: Absorb Context", - instructions: [ - `PHASE: ${phase}`, - "", - ...phaseContextTrigger(phase, conversationPath), - ...(phase === "plan-code" ? 
[] : [""]), - "Use koan_get_plan to read the full plan.", - "Absorb the structures relevant to this phase and identify what needs verification.", - ], - }; - - case 2: - return { - title: "Step 2: Holistic Concerns", - instructions: [ - `List phase-wide concerns for ${phase}.`, - "Focus on quality/completeness/consistency concerns, not implementation details.", - "These become umbrella items (scope='*').", - ...holisticConcernAdditions(phase), - ], - }; - - case 3: - return { - title: "Step 3: Structural Enumeration", - instructions: [ - `Enumerate concrete entities touched by ${phase}.`, - "Track IDs and counts so step 7 can validate coverage.", - "Use getter tools to resolve uncertain IDs.", - ], - }; - - case 4: - return { - title: "Step 4: Gap Analysis", - instructions: [ - "Map concerns (step 2) to entities (step 3).", - "Identify uncovered concerns and under-specified entities.", - ], - }; - - case 5: - return { - title: "Step 5: Generate Items", - instructions: [ - "Generate QR items with koan_qr_add_item.", - "", - "Scope examples for this phase:", - ...PHASE_SCOPE_HINTS[phase].map((hint) => ` - ${hint}`), - "", - "Severity:", - " MUST -- critical defect", - " SHOULD -- significant quality issue", - " COULD -- non-blocking improvement", - ], - }; - - case 6: - return { - title: "Step 6: Atomicity Check", - instructions: [ - "Ensure each item checks exactly one concern.", - "Split non-atomic items by adding child items when needed.", - ], - }; - - case 7: - return { - title: "Step 7: Coverage Validation", - instructions: [ - "Cross-check item set against structural enumeration from step 3.", - "Add missing items for uncovered entities/concerns.", - ], - }; - - case 8: - return { - title: "Step 8: Validate Items", - instructions: [ - "Use koan_qr_summary and koan_qr_list_items to audit generated items.", - "Fix duplicates or malformed scopes by adding/revising items.", - ], - }; - - case 9: - return { - title: "Step 9: Structural Grouping", - instructions: [ 
- "Assign deterministic groups:", - " - Parent/child items share group", - " - Umbrella items (scope='*') use group_id='umbrella'", - "Use koan_qr_assign_group to assign groups.", - ], - }; - - case 10: - return { - title: "Step 10: Component Grouping", - instructions: [ - "Group remaining ungrouped items by component (milestone/decision/change cluster).", - "Use koan_qr_list_items and koan_qr_assign_group.", - ], - }; - - case 11: - return { - title: "Step 11: Concern Grouping", - instructions: [ - "Group remaining ungrouped items by concern type.", - "Example concern groups: coverage, consistency, traceability, docs quality.", - ], - }; - - case 12: - return { - title: "Step 12: Affinity Grouping", - instructions: [ - "Assign any remaining ungrouped items by semantic affinity.", - "Singleton groups are acceptable.", - ], - }; - - case 13: - return { - title: "Step 13: Final Validation", - instructions: [ - "Validate that all items are grouped and well-formed.", - "Use koan_qr_summary and koan_qr_list_items.", - "Ensure no item has null group_id.", - "Output PASS in thoughts when complete.", - ], - invokeAfter: [ - "WHEN DONE: Call koan_complete_step with PASS or issues in `thoughts`.", - "Do NOT call this tool until validation is complete.", - ].join("\n"), - }; - - default: - return { title: "", instructions: [] }; - } -} diff --git a/src/planner/phases/qr-verify/phase.ts b/src/planner/phases/qr-verify/phase.ts deleted file mode 100644 index eaf819d..0000000 --- a/src/planner/phases/qr-verify/phase.ts +++ /dev/null @@ -1,243 +0,0 @@ -// QR verify phase -- dynamic-step reviewer subagent that verifies 1..N QR items -// against the plan. Workflow: CONTEXT (once) -> N × (ANALYZE + CONFIRM) -> done. -// Items in a group share a single subagent, amortizing process startup cost. 
-// -// Dynamic step formula: totalSteps = 1 + (2 * numItems) -// Step 1: CONTEXT (load plan, list all assigned items) -// Step 2k: ANALYZE item k (k = 1..N) -// Step 2k+1: CONFIRM item k (record verdict) -// -// Step gating: koan_qr_set_item is blocked until the CONFIRM step for the -// current item (odd-numbered steps >= 3). - -import { promises as fs } from "node:fs"; -import * as path from "node:path"; - -import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; - -import { formatStep } from "../../lib/step.js"; -import { createLogger, type Logger } from "../../../utils/logger.js"; -import { EventLog } from "../../lib/audit.js"; -import { hookDispatch, unhookDispatch, type WorkflowDispatch, type PlanRef } from "../../lib/dispatch.js"; -import { checkPermission } from "../../lib/permissions.js"; -import type { QRItem, QRFile } from "../../qr/types.js"; -import { - loadQRVerifySystemPrompt, - buildVerifySystemPrompt, - buildContextStep, - buildAnalyzeStep, - buildConfirmStep, -} from "./prompts.js"; - -type WorkPhaseKey = "plan-design" | "plan-code" | "plan-docs"; - -interface VerifyState { - active: boolean; - step: number; - totalSteps: number; - itemIds: string[]; - step1Prompt: string | null; - systemPrompt: string | null; -} - -// Map step number to step type and item index. -// Step 1 is CONTEXT. Steps 2..2N+1 alternate ANALYZE/CONFIRM per item. -function stepType(step: number): { kind: "CONTEXT" } | { kind: "ANALYZE"; itemIndex: number } | { kind: "CONFIRM"; itemIndex: number } { - if (step === 1) return { kind: "CONTEXT" }; - const offset = step - 2; // 0-indexed from step 2 - const itemIndex = Math.floor(offset / 2); - const isConfirm = offset % 2 === 1; - return isConfirm ? 
{ kind: "CONFIRM", itemIndex } : { kind: "ANALYZE", itemIndex }; -} - -function stepName(step: number, numItems: number): string { - if (step === 1) return "CONTEXT"; - const info = stepType(step); - if (info.kind === "ANALYZE") return `ANALYZE ${info.itemIndex + 1}/${numItems}`; - if (info.kind === "CONFIRM") return `CONFIRM ${info.itemIndex + 1}/${numItems}`; - return `Step ${step}`; -} - -export class QRVerifyPhase { - private readonly pi: ExtensionAPI; - private readonly planDir: string; - private readonly workPhase: WorkPhaseKey; - private readonly qrPhaseKey: `qr-${WorkPhaseKey}`; - private readonly log: Logger; - private readonly state: VerifyState; - private readonly eventLog: EventLog | undefined; - private readonly dispatch: WorkflowDispatch; - private readonly planRef: PlanRef; - private items: QRItem[] = []; - - constructor( - pi: ExtensionAPI, - config: { planDir: string; itemIds: string[]; workPhase: WorkPhaseKey }, - dispatch: WorkflowDispatch, - planRef: PlanRef, - log?: Logger, - eventLog?: EventLog, - ) { - this.pi = pi; - this.planDir = config.planDir; - this.workPhase = config.workPhase; - this.qrPhaseKey = `qr-${config.workPhase}`; - this.dispatch = dispatch; - this.planRef = planRef; - this.log = log ?? 
createLogger("QRVerify"); - this.eventLog = eventLog; - - const numItems = config.itemIds.length; - const totalSteps = 1 + 2 * numItems; - - this.state = { - active: false, - step: 1, - totalSteps, - itemIds: config.itemIds, - step1Prompt: null, - systemPrompt: null, - }; - - this.registerHandlers(); - } - - async begin(): Promise { - const planPath = path.join(this.planDir, "plan.json"); - try { - await fs.access(planPath); - } catch { - this.log("plan.json not found", { path: planPath }); - return; - } - - const qrPath = path.join(this.planDir, `qr-${this.workPhase}.json`); - let qrFile: QRFile; - try { - const raw = await fs.readFile(qrPath, "utf8"); - qrFile = JSON.parse(raw) as QRFile; - } catch (error) { - const message = error instanceof Error ? error.message : String(error); - this.log(`Failed to read qr-${this.workPhase}.json`, { error: message }); - return; - } - - // Resolve all item IDs to QRItem objects. - const resolvedItems: QRItem[] = []; - for (const id of this.state.itemIds) { - const item = qrFile.items.find((i) => i.id === id); - if (!item) { - this.log("QR item not found", { itemId: id, phase: this.workPhase }); - return; - } - resolvedItems.push(item); - } - this.items = resolvedItems; - - let basePrompt: string; - try { - basePrompt = await loadQRVerifySystemPrompt(); - } catch (error) { - const message = error instanceof Error ? 
error.message : String(error); - this.log("Failed to load QR verify system prompt", { error: message }); - return; - } - - this.state.systemPrompt = buildVerifySystemPrompt(basePrompt, this.workPhase, this.items.length); - const conversationPath = path.join(this.planDir, "conversation.jsonl"); - this.state.step1Prompt = formatStep(buildContextStep(this.items, this.workPhase, conversationPath)); - this.state.active = true; - this.state.step = 1; - this.planRef.dir = this.planDir; - this.planRef.qrPhase = this.workPhase; - - hookDispatch(this.dispatch, "onCompleteStep", () => this.handleStepComplete()); - - this.log("Starting QR verify workflow", { - itemIds: this.state.itemIds, - itemCount: this.items.length, - totalSteps: this.state.totalSteps, - phase: this.workPhase, - step: 1, - }); - await this.eventLog?.emitPhaseStart(this.state.totalSteps); - await this.eventLog?.emitStepTransition(1, "CONTEXT", this.state.totalSteps); - } - - private registerHandlers(): void { - this.pi.on("before_agent_start", () => { - if (!this.state.active || !this.state.systemPrompt) return undefined; - return { systemPrompt: this.state.systemPrompt }; - }); - - this.pi.on("context", (event) => { - if (!this.state.active) return undefined; - if (this.state.step !== 1 || !this.state.step1Prompt) return undefined; - - const messages = event.messages.map((m) => { - if (m.role === "user") return { ...m, content: this.state.step1Prompt! }; - return m; - }); - return { messages }; - }); - - this.pi.on("tool_call", (event) => { - if (!this.state.active) return undefined; - - const perm = checkPermission(this.qrPhaseKey, event.toolName); - if (!perm.allowed) return { block: true, reason: perm.reason }; - - // koan_qr_set_item is only allowed during CONFIRM steps (odd steps >= 3). 
- if (event.toolName === "koan_qr_set_item") { - const info = stepType(this.state.step); - if (info.kind !== "CONFIRM") { - return { - block: true, - reason: `koan_qr_set_item available only during CONFIRM steps (current: ${stepName(this.state.step, this.items.length)})`, - }; - } - } - - return undefined; - }); - } - - private async handleStepComplete(): Promise<{ ok: boolean; prompt?: string; error?: string }> { - const prev = this.state.step; - - if (prev >= this.state.totalSteps) { - this.state.active = false; - unhookDispatch(this.dispatch, "onCompleteStep"); - await this.eventLog?.emitPhaseEnd("completed"); - this.log("Verification complete", { - itemCount: this.items.length, - phase: this.workPhase, - }); - return { ok: true, prompt: "Verification complete." }; - } - - this.state.step = prev + 1; - const name = stepName(this.state.step, this.items.length); - const prompt = this.buildStepPrompt(this.state.step); - - this.log("Step complete, advancing", { - from: prev, - to: this.state.step, - name, - phase: this.workPhase, - }); - await this.eventLog?.emitStepTransition(this.state.step, name, this.state.totalSteps); - return { ok: true, prompt }; - } - - private buildStepPrompt(step: number): string { - const info = stepType(step); - if (info.kind === "ANALYZE") { - return formatStep(buildAnalyzeStep(this.items[info.itemIndex], info.itemIndex, this.items.length)); - } - if (info.kind === "CONFIRM") { - return formatStep(buildConfirmStep(this.items[info.itemIndex], info.itemIndex, this.items.length, this.workPhase)); - } - return ""; - } -} diff --git a/src/planner/phases/qr-verify/prompts.ts b/src/planner/phases/qr-verify/prompts.ts deleted file mode 100644 index f3d7ab0..0000000 --- a/src/planner/phases/qr-verify/prompts.ts +++ /dev/null @@ -1,175 +0,0 @@ -// Prompt guidance for the dynamic-step QR verify subagent workflow. -// Each reviewer subagent verifies 1..N QRItems (grouped by group_id). 
-// -// Dynamic step formula: totalSteps = 1 + (2 * numItems) -// Step 1: CONTEXT (once, lists all items) -// Steps 2..2N+1: ANALYZE/CONFIRM pairs per item - -import type { QRItem } from "../../qr/types.js"; -import { loadAgentPrompt } from "../../lib/agent-prompts.js"; -import type { StepGuidance } from "../../lib/step.js"; -import { - buildPlanDesignContextTrigger, - buildPlanDocsContextTrigger, -} from "../../lib/conversation-trigger.js"; - -type WorkPhaseKey = "plan-design" | "plan-code" | "plan-docs"; - -function scopeGuidance(item: QRItem): string { - const s = item.scope; - if (s === "*") { - return "MACRO CHECK -- Use koan_get_plan to read the full plan."; - } - if (s.startsWith("milestone:")) { - const milestoneId = s.slice("milestone:".length); - return `MILESTONE CHECK -- Use koan_get_milestone(id='${milestoneId}') to read the milestone.`; - } - if (s.startsWith("code_intent:")) { - const intentId = s.slice("code_intent:".length); - return `CODE INTENT CHECK -- Use koan_get_intent(id='${intentId}') to read the intent.`; - } - if (s.startsWith("change:")) { - const changeId = s.slice("change:".length); - return `CHANGE CHECK -- Use koan_get_change(id='${changeId}') to read the planned change.`; - } - if (s.startsWith("decision:")) { - const decisionId = s.slice("decision:".length); - return `DECISION CHECK -- Use koan_get_decision(id='${decisionId}') to read the decision.`; - } - return "SCOPED CHECK -- Read the relevant section using plan getter tools."; -} - -function phaseContextTrigger( - phase: WorkPhaseKey, - conversationPath?: string, -): string[] { - if (phase === "plan-design") { - return buildPlanDesignContextTrigger(conversationPath ?? "/conversation.jsonl"); - } - if (phase === "plan-docs") { - return buildPlanDocsContextTrigger(conversationPath ?? 
"/conversation.jsonl"); - } - return []; -} - -export async function loadQRVerifySystemPrompt(): Promise { - return loadAgentPrompt("quality-reviewer"); -} - -export function buildVerifySystemPrompt(basePrompt: string, phase: WorkPhaseKey, itemCount: number): string { - const itemLabel = itemCount === 1 ? "1 QR item" : `${itemCount} QR items`; - return [ - basePrompt, - "", - "---", - "", - `WORKFLOW: QR VERIFICATION (${phase}, ${itemLabel})`, - "", - `You will verify ${itemLabel} against the plan.`, - "Step 1 instructions are in the user message below.", - "Complete the work described, then call koan_complete_step.", - "Put your findings in the `thoughts` parameter of koan_complete_step.", - "", - "CRITICAL: Do NOT record a verdict until the CONFIRM step for each item.", - "Analyze thoroughly in the ANALYZE step before committing.", - ].join("\n"); -} - -function formatItemForContext(item: QRItem): string { - return [ - ` ${item.id} [${item.severity}]: ${item.check}`, - ` scope: ${item.scope}`, - ].join("\n"); -} - -export function buildContextStep( - items: QRItem[], - phase: WorkPhaseKey, - conversationPath?: string, -): StepGuidance { - const itemLabel = items.length === 1 ? "1 ITEM" : `${items.length} ITEMS`; - const itemSummary = items.map(formatItemForContext).join("\n"); - - return { - title: `Step 1: CONTEXT`, - instructions: [ - `PHASE: ${phase}`, - `ITEMS TO VERIFY: ${itemLabel}`, - "", - itemSummary, - "", - ...phaseContextTrigger(phase, conversationPath), - ...(phase === "plan-code" ? [] : [""]), - "Understand the checks and required evidence before analyzing.", - ], - }; -} - -export function buildAnalyzeStep(item: QRItem, itemIndex: number, totalItems: number): StepGuidance { - const positionLabel = totalItems === 1 - ? 
"" - : ` (item ${itemIndex + 1} of ${totalItems})`; - - return { - title: `ANALYZE ${item.id}${positionLabel}`, - instructions: [ - scopeGuidance(item), - "", - "", - ` ${item.id}`, - ` ${item.scope}`, - ` ${item.check}`, - ` ${item.severity}`, - "", - "", - "TASK:", - "1. Read relevant entities based on scope", - "2. Apply the verification check", - "3. Form preliminary PASS/FAIL conclusion", - "4. Gather concrete evidence", - "", - "Do NOT update QR state yet.", - ], - }; -} - -export function buildConfirmStep( - item: QRItem, - itemIndex: number, - totalItems: number, - phase: WorkPhaseKey, -): StepGuidance { - const positionLabel = totalItems === 1 - ? "" - : ` (item ${itemIndex + 1} of ${totalItems})`; - - return { - title: `CONFIRM ${item.id}${positionLabel}`, - instructions: [ - `CONFIRMING: ${item.id}`, - `SEVERITY: ${item.severity}`, - "", - "CONFIDENCE CHECK:", - "- Are you confident in your conclusion?", - "- Is evidence specific and verifiable?", - "", - "RECORD RESULT:", - "", - "If PASS:", - ` koan_qr_set_item(id='${item.id}', status='PASS')`, - "", - "If FAIL:", - ` koan_qr_set_item(id='${item.id}', status='FAIL', finding='')`, - "", - "RULES:", - "- FAIL requires finding", - "- PASS must not include finding", - "", - "Execute ONE verdict call, then call koan_complete_step.", - ], - invokeAfter: [ - "WHEN DONE: Call koan_complete_step after recording your verdict.", - "Do NOT call this tool until you have called koan_qr_set_item.", - ].join("\n"), - }; -} diff --git a/src/planner/plan/mutate/code.ts b/src/planner/plan/mutate/code.ts deleted file mode 100644 index 7eb74a3..0000000 --- a/src/planner/plan/mutate/code.ts +++ /dev/null @@ -1,161 +0,0 @@ -// Code intent and code change mutations. -// Pure functions -- input plan in, new plan out. No side effects. 
- -import type { Plan, CodeIntent, CodeChange } from "../types.js"; -import { nextIntentId, nextChangeId } from "../types.js"; - -// -- CodeIntent -- - -export function addIntent( - p: Plan, - data: { - milestone: string; - file: string; - function?: string; - behavior: string; - decision_refs?: string[]; - }, -): { plan: Plan; id: string } { - const idx = p.milestones.findIndex((m) => m.id === data.milestone); - if (idx === -1) throw new Error(`milestone ${data.milestone} not found`); - - const m = p.milestones[idx]; - const id = nextIntentId(m); - const intent: CodeIntent = { - id, - file: data.file, - function: data.function ?? null, - behavior: data.behavior, - decision_refs: data.decision_refs ?? [], - }; - - const updated = [...p.milestones]; - updated[idx] = { - ...m, - code_intents: [...m.code_intents, intent], - }; - - return { - plan: { ...p, milestones: updated }, - id, - }; -} - -export function setIntent( - p: Plan, - id: string, - data: { - file?: string; - function?: string; - behavior?: string; - decision_refs?: string[]; - }, -): Plan { - for (let i = 0; i < p.milestones.length; i++) { - const m = p.milestones[i]; - const ciIdx = m.code_intents.findIndex((ci) => ci.id === id); - if (ciIdx !== -1) { - const ci = m.code_intents[ciIdx]; - const updated: CodeIntent = { - ...ci, - file: data.file ?? ci.file, - function: data.function ?? ci.function, - behavior: data.behavior ?? ci.behavior, - decision_refs: data.decision_refs ?? 
ci.decision_refs, - }; - - const intents = [...m.code_intents]; - intents[ciIdx] = updated; - - const milestones = [...p.milestones]; - milestones[i] = { ...m, code_intents: intents }; - - return { ...p, milestones }; - } - } - throw new Error(`intent ${id} not found`); -} - -// -- CodeChange -- - -export function addChange( - p: Plan, - data: { - milestone: string; - file: string; - intent_ref?: string; - diff?: string; - doc_diff?: string; - comments?: string; - }, -): { plan: Plan; id: string } { - const idx = p.milestones.findIndex((m) => m.id === data.milestone); - if (idx === -1) throw new Error(`milestone ${data.milestone} not found`); - - const m = p.milestones[idx]; - const id = nextChangeId(m); - const change: CodeChange = { - id, - intent_ref: data.intent_ref ?? null, - file: data.file, - diff: data.diff ?? "", - doc_diff: data.doc_diff ?? "", - comments: data.comments ?? "", - }; - - const updated = [...p.milestones]; - updated[idx] = { - ...m, - code_changes: [...m.code_changes, change], - }; - - return { - plan: { ...p, milestones: updated }, - id, - }; -} - -function updateChange( - p: Plan, - id: string, - fn: (c: CodeChange) => CodeChange, -): Plan { - for (let i = 0; i < p.milestones.length; i++) { - const m = p.milestones[i]; - const ccIdx = m.code_changes.findIndex((cc) => cc.id === id); - if (ccIdx !== -1) { - const changes = [...m.code_changes]; - changes[ccIdx] = fn(m.code_changes[ccIdx]); - - const milestones = [...p.milestones]; - milestones[i] = { ...m, code_changes: changes }; - - return { ...p, milestones }; - } - } - throw new Error(`code_change ${id} not found`); -} - -export function setChangeDiff(p: Plan, id: string, diff: string): Plan { - return updateChange(p, id, (c) => ({ ...c, diff })); -} - -export function setChangeDocDiff(p: Plan, id: string, doc_diff: string): Plan { - return updateChange(p, id, (c) => ({ ...c, doc_diff })); -} - -export function setChangeComments(p: Plan, id: string, comments: string): Plan { - return 
updateChange(p, id, (c) => ({ ...c, comments })); -} - -export function setChangeFile(p: Plan, id: string, file: string): Plan { - return updateChange(p, id, (c) => ({ ...c, file })); -} - -export function setChangeIntentRef( - p: Plan, - id: string, - intent_ref: string, -): Plan { - return updateChange(p, id, (c) => ({ ...c, intent_ref })); -} diff --git a/src/planner/plan/mutate/decisions.ts b/src/planner/plan/mutate/decisions.ts deleted file mode 100644 index a43107b..0000000 --- a/src/planner/plan/mutate/decisions.ts +++ /dev/null @@ -1,180 +0,0 @@ -// Decision log mutations: decisions, rejected alternatives, risks. -// Pure functions -- input plan in, new plan out. No side effects. - -import type { Plan, Decision, RejectedAlternative, Risk } from "../types.js"; -import { - nextDecisionId, - nextRejectedAltId, - nextRiskId, -} from "../types.js"; - -// -- Decision -- - -export function addDecision( - p: Plan, - data: { decision: string; reasoning: string; source?: string }, -): { plan: Plan; id: string } { - const id = nextDecisionId(p); - const decision: Decision = { - id, - decision: data.decision, - reasoning_chain: data.reasoning, - source: data.source ?? null, - }; - return { - plan: { - ...p, - planning_context: { - ...p.planning_context, - decision_log: [...p.planning_context.decision_log, decision], - }, - }, - id, - }; -} - -export function setDecision( - p: Plan, - id: string, - data: { decision?: string; reasoning?: string; source?: string }, -): Plan { - const idx = p.planning_context.decision_log.findIndex((d) => d.id === id); - if (idx === -1) throw new Error(`decision ${id} not found`); - - const d = p.planning_context.decision_log[idx]; - const updated: Decision = { - ...d, - decision: data.decision ?? d.decision, - reasoning_chain: data.reasoning ?? d.reasoning_chain, - source: data.source ?? 
d.source, - }; - - const log = [...p.planning_context.decision_log]; - log[idx] = updated; - - return { - ...p, - planning_context: { ...p.planning_context, decision_log: log }, - }; -} - -// -- RejectedAlternative -- - -export function addRejectedAlternative( - p: Plan, - data: { alternative: string; rejection_reason: string; decision_ref: string }, -): { plan: Plan; id: string } { - const id = nextRejectedAltId(p); - const ra: RejectedAlternative = { - id, - alternative: data.alternative, - rejection_reason: data.rejection_reason, - decision_ref: data.decision_ref, - }; - return { - plan: { - ...p, - planning_context: { - ...p.planning_context, - rejected_alternatives: [ - ...p.planning_context.rejected_alternatives, - ra, - ], - }, - }, - id, - }; -} - -export function setRejectedAlternative( - p: Plan, - id: string, - data: { - alternative?: string; - rejection_reason?: string; - decision_ref?: string; - }, -): Plan { - const idx = p.planning_context.rejected_alternatives.findIndex( - (r) => r.id === id, - ); - if (idx === -1) throw new Error(`rejected_alternative ${id} not found`); - - const r = p.planning_context.rejected_alternatives[idx]; - const updated: RejectedAlternative = { - ...r, - alternative: data.alternative ?? r.alternative, - rejection_reason: data.rejection_reason ?? r.rejection_reason, - decision_ref: data.decision_ref ?? r.decision_ref, - }; - - const list = [...p.planning_context.rejected_alternatives]; - list[idx] = updated; - - return { - ...p, - planning_context: { ...p.planning_context, rejected_alternatives: list }, - }; -} - -// -- Risk -- - -export function addRisk( - p: Plan, - data: { - risk: string; - mitigation: string; - anchor?: string; - decision_ref?: string; - }, -): { plan: Plan; id: string } { - const id = nextRiskId(p); - const risk: Risk = { - id, - risk: data.risk, - mitigation: data.mitigation, - anchor: data.anchor ?? null, - decision_ref: data.decision_ref ?? 
null, - }; - return { - plan: { - ...p, - planning_context: { - ...p.planning_context, - known_risks: [...p.planning_context.known_risks, risk], - }, - }, - id, - }; -} - -export function setRisk( - p: Plan, - id: string, - data: { - risk?: string; - mitigation?: string; - anchor?: string; - decision_ref?: string; - }, -): Plan { - const idx = p.planning_context.known_risks.findIndex((r) => r.id === id); - if (idx === -1) throw new Error(`risk ${id} not found`); - - const r = p.planning_context.known_risks[idx]; - const updated: Risk = { - ...r, - risk: data.risk ?? r.risk, - mitigation: data.mitigation ?? r.mitigation, - anchor: data.anchor ?? r.anchor, - decision_ref: data.decision_ref ?? r.decision_ref, - }; - - const list = [...p.planning_context.known_risks]; - list[idx] = updated; - - return { - ...p, - planning_context: { ...p.planning_context, known_risks: list }, - }; -} diff --git a/src/planner/plan/mutate/index.ts b/src/planner/plan/mutate/index.ts deleted file mode 100644 index 0c96dcb..0000000 --- a/src/planner/plan/mutate/index.ts +++ /dev/null @@ -1,48 +0,0 @@ -// Re-exports all public mutation functions grouped by domain. -// Consumers import from this single entry point. 
- -export { - setOverview, - setConstraints, - setInvisibleKnowledge, -} from "./top-level.js"; - -export { - addDecision, - setDecision, - addRejectedAlternative, - setRejectedAlternative, - addRisk, - setRisk, -} from "./decisions.js"; - -export { - addMilestone, - setMilestoneName, - setMilestoneFiles, - setMilestoneFlags, - setMilestoneRequirements, - setMilestoneAcceptanceCriteria, - setMilestoneTests, -} from "./milestones.js"; - -export { - addIntent, - setIntent, - addChange, - setChangeDiff, - setChangeDocDiff, - setChangeComments, - setChangeFile, - setChangeIntentRef, -} from "./code.js"; - -export { - addWave, - setWaveMilestones, - addDiagram, - setDiagram, - addDiagramNode, - addDiagramEdge, - setReadmeEntry, -} from "./structure.js"; diff --git a/src/planner/plan/mutate/milestones.ts b/src/planner/plan/mutate/milestones.ts deleted file mode 100644 index fbb4e86..0000000 --- a/src/planner/plan/mutate/milestones.ts +++ /dev/null @@ -1,91 +0,0 @@ -// Milestone mutations: add, and per-field setters. -// Pure functions -- input plan in, new plan out. No side effects. - -import type { Plan, Milestone } from "../types.js"; -import { nextMilestoneId } from "../types.js"; - -export function addMilestone( - p: Plan, - data: { - name: string; - files?: string[]; - flags?: string[]; - requirements?: string[]; - acceptance_criteria?: string[]; - tests?: string[]; - }, -): { plan: Plan; id: string } { - const id = nextMilestoneId(p); - const milestone: Milestone = { - id, - number: p.milestones.length + 1, - name: data.name, - files: data.files ?? [], - flags: data.flags ?? [], - requirements: data.requirements ?? [], - acceptance_criteria: data.acceptance_criteria ?? [], - tests: data.tests ?? 
[], - code_intents: [], - code_changes: [], - documentation: { - module_comment: null, - docstrings: [], - function_blocks: [], - inline_comments: [], - }, - is_documentation_only: false, - delegated_to: null, - }; - return { - plan: { - ...p, - milestones: [...p.milestones, milestone], - }, - id, - }; -} - -function updateMilestone( - p: Plan, - id: string, - fn: (m: Milestone) => Milestone, -): Plan { - const idx = p.milestones.findIndex((m) => m.id === id); - if (idx === -1) throw new Error(`milestone ${id} not found`); - - const updated = [...p.milestones]; - updated[idx] = fn(p.milestones[idx]); - return { ...p, milestones: updated }; -} - -export function setMilestoneName(p: Plan, id: string, name: string): Plan { - return updateMilestone(p, id, (m) => ({ ...m, name })); -} - -export function setMilestoneFiles(p: Plan, id: string, files: string[]): Plan { - return updateMilestone(p, id, (m) => ({ ...m, files })); -} - -export function setMilestoneFlags(p: Plan, id: string, flags: string[]): Plan { - return updateMilestone(p, id, (m) => ({ ...m, flags })); -} - -export function setMilestoneRequirements( - p: Plan, - id: string, - requirements: string[], -): Plan { - return updateMilestone(p, id, (m) => ({ ...m, requirements })); -} - -export function setMilestoneAcceptanceCriteria( - p: Plan, - id: string, - criteria: string[], -): Plan { - return updateMilestone(p, id, (m) => ({ ...m, acceptance_criteria: criteria })); -} - -export function setMilestoneTests(p: Plan, id: string, tests: string[]): Plan { - return updateMilestone(p, id, (m) => ({ ...m, tests })); -} diff --git a/src/planner/plan/mutate/structure.ts b/src/planner/plan/mutate/structure.ts deleted file mode 100644 index f5679b1..0000000 --- a/src/planner/plan/mutate/structure.ts +++ /dev/null @@ -1,164 +0,0 @@ -// Structural plan mutations: waves, diagrams, readme entries. -// Pure functions -- input plan in, new plan out. No side effects. 
- -import type { - Plan, - Wave, - DiagramGraph, - DiagramNode, - DiagramEdge, - ReadmeEntry, -} from "../types.js"; -import { nextWaveId, nextDiagramId } from "../types.js"; - -// -- Wave -- - -export function addWave( - p: Plan, - data: { milestones: string[] }, -): { plan: Plan; id: string } { - const id = nextWaveId(p); - const wave: Wave = { - id, - milestones: data.milestones, - }; - return { - plan: { - ...p, - waves: [...p.waves, wave], - }, - id, - }; -} - -export function setWaveMilestones( - p: Plan, - id: string, - milestones: string[], -): Plan { - const idx = p.waves.findIndex((w) => w.id === id); - if (idx === -1) throw new Error(`wave ${id} not found`); - - const updated = [...p.waves]; - updated[idx] = { ...p.waves[idx], milestones }; - - return { ...p, waves: updated }; -} - -// -- Diagram -- - -export function addDiagram( - p: Plan, - data: { - type: "architecture" | "state" | "sequence" | "dataflow"; - scope: string; - title: string; - }, -): { plan: Plan; id: string } { - const id = nextDiagramId(p); - const diagram: DiagramGraph = { - id, - type: data.type, - scope: data.scope, - title: data.title, - nodes: [], - edges: [], - ascii_render: null, - }; - return { - plan: { - ...p, - diagram_graphs: [...p.diagram_graphs, diagram], - }, - id, - }; -} - -export function setDiagram( - p: Plan, - id: string, - data: { title?: string; scope?: string; ascii_render?: string }, -): Plan { - const idx = p.diagram_graphs.findIndex((d) => d.id === id); - if (idx === -1) throw new Error(`diagram ${id} not found`); - - const d = p.diagram_graphs[idx]; - const updated: DiagramGraph = { - ...d, - title: data.title ?? d.title, - scope: data.scope ?? d.scope, - ascii_render: data.ascii_render ?? 
d.ascii_render, - }; - - const diagrams = [...p.diagram_graphs]; - diagrams[idx] = updated; - - return { ...p, diagram_graphs: diagrams }; -} - -export function addDiagramNode( - p: Plan, - diagramId: string, - data: { id: string; label: string; type?: string }, -): Plan { - const idx = p.diagram_graphs.findIndex((d) => d.id === diagramId); - if (idx === -1) throw new Error(`diagram ${diagramId} not found`); - - const d = p.diagram_graphs[idx]; - const node: DiagramNode = { - id: data.id, - label: data.label, - type: data.type ?? null, - }; - - const diagrams = [...p.diagram_graphs]; - diagrams[idx] = { - ...d, - nodes: [...d.nodes, node], - }; - - return { ...p, diagram_graphs: diagrams }; -} - -export function addDiagramEdge( - p: Plan, - diagramId: string, - data: { source: string; target: string; label: string; protocol?: string }, -): Plan { - const idx = p.diagram_graphs.findIndex((d) => d.id === diagramId); - if (idx === -1) throw new Error(`diagram ${diagramId} not found`); - - const d = p.diagram_graphs[idx]; - const edge: DiagramEdge = { - source: data.source, - target: data.target, - label: data.label, - protocol: data.protocol ?? 
null, - }; - - const diagrams = [...p.diagram_graphs]; - diagrams[idx] = { - ...d, - edges: [...d.edges, edge], - }; - - return { ...p, diagram_graphs: diagrams }; -} - -// -- ReadmeEntry -- - -export function setReadmeEntry(p: Plan, path: string, content: string): Plan { - const idx = p.readme_entries.findIndex((r) => r.path === path); - const entry: ReadmeEntry = { path, content }; - - if (idx === -1) { - return { - ...p, - readme_entries: [...p.readme_entries, entry], - }; - } - - const entries = [...p.readme_entries]; - entries[idx] = entry; - return { ...p, readme_entries: entries }; -} diff --git a/src/planner/plan/mutate/top-level.ts b/src/planner/plan/mutate/top-level.ts deleted file mode 100644 index 2392525..0000000 --- a/src/planner/plan/mutate/top-level.ts +++ /dev/null @@ -1,37 +0,0 @@ -// Top-level plan field mutations: overview, constraints, invisible knowledge. -// Pure functions -- input plan in, new plan out. No side effects. - -import type { Plan, Overview, InvisibleKnowledge } from "../types.js"; - -export function setOverview( - p: Plan, - data: { problem?: string; approach?: string }, -): Plan { - const overview: Overview = { - problem: data.problem ?? p.overview.problem, - approach: data.approach ?? p.overview.approach, - }; - return { ...p, overview }; -} - -export function setConstraints(p: Plan, constraints: string[]): Plan { - return { - ...p, - planning_context: { - ...p.planning_context, - constraints, - }, - }; -} - -export function setInvisibleKnowledge( - p: Plan, - data: { system?: string; invariants?: string[]; tradeoffs?: string[] }, -): Plan { - const ik: InvisibleKnowledge = { - system: data.system ?? p.invisible_knowledge.system, - invariants: data.invariants ?? p.invisible_knowledge.invariants, - tradeoffs: data.tradeoffs ?? 
p.invisible_knowledge.tradeoffs, - }; - return { ...p, invisible_knowledge: ik }; -} diff --git a/src/planner/plan/render.ts b/src/planner/plan/render.ts deleted file mode 100644 index 4974bdd..0000000 --- a/src/planner/plan/render.ts +++ /dev/null @@ -1,155 +0,0 @@ -// Mechanical renderer: plan.json -> plan.md. -// The plan JSON is the source of truth; this file provides a deterministic -// markdown projection for human/manual review between planning and execution. - -import { promises as fs } from "node:fs"; -import * as path from "node:path"; - -import type { Plan, Milestone, DiagramGraph } from "./types.js"; -import { loadPlan } from "./serialize.js"; - -function escCell(text: string): string { - return text.replace(/\|/g, "\\|").replace(/\n/g, " ").trim(); -} - -function pushList(lines: string[], title: string, values: string[]): void { - if (values.length === 0) return; - lines.push(title, ""); - for (const value of values) lines.push(`- ${value}`); - lines.push(""); -} - -function pushScopedDiagrams(lines: string[], diagrams: DiagramGraph[], scope: string): void { - const scoped = diagrams.filter((d) => d.scope === scope); - for (const diagram of scoped) { - lines.push(`### ${diagram.title}`, ""); - if (diagram.ascii_render && diagram.ascii_render.trim().length > 0) { - lines.push("```", diagram.ascii_render, "```", ""); - } else { - lines.push(`[Diagram pending rendering: ${diagram.id}]`, ""); - } - } -} - -function pushMilestone(lines: string[], milestone: Milestone, diagrams: DiagramGraph[]): void { - lines.push(`### ${milestone.id}: ${milestone.name}`, ""); - - pushScopedDiagrams(lines, diagrams, `milestone:${milestone.id}`); - - if (milestone.files.length > 0) { - lines.push(`**Files**: ${milestone.files.join(", ")}`, ""); - } - - pushList(lines, "**Requirements**", milestone.requirements); - pushList(lines, "**Acceptance Criteria**", milestone.acceptance_criteria); - pushList(lines, "**Tests**", milestone.tests); - - if (milestone.code_intents.length > 
0) { - lines.push("#### Code Intents", ""); - for (const intent of milestone.code_intents) { - const fn = intent.function ? `::${intent.function}` : ""; - const refs = intent.decision_refs.length > 0 ? ` (refs: ${intent.decision_refs.join(", ")})` : ""; - lines.push(`- **${intent.id}** \`${intent.file}${fn}\`: ${intent.behavior}${refs}`); - } - lines.push(""); - } - - if (milestone.code_changes.length > 0) { - lines.push("#### Code Changes", ""); - for (const change of milestone.code_changes) { - const intentRef = change.intent_ref ? ` - implements ${change.intent_ref}` : ""; - lines.push(`**${change.id}** (${change.file})${intentRef}`, ""); - - if (change.diff.trim().length > 0) { - lines.push("**Code Diff**", "", "```diff", change.diff, "```", ""); - } - - if (change.doc_diff.trim().length > 0) { - lines.push("**Documentation Diff**", "", "```diff", change.doc_diff, "```", ""); - } - - if (change.comments.trim().length > 0) { - lines.push(`> ${change.comments}`, ""); - } - } - } -} - -export function renderPlanMarkdown(plan: Plan): string { - const lines: string[] = ["# Plan", "", "## Overview", "", plan.overview.problem || "(empty)", ""]; - - if (plan.overview.approach.trim().length > 0) { - lines.push(`**Approach**: ${plan.overview.approach}`, ""); - } - - pushScopedDiagrams(lines, plan.diagram_graphs, "overview"); - - if (plan.planning_context.decision_log.length > 0) { - lines.push("## Planning Context", "", "### Decision Log", "", "| ID | Decision | Reasoning Chain |", "|---|---|---|"); - for (const d of plan.planning_context.decision_log) { - lines.push(`| ${d.id} | ${escCell(d.decision)} | ${escCell(d.reasoning_chain)} |`); - } - lines.push(""); - } - - if (plan.planning_context.rejected_alternatives.length > 0) { - lines.push("### Rejected Alternatives", "", "| Alternative | Why Rejected |", "|---|---|"); - for (const r of plan.planning_context.rejected_alternatives) { - lines.push(`| ${escCell(r.alternative)} | ${escCell(r.rejection_reason)} (ref: 
${r.decision_ref}) |`); - } - lines.push(""); - } - - pushList(lines, "### Constraints", plan.planning_context.constraints); - - if (plan.planning_context.known_risks.length > 0) { - lines.push("### Known Risks", ""); - for (const risk of plan.planning_context.known_risks) { - lines.push(`- **${risk.risk}**: ${risk.mitigation}`); - } - lines.push(""); - } - - const ik = plan.invisible_knowledge; - if (ik.system.trim().length > 0 || ik.invariants.length > 0 || ik.tradeoffs.length > 0) { - lines.push("## Invisible Knowledge", ""); - if (ik.system.trim().length > 0) { - lines.push("### System", "", ik.system, ""); - } - pushList(lines, "### Invariants", ik.invariants); - pushList(lines, "### Tradeoffs", ik.tradeoffs); - pushScopedDiagrams(lines, plan.diagram_graphs, "invisible_knowledge"); - } - - lines.push("## Milestones", ""); - for (const milestone of plan.milestones) { - pushMilestone(lines, milestone, plan.diagram_graphs); - } - - if (plan.readme_entries.length > 0) { - lines.push("## README Entries", ""); - for (const entry of plan.readme_entries) { - lines.push(`### ${entry.path}`, "", entry.content, ""); - } - } - - if (plan.waves.length > 0) { - lines.push("## Execution Waves", ""); - for (const wave of plan.waves) { - lines.push(`- ${wave.id}: ${wave.milestones.join(", ")}`); - } - lines.push(""); - } - - return `${lines.join("\n").trimEnd()}\n`; -} - -export async function renderPlanMarkdownToFile(planDir: string): Promise { - const plan = await loadPlan(planDir); - const markdown = renderPlanMarkdown(plan); - const outputPath = path.join(planDir, "plan.md"); - const tmpPath = path.join(planDir, ".plan.md.tmp"); - await fs.writeFile(tmpPath, markdown, "utf8"); - await fs.rename(tmpPath, outputPath); - return outputPath; -} diff --git a/src/planner/plan/serialize.ts b/src/planner/plan/serialize.ts deleted file mode 100644 index 9256709..0000000 --- a/src/planner/plan/serialize.ts +++ /dev/null @@ -1,45 +0,0 @@ -import { promises as fs } from "node:fs"; 
-import * as path from "node:path"; - -import type { Plan } from "./types.js"; -import { createEmptyPlan } from "./types.js"; - -export function serializePlan(p: Plan): string { - return `${JSON.stringify(p, null, 2)}\n`; -} - -export async function writePlan(p: Plan, filePath: string): Promise { - const dir = path.dirname(filePath); - try { - await fs.access(dir); - } catch { - throw new Error(`Plan directory does not exist: ${dir}`); - } - - const content = serializePlan(p); - await fs.writeFile(filePath, content, "utf8"); -} - -// Atomic write: tmp file + rename. Prevents corrupted plan.json if -// process crashes mid-write. -export async function savePlan(p: Plan, dir: string): Promise { - const planPath = path.join(dir, "plan.json"); - const tmpPath = path.join(dir, ".plan.json.tmp"); - const content = serializePlan(p); - await fs.writeFile(tmpPath, content, "utf8"); - await fs.rename(tmpPath, planPath); -} - -export async function loadPlan(dir: string): Promise { - const planPath = path.join(dir, "plan.json"); - try { - const content = await fs.readFile(planPath, "utf8"); - return JSON.parse(content) as Plan; - } catch (err: unknown) { - if ((err as NodeJS.ErrnoException).code === "ENOENT") { - const planId = path.basename(dir); - return createEmptyPlan(planId); - } - throw err; - } -} diff --git a/src/planner/plan/types.ts b/src/planner/plan/types.ts deleted file mode 100644 index 4d21ca9..0000000 --- a/src/planner/plan/types.ts +++ /dev/null @@ -1,206 +0,0 @@ -export interface Decision { - id: string; - decision: string; - reasoning_chain: string; - source: string | null; -} - -export interface RejectedAlternative { - id: string; - alternative: string; - rejection_reason: string; - decision_ref: string; -} - -export interface Risk { - id: string; - risk: string; - mitigation: string; - anchor?: string | null; - decision_ref?: string | null; -} - -export interface PlanningContext { - decision_log: Decision[]; - rejected_alternatives: RejectedAlternative[]; - 
constraints: string[]; - known_risks: Risk[]; -} - -export interface InvisibleKnowledge { - system: string; - invariants: string[]; - tradeoffs: string[]; -} - -export interface Overview { - problem: string; - approach: string; -} - -export interface CodeIntent { - id: string; - file: string; - function?: string | null; - behavior: string; - decision_refs: string[]; -} - -export interface CodeChange { - id: string; - intent_ref: string | null; - file: string; - diff: string; - doc_diff: string; - comments: string; -} - -export interface Docstring { - function: string; - docstring: string; -} - -export interface FunctionBlock { - function: string; - comment: string; - decision_ref: string | null; - source: string | null; -} - -export interface InlineComment { - location: string; - comment: string; - decision_ref: string | null; - source: string | null; -} - -// DEPRECATED per reference schema. Kept for backwards compatibility with -// Python-based planner plans. New plans use CodeChange.doc_diff. -export interface Documentation { - module_comment: string | null; - docstrings: Docstring[]; - function_blocks: FunctionBlock[]; - inline_comments: InlineComment[]; -} - -// DEPRECATED per reference schema. Kept for backwards compatibility with -// Python-based planner plans. New plans use CodeChange.doc_diff. 
-export interface ReadmeEntry { - path: string; - content: string; -} - -export interface DiagramNode { - id: string; - label: string; - type: string | null; -} - -export interface DiagramEdge { - source: string; - target: string; - label: string; - protocol: string | null; -} - -export interface DiagramGraph { - id: string; - type: "architecture" | "state" | "sequence" | "dataflow"; - scope: string; - title: string; - nodes: DiagramNode[]; - edges: DiagramEdge[]; - ascii_render: string | null; -} - -export interface Milestone { - id: string; - number: number; - name: string; - files: string[]; - flags: string[]; - requirements: string[]; - acceptance_criteria: string[]; - tests: string[]; - code_intents: CodeIntent[]; - code_changes: CodeChange[]; - documentation: Documentation; - is_documentation_only: boolean; - delegated_to: string | null; -} - -export interface Wave { - id: string; - milestones: string[]; -} - -export interface Plan { - plan_id: string; - created_at: string; - frozen_at: string | null; - overview: Overview; - planning_context: PlanningContext; - invisible_knowledge: InvisibleKnowledge; - milestones: Milestone[]; - waves: Wave[]; - diagram_graphs: DiagramGraph[]; - readme_entries: ReadmeEntry[]; -} - -export function createEmptyPlan(planId: string): Plan { - return { - plan_id: planId, - created_at: new Date().toISOString(), - frozen_at: null, - overview: { problem: "", approach: "" }, - planning_context: { - decision_log: [], - rejected_alternatives: [], - constraints: [], - known_risks: [], - }, - invisible_knowledge: { system: "", invariants: [], tradeoffs: [] }, - milestones: [], - waves: [], - diagram_graphs: [], - readme_entries: [], - }; -} - -function pad3(n: number): string { - return String(n).padStart(3, "0"); -} - -export function nextDecisionId(p: Plan): string { - return `DL-${pad3(p.planning_context.decision_log.length + 1)}`; -} - -export function nextMilestoneId(p: Plan): string { - return `M-${pad3(p.milestones.length + 1)}`; 
-} - -export function nextIntentId(m: Milestone): string { - const num = m.code_intents.length + 1; - return `CI-${m.id}-${pad3(num)}`; -} - -export function nextRiskId(p: Plan): string { - return `R-${pad3(p.planning_context.known_risks.length + 1)}`; -} - -export function nextRejectedAltId(p: Plan): string { - return `RA-${pad3(p.planning_context.rejected_alternatives.length + 1)}`; -} - -export function nextWaveId(p: Plan): string { - return `W-${pad3(p.waves.length + 1)}`; -} - -export function nextDiagramId(p: Plan): string { - return `DIAG-${pad3(p.diagram_graphs.length + 1)}`; -} - -export function nextChangeId(m: Milestone): string { - const num = m.code_changes.length + 1; - return `CC-${m.id}-${pad3(num)}`; -} diff --git a/src/planner/plan/validate.ts b/src/planner/plan/validate.ts deleted file mode 100644 index bfb4f52..0000000 --- a/src/planner/plan/validate.ts +++ /dev/null @@ -1,249 +0,0 @@ -import { promises as fs } from "node:fs"; -import * as path from "node:path"; - -import type { Logger } from "../../utils/logger.js"; -import type { Plan } from "./types.js"; - -export interface ValidationResult { - ok: boolean; - errors: string[]; - warnings?: string[]; -} - -// -- Decision source provenance -- - -// Canonical source types for the type:ref format. -// "code" and "docs" carry a path ref; others stand alone. -const VALID_SOURCE_TYPES = [ - "code", "docs", "user:ask", "user:conversation", "inference", -] as const; - -export type DecisionSourceType = (typeof VALID_SOURCE_TYPES)[number]; - -const SOURCE_TYPE_SET: ReadonlySet = new Set(VALID_SOURCE_TYPES); - -// Parses "code:src/foo.ts" -> { type: "code", ref: "src/foo.ts" } -// Parses "inference" -> { type: "inference", ref: null } -// Returns null for unrecognized formats. -export function parseDecisionSource( - s: string, -): { type: DecisionSourceType; ref: string | null } | null { - const colon = s.indexOf(":"); - if (colon === -1) { - return SOURCE_TYPE_SET.has(s) ? 
{ type: s as DecisionSourceType, ref: null } : null; - } - const prefix = s.substring(0, colon); - const rest = s.substring(colon + 1); - // "user:ask" and "user:conversation" are complete types, not type:ref pairs - const full = `${prefix}:${rest}`; - if (SOURCE_TYPE_SET.has(full)) return { type: full as DecisionSourceType, ref: null }; - // "code:" and "docs:" are type:ref pairs - if (SOURCE_TYPE_SET.has(prefix)) return { type: prefix as DecisionSourceType, ref: rest }; - return null; -} - -// Produces warnings (not errors) for decisions with missing or invalid sources. -// Soft validation: legacy plans have source: null; hard failures cause death loops. -export function validateDecisionSources(p: Plan): string[] { - const warnings: string[] = []; - for (const d of p.planning_context.decision_log) { - if (!d.source) { - warnings.push(`${d.id}: missing source -- expected code:, docs:, user:ask, user:conversation, or inference`); - continue; - } - const parsed = parseDecisionSource(d.source); - if (!parsed) { - warnings.push(`${d.id}: unrecognized source "${d.source}" -- expected code:, docs:, user:ask, user:conversation, or inference`); - } - } - return warnings; -} - -export function validatePlanDesign(p: Plan): ValidationResult { - const errors: string[] = []; - - if (p.overview.problem.trim().length === 0) { - errors.push("overview.problem must not be empty"); - } - - if (p.milestones.length === 0) { - errors.push("plan must have at least one milestone"); - } - - for (const m of p.milestones) { - if (m.code_intents.length === 0) { - errors.push(`milestone ${m.id} must have at least one code_intent`); - } - } - - const warnings = validateDecisionSources(p); - return { ok: errors.length === 0, errors, warnings }; -} - -export function validateRefs(p: Plan): ValidationResult { - const errors: string[] = []; - const decisionIds = new Set(p.planning_context.decision_log.map((d) => d.id)); - const milestoneIds = new Set(p.milestones.map((m) => m.id)); - - for (const 
m of p.milestones) { - const intentIds = new Set(m.code_intents.map((ci) => ci.id)); - - for (const ci of m.code_intents) { - for (const ref of ci.decision_refs) { - if (!decisionIds.has(ref)) { - errors.push(`${ci.id}.decision_refs '${ref}' not in decisions`); - } - } - } - - for (const cc of m.code_changes) { - if (cc.intent_ref && !intentIds.has(cc.intent_ref)) { - errors.push( - `${cc.id}.intent_ref '${cc.intent_ref}' not in milestone ${m.id} intents`, - ); - } - } - } - - for (const ra of p.planning_context.rejected_alternatives) { - if (!decisionIds.has(ra.decision_ref)) { - errors.push( - `rejected_alternative ${ra.id}.decision_ref '${ra.decision_ref}' not in decisions`, - ); - } - } - - for (const risk of p.planning_context.known_risks) { - if (risk.decision_ref && !decisionIds.has(risk.decision_ref)) { - errors.push(`risk ${risk.id}.decision_ref '${risk.decision_ref}' not in decisions`); - } - } - - // Milestone references in DiagramGraph.scope are validated against - // plan.milestones for referential integrity. Prevents orphaned diagrams - // when milestones are merged or deleted. 
- for (const diag of p.diagram_graphs) { - if (diag.scope.startsWith("milestone:")) { - const milestoneId = diag.scope.substring("milestone:".length); - if (!milestoneIds.has(milestoneId)) { - errors.push( - `diagram ${diag.id}.scope '${diag.scope}' references unknown milestone`, - ); - } - } - - const nodeIds = new Set(diag.nodes.map((n) => n.id)); - for (const edge of diag.edges) { - if (!nodeIds.has(edge.source)) { - errors.push(`diagram ${diag.id} edge source '${edge.source}' not in nodes`); - } - if (!nodeIds.has(edge.target)) { - errors.push(`diagram ${diag.id} edge target '${edge.target}' not in nodes`); - } - } - } - - return { ok: errors.length === 0, errors }; -} - -export function validateDiagramScope(scope: string): ValidationResult { - const errors: string[] = []; - if ( - scope !== "overview" && - scope !== "invisible_knowledge" && - !scope.startsWith("milestone:") - ) { - errors.push( - `diagram scope must be 'overview', 'invisible_knowledge', or 'milestone:M-XXX', got '${scope}'`, - ); - } - return { ok: errors.length === 0, errors }; -} - -export function validatePlanCode(p: Plan): ValidationResult { - const errors: string[] = []; - for (const m of p.milestones) { - const changeIntents = new Set( - m.code_changes.map((cc) => cc.intent_ref).filter((r) => r !== null), - ); - for (const ci of m.code_intents) { - if (!changeIntents.has(ci.id)) { - errors.push(`milestone ${m.id} intent ${ci.id} has no corresponding code_change`); - } - } - } - return { ok: errors.length === 0, errors }; -} - -export function validatePlanDocs(p: Plan): ValidationResult { - const errors: string[] = []; - for (const m of p.milestones) { - for (const cc of m.code_changes) { - if (cc.diff.trim().length > 0 && cc.doc_diff.trim().length === 0) { - errors.push(`milestone ${m.id} change ${cc.id} has diff but no doc_diff`); - } - } - } - return { ok: errors.length === 0, errors }; -} - -export type PlanValidationPhase = "plan-design" | "plan-code" | "plan-docs"; - -// Reads 
plan.json from planDir and runs phase-appropriate validation. -// All phases require plan-design + reference integrity checks. -// plan-code additionally requires intent->change completeness. -// plan-docs additionally requires doc completeness. -export async function loadAndValidatePlanForPhase( - planDir: string, - phase: PlanValidationPhase, - log: Logger, -): Promise<{ ok: boolean; errors?: string[] }> { - const planPath = path.join(planDir, "plan.json"); - let plan: Plan; - try { - const raw = await fs.readFile(planPath, "utf8"); - plan = JSON.parse(raw) as Plan; - } catch (error) { - const message = error instanceof Error ? error.message : String(error); - log("Failed to read plan.json for validation", { error: message, phase }); - return { ok: false, errors: [`Failed to read plan.json: ${message}`] }; - } - - const designValidation = validatePlanDesign(plan); - if (!designValidation.ok) { - log("Plan design validation failed", { errors: designValidation.errors, phase }); - return { ok: false, errors: designValidation.errors }; - } - - const refValidation = validateRefs(plan); - if (!refValidation.ok) { - log("Plan reference validation failed", { errors: refValidation.errors, phase }); - return { ok: false, errors: refValidation.errors }; - } - - if (phase === "plan-code" || phase === "plan-docs") { - const codeValidation = validatePlanCode(plan); - if (!codeValidation.ok) { - log("Plan code validation failed", { errors: codeValidation.errors, phase }); - return { ok: false, errors: codeValidation.errors }; - } - } - - if (phase === "plan-docs") { - const docsValidation = validatePlanDocs(plan); - if (!docsValidation.ok) { - log("Plan docs validation failed", { errors: docsValidation.errors, phase }); - return { ok: false, errors: docsValidation.errors }; - } - } - - log("Plan validation passed", { path: planPath, phase }); - return { ok: true }; -} - -export async function loadAndValidatePlan( - planDir: string, - log: Logger, -): Promise<{ ok: boolean; 
errors?: string[] }> { - return loadAndValidatePlanForPhase(planDir, "plan-design", log); -} diff --git a/src/planner/qr/mutate.ts b/src/planner/qr/mutate.ts deleted file mode 100644 index e0644ff..0000000 --- a/src/planner/qr/mutate.ts +++ /dev/null @@ -1,88 +0,0 @@ -import type { QRFile, QRItem, QRSeverity, QRItemStatus } from "./types.js"; - -function pad3(n: number): string { - return String(n).padStart(3, "0"); -} - -function nextQRId(qr: QRFile): string { - return `QR-${qr.phase}-${pad3(qr.items.length + 1)}`; -} - -export function addQRItem( - qr: QRFile, - data: { scope: string; check: string; severity?: QRSeverity }, -): { qr: QRFile; id: string } { - const id = nextQRId(qr); - const item: QRItem = { - id, - scope: data.scope, - check: data.check, - status: "TODO", - finding: null, - parent_id: null, - group_id: null, - severity: data.severity ?? "MUST", - }; - return { - qr: { - ...qr, - items: [...qr.items, item], - }, - id, - }; -} - -// PASS is terminal: cannot transition from PASS to FAIL. -// FAIL requires finding (explains what failed). -// PASS forbids finding. -export function setQRItem( - qr: QRFile, - id: string, - data: { - status?: QRItemStatus; - finding?: string; - check?: string; - severity?: QRSeverity; - }, -): QRFile { - const idx = qr.items.findIndex((i) => i.id === id); - if (idx === -1) throw new Error(`qr_item ${id} not found`); - - const item = qr.items[idx]; - - if (item.status === "PASS" && data.status === "FAIL") { - throw new Error(`cannot transition ${id} from PASS to FAIL (PASS is terminal)`); - } - - const status = data.status ?? item.status; - const finding = data.finding ?? item.finding; - - if (status === "FAIL" && !finding) { - throw new Error(`FAIL status requires finding for ${id}`); - } - - if (status === "PASS" && finding) { - throw new Error(`PASS status forbids finding for ${id}`); - } - - const updated: QRItem = { - ...item, - status, - finding, - check: data.check ?? item.check, - severity: data.severity ?? 
item.severity, - }; - - const items = [...qr.items]; - items[idx] = updated; - - return { ...qr, items }; -} - -export function assignGroup(qr: QRFile, ids: string[], groupId: string): QRFile { - const idSet = new Set(ids); - const items = qr.items.map((item) => - idSet.has(item.id) ? { ...item, group_id: groupId } : item, - ); - return { ...qr, items }; -} diff --git a/src/planner/qr/severity.ts b/src/planner/qr/severity.ts deleted file mode 100644 index 6e40c6f..0000000 --- a/src/planner/qr/severity.ts +++ /dev/null @@ -1,41 +0,0 @@ -// Severity escalation policy for QR fix iterations. -// -// Progressive de-escalation narrows what blocks as iterations increase. -// COULD items (style, cosmetic) do not block indefinitely: after 2 fix -// attempts, only structural issues (MUST, SHOULD) block; after 3, only -// knowledge-loss risks (MUST) block. -// -// A hard cutoff ("after N attempts, ignore all failures") would let MUST -// failures through. De-escalation by tier preserves the invariant that -// MUST items always block, while preventing COULD style nits from causing -// indefinite retries. - -import type { QRItem, QRSeverity } from "./types.js"; - -export const MAX_FIX_ITERATIONS = 5; - -// Returns the set of severities that block the plan at the given iteration. -// Iterations 1-2: all severities block. Iteration 3: MUST+SHOULD. 4+: MUST only. -export function blockingSeverities(iteration: number): ReadonlySet { - if (iteration <= 2) return new Set(["MUST", "SHOULD", "COULD"]); - if (iteration === 3) return new Set(["MUST", "SHOULD"]); - return new Set(["MUST"]); -} - -// Returns the subset of items that are FAIL and have a blocking severity -// at the given iteration. -export function blockingFailures( - items: ReadonlyArray, - iteration: number, -): QRItem[] { - const blocking = blockingSeverities(iteration); - return items.filter((i) => i.status === "FAIL" && blocking.has(i.severity)); -} - -// Returns true when no blocking failures remain at this iteration. 
-export function qrPassesAtIteration( - items: ReadonlyArray, - iteration: number, -): boolean { - return blockingFailures(items, iteration).length === 0; -} diff --git a/src/planner/qr/types.ts b/src/planner/qr/types.ts deleted file mode 100644 index 89ab627..0000000 --- a/src/planner/qr/types.ts +++ /dev/null @@ -1,19 +0,0 @@ -export type QRSeverity = "MUST" | "SHOULD" | "COULD"; -export type QRItemStatus = "TODO" | "PASS" | "FAIL"; - -export interface QRItem { - id: string; - scope: string; - check: string; - status: QRItemStatus; - finding: string | null; - parent_id: string | null; - group_id: string | null; - severity: QRSeverity; -} - -export interface QRFile { - phase: string; - iteration: number; - items: QRItem[]; -} diff --git a/src/planner/session.ts b/src/planner/session.ts deleted file mode 100644 index ecd79a4..0000000 --- a/src/planner/session.ts +++ /dev/null @@ -1,985 +0,0 @@ -// Parent session: orchestrates the koan planning workflow. -// Flow: export conversation -> plan-design(+QR) -> plan-code(+QR) -> plan-docs(+QR) -// -> mechanical plan.json->plan.md rendering for manual review. 
- -import { promises as fs } from "node:fs"; -import * as path from "node:path"; - -import type { AgentToolResult, ExtensionAPI, ExtensionCommandContext, ExtensionContext, ExtensionUIContext } from "@mariozechner/pi-coding-agent"; - -import { exportConversation } from "./conversation.js"; -import { createInitialState, initializePlanState, type WorkflowState } from "./state.js"; -import { createPlanInfo } from "../utils/plan.js"; -import { - spawnArchitect, - spawnArchitectFix, - spawnDeveloper, - spawnDeveloperFix, - spawnTechnicalWriter, - spawnTechnicalWriterFix, - spawnQRDecomposer, - spawnReviewer, - type SpawnQRDecomposerOptions, - type SpawnReviewerOptions, - type SubagentResult, -} from "./subagent.js"; -import { createLogger, setLogDir, type Logger } from "../utils/logger.js"; -import { createSubagentDir } from "../utils/progress.js"; -import { readProjection, readRecentLogs, type Projection, type LogLine } from "./lib/audit.js"; -import type { WorkflowDispatch, PlanRef } from "./lib/dispatch.js"; -import { pool } from "./lib/pool.js"; -import type { QRFile } from "./qr/types.js"; -import { MAX_FIX_ITERATIONS, qrPassesAtIteration } from "./qr/severity.js"; -import { WidgetController, type WidgetUpdate } from "./ui/widget.js"; -import { renderPlanMarkdownToFile } from "./plan/render.js"; -import { - mapSpawnContextToPhaseModelKey, - resolvePhaseModelOverride, - type SpawnContext, -} from "./model-resolver.js"; -import type { PhaseRow } from "./model-phase.js"; -import { - readIpcFile, - writeIpcFile, - createAskResponse, - createCancelledResponse, - type IpcFile, - type IpcResponse, -} from "./lib/ipc.js"; -import { askSingleQuestionWithInlineNote } from "./ui/ask/ask-inline-ui.js"; -import { askQuestionsWithTabs } from "./ui/ask/ask-tabs-ui.js"; -import type { AskQuestion } from "./ui/ask/ask-logic.js"; - -type WorkPhaseKey = "plan-design" | "plan-code" | "plan-docs"; - -interface Session { - plan(ctx: ExtensionContext): Promise>; - execute(_ctx: 
ExtensionCommandContext): Promise; - status(ctx: ExtensionCommandContext): Promise; -} - -interface QRBlockResult { - summary: string; - passed: boolean; -} - -interface PhaseRunConfig { - key: WorkPhaseKey; - label: string; - widgetIndex: number; - role: "architect" | "developer" | "technical-writer"; - spawnWork: (opts: SpawnWorkRunOptions) => Promise; - spawnFix: (opts: SpawnFixRunOptions) => Promise; -} - -interface SpawnWorkRunOptions { - planDir: string; - subagentDir: string; - cwd: string; - extensionPath: string; - log: Logger; - modelOverride?: string; -} - -interface SpawnFixRunOptions extends SpawnWorkRunOptions {} - -function qrFilePath(planDir: string, phase: WorkPhaseKey): string { - return path.join(planDir, `qr-${phase}.json`); -} - -function singleSubagentStart(role: string): WidgetUpdate { - return { - subagentRole: role, - subagentModel: null, - subagentParallelCount: 1, - subagentQueued: 0, - subagentActive: 1, - subagentDone: 0, - }; -} - -function singleSubagentFromProjection(p: Projection): WidgetUpdate { - const running = p.status === "running"; - return { - subagentRole: p.role, - subagentModel: p.model, - subagentParallelCount: 1, - subagentQueued: 0, - subagentActive: running ? 1 : 0, - subagentDone: running ? 
0 : 1, - }; -} - -function phaseRunningState(phase: WorkPhaseKey): WorkflowState["phase"] { - if (phase === "plan-design") return "architect-running"; - if (phase === "plan-code") return "plan-code-running"; - return "plan-docs-running"; -} - -function phaseCompleteState(phase: WorkPhaseKey): WorkflowState["phase"] { - if (phase === "plan-design") return "plan-design-complete"; - if (phase === "plan-code") return "plan-code-complete"; - return "plan-docs-complete"; -} - -interface ModelResolutionDeps { - mapSpawnContextToPhaseModelKeyFn?: typeof mapSpawnContextToPhaseModelKey; - resolvePhaseModelOverrideFn?: typeof resolvePhaseModelOverride; -} - -interface QRSpawnResolutionDeps extends ModelResolutionDeps { - spawnQRDecomposerFn?: typeof spawnQRDecomposer; - spawnReviewerFn?: typeof spawnReviewer; -} - -export async function resolveSpawnModelOverride( - context: SpawnContext, - phaseRow: PhaseRow, - deps: ModelResolutionDeps = {}, -): Promise { - const mapFn = deps.mapSpawnContextToPhaseModelKeyFn ?? mapSpawnContextToPhaseModelKey; - const resolveFn = deps.resolvePhaseModelOverrideFn ?? 
resolvePhaseModelOverride; - const key = mapFn(context, phaseRow); - return await resolveFn(key); -} - -export async function spawnWorkWithResolvedModel( - phaseRow: PhaseRow, - spawnWorkFn: (opts: SpawnWorkRunOptions) => Promise, - opts: SpawnWorkRunOptions, - deps: ModelResolutionDeps = {}, -): Promise { - const modelOverride = await resolveSpawnModelOverride("work-debut", phaseRow, deps); - return await spawnWorkFn({ ...opts, modelOverride }); -} - -export async function spawnFixWithResolvedModel( - phaseRow: PhaseRow, - spawnFixFn: (opts: SpawnFixRunOptions) => Promise, - opts: SpawnFixRunOptions, - deps: ModelResolutionDeps = {}, -): Promise { - const modelOverride = await resolveSpawnModelOverride("fix", phaseRow, deps); - return await spawnFixFn({ ...opts, modelOverride }); -} - -export async function spawnQRDecomposerWithResolvedModel( - opts: SpawnQRDecomposerOptions, - deps: QRSpawnResolutionDeps = {}, -): Promise { - const modelOverride = await resolveSpawnModelOverride("qr-decompose", opts.phase as PhaseRow, deps); - const spawnFn = deps.spawnQRDecomposerFn ?? spawnQRDecomposer; - return await spawnFn({ ...opts, modelOverride }); -} - -export async function spawnReviewerWithResolvedModel( - opts: SpawnReviewerOptions, - deps: QRSpawnResolutionDeps = {}, -): Promise { - const modelOverride = await resolveSpawnModelOverride("qr-verify", opts.phase as PhaseRow, deps); - const spawnFn = deps.spawnReviewerFn ?? spawnReviewer; - return await spawnFn({ ...opts, modelOverride }); -} - -// Routes an IpcFile ask request to the appropriate UI component and returns -// an IpcResponse. On any exception from the UI layer, the caller's catch -// block writes a cancelled response so the subagent unblocks. 
-async function handleAskRequest( - ui: ExtensionUIContext, - ipc: IpcFile, -): Promise { - const { request } = ipc; - const { questions } = request.payload; - const questionsAsAsk = questions as AskQuestion[]; - - if (questions.length === 1 && !questions[0].multi) { - const selection = await askSingleQuestionWithInlineNote(ui, questionsAsAsk[0]); - if (selection.selectedOptions.length === 0 && !selection.customInput) { - return createCancelledResponse(request.id); - } - const answer: { id: string; selectedOptions: string[]; customInput?: string } = { - id: questions[0].id, - selectedOptions: selection.selectedOptions, - }; - if (selection.customInput !== undefined) { - answer.customInput = selection.customInput; - } - return createAskResponse(request.id, { answers: [answer] }); - } - - const tabResult = await askQuestionsWithTabs(ui, questionsAsAsk); - if (tabResult.cancelled) { - return createCancelledResponse(request.id); - } - - const answers = questions.map((q, i) => { - const sel = tabResult.selections[i] ?? { selectedOptions: [] }; - const answer: { id: string; selectedOptions: string[]; customInput?: string } = { - id: q.id, - selectedOptions: sel.selectedOptions, - }; - if (sel.customInput !== undefined) { - answer.customInput = sel.customInput; - } - return answer; - }); - - return createAskResponse(request.id, { answers }); -} - -// Encapsulates the poll-with-request-detection pattern used by both -// the work poll loop and the fix poll loop. Returns a setInterval ID. 
-function pollWithIpcDetection( - subagentDir: string, - widget: WidgetController | null, - ui: ExtensionUIContext | null, - stepPrefix: string, - updateFromProjection: (p: Projection, logs: LogLine[]) => void, -): ReturnType { - let pendingRequestId: string | null = null; - - return setInterval(async () => { - const [projection, logs] = await Promise.all([ - readProjection(subagentDir), - readRecentLogs(subagentDir), - ]); - if (projection) { - updateFromProjection(projection, logs); - } - - // IPC request detection — skip if already handling a request or no UI - if (pendingRequestId || !ui) return; - - const ipc = await readIpcFile(subagentDir); - if (!ipc || !ipc.request || ipc.response !== null) return; - - pendingRequestId = ipc.request.id; - try { - widget?.update({ - step: `${stepPrefix}: waiting for user input...`, - activity: ipc.request.payload.questions[0]?.question ?? "", - }); - - const response = await handleAskRequest(ui, ipc); - const updated: IpcFile = { request: ipc.request, response }; - await writeIpcFile(subagentDir, updated); - } catch { - // On error, write cancelled response so subagent unblocks. - // The inner try-catch guards against I/O failures during error - // recovery — an unguarded throw here would propagate as an - // unhandled async rejection in the setInterval callback, - // crashing the parent process (Node.js ≥15 default behavior). - try { - const cancelled = createCancelledResponse(ipc.request.id); - await writeIpcFile(subagentDir, { request: ipc.request, response: cancelled }); - } catch { - // I/O failed during error recovery; subagent remains blocked - // until parent terminates. No further action possible. 
- } - } finally { - pendingRequestId = null; - } - }, 2000); -} - -export function createSession(pi: ExtensionAPI, dispatch: WorkflowDispatch, planRef: PlanRef): Session { - const state: WorkflowState = createInitialState(); - const log = createLogger("Session"); - let widget: WidgetController | null = null; - - return { - async plan(ctx: ExtensionContext): Promise> { - const planInfo = await createPlanInfo("", ctx.cwd); - initializePlanState(state, planInfo, ""); - - // Wire plan directory for subagent dispatch and logging. - planRef.dir = planInfo.directory; - setLogDir(planInfo.directory); - - log("Plan tool invoked", { - cwd: ctx.cwd, - planId: planInfo.id, - planDirectory: planInfo.directory, - }); - - if (widget) { - widget.destroy(); - widget = null; - } - - if (ctx.hasUI) { - widget = new WidgetController(ctx.ui, planInfo.id); - } - - // Export conversation to plan directory. - // Agents that need session context can Read this file. - await exportConversation(ctx.sessionManager, planInfo.directory); - log("Conversation exported", { planDir: planInfo.directory }); - - let outcome: "PASS" | "FAIL" = "FAIL"; - try { - const planDir = planInfo.directory; - const extensionPath = path.resolve(import.meta.dirname, "../../extensions/koan.ts"); - const ui = ctx.hasUI ? 
ctx.ui : null; - - // widgetIndex 0=design, 1=code, 2=docs - const phases: PhaseRunConfig[] = [ - { - key: "plan-design", - label: "Plan design", - widgetIndex: 0, - role: "architect", - spawnWork: (opts) => spawnArchitect(opts), - spawnFix: (opts) => spawnArchitectFix({ ...opts, fixPhase: "plan-design" }), - }, - { - key: "plan-code", - label: "Plan code", - widgetIndex: 1, - role: "developer", - spawnWork: (opts) => spawnDeveloper(opts), - spawnFix: (opts) => spawnDeveloperFix({ ...opts, fixPhase: "plan-code" }), - }, - { - key: "plan-docs", - label: "Plan docs", - widgetIndex: 2, - role: "technical-writer", - spawnWork: (opts) => spawnTechnicalWriter(opts), - spawnFix: (opts) => spawnTechnicalWriterFix({ ...opts, fixPhase: "plan-docs" }), - }, - ]; - - const phaseSummaries: string[] = []; - for (const phase of phases) { - const result = await runPlanningPhase( - phase, - planDir, - ctx.cwd, - extensionPath, - state, - log, - widget, - ui, - ); - - phaseSummaries.push(`${phase.label}: ${result.summary}`); - if (!result.passed) { - return { - content: [{ type: "text" as const, text: `Planning failed at ${phase.label}.\n\n${phaseSummaries.join("\n")}` }], - details: undefined, - }; - } - } - - try { - await renderPlanMarkdownToFile(planDir); - } catch (error) { - const message = error instanceof Error ? 
error.message : String(error); - log("Failed to render plan.md", { error: message, planDir }); - return { - content: [{ type: "text" as const, text: `Planning phases completed, but plan markdown rendering failed: ${message}\n\n${phaseSummaries.join("\n")}` }], - details: undefined, - }; - } - - state.phase = "plan-docs-complete"; - widget?.update({ - activeIndex: -1, - step: "planning complete; awaiting manual review of plan.md", - activity: "", - }); - - outcome = "PASS"; - return { - content: [{ type: "text" as const, text: `Planning complete.\n\n${phaseSummaries.join("\n")}` }], - details: undefined, - }; - } finally { - if (widget) { - widget.destroy(); - widget = null; - } - ctx.ui.notify(outcome, outcome === "PASS" ? "info" : "error"); - } - }, - - async execute(ctx) { - ctx.ui.notify("Execution mode is not yet implemented.", "warning"); - }, - - async status(ctx) { - ctx.ui.notify(`Phase: ${state.phase}`, "info"); - }, - }; -} - -const QR_POOL_CONCURRENCY = 6; - -async function runPlanningPhase( - phase: PhaseRunConfig, - planDir: string, - cwd: string, - extensionPath: string, - state: WorkflowState, - log: Logger, - widget: WidgetController | null, - ui: ExtensionUIContext | null, -): Promise { - state.phase = phaseRunningState(phase.key); - - widget?.update({ - phaseStatus: { index: phase.widgetIndex, status: "running" }, - activeIndex: phase.widgetIndex, - step: `${phase.key}: spawning ${phase.role}...`, - activity: "", - qrIterationsMax: MAX_FIX_ITERATIONS + 1, - qrIteration: 1, - qrMode: "initial", - qrPhase: "execute", - qrDone: null, - qrTotal: null, - qrPass: null, - qrFail: null, - qrTodo: null, - ...singleSubagentStart(phase.role), - }); - - const subagentDir = await createSubagentDir(planDir, `${phase.role}-${phase.key}`); - - const pollInterval = pollWithIpcDetection( - subagentDir, - widget, - ui, - phase.key, - (projection, logs) => { - widget?.update({ - step: `${phase.key}: ${projection.stepName}`, - activity: projection.lastAction ?? 
"", - logLines: logs, - ...singleSubagentFromProjection(projection), - }); - }, - ); - - const workResult = await spawnWorkWithResolvedModel( - phase.key as PhaseRow, - phase.spawnWork, - { - planDir, - subagentDir, - cwd, - extensionPath, - log, - }, - ); - - clearInterval(pollInterval); - - if (workResult.exitCode !== 0) { - const detail = workResult.stderr.slice(0, 500); - log(`${phase.key} subagent failed`, { exitCode: workResult.exitCode, stderr: detail }); - widget?.update({ - phaseStatus: { index: phase.widgetIndex, status: "failed" }, - step: `${phase.key}: worker failed`, - activity: "", - subagentActive: 0, - subagentDone: 1, - }); - return { summary: `${phase.label} subagent failed (exit ${workResult.exitCode}).\n\nStderr:\n${detail}`, passed: false }; - } - - const planJsonPath = path.join(planDir, "plan.json"); - try { - await fs.access(planJsonPath); - } catch { - log(`${phase.key} completed but plan.json missing`, { planJsonPath }); - widget?.update({ - phaseStatus: { index: phase.widgetIndex, status: "failed" }, - step: `${phase.key}: no plan produced`, - activity: "", - subagentActive: 0, - subagentDone: 1, - }); - return { summary: `${phase.label} completed but produced no plan.json.`, passed: false }; - } - - state.phase = phaseCompleteState(phase.key); - widget?.update({ - step: `${phase.key}: starting QR block...`, - activity: "", - qrIteration: 1, - qrMode: "initial", - qrPhase: "execute", - qrDone: null, - qrTotal: null, - qrPass: null, - qrFail: null, - qrTodo: null, - subagentActive: 0, - subagentDone: 1, - }); - - const qr = await runPhaseWithQR( - phase, - planDir, - cwd, - extensionPath, - state, - log, - widget, - ui, - ); - - if (qr.passed) { - state.phase = phaseCompleteState(phase.key); - widget?.update({ phaseStatus: { index: phase.widgetIndex, status: "completed" } }); - } else { - widget?.update({ phaseStatus: { index: phase.widgetIndex, status: "failed" } }); - } - - return qr; -} - - -async function runQRDecompose( - planDir: 
string, - cwd: string, - extensionPath: string, - phase: WorkPhaseKey, - state: WorkflowState, - log: Logger, - widget: WidgetController | null, -): Promise { - const qrPath = qrFilePath(planDir, phase); - const keyOf = (scope: string, check: string): string => `${scope}\u0000${check}`; - - const previousPassKeys = new Set(); - try { - const raw = await fs.readFile(qrPath, "utf8"); - const prev = JSON.parse(raw) as QRFile; - for (const item of prev.items) { - if (item.status === "PASS") previousPassKeys.add(keyOf(item.scope, item.check)); - } - } catch { - // First QR run for this phase. - } - - state.phase = "qr-decompose-running"; - widget?.update({ - step: `${phase} qr-decompose: starting...`, - activity: "", - qrPhase: "decompose", - qrDone: null, - qrTotal: null, - qrPass: null, - qrFail: null, - qrTodo: null, - ...singleSubagentStart("qr-decomposer"), - }); - - const decomposeDir = await createSubagentDir(planDir, `qr-decomposer-${phase}`); - - const decomposePoll = setInterval(async () => { - const [projection, logs] = await Promise.all([readProjection(decomposeDir), readRecentLogs(decomposeDir)]); - if (!projection) return; - widget?.update({ - step: `${phase} qr-decompose: ${projection.stepName}`, - activity: projection.lastAction ?? 
"", - logLines: logs, - ...singleSubagentFromProjection(projection), - }); - }, 2000); - - const decompose = await spawnQRDecomposerWithResolvedModel({ - planDir, - subagentDir: decomposeDir, - cwd, - extensionPath, - phase, - log, - }); - - clearInterval(decomposePoll); - - if (decompose.exitCode !== 0) { - state.phase = "qr-decompose-failed"; - const detail = decompose.stderr.slice(0, 500); - log("QR decomposer failed", { phase, exitCode: decompose.exitCode, stderr: detail }); - widget?.update({ step: `${phase} qr-decompose: failed`, activity: "", subagentActive: 0, subagentDone: 1 }); - return { summary: `${phase} QR decompose failed (exit ${decompose.exitCode}).\n\nStderr:\n${detail}`, passed: false }; - } - - let qr: QRFile; - try { - const raw = await fs.readFile(qrPath, "utf8"); - qr = JSON.parse(raw) as QRFile; - } catch (error) { - state.phase = "qr-decompose-failed"; - const message = error instanceof Error ? error.message : String(error); - log("Failed to read QR file after decompose", { phase, error: message }); - return { summary: `${phase} QR decompose completed but produced no verifiable items.`, passed: false }; - } - - if (qr.items.length === 0) { - state.phase = "qr-decompose-failed"; - log("QR decompose produced no items", { phase }); - return { summary: `${phase} QR decompose completed but produced no items.`, passed: false }; - } - - const carriedPasses = qr.items.filter((item) => item.status !== "PASS" && previousPassKeys.has(keyOf(item.scope, item.check))).length; - if (carriedPasses > 0) { - qr = { - ...qr, - items: qr.items.map((item) => - previousPassKeys.has(keyOf(item.scope, item.check)) - ? { ...item, status: "PASS", finding: null } - : item), - }; - try { - const tmpPath = `${qrPath}.tmp`; - await fs.writeFile(tmpPath, `${JSON.stringify(qr, null, 2)}\n`, "utf8"); - await fs.rename(tmpPath, qrPath); - } catch (error) { - const message = error instanceof Error ? 
error.message : String(error); - log("Failed to persist carried PASS statuses", { phase, error: message }); - return { summary: `${phase} QR verify aborted: failed to preserve PASS statuses.`, passed: false }; - } - } - - return { summary: `${phase} QR decompose complete.`, passed: true }; -} - -async function runQRVerify( - planDir: string, - cwd: string, - extensionPath: string, - phase: WorkPhaseKey, - state: WorkflowState, - log: Logger, - widget: WidgetController | null, -): Promise { - const qrPath = qrFilePath(planDir, phase); - - let qr: QRFile; - try { - const raw = await fs.readFile(qrPath, "utf8"); - qr = JSON.parse(raw) as QRFile; - } catch (error) { - state.phase = "qr-decompose-failed"; - const message = error instanceof Error ? error.message : String(error); - log("Failed to read QR file for verify", { phase, error: message }); - return { summary: `${phase} QR verify aborted: cannot read QR file.`, passed: false }; - } - - const resetFailures = qr.items.filter((i) => i.status === "FAIL").length; - if (resetFailures > 0) { - qr = { - ...qr, - items: qr.items.map((item) => (item.status === "FAIL" ? { ...item, status: "TODO", finding: null } : item)), - }; - try { - const tmpPath = `${qrPath}.tmp`; - await fs.writeFile(tmpPath, `${JSON.stringify(qr, null, 2)}\n`, "utf8"); - await fs.rename(tmpPath, qrPath); - } catch (error) { - const message = error instanceof Error ? error.message : String(error); - log("Failed to persist QR FAIL->TODO reset", { phase, error: message }); - return { summary: `${phase} QR verify aborted: failed to prepare QR item states.`, passed: false }; - } - } - - // Group TODO items by group_id for batch verification. - // Items sharing a group_id are verified by a single subagent, amortizing - // process startup cost. Items without group_id are treated as singletons. - const todoItems = qr.items.filter((i) => i.status === "TODO"); - const groups = new Map(); - for (const item of todoItems) { - const gid = item.group_id ?? 
item.id; - const existing = groups.get(gid); - if (existing) { - existing.push(item.id); - } else { - groups.set(gid, [item.id]); - } - } - const groupEntries = Array.from(groups.entries()); // [groupId, itemIds[]] - const totalItems = qr.items.length; - const totalTodoItems = todoItems.length; - const preservedPass = qr.items.filter((i) => i.status === "PASS").length; - const initialFail = qr.items.filter((i) => i.status === "FAIL").length; - - widget?.update({ - step: `${phase} qr-verify: 0/${groupEntries.length} groups (${totalTodoItems} items)`, - activity: "", - qrPhase: "verify", - qrTotal: totalItems, - qrDone: preservedPass, - qrPass: preservedPass, - qrFail: initialFail, - qrTodo: totalTodoItems, - subagentRole: "reviewer", - subagentModel: null, - subagentParallelCount: QR_POOL_CONCURRENCY, - subagentQueued: groupEntries.length, - subagentActive: 0, - subagentDone: 0, - }); - - log("QR verify: grouped items for dispatch", { - phase, - totalItems: totalTodoItems, - groups: groupEntries.length, - groupSizes: groupEntries.map(([gid, ids]) => `${gid}:${ids.length}`), - }); - - state.phase = "qr-verify-running"; - - let verifyDone = 0; - let failedReviewers: string[] = []; - - if (groupEntries.length > 0) { - const groupIds = groupEntries.map(([gid]) => gid); - - const verifyStatsPoll = setInterval(async () => { - try { - const raw = await fs.readFile(qrPath, "utf8"); - const current = JSON.parse(raw) as QRFile; - const pass = current.items.filter((i) => i.status === "PASS").length; - const fail = current.items.filter((i) => i.status === "FAIL").length; - const todo = current.items.filter((i) => i.status === "TODO").length; - widget?.update({ - qrPass: pass, - qrFail: fail, - qrTodo: todo, - qrDone: preservedPass + verifyDone, - qrTotal: current.items.length, - }); - } catch { - // Ignore transient read races while reviewers write. - } - }, 2000); - - // Build a map from groupId -> itemIds for the pool worker. 
- const groupItemMap = new Map(groupEntries); - - try { - let reviewerModel: string | null = null; - const result = await pool( - groupIds, - QR_POOL_CONCURRENCY, - async (groupId) => { - const itemIds = groupItemMap.get(groupId)!; - const dirSuffix = itemIds.length === 1 - ? `qr-reviewer-${phase}-${itemIds[0]}` - : `qr-reviewer-${phase}-group-${groupId}`; - const reviewerDir = await createSubagentDir(planDir, dirSuffix); - const r = await spawnReviewerWithResolvedModel({ - planDir, - subagentDir: reviewerDir, - cwd, - extensionPath, - phase, - itemIds, - log, - }); - - if (reviewerModel === null) { - const projection = await readProjection(reviewerDir); - reviewerModel = projection?.model ?? null; - if (reviewerModel) widget?.update({ subagentModel: reviewerModel }); - } - - return r; - }, - (progress) => { - verifyDone = progress.done; - widget?.update({ - step: `${phase} qr-verify: ${progress.done}/${progress.total} groups`, - qrDone: preservedPass + progress.done, - qrTotal: totalItems, - subagentQueued: progress.queued, - subagentActive: progress.active, - subagentDone: progress.done, - }); - }, - ); - failedReviewers = result.failed; - } finally { - clearInterval(verifyStatsPoll); - } - } - - state.phase = "qr-complete"; - let finalQR: QRFile; - try { - const raw = await fs.readFile(qrPath, "utf8"); - finalQR = JSON.parse(raw) as QRFile; - } catch { - finalQR = qr; - } - - const pass = finalQR.items.filter((i) => i.status === "PASS").length; - const fail = finalQR.items.filter((i) => i.status === "FAIL").length; - const todo = finalQR.items.filter((i) => i.status === "TODO").length; - const summary = `${phase} QR complete: ${pass} PASS, ${fail} FAIL, ${todo} TODO (${failedReviewers.length} reviewer groups failed).`; - - const passed = fail === 0 && failedReviewers.length === 0; - widget?.update({ - step: summary, - activity: "", - qrDone: pass + fail, - qrTotal: totalItems, - qrPass: pass, - qrFail: fail, - qrTodo: todo, - subagentQueued: 0, - subagentActive: 
0, - subagentDone: groupEntries.length, - }); - return { summary, passed }; -} - -async function runPhaseWithQR( - phase: PhaseRunConfig, - planDir: string, - cwd: string, - extensionPath: string, - state: WorkflowState, - log: Logger, - widget: WidgetController | null, - ui: ExtensionUIContext | null, -): Promise { - const qrPath = qrFilePath(planDir, phase.key); - - const decompose = await runQRDecompose(planDir, cwd, extensionPath, phase.key, state, log, widget); - if (!decompose.passed) { - widget?.update({ phaseStatus: { index: phase.widgetIndex, status: "failed" } }); - return decompose; - } - - let qr = await runQRVerify(planDir, cwd, extensionPath, phase.key, state, log, widget); - if (qr.passed) { - widget?.update({ qrPhase: "done", phaseStatus: { index: phase.widgetIndex, status: "completed" } }); - return qr; - } - - widget?.update({ qrPhase: "execute", qrDone: null, qrTotal: null, qrPass: null, qrFail: null, qrTodo: null }); - - for (let iteration = 2; iteration <= MAX_FIX_ITERATIONS + 1; iteration++) { - widget?.update({ - qrIteration: iteration, - qrMode: "fix", - qrPhase: "execute", - qrDone: null, - qrTotal: null, - qrPass: null, - qrFail: null, - qrTodo: null, - }); - - let qrFile: QRFile; - try { - const raw = await fs.readFile(qrPath, "utf8"); - qrFile = JSON.parse(raw) as QRFile; - } catch { - log("Fix loop: failed to read QR file", { phase: phase.key, iteration }); - widget?.update({ qrPhase: "done" }); - return { summary: `${phase.key} fix loop aborted: cannot read QR file.`, passed: false }; - } - - if (qrPassesAtIteration(qrFile.items, iteration)) { - const pass = qrFile.items.filter((i) => i.status === "PASS").length; - const fail = qrFile.items.filter((i) => i.status === "FAIL").length; - const todo = qrFile.items.filter((i) => i.status === "TODO").length; - widget?.update({ - qrPhase: "done", - qrDone: pass + fail, - qrTotal: qrFile.items.length, - qrPass: pass, - qrFail: fail, - qrTodo: todo, - phaseStatus: { index: phase.widgetIndex, 
status: "completed" }, - }); - return { - passed: true, - summary: `${phase.key} QR passed at iteration ${iteration} after severity de-escalation: ${pass} PASS, ${fail} FAIL (non-blocking).`, - }; - } - - const fixIndex = iteration - 1; - widget?.update({ - step: `${phase.key} fix ${fixIndex}/${MAX_FIX_ITERATIONS}: spawning ${phase.role}...`, - activity: "", - qrPhase: "execute", - ...singleSubagentStart(phase.role), - }); - - const fixDir = await createSubagentDir(planDir, `${phase.role}-fix-${phase.key}-${fixIndex}`); - - const fixPoll = pollWithIpcDetection( - fixDir, - widget, - ui, - `${phase.key} fix ${fixIndex}/${MAX_FIX_ITERATIONS}`, - (projection, logs) => { - widget?.update({ - step: `${phase.key} fix ${fixIndex}/${MAX_FIX_ITERATIONS}: ${projection.stepName}`, - activity: projection.lastAction ?? "", - logLines: logs, - ...singleSubagentFromProjection(projection), - }); - }, - ); - - const fixResult = await spawnFixWithResolvedModel( - phase.key as PhaseRow, - phase.spawnFix, - { - planDir, - subagentDir: fixDir, - cwd, - extensionPath, - log, - }, - ); - - clearInterval(fixPoll); - - if (fixResult.exitCode !== 0) { - log("Fix worker failed", { - phase: phase.key, - iteration: fixIndex, - exitCode: fixResult.exitCode, - stderr: fixResult.stderr.slice(0, 500), - }); - widget?.update({ - step: `${phase.key} fix ${fixIndex}/${MAX_FIX_ITERATIONS}: worker failed, re-running QR...`, - activity: "", - subagentActive: 0, - subagentDone: 1, - }); - } - - widget?.update({ - step: `${phase.key} fix ${fixIndex}/${MAX_FIX_ITERATIONS}: re-running QR...`, - activity: "", - subagentActive: 0, - subagentDone: 1, - }); - - qr = await runQRVerify(planDir, cwd, extensionPath, phase.key, state, log, widget); - if (qr.passed) { - widget?.update({ qrPhase: "done", phaseStatus: { index: phase.widgetIndex, status: "completed" } }); - return qr; - } - - widget?.update({ qrPhase: "execute", qrDone: null, qrTotal: null, qrPass: null, qrFail: null, qrTodo: null }); - } - - 
widget?.update({ qrPhase: "done" }); - return { - passed: false, - summary: `${phase.key} ${qr.summary} (max ${MAX_FIX_ITERATIONS} fix iterations reached)`, - }; -} diff --git a/src/planner/state.ts b/src/planner/state.ts deleted file mode 100644 index 286250f..0000000 --- a/src/planner/state.ts +++ /dev/null @@ -1,40 +0,0 @@ -export type WorkflowPhase = - | "idle" - | "architect-running" - | "architect-failed" - | "plan-design-complete" - | "plan-code-running" - | "plan-code-complete" - | "plan-docs-running" - | "plan-docs-complete" - | "qr-decompose-running" - | "qr-decompose-failed" - | "qr-verify-running" - | "qr-verify-failed" - | "qr-complete"; - -export interface PlanInfo { - id: string; - directory: string; - createdAt: string; - metadataPath: string; -} - -export interface WorkflowState { - phase: WorkflowPhase; - taskDescription: string | null; - plan: PlanInfo | null; -} - -export function createInitialState(): WorkflowState { - return { - phase: "idle", - taskDescription: null, - plan: null, - }; -} - -export function initializePlanState(state: WorkflowState, plan: PlanInfo, taskDescription: string): void { - state.plan = plan; - state.taskDescription = taskDescription; -} diff --git a/src/planner/tools/entity-code.ts b/src/planner/tools/entity-code.ts deleted file mode 100644 index ca57d75..0000000 --- a/src/planner/tools/entity-code.ts +++ /dev/null @@ -1,171 +0,0 @@ -// Plan entity tools for code-phase entities: code intents and code changes. -// Uses planTool helper from entity-design (shared load-mutate-save-lock wrapper). 
- -import { Type } from "@sinclair/typebox"; -import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; - -import type { PlanRef } from "../lib/dispatch.js"; -import { planTool } from "./entity-design.js"; -import { - addIntent, - setIntent, - addChange, - setChangeDiff, - setChangeDocDiff, - setChangeComments, - setChangeFile, - setChangeIntentRef, -} from "../plan/mutate/index.js"; - -export function registerPlanCodeEntityTools( - pi: ExtensionAPI, - planRef: PlanRef, -): void { - // -- CodeIntent -- - planTool(pi, planRef, { - name: "koan_add_intent", - label: "Add code intent", - description: "Add code intent to milestone.", - parameters: Type.Object({ - milestone: Type.String(), - file: Type.String(), - function: Type.Optional(Type.String()), - behavior: Type.String(), - decision_refs: Type.Optional(Type.Array(Type.String())), - }), - execute: (p, params) => { - const r = addIntent(p, params); - return { - plan: r.plan, - message: `Added intent ${r.id} to milestone ${params.milestone}`, - }; - }, - }); - - planTool(pi, planRef, { - name: "koan_set_intent", - label: "Update code intent", - description: "Update existing code intent by ID.", - parameters: Type.Object({ - id: Type.String(), - file: Type.Optional(Type.String()), - function: Type.Optional(Type.String()), - behavior: Type.Optional(Type.String()), - decision_refs: Type.Optional(Type.Array(Type.String())), - }), - execute: (p, params) => { - const updated = setIntent(p, params.id, params); - return { - plan: updated, - message: `Updated intent ${params.id}`, - }; - }, - }); - - // -- CodeChange -- - planTool(pi, planRef, { - name: "koan_add_change", - label: "Add code change", - description: "Add code change to milestone.", - parameters: Type.Object({ - milestone: Type.String(), - file: Type.String(), - intent_ref: Type.Optional(Type.String()), - diff: Type.Optional(Type.String()), - doc_diff: Type.Optional(Type.String()), - comments: Type.Optional(Type.String()), - }), - execute: (p, params) 
=> { - const r = addChange(p, params); - return { - plan: r.plan, - message: `Added change ${r.id} to milestone ${params.milestone}`, - }; - }, - }); - - planTool(pi, planRef, { - name: "koan_set_change_diff", - label: "Set code change diff", - description: "Update change diff.", - parameters: Type.Object({ - id: Type.String(), - diff: Type.String(), - }), - execute: (p, params) => { - const updated = setChangeDiff(p, params.id, params.diff); - return { - plan: updated, - message: `Set diff for change ${params.id}`, - }; - }, - }); - - planTool(pi, planRef, { - name: "koan_set_change_doc_diff", - label: "Set code change doc_diff", - description: "Update change doc_diff.", - parameters: Type.Object({ - id: Type.String(), - doc_diff: Type.String(), - }), - execute: (p, params) => { - const updated = setChangeDocDiff(p, params.id, params.doc_diff); - return { - plan: updated, - message: `Set doc_diff for change ${params.id}`, - }; - }, - }); - - planTool(pi, planRef, { - name: "koan_set_change_comments", - label: "Set code change comments", - description: "Update change comments.", - parameters: Type.Object({ - id: Type.String(), - comments: Type.String(), - }), - execute: (p, params) => { - const updated = setChangeComments(p, params.id, params.comments); - return { - plan: updated, - message: `Set comments for change ${params.id}`, - }; - }, - }); - - planTool(pi, planRef, { - name: "koan_set_change_file", - label: "Set code change file", - description: "Update change file path.", - parameters: Type.Object({ - id: Type.String(), - file: Type.String(), - }), - execute: (p, params) => { - const updated = setChangeFile(p, params.id, params.file); - return { - plan: updated, - message: `Set file for change ${params.id}`, - }; - }, - }); - - planTool(pi, planRef, { - name: "koan_set_change_intent_ref", - label: "Set code change intent_ref", - description: "Update change intent reference.", - parameters: Type.Object({ - id: Type.String(), - intent_ref: Type.String(), - 
}), - execute: (p, params) => { - const updated = setChangeIntentRef(p, params.id, params.intent_ref); - return { - plan: updated, - message: `Set intent_ref for change ${params.id}`, - }; - }, - }); -} diff --git a/src/planner/tools/entity-design.ts b/src/planner/tools/entity-design.ts deleted file mode 100644 index c6e5e7d..0000000 --- a/src/planner/tools/entity-design.ts +++ /dev/null @@ -1,308 +0,0 @@ -// Plan entity tools for design-phase entities: decisions, risks, milestones. -// Exports planTool helper for shared use by entity-code and entity-structure. -// load-mutate-save wrapped in file lock; disk is single source of truth. - -import { Type, type Static, type TSchema } from "@sinclair/typebox"; -import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; -import * as path from "node:path"; - -import type { PlanRef } from "../lib/dispatch.js"; -import { loadPlan, savePlan } from "../plan/serialize.js"; -import type { Plan } from "../plan/types.js"; -import { withFileLock } from "../../utils/lock.js"; -import { - addDecision, - setDecision, - addRejectedAlternative, - setRejectedAlternative, - addRisk, - setRisk, - addMilestone, - setMilestoneName, - setMilestoneFiles, - setMilestoneFlags, - setMilestoneRequirements, - setMilestoneAcceptanceCriteria, - setMilestoneTests, -} from "../plan/mutate/index.js"; - -export function planTool( - pi: ExtensionAPI, - planRef: PlanRef, - opts: { - name: string; - label: string; - description: string; - parameters: TParams; - execute: (plan: Plan, params: Static) => { plan: Plan; message: string }; - }, -): void { - pi.registerTool({ - name: opts.name, - label: opts.label, - description: opts.description, - parameters: opts.parameters, - async execute(_toolCallId, params) { - if (!planRef.dir) throw new Error("No plan directory is active."); - const planPath = path.join(planRef.dir, "plan.json"); - return withFileLock(planPath, async () => { - const plan = await loadPlan(planRef.dir!); - const result = 
opts.execute(plan, params); - await savePlan(result.plan, planRef.dir!); - return { - content: [{ type: "text" as const, text: result.message }], - details: undefined, - }; - }); - }, - }); -} - -export function registerPlanDesignEntityTools( - pi: ExtensionAPI, - planRef: PlanRef, -): void { - // -- Decision -- - planTool(pi, planRef, { - name: "koan_add_decision", - label: "Add decision", - description: "Add decision to decision log. Source identifies where authority came from (e.g. code:src/foo.ts, docs:CLAUDE.md, user:ask, user:conversation, inference).", - parameters: Type.Object({ - decision: Type.String(), - reasoning: Type.String(), - source: Type.String({ description: "Provenance: code:, docs:, user:ask, user:conversation, or inference" }), - }), - execute: (p, params) => { - const r = addDecision(p, params); - return { - plan: r.plan, - message: `Added decision ${r.id}: "${params.decision}" [source: ${params.source}]`, - }; - }, - }); - - planTool(pi, planRef, { - name: "koan_set_decision", - label: "Update decision", - description: "Update existing decision by ID. 
Omitting source preserves the existing value.", - parameters: Type.Object({ - id: Type.String(), - decision: Type.Optional(Type.String()), - reasoning: Type.Optional(Type.String()), - source: Type.Optional(Type.String({ description: "Provenance: code:, docs:, user:ask, user:conversation, or inference" })), - }), - execute: (p, params) => { - const updated = setDecision(p, params.id, params); - return { - plan: updated, - message: `Updated decision ${params.id}`, - }; - }, - }); - - // -- RejectedAlternative -- - planTool(pi, planRef, { - name: "koan_add_rejected_alternative", - label: "Add rejected alternative", - description: "Add rejected alternative to decision log.", - parameters: Type.Object({ - alternative: Type.String(), - rejection_reason: Type.String(), - decision_ref: Type.String(), - }), - execute: (p, params) => { - const r = addRejectedAlternative(p, params); - return { - plan: r.plan, - message: `Added rejected alternative ${r.id}`, - }; - }, - }); - - planTool(pi, planRef, { - name: "koan_set_rejected_alternative", - label: "Update rejected alternative", - description: "Update existing rejected alternative by ID.", - parameters: Type.Object({ - id: Type.String(), - alternative: Type.Optional(Type.String()), - rejection_reason: Type.Optional(Type.String()), - decision_ref: Type.Optional(Type.String()), - }), - execute: (p, params) => { - const updated = setRejectedAlternative(p, params.id, params); - return { - plan: updated, - message: `Updated rejected alternative ${params.id}`, - }; - }, - }); - - // -- Risk -- - planTool(pi, planRef, { - name: "koan_add_risk", - label: "Add risk", - description: "Add risk to known risks.", - parameters: Type.Object({ - risk: Type.String(), - mitigation: Type.String(), - anchor: Type.Optional(Type.String()), - decision_ref: Type.Optional(Type.String()), - }), - execute: (p, params) => { - const r = addRisk(p, params); - return { - plan: r.plan, - message: `Added risk ${r.id}: "${params.risk}"`, - }; - }, - }); - - 
planTool(pi, planRef, { - name: "koan_set_risk", - label: "Update risk", - description: "Update existing risk by ID.", - parameters: Type.Object({ - id: Type.String(), - risk: Type.Optional(Type.String()), - mitigation: Type.Optional(Type.String()), - anchor: Type.Optional(Type.String()), - decision_ref: Type.Optional(Type.String()), - }), - execute: (p, params) => { - const updated = setRisk(p, params.id, params); - return { - plan: updated, - message: `Updated risk ${params.id}`, - }; - }, - }); - - // -- Milestone -- - planTool(pi, planRef, { - name: "koan_add_milestone", - label: "Add milestone", - description: "Create new milestone.", - parameters: Type.Object({ - name: Type.String(), - files: Type.Optional(Type.Array(Type.String())), - flags: Type.Optional(Type.Array(Type.String())), - requirements: Type.Optional(Type.Array(Type.String())), - acceptance_criteria: Type.Optional(Type.Array(Type.String())), - tests: Type.Optional(Type.Array(Type.String())), - }), - execute: (p, params) => { - const r = addMilestone(p, params); - return { - plan: r.plan, - message: `Added milestone ${r.id}: "${params.name}"`, - }; - }, - }); - - planTool(pi, planRef, { - name: "koan_set_milestone_name", - label: "Set milestone name", - description: "Update milestone name.", - parameters: Type.Object({ - id: Type.String(), - name: Type.String(), - }), - execute: (p, params) => { - const updated = setMilestoneName(p, params.id, params.name); - return { - plan: updated, - message: `Set name for milestone ${params.id}`, - }; - }, - }); - - planTool(pi, planRef, { - name: "koan_set_milestone_files", - label: "Set milestone files", - description: "Update milestone files list.", - parameters: Type.Object({ - id: Type.String(), - files: Type.Array(Type.String()), - }), - execute: (p, params) => { - const updated = setMilestoneFiles(p, params.id, params.files); - return { - plan: updated, - message: `Set files for milestone ${params.id} (${params.files.length} files)`, - }; - }, - }); - - 
planTool(pi, planRef, { - name: "koan_set_milestone_flags", - label: "Set milestone flags", - description: "Update milestone flags list.", - parameters: Type.Object({ - id: Type.String(), - flags: Type.Array(Type.String()), - }), - execute: (p, params) => { - const updated = setMilestoneFlags(p, params.id, params.flags); - return { - plan: updated, - message: `Set flags for milestone ${params.id}`, - }; - }, - }); - - planTool(pi, planRef, { - name: "koan_set_milestone_requirements", - label: "Set milestone requirements", - description: "Update milestone requirements list.", - parameters: Type.Object({ - id: Type.String(), - requirements: Type.Array(Type.String()), - }), - execute: (p, params) => { - const updated = setMilestoneRequirements(p, params.id, params.requirements); - return { - plan: updated, - message: `Set requirements for milestone ${params.id} (${params.requirements.length} items)`, - }; - }, - }); - - planTool(pi, planRef, { - name: "koan_set_milestone_acceptance_criteria", - label: "Set milestone acceptance criteria", - description: "Update milestone acceptance criteria list.", - parameters: Type.Object({ - id: Type.String(), - acceptance_criteria: Type.Array(Type.String()), - }), - execute: (p, params) => { - const updated = setMilestoneAcceptanceCriteria( - p, - params.id, - params.acceptance_criteria, - ); - return { - plan: updated, - message: `Set acceptance criteria for milestone ${params.id} (${params.acceptance_criteria.length} items)`, - }; - }, - }); - - planTool(pi, planRef, { - name: "koan_set_milestone_tests", - label: "Set milestone tests", - description: "Update milestone tests list.", - parameters: Type.Object({ - id: Type.String(), - tests: Type.Array(Type.String()), - }), - execute: (p, params) => { - const updated = setMilestoneTests(p, params.id, params.tests); - return { - plan: updated, - message: `Set tests for milestone ${params.id} (${params.tests.length} tests)`, - }; - }, - }); -} diff --git 
a/src/planner/tools/entity-structure.ts b/src/planner/tools/entity-structure.ts deleted file mode 100644 index cc710a8..0000000 --- a/src/planner/tools/entity-structure.ts +++ /dev/null @@ -1,156 +0,0 @@ -// Plan entity tools for structural entities: waves, diagrams, readme entries. -// Uses planTool helper from entity-design (shared load-mutate-save-lock wrapper). - -import { Type } from "@sinclair/typebox"; -import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; - -import type { PlanRef } from "../lib/dispatch.js"; -import { planTool } from "./entity-design.js"; -import { - addWave, - setWaveMilestones, - addDiagram, - setDiagram, - addDiagramNode, - addDiagramEdge, - setReadmeEntry, -} from "../plan/mutate/index.js"; - -export function registerPlanStructureEntityTools( - pi: ExtensionAPI, - planRef: PlanRef, -): void { - // -- Wave -- - planTool(pi, planRef, { - name: "koan_add_wave", - label: "Add wave", - description: "Create wave with milestone list.", - parameters: Type.Object({ - milestones: Type.Array(Type.String()), - }), - execute: (p, params) => { - const r = addWave(p, params); - return { - plan: r.plan, - message: `Added wave ${r.id} with ${params.milestones.length} milestones`, - }; - }, - }); - - planTool(pi, planRef, { - name: "koan_set_wave_milestones", - label: "Set wave milestones", - description: "Update wave milestones list.", - parameters: Type.Object({ - id: Type.String(), - milestones: Type.Array(Type.String()), - }), - execute: (p, params) => { - const updated = setWaveMilestones(p, params.id, params.milestones); - return { - plan: updated, - message: `Set milestones for wave ${params.id}`, - }; - }, - }); - - // -- Diagram -- - planTool(pi, planRef, { - name: "koan_add_diagram", - label: "Add diagram", - description: "Create diagram graph.", - parameters: Type.Object({ - type: Type.Union([ - Type.Literal("architecture"), - Type.Literal("state"), - Type.Literal("sequence"), - Type.Literal("dataflow"), - ]), - scope: 
Type.String(), - title: Type.String(), - }), - execute: (p, params) => { - const r = addDiagram(p, params); - return { - plan: r.plan, - message: `Added diagram ${r.id}: "${params.title}"`, - }; - }, - }); - - planTool(pi, planRef, { - name: "koan_set_diagram", - label: "Update diagram", - description: "Update diagram properties.", - parameters: Type.Object({ - id: Type.String(), - title: Type.Optional(Type.String()), - scope: Type.Optional(Type.String()), - ascii_render: Type.Optional(Type.String()), - }), - execute: (p, params) => { - const updated = setDiagram(p, params.id, params); - return { - plan: updated, - message: `Updated diagram ${params.id}`, - }; - }, - }); - - planTool(pi, planRef, { - name: "koan_add_diagram_node", - label: "Add diagram node", - description: "Add node to diagram.", - parameters: Type.Object({ - diagram_id: Type.String(), - id: Type.String(), - label: Type.String(), - type: Type.Optional(Type.String()), - }), - execute: (p, params) => { - const updated = addDiagramNode(p, params.diagram_id, params); - return { - plan: updated, - message: `Added node ${params.id} to diagram ${params.diagram_id}`, - }; - }, - }); - - planTool(pi, planRef, { - name: "koan_add_diagram_edge", - label: "Add diagram edge", - description: "Add edge to diagram.", - parameters: Type.Object({ - diagram_id: Type.String(), - source: Type.String(), - target: Type.String(), - label: Type.String(), - protocol: Type.Optional(Type.String()), - }), - execute: (p, params) => { - const updated = addDiagramEdge(p, params.diagram_id, params); - return { - plan: updated, - message: `Added edge ${params.source}->${params.target} to diagram ${params.diagram_id}`, - }; - }, - }); - - // -- ReadmeEntry -- - planTool(pi, planRef, { - name: "koan_set_readme_entry", - label: "Set readme entry", - description: "Upsert readme entry by path.", - parameters: Type.Object({ - path: Type.String(), - content: Type.String(), - }), - execute: (p, params) => { - const updated = 
setReadmeEntry(p, params.path, params.content); - return { - plan: updated, - message: `Set readme entry for ${params.path}`, - }; - }, - }); -} diff --git a/src/planner/tools/getters.ts b/src/planner/tools/getters.ts deleted file mode 100644 index d7924bb..0000000 --- a/src/planner/tools/getters.ts +++ /dev/null @@ -1,175 +0,0 @@ -import { Type } from "@sinclair/typebox"; -import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; - -import type { PlanRef } from "../lib/dispatch.js"; -import { loadPlan } from "../plan/serialize.js"; -import type { Plan, Milestone, CodeIntent, CodeChange } from "../plan/types.js"; - -export function registerPlanGetterTools( - pi: ExtensionAPI, - planRef: PlanRef, -): void { - pi.registerTool({ - name: "koan_get_plan", - label: "Get plan summary", - description: - "Returns plan overview and entity counts with IDs for drill-down.", - parameters: Type.Object({}), - async execute() { - if (!planRef.dir) throw new Error("No plan directory is active."); - const p = await loadPlan(planRef.dir); - const summary = formatPlanSummary(p); - return { - content: [{ type: "text" as const, text: summary }], - details: undefined, - }; - }, - }); - - pi.registerTool({ - name: "koan_get_milestone", - label: "Get milestone by ID", - description: "Returns full milestone with code_intents and code_changes.", - parameters: Type.Object({ - id: Type.String({ description: "Milestone ID (e.g., M-001)" }), - }), - async execute(_toolCallId, params) { - if (!planRef.dir) throw new Error("No plan directory is active."); - const p = await loadPlan(planRef.dir); - const m = p.milestones.find((x) => x.id === params.id); - if (!m) throw new Error(`Milestone ${params.id} not found`); - return { - content: [{ type: "text" as const, text: JSON.stringify(m, null, 2) }], - details: undefined, - }; - }, - }); - - pi.registerTool({ - name: "koan_get_decision", - label: "Get decision by ID", - description: "Returns decision from decision log.", - parameters: 
Type.Object({ - id: Type.String({ description: "Decision ID (e.g., DL-001)" }), - }), - async execute(_toolCallId, params) { - if (!planRef.dir) throw new Error("No plan directory is active."); - const p = await loadPlan(planRef.dir); - const d = p.planning_context.decision_log.find( - (x) => x.id === params.id, - ); - if (!d) throw new Error(`Decision ${params.id} not found`); - return { - content: [{ type: "text" as const, text: JSON.stringify(d, null, 2) }], - details: undefined, - }; - }, - }); - - pi.registerTool({ - name: "koan_get_intent", - label: "Get code intent by ID", - description: "Returns code intent and parent milestone ID.", - parameters: Type.Object({ - id: Type.String({ description: "Intent ID (e.g., CI-M-001-001)" }), - }), - async execute(_toolCallId, params) { - if (!planRef.dir) throw new Error("No plan directory is active."); - const p = await loadPlan(planRef.dir); - const result = findIntent(p, params.id); - if (!result) - throw new Error(`Intent ${params.id} not found`); - return { - content: [ - { - type: "text" as const, - text: JSON.stringify( - { milestone_id: result.milestoneId, intent: result.intent }, - null, - 2, - ), - }, - ], - details: undefined, - }; - }, - }); - - pi.registerTool({ - name: "koan_get_change", - label: "Get code change by ID", - description: "Returns code change and parent milestone ID.", - parameters: Type.Object({ - id: Type.String({ description: "Change ID (e.g., CC-M-001-001)" }), - }), - async execute(_toolCallId, params) { - if (!planRef.dir) throw new Error("No plan directory is active."); - const p = await loadPlan(planRef.dir); - const result = findChange(p, params.id); - if (!result) - throw new Error(`Change ${params.id} not found`); - return { - content: [ - { - type: "text" as const, - text: JSON.stringify( - { milestone_id: result.milestoneId, change: result.change }, - null, - 2, - ), - }, - ], - details: undefined, - }; - }, - }); -} - -function formatPlanSummary(p: Plan): string { - const lines 
= [ - "Plan Summary", - "============", - "", - "Overview:", - ` Problem: ${p.overview.problem || "(empty)"}`, - ` Approach: ${p.overview.approach || "(empty)"}`, - "", - `Milestones (${p.milestones.length}):`, - ...p.milestones.map((m) => ` ${m.id}: ${m.name}`), - "", - `Decisions (${p.planning_context.decision_log.length}):`, - ...p.planning_context.decision_log.map((d) => { - const src = d.source ? ` [${d.source}]` : " [no source]"; - return ` ${d.id}: ${d.decision}${src}`; - }), - "", - `Waves (${p.waves.length}):`, - ...p.waves.map((w) => ` ${w.id}: [${w.milestones.join(", ")}]`), - "", - `Diagrams (${p.diagram_graphs.length}):`, - ...p.diagram_graphs.map((d) => ` ${d.id}: ${d.title} (${d.type})`), - ]; - return lines.join("\n"); -} - -function findIntent( - p: Plan, - id: string, -): { milestoneId: string; intent: CodeIntent } | null { - for (const m of p.milestones) { - const intent = m.code_intents.find((ci) => ci.id === id); - if (intent) return { milestoneId: m.id, intent }; - } - return null; -} - -function findChange( - p: Plan, - id: string, -): { milestoneId: string; change: CodeChange } | null { - for (const m of p.milestones) { - const change = m.code_changes.find((cc) => cc.id === id); - if (change) return { milestoneId: m.id, change }; - } - return null; -} diff --git a/src/planner/tools/qr.ts b/src/planner/tools/qr.ts deleted file mode 100644 index 83364de..0000000 --- a/src/planner/tools/qr.ts +++ /dev/null @@ -1,230 +0,0 @@ -import { Type } from "@sinclair/typebox"; -import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; -import { promises as fs } from "node:fs"; -import * as path from "node:path"; - -import type { PlanRef } from "../lib/dispatch.js"; -import type { QRFile } from "../qr/types.js"; -import { addQRItem, setQRItem, assignGroup } from "../qr/mutate.js"; -import { withFileLock } from "../../utils/lock.js"; - -function requirePhase(planRef: PlanRef): string { - if (!planRef.qrPhase) throw new Error("No QR phase is 
active."); - return planRef.qrPhase; -} - -function createEmptyQRFile(phase: string): QRFile { - return { - phase, - iteration: 1, - items: [], - }; -} - -async function loadQR(dir: string, phase: string): Promise { - const qrPath = path.join(dir, `qr-${phase}.json`); - try { - const content = await fs.readFile(qrPath, "utf8"); - return JSON.parse(content) as QRFile; - } catch (err: unknown) { - if ((err as NodeJS.ErrnoException).code === "ENOENT") { - return createEmptyQRFile(phase); - } - throw err; - } -} - -async function saveQR(qr: QRFile, dir: string, phase: string): Promise { - const qrPath = path.join(dir, `qr-${phase}.json`); - const tmpPath = path.join(dir, `.qr-${phase}.json.tmp`); - const content = `${JSON.stringify(qr, null, 2)}\n`; - await fs.writeFile(tmpPath, content, "utf8"); - await fs.rename(tmpPath, qrPath); -} - -export function registerQRTools(pi: ExtensionAPI, planRef: PlanRef): void { - pi.registerTool({ - name: "koan_qr_add_item", - label: "Add QR item", - description: "Add quality review item.", - parameters: Type.Object({ - scope: Type.String(), - check: Type.String(), - severity: Type.Optional( - Type.Union([ - Type.Literal("MUST"), - Type.Literal("SHOULD"), - Type.Literal("COULD"), - ]), - ), - }), - async execute(_toolCallId, params) { - if (!planRef.dir) throw new Error("No plan directory is active."); - const phase = requirePhase(planRef); - const qrPath = path.join(planRef.dir, `qr-${phase}.json`); - return withFileLock(qrPath, async () => { - const qr = await loadQR(planRef.dir!, phase); - const r = addQRItem(qr, params); - await saveQR(r.qr, planRef.dir!, phase); - return { - content: [{ type: "text" as const, text: `Added QR item ${r.id}` }], - details: undefined, - }; - }); - }, - }); - - pi.registerTool({ - name: "koan_qr_set_item", - label: "Update QR item", - description: "Update QR item status or finding.", - parameters: Type.Object({ - id: Type.String(), - status: Type.Optional( - Type.Union([ - Type.Literal("TODO"), - 
Type.Literal("PASS"), - Type.Literal("FAIL"), - ]), - ), - finding: Type.Optional(Type.String()), - check: Type.Optional(Type.String()), - severity: Type.Optional( - Type.Union([ - Type.Literal("MUST"), - Type.Literal("SHOULD"), - Type.Literal("COULD"), - ]), - ), - }), - async execute(_toolCallId, params) { - if (!planRef.dir) throw new Error("No plan directory is active."); - const phase = requirePhase(planRef); - const qrPath = path.join(planRef.dir, `qr-${phase}.json`); - return withFileLock(qrPath, async () => { - const qr = await loadQR(planRef.dir!, phase); - const updated = setQRItem(qr, params.id, params); - await saveQR(updated, planRef.dir!, phase); - return { - content: [{ type: "text" as const, text: `Updated QR item ${params.id}` }], - details: undefined, - }; - }); - }, - }); - - pi.registerTool({ - name: "koan_qr_assign_group", - label: "Assign QR group", - description: "Assign group ID to QR items.", - parameters: Type.Object({ - ids: Type.Array(Type.String()), - group_id: Type.String(), - }), - async execute(_toolCallId, params) { - if (!planRef.dir) throw new Error("No plan directory is active."); - const phase = requirePhase(planRef); - const qrPath = path.join(planRef.dir, `qr-${phase}.json`); - return withFileLock(qrPath, async () => { - const qr = await loadQR(planRef.dir!, phase); - const updated = assignGroup(qr, params.ids, params.group_id); - await saveQR(updated, planRef.dir!, phase); - return { - content: [ - { - type: "text" as const, - text: `Assigned ${params.ids.length} items to group ${params.group_id}`, - }, - ], - details: undefined, - }; - }); - }, - }); - - pi.registerTool({ - name: "koan_qr_get_item", - label: "Get QR item", - description: "Get QR item by ID.", - parameters: Type.Object({ - id: Type.String(), - }), - async execute(_toolCallId, params) { - if (!planRef.dir) throw new Error("No plan directory is active."); - const phase = requirePhase(planRef); - const qr = await loadQR(planRef.dir, phase); - const item = 
qr.items.find((x) => x.id === params.id); - if (!item) throw new Error(`QR item ${params.id} not found`); - return { - content: [{ type: "text" as const, text: JSON.stringify(item, null, 2) }], - details: undefined, - }; - }, - }); - - pi.registerTool({ - name: "koan_qr_list_items", - label: "List QR items", - description: "List QR items, optionally filtered by status.", - parameters: Type.Object({ - status: Type.Optional( - Type.Union([ - Type.Literal("TODO"), - Type.Literal("PASS"), - Type.Literal("FAIL"), - ]), - ), - }), - async execute(_toolCallId, params) { - if (!planRef.dir) throw new Error("No plan directory is active."); - const phase = requirePhase(planRef); - const qr = await loadQR(planRef.dir, phase); - const filtered = params.status - ? qr.items.filter((item) => item.status === params.status) - : qr.items; - return { - content: [ - { type: "text" as const, text: JSON.stringify(filtered, null, 2) }, - ], - details: undefined, - }; - }, - }); - - pi.registerTool({ - name: "koan_qr_summary", - label: "QR summary", - description: "Get QR summary with counts by status and severity.", - parameters: Type.Object({}), - async execute() { - if (!planRef.dir) throw new Error("No plan directory is active."); - const phase = requirePhase(planRef); - const qr = await loadQR(planRef.dir, phase); - - const byStatus = { - TODO: qr.items.filter((x) => x.status === "TODO").length, - PASS: qr.items.filter((x) => x.status === "PASS").length, - FAIL: qr.items.filter((x) => x.status === "FAIL").length, - }; - - const bySeverity = { - MUST: qr.items.filter((x) => x.severity === "MUST").length, - SHOULD: qr.items.filter((x) => x.severity === "SHOULD").length, - COULD: qr.items.filter((x) => x.severity === "COULD").length, - }; - - const summary = { - total: qr.items.length, - by_status: byStatus, - by_severity: bySeverity, - }; - - return { - content: [ - { type: "text" as const, text: JSON.stringify(summary, null, 2) }, - ], - details: undefined, - }; - }, - }); -} diff 
--git a/src/planner/tools/setters.ts b/src/planner/tools/setters.ts deleted file mode 100644 index 13e0f92..0000000 --- a/src/planner/tools/setters.ts +++ /dev/null @@ -1,82 +0,0 @@ -import { Type } from "@sinclair/typebox"; -import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; - -import type { PlanRef } from "../lib/dispatch.js"; -import { loadPlan, savePlan } from "../plan/serialize.js"; -import { - setOverview, - setConstraints, - setInvisibleKnowledge, -} from "../plan/mutate/index.js"; - -export function registerPlanSetterTools( - pi: ExtensionAPI, - planRef: PlanRef, -): void { - pi.registerTool({ - name: "koan_set_overview", - label: "Set plan overview", - description: "Set problem statement and approach.", - parameters: Type.Object({ - problem: Type.Optional(Type.String()), - approach: Type.Optional(Type.String()), - }), - async execute(_toolCallId, params) { - if (!planRef.dir) throw new Error("No plan directory is active."); - const p = await loadPlan(planRef.dir); - const updated = setOverview(p, params); - await savePlan(updated, planRef.dir); - return { - content: [{ type: "text" as const, text: "Overview updated." 
}], - details: undefined, - }; - }, - }); - - pi.registerTool({ - name: "koan_set_constraints", - label: "Set plan constraints", - description: "Set planning constraints list.", - parameters: Type.Object({ - constraints: Type.Array(Type.String()), - }), - async execute(_toolCallId, params) { - if (!planRef.dir) throw new Error("No plan directory is active."); - const p = await loadPlan(planRef.dir); - const updated = setConstraints(p, params.constraints); - await savePlan(updated, planRef.dir); - return { - content: [ - { - type: "text" as const, - text: `Constraints set (${params.constraints.length} items).`, - }, - ], - details: undefined, - }; - }, - }); - - pi.registerTool({ - name: "koan_set_invisible_knowledge", - label: "Set invisible knowledge", - description: "Set system description, invariants, and tradeoffs.", - parameters: Type.Object({ - system: Type.Optional(Type.String()), - invariants: Type.Optional(Type.Array(Type.String())), - tradeoffs: Type.Optional(Type.Array(Type.String())), - }), - async execute(_toolCallId, params) { - if (!planRef.dir) throw new Error("No plan directory is active."); - const p = await loadPlan(planRef.dir); - const updated = setInvisibleKnowledge(p, params); - await savePlan(updated, planRef.dir); - return { - content: [ - { type: "text" as const, text: "Invisible knowledge updated." }, - ], - details: undefined, - }; - }, - }); -} diff --git a/src/planner/ui/widget.ts b/src/planner/ui/widget.ts deleted file mode 100644 index bfe684e..0000000 --- a/src/planner/ui/widget.ts +++ /dev/null @@ -1,999 +0,0 @@ -// Persistent TUI widget for koan workflow progress. -// Full-width background canvas (toolPendingBg) via component factory. -// Hash-based change detection + 1s unref'd timer for elapsed updates. -// Created by session.plan(), destroyed in onContextComplete finally block. 
-// -// Layout and styling reference: docs/planning-widget.md and the -// corresponding execution widget design deck selections (Stacked Modular -// Cards canvas + Vertical Timeline Rail). - -import type { ExtensionUIContext } from "@mariozechner/pi-coding-agent"; -import type { Theme, ThemeColor } from "@mariozechner/pi-coding-agent"; -import { truncateToWidth, visibleWidth, wrapTextWithAnsi } from "@mariozechner/pi-tui"; -import type { LogLine } from "../lib/audit.js"; - -// -- Types -- - -export type PhaseStatus = "pending" | "running" | "completed" | "failed"; - -interface PhaseEntry { - key: string; - label: string; - detail: string; - status: PhaseStatus; -} - -type WidgetMode = "planning" | "execution"; - -type QRMode = "initial" | "fix"; -type QRPhase = "idle" | "execute" | "decompose" | "verify" | "done"; - -interface WidgetState { - mode: WidgetMode; - planId: string; - phases: PhaseEntry[]; - activeIndex: number; // 0-based; -1 when done - step: string; - activity: string; - startedAt: number; - logLines: LogLine[]; - qrIteration: number | null; - qrIterationsMax: number | null; - qrMode: QRMode | null; - qrPhase: QRPhase; - qrDone: number | null; - qrTotal: number | null; - qrPass: number | null; - qrFail: number | null; - qrTodo: number | null; - subagentRole: string | null; - subagentModel: string | null; - subagentParallelCount: number | null; - subagentQueued: number | null; - subagentActive: number | null; - subagentDone: number | null; -} - -export interface WidgetUpdate { - activeIndex?: number; - step?: string; - activity?: string; - phaseStatus?: { index: number; status: PhaseStatus }; - mode?: WidgetMode; - logLines?: readonly LogLine[]; - qrIteration?: number | null; - qrIterationsMax?: number | null; - qrMode?: QRMode | null; - qrPhase?: QRPhase; - qrDone?: number | null; - qrTotal?: number | null; - qrPass?: number | null; - qrFail?: number | null; - qrTodo?: number | null; - subagentRole?: string | null; - subagentModel?: string | null; - 
subagentParallelCount?: number | null; - subagentQueued?: number | null; - subagentActive?: number | null; - subagentDone?: number | null; -} - -// -- Constants -- - -const WIDGET_KEY = "koan"; -const PAD = 2; // horizontal canvas padding each side -const CARD_MARGIN = 2; // left margin before card borders -const LOG_LINES = 5; - -const BODY_INDENT = " "; - -const PLANNING_PHASES: ReadonlyArray<{ key: string; label: string; detail: string }> = [ - { key: "design", label: "Plan design", detail: "Designing plan" }, - { key: "code", label: "Plan code", detail: "Creating code plan" }, - { key: "docs", label: "Plan docs", detail: "Documenting plan" }, -]; - -const STATUS_ICON: Record = { - pending: "○", - running: "●", - completed: "●", - failed: "✖", -}; - -const STATUS_COLOR: Record = { - pending: "muted", - running: "accent", - completed: "dim", - failed: "error", -}; - -const STATUS_TAG: Record = { - pending: "upcoming", - running: "current", - completed: "done", - failed: "failed", -}; - -const LOG_PLACEHOLDER = "No recent log entries"; -const TIMELINE_MIN_WIDTH = 16; -const TIMELINE_MAX_WIDTH = 28; -const CONNECTOR = "│"; -const COLUMN_GAP = 4; - -interface BorderStyle { - topLeft: string; - topRight: string; - bottomLeft: string; - bottomRight: string; - horizontal: string; - vertical: string; -} - -const BORDER_SOLID: BorderStyle = { - topLeft: "┌", - topRight: "┐", - bottomLeft: "└", - bottomRight: "┘", - horizontal: "─", - vertical: "│", -}; - -// -- Canvas primitive -- -// Content width adapts to terminal; background fills edge to edge. 
- -function contentWidth(termWidth: number): number { - return Math.max(40, termWidth - PAD * 2); -} - -function canvasLine(content: string, termWidth: number, theme: Theme): string { - const cw = contentWidth(termWidth); - const inner = clampToWidth(content, cw); - const line = " ".repeat(PAD) + inner + " ".repeat(PAD); - return theme.bg("toolPendingBg", line); -} - -// -- Helpers -- - -function clampToWidth(text: string, width: number, ellipsis = ""): string { - const truncated = truncateToWidth(text, width, ellipsis === "" ? "" : ellipsis, false); - const visible = visibleWidth(truncated); - if (visible >= width) { - return truncated; - } - return truncated + " ".repeat(width - visible); -} - -function indentLines(lines: string[], width: number, indent = BODY_INDENT): string[] { - if (!indent) { - return lines.map((line) => clampToWidth(line, width)); - } - const indentWidth = visibleWidth(indent); - const available = Math.max(0, width - indentWidth); - return lines.map((line) => indent + clampToWidth(line, available)); -} - -interface PlanningColumns { - innerWidth: number; - contentWidth: number; - timelineWidth: number; - detailWidth: number; -} - -function planningColumns(width: number): PlanningColumns { - const innerWidth = Math.max(0, width - 2); - const indentWidth = visibleWidth(BODY_INDENT); - const contentWidth = Math.max(0, innerWidth - indentWidth); - const timelineWidth = Math.min(TIMELINE_MAX_WIDTH, Math.max(TIMELINE_MIN_WIDTH, Math.floor(contentWidth * 0.3))); - const detailWidth = Math.max(14, contentWidth - timelineWidth - COLUMN_GAP); - return { innerWidth, contentWidth, timelineWidth, detailWidth }; -} - -function formatElapsed(ms: number): string { - const totalSec = Math.floor(ms / 1000); - const h = Math.floor(totalSec / 3600); - const m = Math.floor((totalSec % 3600) / 60); - const s = totalSec % 60; - - if (h > 0) { - return `${h}h ${String(m).padStart(2, "0")}m ${String(s).padStart(2, "0")}s`; - } - - return `${m}m 
${String(s).padStart(2, "0")}s`; -} - -function rightAlign(left: string, right: string, width: number): string { - const gap = Math.max(1, width - visibleWidth(left) - visibleWidth(right)); - return `${left}${" ".repeat(gap)}${right}`; -} - -function activePhase(state: WidgetState): PhaseEntry | null { - if (state.activeIndex < 0) return null; - return state.phases[state.activeIndex] ?? null; -} - -function normalizeLogLines(lines: readonly LogLine[] | undefined): LogLine[] { - if (!lines || lines.length === 0) return []; - return [...lines].slice(-(LOG_LINES * 2)); -} - -const HEADER_STATUS_SHORT: Record = { - CURRENT: "CUR", - UPCOMING: "UP", - DONE: "DONE", - FAILED: "FAIL", -}; - -const HEADER_PHASE_SHORT: Record = { - "Plan design": "Design", - "Plan code": "Code", - "Plan docs": "Docs", -}; - -interface PlanningHeaderVariant { - label: string; - phase: string | null; - status: string | null; -} - -function selectPlanningHeaderVariant(phaseLabel: string, statusLabel: string, budget: number): PlanningHeaderVariant { - const phaseShort = HEADER_PHASE_SHORT[phaseLabel] ?? phaseLabel; - const statusShort = HEADER_STATUS_SHORT[statusLabel] ?? 
statusLabel; - - const truncatedPhase = truncateToWidth( - phaseShort, - Math.max(0, budget - visibleWidth("Planning · ")), - "…", - false, - ); - - const candidates: PlanningHeaderVariant[] = [ - { label: `Planning · ${phaseLabel} · ${statusLabel}`, phase: phaseLabel, status: statusLabel }, - { label: `Planning · ${phaseLabel} · ${statusShort}`, phase: phaseLabel, status: statusShort }, - { label: `Planning · ${phaseLabel}`, phase: phaseLabel, status: null }, - { label: `Planning · ${phaseShort}`, phase: phaseShort, status: null }, - { label: `Planning · ${truncatedPhase}`, phase: truncatedPhase, status: null }, - { label: "Planning", phase: null, status: null }, - ]; - - for (const candidate of candidates) { - if (visibleWidth(candidate.label) <= budget) { - return candidate; - } - } - - return { - label: truncateToWidth("Planning", budget, "…", false), - phase: null, - status: null, - }; -} - -export function formatPlanningHeaderLabel(phaseLabel: string, statusLabel: string, budget: number): string { - return selectPlanningHeaderVariant(phaseLabel, statusLabel, budget).label; -} - -function renderPlanningHeader(state: WidgetState, theme: Theme, budget: number): string { - const active = activePhase(state); - const phaseLabel = active?.label ?? "Complete"; - const statusLabel = (active ? STATUS_TAG[active.status] : "done").toUpperCase(); - const variant = selectPlanningHeaderVariant(phaseLabel, statusLabel, budget); - - if (!variant.label.startsWith("Planning")) { - return theme.bold(theme.fg("accent", variant.label)); - } - - const statusColor: ThemeColor = active ? 
STATUS_COLOR[active.status] : "dim"; - - if (!variant.phase) { - return theme.bold(theme.fg("accent", variant.label)); - } - - let result = `${theme.bold(theme.fg("accent", "Planning"))}${theme.fg("muted", " · ")}${theme.fg("muted", variant.phase)}`; - if (variant.status) { - result += `${theme.fg("muted", " · ")}${theme.bold(theme.fg(statusColor, variant.status))}`; - } - return result; -} - -function renderTimelineLines(state: WidgetState, theme: Theme, width: number): string[] { - const lines: string[] = []; - const total = state.phases.length; - - state.phases.forEach((phase, index) => { - const isActive = index === state.activeIndex; - const color = STATUS_COLOR[phase.status]; - const iconBase = STATUS_ICON[phase.status]; - const icon = isActive - ? theme.bold(theme.fg("accent", iconBase)) - : theme.fg(color, iconBase); - - const labelColor: ThemeColor = phase.status === "completed" - ? "dim" - : isActive - ? "accent" - : phase.status === "failed" - ? "error" - : "muted"; - - const emphasize = isActive || phase.status === "completed"; - const label = emphasize - ? theme.bold(theme.fg(labelColor, phase.label)) - : theme.fg(labelColor, phase.label); - - lines.push(clampToWidth(`${icon} ${label}`, width, "…")); - - const connector = index < total - 1 ? 
theme.fg("muted", CONNECTOR) : " "; - lines.push(clampToWidth(`${connector} ${theme.fg("muted", STATUS_TAG[phase.status].toUpperCase())}`, width, "…")); - - if (index < total - 1) { - lines.push(clampToWidth(`${theme.fg("muted", CONNECTOR)} `, width)); - } - }); - - return lines; -} - -function shouldShowQR(state: WidgetState): boolean { - if (state.qrIteration === null) return false; - const active = activePhase(state); - if (!active) return false; - return true; -} - -interface QRCounterValues { - done: string; - pass: string; - fail: string; - todo: string; -} - -function qrCounterValues(state: WidgetState): QRCounterValues { - const meaningful = (state.qrPhase === "verify" || state.qrPhase === "done") && state.qrTotal !== null; - if (!meaningful || state.qrTotal === null) { - return { done: "-/-", pass: "-", fail: "-", todo: "-" }; - } - - return { - done: `${state.qrDone ?? 0}/${state.qrTotal}`, - pass: String(state.qrPass ?? 0), - fail: String(state.qrFail ?? 0), - todo: String(state.qrTodo ?? 0), - }; -} - -function runtimeStageLabel(state: WidgetState): string { - switch (state.qrPhase) { - case "idle": - case "execute": - return state.qrMode === "fix" ? "Fixing" : "Writing"; - case "decompose": - return "Analyzing"; - case "verify": - return "Verifying"; - case "done": - return "Complete"; - } -} - -function stageCycleText(state: WidgetState): string { - const iter = state.qrIteration ?? 0; - const iterMax = state.qrIterationsMax ? `/${state.qrIterationsMax}` : ""; - const mode = state.qrMode === "fix" ? 
"fix" : "initial"; - return `cycle ${iter}${iterMax} · ${mode}`; -} - -function shouldShowRuntimeSection(state: WidgetState): boolean { - return shouldShowQR(state) || shouldShowSubagentSection(state); -} - -function renderRuntimeRow(theme: Theme, width: number, keyWidth: number, key: string, value: string): string { - const padded = key.padEnd(keyWidth, " "); - return clampToWidth(`${theme.fg("muted", padded)} : ${value}`, width, "…"); -} - -function renderRuntimeStatusSection(state: WidgetState, theme: Theme, width: number): string[] { - if (!shouldShowRuntimeSection(state)) { - return []; - } - - const rows: Array<{ key: string; value: string }> = []; - - if (shouldShowQR(state)) { - const stageValue = `${theme.bold(theme.fg("accent", runtimeStageLabel(state)))} ${theme.fg("dim", `(${stageCycleText(state)})`)}`; - const values = qrCounterValues(state); - const qualityValue = [ - `${theme.fg("muted", "checked")} ${theme.fg("dim", values.done)}`, - `${theme.fg("muted", "pass")} ${theme.fg("accent", values.pass)}`, - `${theme.bold(theme.fg("error", "FAIL"))} ${theme.bold(theme.fg("error", values.fail))}`, - `${theme.fg("muted", "remaining")} ${theme.fg("muted", values.todo)}`, - ].join(" "); - - rows.push({ key: "stage", value: stageValue }); - rows.push({ key: "quality", value: qualityValue }); - } - - if (shouldShowSubagentSection(state)) { - const parallel = state.subagentParallelCount ?? 1; - const pool = parallel > 1 ? 
`pool ×${parallel}` : "single"; - const workersValue = [ - `${theme.fg("muted", "queued")} ${theme.fg("muted", subagentCount(state.subagentQueued))}`, - `${theme.fg("muted", "active")} ${theme.bold(theme.fg("accent", subagentCount(state.subagentActive)))}`, - `${theme.fg("muted", "done")} ${theme.fg("dim", subagentCount(state.subagentDone))}`, - `${theme.fg("dim", pool)}`, - ].join(" "); - - rows.push({ key: "workers", value: workersValue }); - } - - if (rows.length === 0) { - return []; - } - - const keyWidth = Math.max(...rows.map((row) => visibleWidth(row.key))); - const lines = [clampToWidth(theme.fg("dim", "Runtime"), width)]; - - for (const row of rows) { - lines.push(renderRuntimeRow(theme, width, keyWidth, row.key, row.value)); - } - - return lines; -} - -interface DetailSections { - core: string[]; - footer: string[]; -} - -interface DetailSectionDefinition { - id: string; - placement: "core" | "footer"; - select: (state: WidgetState) => ViewModel | null; - render: (view: ViewModel, theme: Theme, width: number) => string[]; -} - -interface IdentityView { - planId: string; - agentLabel: "Agent" | "Agent pool"; - agentValue: string; - model: string; -} - -function shouldShowSubagentSection(state: WidgetState): boolean { - if (state.subagentRole) return true; - return state.subagentQueued !== null || state.subagentActive !== null || state.subagentDone !== null; -} - -function subagentCount(value: number | null): string { - return value === null ? "-" : String(value); -} - -function identityView(state: WidgetState): IdentityView { - const role = state.subagentRole ?? "—"; - const parallel = state.subagentParallelCount ?? 1; - - if (parallel > 1) { - return { - planId: state.planId, - agentLabel: "Agent pool", - agentValue: `${role} x${parallel}`, - model: state.subagentModel ?? "—", - }; - } - - return { - planId: state.planId, - agentLabel: "Agent", - agentValue: role, - model: state.subagentModel ?? 
"—", - }; -} - -function renderIdentityRow(theme: Theme, width: number, keyWidth: number, key: string, value: string): string { - const padded = key.padEnd(keyWidth, " "); - return clampToWidth(`${theme.fg("muted", padded)} : ${theme.fg("dim", value)}`, width, "…"); -} - -function renderIdentitySection(view: IdentityView, theme: Theme, width: number): string[] { - const keys = ["Plan ID", view.agentLabel, "Model"]; - const keyWidth = Math.max(...keys.map((key) => visibleWidth(key))); - return [ - renderIdentityRow(theme, width, keyWidth, "Plan ID", view.planId), - renderIdentityRow(theme, width, keyWidth, view.agentLabel, view.agentValue), - renderIdentityRow(theme, width, keyWidth, "Model", view.model), - ]; -} - -const DETAIL_SECTION_REGISTRY: Array> = [ - { - id: "runtime-status", - placement: "core", - select: (state: WidgetState): WidgetState | null => (shouldShowRuntimeSection(state) ? state : null), - render: (view: WidgetState, theme: Theme, width: number): string[] => renderRuntimeStatusSection(view, theme, width), - }, - { - id: "identity", - placement: "footer", - select: (state: WidgetState): IdentityView => identityView(state), - render: (view: IdentityView, theme: Theme, width: number): string[] => renderIdentitySection(view, theme, width), - }, -]; - -function buildDetailSections(state: WidgetState, theme: Theme, width: number): DetailSections { - const core: string[] = []; - const footer: string[] = []; - const blank = clampToWidth("", width); - - for (const section of DETAIL_SECTION_REGISTRY) { - const view = section.select(state); - if (!view) continue; - - const rendered = section.render(view, theme, width).map((line) => clampToWidth(line, width)); - if (section.placement === "core") { - if (rendered.length === 0) continue; - if (core.length > 0 && core[core.length - 1].trim() !== "") { - core.push(blank); - } - core.push(...rendered); - continue; - } - - footer.push(...rendered); - } - - return { core, footer }; -} - -function 
layoutDetailColumn(sections: DetailSections, width: number, targetRows: number): string[] { - const blank = clampToWidth("", width); - const lines = [...sections.core]; - - if (sections.footer.length > 0) { - if (lines.length === 0 || lines[lines.length - 1].trim() !== "") { - lines.push(blank); - } - } - - const used = lines.length + sections.footer.length; - const goal = Math.max(targetRows, used); - - while (lines.length < goal - sections.footer.length) { - lines.push(blank); - } - - if (sections.footer.length === 0) { - return lines; - } - - return [...lines, ...sections.footer]; -} - -function renderBox( - titleLeft: string, - titleRight: string, - body: string[], - width: number, - theme: Theme, - border: BorderStyle = BORDER_SOLID, -): string[] { - const innerWidth = Math.max(0, width - 2); - const left = visibleWidth(titleLeft) > innerWidth ? truncateToWidth(titleLeft, innerWidth, "", false) : titleLeft; - const right = visibleWidth(titleRight) > innerWidth ? truncateToWidth(titleRight, innerWidth, "", false) : titleRight; - const headerContent = rightAlign(left, right, innerWidth); - - const top = `${border.topLeft}${clampToWidth(headerContent, innerWidth)}${border.topRight}`; - const bottom = `${border.bottomLeft}${clampToWidth(border.horizontal.repeat(innerWidth), innerWidth)}${border.bottomRight}`; - - const content = body.map((line) => `${border.vertical}${clampToWidth(line, innerWidth)}${border.vertical}`); - return [top, ...content, bottom]; -} - -function renderBoxWithHeaderRow( - headerLeft: string, - headerRight: string, - body: string[], - width: number, - border: BorderStyle = BORDER_SOLID, -): string[] { - const innerWidth = Math.max(0, width - 2); - const left = visibleWidth(headerLeft) > innerWidth ? truncateToWidth(headerLeft, innerWidth, "", false) : headerLeft; - const right = visibleWidth(headerRight) > innerWidth ? 
truncateToWidth(headerRight, innerWidth, "", false) : headerRight; - const headerContent = rightAlign(left, right, innerWidth); - - const top = `${border.topLeft}${clampToWidth(border.horizontal.repeat(innerWidth), innerWidth)}${border.topRight}`; - const header = `${border.vertical}${clampToWidth(headerContent, innerWidth)}${border.vertical}`; - const headerDivider = `${border.vertical}${clampToWidth(border.horizontal.repeat(innerWidth), innerWidth)}${border.vertical}`; - const content = body.map((line) => `${border.vertical}${clampToWidth(line, innerWidth)}${border.vertical}`); - const bottom = `${border.bottomLeft}${clampToWidth(border.horizontal.repeat(innerWidth), innerWidth)}${border.bottomRight}`; - - return [top, header, headerDivider, ...content, bottom]; -} - -function renderPlanningCard(state: WidgetState, theme: Theme, width: number): string[] { - const elapsed = theme.fg("dim", formatElapsed(Date.now() - state.startedAt)); - const { innerWidth, contentWidth, timelineWidth, detailWidth } = planningColumns(width); - const titleLeft = renderPlanningHeader(state, theme, Math.max(0, innerWidth - visibleWidth(elapsed) - 1)); - - if (innerWidth < 60 || contentWidth < 40) { - const fallbackContent: string[] = [ - "", - theme.fg("muted", `Plan · ${state.planId}`), - "", - formatStepLine(state, theme), - ]; - const runtimeCompact = formatRuntimeCompact(state, theme, contentWidth); - if (runtimeCompact.length > 0) { - fallbackContent.push(...runtimeCompact); - } - - fallbackContent.push(""); - fallbackContent.push(...formatIdentityCompact(state, theme, contentWidth)); - fallbackContent.push(""); - - const body = indentLines(fallbackContent, innerWidth); - return renderBox( - `${BODY_INDENT}${titleLeft}`, - elapsed, - body, - width, - theme, - ); - } - - const timelineLines = renderTimelineLines(state, theme, timelineWidth); - const detailSections = buildDetailSections(state, theme, detailWidth); - const detailLines = layoutDetailColumn(detailSections, 
detailWidth, timelineLines.length); - const combined: string[] = []; - const maxLines = Math.max(timelineLines.length, detailLines.length); - - for (let i = 0; i < maxLines; i++) { - const left = timelineLines[i] ?? ""; - const right = detailLines[i] ?? ""; - const composed = `${clampToWidth(left, timelineWidth)}${" ".repeat(COLUMN_GAP)}${clampToWidth(right, detailWidth)}`; - combined.push(clampToWidth(composed, contentWidth)); - } - - const body = indentLines( - [ - "", - ...combined, - "", - ], - innerWidth, - ); - - return renderBox( - `${BODY_INDENT}${titleLeft}`, - elapsed, - body, - width, - theme, - ); -} - -function wrapRightColumn(entry: LogLine, width: number): string[] { - const summary = entry.summary.trim(); - if (!summary) return [""]; - - if (!entry.highValue) { - return [clampToWidth(summary, width, "…")]; - } - - const wrapped = wrapTextWithAnsi(summary, width).map((line) => clampToWidth(line, width, "…")); - if (wrapped.length <= 1) return wrapped; - if (wrapped.length === 2) return wrapped; - - const tail = wrapped.slice(1).join(" ").replace(/\s+/gu, " ").trim(); - return [wrapped[0], clampToWidth(truncateToWidth(tail, width, "…", false), width)]; -} - -function renderLogEntry(entry: LogLine, theme: Theme, leftWidth: number, rightWidth: number, gap: number): string[] { - const rightLines = wrapRightColumn(entry, rightWidth); - const rows: string[] = []; - - rightLines.forEach((line, index) => { - const left = index === 0 - ? 
theme.bold(theme.fg("accent", entry.tool)) - : ""; - const composed = `${clampToWidth(left, leftWidth)}${" ".repeat(gap)}${clampToWidth(theme.fg("muted", line), rightWidth)}`; - rows.push(composed); - }); - - return rows; -} - -interface LogColumns { - left: number; - right: number; - gap: number; -} - -function logColumnWidths(availableWidth: number, entries: readonly LogLine[], gap: number): LogColumns { - const longestTool = entries.reduce((max, entry) => Math.max(max, visibleWidth(entry.tool)), 0); - const preferredLeft = Math.max(16, Math.min(38, longestTool + 2)); - - const minRight = availableWidth < 64 ? 18 : 24; - let left = Math.min(preferredLeft, Math.floor(availableWidth * 0.42)); - left = Math.min(left, Math.max(14, availableWidth - minRight - gap)); - left = Math.max(14, left); - - const right = Math.max(8, availableWidth - left - gap); - return { left, right, gap }; -} - -function renderLogCard(state: WidgetState, theme: Theme, width: number, forcedColumns?: LogColumns): string[] { - const innerWidth = Math.max(0, width - 2); - const availableWidth = Math.max(0, innerWidth - visibleWidth(BODY_INDENT)); - const hasEntries = state.logLines.length > 0; - const entries = hasEntries ? state.logLines.slice(-(LOG_LINES * 2)) : []; - - const columns = forcedColumns ?? 
logColumnWidths(availableWidth, entries, 2); - const leftWidth = Math.max(8, Math.min(columns.left, Math.max(8, availableWidth - columns.gap - 8))); - const rightWidth = Math.max(8, availableWidth - leftWidth - columns.gap); - - const visualRows: string[] = []; - if (entries.length > 0) { - const rendered = entries.map((entry) => renderLogEntry(entry, theme, leftWidth, rightWidth, columns.gap)); - const selected: string[][] = []; - let remaining = LOG_LINES; - - for (let i = rendered.length - 1; i >= 0; i--) { - if (remaining <= 0) break; - const rowLines = rendered[i]; - if (rowLines.length <= remaining) { - selected.push(rowLines); - remaining -= rowLines.length; - } else { - selected.push(rowLines.slice(0, remaining)); - remaining = 0; - } - } - - selected.reverse(); - for (const lines of selected) { - visualRows.push(...lines); - } - } - - if (visualRows.length === 0) { - visualRows.push(clampToWidth(theme.fg("muted", LOG_PLACEHOLDER), innerWidth)); - } - - while (visualRows.length < LOG_LINES) { - visualRows.push(""); - } - - const body = indentLines(visualRows, innerWidth); - return renderBox( - `${BODY_INDENT}${theme.bold(theme.fg("accent", "Latest log"))}`, - "", - body, - width, - theme, - ); -} - -function formatRuntimeCompact(state: WidgetState, theme: Theme, width: number): string[] { - if (!shouldShowRuntimeSection(state)) return []; - return renderRuntimeStatusSection(state, theme, width); -} - -function formatIdentityCompact(state: WidgetState, theme: Theme, width: number): string[] { - return renderIdentitySection(identityView(state), theme, width); -} - -function formatStepLine(state: WidgetState, theme: Theme): string { - const total = state.phases.length; - const active = activePhase(state); - const stepNumber = state.activeIndex >= 0 ? state.activeIndex + 1 : total; - const count = theme.fg("muted", `Step ${stepNumber} of ${total}`); - const label = active - ? 
theme.bold(theme.fg("accent", active.label)) - : theme.bold(theme.fg("muted", "Complete")); - return `${count} ${theme.fg("muted", "·")} ${label}`; -} - -// Pure render: (state, theme, termWidth) -> lines. No side effects. -function stripBoxFrame(lines: string[]): string[] { - if (lines.length <= 2) return []; - return lines.slice(1, -1).map((line) => (line.length >= 2 ? line.slice(1, -1) : "")); -} - -function renderIntegratedWorkspaceCard(state: WidgetState, theme: Theme, width: number): string[] { - const innerWidth = Math.max(0, width - 2); - const elapsed = theme.fg("dim", formatElapsed(Date.now() - state.startedAt)); - - const { innerWidth: planningInnerWidth, contentWidth, timelineWidth, detailWidth } = planningColumns(width); - const alignedColumns: LogColumns | undefined = planningInnerWidth >= 60 && contentWidth >= 40 - ? { left: timelineWidth, right: detailWidth, gap: COLUMN_GAP } - : undefined; - - const planningInner = stripBoxFrame(renderPlanningCard(state, theme, width)); - const logInner = stripBoxFrame(renderLogCard(state, theme, width, alignedColumns)); - - const divider = clampToWidth(theme.fg("muted", "─".repeat(innerWidth)), innerWidth); - const spacer = clampToWidth("", innerWidth); - const logTitle = clampToWidth(`${BODY_INDENT}${theme.bold(theme.fg("accent", "Latest log"))}`, innerWidth, "…"); - - const body = [ - ...planningInner, - divider, - spacer, - logTitle, - ...logInner, - ]; - - const rightInset = " ".repeat(visibleWidth(BODY_INDENT)); - const titleLeftBudget = Math.max( - 0, - innerWidth - visibleWidth(elapsed) - visibleWidth(rightInset) - 1 - visibleWidth(BODY_INDENT), - ); - const titleLeft = renderPlanningHeader(state, theme, titleLeftBudget); - - return renderBoxWithHeaderRow( - `${BODY_INDENT}${titleLeft}`, - `${elapsed}${rightInset}`, - body, - width, - ); -} - -// Pure render: (state, theme, termWidth) -> lines. No side effects. 
-function render(state: WidgetState, theme: Theme, termWidth: number): string[] { - const c = (s: string) => canvasLine(s, termWidth, theme); - const cw = contentWidth(termWidth); - const lines: string[] = []; - const margin = " ".repeat(CARD_MARGIN); - - lines.push(c("")); - for (const line of renderIntegratedWorkspaceCard(state, theme, cw - CARD_MARGIN)) { - lines.push(c(margin + line)); - } - lines.push(c("")); - - return lines; -} - -// -- WidgetController -- - -export class WidgetController { - private state: WidgetState; - private lastHash = ""; - private timer: ReturnType; - private ui: ExtensionUIContext; - - constructor(ui: ExtensionUIContext, planId: string) { - this.ui = ui; - this.state = { - mode: "planning", - planId, - phases: PLANNING_PHASES.map((p) => ({ key: p.key, label: p.label, detail: p.detail, status: "pending" as PhaseStatus })), - activeIndex: 0, - step: "", - activity: "", - startedAt: Date.now(), - logLines: [], - qrIteration: null, - qrIterationsMax: null, - qrMode: null, - qrPhase: "idle", - qrDone: null, - qrTotal: null, - qrPass: null, - qrFail: null, - qrTodo: null, - subagentRole: null, - subagentModel: null, - subagentParallelCount: null, - subagentQueued: null, - subagentActive: null, - subagentDone: null, - }; - this.state.phases[0].status = "running"; - - this.timer = setInterval(() => this.doRender(), 1000); - this.timer.unref(); - - this.doRender(); - } - - update(patch: WidgetUpdate): void { - if (patch.mode !== undefined) { - this.state.mode = patch.mode; - } - if (patch.phaseStatus !== undefined) { - const { index, status } = patch.phaseStatus; - if (index >= 0 && index < this.state.phases.length) { - this.state.phases[index].status = status; - } - } - if (patch.activeIndex !== undefined) { - this.state.activeIndex = patch.activeIndex; - const ai = patch.activeIndex; - if (ai >= 0 && ai < this.state.phases.length && this.state.phases[ai].status === "pending") { - this.state.phases[ai].status = "running"; - } - } - if 
(patch.step !== undefined) { - this.state.step = patch.step; - } - if (patch.activity !== undefined) { - this.state.activity = patch.activity; - } - if (patch.logLines !== undefined) { - this.state.logLines = normalizeLogLines(patch.logLines); - } - if (patch.qrIteration !== undefined) { - this.state.qrIteration = patch.qrIteration; - } - if (patch.qrIterationsMax !== undefined) { - this.state.qrIterationsMax = patch.qrIterationsMax; - } - if (patch.qrMode !== undefined) { - this.state.qrMode = patch.qrMode; - } - if (patch.qrPhase !== undefined) { - this.state.qrPhase = patch.qrPhase; - } - if (patch.qrDone !== undefined) { - this.state.qrDone = patch.qrDone; - } - if (patch.qrTotal !== undefined) { - this.state.qrTotal = patch.qrTotal; - } - if (patch.qrPass !== undefined) { - this.state.qrPass = patch.qrPass; - } - if (patch.qrFail !== undefined) { - this.state.qrFail = patch.qrFail; - } - if (patch.qrTodo !== undefined) { - this.state.qrTodo = patch.qrTodo; - } - if (patch.subagentRole !== undefined) { - this.state.subagentRole = patch.subagentRole; - } - if (patch.subagentModel !== undefined) { - this.state.subagentModel = patch.subagentModel; - } - if (patch.subagentParallelCount !== undefined) { - this.state.subagentParallelCount = patch.subagentParallelCount; - } - if (patch.subagentQueued !== undefined) { - this.state.subagentQueued = patch.subagentQueued; - } - if (patch.subagentActive !== undefined) { - this.state.subagentActive = patch.subagentActive; - } - if (patch.subagentDone !== undefined) { - this.state.subagentDone = patch.subagentDone; - } - this.doRender(); - } - - destroy(): void { - clearInterval(this.timer); - this.ui.setWidget(WIDGET_KEY, undefined); - } - - private doRender(): void { - // Capture state snapshot for the factory closure - const state = { - ...this.state, - phases: this.state.phases.map((p) => ({ ...p })), - logLines: this.state.logLines.map((l) => ({ ...l })), - }; - const theme = this.ui.theme; - - // Hash check: skip 
setWidget if content unchanged (ignoring width) - const hashLines = render(state, theme, 0); - const hash = hashLines.join("\n"); - if (hash === this.lastHash) return; - this.lastHash = hash; - - // Component factory: Pi calls render(width) with actual terminal width - this.ui.setWidget(WIDGET_KEY, (_tui, th) => ({ - render: (width: number) => render(state, th, width), - invalidate: () => {}, - })); - } -} diff --git a/src/utils/lock.ts b/src/utils/lock.ts deleted file mode 100644 index 47ed858..0000000 --- a/src/utils/lock.ts +++ /dev/null @@ -1,44 +0,0 @@ -import { promises as fs } from "node:fs"; - -// Advisory .lock file for serializing file mutations. Uses O_CREAT|O_EXCL -// for atomic creation (fails if lock already exists). Retry with backoff -// handles transient contention (e.g. parallel QR verifiers). - -const RETRY_INTERVAL_MS = 50; -const MAX_WAIT_MS = 5000; - -function lockPath(filePath: string): string { - return `${filePath}.lock`; -} - -async function acquire(filePath: string): Promise { - const lp = lockPath(filePath); - const deadline = Date.now() + MAX_WAIT_MS; - - while (true) { - try { - const fd = await fs.open(lp, "wx"); - await fd.close(); - return; - } catch (err: unknown) { - if ((err as NodeJS.ErrnoException).code !== "EEXIST") throw err; - if (Date.now() >= deadline) { - throw new Error(`Failed to acquire lock on ${filePath} after ${MAX_WAIT_MS}ms`); - } - await new Promise((r) => setTimeout(r, RETRY_INTERVAL_MS)); - } - } -} - -async function release(filePath: string): Promise { - await fs.rm(lockPath(filePath), { force: true }); -} - -export async function withFileLock(filePath: string, fn: () => Promise): Promise { - await acquire(filePath); - try { - return await fn(); - } finally { - await release(filePath); - } -} diff --git a/src/utils/plan.ts b/src/utils/plan.ts deleted file mode 100644 index a34f382..0000000 --- a/src/utils/plan.ts +++ /dev/null @@ -1,72 +0,0 @@ -import { promises as fs } from "node:fs"; -import * as os from 
"node:os"; -import * as path from "node:path"; - -import type { PlanInfo } from "../planner/state.js"; - -const KOAN_HOME = path.join(os.homedir(), ".koan"); -const PLANS_HOME = path.join(KOAN_HOME, "plans"); - -function slugify(input: string): string { - const base = input - .toLowerCase() - .replace(/[^a-z0-9]+/g, "-") - .replace(/^-+|-+$/g, "") - .slice(0, 48); - - return base.length > 0 ? base : "plan"; -} - -function generatePlanId(description: string, now: Date): string { - const timestamp = now.toISOString().replace(/[-:]/g, "").replace(/\..+/, ""); - const slug = slugify(description); - return `${timestamp}-${slug}`; -} - -async function ensurePlanDirectoryUnique(baseId: string): Promise<{ id: string; directory: string }> { - let suffix = 0; - while (true) { - const candidateId = suffix === 0 ? baseId : `${baseId}-${suffix}`; - const directory = path.join(PLANS_HOME, candidateId); - - try { - await fs.mkdir(directory, { recursive: false }); - return { id: candidateId, directory }; - } catch (error) { - const err = error as NodeJS.ErrnoException; - if (err.code === "EEXIST") { - suffix += 1; - continue; - } - throw error; - } - } -} - -export async function createPlanInfo(description: string, projectCwd: string, now = new Date()): Promise { - await fs.mkdir(PLANS_HOME, { recursive: true }); - - const baseId = generatePlanId(description, now); - const { id, directory } = await ensurePlanDirectoryUnique(baseId); - - const metadataPath = path.join(directory, "metadata.json"); - - const plan: PlanInfo = { - id, - directory, - metadataPath, - createdAt: now.toISOString(), - }; - - const metadata = { - id: plan.id, - createdAt: plan.createdAt, - description, - status: "created" as const, - projectCwd, - }; - - await fs.writeFile(metadataPath, `${JSON.stringify(metadata, null, 2)}\n`, "utf8"); - - return plan; -} diff --git a/src/utils/progress.ts b/src/utils/progress.ts deleted file mode 100644 index 2940ecc..0000000 --- a/src/utils/progress.ts +++ /dev/null @@ 
-1,14 +0,0 @@ -// Directory infrastructure for subagent working directories. -// Audit state (state.json, events.jsonl) is managed by EventLog in lib/audit.ts. -// This module is retained for createSubagentDir, used by session.ts. - -import { promises as fs } from "node:fs"; -import * as crypto from "node:crypto"; -import * as path from "node:path"; - -export async function createSubagentDir(planDir: string, role: string): Promise { - const hex = crypto.randomBytes(2).toString("hex"); - const dir = path.join(planDir, "subagents", `${role}-${hex}`); - await fs.mkdir(dir, { recursive: true }); - return dir; -} From ef195bce06cd94f46a23471395bdd686c62e603a Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Fri, 13 Mar 2026 12:46:26 +0700 Subject: [PATCH 046/412] test(planner): replace legacy coverage with state-machine tests --- tests/model-config.test.ts | 233 -------------- tests/model-phase.test.ts | 135 -------- tests/model-resolver.test.ts | 164 ---------- tests/progress.test.ts | 321 -------------------- tests/qr-grouped-verify.test.ts | 304 ------------------- tests/session-model-threading.test.ts | 205 ------------- tests/state-machine.test.ts | 422 ++++++++++++++++++++++++++ tests/story-discovery.test.ts | 84 +++++ tests/subagent-model.test.ts | 215 ------------- tests/widget.test.ts | 173 ----------- 10 files changed, 506 insertions(+), 1750 deletions(-) delete mode 100644 tests/model-config.test.ts delete mode 100644 tests/model-phase.test.ts delete mode 100644 tests/model-resolver.test.ts delete mode 100644 tests/progress.test.ts delete mode 100644 tests/qr-grouped-verify.test.ts delete mode 100644 tests/session-model-threading.test.ts create mode 100644 tests/state-machine.test.ts create mode 100644 tests/story-discovery.test.ts delete mode 100644 tests/subagent-model.test.ts delete mode 100644 tests/widget.test.ts diff --git a/tests/model-config.test.ts b/tests/model-config.test.ts deleted file mode 100644 index a7e949f..0000000 --- 
a/tests/model-config.test.ts +++ /dev/null @@ -1,233 +0,0 @@ -import assert from "node:assert/strict"; -import { promises as fs } from "node:fs"; -import * as os from "node:os"; -import * as path from "node:path"; -import { describe, it } from "node:test"; - -import { ALL_PHASE_MODEL_KEYS, type PhaseModelKey } from "../src/planner/model-phase.js"; -import { loadPhaseModelConfig, savePhaseModelConfig } from "../src/planner/model-config.js"; - -function makeFullConfig(model = "anthropic/claude-sonnet"): Record { - const config: Partial> = {}; - for (const key of ALL_PHASE_MODEL_KEYS) { - config[key] = model; - } - return config as Record; -} - -// Test config validation logic directly using a mock config file -// by writing to a temp location and reading back. -// Note: loadPhaseModelConfig reads from ~/.koan/config.json, so we -// test validation using the raw parsing logic via an in-process approach. - -describe("config validation", () => { - it("accepts a complete 20-key config and returns it unchanged", async () => { - // We test the validation by round-tripping through save/load. - // To avoid touching ~/.koan/config.json, we verify the pure logic - // by testing that a valid config object has all required keys. 
- const config = makeFullConfig("anthropic/claude-opus-4"); - - // Verify it has exactly 20 keys - assert.equal(Object.keys(config).length, ALL_PHASE_MODEL_KEYS.length); - - // Verify all keys are valid PhaseModelKeys - for (const key of Object.keys(config)) { - assert.ok( - (ALL_PHASE_MODEL_KEYS as readonly string[]).includes(key), - `unexpected key: ${key}`, - ); - } - - // Verify all values are non-empty strings - for (const [key, value] of Object.entries(config)) { - assert.equal(typeof value, "string", `value for ${key} should be a string`); - assert.ok(value.length > 0, `value for ${key} should be non-empty`); - } - }); - - it("treats null as valid (no overrides)", () => { - // Null config is valid — it means inherit from pi's active model - const config: Record | null = null; - assert.equal(config, null); - }); -}); - -describe("loadPhaseModelConfig (integration)", () => { - it("returns null when config file is missing", async () => { - // loadPhaseModelConfig reads ~/.koan/config.json - if it doesn't exist, null - // We can only test this if ~/.koan/config.json doesn't exist on this machine - // or has no phaseModels. This is an integration test, so we skip the file check - // and instead verify the contract: the function always returns null or a valid config. - const result = await loadPhaseModelConfig(); - // Result is either null or a Record with exactly 20 keys - if (result !== null) { - assert.equal(Object.keys(result).length, ALL_PHASE_MODEL_KEYS.length); - for (const key of ALL_PHASE_MODEL_KEYS) { - assert.equal(typeof result[key], "string"); - assert.ok(result[key].length > 0); - } - } - }); -}); - -describe("savePhaseModelConfig + loadPhaseModelConfig (round-trip)", () => { - it("persists a full config and reads it back correctly", async () => { - // KOAN_CONFIG_PATH is computed at module load time, so tests validate - // round-trip behavior against the real path and restore prior state. 
- - const actualConfigPath = path.join(os.homedir(), ".koan", "config.json"); - let preExisting: string | null = null; - - try { - preExisting = await fs.readFile(actualConfigPath, "utf8"); - } catch { - preExisting = null; - } - - try { - const config = makeFullConfig("openai/gpt-5"); - await savePhaseModelConfig(config); - - const loaded = await loadPhaseModelConfig(); - assert.ok(loaded !== null, "expected config to be loaded after save"); - assert.equal(Object.keys(loaded).length, ALL_PHASE_MODEL_KEYS.length); - - for (const key of ALL_PHASE_MODEL_KEYS) { - assert.equal(loaded[key], "openai/gpt-5", `mismatch for key ${key}`); - } - } finally { - // Restore original state - if (preExisting === null) { - try { - const koanDir = path.join(os.homedir(), ".koan"); - await fs.rm(actualConfigPath, { force: true }); - // Try to remove the .koan dir if it was empty before - const entries = await fs.readdir(koanDir); - if (entries.length === 0) { - await fs.rmdir(koanDir); - } - } catch { - // Best-effort cleanup - } - } else { - await fs.writeFile(actualConfigPath, preExisting, "utf8"); - } - - } - }); - - it("persists null (clears overrides) while preserving other config keys", async () => { - const actualConfigPath = path.join(os.homedir(), ".koan", "config.json"); - let preExisting: string | null = null; - - try { - preExisting = await fs.readFile(actualConfigPath, "utf8"); - } catch { - preExisting = null; - } - - try { - // Write an initial config - await savePhaseModelConfig(makeFullConfig("anthropic/claude-sonnet")); - - // Now clear it - await savePhaseModelConfig(null); - - const loaded = await loadPhaseModelConfig(); - assert.equal(loaded, null, "expected null after clearing overrides"); - - // Verify the config file still exists but has no phaseModels key - const raw = await fs.readFile(actualConfigPath, "utf8"); - const parsed = (raw.trim().length === 0 ? 
{} : JSON.parse(raw)) as Record; - assert.equal("phaseModels" in parsed, false, "phaseModels should be absent after clearing"); - } finally { - if (preExisting === null) { - try { - await fs.rm(actualConfigPath, { force: true }); - } catch { - // Best-effort - } - } else { - await fs.writeFile(actualConfigPath, preExisting, "utf8"); - } - } - }); -}); - -describe("config validation: partial config treated as absent", () => { - it("validates that a partial config (missing keys) is treated as absent", async () => { - // We simulate this by checking the validation logic: - // A config with fewer than 20 keys should produce null from loadPhaseModelConfig. - // We test this indirectly by verifying the contract. - const partialKeys = ALL_PHASE_MODEL_KEYS.slice(0, 10); - assert.equal(partialKeys.length, 10); - assert.equal(partialKeys.length < ALL_PHASE_MODEL_KEYS.length, true); - - // A partial config would fail the length check in loadPhaseModelConfig. - // We verify this by writing a partial config and reading it back. 
- const actualConfigPath = path.join(os.homedir(), ".koan", "config.json"); - let preExisting: string | null = null; - - try { - preExisting = await fs.readFile(actualConfigPath, "utf8"); - } catch { - preExisting = null; - } - - try { - await fs.mkdir(path.dirname(actualConfigPath), { recursive: true }); - const partial: Record = {}; - for (const key of partialKeys) { - partial[key] = "anthropic/claude-sonnet"; - } - await fs.writeFile(actualConfigPath, JSON.stringify({ phaseModels: partial }), "utf8"); - - const loaded = await loadPhaseModelConfig(); - assert.equal(loaded, null, "expected null for partial config"); - } finally { - if (preExisting === null) { - try { await fs.rm(actualConfigPath, { force: true }); } catch { /* best-effort */ } - } else { - await fs.writeFile(actualConfigPath, preExisting, "utf8"); - } - } - }); - - it("validates that a config with unknown keys is treated as absent", async () => { - const actualConfigPath = path.join(os.homedir(), ".koan", "config.json"); - let preExisting: string | null = null; - - try { - preExisting = await fs.readFile(actualConfigPath, "utf8"); - } catch { - preExisting = null; - } - - try { - await fs.mkdir(path.dirname(actualConfigPath), { recursive: true }); - - // Build a 20-key config with one key replaced by an unknown key - const badConfig: Record = {}; - let first = true; - for (const key of ALL_PHASE_MODEL_KEYS) { - if (first) { - badConfig["unknown-phase-exec-debut"] = "anthropic/claude-sonnet"; - first = false; - } else { - badConfig[key] = "anthropic/claude-sonnet"; - } - } - - await fs.writeFile(actualConfigPath, JSON.stringify({ phaseModels: badConfig }), "utf8"); - - const loaded = await loadPhaseModelConfig(); - assert.equal(loaded, null, "expected null for config with unknown key"); - } finally { - if (preExisting === null) { - try { await fs.rm(actualConfigPath, { force: true }); } catch { /* best-effort */ } - } else { - await fs.writeFile(actualConfigPath, preExisting, "utf8"); - } - } - }); 
-}); diff --git a/tests/model-phase.test.ts b/tests/model-phase.test.ts deleted file mode 100644 index 9797d49..0000000 --- a/tests/model-phase.test.ts +++ /dev/null @@ -1,135 +0,0 @@ -import assert from "node:assert/strict"; -import { describe, it } from "node:test"; - -import { - ALL_PHASE_MODEL_KEYS, - GENERAL_PURPOSE_PHASE_MODEL_KEYS, - PHASE_ROWS, - STRONG_PHASE_MODEL_KEYS, - SUB_PHASES, - buildPhaseModelKey, - isPhaseModelKey, - type PhaseModelKey, -} from "../src/planner/model-phase.js"; - -describe("ALL_PHASE_MODEL_KEYS", () => { - it("contains exactly 20 keys (5 rows × 4 sub-phases)", () => { - assert.equal(ALL_PHASE_MODEL_KEYS.length, PHASE_ROWS.length * SUB_PHASES.length); - assert.equal(ALL_PHASE_MODEL_KEYS.length, 20); - }); - - it("contains no duplicates", () => { - const set = new Set(ALL_PHASE_MODEL_KEYS); - assert.equal(set.size, ALL_PHASE_MODEL_KEYS.length); - }); - - it("contains every combination of row and sub-phase", () => { - for (const row of PHASE_ROWS) { - for (const sub of SUB_PHASES) { - const key = `${row}-${sub}` as PhaseModelKey; - assert.ok( - ALL_PHASE_MODEL_KEYS.includes(key), - `expected key "${key}" to be present`, - ); - } - } - }); -}); - -describe("STRONG_PHASE_MODEL_KEYS", () => { - it("contains exactly 9 keys", () => { - assert.equal(STRONG_PHASE_MODEL_KEYS.size, 9); - }); - - it("contains all 5 qr-decompose keys", () => { - for (const row of PHASE_ROWS) { - const key = buildPhaseModelKey(row, "qr-decompose"); - assert.ok(STRONG_PHASE_MODEL_KEYS.has(key), `expected ${key} to be strong`); - } - }); - - it("contains plan-design exec-debut and exec-fix", () => { - assert.ok(STRONG_PHASE_MODEL_KEYS.has("plan-design-exec-debut")); - assert.ok(STRONG_PHASE_MODEL_KEYS.has("plan-design-exec-fix")); - }); - - it("contains exec-docs exec-debut and exec-fix", () => { - assert.ok(STRONG_PHASE_MODEL_KEYS.has("exec-docs-exec-debut")); - assert.ok(STRONG_PHASE_MODEL_KEYS.has("exec-docs-exec-fix")); - }); - - it("does not contain plan-code 
or plan-docs exec keys", () => { - assert.equal(STRONG_PHASE_MODEL_KEYS.has("plan-code-exec-debut"), false); - assert.equal(STRONG_PHASE_MODEL_KEYS.has("plan-code-exec-fix"), false); - assert.equal(STRONG_PHASE_MODEL_KEYS.has("plan-docs-exec-debut"), false); - assert.equal(STRONG_PHASE_MODEL_KEYS.has("plan-docs-exec-fix"), false); - }); -}); - -describe("GENERAL_PURPOSE_PHASE_MODEL_KEYS", () => { - it("contains exactly 11 keys (20 total - 9 strong)", () => { - assert.equal(GENERAL_PURPOSE_PHASE_MODEL_KEYS.length, 11); - }); - - it("strong and GP form a complete partition of all keys", () => { - const strongSet = STRONG_PHASE_MODEL_KEYS; - const gpSet = new Set(GENERAL_PURPOSE_PHASE_MODEL_KEYS); - - // Union equals ALL - for (const key of ALL_PHASE_MODEL_KEYS) { - assert.ok( - strongSet.has(key) || gpSet.has(key), - `key "${key}" missing from both sets`, - ); - } - - // Intersection is empty - for (const key of ALL_PHASE_MODEL_KEYS) { - assert.equal( - strongSet.has(key) && gpSet.has(key), - false, - `key "${key}" appears in both sets`, - ); - } - }); -}); - -describe("isPhaseModelKey", () => { - it("returns true for valid keys", () => { - for (const key of ALL_PHASE_MODEL_KEYS) { - assert.equal(isPhaseModelKey(key), true, `expected "${key}" to be valid`); - } - }); - - it("returns false for invalid strings", () => { - assert.equal(isPhaseModelKey("plan-design"), false); - assert.equal(isPhaseModelKey("exec-debut"), false); - assert.equal(isPhaseModelKey("plan-design-exec-init"), false); - assert.equal(isPhaseModelKey("unknown-key"), false); - assert.equal(isPhaseModelKey(""), false); - }); - - it("returns false for non-string values", () => { - assert.equal(isPhaseModelKey(42), false); - assert.equal(isPhaseModelKey(null), false); - assert.equal(isPhaseModelKey(undefined), false); - assert.equal(isPhaseModelKey({}), false); - }); -}); - -describe("buildPhaseModelKey", () => { - it("produces correct key for all combinations", () => { - 
assert.equal(buildPhaseModelKey("plan-design", "exec-debut"), "plan-design-exec-debut"); - assert.equal(buildPhaseModelKey("exec-docs", "qr-verify"), "exec-docs-qr-verify"); - assert.equal(buildPhaseModelKey("plan-code", "qr-decompose"), "plan-code-qr-decompose"); - }); - - it("produces keys that pass isPhaseModelKey", () => { - for (const row of PHASE_ROWS) { - for (const sub of SUB_PHASES) { - const key = buildPhaseModelKey(row, sub); - assert.equal(isPhaseModelKey(key), true, `buildPhaseModelKey(${row}, ${sub}) = "${key}" failed isPhaseModelKey`); - } - } - }); -}); diff --git a/tests/model-resolver.test.ts b/tests/model-resolver.test.ts deleted file mode 100644 index b37ef35..0000000 --- a/tests/model-resolver.test.ts +++ /dev/null @@ -1,164 +0,0 @@ -import assert from "node:assert/strict"; -import { promises as fs } from "node:fs"; -import * as os from "node:os"; -import * as path from "node:path"; -import { describe, it } from "node:test"; - -import { - ALL_PHASE_MODEL_KEYS, - PHASE_ROWS, - SUB_PHASES, - type PhaseModelKey, -} from "../src/planner/model-phase.js"; -import { - mapSpawnContextToPhaseModelKey, - resolvePhaseModelOverride, - type SpawnContext, -} from "../src/planner/model-resolver.js"; - -describe("mapSpawnContextToPhaseModelKey", () => { - it("maps work-debut to exec-debut for all phase rows", () => { - for (const row of PHASE_ROWS) { - const key = mapSpawnContextToPhaseModelKey("work-debut", row); - assert.equal(key, `${row}-exec-debut`, `row=${row}`); - } - }); - - it("maps fix to exec-fix for all phase rows", () => { - for (const row of PHASE_ROWS) { - const key = mapSpawnContextToPhaseModelKey("fix", row); - assert.equal(key, `${row}-exec-fix`, `row=${row}`); - } - }); - - it("maps qr-decompose to qr-decompose for all phase rows", () => { - for (const row of PHASE_ROWS) { - const key = mapSpawnContextToPhaseModelKey("qr-decompose", row); - assert.equal(key, `${row}-qr-decompose`, `row=${row}`); - } - }); - - it("maps qr-verify to qr-verify 
for all phase rows", () => { - for (const row of PHASE_ROWS) { - const key = mapSpawnContextToPhaseModelKey("qr-verify", row); - assert.equal(key, `${row}-qr-verify`, `row=${row}`); - } - }); - - it("produces keys that are valid PhaseModelKeys", () => { - const contexts: SpawnContext[] = ["work-debut", "fix", "qr-decompose", "qr-verify"]; - for (const context of contexts) { - for (const row of PHASE_ROWS) { - const key = mapSpawnContextToPhaseModelKey(context, row); - assert.ok( - (ALL_PHASE_MODEL_KEYS as readonly string[]).includes(key), - `key "${key}" (context=${context}, row=${row}) is not a valid PhaseModelKey`, - ); - } - } - }); - - it("covers all 20 PhaseModelKeys across context × row combinations", () => { - const produced = new Set(); - const contexts: SpawnContext[] = ["work-debut", "fix", "qr-decompose", "qr-verify"]; - for (const context of contexts) { - for (const row of PHASE_ROWS) { - produced.add(mapSpawnContextToPhaseModelKey(context, row)); - } - } - assert.equal(produced.size, ALL_PHASE_MODEL_KEYS.length); - for (const key of ALL_PHASE_MODEL_KEYS) { - assert.ok(produced.has(key), `key "${key}" not produced by any context × row combination`); - } - }); - - it("accepts optional fixPhase argument without altering output", () => { - const withoutFix = mapSpawnContextToPhaseModelKey("fix", "plan-design"); - const withFix = mapSpawnContextToPhaseModelKey("fix", "plan-design", "plan-design"); - assert.equal(withoutFix, withFix); - }); -}); - -describe("SpawnContext values cover all sub-phases", () => { - it("one SpawnContext maps to each SubPhase", () => { - const contexts: SpawnContext[] = ["work-debut", "fix", "qr-decompose", "qr-verify"]; - const row = "plan-design"; - const subPhasesProduced = contexts.map((c) => { - const key = mapSpawnContextToPhaseModelKey(c, row); - return key.replace(`${row}-`, "") as typeof SUB_PHASES[number]; - }); - - for (const sub of SUB_PHASES) { - assert.ok( - subPhasesProduced.includes(sub), - `sub-phase "${sub}" not 
covered by any SpawnContext`, - ); - } - }); -}); - -function makeFullConfig(model: string): Record { - const config: Partial> = {}; - for (const key of ALL_PHASE_MODEL_KEYS) { - config[key] = model; - } - return config as Record; -} - -async function withConfigFile( - setup: (configPath: string) => Promise, - run: () => Promise, -): Promise { - const configPath = path.join(os.homedir(), ".koan", "config.json"); - - let preExisting: string | null = null; - try { - preExisting = await fs.readFile(configPath, "utf8"); - } catch { - preExisting = null; - } - - try { - await fs.mkdir(path.dirname(configPath), { recursive: true }); - await setup(configPath); - return await run(); - } finally { - if (preExisting === null) { - try { - await fs.rm(configPath, { force: true }); - } catch { - // best-effort cleanup - } - } else { - await fs.writeFile(configPath, preExisting, "utf8"); - } - } -} - -describe("resolvePhaseModelOverride", () => { - it("returns configured model when full config is present", async () => { - await withConfigFile( - async (configPath) => { - const phaseModels = makeFullConfig("anthropic/claude-sonnet"); - phaseModels["plan-design-exec-debut"] = "openai/gpt-5"; - await fs.writeFile(configPath, `${JSON.stringify({ phaseModels }, null, 2)}\n`, "utf8"); - }, - async () => { - const value = await resolvePhaseModelOverride("plan-design-exec-debut"); - assert.equal(value, "openai/gpt-5"); - }, - ); - }); - - it("returns undefined when config is absent", async () => { - await withConfigFile( - async (configPath) => { - await fs.writeFile(configPath, `${JSON.stringify({ unrelated: true }, null, 2)}\n`, "utf8"); - }, - async () => { - const value = await resolvePhaseModelOverride("plan-code-exec-fix"); - assert.equal(value, undefined); - }, - ); - }); -}); diff --git a/tests/progress.test.ts b/tests/progress.test.ts deleted file mode 100644 index b1378a3..0000000 --- a/tests/progress.test.ts +++ /dev/null @@ -1,321 +0,0 @@ -import assert from 
"node:assert/strict"; -import { describe, it } from "node:test"; -import { promises as fs } from "node:fs"; -import * as os from "node:os"; -import * as path from "node:path"; - -import { EventLog, readProjection, readRecentLogs, fold, summarize, extractToolEvent } from "../src/planner/lib/audit.js"; -import type { Projection, AuditEvent, ToolEvent } from "../src/planner/lib/audit.js"; - -async function createTempDir(prefix: string): Promise { - return fs.mkdtemp(path.join(os.tmpdir(), prefix)); -} - -// -- EventLog + readProjection -- - -describe("EventLog", () => { - it("persists events and projection through step transitions", async () => { - const dir = await createTempDir("koan-audit-"); - - const log = new EventLog(dir, "architect", "plan-design", "anthropic/claude-sonnet-4-20250514"); - await log.open(); - - await log.emitPhaseStart(6); - await log.emitStepTransition(1, "Task Analysis", 6); - await log.emitStepTransition(2, "Decision Framework", 6); - await log.emitPhaseEnd("completed"); - await log.close(); - - const proj = await readProjection(dir); - assert.ok(proj, "projection should be readable"); - assert.equal(proj.role, "architect"); - assert.equal(proj.phase, "plan-design"); - assert.equal(proj.model, "anthropic/claude-sonnet-4-20250514"); - assert.equal(proj.status, "completed"); - assert.equal(proj.step, 2); - assert.equal(proj.totalSteps, 6); - assert.equal(proj.stepName, "Step 2/6: Decision Framework"); - assert.equal(proj.eventCount, 4); - - // Verify events.jsonl has correct number of lines - const raw = await fs.readFile(path.join(dir, "events.jsonl"), "utf8"); - const lines = raw.trimEnd().split("\n").filter(Boolean); - assert.equal(lines.length, 4); - - await fs.rm(dir, { recursive: true, force: true }); - }); - - it("tracks lastAction from tool events", async () => { - const dir = await createTempDir("koan-audit-"); - - const log = new EventLog(dir, "architect", "plan-design"); - await log.open(); - - await log.append({ - kind: 
"tool_file", - tool: "read", - path: "src/main.ts", - lines: 50, - chars: 1200, - error: false, - } as Omit); - - const proj = log.state; - assert.equal(proj.lastAction, "read src/main.ts (50L, 1200c)"); - - await log.close(); - await fs.rm(dir, { recursive: true, force: true }); - }); - - it("returns null for missing projection", async () => { - const dir = await createTempDir("koan-audit-"); - const proj = await readProjection(dir); - assert.equal(proj, null); - await fs.rm(dir, { recursive: true, force: true }); - }); -}); - -// -- readRecentLogs -- - -describe("readRecentLogs", () => { - it("returns recent non-heartbeat events as structured LogLines", async () => { - const dir = await createTempDir("koan-audit-"); - - const log = new EventLog(dir, "architect", "plan-design"); - await log.open(); - - await log.emitPhaseStart(3); - await log.emitStepTransition(1, "Analysis", 3); - await log.append({ - kind: "tool_file", - tool: "read", - path: "src/foo.ts", - lines: 100, - chars: 3000, - error: false, - } as Omit); - await log.close(); - - const lines = await readRecentLogs(dir, 5); - // 3 events (heartbeats filtered), all returned - assert.equal(lines.length, 3); - - assert.equal(lines[0].tool, "phase"); - assert.ok(lines[0].summary.includes("plan-design")); - - assert.equal(lines[1].tool, "step 1/3"); - assert.equal(lines[1].summary, "Analysis"); - - assert.equal(lines[2].tool, "read"); - assert.ok(lines[2].summary.includes("src/foo.ts")); - assert.ok(lines[2].summary.includes("100L")); - - await fs.rm(dir, { recursive: true, force: true }); - }); - - it("filters out koan_complete_step events", async () => { - const dir = await createTempDir("koan-audit-"); - - const log = new EventLog(dir, "architect", "plan-design"); - await log.open(); - - await log.append({ - kind: "tool_koan", - tool: "koan_complete_step", - input: { thoughts: "done" }, - response: ["ok"], - error: false, - } as Omit); - - await log.append({ - kind: "tool_koan", - tool: 
"koan_set_overview", - input: { problem: "test" }, - response: ["saved"], - error: false, - } as Omit); - - await log.close(); - - const lines = await readRecentLogs(dir, 5); - assert.equal(lines.length, 1); - assert.equal(lines[0].tool, "koan_set_overview"); - - await fs.rm(dir, { recursive: true, force: true }); - }); - - it("returns empty array for missing directory", async () => { - const lines = await readRecentLogs("/nonexistent/path", 5); - assert.deepEqual(lines, []); - }); -}); - -// -- fold (pure) -- - -describe("fold", () => { - const initial: Projection = { - role: "", - phase: "", - model: null, - status: "running", - step: 0, - totalSteps: 0, - stepName: "", - lastAction: null, - updatedAt: "", - eventCount: 0, - error: null, - }; - - it("phase_start resets projection", () => { - const e: AuditEvent = { - kind: "phase_start", - phase: "plan-design", - role: "architect", - model: "openai/gpt-5-codex", - totalSteps: 6, - ts: "2026-01-01T00:00:00Z", - seq: 0, - }; - const s = fold(initial, e); - assert.equal(s.role, "architect"); - assert.equal(s.phase, "plan-design"); - assert.equal(s.model, "openai/gpt-5-codex"); - assert.equal(s.totalSteps, 6); - assert.equal(s.eventCount, 1); - }); - - it("step_transition updates step name", () => { - const e: AuditEvent = { - kind: "step_transition", - step: 3, - name: "Risk Assessment", - totalSteps: 6, - ts: "2026-01-01T00:00:01Z", - seq: 1, - }; - const s = fold(initial, e); - assert.equal(s.step, 3); - assert.equal(s.stepName, "Step 3/6: Risk Assessment"); - }); - - it("phase_end sets status and error", () => { - const e: AuditEvent = { - kind: "phase_end", - outcome: "failed", - detail: "timeout", - ts: "2026-01-01T00:00:02Z", - seq: 2, - }; - const s = fold(initial, e); - assert.equal(s.status, "failed"); - assert.equal(s.error, "timeout"); - }); -}); - -// -- summarize -- - -describe("summarize", () => { - it("file tool with size stats", () => { - const e: ToolEvent = { - kind: "tool_file", - tool: "read", - 
path: "src/main.ts", - lines: 42, - chars: 1500, - error: false, - ts: "", - seq: 0, - }; - assert.equal(summarize(e), "read src/main.ts (42L, 1500c)"); - }); - - it("bash tool with size stats", () => { - const e: ToolEvent = { - kind: "tool_bash", - bin: "grep", - lines: 10, - chars: 200, - error: false, - ts: "", - seq: 0, - }; - assert.equal(summarize(e), "bash grep (10L, 200c)"); - }); - - it("file tool without size stats", () => { - const e: ToolEvent = { - kind: "tool_file", - tool: "edit", - path: "src/foo.ts", - error: false, - ts: "", - seq: 0, - }; - assert.equal(summarize(e), "edit src/foo.ts"); - }); -}); - -// -- extractToolEvent -- - -describe("extractToolEvent", () => { - it("extracts read tool with line/char counts", () => { - const content = "line1\nline2\nline3"; - const e = extractToolEvent({ - toolName: "read", - input: { path: "src/test.ts" }, - content: [{ type: "text", text: content }], - isError: false, - }); - assert.equal(e.kind, "tool_file"); - if (e.kind === "tool_file") { - assert.equal(e.tool, "read"); - assert.equal(e.path, "src/test.ts"); - assert.equal(e.lines, 3); - assert.equal(e.chars, content.length); - } - }); - - it("extracts bash tool with line/char counts", () => { - const output = "found 5 matches\n"; - const e = extractToolEvent({ - toolName: "bash", - input: { command: "grep -r pattern ." 
}, - content: [{ type: "text", text: output }], - isError: false, - }); - assert.equal(e.kind, "tool_bash"); - if (e.kind === "tool_bash") { - assert.equal(e.bin, "grep"); - assert.equal(e.lines, 2); - assert.equal(e.chars, output.length); - } - }); - - it("extracts koan tool with input and response", () => { - const e = extractToolEvent({ - toolName: "koan_set_overview", - input: { problem: "test problem" }, - content: [{ type: "text", text: "saved" }], - isError: false, - }); - assert.equal(e.kind, "tool_koan"); - if (e.kind === "tool_koan") { - assert.equal(e.tool, "koan_set_overview"); - assert.deepEqual(e.response, ["saved"]); - } - }); - - it("falls back to generic for unknown tools", () => { - const e = extractToolEvent({ - toolName: "unknown_tool", - input: {}, - content: [], - isError: false, - }); - assert.equal(e.kind, "tool_generic"); - if (e.kind === "tool_generic") { - assert.equal(e.tool, "unknown_tool"); - } - }); -}); diff --git a/tests/qr-grouped-verify.test.ts b/tests/qr-grouped-verify.test.ts deleted file mode 100644 index 23313cf..0000000 --- a/tests/qr-grouped-verify.test.ts +++ /dev/null @@ -1,304 +0,0 @@ -// Tests for grouped QR verification: grouping logic, step routing, -// prompt generation, and subagent spawn arg threading. - -import assert from "node:assert/strict"; -import { describe, it } from "node:test"; - -import { buildSpawnArgs } from "../src/planner/subagent.js"; -import type { QRItem } from "../src/planner/qr/types.js"; -import { - buildVerifySystemPrompt, - buildContextStep, - buildAnalyzeStep, - buildConfirmStep, -} from "../src/planner/phases/qr-verify/prompts.js"; - -// -- Grouping logic (pure function, extracted from session.ts pattern) -- - -function groupItemsByGroupId(items: QRItem[]): Map { - const groups = new Map(); - for (const item of items) { - const gid = item.group_id ?? 
item.id; - const existing = groups.get(gid); - if (existing) { - existing.push(item.id); - } else { - groups.set(gid, [item.id]); - } - } - return groups; -} - -function makeItem(id: string, groupId: string | null = null, status: "TODO" | "PASS" | "FAIL" = "TODO"): QRItem { - return { - id, - scope: `milestone:M-001`, - check: `Check for ${id}`, - status, - finding: null, - parent_id: null, - group_id: groupId, - severity: "MUST", - }; -} - -// -- Grouping tests -- - -describe("groupItemsByGroupId", () => { - it("groups items sharing the same group_id", () => { - const items = [ - makeItem("QR-001", "group-a"), - makeItem("QR-002", "group-a"), - makeItem("QR-003", "group-b"), - ]; - const groups = groupItemsByGroupId(items); - - assert.equal(groups.size, 2); - assert.deepEqual(groups.get("group-a"), ["QR-001", "QR-002"]); - assert.deepEqual(groups.get("group-b"), ["QR-003"]); - }); - - it("treats null group_id as singleton (uses item id as group key)", () => { - const items = [ - makeItem("QR-001", null), - makeItem("QR-002", null), - ]; - const groups = groupItemsByGroupId(items); - - assert.equal(groups.size, 2); - assert.deepEqual(groups.get("QR-001"), ["QR-001"]); - assert.deepEqual(groups.get("QR-002"), ["QR-002"]); - }); - - it("handles mixed grouped and ungrouped items", () => { - const items = [ - makeItem("QR-001", "umbrella"), - makeItem("QR-002", "umbrella"), - makeItem("QR-003", null), - makeItem("QR-004", "component-auth"), - makeItem("QR-005", "component-auth"), - makeItem("QR-006", "component-auth"), - ]; - const groups = groupItemsByGroupId(items); - - assert.equal(groups.size, 3); - assert.deepEqual(groups.get("umbrella"), ["QR-001", "QR-002"]); - assert.deepEqual(groups.get("QR-003"), ["QR-003"]); - assert.deepEqual(groups.get("component-auth"), ["QR-004", "QR-005", "QR-006"]); - }); - - it("returns empty map for empty items", () => { - const groups = groupItemsByGroupId([]); - assert.equal(groups.size, 0); - }); - - it("single item with group_id 
creates group of 1", () => { - const items = [makeItem("QR-001", "solo-group")]; - const groups = groupItemsByGroupId(items); - - assert.equal(groups.size, 1); - assert.deepEqual(groups.get("solo-group"), ["QR-001"]); - }); -}); - -// -- Dynamic step formula tests -- - -describe("dynamic step formula", () => { - it("totalSteps = 1 + 2*N for N items", () => { - assert.equal(1 + 2 * 1, 3); // 1 item: CONTEXT, ANALYZE, CONFIRM - assert.equal(1 + 2 * 3, 7); // 3 items: CONTEXT, 3×(ANALYZE+CONFIRM) - assert.equal(1 + 2 * 5, 11); // 5 items - }); - - it("step routing maps correctly for 3 items", () => { - // Step 1: CONTEXT - // Step 2: ANALYZE item 0 - // Step 3: CONFIRM item 0 - // Step 4: ANALYZE item 1 - // Step 5: CONFIRM item 1 - // Step 6: ANALYZE item 2 - // Step 7: CONFIRM item 2 - - function stepType(step: number): { kind: string; itemIndex?: number } { - if (step === 1) return { kind: "CONTEXT" }; - const offset = step - 2; - const itemIndex = Math.floor(offset / 2); - const isConfirm = offset % 2 === 1; - return isConfirm ? { kind: "CONFIRM", itemIndex } : { kind: "ANALYZE", itemIndex }; - } - - assert.deepEqual(stepType(1), { kind: "CONTEXT" }); - assert.deepEqual(stepType(2), { kind: "ANALYZE", itemIndex: 0 }); - assert.deepEqual(stepType(3), { kind: "CONFIRM", itemIndex: 0 }); - assert.deepEqual(stepType(4), { kind: "ANALYZE", itemIndex: 1 }); - assert.deepEqual(stepType(5), { kind: "CONFIRM", itemIndex: 1 }); - assert.deepEqual(stepType(6), { kind: "ANALYZE", itemIndex: 2 }); - assert.deepEqual(stepType(7), { kind: "CONFIRM", itemIndex: 2 }); - }); - - it("step routing works for single item (backward compat)", () => { - function stepType(step: number): { kind: string; itemIndex?: number } { - if (step === 1) return { kind: "CONTEXT" }; - const offset = step - 2; - const itemIndex = Math.floor(offset / 2); - const isConfirm = offset % 2 === 1; - return isConfirm ? 
{ kind: "CONFIRM", itemIndex } : { kind: "ANALYZE", itemIndex }; - } - - assert.deepEqual(stepType(1), { kind: "CONTEXT" }); - assert.deepEqual(stepType(2), { kind: "ANALYZE", itemIndex: 0 }); - assert.deepEqual(stepType(3), { kind: "CONFIRM", itemIndex: 0 }); - }); -}); - -// -- Prompt generation tests -- - -describe("buildVerifySystemPrompt", () => { - it("includes item count for single item", () => { - const result = buildVerifySystemPrompt("base prompt", "plan-design", 1); - assert.ok(result.includes("1 QR item")); - assert.ok(!result.includes("items")); - }); - - it("includes item count for multiple items", () => { - const result = buildVerifySystemPrompt("base prompt", "plan-code", 5); - assert.ok(result.includes("5 QR items")); - }); - - it("includes phase name", () => { - const result = buildVerifySystemPrompt("base prompt", "plan-docs", 3); - assert.ok(result.includes("plan-docs")); - }); -}); - -describe("buildContextStep", () => { - const items: QRItem[] = [ - makeItem("QR-001", "group-a"), - makeItem("QR-002", "group-a"), - makeItem("QR-003", "group-a"), - ]; - - it("lists all items in context step", () => { - const step = buildContextStep(items, "plan-design"); - const text = step.instructions.join("\n"); - assert.ok(text.includes("QR-001")); - assert.ok(text.includes("QR-002")); - assert.ok(text.includes("QR-003")); - }); - - it("shows correct item count", () => { - const step = buildContextStep(items, "plan-design"); - const text = step.instructions.join("\n"); - assert.ok(text.includes("3 ITEMS")); - }); - - it("shows 1 ITEM for single item", () => { - const step = buildContextStep([items[0]], "plan-design"); - const text = step.instructions.join("\n"); - assert.ok(text.includes("1 ITEM")); - }); -}); - -describe("buildAnalyzeStep", () => { - const item = makeItem("QR-042", "group-x"); - - it("includes item ID and check", () => { - const step = buildAnalyzeStep(item, 0, 3); - const text = step.instructions.join("\n"); - 
assert.ok(text.includes("QR-042")); - assert.ok(text.includes(item.check)); - }); - - it("includes position label for multi-item groups", () => { - const step = buildAnalyzeStep(item, 1, 5); - assert.ok(step.title.includes("item 2 of 5")); - }); - - it("omits position label for single item", () => { - const step = buildAnalyzeStep(item, 0, 1); - assert.ok(!step.title.includes("item")); - }); -}); - -describe("buildConfirmStep", () => { - const item = makeItem("QR-007", "group-y"); - - it("includes koan_qr_set_item instructions with correct id", () => { - const step = buildConfirmStep(item, 0, 3, "plan-code"); - const text = step.instructions.join("\n"); - assert.ok(text.includes("id='QR-007'")); - assert.ok(text.includes("status='PASS'")); - assert.ok(text.includes("status='FAIL'")); - }); - - it("includes position label for multi-item groups", () => { - const step = buildConfirmStep(item, 2, 4, "plan-docs"); - assert.ok(step.title.includes("item 3 of 4")); - }); - - it("has invokeAfter guard", () => { - const step = buildConfirmStep(item, 0, 1, "plan-design"); - assert.ok(step.invokeAfter); - assert.ok(step.invokeAfter!.includes("koan_complete_step")); - }); -}); - -// -- Subagent spawn arg tests -- - -describe("spawnReviewer args", () => { - const baseOpts = { - planDir: "/plan", - subagentDir: "/subagent", - extensionPath: "/ext/koan.ts", - cwd: "/working", - }; - - it("passes single item ID via --koan-qr-item for single-item group", () => { - const args = buildSpawnArgs("reviewer", "qr-plan-design", "Verify the assigned QR item.", { - ...baseOpts, - extraFlags: ["--koan-qr-item", "QR-001"], - }); - const idx = args.indexOf("--koan-qr-item"); - assert.ok(idx >= 0); - assert.equal(args[idx + 1], "QR-001"); - }); - - it("passes comma-separated item IDs via --koan-qr-item for multi-item group", () => { - const itemList = "QR-001,QR-002,QR-003"; - const args = buildSpawnArgs("reviewer", "qr-plan-code", "Verify the 3 assigned QR items.", { - ...baseOpts, - 
extraFlags: ["--koan-qr-item", itemList], - }); - const idx = args.indexOf("--koan-qr-item"); - assert.ok(idx >= 0); - assert.equal(args[idx + 1], "QR-001,QR-002,QR-003"); - }); -}); - -// -- Comma-separated parsing (mirrors dispatch.ts logic) -- - -describe("comma-separated item ID parsing", () => { - function parseItemIds(rawFlag: string): string[] { - return rawFlag.split(",").map((s) => s.trim()).filter(Boolean); - } - - it("parses single item ID", () => { - assert.deepEqual(parseItemIds("QR-001"), ["QR-001"]); - }); - - it("parses multiple comma-separated IDs", () => { - assert.deepEqual(parseItemIds("QR-001,QR-002,QR-003"), ["QR-001", "QR-002", "QR-003"]); - }); - - it("handles whitespace around commas", () => { - assert.deepEqual(parseItemIds("QR-001 , QR-002 , QR-003"), ["QR-001", "QR-002", "QR-003"]); - }); - - it("filters empty strings from trailing comma", () => { - assert.deepEqual(parseItemIds("QR-001,QR-002,"), ["QR-001", "QR-002"]); - }); - - it("returns empty array for empty string", () => { - assert.deepEqual(parseItemIds(""), []); - }); -}); diff --git a/tests/session-model-threading.test.ts b/tests/session-model-threading.test.ts deleted file mode 100644 index 1a9c300..0000000 --- a/tests/session-model-threading.test.ts +++ /dev/null @@ -1,205 +0,0 @@ -import assert from "node:assert/strict"; -import { describe, it } from "node:test"; - -import { - resolveSpawnModelOverride, - spawnWorkWithResolvedModel, - spawnFixWithResolvedModel, - spawnQRDecomposerWithResolvedModel, - spawnReviewerWithResolvedModel, -} from "../src/planner/session.js"; -import type { PhaseModelKey } from "../src/planner/model-phase.js"; - -describe("resolveSpawnModelOverride", () => { - it("maps context -> key and resolves override", async () => { - const contexts = ["work-debut", "fix", "qr-decompose", "qr-verify"] as const; - - for (const context of contexts) { - let mappedContext: string | null = null; - let mappedRow: string | null = null; - let resolvedKey: string | null 
= null; - - const result = await resolveSpawnModelOverride(context, "plan-design", { - mapSpawnContextToPhaseModelKeyFn: (ctx, row) => { - mappedContext = ctx; - mappedRow = row; - return "plan-design-exec-debut" as PhaseModelKey; - }, - resolvePhaseModelOverrideFn: async (key) => { - resolvedKey = key; - return "anthropic/claude-opus-4"; - }, - }); - - assert.equal(mappedContext, context); - assert.equal(mappedRow, "plan-design"); - assert.equal(resolvedKey, "plan-design-exec-debut"); - assert.equal(result, "anthropic/claude-opus-4"); - } - }); - - it("returns undefined when resolver reports absent config", async () => { - const result = await resolveSpawnModelOverride("work-debut", "plan-code", { - mapSpawnContextToPhaseModelKeyFn: () => "plan-code-exec-debut" as PhaseModelKey, - resolvePhaseModelOverrideFn: async () => undefined, - }); - - assert.equal(result, undefined); - }); -}); - -describe("work/fix spawn model threading", () => { - it("threads resolved modelOverride into work spawns", async () => { - let capturedModelOverride: string | undefined; - - await spawnWorkWithResolvedModel( - "plan-design", - async (opts) => { - capturedModelOverride = opts.modelOverride; - return { exitCode: 0, stderr: "", subagentDir: opts.subagentDir }; - }, - { - planDir: "/plan", - subagentDir: "/subagent", - cwd: "/cwd", - extensionPath: "/ext/koan.ts", - log: () => {}, - }, - { - mapSpawnContextToPhaseModelKeyFn: (ctx, row) => { - assert.equal(ctx, "work-debut"); - assert.equal(row, "plan-design"); - return "plan-design-exec-debut" as PhaseModelKey; - }, - resolvePhaseModelOverrideFn: async (key) => { - assert.equal(key, "plan-design-exec-debut"); - return "anthropic/claude-opus-4"; - }, - }, - ); - - assert.equal(capturedModelOverride, "anthropic/claude-opus-4"); - }); - - it("threads resolved modelOverride into fix spawns", async () => { - let capturedModelOverride: string | undefined; - - await spawnFixWithResolvedModel( - "plan-code", - async (opts) => { - 
capturedModelOverride = opts.modelOverride; - return { exitCode: 0, stderr: "", subagentDir: opts.subagentDir }; - }, - { - planDir: "/plan", - subagentDir: "/subagent", - cwd: "/cwd", - extensionPath: "/ext/koan.ts", - log: () => {}, - }, - { - mapSpawnContextToPhaseModelKeyFn: (ctx, row) => { - assert.equal(ctx, "fix"); - assert.equal(row, "plan-code"); - return "plan-code-exec-fix" as PhaseModelKey; - }, - resolvePhaseModelOverrideFn: async (key) => { - assert.equal(key, "plan-code-exec-fix"); - return "openai/gpt-5"; - }, - }, - ); - - assert.equal(capturedModelOverride, "openai/gpt-5"); - }); -}); - -describe("QR spawn model threading", () => { - it("threads resolved modelOverride into spawnQRDecomposer", async () => { - let capturedModelOverride: string | undefined; - - await spawnQRDecomposerWithResolvedModel( - { - planDir: "/plan", - subagentDir: "/subagent", - cwd: "/cwd", - extensionPath: "/ext/koan.ts", - phase: "plan-design", - }, - { - mapSpawnContextToPhaseModelKeyFn: (ctx, row) => { - assert.equal(ctx, "qr-decompose"); - assert.equal(row, "plan-design"); - return "plan-design-qr-decompose" as PhaseModelKey; - }, - resolvePhaseModelOverrideFn: async (key) => { - assert.equal(key, "plan-design-qr-decompose"); - return "openai/gpt-5"; - }, - spawnQRDecomposerFn: async (opts) => { - capturedModelOverride = opts.modelOverride; - return { exitCode: 0, stderr: "", subagentDir: opts.subagentDir }; - }, - }, - ); - - assert.equal(capturedModelOverride, "openai/gpt-5"); - }); - - it("threads resolved modelOverride into spawnReviewer", async () => { - let capturedModelOverride: string | undefined; - - await spawnReviewerWithResolvedModel( - { - planDir: "/plan", - subagentDir: "/subagent", - cwd: "/cwd", - extensionPath: "/ext/koan.ts", - phase: "plan-code", - itemIds: ["QR-001"], - }, - { - mapSpawnContextToPhaseModelKeyFn: (ctx, row) => { - assert.equal(ctx, "qr-verify"); - assert.equal(row, "plan-code"); - return "plan-code-qr-verify" as PhaseModelKey; - }, 
- resolvePhaseModelOverrideFn: async (key) => { - assert.equal(key, "plan-code-qr-verify"); - return "google/gemini-3-pro"; - }, - spawnReviewerFn: async (opts) => { - capturedModelOverride = opts.modelOverride; - return { exitCode: 0, stderr: "", subagentDir: opts.subagentDir }; - }, - }, - ); - - assert.equal(capturedModelOverride, "google/gemini-3-pro"); - }); - - it("passes undefined modelOverride when config is absent", async () => { - let capturedModelOverride: string | undefined; - - await spawnReviewerWithResolvedModel( - { - planDir: "/plan", - subagentDir: "/subagent", - cwd: "/cwd", - extensionPath: "/ext/koan.ts", - phase: "plan-docs", - itemIds: ["QR-002"], - }, - { - mapSpawnContextToPhaseModelKeyFn: () => "plan-docs-qr-verify" as PhaseModelKey, - resolvePhaseModelOverrideFn: async () => undefined, - spawnReviewerFn: async (opts) => { - capturedModelOverride = opts.modelOverride; - return { exitCode: 0, stderr: "", subagentDir: opts.subagentDir }; - }, - }, - ); - - assert.equal(capturedModelOverride, undefined); - }); -}); diff --git a/tests/state-machine.test.ts b/tests/state-machine.test.ts new file mode 100644 index 0000000..eaf1fad --- /dev/null +++ b/tests/state-machine.test.ts @@ -0,0 +1,422 @@ +// Property-based state machine tests for koan. 
+// Verifies: +// - All valid story status transitions (§11.4 table) +// - Routing decisions for all state combinations +// - Permission matrices (role × tool × expected result) + +import assert from "node:assert/strict"; +import { describe, it } from "node:test"; +import { promises as fs } from "node:fs"; +import * as os from "node:os"; +import * as path from "node:path"; + +import { checkPermission, ROLE_PERMISSIONS } from "../src/planner/lib/permissions.js"; +import { + loadStoryState, + saveStoryState, + ensureStoryDirectory, +} from "../src/planner/epic/state.js"; +import { createInitialStoryState } from "../src/planner/epic/types.js"; +import type { StoryStatus } from "../src/planner/types.js"; +import { assertStatus } from "../src/planner/tools/orchestrator.js"; + +async function mkTempDir(): Promise { + return fs.mkdtemp(path.join(os.tmpdir(), "koan-sm-test-")); +} + +async function withEpicDir(fn: (epicDir: string) => Promise): Promise { + const dir = await mkTempDir(); + try { + await fs.mkdir(path.join(dir, "stories"), { recursive: true }); + return await fn(dir); + } finally { + await fs.rm(dir, { recursive: true, force: true }); + } +} + +// --------------------------------------------------------------------------- +// State machine: valid transitions (§11.4) +// --------------------------------------------------------------------------- + +describe("state machine: valid transitions", () => { + // koan_select_story: pending → selected, retry → selected + it("koan_select_story accepts pending → selected", async () => { + await withEpicDir(async (epicDir) => { + await ensureStoryDirectory(epicDir, "S-001-auth"); + const state = await loadStoryState(epicDir, "S-001-auth"); + assert.equal(state.status, "pending"); + + await saveStoryState(epicDir, "S-001-auth", { ...state, status: "selected", updatedAt: new Date().toISOString() }); + const updated = await loadStoryState(epicDir, "S-001-auth"); + assert.equal(updated.status, "selected"); + }); + }); + + 
it("koan_select_story accepts retry → selected", async () => { + await withEpicDir(async (epicDir) => { + await ensureStoryDirectory(epicDir, "S-001-auth"); + const state = await loadStoryState(epicDir, "S-001-auth"); + + await saveStoryState(epicDir, "S-001-auth", { ...state, status: "retry", updatedAt: new Date().toISOString() }); + const retrying = await loadStoryState(epicDir, "S-001-auth"); + assert.equal(retrying.status, "retry"); + + await saveStoryState(epicDir, "S-001-auth", { ...retrying, status: "selected", updatedAt: new Date().toISOString() }); + const selected = await loadStoryState(epicDir, "S-001-auth"); + assert.equal(selected.status, "selected"); + }); + }); + + // koan_complete_story: verifying → done + it("koan_complete_story accepts verifying → done", async () => { + await withEpicDir(async (epicDir) => { + await ensureStoryDirectory(epicDir, "S-002-routes"); + const state = await loadStoryState(epicDir, "S-002-routes"); + await saveStoryState(epicDir, "S-002-routes", { ...state, status: "verifying", updatedAt: new Date().toISOString() }); + + const verifying = await loadStoryState(epicDir, "S-002-routes"); + assert.equal(verifying.status, "verifying"); + + await saveStoryState(epicDir, "S-002-routes", { ...verifying, status: "done", updatedAt: new Date().toISOString() }); + const done = await loadStoryState(epicDir, "S-002-routes"); + assert.equal(done.status, "done"); + }); + }); + + // koan_retry_story: verifying → retry + it("koan_retry_story accepts verifying → retry", async () => { + await withEpicDir(async (epicDir) => { + await ensureStoryDirectory(epicDir, "S-003-profile"); + const state = await loadStoryState(epicDir, "S-003-profile"); + await saveStoryState(epicDir, "S-003-profile", { ...state, status: "verifying", updatedAt: new Date().toISOString() }); + + const verifying = await loadStoryState(epicDir, "S-003-profile"); + await saveStoryState(epicDir, "S-003-profile", { + ...verifying, + status: "retry", + failureSummary: "Test 3 
failed: expected 200 got 404", + updatedAt: new Date().toISOString(), + }); + + const retried = await loadStoryState(epicDir, "S-003-profile"); + assert.equal(retried.status, "retry"); + assert.equal(retried.failureSummary, "Test 3 failed: expected 200 got 404"); + }); + }); + + // koan_skip_story: pending → skipped + it("koan_skip_story accepts pending → skipped", async () => { + await withEpicDir(async (epicDir) => { + await ensureStoryDirectory(epicDir, "S-004-optional"); + const state = await loadStoryState(epicDir, "S-004-optional"); + assert.equal(state.status, "pending"); + + await saveStoryState(epicDir, "S-004-optional", { + ...state, + status: "skipped", + skipReason: "Already implemented by S-003", + updatedAt: new Date().toISOString(), + }); + + const skipped = await loadStoryState(epicDir, "S-004-optional"); + assert.equal(skipped.status, "skipped"); + assert.equal(skipped.skipReason, "Already implemented by S-003"); + }); + }); + + // koan_skip_story: retry → skipped + it("koan_skip_story accepts retry → skipped", async () => { + await withEpicDir(async (epicDir) => { + await ensureStoryDirectory(epicDir, "S-005-retry-skip"); + const state = await loadStoryState(epicDir, "S-005-retry-skip"); + await saveStoryState(epicDir, "S-005-retry-skip", { ...state, status: "retry", updatedAt: new Date().toISOString() }); + + const retrying = await loadStoryState(epicDir, "S-005-retry-skip"); + assert.equal(retrying.status, "retry"); + + await saveStoryState(epicDir, "S-005-retry-skip", { + ...retrying, + status: "skipped", + skipReason: "Made unnecessary by another story", + updatedAt: new Date().toISOString(), + }); + + const skipped = await loadStoryState(epicDir, "S-005-retry-skip"); + assert.equal(skipped.status, "skipped"); + }); + }); + + // No escalated status exists in the new design. 
+ it("StoryStatus type does not include escalated", () => { + const validStatuses: StoryStatus[] = [ + "pending", "selected", "planning", "executing", + "verifying", "done", "retry", "skipped", + ]; + // Verify all expected statuses are present + assert.equal(validStatuses.length, 8); + // Ensure "escalated" is not a valid value by type-checking at runtime. + const set = new Set(validStatuses); + assert.equal(set.has("escalated"), false, "escalated should not exist as a story status"); + }); +}); + +// --------------------------------------------------------------------------- +// assertStatus enforcement +// --------------------------------------------------------------------------- + +describe("assertStatus enforcement", () => { + it("throws when current status is not in allowed list", () => { + assert.throws( + () => assertStatus("S-001", "selected", ["pending", "retry"]), + /Cannot transition story 'S-001'/, + ); + }); + + it("throws when current status does not match single allowed status", () => { + assert.throws( + () => assertStatus("S-001", "pending", ["verifying"]), + /Cannot transition story 'S-001'/, + ); + }); + + it("does not throw when current status is in allowed list", () => { + assert.doesNotThrow(() => assertStatus("S-001", "verifying", ["verifying"])); + }); + + it("does not throw when current status is one of multiple allowed statuses", () => { + assert.doesNotThrow(() => assertStatus("S-001", "retry", ["pending", "retry"])); + assert.doesNotThrow(() => assertStatus("S-001", "pending", ["pending", "retry"])); + }); + + it("koan_skip_story accepts retry status via assertStatus", () => { + assert.doesNotThrow(() => assertStatus("S-001", "retry", ["pending", "retry"])); + }); + + it("koan_skip_story rejects selected status via assertStatus", () => { + assert.throws( + () => assertStatus("S-001", "selected", ["pending", "retry"]), + /Cannot transition story 'S-001'/, + ); + }); +}); + +// 
--------------------------------------------------------------------------- +// State machine: tool source validation (§11.4 / §11.12) +// --------------------------------------------------------------------------- + +describe("state machine: tool source validation", () => { + const TOOL_VALID_SOURCES: Record = { + koan_select_story: ["pending", "retry"], + koan_complete_story: ["verifying"], + koan_retry_story: ["verifying"], + koan_skip_story: ["pending", "retry"], + }; + + const ALL_STATUSES: StoryStatus[] = [ + "pending", "selected", "planning", "executing", + "verifying", "done", "retry", "skipped", + ]; + + for (const [tool, validSources] of Object.entries(TOOL_VALID_SOURCES)) { + const invalidSources = ALL_STATUSES.filter((s) => !validSources.includes(s)); + + it(`${tool} allows only [${validSources.join(", ")}]`, () => { + // All valid sources should be in the set + assert.equal(validSources.length > 0, true); + // No invalid source should overlap with valid + for (const invalid of invalidSources) { + assert.equal(validSources.includes(invalid), false, + `${tool}: ${invalid} should not be a valid source status`); + } + }); + } + + it("koan_escalate does not exist in the tool inventory", () => { + // Verify koan_escalate is not in the ROLE_PERMISSIONS for orchestrator + + const orchestratorTools = ROLE_PERMISSIONS.get("orchestrator") ?? 
new Set(); + assert.equal(orchestratorTools.has("koan_escalate"), false, "koan_escalate must not be in orchestrator permissions"); + }); +}); + +// --------------------------------------------------------------------------- +// Routing decisions +// --------------------------------------------------------------------------- + +describe("routing decisions", () => { + // Simulate the routeFromState logic (we test inputs/outputs, not the internal function) + interface Story { storyId: string; status: StoryStatus; retryCount: number; maxRetries: number } + + function simulateRouting(stories: Story[]): string { + // Mirror driver.ts routeFromState logic + const retry = stories.find((s) => s.status === "retry"); + if (retry) return `retry:${retry.storyId}`; + const selected = stories.find((s) => s.status === "selected"); + if (selected) return `execute:${selected.storyId}`; + const terminal = new Set(["done", "skipped"]); + const allTerminal = stories.every((s) => terminal.has(s.status)); + if (allTerminal && stories.length > 0) return "complete"; + return "error"; + } + + it("routes to retry when a story has retry status", () => { + const stories: Story[] = [ + { storyId: "S-001-auth", status: "done", retryCount: 0, maxRetries: 2 }, + { storyId: "S-002-routes", status: "retry", retryCount: 1, maxRetries: 2 }, + ]; + assert.equal(simulateRouting(stories), "retry:S-002-routes"); + }); + + it("routes to execute when a story has selected status", () => { + const stories: Story[] = [ + { storyId: "S-001-auth", status: "done", retryCount: 0, maxRetries: 2 }, + { storyId: "S-002-routes", status: "selected", retryCount: 0, maxRetries: 2 }, + ]; + assert.equal(simulateRouting(stories), "execute:S-002-routes"); + }); + + it("routes to complete when all stories are done", () => { + const stories: Story[] = [ + { storyId: "S-001-auth", status: "done", retryCount: 0, maxRetries: 2 }, + { storyId: "S-002-routes", status: "done", retryCount: 0, maxRetries: 2 }, + ]; + 
assert.equal(simulateRouting(stories), "complete"); + }); + + it("routes to complete when all stories are done or skipped", () => { + const stories: Story[] = [ + { storyId: "S-001-auth", status: "done", retryCount: 0, maxRetries: 2 }, + { storyId: "S-002-optional", status: "skipped", retryCount: 0, maxRetries: 2 }, + ]; + assert.equal(simulateRouting(stories), "complete"); + }); + + it("routes to error when no actionable state exists", () => { + const stories: Story[] = [ + { storyId: "S-001-auth", status: "pending", retryCount: 0, maxRetries: 2 }, + { storyId: "S-002-routes", status: "pending", retryCount: 0, maxRetries: 2 }, + ]; + assert.equal(simulateRouting(stories), "error"); + }); + + it("prefers retry over selected (retry takes routing priority)", () => { + const stories: Story[] = [ + { storyId: "S-001-auth", status: "retry", retryCount: 1, maxRetries: 2 }, + { storyId: "S-002-routes", status: "selected", retryCount: 0, maxRetries: 2 }, + ]; + assert.equal(simulateRouting(stories), "retry:S-001-auth"); + }); + + it("routes to error for empty story list", () => { + assert.equal(simulateRouting([]), "error"); + }); +}); + +// --------------------------------------------------------------------------- +// Permission matrix (role × tool) +// --------------------------------------------------------------------------- + +describe("permission matrix", () => { + const epicDir = "/tmp/test-epic"; + + // Tools that should be allowed for each role. 
+ const ROLE_ALLOWED: Record = { + intake: ["read", "bash", "grep", "glob", "find", "ls", "koan_complete_step", "koan_ask_question", "koan_request_scouts", "edit", "write"], + scout: ["read", "bash", "grep", "glob", "find", "ls", "koan_complete_step", "edit", "write"], + decomposer: ["read", "bash", "grep", "glob", "find", "ls", "koan_complete_step", "koan_ask_question", "koan_request_scouts", "edit", "write"], + orchestrator: ["read", "bash", "grep", "glob", "find", "ls", "koan_complete_step", "koan_ask_question", "koan_select_story", "koan_complete_story", "koan_retry_story", "koan_skip_story", "edit", "write"], + planner: ["read", "bash", "grep", "glob", "find", "ls", "koan_complete_step", "koan_ask_question", "koan_request_scouts", "edit", "write"], + executor: ["read", "bash", "grep", "glob", "find", "ls", "koan_complete_step", "koan_ask_question", "edit", "write"], + }; + + // Tools that must be blocked for each role. + const ROLE_BLOCKED: Record = { + intake: ["koan_select_story", "koan_complete_story", "koan_retry_story", "koan_skip_story", "koan_escalate"], + scout: ["koan_ask_question", "koan_request_scouts", "koan_select_story", "koan_complete_story", "koan_retry_story", "koan_skip_story", "koan_escalate"], + decomposer: ["koan_select_story", "koan_complete_story", "koan_retry_story", "koan_skip_story", "koan_escalate"], + orchestrator: ["koan_request_scouts", "koan_escalate"], + planner: ["koan_select_story", "koan_complete_story", "koan_retry_story", "koan_skip_story", "koan_escalate"], + executor: ["koan_select_story", "koan_complete_story", "koan_retry_story", "koan_skip_story", "koan_escalate", "koan_request_scouts"], + }; + + for (const [role, allowed] of Object.entries(ROLE_ALLOWED)) { + it(`${role}: allows expected tools`, () => { + for (const tool of allowed) { + const result = checkPermission(role, tool, epicDir); + assert.equal(result.allowed, true, `${role} should allow ${tool}: ${result.reason}`); + } + }); + } + + for (const [role, blocked] 
of Object.entries(ROLE_BLOCKED)) { + it(`${role}: blocks forbidden tools`, () => { + for (const tool of blocked) { + const result = checkPermission(role, tool, epicDir); + assert.equal(result.allowed, false, `${role} should block ${tool}`); + } + }); + } + + it("unknown role is blocked for all tools", () => { + const tools = ["read", "koan_complete_step", "koan_ask_question", "write"]; + for (const tool of tools) { + const result = checkPermission("unknown-role", tool, epicDir); + // read tools are always allowed, even for unknown roles + if (tool === "read") { + assert.equal(result.allowed, true); + } else { + assert.equal(result.allowed, false, `unknown-role should block ${tool}`); + } + } + }); + + it("planning roles have write access scoped to epic directory", () => { + const planningRoles = ["intake", "scout", "decomposer", "planner", "orchestrator"]; + const insidePath = path.join(epicDir, "stories", "S-001-auth", "story.md"); + const outsidePath = "/etc/passwd"; + + for (const role of planningRoles) { + const inside = checkPermission(role, "write", epicDir, { path: insidePath }); + assert.equal(inside.allowed, true, `${role} should allow write inside epic dir`); + + const outside = checkPermission(role, "write", epicDir, { path: outsidePath }); + assert.equal(outside.allowed, false, `${role} should block write outside epic dir`); + } + }); + + it("executor has unrestricted write access (can write to codebase)", () => { + // Executor does not scope-check paths — it needs to write to the codebase + const codebasePath = "/Users/lmergen/git/myapp/src/auth.ts"; + const result = checkPermission("executor", "write", epicDir, { path: codebasePath }); + assert.equal(result.allowed, true, "executor should allow writes anywhere"); + }); +}); + +// --------------------------------------------------------------------------- +// Initial state invariants +// --------------------------------------------------------------------------- + +describe("initial state invariants", 
() => { + it("createInitialStoryState produces pending status", () => { + const state = createInitialStoryState("S-001-auth"); + assert.equal(state.status, "pending"); + assert.equal(state.retryCount, 0); + assert.equal(state.storyId, "S-001-auth"); + assert.equal(typeof state.updatedAt, "string"); + }); + + it("createInitialStoryState uses default maxRetries of 2", () => { + const state = createInitialStoryState("S-001-auth"); + assert.equal(state.maxRetries, 2); + }); + + it("createInitialStoryState accepts custom maxRetries", () => { + const state = createInitialStoryState("S-001-auth", 5); + assert.equal(state.maxRetries, 5); + }); + + it("StoryState has no escalation field", () => { + const state = createInitialStoryState("S-001-auth"); + assert.equal("escalation" in state, false, "StoryState must not have an escalation field"); + }); +}); diff --git a/tests/story-discovery.test.ts b/tests/story-discovery.test.ts new file mode 100644 index 0000000..6cb2200 --- /dev/null +++ b/tests/story-discovery.test.ts @@ -0,0 +1,84 @@ +import assert from "node:assert/strict"; +import { describe, it } from "node:test"; +import { promises as fs } from "node:fs"; +import * as os from "node:os"; +import * as path from "node:path"; + +import { discoverStoryIds } from "../src/planner/epic/state.js"; + +async function mkTempDir(): Promise { + return fs.mkdtemp(path.join(os.tmpdir(), "koan-test-")); +} + +describe("discoverStoryIds", () => { + it("returns empty array when stories directory does not exist", async () => { + const epicDir = await mkTempDir(); + try { + const ids = await discoverStoryIds(epicDir); + assert.deepEqual(ids, []); + } finally { + await fs.rm(epicDir, { recursive: true, force: true }); + } + }); + + it("returns empty array when stories directory is empty", async () => { + const epicDir = await mkTempDir(); + try { + await fs.mkdir(path.join(epicDir, "stories")); + const ids = await discoverStoryIds(epicDir); + assert.deepEqual(ids, []); + } finally { + 
await fs.rm(epicDir, { recursive: true, force: true }); + } + }); + + it("returns sorted story IDs for each subdirectory", async () => { + const epicDir = await mkTempDir(); + try { + const storiesDir = path.join(epicDir, "stories"); + await fs.mkdir(storiesDir); + // Create story directories out of alphabetical order. + for (const id of ["add-auth", "migrate-db", "update-api"]) { + await fs.mkdir(path.join(storiesDir, id)); + } + + const ids = await discoverStoryIds(epicDir); + assert.deepEqual(ids, ["add-auth", "migrate-db", "update-api"]); + } finally { + await fs.rm(epicDir, { recursive: true, force: true }); + } + }); + + it("ignores files in the stories directory", async () => { + const epicDir = await mkTempDir(); + try { + const storiesDir = path.join(epicDir, "stories"); + await fs.mkdir(storiesDir); + await fs.mkdir(path.join(storiesDir, "real-story")); + // Write a file — should be ignored. + await fs.writeFile(path.join(storiesDir, "not-a-story.md"), "# ignored\n"); + + const ids = await discoverStoryIds(epicDir); + assert.deepEqual(ids, ["real-story"]); + } finally { + await fs.rm(epicDir, { recursive: true, force: true }); + } + }); + + it("returns deterministically sorted IDs regardless of filesystem order", async () => { + const epicDir = await mkTempDir(); + try { + const storiesDir = path.join(epicDir, "stories"); + await fs.mkdir(storiesDir); + // Create in reverse order. 
+ for (const id of ["zzz-last", "aaa-first", "mmm-middle"]) { + await fs.mkdir(path.join(storiesDir, id)); + } + + const ids = await discoverStoryIds(epicDir); + assert.deepEqual(ids, ["aaa-first", "mmm-middle", "zzz-last"]); + } finally { + await fs.rm(epicDir, { recursive: true, force: true }); + } + }); +}); diff --git a/tests/subagent-model.test.ts b/tests/subagent-model.test.ts deleted file mode 100644 index ee07ccb..0000000 --- a/tests/subagent-model.test.ts +++ /dev/null @@ -1,215 +0,0 @@ -import assert from "node:assert/strict"; -import { describe, it } from "node:test"; - -import { buildSpawnArgs } from "../src/planner/subagent.js"; -import { - ALL_PHASE_MODEL_KEYS, - type PhaseModelKey, -} from "../src/planner/model-phase.js"; -import { - applyGeneralPurposeModel, - applyStrongModel, - initConfigFromActiveModel, -} from "../src/planner/ui/config/model-selection.js"; -import { - GENERAL_PURPOSE_PHASE_MODEL_KEYS, - STRONG_PHASE_MODEL_KEYS, -} from "../src/planner/model-phase.js"; - -// -- buildSpawnArgs: --model flag threading -- - -describe("buildSpawnArgs", () => { - const baseOpts = { - planDir: "/plan", - subagentDir: "/subagent", - extensionPath: "/ext/koan.ts", - cwd: "/working", - }; - - it("omits --model flag when modelOverride is absent", () => { - const args = buildSpawnArgs("architect", "plan-design", "start", baseOpts); - assert.equal(args.includes("--model"), false); - }); - - it("omits --model flag when modelOverride is undefined", () => { - const args = buildSpawnArgs("architect", "plan-design", "start", { - ...baseOpts, - modelOverride: undefined, - }); - assert.equal(args.includes("--model"), false); - }); - - it("includes --model flag and value when modelOverride is set", () => { - const args = buildSpawnArgs("architect", "plan-design", "start", { - ...baseOpts, - modelOverride: "anthropic/claude-opus-4", - }); - assert.ok(args.includes("--model"), "expected --model flag in args"); - const idx = args.indexOf("--model"); - 
assert.equal(args[idx + 1], "anthropic/claude-opus-4"); - }); - - it("places --model before the prompt (last arg)", () => { - const prompt = "Begin the plan-design phase."; - const args = buildSpawnArgs("architect", "plan-design", prompt, { - ...baseOpts, - modelOverride: "openai/gpt-5", - }); - const modelIdx = args.indexOf("--model"); - const promptIdx = args.indexOf(prompt); - assert.ok(modelIdx >= 0, "--model not found"); - assert.ok(promptIdx >= 0, "prompt not found"); - assert.ok(modelIdx < promptIdx, "--model should appear before prompt"); - }); - - it("places --model after extraFlags", () => { - const args = buildSpawnArgs("reviewer", "qr-plan-design", "Verify.", { - ...baseOpts, - extraFlags: ["--koan-qr-item", "item-42"], - modelOverride: "google/gemini-2-pro", - }); - const qrItemIdx = args.indexOf("--koan-qr-item"); - const modelIdx = args.indexOf("--model"); - assert.ok(qrItemIdx >= 0, "--koan-qr-item not found"); - assert.ok(modelIdx >= 0, "--model not found"); - assert.ok(qrItemIdx < modelIdx, "--model should appear after extra flags"); - }); - - it("preserves all required fixed args regardless of modelOverride", () => { - const args = buildSpawnArgs("developer", "plan-code", "begin", { - ...baseOpts, - modelOverride: "anthropic/claude-sonnet", - }); - assert.ok(args.includes("-p"), "-p flag missing"); - assert.ok(args.includes("-e"), "-e flag missing"); - assert.ok(args.includes("--koan-role"), "--koan-role missing"); - assert.ok(args.includes("--koan-phase"), "--koan-phase missing"); - assert.ok(args.includes("--koan-plan-dir"), "--koan-plan-dir missing"); - assert.ok(args.includes("--koan-subagent-dir"), "--koan-subagent-dir missing"); - }); -}); - -// -- Quick-set utility functions -- - -describe("initConfigFromActiveModel", () => { - it("creates a 20-key config with all keys set to the given model", () => { - const config = initConfigFromActiveModel("anthropic/claude-sonnet"); - assert.equal(Object.keys(config).length, 
ALL_PHASE_MODEL_KEYS.length); - for (const key of ALL_PHASE_MODEL_KEYS) { - assert.equal(config[key], "anthropic/claude-sonnet", `key ${key} should be set`); - } - }); - - it("produces a config where all values are the same model", () => { - const config = initConfigFromActiveModel("openai/gpt-5"); - const values = Object.values(config); - assert.ok(values.every((v) => v === "openai/gpt-5")); - }); -}); - -describe("applyStrongModel", () => { - it("sets all strong keys to the chosen model, leaving GP keys from existing config", () => { - const existing = initConfigFromActiveModel("openai/gpt-4"); - const result = applyStrongModel("anthropic/claude-opus-4", existing, "openai/gpt-4"); - - for (const key of STRONG_PHASE_MODEL_KEYS) { - assert.equal(result[key], "anthropic/claude-opus-4", `strong key ${key} should be updated`); - } - - for (const key of GENERAL_PURPOSE_PHASE_MODEL_KEYS) { - assert.equal(result[key], "openai/gpt-4", `GP key ${key} should be unchanged`); - } - }); - - it("initializes from activeModelId when existingConfig is null", () => { - const result = applyStrongModel("anthropic/claude-opus-4", null, "openai/gpt-5-mini"); - - for (const key of STRONG_PHASE_MODEL_KEYS) { - assert.equal(result[key], "anthropic/claude-opus-4", `strong key ${key} should be updated`); - } - - for (const key of GENERAL_PURPOSE_PHASE_MODEL_KEYS) { - assert.equal(result[key], "openai/gpt-5-mini", `GP key ${key} should be initialized from active model`); - } - }); - - it("writes all 20 keys regardless of which keys are strong", () => { - const result = applyStrongModel("some/model", null, "active/model"); - assert.equal(Object.keys(result).length, ALL_PHASE_MODEL_KEYS.length); - }); -}); - -describe("applyGeneralPurposeModel", () => { - it("sets all GP keys to the chosen model, leaving strong keys from existing config", () => { - const existing = initConfigFromActiveModel("anthropic/claude-opus-4"); - const result = applyGeneralPurposeModel("openai/gpt-5-mini", existing, 
"anthropic/claude-opus-4"); - - for (const key of GENERAL_PURPOSE_PHASE_MODEL_KEYS) { - assert.equal(result[key], "openai/gpt-5-mini", `GP key ${key} should be updated`); - } - - for (const key of STRONG_PHASE_MODEL_KEYS) { - assert.equal(result[key], "anthropic/claude-opus-4", `strong key ${key} should be unchanged`); - } - }); - - it("initializes from activeModelId when existingConfig is null", () => { - const result = applyGeneralPurposeModel("openai/gpt-5-mini", null, "anthropic/claude-sonnet"); - - for (const key of GENERAL_PURPOSE_PHASE_MODEL_KEYS) { - assert.equal(result[key], "openai/gpt-5-mini", `GP key ${key} should be updated`); - } - - for (const key of STRONG_PHASE_MODEL_KEYS) { - assert.equal(result[key], "anthropic/claude-sonnet", `strong key ${key} should be initialized from active model`); - } - }); - - it("writes all 20 keys regardless of which keys are GP", () => { - const result = applyGeneralPurposeModel("some/model", null, "active/model"); - assert.equal(Object.keys(result).length, ALL_PHASE_MODEL_KEYS.length); - }); -}); - -describe("quick-set from empty config: all-or-none persistence invariant", () => { - it("applyStrongModel from null config produces a 20-key config (all-or-none)", () => { - const result = applyStrongModel("strong/model", null, "active/model"); - const keys = Object.keys(result) as PhaseModelKey[]; - assert.equal(keys.length, ALL_PHASE_MODEL_KEYS.length); - - // Verify every expected key is present - for (const key of ALL_PHASE_MODEL_KEYS) { - assert.ok(key in result, `key "${key}" missing from result`); - assert.equal(typeof result[key], "string"); - assert.ok(result[key].length > 0); - } - }); - - it("applyGeneralPurposeModel from null config produces a 20-key config (all-or-none)", () => { - const result = applyGeneralPurposeModel("gp/model", null, "active/model"); - const keys = Object.keys(result) as PhaseModelKey[]; - assert.equal(keys.length, ALL_PHASE_MODEL_KEYS.length); - - for (const key of ALL_PHASE_MODEL_KEYS) 
{ - assert.ok(key in result, `key "${key}" missing from result`); - } - }); - - it("strong and GP quick-set results are complementary", () => { - const activeModel = "active/model"; - - const strongResult = applyStrongModel("strong/model", null, activeModel); - const gpResult = applyGeneralPurposeModel("gp/model", null, activeModel); - - // Strong keys in strongResult should differ from GP keys - for (const key of STRONG_PHASE_MODEL_KEYS) { - assert.equal(strongResult[key], "strong/model"); - assert.equal(gpResult[key], activeModel); // GP result left strong keys as active - } - - for (const key of GENERAL_PURPOSE_PHASE_MODEL_KEYS) { - assert.equal(strongResult[key], activeModel); // strong result left GP keys as active - assert.equal(gpResult[key], "gp/model"); - } - }); -}); diff --git a/tests/widget.test.ts b/tests/widget.test.ts deleted file mode 100644 index bd2ea8e..0000000 --- a/tests/widget.test.ts +++ /dev/null @@ -1,173 +0,0 @@ -import assert from "node:assert/strict"; -import { describe, it } from "node:test"; - -import type { ExtensionUIContext, Theme } from "@mariozechner/pi-coding-agent"; -import { visibleWidth } from "@mariozechner/pi-tui"; - -import { WidgetController, formatPlanningHeaderLabel } from "../src/planner/ui/widget.js"; - -type WidgetInstance = { - render: (width: number) => string[]; - invalidate: () => void; -}; - -type WidgetFactory = ((tui: unknown, theme: Theme) => WidgetInstance) | undefined; - -function createPlainTheme(): Theme { - return { - fg: (_color: string, text: string) => text, - bg: (_color: string, text: string) => text, - bold: (text: string) => text, - } as unknown as Theme; -} - -function createWidgetHarness(): { - controller: WidgetController; - render: (width: number) => string[]; - destroy: () => void; -} { - const theme = createPlainTheme(); - let factory: WidgetFactory; - - const ui = { - theme, - setWidget: (_key: string, next: WidgetFactory) => { - factory = next; - }, - } as unknown as ExtensionUIContext; - - 
const controller = new WidgetController(ui, "plan-test-id"); - - return { - controller, - render: (width: number) => { - assert.ok(factory, "widget factory should be registered"); - return factory({} as unknown, theme).render(width); - }, - destroy: () => controller.destroy(), - }; -} - -describe("formatPlanningHeaderLabel", () => { - it("applies compaction in deterministic order", () => { - const phase = "Plan design"; - const status = "CURRENT"; - - const full = `Planning · ${phase} · ${status}`; - const shortStatus = `Planning · ${phase} · CUR`; - const noStatus = `Planning · ${phase}`; - const shortPhase = "Planning · Design"; - - assert.equal(formatPlanningHeaderLabel(phase, status, visibleWidth(full)), full); - assert.equal(formatPlanningHeaderLabel(phase, status, visibleWidth(full) - 1), shortStatus); - assert.equal(formatPlanningHeaderLabel(phase, status, visibleWidth(shortStatus) - 1), noStatus); - assert.equal(formatPlanningHeaderLabel(phase, status, visibleWidth(noStatus) - 1), shortPhase); - - const tiny = formatPlanningHeaderLabel(phase, status, 14); - assert.ok(visibleWidth(tiny) <= 14); - assert.ok(tiny.startsWith("Planning")); - }); -}); - -describe("WidgetController rendering", () => { - it("renders metadata header with 3-phase layout (no context gathering)", () => { - const harness = createWidgetHarness(); - try { - const lines = harness.render(140); - const text = lines.join("\n"); - - assert.match(text, /Planning · Plan design · CURRENT/); - assert.doesNotMatch(text, /Context gathering/); - assert.doesNotMatch(text, /┃ Context gathering ┃/); - } finally { - harness.destroy(); - } - }); - - it("renders merged runtime section with stage + quality + workers", () => { - const harness = createWidgetHarness(); - try { - harness.controller.update({ - qrIteration: 2, - qrIterationsMax: 6, - qrMode: "fix", - qrPhase: "verify", - qrDone: 9, - qrTotal: 14, - qrPass: 8, - qrFail: 1, - qrTodo: 5, - subagentQueued: 2, - subagentActive: 3, - subagentDone: 7, - 
subagentParallelCount: 4, - }); - - const text = harness.render(140).join("\n"); - assert.match(text, /Runtime/); - assert.match(text, /stage\s+: Verifying \(cycle 2\/6 · fix\)/); - assert.match(text, /quality\s+: checked 9\/14\s+pass 8\s+FAIL 1\s+remaining 5/); - assert.match(text, /workers\s+: queued 2\s+active 3\s+done 7\s+pool ×4/); - - assert.doesNotMatch(text, /\bQR\b\s+\|/); - assert.doesNotMatch(text, /\bSubagents\b\s+\|/); - assert.doesNotMatch(text, /\bCurrent step\b/); - } finally { - harness.destroy(); - } - }); - - it("uses Writing for execute debut and Fixing for execute fix", () => { - const harness = createWidgetHarness(); - try { - harness.controller.update({ - qrIteration: 1, - qrIterationsMax: 6, - qrMode: "initial", - qrPhase: "execute", - }); - - let text = harness.render(140).join("\n"); - assert.match(text, /stage\s+: Writing \(cycle 1\/6 · initial\)/); - - harness.controller.update({ - qrMode: "fix", - qrPhase: "execute", - }); - - text = harness.render(140).join("\n"); - assert.match(text, /stage\s+: Fixing \(cycle 1\/6 · fix\)/); - } finally { - harness.destroy(); - } - }); - - it("aligns identity table separator using dynamic key width", () => { - const harness = createWidgetHarness(); - try { - harness.controller.update({ - subagentRole: "reviewer", - subagentParallelCount: 12, - subagentModel: "openai-codex/gpt-5.3-codex", - }); - - const lines = harness.render(140); - const planLine = lines.find((line) => line.includes("Plan ID") && line.includes(" : ")); - const agentLine = lines.find((line) => line.includes("Agent pool") && line.includes(" : ")); - const modelLine = lines.find((line) => line.includes("Model") && line.includes(" : ")); - - assert.ok(planLine, "expected Plan ID row"); - assert.ok(agentLine, "expected Agent pool row"); - assert.ok(modelLine, "expected Model row"); - - const planSep = planLine.indexOf(" : "); - const agentSep = agentLine.indexOf(" : "); - const modelSep = modelLine.indexOf(" : "); - - 
assert.equal(planSep, agentSep); - assert.equal(agentSep, modelSep); - } finally { - harness.destroy(); - } - }); -}); From c766ac66926e20a5c4742defd28a89d10285de21 Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Fri, 13 Mar 2026 12:46:55 +0700 Subject: [PATCH 047/412] docs: refresh planner docs and add repo invariants --- .config/wt.toml | 12 ++ AGENTS.md | 7 + README.md | 18 +- design-decisions.md | 412 ---------------------------------------- docs/planning-widget.md | 273 ++++++++++++-------------- 5 files changed, 149 insertions(+), 573 deletions(-) create mode 100644 .config/wt.toml create mode 100644 AGENTS.md delete mode 100644 design-decisions.md diff --git a/.config/wt.toml b/.config/wt.toml new file mode 100644 index 0000000..c705010 --- /dev/null +++ b/.config/wt.toml @@ -0,0 +1,12 @@ +# Koan project worktree hooks +# Docs: https://worktrunk.dev/hook/ + +[post-create] +deps = "npm ci" + +[post-start] +copy = "wt step copy-ignored" + +[pre-merge] +check = "npm run check" +test = "npm test" diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..24d8ad4 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,7 @@ +# Koan Architecture Invariant + +LLMs write **markdown files only**. LLMs communicate with the driver through **tool calls only**. +The driver maintains `.json` state files internally — no LLM ever reads or writes a `.json` file. + +Example: orchestrator calls `koan_complete_story(story_id)` → tool code writes `state.json` + `status.md` → +driver reads `state.json` to route next action. The orchestrator never touches `state.json` directly. diff --git a/README.md b/README.md index a8d832b..ac8d11a 100644 --- a/README.md +++ b/README.md @@ -15,19 +15,24 @@ The parent controls progression through plan design, plan code, plan docs, quali ## Invoking the Planner -Call `koan_plan` as an MCP tool — the LLM invokes it when the user asks to plan a complex task. 
No parameters are needed: the conversation up to that point is automatically exported to `conversation.jsonl` in the plan directory and becomes the planning context. +Call `koan_plan` as an MCP tool — the LLM invokes it when the user asks to plan a complex task. No parameters are needed: the conversation up to that point is automatically exported to `conversation.jsonl` in the plan directory and becomes planning input. The architect then persists a structured **background context** index via koan tools. The planning pipeline runs sequentially: -1. **plan-design** (architect) — reads `conversation.jsonl` to understand intent, explores the codebase, writes `plan.json`. +1. **plan-design** (architect) — reads `conversation.jsonl`, builds structured **background context** (previous conversation(s) + indexes), explores the codebase, writes `plan.json`. 2. **plan-code** (developer) — reads `plan.json`, populates code intents and changes. -3. **plan-docs** (technical writer) — reads `plan.json` and optionally `conversation.jsonl` for decisions and tradeoffs, writes documentation entries. +3. **plan-docs** (technical writer) — reads `plan.json` plus the injected background context snippet, and optionally `conversation.jsonl` for rationale gaps; writes documentation entries. Each phase is followed by a QR (quality review) block: decompose → parallel verify → fix loop, up to `MAX_FIX_ITERATIONS`. -### conversation.jsonl +### conversation.jsonl + background context -Written once at the start of `koan_plan`. Contains the full session branch as JSONL (one JSON object per line — raw pi `SessionManager` entries, not a plain-text transcript). The plan-design architect and plan-docs writer are told about this file and may `Read` it; other phases work from `plan.json` only. +`conversation.jsonl` is written once at the start of `koan_plan`. It contains the full session branch as JSONL (one JSON object per line — raw pi `SessionManager` entries, not a plain-text transcript). 
+ +The architect categorically analyzes this file and persists compact markdown **background context** via: +- `koan_set_background_context` + +That context is then injected directly into prompts for planning and QR agents, alongside the conversation.jsonl location. ### Prompt + convention sources @@ -50,7 +55,8 @@ Key design choices that shape implementation: - **Default-deny permissions**: each phase explicitly allowlists tools; unknown tool/phase access is blocked. - **Disk-backed mutations**: planning mutations are immediately persisted with atomic writes instead of deferred finalize steps. - **Need-to-know prompts**: each subagent only receives the minimum context needed for its task. -- **Passive conversation context**: `conversation.jsonl` is a read-only artifact on disk. No phase programmatically injects it into prompts; agents that need it use the `Read` tool. +- **Injected background context**: each workflow step prompt prepends the same `` snippet containing conversation path + compact markdown context. +- **Ephemeral runtime workspace**: intermediate subagent logs/state live in a mkdtemp workspace and are removed on plan completion and session shutdown. ## Invariants diff --git a/design-decisions.md b/design-decisions.md deleted file mode 100644 index 6349a47..0000000 --- a/design-decisions.md +++ /dev/null @@ -1,412 +0,0 @@ -# Koan Design Decisions & Invariants - -Authoritative record of design decisions, invariants, and lessons learned -across the koan project. Distilled from 6 conversations (Feb 10-13 2026), -the master plan (plans/2026-02-10-init.md), and the approved tool registry -plan (~/.claude/plans/fluffy-hopping-zebra.md). - ---- - -## Fundamental Invariants - -### INV-1: Inversion of Control - -Scripts drive the LLM, not LLM drives scripts. The extension -programmatically feeds prompts, collects output, and enforces constraints. -The LLM is a worker, not a coordinator. 
This is the entire reason koan -exists -- the Claude Code skill model has the LLM in the driver's seat, -which causes unreliable workflow execution. - -### INV-2: Need-to-Know Principle - -The LLM always operates on a need-to-know basis. When given the choice -between exposing more or less information, always choose less. This is -a permanent invariant. - -Concrete implications: - -- No implementation details in prompts (temp dirs, state file paths, - orchestrator internals, phase routing) -- No full plan state when partial suffices (QR reviewer for design does - not see code plan or docs plan) -- No accumulated history across phases (subagents start fresh) -- No meta-instructions about the workflow ("you are step 3 of 14") -- No defensive over-specification of edge cases - -### INV-3: Pi Tool Error Contract - -Pi framework determines isError on ToolResultMessage from whether -tool.execute() THROWS, not from the return value. The returned isError -field is silently discarded (agent-loop.ts:316-357). To signal errors -from tools: always `throw new Error(msg)` -- never `return { isError: true }`. - ---- - -## Architecture Decisions - -### AD-1: Two LLM Interaction Levels - -- `spawn()` subagent: for all substantial work (architect, developer, - writer, QR decomposer, QR reviewer). -- `complete()` from pi-ai: NOT used in koan. No direct LLM calls - without agent loop. -- `sendUserMessage()` in parent session: NOT used. Planning is triggered via - the `koan_plan` MCP tool; conversation context is captured via `exportConversation()`. - -### AD-2: Self-Loading Extension Pattern - -Same extension file (extensions/koan.ts) serves both modes: - -- **Parent mode** (no --koan-role flag): registers the `koan_plan` MCP tool, - `/koan-execute`, `/koan-status` commands, and workflow dispatch. Zero overhead - in normal pi sessions. -- **Subagent mode** (--koan-role present): activates role-specific event - hooks (state machine, tool enforcement, step prompts). 
- -The extension detects which mode via flag presence at before_agent_start -time (not at init -- see AD-3). - -### AD-3: CLI Flag Timing - -Pi applies CLI flag values AFTER extension factory functions run -(main.ts:568). getFlag() returns defaults during factory time. -Subagent detection MUST happen in `before_agent_start`, not in the -factory function body. Uses closure-scoped `dispatched` boolean guard -to ensure one-shot dispatch. - -### AD-4: Tool-Call-Driven Step Transitions (Uniform Pattern) - -ALL step transitions use the koan_complete_step registered tool. The LLM -calls koan_complete_step -> tool execute() returns next step's prompt. -This works in both -p mode and interactive mode. `sendUserMessage()` is not -used; planning is triggered by the LLM invoking the `koan_plan` MCP tool. - -**KEY CORRECTION**: Early design (Feb 10) considered turn_end + -agent_end + sendUserMessage() chaining for step transitions. This was -ABANDONED because subagents in -p mode exit after the first agent loop -completes. Tool calls keep the agent loop alive within a single loop. - -**ANTI-PATTERN**: agent_end + sendUserMessage for retry was removed. -sendUserMessage is fire-and-forget in the extension binding. In -p mode -(subagents), the process can exit before the retry completes. Even in -interactive mode, some models say "calling tool X now" as text without -emitting a tool_call block, causing agent_end to fire spuriously. - -### AD-5: koan_complete_step Accepts Optional `thoughts` - -The extension is stateful -- it knows exactly which step the LLM is on -via closure state. No step number parameter needed. The tool response -contains the next step's full prompt. - -The optional `thoughts` parameter captures the model's work output -(analysis, findings, review) as a tool parameter instead of as text -output. 
This solves a cross-model compatibility issue: GPT-5-codex -cannot produce text + tool_call in the same response, so requiring -text output alongside a tool call caused it to narrate "Calling -koan_complete_step now" without emitting an actual tool_call block. - -### AD-6: Tool Naming Conventions - -Settled names (corrected from earlier iterations): - -- `koan_complete_step` (was koan_next_step -- renamed to accept `thoughts`) -- `koan_store_context` — REMOVED (was koan_finalize_context; removed with context-capture phase) -- `koan_store_plan` — REMOVED (see AD-14) -- `koan_plan` — MCP tool replacing the former `/koan plan` slash command -- Prompts use "instructions" not "actions" - -### AD-7: invoke_after Pattern Is Critical - -Every step prompt MUST have a clear "invoke after" directive telling -the LLM to call koan_complete_step after completing the step's work. -Mirrors the reference planner's "NEXT STEP: Command: python3 -m ... ---step N" pattern. Without this, the LLM produces text-only responses -and the agent loop exits. - -Implementation: formatStep() in src/planner/prompts/step.ts appends a -default invoke-after block. Steps can override with custom invokeAfter. - -The "WHEN DONE" + "Do NOT call until" creates a two-part gate: the LLM -must do work before advancing. Unconditional imperatives ("Execute this -tool now.") cause immediate tool calls because empty-param tool calls -have zero friction. - -### AD-8: Store Tools Need "Not Yet" Guidance - -(koan_store_context was removed with the context-capture phase; koan_store_plan -was removed earlier — see AD-14.) This pattern remains relevant for any -future store-style tools: tool description should include "DO NOT call this tool -until the step instructions explicitly tell you to." - -### AD-9: Subagent Progress Tracking - -Per-subagent state directory, NOT a single progress.json. -Structure: `/subagents/-/` -Contains: state.json, stdout.log, stderr.log. 
-ProgressReporter class manages state.json updates with trail. - -### AD-10: Embedded Planner Prompts + File-Based Conventions - -Planner subagent prompts are hard-coded in TypeScript at -`src/planner/lib/agent-prompts.ts` (architect, developer, -quality-reviewer, technical-writer). Phase loaders call -`loadAgentPrompt(...)`, so prompt availability does not depend on runtime -filesystem paths. - -Conventions remain file-based under `resources/conventions` so the LLM can -explore them directly with `Read`. `CONVENTIONS_DIR` is resolved at runtime -via `src/planner/lib/resources.ts` and injected into phase guidance where -needed. - -### AD-11: Plan Schema Self-Documentation via TypeBox - -No 300-line schema prompt embedded in step 6. Tool parameter schemas -with rich TypeBox descriptions are sufficient for the LLM to discover -the schema through tool definitions. This is the "most elegant" approach -per user preference. - -### AD-12: Context Capture Phases (REMOVED) - -The context-capture phase (draft/verify/refine sub-phases, koan_store_context -tool, context.json artifact) was removed. The parent conversation is now -exported as `conversation.jsonl` at `koan_plan` tool invocation. Phases that -need session context read the file directly via the `Read` tool. See -`src/planner/conversation.ts` for the export implementation. - -### AD-13: Default-Deny Tool Permissions - -Centralized Map> in src/planner/tools/registry.ts. -Unknown tools blocked in all phases. READ_TOOLS (read, bash, grep, glob, -find, ls) always allowed. WRITE_TOOLS (edit, write) always blocked during -planning. Missing phase keys are denied. - -Previous code had a "fails open" bug where tool_call handlers returned -undefined at the end of if-else chains, silently allowing unknown tools. - -### AD-14: Disk-Backed Plan Mutations (No Finalize) - -Each mutation tool: loadPlan(dir) -> mutate -> savePlan(plan, dir). -Atomic write. No in-memory accumulation + finalize pattern. 
The -koan_store_plan/koan_finalize_plan tool was REMOVED. - -Root cause: the LLM was skipping intermediate mutation tools and calling -koan_store_plan directly. The "build in memory then finalize" pattern -makes intermediate tools feel like ceremony. Immediate disk writes give -visible results per tool call. - -Every mutation tool returns descriptive feedback ("Added decision DL-003: -'Use polling'"). This prevents the LLM from skipping tools -- the LLM -needs evidence that each tool call produces results. - -### AD-15: Module Ownership - -- Plan-design prompts belong to the "architect" (plan-design.ts / - prompts/plan-design.ts) -- Conversation export belongs to session.ts / conversation.ts -- These are organizational decisions about which module owns which prompts - -### AD-16: 6-Step Architect Workflow (plan-design execute) - -1. Task Analysis & Exploration Planning -2. Codebase Exploration -3. Testing Strategy Discovery -4. Approach Generation -5. Assumption Surfacing -6. Milestone Definition & Plan Writing (plan mutation tools available) - -Steps 1-5: only READ_TOOLS + PLAN_GETTER_TOOLS + koan_complete_step allowed. -Step 6: plan mutation tools unlocked. - ---- - -## UI Decisions - -### UI-1: Planning Widget Cards & Timeline Rail -- Chosen on Feb 25 2026 via planning-widget design deck (Stacked Modular Cards + Vertical Timeline Rail). -- Rationale: make terminal output feel like a coherent operations workspace (not plain log spam), keep active progress glanceable, and preserve enough structure to scale into future phases without redesigning the shell. -- Implementation guardrails: - - Continue rendering through `canvasLine()` so the background fills full terminal width. - - Keep consistent card padding and solid-border framing through shared `renderBox()` helpers. - - Header metadata carries active workflow context (`Planning · · `), with timer right-aligned on the same row. - - The old phase-tab strip is removed (no duplicated heading context). 
- - Vertical rail remains width-bounded (~20 cols) so the right detail pane keeps enough budget for high-signal telemetry. - - Detail footer (`Plan · id`) is pinned bottom via dynamic padding, independent of timeline density. - - Planning body and latest-log body share one outer card, separated by an internal divider for better cohesion. - -### UI-2: Latest Log as Deterministic Dense Grid -- Chosen on Feb 25 2026 via follow-up deck (`Declarative Shape Table` + `Two-Column Dense Grid`). -- Rationale: long-running sessions need more than tool names; users must see intent without reading full payloads. Deterministic ordering reduces scan friction and makes anomalies obvious over time. -- Contract: - - Left column anchor is always tool name. - - Right column is deterministic summary from shape-table formatters (ID-first ordering for recognized tools). - - Unknown tools degrade to name-only output (generic fallback). - - Arrays render as first-item-plus-count; free-form fields render as size-only metadata. - - Getter tools include target metadata + response size (`resp:42L/3.1k`). - - Repeated events remain repeated (no collapse), preserving temporal audit fidelity. - - Column widths adapt to terminal width and observed tool-name lengths so detail space stays useful. - - In integrated mode, latest-log columns are forced to the same split as the planning body (`timelineWidth` / `detailWidth`) to keep vertical alignment stable. - - High-value rows may wrap to 2 lines only; deeper overflow is compacted with ellipsis to protect fixed card height. - -### UI-3: QR Integrated Section (Not Sidecar) -- Chosen on Feb 25 2026 via follow-up deck (`Inline Integrated Section + Divider`). -- Rationale: QR is the acceptance loop, not optional telemetry. Rendering it as an inline first-class section prevents the "detached widget" feel and matches how users reason about plan quality over time. 
-- Contract: - - QR is visible during Plan design, Plan code, and Plan docs (and contractually Plan execution). - - Iteration 1 enters `execute` immediately (same stage model as fix iterations); there is no separate `initializing` stage. - - Section includes: phase + iter/mode metadata, phase rail, and counters (`done/total/pass/fail/todo`) in a compact metadata block. - - Visual treatment uses inline sectioning + divider, not a nested bordered mini-card. - - Geometry is fixed for scan consistency: header + rail + counters + divider. - - Metadata uses a hard 64-char visible-width budget with progressive compaction (`exec/decomp/vfy`, `d/p/f/t`, `iN/M`) under narrow widths. - - Counter line emphasizes severity (`fail` highlighted in error color) so blocking issues pop in long sessions. - - Detail pane hierarchy is explicit: `Current step` label first, then step body, then QR section. - -### UI-4: Header-First Metadata (No Tabs Row) -- Chosen on Feb 26 2026 via follow-up deck focused on full-widget renders (`Phase-first header`). -- Rationale: the old title + tabs combination duplicated active-phase context and made the top of the widget feel offset from the frame. Consolidating into a full-width metadata header improves hierarchy and scan speed. -- Contract: - - Keep a full top border and render one header row: `Planning · · ` + right-aligned elapsed timer. - - Remove the dedicated tabs/chips row under the title. - - Keep phase progression in the left timeline rail (status history remains visible without tabs). - - Apply deterministic truncation in this order when width is constrained: abbreviate status -> drop status -> abbreviate phase label -> ellipsis. - - Footer identity table remains key/value aligned: `Plan ID`, `Agent`/`Agent pool`, `Model`. - -## Workflow Dispatch Architecture - -### WorkflowDispatch (dispatch pattern) - -Workflow tools (koan_complete_step) are registered once at init. Their -execute() callbacks read from a mutable dispatch object. 
Phases hook/unhook -dispatch slots at activation/deactivation time. - -hookDispatch() throws if a slot is already occupied -- prevents silent -misrouting when two phases try to claim the same tool. - -### PlanRef (mutable reference) - -All plan mutation tools share a mutable `{ dir: string | null }` set -when koan_plan tool creates a directory or when --koan-plan-dir is received. -Decouples tool registration (init-time) from directory creation (runtime). - -### Pi Registers Tools at \_buildRuntime() - -Pi snapshots tools during \_buildRuntime(). Tools registered after this -point are invisible to the LLM. All 44+ tools register unconditionally -at init; phases restrict access via tool_call blocking at runtime. - ---- - -## What Is NOT Ported from Reference Planner - -| Reference planner component | Koan replacement | -| --------------------------------------- | ------------------------------------- | -| CLI mutation scripts (cli/plan.py) | Pi extension tool registration | -| Thin router pattern (shared/routing.py) | Orchestrator deterministic gate logic | -| File-based state_dir | In-memory state + appendEntry() | -| Template dispatch | Direct process spawning | -| Constraint enforcement via prompt | tool_call event blocking | -| Agent markdown definitions | Self-loading extension pattern | -| Question relay handler | Not implemented (may add later) | - ---- - -## Bugs & Lessons Learned - -### BUG-1: LLM Conflates Tool Instructions with Plan Content - -In the former context-capture phase, the LLM captured tool usage instructions as -constraints (e.g. "Use read tool before modifying files; edit for -surgical changes"). These are irrelevant developer instructions, not -task constraints. Solution: prompts explicitly state "Only include -constraints that are specific to this task. Do not include general -tool usage instructions, coding style guides, or editor/IDE conventions." 
- -### BUG-2: LLM Skips Mutation Tools - -The LLM called koan_complete_step through steps 1-5, then at step 6 skipped -all mutation tools and called koan_store_plan directly. The in-memory -plan was empty. Root cause: mutation tools returned opaque JSON with no -feedback -- they felt like ceremony. Solution: remove finalize tool, -disk-backed mutations, descriptive feedback per tool call (AD-14). - -### BUG-3: tool_call Handlers Fail Open - -Original tool_call handlers returned undefined at end of if-else chains, -silently allowing any new tool. Solution: default-deny permissions map -(AD-13). - -### BUG-4: isError Return Value Discarded - -Pi discards the isError field from tool return values. Only throw/no-throw -determines error status. This caused silent failures where tools returned -{ isError: true } but the framework treated them as success. Solution: -always throw new Error(msg) for error conditions (INV-3). - -### BUG-5: Weak invoke_after Causes Step Skipping - -Original weak format ("Now call koan_next_step.") produced skipped steps. -The LLM called the tool immediately without doing work, because tool -calls with empty params have zero friction. Solution: strengthen to -"WHEN DONE: Call koan_complete_step with your findings in the `thoughts` -parameter. Do NOT call this tool until the work described in this step -is finished." - -### BUG-6: Flag Detection at Init Time - -Early implementation tried to detect --koan-role in the extension factory -function body. Flags are unavailable at that point (main.ts:568 sets them -after). Solution: move detection to before_agent_start with dispatched -guard (AD-3). - ---- - -## Plan JSON Schema - -Matches reference planner's Pydantic schema (shared/schema.py). -Types defined in src/planner/plan/types.ts. - -Key entities: Plan, Decision, RejectedAlternative, Risk, Milestone, -CodeIntent, CodeChange, Wave, DiagramGraph, ReadmeEntry, Overview, -InvisibleKnowledge, PlanningContext. 
- -Cross-reference validation: intent_ref -> intents, decision_ref -> -decisions, diagram edges source/target -> nodes, wave milestones -> milestone IDs. - ---- - -## QR Block Pattern - -Work -> Decompose -> Verify (parallel) -> Gate. Repeated per phase -(design, code, docs). Gate is deterministic code, no LLM. Max 5 -iterations. Force-proceed after limit. - -QR tools: koan_qr_add_item, koan_qr_set_item, koan_qr_assign_group, -koan_qr_get_item, koan_qr_list_items, koan_qr_summary. - ---- - -## Current Implementation State (Mar 1 2026) - -Implemented: - -- [x] Extension entry point with dual-mode detection -- [x] koan_plan MCP tool (replaces /koan plan slash command) -- [x] Conversation export to conversation.jsonl (replaces context-capture phase) -- [x] Plan-design architect subagent (6-step workflow) -- [x] Developer role (plan-code phase) -- [x] Technical writer role (plan-docs phase) -- [x] QR decompose subagent -- [x] QR verify subagent (parallel pool, concurrency 6) -- [x] QR gate routing + fix loop (up to MAX_FIX_ITERATIONS) -- [x] Fix mode (architect/developer/writer fix subagents) -- [x] 44+ plan mutation/getter tools with TypeBox schemas -- [x] Default-deny tool permissions (registry.ts) -- [x] WorkflowDispatch + PlanRef patterns -- [x] Subagent spawning with progress tracking -- [x] Disk-backed plan mutations (no finalize) -- [x] Plan validation (design + cross-references) - -Not yet implemented: - -- [ ] State persistence (appendEntry + session_start restore) -- [ ] Plan execution workflow (milestone execution) -- [ ] /koan-execute command diff --git a/docs/planning-widget.md b/docs/planning-widget.md index 639a4c0..a7d40a7 100644 --- a/docs/planning-widget.md +++ b/docs/planning-widget.md @@ -1,177 +1,140 @@ -# Planning Widget Refresh +# Planning Widget ## Context -The planning widget now follows the design-deck contract selected on Feb 25 2026: +The planning widget follows the stacked-card + timeline-rail layout and optimizes for long-running sessions 
(30-120 minutes). -- **Canvas direction:** Stacked Modular Cards -- **Navigation direction:** Vertical Timeline Rail -- **Header strategy:** Full-width top border + metadata header row (active phase in header, no tabs strip) -- **Log strategy:** Declarative shape-table serialization + dense two-column layout -- **Runtime strategy:** Unified runtime section (stage + quality + workers) integrated into the detail pane +The runtime pane is designed around one principle: -The goal is to keep a long-running (1-2h) planning session readable in real time while preserving high-signal audit telemetry. +- show where the active subagent is **inside its workflow** (`step number + step title`), +- not the orchestrator's internal QR fix-loop iteration counter. -## Decisions & Rationale +## Design Goals -### 1) Deterministic log serialization (hybrid detail) -- Keep **tool name** as the primary scan anchor. -- Use a declarative per-tool formatter table for known `koan_*` tools. -- Unknown tools fall back to tool-name-only output. -- Field order is deterministic and curated (e.g., IDs first), not alphabetical. +1. **Immediate progress readability** + - The user should answer “how far along are we?” in one glance. +2. **Active worker clarity** + - The widget should show who is running now and pool load (`queued/active/done`). +3. **Meaningful output accounting** + - Show entity modifications as `+delta (total)`. +4. **Stable visual scan path** + - Header + timeline + runtime + latest log remain in fixed positions. -**Rationale:** Users scan continuously during execution; stable order makes visual parsing faster and reduces cognitive churn between updates. +## Runtime Information Hierarchy -### 2) Selective detail by field type -- Arrays render as **first item + count** (`[first] +N`). -- Free-form fields (`diff`, `doc_diff`, `comments`, large narrative strings) render as **size metadata only** (`184L/9.2k`), never full body. 
-- Getter tools (`koan_get_*`) show target identifiers plus response size metadata (`resp:42L/3.1k`). +From highest to lowest priority: -**Rationale:** Maintains observability without blowing out vertical space or flooding with low-value text. +1. `step` (`current/total · title`) +2. step-based progress bar +3. active subagents block (role/model/load/mode) +4. modifications block (`Δ / total`) +5. latest log (auditable tail) -### 3) Latest log as dense two-column grid -- Left column: tool name (bold accent anchor). -- Right column: compact deterministic summary. -- Column widths adapt to available terminal width + observed tool-name lengths (protecting right-column readability). -- High-value rows may wrap to 2 lines; if overflow exceeds 2 lines, the second line is re-compacted with ellipsis. -- Repeated events remain separate rows (no dedup/collapse). +## Layout Overview -**Rationale:** Preserves temporal fidelity while increasing information density and keeping the "what just happened" answer immediate, even under constrained widths. +``` +┌──────────────────────────────────── Runtime ──────────────────────────────────── 33m 14s ┐ +│ step : 2/6 · Codebase Exploration │ +│ progress : ███████░░░░░░░░░░ 33% │ +│──────────────────────────────────────────┬──────────────────────────────────────────│ +│ active subagents │ modifications (Δ / total) │ +│ role : architect │ milestones : +2 (6) │ +│ model : anthropic/claude-opus-4-6 │ decisions : +1 (9) │ +│ load : queued 0 active 1 done 0 │ intents : +4 (18) │ +│ mode : single │ changes : +0 (3) │ +└──────────────────────────────────────────┴──────────────────────────────────────────┘ +``` -### 4) Runtime is a first-class workflow section -- Runtime renders inline in the detail pane (no detached mini-card border). -- Visible during Plan design, Plan code, and Plan docs (and contractually Plan execution). -- Runtime unifies stage + quality counters + worker counters in one block. 
-- Stage follows the QR lifecycle (`execute`, `decompose`, `verify`, `done`) but uses user-facing labels (`Writing`, `Fixing`, `Analyzing`, `Verifying`, `Complete`). -- Quality counters emphasize severity: `FAIL` is error-colored; `pass` is accent; others remain muted/dim. +Elapsed time remains right-aligned in the top row. -**Rationale:** Review quality and worker throughput are part of one runtime story. Unifying them removes competing mini-status bars while keeping the left timeline as the primary progress signal. +## Phase-Specific Modifications Panel -### 5) Header-first metadata, tabs removed -- Keep a full top border and put active workflow context directly in the header row. -- Header format is phase-first: `Planning · · ` on the left, elapsed timer right-aligned. -- Remove the separate phase-tabs strip entirely; it is redundant once active context is in the header. -- Keep timeline rows in the body (left rail) because they provide progression context and status history, unlike tabs. +### A) Plan design / plan code / plan docs / execution +Show plan-modification counters: -**Rationale:** The previous title treatment felt detached from the frame and duplicated information with the tabs row. Consolidating context into the header yields a cleaner hierarchy and better information density in TUI constraints. 
+- `milestones : +Δ (total)` +- `decisions : +Δ (total)` +- `intents : +Δ (total)` +- `changes : +Δ (total)` -## Layout Overview -``` -┌────────────────────────────────────────────────────────────────────────────────┐ -│ Planning · Plan design · CURRENT 12m 22s │ -│ │ -│ ● Plan design Runtime │ -│ │ CURRENT stage : Writing (cycle 1/6 · initial) │ -│ │ quality : checked -/- pass - FAIL - remaining - │ -│ ○ Plan code workers : queued 0 active 1 done 0 pool ×1 │ -│ │ UPCOMING │ -│ ○ Plan docs │ -│ UPCOMING │ -│ Plan ID : │ -│ Agent : architect │ -│ Model : openai-codex/gpt-5.3-codex │ -│────────────────────────────────────────────────────────────────────────────────│ -│ Latest log │ -│ koan_set_milestone_tests id=M-002 · tests:["covers retries"] +7 │ -│ koan_get_milestone id=M-002 · resp:42L/3.1k │ -│ koan_add_intent milestone=M-002 · file=src/planner/ui/widget.ts │ -│ koan_set_change_diff id=CC-M-001-002 · diff:184L/9.2k │ -│ koan_qr_assign_group phase=plan-design · ids:[QR-001] +11 │ -└────────────────────────────────────────────────────────────────────────────────┘ -``` +### B) QR decompose +Show QR decomposition counters: -## Rendering Guide -1. **Canvas** – Keep using `canvasLine()` so widget content remains full-width over `toolPendingBg`. -2. **Main card** – Keep one solid outer border + a full top rule. No cutout title and no detached title badge. -3. **Header row** – Render `Planning · · ` on the left and elapsed timer right-aligned on the same row. -4. **No tabs strip** – Do not render a separate phase-tabs row under the header. Active phase context now lives in header metadata. -5. **Timeline rail** – Maintain status icon/color semantics (`active=accent`, `done=dim`, `failed=error`). -6. **Detail pane** – Render in this order: - - Runtime section (if stage/quality/workers are active) - - identity table (`Plan ID`, `Agent`/`Agent pool`, `Model`) pinned low in pane -7. 
**Runtime section** – Use inline `Runtime` header plus key/value rows: - - `stage` + cycle metadata - - `quality` counters (`checked/pass/FAIL/remaining`) - - `workers` counters (`queued/active/done`) + pool capacity - Keep this as one cohesive block to avoid competing status bars. -8. **Latest log section** – Keep it inside the same outer card, separated by a horizontal divider. Reuse the same left/right column split (`timelineWidth` / `detailWidth`) and gap as the planning body so vertical alignment stays consistent. - -## Header + Alignment Contract - -### Header composition -- Inner card width is `W` (visible cells, excluding borders). -- Timer token is right-aligned and reserved first (`T` visible cells). -- Left header budget is `W - T - 1` (one spacer between left and right chunks). -- Base left chunk: `Planning · · `. - -### Progressive compaction (left header) -Apply in order until it fits: -1. `CURRENT` -> `CUR`, `UPCOMING` -> `UP`, `DONE` unchanged. -2. Drop status chunk (keep `Planning · `). -3. Abbreviate known phases (`Plan design` -> `Design`, `Plan code` -> `Code`, `Plan docs` -> `Docs`). -4. Ellipsize active phase tail (`Planning · `). - -### Metadata table alignment -- Keys are fixed labels: `Plan ID`, `Agent` or `Agent pool`, `Model`. -- Compute key column width from max visible key length in the rendered set. -- Use a fixed `" : "` separator. -- Values are right-column free text, truncated with ellipsis when overflowing pane width. - -### Latest-log alignment -- Keep deterministic two-column geometry shared with body split. -- Left column width is based on observed max tool name (capped); right column gets remaining width. -- High-value rows may wrap to two lines max; second line must still obey right-column width budget. 
+- `qr items added : +Δ (total)` +- `qr items updated : +Δ (total)` +- `groups assigned : +Δ (total)` + +### C) QR verify +Show explicit placeholder (by design): + +- `[placeholder]` +- `qr-verify counters not instrumented yet` + +This placeholder is intentional and must be rendered explicitly rather than silently omitting the panel. + +## Rendering Contract + +1. **Header row** + - Left: `Planning · <active phase> · <status>` + - Right: elapsed timer + - Keep deterministic compaction when width is constrained. + +2. **Timeline rail (left column)** + - Keep phase icons/status semantics (`pending/running/completed/failed`). + +3. **Runtime detail (right column)** + - First two lines are always step + progress bar. + - Then split into two panes: + - left: `active subagents` + - right: `modifications` + +4. **Latest log** + - Keep current deterministic two-column rendering and tool-shape serialization. + +## Progress Semantics + +- Primary progress is based on active subagent workflow steps. +- The progress bar denominator is the subagent’s step total. +- For `qr-verify`, where reviewer execution is pooled, progress uses grouped verification progress (`done/total groups`) as the step/progress source. +- QR fix-loop cycle counters are internal orchestration state and are not part of the primary runtime progress display. + +## Active Subagents Semantics + +Runtime subagent block renders aggregate execution state: -## Data Contract Notes -- Header metadata state includes: - - `activePhaseLabel`, `activePhaseStatus`, `elapsed` -- `LogLine` now carries: - - `tool` (left column) - - `summary` (right column) - - `highValue` (whether 2-line wrap is allowed) -- QR state in widget includes: - - `qrIteration`, `qrIterationsMax`, `qrMode`, `qrPhase` - - `qrDone`, `qrTotal`, `qrPass`, `qrFail`, `qrTodo` - -## Future Work (contracted, not yet implemented) -- Plan execution phase should reuse the same Runtime section semantics. 
-- Optional compact mode for very narrow terminals can reduce metadata verbosity while preserving deterministic ordering. - -## Update: Unified Runtime Section + Subagent Identity (2026-03-04) - -This update replaces the split QR/subagent status blocks with a single runtime -status section in the right pane. - -### Runtime merge (stage + quality + workers) -- The detail pane now has one **Runtime** section. -- Runtime includes: - - `stage` (`Writing` / `Fixing` / `Analyzing` / `Verifying` / `Complete`) with cycle metadata. - - `quality` counters (`checked`, `pass`, `FAIL`, `remaining`). - - `workers` counters (`queued`, `active`, `done`) plus pool capacity. -- The left timeline remains the primary progress signal. - -### `x` meaning in parallel mode -- `x` means configured pool capacity (target parallelism), not active count. -- Active movement remains in `queued/active/done` counters. - -### Footer identity table standard -Use a unified key/value footer block: - -- `Plan ID : ` -- `Agent : ` (single subagent) -- `Agent pool : x` (parallel mode) -- `Model : ` - -### Generic rendering rule -The widget should remain role-agnostic and render identity from generic metadata -only: - `role` -- `parallelCount` - `model` +- `load` (`queued`, `active`, `done`) +- `mode` (`single` or `pool ×N`) + +`×N` denotes configured pool capacity (target parallelism), not current active count. -Label/value rule: -- `parallelCount > 1` -> `Agent pool : x` -- otherwise -> `Agent : ` +## Modifications Counter Semantics + +Formatting rule: + +- `+2 (6)` means **delta +2**, **current total 6**. + +General rules: + +- Delta is scoped to the currently running phase block. +- Total is the current persisted artifact count at render time. +- Missing counters should render explicit placeholders (never blank rows). + +## Data Contract Notes -### View-composition pattern -Use section-level selectors/renderers so `runtime-status` and `identity` remain -independently composable and testable. 
+- Header metadata: active phase label/status + elapsed time. +- Step/progress data: step index, step total, step title (or grouped verify progress fallback). +- Subagent telemetry: role, model, parallel count, queued/active/done. +- Log lines: deterministic `tool + summary` rows. +- Modification counters: + - plan phases: milestones/decisions/intents/changes (delta + total) + - qr-decompose: added/updated/grouped (delta + total) + - qr-verify: explicit placeholder. + +## Rationale Summary + +- Step-first progress reduces ambiguity during long runs. +- Aggregate subagent telemetry keeps runtime compact while still explaining throughput. +- `Δ / total` counters answer both “what changed recently?” and “how much exists now?”. +- Explicit placeholders prevent silent uncertainty during uninstrumented phases. +- Stable layout preserves user orientation while high-frequency updates stream in. From bf69a315bf3254e3fcf24c050fc1bc12dd6a3c07 Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Wed, 18 Mar 2026 23:47:48 +0700 Subject: [PATCH 048/412] add esbuild, preact, zustand dev dependencies --- package-lock.json | 550 +++++++++++++++++++++++++++++++++++++++++++++- package.json | 8 +- 2 files changed, 555 insertions(+), 3 deletions(-) diff --git a/package-lock.json b/package-lock.json index 3859420..cd93c99 100644 --- a/package-lock.json +++ b/package-lock.json @@ -13,7 +13,10 @@ }, "devDependencies": { "@mariozechner/pi-coding-agent": "^0.52.10", - "typescript": "^5.9.3" + "esbuild": "^0.25.1", + "preact": "^10.26.2", + "typescript": "^5.9.3", + "zustand": "^4.5.7" } }, "node_modules/@anthropic-ai/sdk": { @@ -840,6 +843,448 @@ "url": "https://github.com/sponsors/Borewit" } }, + "node_modules/@esbuild/aix-ppc64": { + "version": "0.25.12", + "resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.25.12.tgz", + "integrity": "sha512-Hhmwd6CInZ3dwpuGTF8fJG6yoWmsToE+vYgD4nytZVxcu1ulHpUQRAB1UJ8+N1Am3Mz4+xOByoQoSZf4D+CpkA==", + "cpu": [ + "ppc64" + ], + "dev": true, 
+ "license": "MIT", + "optional": true, + "os": [ + "aix" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/android-arm": { + "version": "0.25.12", + "resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.25.12.tgz", + "integrity": "sha512-VJ+sKvNA/GE7Ccacc9Cha7bpS8nyzVv0jdVgwNDaR4gDMC/2TTRc33Ip8qrNYUcpkOHUT5OZ0bUcNNVZQ9RLlg==", + "cpu": [ + "arm" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/android-arm64": { + "version": "0.25.12", + "resolved": "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.25.12.tgz", + "integrity": "sha512-6AAmLG7zwD1Z159jCKPvAxZd4y/VTO0VkprYy+3N2FtJ8+BQWFXU+OxARIwA46c5tdD9SsKGZ/1ocqBS/gAKHg==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/android-x64": { + "version": "0.25.12", + "resolved": "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.25.12.tgz", + "integrity": "sha512-5jbb+2hhDHx5phYR2By8GTWEzn6I9UqR11Kwf22iKbNpYrsmRB18aX/9ivc5cabcUiAT/wM+YIZ6SG9QO6a8kg==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/darwin-arm64": { + "version": "0.25.12", + "resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.25.12.tgz", + "integrity": "sha512-N3zl+lxHCifgIlcMUP5016ESkeQjLj/959RxxNYIthIg+CQHInujFuXeWbWMgnTo4cp5XVHqFPmpyu9J65C1Yg==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/darwin-x64": { + "version": "0.25.12", + "resolved": "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.25.12.tgz", + "integrity": 
"sha512-HQ9ka4Kx21qHXwtlTUVbKJOAnmG1ipXhdWTmNXiPzPfWKpXqASVcWdnf2bnL73wgjNrFXAa3yYvBSd9pzfEIpA==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/freebsd-arm64": { + "version": "0.25.12", + "resolved": "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.25.12.tgz", + "integrity": "sha512-gA0Bx759+7Jve03K1S0vkOu5Lg/85dou3EseOGUes8flVOGxbhDDh/iZaoek11Y8mtyKPGF3vP8XhnkDEAmzeg==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/freebsd-x64": { + "version": "0.25.12", + "resolved": "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.25.12.tgz", + "integrity": "sha512-TGbO26Yw2xsHzxtbVFGEXBFH0FRAP7gtcPE7P5yP7wGy7cXK2oO7RyOhL5NLiqTlBh47XhmIUXuGciXEqYFfBQ==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-arm": { + "version": "0.25.12", + "resolved": "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.25.12.tgz", + "integrity": "sha512-lPDGyC1JPDou8kGcywY0YILzWlhhnRjdof3UlcoqYmS9El818LLfJJc3PXXgZHrHCAKs/Z2SeZtDJr5MrkxtOw==", + "cpu": [ + "arm" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-arm64": { + "version": "0.25.12", + "resolved": "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.25.12.tgz", + "integrity": "sha512-8bwX7a8FghIgrupcxb4aUmYDLp8pX06rGh5HqDT7bB+8Rdells6mHvrFHHW2JAOPZUbnjUpKTLg6ECyzvas2AQ==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-ia32": { + "version": "0.25.12", + "resolved": 
"https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.25.12.tgz", + "integrity": "sha512-0y9KrdVnbMM2/vG8KfU0byhUN+EFCny9+8g202gYqSSVMonbsCfLjUO+rCci7pM0WBEtz+oK/PIwHkzxkyharA==", + "cpu": [ + "ia32" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-loong64": { + "version": "0.25.12", + "resolved": "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.25.12.tgz", + "integrity": "sha512-h///Lr5a9rib/v1GGqXVGzjL4TMvVTv+s1DPoxQdz7l/AYv6LDSxdIwzxkrPW438oUXiDtwM10o9PmwS/6Z0Ng==", + "cpu": [ + "loong64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-mips64el": { + "version": "0.25.12", + "resolved": "https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.25.12.tgz", + "integrity": "sha512-iyRrM1Pzy9GFMDLsXn1iHUm18nhKnNMWscjmp4+hpafcZjrr2WbT//d20xaGljXDBYHqRcl8HnxbX6uaA/eGVw==", + "cpu": [ + "mips64el" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-ppc64": { + "version": "0.25.12", + "resolved": "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.25.12.tgz", + "integrity": "sha512-9meM/lRXxMi5PSUqEXRCtVjEZBGwB7P/D4yT8UG/mwIdze2aV4Vo6U5gD3+RsoHXKkHCfSxZKzmDssVlRj1QQA==", + "cpu": [ + "ppc64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-riscv64": { + "version": "0.25.12", + "resolved": "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.25.12.tgz", + "integrity": "sha512-Zr7KR4hgKUpWAwb1f3o5ygT04MzqVrGEGXGLnj15YQDJErYu/BGg+wmFlIDOdJp0PmB0lLvxFIOXZgFRrdjR0w==", + "cpu": [ + "riscv64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + 
"engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-s390x": { + "version": "0.25.12", + "resolved": "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.25.12.tgz", + "integrity": "sha512-MsKncOcgTNvdtiISc/jZs/Zf8d0cl/t3gYWX8J9ubBnVOwlk65UIEEvgBORTiljloIWnBzLs4qhzPkJcitIzIg==", + "cpu": [ + "s390x" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-x64": { + "version": "0.25.12", + "resolved": "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.25.12.tgz", + "integrity": "sha512-uqZMTLr/zR/ed4jIGnwSLkaHmPjOjJvnm6TVVitAa08SLS9Z0VM8wIRx7gWbJB5/J54YuIMInDquWyYvQLZkgw==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/netbsd-arm64": { + "version": "0.25.12", + "resolved": "https://registry.npmjs.org/@esbuild/netbsd-arm64/-/netbsd-arm64-0.25.12.tgz", + "integrity": "sha512-xXwcTq4GhRM7J9A8Gv5boanHhRa/Q9KLVmcyXHCTaM4wKfIpWkdXiMog/KsnxzJ0A1+nD+zoecuzqPmCRyBGjg==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "netbsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/netbsd-x64": { + "version": "0.25.12", + "resolved": "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.25.12.tgz", + "integrity": "sha512-Ld5pTlzPy3YwGec4OuHh1aCVCRvOXdH8DgRjfDy/oumVovmuSzWfnSJg+VtakB9Cm0gxNO9BzWkj6mtO1FMXkQ==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "netbsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/openbsd-arm64": { + "version": "0.25.12", + "resolved": "https://registry.npmjs.org/@esbuild/openbsd-arm64/-/openbsd-arm64-0.25.12.tgz", + "integrity": "sha512-fF96T6KsBo/pkQI950FARU9apGNTSlZGsv1jZBAlcLL1MLjLNIWPBkj5NlSz8aAzYKg+eNqknrUJ24QBybeR5A==", + "cpu": [ + "arm64" + 
], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "openbsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/openbsd-x64": { + "version": "0.25.12", + "resolved": "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.25.12.tgz", + "integrity": "sha512-MZyXUkZHjQxUvzK7rN8DJ3SRmrVrke8ZyRusHlP+kuwqTcfWLyqMOE3sScPPyeIXN/mDJIfGXvcMqCgYKekoQw==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "openbsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/openharmony-arm64": { + "version": "0.25.12", + "resolved": "https://registry.npmjs.org/@esbuild/openharmony-arm64/-/openharmony-arm64-0.25.12.tgz", + "integrity": "sha512-rm0YWsqUSRrjncSXGA7Zv78Nbnw4XL6/dzr20cyrQf7ZmRcsovpcRBdhD43Nuk3y7XIoW2OxMVvwuRvk9XdASg==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "openharmony" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/sunos-x64": { + "version": "0.25.12", + "resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.25.12.tgz", + "integrity": "sha512-3wGSCDyuTHQUzt0nV7bocDy72r2lI33QL3gkDNGkod22EsYl04sMf0qLb8luNKTOmgF/eDEDP5BFNwoBKH441w==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "sunos" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/win32-arm64": { + "version": "0.25.12", + "resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.25.12.tgz", + "integrity": "sha512-rMmLrur64A7+DKlnSuwqUdRKyd3UE7oPJZmnljqEptesKM8wx9J8gx5u0+9Pq0fQQW8vqeKebwNXdfOyP+8Bsg==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/win32-ia32": { + "version": "0.25.12", + "resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.25.12.tgz", + "integrity": 
"sha512-HkqnmmBoCbCwxUKKNPBixiWDGCpQGVsrQfJoVGYLPT41XWF8lHuE5N6WhVia2n4o5QK5M4tYr21827fNhi4byQ==", + "cpu": [ + "ia32" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/win32-x64": { + "version": "0.25.12", + "resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.25.12.tgz", + "integrity": "sha512-alJC0uCZpTFrSL0CCDjcgleBXPnCrEAhTBILpeAp7M/OFgoqtAetfBzX0xM00MUsVVPpVjlPuMbREqnZCXaTnA==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=18" + } + }, "node_modules/@google/genai": { "version": "1.41.0", "resolved": "https://registry.npmjs.org/@google/genai/-/genai-1.41.0.tgz", @@ -2387,6 +2832,48 @@ "dev": true, "license": "MIT" }, + "node_modules/esbuild": { + "version": "0.25.12", + "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.25.12.tgz", + "integrity": "sha512-bbPBYYrtZbkt6Os6FiTLCTFxvq4tt3JKall1vRwshA3fdVztsLAatFaZobhkBC8/BrPetoa0oksYoKXoG4ryJg==", + "dev": true, + "hasInstallScript": true, + "license": "MIT", + "bin": { + "esbuild": "bin/esbuild" + }, + "engines": { + "node": ">=18" + }, + "optionalDependencies": { + "@esbuild/aix-ppc64": "0.25.12", + "@esbuild/android-arm": "0.25.12", + "@esbuild/android-arm64": "0.25.12", + "@esbuild/android-x64": "0.25.12", + "@esbuild/darwin-arm64": "0.25.12", + "@esbuild/darwin-x64": "0.25.12", + "@esbuild/freebsd-arm64": "0.25.12", + "@esbuild/freebsd-x64": "0.25.12", + "@esbuild/linux-arm": "0.25.12", + "@esbuild/linux-arm64": "0.25.12", + "@esbuild/linux-ia32": "0.25.12", + "@esbuild/linux-loong64": "0.25.12", + "@esbuild/linux-mips64el": "0.25.12", + "@esbuild/linux-ppc64": "0.25.12", + "@esbuild/linux-riscv64": "0.25.12", + "@esbuild/linux-s390x": "0.25.12", + "@esbuild/linux-x64": "0.25.12", + "@esbuild/netbsd-arm64": "0.25.12", + "@esbuild/netbsd-x64": "0.25.12", + "@esbuild/openbsd-arm64": "0.25.12", + 
"@esbuild/openbsd-x64": "0.25.12", + "@esbuild/openharmony-arm64": "0.25.12", + "@esbuild/sunos-x64": "0.25.12", + "@esbuild/win32-arm64": "0.25.12", + "@esbuild/win32-ia32": "0.25.12", + "@esbuild/win32-x64": "0.25.12" + } + }, "node_modules/escalade": { "version": "3.2.0", "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.2.0.tgz", @@ -3237,6 +3724,17 @@ "url": "https://github.com/sponsors/isaacs" } }, + "node_modules/preact": { + "version": "10.29.0", + "resolved": "https://registry.npmjs.org/preact/-/preact-10.29.0.tgz", + "integrity": "sha512-wSAGyk2bYR1c7t3SZ3jHcM6xy0lcBcDel6lODcs9ME6Th++Dx2KU+6D3HD8wMMKGA8Wpw7OMd3/4RGzYRpzwRg==", + "dev": true, + "license": "MIT", + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/preact" + } + }, "node_modules/proper-lockfile": { "version": "4.1.2", "resolved": "https://registry.npmjs.org/proper-lockfile/-/proper-lockfile-4.1.2.tgz", @@ -3311,6 +3809,17 @@ "dev": true, "license": "MIT" }, + "node_modules/react": { + "version": "19.2.4", + "resolved": "https://registry.npmjs.org/react/-/react-19.2.4.tgz", + "integrity": "sha512-9nfp2hYpCwOjAN+8TZFGhtWEwgvWHXqESH8qT89AT/lWklpLON22Lc8pEtnpsZz7VmawabSU0gCjnj8aC0euHQ==", + "dev": true, + "license": "MIT", + "peer": true, + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/require-directory": { "version": "2.1.1", "resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz", @@ -3866,6 +4375,16 @@ "dev": true, "license": "MIT" }, + "node_modules/use-sync-external-store": { + "version": "1.6.0", + "resolved": "https://registry.npmjs.org/use-sync-external-store/-/use-sync-external-store-1.6.0.tgz", + "integrity": "sha512-Pp6GSwGP/NrPIrxVFAIkOQeyw8lFenOHijQWkUTrDvrF4ALqylP2C/KCkeS9dpUM3KvYRQhna5vt7IL95+ZQ9w==", + "dev": true, + "license": "MIT", + "peerDependencies": { + "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0" + } + }, "node_modules/web-streams-polyfill": { "version": "3.3.3", "resolved": 
"https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-3.3.3.tgz", @@ -4039,6 +4558,35 @@ "peerDependencies": { "zod": "^3.25 || ^4" } + }, + "node_modules/zustand": { + "version": "4.5.7", + "resolved": "https://registry.npmjs.org/zustand/-/zustand-4.5.7.tgz", + "integrity": "sha512-CHOUy7mu3lbD6o6LJLfllpjkzhHXSBlX8B9+qPddUsIfeF5S/UZ5q0kmCsnRqT1UHFQZchNFDDzMbQsuesHWlw==", + "dev": true, + "license": "MIT", + "dependencies": { + "use-sync-external-store": "^1.2.2" + }, + "engines": { + "node": ">=12.7.0" + }, + "peerDependencies": { + "@types/react": ">=16.8", + "immer": ">=9.0.6", + "react": ">=16.8" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "immer": { + "optional": true + }, + "react": { + "optional": true + } + } } } } diff --git a/package.json b/package.json index b3ebab9..a7f9e41 100644 --- a/package.json +++ b/package.json @@ -24,7 +24,8 @@ ], "scripts": { "check": "tsc --noEmit", - "build": "tsc --project tsconfig.build.json", + "build:web": "esbuild src/planner/web/js/app.jsx --bundle --format=esm --jsx=automatic --jsx-import-source=preact --alias:react=preact/compat --alias:react-dom=preact/compat --outfile=src/planner/web/dist/app.js --minify", + "build": "npm run build:web && tsc --project tsconfig.build.json", "pretest": "npm run build", "test": "node --test --test-concurrency=1 build/tests" }, @@ -33,6 +34,9 @@ }, "devDependencies": { "@mariozechner/pi-coding-agent": "^0.52.10", - "typescript": "^5.9.3" + "esbuild": "^0.25.1", + "preact": "^10.26.2", + "typescript": "^5.9.3", + "zustand": "^4.5.7" } } From 4d00b459d1e8e1e180452a044c48d70b1228ee62 Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Wed, 18 Mar 2026 23:47:56 +0700 Subject: [PATCH 049/412] add task manifest module for subagent directory contract --- src/planner/lib/task.ts | 117 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 117 insertions(+) create mode 100644 src/planner/lib/task.ts diff --git a/src/planner/lib/task.ts 
b/src/planner/lib/task.ts new file mode 100644 index 0000000..e38da74 --- /dev/null +++ b/src/planner/lib/task.ts @@ -0,0 +1,117 @@ +// Subagent task manifest — the input contract for every subagent process. +// Written by the parent to {subagentDir}/task.json before spawn; +// read by the child exactly once at startup via readTaskFile(). +// +// This is one of three well-known JSON files in every subagent directory: +// task.json — what to do (parent writes before spawn, child reads once) +// state.json — what has been done (child writes continuously, parent polls) +// ipc.json — what is needed now (both sides, transient per-request) +// +// The discriminated union on `role` keeps role-specific fields naturally +// nested rather than collapsed into a flat CLI flag namespace. This directly +// prevents the naming collisions the old flag approach produced — e.g., the +// previous `--koan-role` (pipeline role: "scout") vs `--koan-scout-role` +// (investigator persona: "security auditor") collision is impossible here +// because ScoutTask.role and ScoutTask.investigatorRole are distinct typed +// fields on a struct, not adjacent strings in a flat namespace. + +import { promises as fs } from "node:fs"; +import * as path from "node:path"; + +import type { SubagentRole, StepSequence } from "../types.js"; + +// -- Task types -- + +interface SubagentTaskBase { + role: SubagentRole; + epicDir: string; +} + +/** Task manifest for intake subagents. */ +export interface IntakeTask extends SubagentTaskBase { + role: "intake"; +} + +/** + * Task manifest for scout subagents. Written by the IPC responder when a + * planning role (intake, decomposer, planner) calls koan_request_scouts. + */ +export interface ScoutTask extends SubagentTaskBase { + role: "scout"; + /** The narrow investigation question, injected verbatim into step 1 guidance. */ + question: string; + /** + * Output path relative to subagentDir (e.g. "findings.md"). 
+ * Stored relative so the manifest is location-independent. + * Resolved to absolute by dispatch: `path.join(ctx.subagentDir!, task.outputFile)`. + */ + outputFile: string; + /** Investigator persona for the scout LLM (e.g. "security auditor", "API analyst"). */ + investigatorRole: string; +} + +/** Task manifest for decomposer subagents. */ +export interface DecomposerTask extends SubagentTaskBase { + role: "decomposer"; +} + +/** Task manifest for orchestrator subagents. */ +export interface OrchestratorTask extends SubagentTaskBase { + role: "orchestrator"; + stepSequence: StepSequence; + storyId?: string; +} + +/** Task manifest for planner subagents. */ +export interface PlannerTask extends SubagentTaskBase { + role: "planner"; + storyId: string; +} + +/** Task manifest for executor subagents. */ +export interface ExecutorTask extends SubagentTaskBase { + role: "executor"; + storyId: string; + /** + * Failure summary from a previous execution attempt, sourced from the + * `failure_summary` parameter of `koan_retry_story`. Absent on first run. + */ + retryContext?: string; +} + +// The union is exhaustive over all six roles. TypeScript narrows task.role +// in switch/case so role-specific fields are accessible without casting. +export type SubagentTask = + | IntakeTask + | ScoutTask + | DecomposerTask + | OrchestratorTask + | PlannerTask + | ExecutorTask; + +// -- File paths -- + +const TASK_FILE = "task.json"; +const TASK_TMP_FILE = ".task.tmp.json"; + +// -- I/O -- + +// Atomically writes task.json to subagentDir (tmp → rename). +// MUST be called before spawn() — the child reads this file at startup and +// throws if it is missing. There is no recovery path if it arrives late. 
+export async function writeTaskFile(subagentDir: string, task: SubagentTask): Promise { + const tmp = path.join(subagentDir, TASK_TMP_FILE); + const target = path.join(subagentDir, TASK_FILE); + await fs.writeFile(tmp, `${JSON.stringify(task, null, 2)}\n`, "utf8"); + await fs.rename(tmp, target); +} + +// Reads and parses task.json from subagentDir. +// Called exactly once, during before_agent_start in koan.ts. +// Throws on missing file or JSON parse error — both indicate a programming +// error in the parent (wrote no file, or wrote malformed JSON), not a +// recoverable runtime condition. +export async function readTaskFile(subagentDir: string): Promise { + const raw = await fs.readFile(path.join(subagentDir, TASK_FILE), "utf8"); + return JSON.parse(raw) as SubagentTask; +} From 62d3d7b07e05fdd8a6c382133e3613d31d53b6fa Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Wed, 18 Mar 2026 23:48:04 +0700 Subject: [PATCH 050/412] add bash output truncation override for large skill outputs --- src/planner/lib/truncation-override.ts | 90 ++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) create mode 100644 src/planner/lib/truncation-override.ts diff --git a/src/planner/lib/truncation-override.ts b/src/planner/lib/truncation-override.ts new file mode 100644 index 0000000..d7e2338 --- /dev/null +++ b/src/planner/lib/truncation-override.ts @@ -0,0 +1,90 @@ +// Raises the effective truncation limit for bash tool output in koan subagents. +// +// Pi's built-in bash tool truncates output to 50KB / 2000 lines. When the +// prompt-engineer skill (or any skill that concatenates large reference files +// to stdout) runs via bash, the LLM loses critical context mid-output. +// +// Instead of replacing the built-in bash tool, we intercept the tool_result +// event. When truncation occurred, the bash tool has already saved the full +// output to a temp file. We re-read that file and apply truncateTail with +// higher limits, then return the replacement content. 
This is surgical — +// it only activates when truncation actually happened and a temp file exists. +// +// Why tool_result interception rather than registering a replacement bash tool: +// - No duplication of the bash tool implementation (exec, streaming, exit codes) +// - The bash tool's temp file mechanism is the key enabler — the full output +// is already on disk before the event fires +// - Zero cost when output fits within the default limits (handler exits early) +// +// Registration is unconditional (not gated on subagent mode) because both +// parent sessions running skills directly and spawned subagent processes +// benefit from higher limits. The truncation guard makes it a no-op for +// outputs that fit within pi's defaults. +// +// Audit handler ordering: the audit tool_result handler (registered inside +// before_agent_start, after this one) records the ORIGINAL event content +// because it does not return a modified result — it only appends to the log. +// Pi runs handlers in registration order; each handler receives the event +// state as modified by prior handlers. Since the audit handler returns nothing, +// it never sees our replacement content, and since we don't touch the audit +// log, the two handlers are fully independent. + +import { readFileSync } from "node:fs"; +import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; +import { truncateTail, formatSize, isBashToolResult } from "@mariozechner/pi-coding-agent"; + +// 4x the pi defaults (50KB / 2000 lines). Sized for the prompt-engineer skill, +// which concatenates ~100-150KB of technique reference files into a single bash +// call. 200KB gives comfortable headroom; 5000 lines is proportional (2.5x). 
+const KOAN_MAX_BYTES = 200 * 1024; +const KOAN_MAX_LINES = 5000; + +export function registerTruncationOverride(pi: ExtensionAPI): void { + pi.on("tool_result", (event) => { + if (!isBashToolResult(event)) return; + if (!event.details?.truncation?.truncated) return; + if (!event.details?.fullOutputPath) return; + + const fullOutputPath = event.details.fullOutputPath; + + // readFileSync is fine here — the runner awaits handlers so async would + // also work, but there's no benefit for a single temp file read. + // + // Timing note: the bash tool calls tempFileStream.end() then immediately + // resolves. On local filesystems the OS write completes before the + // microtask chain reaches this handler. If this ever causes incomplete + // reads on network filesystems, switch to async readFile with a small + // retry delay. + let fullContent: string; + try { + fullContent = readFileSync(fullOutputPath, "utf8"); + } catch { + // Temp file gone (race condition) — leave the result unchanged. + return undefined; + } + + const truncation = truncateTail(fullContent, { maxLines: KOAN_MAX_LINES, maxBytes: KOAN_MAX_BYTES }); + let outputText = truncation.content || "(no output)"; + + if (truncation.truncated) { + // Mirror the bash tool's notice format exactly. The LLM's tool description + // says output is truncated to specific limits and references the full output + // path — a divergent format would confuse the LLM about how to recover the rest. + const startLine = truncation.totalLines - truncation.outputLines + 1; + const endLine = truncation.totalLines; + + if (truncation.lastLinePartial) { + const lines = fullContent.split("\n"); + const lastLine = lines[lines.length - 1] ?? ""; + const lastLineSize = Buffer.byteLength(lastLine, "utf8"); + outputText += `\n\n[Showing last ${formatSize(truncation.outputBytes)} of line ${endLine} (line is ${formatSize(lastLineSize)}). 
Full output: ${fullOutputPath}]`; + } else if (truncation.truncatedBy === "lines") { + outputText += `\n\n[Showing lines ${startLine}-${endLine} of ${truncation.totalLines}. Full output: ${fullOutputPath}]`; + } else { + outputText += `\n\n[Showing lines ${startLine}-${endLine} of ${truncation.totalLines} (${formatSize(KOAN_MAX_BYTES)} limit). Full output: ${fullOutputPath}]`; + } + } + + return { content: [{ type: "text" as const, text: outputText }] }; + }); +} From 8603e6cbbba65ae8aaf84176a3157f90932e6739 Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Wed, 18 Mar 2026 23:48:11 +0700 Subject: [PATCH 051/412] refactor audit events to paired tool_call/tool_result model --- src/planner/lib/audit.ts | 574 +++++++++++++++++++++++++++++---------- 1 file changed, 434 insertions(+), 140 deletions(-) diff --git a/src/planner/lib/audit.ts b/src/planner/lib/audit.ts index 992ecab..86f9ebb 100644 --- a/src/planner/lib/audit.ts +++ b/src/planner/lib/audit.ts @@ -1,8 +1,10 @@ // Audit trail for subagent sessions: event-sourced append log (events.jsonl) // with an eagerly materialized projection (state.json) for parent polling. // fold() is pure so the projection can be replayed from the raw log for testing. -// Graduated tool capture: full detail for koan_* tools, paths for file ops, -// binary name for bash, name-only for everything else. +// +// Tool invocations are captured as two events: tool_call (request) and +// tool_result (response), correlated by toolCallId. The flat event stream +// can be reduced into ToolInvocation[] via correlateTools() for paired access. 
import { promises as fs } from "node:fs"; import * as path from "node:path"; @@ -14,38 +16,31 @@ export interface EventBase { seq: number; } -export interface ToolFileEvent extends EventBase { - kind: "tool_file"; - tool: "read" | "edit" | "write"; - path: string; - lines?: number; - chars?: number; - error: boolean; -} +// -- Tool events -- +// Every tool invocation produces a (tool_call, tool_result) pair in the log. +// tool_call fires when the LLM requests the tool; tool_result fires when +// the tool returns. Both carry toolCallId for correlation. -export interface ToolBashEvent extends EventBase { - kind: "tool_bash"; - bin: string; - lines?: number; - chars?: number; - error: boolean; -} - -export interface ToolKoanEvent extends EventBase { - kind: "tool_koan"; +export interface ToolCallEvent extends EventBase { + kind: "tool_call"; + toolCallId: string; tool: string; input: Record; - response: string[]; - error: boolean; } -export interface ToolGenericEvent extends EventBase { - kind: "tool_generic"; +export interface ToolResultEvent extends EventBase { + kind: "tool_result"; + toolCallId: string; tool: string; error: boolean; + // Summarized output metrics (not the full content — too large for the log). + lines?: number; + chars?: number; + // Koan tool response text preserved for projection (completionSummary, etc.). 
+ koanResponse?: string[]; } -export type ToolEvent = ToolFileEvent | ToolBashEvent | ToolKoanEvent | ToolGenericEvent; +// -- Lifecycle events -- export interface PhaseStartEvent extends EventBase { kind: "phase_start"; @@ -72,15 +67,31 @@ export interface HeartbeatEvent extends EventBase { kind: "heartbeat"; } +export interface UsageEvent extends EventBase { + kind: "usage"; + input: number; + output: number; + cacheRead: number; + cacheWrite: number; +} + export type AuditEvent = - | ToolFileEvent - | ToolBashEvent - | ToolKoanEvent - | ToolGenericEvent + | ToolCallEvent + | ToolResultEvent | PhaseStartEvent | StepTransitionEvent | PhaseEndEvent - | HeartbeatEvent; + | HeartbeatEvent + | UsageEvent; + +// Distributive Omit — distributes over union members so object literals +// with fields specific to one member are accepted. +type DistributiveOmit = T extends unknown ? Omit : never; +export type AuditEventPartial = DistributiveOmit; + +// -- Projection -- +// Eagerly materialized state summary. Written atomically to state.json +// after every event so the parent (web server) can poll cheaply. export interface Projection { role: string; @@ -91,13 +102,84 @@ export interface Projection { totalSteps: number; stepName: string; lastAction: string | null; + // toolCallId of the currently in-flight tool, null when idle. + // Lets the UI distinguish "doing X" from "done with X". + currentToolCallId: string | null; updatedAt: string; eventCount: number; error: string | null; + completionSummary: string | null; + tokensSent: number; + tokensReceived: number; +} + +// -- Correlated tool invocations -- +// Reduced view of paired (tool_call, tool_result) events. + +export interface ToolInvocation { + toolCallId: string; + tool: string; + input: Record; + callTs: string; + resultTs: string | null; + error: boolean | null; + inFlight: boolean; + durationMs: number | null; + // Output metrics from the result event. 
+ lines?: number; + chars?: number; + koanResponse?: string[]; +} + +// Reduces a flat event stream into paired tool invocations. +// In-flight tools (call without result) have inFlight=true, resultTs=null. +export function correlateTools(events: AuditEvent[]): ToolInvocation[] { + const byId = new Map(); + const ordered: ToolInvocation[] = []; + + for (const e of events) { + if (e.kind === "tool_call") { + const inv: ToolInvocation = { + toolCallId: e.toolCallId, + tool: e.tool, + input: e.input, + callTs: e.ts, + resultTs: null, + error: null, + inFlight: true, + durationMs: null, + }; + byId.set(e.toolCallId, inv); + ordered.push(inv); + } else if (e.kind === "tool_result") { + const inv = byId.get(e.toolCallId); + if (inv) { + inv.resultTs = e.ts; + inv.error = e.error; + inv.inFlight = false; + inv.durationMs = new Date(e.ts).getTime() - new Date(inv.callTs).getTime(); + inv.lines = e.lines; + inv.chars = e.chars; + inv.koanResponse = e.koanResponse; + } + // Orphan result (no matching call) — can happen if the subagent + // started before tool_call hooking was added. Silently skip. + } + } + + return ordered; +} + +// -- Pi event shapes (subset we consume) -- + +interface PiToolCallEvent { + toolCallId: string; + toolName: string; + input: Record; } -// Pi's ToolResultEvent shape (subset we need). interface PiToolResultEvent { + toolCallId: string; toolName: string; input: Record; content: Array<{ type: string; text?: string }>; @@ -115,26 +197,107 @@ function now(): string { return new Date().toISOString(); } -// Derives a concise last-action string from a tool event for display. -export function summarize(e: ToolEvent): string { - switch (e.kind) { - case "tool_file": { - const suffix = e.lines != null ? ` (${e.lines}L, ${e.chars}c)` : ""; - return `${e.tool} ${e.path}${suffix}`; - } - case "tool_bash": { - const suffix = e.lines != null ? 
` (${e.lines}L, ${e.chars}c)` : ""; - return `bash ${e.bin}${suffix}`; - } - case "tool_koan": - return e.tool; - case "tool_generic": - return e.tool; +// -- Extractors -- +// Transform pi's raw hook events into our audit event types. +// ts/seq are placeholders — EventLog.append() overwrites them. + +export function extractToolCall(piEvent: PiToolCallEvent): ToolCallEvent { + return { + kind: "tool_call", + toolCallId: piEvent.toolCallId, + tool: piEvent.toolName, + input: piEvent.input, + ts: now(), + seq: 0, + }; +} + +export function extractToolResult(piEvent: PiToolResultEvent): ToolResultEvent { + const { toolCallId, toolName, input, content, isError } = piEvent; + + const ev: ToolResultEvent = { + kind: "tool_result", + toolCallId, + tool: toolName, + error: isError, + ts: now(), + seq: 0, + }; + + // Capture output size for file and bash tools. + if (FILE_TOOLS.has(toolName) && !isError) { + const text = content.find((c) => c.type === "text")?.text ?? ""; + ev.lines = text.split("\n").length; + ev.chars = text.length; + } else if (toolName === "bash") { + const text = content.find((c) => c.type === "text")?.text ?? ""; + ev.lines = text.split("\n").length; + ev.chars = text.length; + } + + // Preserve koan tool response text for projection use (completionSummary). + if (toolName.startsWith("koan_")) { + ev.koanResponse = content + .filter((c) => c.type === "text" && c.text !== undefined) + .map((c) => c.text as string); + } + + return ev; +} + +// -- Summarize -- +// Human-readable one-liner from a tool invocation. +// Uses input (from call) + output metrics (from result) when available. + +export function summarizeInvocation(inv: ToolInvocation): string { + const { tool, input } = inv; + + // Tool name / key input identifier. + let label: string; + if (FILE_TOOLS.has(tool)) { + label = `${tool} ${(input["path"] as string | undefined) ?? ""}`; + } else if (tool === "bash") { + const cmd = (input["command"] as string | undefined) ?? 
""; + label = `bash ${cmd.trim().split(/\s+/)[0] ?? ""}`; + } else { + label = tool; + } + + // Append output metrics if result has landed. + if (!inv.inFlight && (inv.lines != null || inv.chars != null)) { + const lines = inv.lines ?? 0; + const chars = inv.chars ?? 0; + label += ` · ${lines}L/${formatChars(chars)}`; + } + + return label; +} + +// Summarize from a ToolCallEvent alone (in-flight, no result yet). +function summarizeCall(e: ToolCallEvent): string { + if (FILE_TOOLS.has(e.tool)) { + return `${e.tool} ${(e.input["path"] as string | undefined) ?? ""}`; + } + if (e.tool === "bash") { + const cmd = (e.input["command"] as string | undefined) ?? ""; + return `bash ${cmd.trim().split(/\s+/)[0] ?? ""}`; } + return e.tool; } +// Summarize from a ToolResultEvent alone (used in fold when call was missed). +function summarizeResult(e: ToolResultEvent): string { + let label = e.tool; + if (e.lines != null || e.chars != null) { + label += ` · ${e.lines ?? 0}L/${formatChars(e.chars ?? 0)}`; + } + return label; +} + +// -- Fold -- // Pure projection update — one case per discriminated kind. // All branches update updatedAt and increment eventCount. + export function fold(s: Projection, e: AuditEvent): Projection { const base = { ...s, updatedAt: e.ts, eventCount: s.eventCount + 1 }; @@ -150,7 +313,9 @@ export function fold(s: Projection, e: AuditEvent): Projection { totalSteps: e.totalSteps, stepName: "", lastAction: null, + currentToolCallId: null, error: null, + completionSummary: null, }; case "step_transition": @@ -166,59 +331,42 @@ export function fold(s: Projection, e: AuditEvent): Projection { ...base, status: e.outcome, error: e.detail ?? 
null, + currentToolCallId: null, }; - case "tool_file": - case "tool_bash": - case "tool_koan": - case "tool_generic": - return { ...base, lastAction: summarize(e) }; + case "tool_call": { + const updated: Projection = { + ...base, + lastAction: summarizeCall(e), + currentToolCallId: e.toolCallId, + }; + // Extract completionSummary from koan_complete_step's thoughts param. + // The thoughts parameter is chain-of-thought, not task output (per + // AGENTS.md invariant), but we capture a prefix for the projection + // so the web UI can show scout summaries. + if (e.tool === "koan_complete_step" && typeof e.input?.thoughts === "string") { + updated.completionSummary = e.input.thoughts.slice(0, 500) || null; + } + return updated; + } + + case "tool_result": + return { + ...base, + lastAction: summarizeResult(e), + currentToolCallId: null, + }; case "heartbeat": return base; - } -} - -// Transforms pi's ToolResultEvent into a graduated AuditEvent. -export function extractToolEvent(piEvent: PiToolResultEvent): ToolEvent { - const { toolName, input, content, isError } = piEvent; - const ts = now(); - // ts and seq are assigned by EventLog.append(); values here are - // placeholders overridden on write. - const seq = 0; - - if (FILE_TOOLS.has(toolName)) { - const ev: ToolFileEvent = { - kind: "tool_file", - tool: toolName as "read" | "edit" | "write", - path: (input["path"] as string | undefined) ?? "", - error: isError, - ts, - seq, - }; - if (toolName === "read" && !isError) { - const text = content.find((c) => c.type === "text")?.text ?? ""; - ev.lines = text.split("\n").length; - ev.chars = text.length; - } - return ev; - } - if (toolName === "bash") { - const cmd = (input["command"] as string | undefined) ?? ""; - const bin = cmd.trim().split(/\s+/)[0] ?? "bash"; - const text = content.find((c) => c.type === "text")?.text ?? 
""; - return { kind: "tool_bash", bin, lines: text.split("\n").length, chars: text.length, error: isError, ts, seq }; - } - - if (toolName.startsWith("koan_")) { - const response = content - .filter((c) => c.type === "text" && c.text !== undefined) - .map((c) => c.text as string); - return { kind: "tool_koan", tool: toolName, input, response, error: isError, ts, seq }; + case "usage": + return { + ...base, + tokensSent: s.tokensSent + e.input, + tokensReceived: s.tokensReceived + e.output, + }; } - - return { kind: "tool_generic", tool: toolName, error: isError, ts, seq }; } // -- EventLog -- @@ -249,9 +397,13 @@ export class EventLog { totalSteps: 0, stepName: "", lastAction: null, + currentToolCallId: null, updatedAt: now(), eventCount: 0, error: null, + completionSummary: null, + tokensSent: 0, + tokensReceived: 0, }; } @@ -266,13 +418,13 @@ export class EventLog { // Assigns ts + seq, appends JSON line, folds, writes state atomically. // Serialized: concurrent callers queue behind the in-flight write. - async append(partial: Omit): Promise { + async append(partial: AuditEventPartial): Promise { const task = () => this.doAppend(partial); this.pending = this.pending.then(task, task); return this.pending; } - private async doAppend(partial: Omit): Promise { + private async doAppend(partial: AuditEventPartial): Promise { if (!this.fd) { throw new Error("EventLog.append called before open()"); } @@ -336,7 +488,7 @@ export class EventLog { // -- Exports -- // Reads state.json as a Projection; returns null if missing or malformed. -// Used by driver polling loop. +// Used by web server polling loop. export async function readProjection(dir: string): Promise { try { const raw = await fs.readFile(path.join(dir, "state.json"), "utf8"); @@ -346,13 +498,15 @@ export async function readProjection(dir: string): Promise { } } -// Structured log line for the widget log card. -// `tool` is the left-column scan anchor, `summary` is the right-column detail. 
-// High-value rows may wrap to two visual lines in the widget. +// -- Log formatting -- +// Structured log lines for the web UI activity feed. + export interface LogLine { tool: string; summary: string; highValue: boolean; + inFlight: boolean; + details?: string[]; } interface ToolShape { @@ -366,7 +520,6 @@ interface ToolShape { const PREVIEW_CHARS = 40; const KEY_PRIORITY = ["id", "story_id", "milestone", "decision_ref", "intent_ref", "file", "path", "phase"]; -// Tool shapes for koan_* tools. No koan_escalate (eliminated in §11.3.1). const KOAN_SHAPES: Record = { koan_select_story: { keys: ["story_id"], highValue: true }, koan_complete_story: { keys: ["story_id"], highValue: true }, @@ -376,8 +529,8 @@ const KOAN_SHAPES: Record = { koan_request_scouts: { keys: ["scouts"], arrays: ["scouts"], highValue: true }, }; -// Reads the tail of events.jsonl and returns structured log entries. -// Filters out heartbeats (noisy). Used by driver to feed the widget log card. +// Reads events.jsonl, correlates tool pairs, and returns structured log entries. +// Filters out heartbeats, usage, and koan_complete_step (noisy). export async function readRecentLogs(dir: string, count = 8): Promise { try { const raw = await fs.readFile(path.join(dir, "events.jsonl"), "utf8"); @@ -385,14 +538,88 @@ export async function readRecentLogs(dir: string, count = 8): Promise .trimEnd() .split("\n") .filter(Boolean) - .map((line) => JSON.parse(line) as AuditEvent) - .filter((e) => e.kind !== "heartbeat" && !(e.kind === "tool_koan" && e.tool === "koan_complete_step")); - return events.slice(-count).map(formatLogLine); + .map((line) => JSON.parse(line) as AuditEvent); + + return buildChronologicalLog(events, count); } catch { return []; } } +// Builds a chronological log by walking events in order and emitting +// one LogLine per tool invocation (at result time, or at call time if +// still in-flight) plus lifecycle events. 
+function buildChronologicalLog(events: AuditEvent[], count: number): LogLine[] { + const pendingCalls = new Map }>(); + const lines: LogLine[] = []; + + for (const e of events) { + if (e.kind === "heartbeat" || e.kind === "usage") continue; + + if (e.kind === "tool_call") { + // Stash tool name + input for when the result arrives (or for + // in-flight rendering if no result appears by end of loop). + pendingCalls.set(e.toolCallId, { tool: e.tool, input: e.input }); + continue; + } + + if (e.kind === "tool_result") { + if (e.tool === "koan_complete_step") continue; + const call = pendingCalls.get(e.toolCallId); + lines.push(formatPairedResult(e, call?.input ?? {})); + pendingCalls.delete(e.toolCallId); + continue; + } + + // Lifecycle event. + lines.push(formatLifecycleEvent(e)); + } + + // Emit remaining calls without results as in-flight lines. + // The ActivityFeed renders the last in-flight line with animated dots. + for (const [, call] of pendingCalls) { + if (call.tool === "koan_complete_step") continue; + lines.push(formatInFlightCall(call.tool, call.input)); + } + + return lines.slice(-count); +} + +// Format an in-flight tool_call (no result yet). Same structure as +// formatPairedResult but with inFlight: true and no output metrics. +function formatInFlightCall(tool: string, input: Record): LogLine { + if (FILE_TOOLS.has(tool)) { + return { + tool, + summary: (input["path"] as string | undefined) ?? "", + highValue: tool === "read", + inFlight: true, + }; + } + + if (tool === "bash") { + const cmd = (input["command"] as string | undefined) ?? ""; + const bin = cmd.trim().split(/\s+/)[0] ?? 
"bash"; + return { tool: "bash", summary: bin, highValue: false, inFlight: true }; + } + + if (tool.startsWith("koan_")) { + const shape = KOAN_SHAPES[tool]; + if (shape) { + const inv: ToolInvocation = { + toolCallId: "", tool, input, + callTs: "", resultTs: null, + error: null, inFlight: true, durationMs: null, + }; + return formatKoanInvocation(inv); + } + } + + return { tool, summary: "", highValue: false, inFlight: true }; +} + +// -- Formatters -- + function formatChars(chars: number): string { if (chars < 1000) return `${chars}c`; const k = chars / 1000; @@ -464,75 +691,142 @@ function orderedShapeKeys(keys: string[]): string[] { return indexed.map((x) => x.key); } -function formatKnownKoan(e: ToolKoanEvent, shape: ToolShape): LogLine { +// Format a completed tool invocation from its correlated pair. +function formatToolInvocation(inv: ToolInvocation): LogLine { + if (inv.tool.startsWith("koan_")) { + return formatKoanInvocation(inv); + } + + if (FILE_TOOLS.has(inv.tool)) { + const p = (inv.input["path"] as string | undefined) ?? ""; + const suffix = inv.lines != null ? ` · ${inv.lines}L/${formatChars(inv.chars ?? 0)}` : ""; + return { + tool: inv.tool, + summary: `${p}${suffix}`, + highValue: inv.tool === "read", + inFlight: inv.inFlight, + }; + } + + if (inv.tool === "bash") { + const cmd = (inv.input["command"] as string | undefined) ?? ""; + const bin = cmd.trim().split(/\s+/)[0] ?? "bash"; + const suffix = inv.lines != null ? ` · ${inv.lines}L/${formatChars(inv.chars ?? 0)}` : ""; + return { + tool: "bash", + summary: `${bin}${suffix}`, + highValue: false, + inFlight: inv.inFlight, + }; + } + + return { tool: inv.tool, summary: "", highValue: false, inFlight: inv.inFlight }; +} + +function formatKoanInvocation(inv: ToolInvocation): LogLine { + const shape = KOAN_SHAPES[inv.tool]; + if (!shape) { + return { tool: inv.tool, summary: "", highValue: false, inFlight: inv.inFlight }; + } + const arrayKeys = new Set(shape.arrays ?? 
[]); const freeformKeys = new Set(shape.freeform ?? []); const chunks: string[] = []; for (const key of orderedShapeKeys(shape.keys)) { - if (!hasKey(e.input, key)) continue; - const value = e.input[key]; + if (!hasKey(inv.input, key)) continue; + const value = inv.input[key]; if (arrayKeys.has(key)) { chunks.push(`${key}:${arrayPreview(value)}`); continue; } - if (freeformKeys.has(key)) { chunks.push(`${key}:${freeformSize(value)}`); continue; } - chunks.push(`${key}=${inlineScalar(value)}`); } - if (shape.getter) { + if (shape.getter && inv.koanResponse) { if (chunks.length === 0) { chunks.push("scope=plan"); } - chunks.push(`resp:${responseSize(e.response)}`); + chunks.push(`resp:${responseSize(inv.koanResponse)}`); } - return { - tool: e.tool, + const line: LogLine = { + tool: inv.tool, summary: chunks.join(" · "), highValue: shape.highValue ?? chunks.length >= 3, + inFlight: inv.inFlight, }; + + // Expand koan_request_scouts with per-scout detail lines. + if (inv.tool === "koan_request_scouts" && Array.isArray(inv.input["scouts"])) { + line.details = (inv.input["scouts"] as Array>).map( + (s) => `${s["id"] ?? "?"} (${s["role"] ?? "agent"})`, + ); + } + + return line; } -function formatKoanLogLine(e: ToolKoanEvent): LogLine { - const shape = KOAN_SHAPES[e.tool]; - if (!shape) { - return { tool: e.tool, summary: "", highValue: false }; +// Format a tool_result event paired with its call's input. +function formatPairedResult(e: ToolResultEvent, input: Record): LogLine { + if (FILE_TOOLS.has(e.tool)) { + const p = (input["path"] as string | undefined) ?? ""; + const suffix = e.lines != null ? ` · ${e.lines}L/${formatChars(e.chars ?? 0)}` : ""; + return { + tool: e.tool, + summary: `${p}${suffix}`, + highValue: e.tool === "read", + inFlight: false, + }; } - return formatKnownKoan(e, shape); + + if (e.tool === "bash") { + const cmd = (input["command"] as string | undefined) ?? ""; + const bin = cmd.trim().split(/\s+/)[0] ?? "bash"; + const suffix = e.lines != null ? 
` · ${e.lines}L/${formatChars(e.chars ?? 0)}` : ""; + return { + tool: "bash", + summary: `${bin}${suffix}`, + highValue: false, + inFlight: false, + }; + } + + if (e.tool.startsWith("koan_")) { + const shape = KOAN_SHAPES[e.tool]; + if (shape) { + // Rebuild invocation-like object for the koan formatter. + const inv: ToolInvocation = { + toolCallId: e.toolCallId, + tool: e.tool, + input, + callTs: e.ts, + resultTs: e.ts, + error: e.error, + inFlight: false, + durationMs: null, + koanResponse: e.koanResponse, + }; + return formatKoanInvocation(inv); + } + return { tool: e.tool, summary: "", highValue: false, inFlight: false }; + } + + return { tool: e.tool, summary: "", highValue: false, inFlight: false }; } -function formatLogLine(e: AuditEvent): LogLine { +function formatLifecycleEvent(e: PhaseStartEvent | StepTransitionEvent | PhaseEndEvent): LogLine { switch (e.kind) { case "phase_start": - return { tool: "phase", summary: `${e.phase} (${e.totalSteps} steps)`, highValue: false }; + return { tool: "phase", summary: `${e.phase} (${e.totalSteps} steps)`, highValue: false, inFlight: false }; case "step_transition": - return { tool: `step ${e.step}/${e.totalSteps}`, summary: e.name, highValue: false }; + return { tool: `step ${e.step}/${e.totalSteps}`, summary: e.name, highValue: false, inFlight: false }; case "phase_end": - return { tool: "phase", summary: e.detail ? `${e.outcome} · ${e.detail}` : e.outcome, highValue: false }; - case "tool_file": - return { - tool: e.tool, - summary: e.lines != null ? `${e.path} · ${e.lines}L/${formatChars(e.chars ?? 0)}` : e.path, - highValue: e.tool === "read", - }; - case "tool_bash": - return { - tool: "bash", - summary: e.lines != null ? `${e.bin} · ${e.lines}L/${formatChars(e.chars ?? 
0)}` : e.bin, - highValue: false, - }; - case "tool_koan": - return formatKoanLogLine(e); - case "tool_generic": - return { tool: e.tool, summary: "", highValue: false }; - case "heartbeat": - return { tool: "heartbeat", summary: "", highValue: false }; + return { tool: "phase", summary: e.detail ? `${e.outcome} · ${e.detail}` : e.outcome, highValue: false, inFlight: false }; } } From 0f68190e1a29b95a6d6c6228c3d7d5f6d5448459 Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Wed, 18 Mar 2026 23:48:20 +0700 Subject: [PATCH 052/412] implement step 0 boot state and reinforce completion directive --- src/planner/lib/step.ts | 13 +++- src/planner/phases/base-phase.ts | 77 +++++++++++++--------- src/planner/phases/decomposer/prompts.ts | 4 +- src/planner/phases/executor/prompts.ts | 4 +- src/planner/phases/intake/prompts.ts | 4 +- src/planner/phases/orchestrator/prompts.ts | 4 +- src/planner/phases/planner/prompts.ts | 4 +- src/planner/tools/workflow.ts | 28 ++++++-- 8 files changed, 84 insertions(+), 54 deletions(-) diff --git a/src/planner/lib/step.ts b/src/planner/lib/step.ts index 9771f6f..a6f3f5e 100644 --- a/src/planner/lib/step.ts +++ b/src/planner/lib/step.ts @@ -1,5 +1,11 @@ // Step prompt assembly for koan phase workflows. // +// formatStep() wraps step guidance with a header and a mandatory invoke-after +// directive. The directive at the END of every step is as important as the +// boot prompt at the beginning: primacy (first message) establishes the +// koan_complete_step habit; recency (last thing in each step) reinforces it. +// Together they make the calling pattern robust across model capability levels. +// // The `thoughts` parameter on koan_complete_step captures the model's work output // (analysis, review, findings) as a tool parameter rather than text output. 
This // ensures models that can't mix text + tool_call in one response still advance @@ -8,11 +14,14 @@ export interface StepGuidance { title: string; instructions: string[]; - // Custom invoke-after directive. When omitted, formatStep appends the default - // koan_complete_step directive. Terminal steps may override this. + // Override the default "WHEN DONE: Call koan_complete_step..." directive. + // Use for terminal steps that must call a domain tool (e.g. koan_select_story) + // before koan_complete_step, or for steps where the completion signal differs. invokeAfter?: string; } +// Appended to every step that doesn't override invokeAfter. +// Positioned last for recency — LLMs weight end-of-context instructions heavily. const DEFAULT_INVOKE = [ "WHEN DONE: Call koan_complete_step with your findings in the `thoughts` parameter.", "Do NOT call this tool until the work described in this step is finished.", diff --git a/src/planner/phases/base-phase.ts b/src/planner/phases/base-phase.ts index 775b835..62bf1a7 100644 --- a/src/planner/phases/base-phase.ts +++ b/src/planner/phases/base-phase.ts @@ -1,11 +1,21 @@ // BasePhase: shared lifecycle for all six koan subagent roles. // Subclasses define only their step structure and system prompt. -// Eliminates ~40 lines of duplicated skeleton per phase. +// +// Step-first workflow invariant (see AGENTS.md): +// Every subagent launches with a minimal boot prompt that contains only +// "call koan_complete_step". This forces the LLM's very first action to be +// a tool call rather than text output — critical because pi -p processes exit +// the moment the LLM finishes a turn without a tool call, with no recovery. +// +// Step 0 is the silent boot state. The first koan_complete_step call +// transitions 0→1 and returns step 1 guidance (just-in-time delivery). +// Subsequent calls advance through steps until the phase completes. 
// // Lifecycle: // constructor → registerHandlers() (hooks event listeners) -// begin() → activates phase, sets onCompleteStep in ctx, emits phase_start -// handleStepComplete() → advances step counter, returns next prompt or null +// begin() → activates phase at step 0, arms onCompleteStep, emits phase_start +// handleStepComplete(0) → returns step 1 guidance, emits step_transition(1) +// handleStepComplete(N) → returns step N+1 guidance, or null when done import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; @@ -25,9 +35,8 @@ export abstract class BasePhase { protected abstract getStepName(step: number): string; protected abstract getStepGuidance(step: number): StepGuidance; - private step = 1; + private step = 0; private active = false; - private step1Prompt: string | null = null; protected readonly log: Logger; @@ -44,29 +53,17 @@ export abstract class BasePhase { // -- Event handler registration -- private registerHandlers(): void { - // before_agent_start: inject system prompt when this phase is active. + // Inject the system prompt when this phase is active. The system prompt + // establishes role identity but deliberately omits task details — those + // arrive via step 1 guidance so the first message stays minimal. this.pi.on("before_agent_start", () => { if (!this.active) return undefined; return { systemPrompt: this.getSystemPrompt() }; }); - // context: append step 1 guidance to the spawn prompt (§9.8 append pattern). - // Preserves context embedded by the spawn function (scout question, retry - // context, etc.) while adding structured step instructions after a separator. - this.pi.on("context", (event) => { - if (!this.active || this.step !== 1 || !this.step1Prompt) return undefined; - const messages = event.messages.map((m) => { - if (m.role !== "user") return m; - const existing = typeof m.content === "string" ? m.content.trim() : ""; - const combined = existing.length > 0 - ? 
`${existing}\n\n---\n\n${this.step1Prompt!}` - : this.step1Prompt!; - return { ...m, content: combined }; - }); - return { messages }; - }); - - // tool_call: default-deny permission check for every tool call. + // Default-deny permission fence: every tool call is checked against the + // role's allowed set. Prevents roles from using tools outside their scope + // even though all tools are registered unconditionally at init. this.pi.on("tool_call", (event) => { if (!this.active) return undefined; const perm = checkPermission( @@ -80,33 +77,50 @@ export abstract class BasePhase { } return undefined; }); + + // NOTE: There is deliberately NO `context` event handler here. + // A previous design injected step 1 guidance into the first user message, + // but that front-loaded complex instructions before the LLM had established + // the koan_complete_step calling pattern — causing weaker models to produce + // text output and exit without entering the workflow at all. + // Step guidance is now delivered exclusively through koan_complete_step return values. } // -- Public lifecycle -- async begin(): Promise { - this.step1Prompt = formatStep(this.getStepGuidance(1)); this.active = true; - this.step = 1; + this.step = 0; // Boot state: waiting for the first koan_complete_step call. if (this.ctx.onCompleteStep !== null) { throw new Error(`ctx.onCompleteStep is already occupied — cannot begin ${this.role} phase`); } this.ctx.onCompleteStep = (thoughts: string) => this.handleStepComplete(thoughts); - this.log("Starting phase", { role: this.role, step: 1, totalSteps: this.totalSteps }); + this.log("Starting phase", { role: this.role, step: 0, totalSteps: this.totalSteps }); await this.eventLog?.emitPhaseStart(this.totalSteps); - await this.eventLog?.emitStepTransition(1, this.getStepName(1), this.totalSteps); + // step_transition is NOT emitted here — it fires when step 1 guidance is first + // returned, so the event log reflects when the LLM actually begins work. 
} // -- Private step progression -- private async handleStepComplete(thoughts: string): Promise { - void thoughts; // captured in event log via tool_result; used by subclass prompts if needed - const prev = this.step; + void thoughts; // captured in event log via tool_result; subclass prompts may reference it + + if (this.step === 0) { + // Boot transition: the LLM called koan_complete_step as instructed by the + // boot prompt. Reward it with step 1 guidance. This is the critical moment + // that establishes the call→receive→work→call pattern for the session. + this.step = 1; + const prompt = formatStep(this.getStepGuidance(1)); + await this.eventLog?.emitStepTransition(1, this.getStepName(1), this.totalSteps); + this.log("Boot transition", { role: this.role, to: 1 }); + return prompt; + } - if (prev === this.totalSteps) { - // Phase complete. + if (this.step === this.totalSteps) { + // Phase complete — return null signals koan_complete_step to reply "Phase complete." this.active = false; this.ctx.onCompleteStep = null; await this.eventLog?.emitPhaseEnd("completed"); @@ -115,6 +129,7 @@ export abstract class BasePhase { } // Advance to next step. + const prev = this.step; this.step = prev + 1; const prompt = formatStep(this.getStepGuidance(this.step)); await this.eventLog?.emitStepTransition(this.step, this.getStepName(this.step), this.totalSteps); diff --git a/src/planner/phases/decomposer/prompts.ts b/src/planner/phases/decomposer/prompts.ts index 7f18450..f54b48d 100644 --- a/src/planner/phases/decomposer/prompts.ts +++ b/src/planner/phases/decomposer/prompts.ts @@ -54,9 +54,7 @@ You write the following files, all inside the epic directory: - All read tools (read, bash, grep, glob, find, ls) — for reading intake output and scout reports. - \`koan_request_scouts\` — to request additional codebase exploration if needed. - \`write\` / \`edit\` — for writing output files inside the epic directory. -- \`koan_complete_step\` — to signal step completion. 
- -You work in two steps. First you read and analyze. Then you write the decomposition.`; +- \`koan_complete_step\` — to signal step completion.`; } export function decomposerStepGuidance(step: number): StepGuidance { diff --git a/src/planner/phases/executor/prompts.ts b/src/planner/phases/executor/prompts.ts index b27bf14..7b4636b 100644 --- a/src/planner/phases/executor/prompts.ts +++ b/src/planner/phases/executor/prompts.ts @@ -51,9 +51,7 @@ Improvised solutions that seem reasonable in isolation frequently break other pa ## On retries -If retryContext is present, this is your second (or later) attempt at this story. The failure summary tells you what went wrong. Read it before you read the plan, and keep the failure context in mind as you implement. Do not repeat the mistake from the previous attempt. - -You work in steps. Each step has specific instructions. Follow them precisely.`; +If retryContext is present, this is your second (or later) attempt at this story. The failure summary tells you what went wrong. Read it before you read the plan, and keep the failure context in mind as you implement. Do not repeat the mistake from the previous attempt.`; } export function executorStepGuidance(step: number, storyId: string, retryContext?: string): StepGuidance { diff --git a/src/planner/phases/intake/prompts.ts b/src/planner/phases/intake/prompts.ts index 80161f4..0e13445 100644 --- a/src/planner/phases/intake/prompts.ts +++ b/src/planner/phases/intake/prompts.ts @@ -43,9 +43,7 @@ You write two files, both inside the epic directory: - \`koan_request_scouts\` — to request parallel codebase exploration. - \`koan_ask_question\` — to ask the user clarifying questions via IPC. - \`write\` / \`edit\` — for writing output files inside the epic directory only. -- \`koan_complete_step\` — to signal step completion with your findings. - -You work in three steps. Each step has specific instructions. 
Follow them precisely.`; +- \`koan_complete_step\` — to signal step completion with your findings.`; } export function intakeStepGuidance(step: number, conversationPath?: string): StepGuidance { diff --git a/src/planner/phases/orchestrator/prompts.ts b/src/planner/phases/orchestrator/prompts.ts index 15c5db3..efafd41 100644 --- a/src/planner/phases/orchestrator/prompts.ts +++ b/src/planner/phases/orchestrator/prompts.ts @@ -78,9 +78,7 @@ When you make a decision that modifies artifacts without explicit human instruct - MUST NOT call more than one verdict tool per verdict step. - MUST run ALL verification checks in verify.md before issuing a verdict. - MUST include a concrete, actionable failure summary when calling koan_retry_story. -- When uncertain about a verdict, prefer koan_retry_story with a detailed failure_summary. Ask the user only when the failure reveals a genuine requirements ambiguity. - -You work in steps. Each step has specific instructions. Follow them precisely.`; +- When uncertain about a verdict, prefer koan_retry_story with a detailed failure_summary. Ask the user only when the failure reveals a genuine requirements ambiguity.`; } export function orchestratorPreStepGuidance(step: number): StepGuidance { diff --git a/src/planner/phases/planner/prompts.ts b/src/planner/phases/planner/prompts.ts index b7d77f9..1b6a9e0 100644 --- a/src/planner/phases/planner/prompts.ts +++ b/src/planner/phases/planner/prompts.ts @@ -56,9 +56,7 @@ Each check entry must include: - MUST NOT plan beyond the current story's scope. If a step would touch something not in the story, flag it as out-of-scope. - MUST NOT make architectural decisions. If a decision is needed that is outside the planner's scope, note it in plan.md as: \`BLOCKER: [description]. The orchestrator will ask the user via koan_ask_question during verification.\` - MUST include enough detail that the executor can implement the plan in one pass without guessing. 
-- MUST scope plan/context.md to only what the executor needs — context files that include too much code obscure the relevant parts. - -You work in steps. Each step has specific instructions. Follow them precisely.`; +- MUST scope plan/context.md to only what the executor needs — context files that include too much code obscure the relevant parts.`; } export function plannerStepGuidance(step: number, storyId: string): StepGuidance { diff --git a/src/planner/tools/workflow.ts b/src/planner/tools/workflow.ts index 71eb74e..cff27d9 100644 --- a/src/planner/tools/workflow.ts +++ b/src/planner/tools/workflow.ts @@ -1,4 +1,14 @@ // Workflow tool registration: koan_complete_step. +// +// This is the single most critical tool in koan. Every subagent workflow depends +// on it being called — it is the mechanism that keeps a pi -p process alive across +// multiple steps. Without it, the LLM would do one turn of work and exit, because +// pi -p processes terminate as soon as the LLM finishes a turn without a tool call. +// +// The workflow pattern: boot prompt → LLM calls koan_complete_step → receives step 1 +// instructions → does work → calls koan_complete_step → receives step 2 (or "Phase +// complete.") → repeat. The tool name itself is a call to action: "complete the step." +// // Tools register once at init; execute callbacks read from the mutable // RuntimeContext at call time, decoupling static registration from phase routing. @@ -24,21 +34,27 @@ export function registerWorkflowTools( ctx: RuntimeContext, ): void { // -- koan_complete_step -- - // The `thoughts` parameter captures the model's work output (analysis, - // review, findings) as a tool parameter instead of as text output. - // This ensures models that cannot mix text + tool_call in one response - // (e.g. GPT-5-codex) still advance the workflow reliably. + // INVARIANT: `thoughts` is internal chain-of-thought reasoning only. + // It is NOT captured as task output and must NOT be treated as such. 
+ // Its purpose: models that cannot mix text output + tool_call in one + // response (e.g. GPT-5-codex) still express reasoning via this param. + // Task output is written to files in the subagent directory: + // - scouts: {subagentDir}/findings.md + // - intake: {subagentDir}/context.md + // - others: as defined by step instructions + // The driver/parent reads those files after the subagent exits. pi.registerTool({ name: "koan_complete_step", label: "Complete current workflow step", description: [ "Signal completion of the current workflow step.", - "Put your analysis, findings, or work output in the `thoughts` parameter.", + "The `thoughts` parameter is for internal chain-of-thought reasoning only — it is NOT captured as task output.", + "Task output must be written to files in your subagent directory (e.g., findings.md for scouts).", "DO NOT call this tool until the step instructions explicitly tell you to.", ].join(" "), parameters: Type.Object({ thoughts: Type.Optional(Type.String({ - description: "Your analysis, findings, or work output for this step.", + description: "Internal chain-of-thought reasoning only. NOT task output. Write task output to files in your subagent directory.", })), }), async execute(_toolCallId, params) { From 668de5ce19a1e0e18ca2afe1df3e02e7d85c1e05 Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Wed, 18 Mar 2026 23:48:28 +0700 Subject: [PATCH 053/412] expand scout phase to 4-step verified investigation workflow --- src/planner/phases/scout/phase.ts | 21 +++- src/planner/phases/scout/prompts.ts | 165 +++++++++++++++++++++------- 2 files changed, 139 insertions(+), 47 deletions(-) diff --git a/src/planner/phases/scout/phase.ts b/src/planner/phases/scout/phase.ts index 6685505..ed193b5 100644 --- a/src/planner/phases/scout/phase.ts +++ b/src/planner/phases/scout/phase.ts @@ -1,5 +1,7 @@ // Scout phase: answers one narrow codebase question and writes findings. -// Single-step, cheap model, no user interaction. 
+// Four-step workflow (orient → investigate → verify → report), cheap model, no user interaction. +// Task context (question, outputFile, role) is received via CLI flags and +// delivered to the LLM through step guidance (returned by koan_complete_step). import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; @@ -12,17 +14,24 @@ import type { StepGuidance } from "../../lib/step.js"; export class ScoutPhase extends BasePhase { protected readonly role = "scout"; - protected readonly totalSteps = 1; + protected readonly totalSteps = 4; + + private readonly question: string; + private readonly outputFile: string; + private readonly investigatorRole: string; constructor( pi: ExtensionAPI, - config: { epicDir: string }, + config: { epicDir: string; question: string; outputFile: string; investigatorRole: string }, ctx: RuntimeContext, log?: Logger, eventLog?: EventLog, ) { super(pi, ctx, log ?? createLogger("ScoutPhase"), eventLog); - void config; // epicDir used via ctx.epicDir for permission scoping + void config.epicDir; // used via ctx.epicDir for permission scoping + this.question = config.question; + this.outputFile = config.outputFile; + this.investigatorRole = config.investigatorRole; } protected getSystemPrompt(): string { @@ -33,7 +42,7 @@ export class ScoutPhase extends BasePhase { return SCOUT_STEP_NAMES[step] ?? `Step ${step}`; } - protected getStepGuidance(_step: number): StepGuidance { - return scoutStepGuidance(); + protected getStepGuidance(step: number): StepGuidance { + return scoutStepGuidance(step, this.question, this.outputFile, this.investigatorRole); } } diff --git a/src/planner/phases/scout/prompts.ts b/src/planner/phases/scout/prompts.ts index 1512e83..e947ceb 100644 --- a/src/planner/phases/scout/prompts.ts +++ b/src/planner/phases/scout/prompts.ts @@ -1,15 +1,30 @@ -// Scout phase prompts — single step: explore & report. 
-// Role-specific context (the question and output file) is embedded in the -// spawn prompt by the spawn function. This provides only process guidance. +// Scout phase prompts — 4-step investigation workflow: +// Step 1: Orient (identify entry points, plan investigation) +// Step 2: Investigate (deep read, trace dependencies, gather evidence) +// Step 3: Verify & Analyze (re-read cited files, organize findings) +// Step 4: Report (write findings.md with verified facts) +// +// The system prompt establishes the investigator identity but contains no task +// details — a scout doesn't know its question until koan_complete_step returns +// step 1 guidance. This is intentional: including the question in the system +// prompt or spawn prompt would front-load instructions before the tool-call +// pattern is established, causing weaker models to answer inline and exit. +// +// The verification step (3) is the key addition over the original single-step +// design. Cheap models hallucinate file paths and API names. Re-reading every +// file before reporting catches confabulation before it reaches the intake-LLM. import type { StepGuidance } from "../../lib/step.js"; export const SCOUT_STEP_NAMES: Record = { - 1: "Explore & Report", + 1: "Orient", + 2: "Investigate", + 3: "Verify & Analyze", + 4: "Report", }; export function scoutSystemPrompt(): string { - return `You are a codebase investigator. You are assigned one narrow, specific question about a codebase. Your job is to read the relevant files, find the answer, and write your findings to a designated output file. + return `You are a codebase investigator. You are assigned one narrow, specific question about a codebase. Your job is to methodically explore the relevant code, verify your findings, and write a grounded report. ## Your role @@ -27,48 +42,116 @@ You find facts. You do NOT interpret, recommend, or opine. - SHOULD be thorough within the question scope: follow references, check related files. 
- SHOULD note explicitly when something is NOT present (e.g., "No tests found for this module"). -## Output format +## Output file -Write a markdown file with these sections: - -## Question -Restate the assigned question verbatim. - -## Findings -Factual observations that answer the question. Use sub-sections if the answer has multiple parts. -Cite file paths and line numbers for every claim. Include code snippets where relevant. - -## Files Examined -List every file you read during this investigation. - -## Gaps -Note anything you could not determine. If no gaps, write: (none) +You write a single markdown file with your findings. The file location and format are provided in your final step. ## Tools available - All read tools (read, bash, grep, glob, find, ls) — for reading the codebase. - \`write\` / \`edit\` — for writing the output file only. -- \`koan_complete_step\` — to signal completion. - -You work in a single step. Read the codebase, answer the question, write the output file.`; +- \`koan_complete_step\` — to signal completion.`; } -// Role-specific context (the question and output file) is embedded in the -// spawn prompt by the spawn function. This provides process guidance only. -export function scoutStepGuidance(): StepGuidance { - return { - title: SCOUT_STEP_NAMES[1], - instructions: [ - "Investigate the codebase to answer the assigned question. Write your findings to the output file.", - "", - "## Process", - "", - "1. Identify the files most likely to contain the answer. Start broad (grep, glob, ls),", - " then narrow down (read specific files).", - "2. Follow cross-references: if a file imports from another file, check that file too.", - "3. Be thorough within the question scope. Do not stop at the first partial answer.", - "4. Write your findings to the output file using the format described in your system prompt.", - "5. 
Call `koan_complete_step` with a one-sentence summary of your key finding.", - ], - }; +export function scoutStepGuidance( + step: number, + question: string, + outputFile: string, + investigatorRole: string, +): StepGuidance { + switch (step) { + case 1: + return { + title: SCOUT_STEP_NAMES[1], + instructions: [ + "Understand the question and identify where to look in the codebase.", + "", + "## Your Assignment", + "", + ...(question ? [`**Question:** ${question}`] : []), + ...(investigatorRole ? [`**Your investigator role:** ${investigatorRole}`] : []), + "", + "## Actions", + "", + "1. Parse the question: what exactly are you being asked to find?", + "2. Identify search terms, file patterns, and likely directory locations.", + "3. Use grep, glob, find, or ls to locate 3–8 candidate entry-point files.", + "4. Do NOT read file contents yet — just identify targets.", + "", + "Report your entry points and investigation plan in the `thoughts` parameter.", + ], + }; + + case 2: + return { + title: SCOUT_STEP_NAMES[2], + instructions: [ + "Read the entry-point files and trace through the code to answer the question.", + "", + "## Actions", + "", + "1. Read each entry-point file identified in the previous step.", + "2. Follow imports, cross-references, and call chains to related files.", + "3. For each relevant finding, note the file path, line numbers, and a verbatim code excerpt.", + "4. Be thorough: do not stop at the first partial answer. Check related files.", + "5. If a file turns out to be irrelevant, move on — do not force-fit it.", + "", + "Report your findings and the files you read in the `thoughts` parameter.", + ], + }; + + case 3: + return { + title: SCOUT_STEP_NAMES[3], + instructions: [ + "Verify every claim you plan to report and organize your findings.", + "", + "## Verification", + "", + "1. Re-read every file you plan to cite in your report.", + "2. Confirm that file paths are correct and the code excerpts match the actual content.", + "3. 
If you find a discrepancy, correct it. If a file does not exist, remove the reference.", + "", + "## Analysis", + "", + "4. Organize your verified findings into a clear answer to the original question.", + "5. Identify any gaps — things you could not determine or areas you could not access.", + "6. Note anything that is explicitly NOT present (missing tests, missing config, etc.).", + "", + "Report your verified findings and any gaps in the `thoughts` parameter.", + ], + }; + + case 4: + return { + title: SCOUT_STEP_NAMES[4], + instructions: [ + "Write your findings to the output file.", + "", + `**Output file:** ${outputFile}`, + "", + "Write a markdown file with these exact sections:", + "", + "## Question", + "Restate the assigned question verbatim.", + "", + "## Findings", + "Factual observations that answer the question. Use sub-sections if the answer has multiple parts.", + "Cite file paths and line numbers for every claim. Include code snippets where relevant.", + "Every finding must be backed by a file you actually read — no inferred claims.", + "", + "## Files Examined", + "List every file you read during this investigation.", + "", + "## Gaps", + "Note anything you could not determine. 
If no gaps, write: (none)", + ], + }; + + default: + return { + title: `Step ${step}`, + instructions: [`Execute step ${step}.`], + }; + } } From 8d5104c9f9e11ce1dd6efadc86b69fc479d73c40 Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Wed, 18 Mar 2026 23:48:35 +0700 Subject: [PATCH 054/412] inline scout findings content in koan_request_scouts response --- src/planner/tools/ask.ts | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/src/planner/tools/ask.ts b/src/planner/tools/ask.ts index 57a8b8a..c567549 100644 --- a/src/planner/tools/ask.ts +++ b/src/planner/tools/ask.ts @@ -5,6 +5,9 @@ // koan_ask_question — ask the user a question, get answers // koan_request_scouts — request parallel codebase scouts, get findings paths +import { promises as fs } from "node:fs"; +import * as path from "node:path"; + import { Type, type Static } from "@sinclair/typebox"; import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; @@ -322,19 +325,27 @@ export function registerAskTools(pi: ExtensionAPI, ctx: RuntimeContext): void { switch (pollResult) { case "completed": { - const lines: string[] = [ + const sections: string[] = [ `Scout findings: ${findings.length} completed, ${failures.length} failed.`, "", ]; - if (findings.length > 0) { - lines.push("Findings files (read these for codebase context):"); - for (const f of findings) lines.push(` ${f}`); + // Read each findings file and include contents verbatim. 
+ for (const f of findings) { + try { + const content = await fs.readFile(f, "utf8"); + sections.push(`--- scout: ${path.basename(path.dirname(f))} ---`); + sections.push(content.trim()); + sections.push(""); + } catch { + sections.push(`--- scout: ${path.basename(path.dirname(f))} --- (could not read findings)`); + sections.push(""); + } } if (failures.length > 0) { - lines.push(`Failed scouts (non-fatal, proceed without them): ${failures.join(", ")}`); + sections.push(`Failed scouts (non-fatal, proceed without them): ${failures.join(", ")}`); } return { - content: [{ type: "text" as const, text: lines.join("\n") }], + content: [{ type: "text" as const, text: sections.join("\n") }], details: undefined, }; } From b25322e5f9c04bb8169635bcd3a12ffeeddac5ae Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Wed, 18 Mar 2026 23:48:41 +0700 Subject: [PATCH 055/412] add web dashboard for pipeline monitoring and user interaction --- src/planner/web/ARCHITECTURE.md | 143 +++ src/planner/web/css/animations.css | 40 + src/planner/web/css/components.css | 740 ++++++++++++++++ src/planner/web/css/layout.css | 212 +++++ src/planner/web/css/variables.css | 75 ++ src/planner/web/html/index.html | 17 + src/planner/web/js/app.jsx | 17 + .../web/js/components/ActivityFeed.jsx | 76 ++ .../web/js/components/AgentMonitor.jsx | 48 ++ src/planner/web/js/components/AgentRow.jsx | 40 + src/planner/web/js/components/App.jsx | 39 + src/planner/web/js/components/Header.jsx | 24 + src/planner/web/js/components/ModelConfig.jsx | 152 ++++ .../web/js/components/Notifications.jsx | 25 + .../web/js/components/PhaseContent.jsx | 32 + src/planner/web/js/components/PillStrip.jsx | 29 + src/planner/web/js/components/ProgressBar.jsx | 15 + .../web/js/components/SubagentMeta.jsx | 20 + src/planner/web/js/components/Timer.jsx | 17 + .../web/js/components/forms/QuestionCard.jsx | 84 ++ .../web/js/components/forms/QuestionForm.jsx | 60 ++ .../web/js/components/forms/ReviewForm.jsx | 51 ++ 
.../web/js/components/phases/Completion.jsx | 23 + .../js/components/phases/Consolidation.jsx | 39 + .../js/components/phases/ContextAnalysis.jsx | 21 + .../web/js/components/phases/Execution.jsx | 34 + .../web/js/components/phases/Loading.jsx | 14 + .../js/components/phases/ScoutExploration.jsx | 60 ++ src/planner/web/js/lib/api.js | 27 + src/planner/web/js/lib/utils.js | 21 + src/planner/web/js/sse.js | 48 ++ src/planner/web/js/store.js | 19 + src/planner/web/server-types.ts | 249 ++++++ src/planner/web/server.ts | 815 ++++++++++++++++++ 34 files changed, 3326 insertions(+) create mode 100644 src/planner/web/ARCHITECTURE.md create mode 100644 src/planner/web/css/animations.css create mode 100644 src/planner/web/css/components.css create mode 100644 src/planner/web/css/layout.css create mode 100644 src/planner/web/css/variables.css create mode 100644 src/planner/web/html/index.html create mode 100644 src/planner/web/js/app.jsx create mode 100644 src/planner/web/js/components/ActivityFeed.jsx create mode 100644 src/planner/web/js/components/AgentMonitor.jsx create mode 100644 src/planner/web/js/components/AgentRow.jsx create mode 100644 src/planner/web/js/components/App.jsx create mode 100644 src/planner/web/js/components/Header.jsx create mode 100644 src/planner/web/js/components/ModelConfig.jsx create mode 100644 src/planner/web/js/components/Notifications.jsx create mode 100644 src/planner/web/js/components/PhaseContent.jsx create mode 100644 src/planner/web/js/components/PillStrip.jsx create mode 100644 src/planner/web/js/components/ProgressBar.jsx create mode 100644 src/planner/web/js/components/SubagentMeta.jsx create mode 100644 src/planner/web/js/components/Timer.jsx create mode 100644 src/planner/web/js/components/forms/QuestionCard.jsx create mode 100644 src/planner/web/js/components/forms/QuestionForm.jsx create mode 100644 src/planner/web/js/components/forms/ReviewForm.jsx create mode 100644 src/planner/web/js/components/phases/Completion.jsx create 
mode 100644 src/planner/web/js/components/phases/Consolidation.jsx create mode 100644 src/planner/web/js/components/phases/ContextAnalysis.jsx create mode 100644 src/planner/web/js/components/phases/Execution.jsx create mode 100644 src/planner/web/js/components/phases/Loading.jsx create mode 100644 src/planner/web/js/components/phases/ScoutExploration.jsx create mode 100644 src/planner/web/js/lib/api.js create mode 100644 src/planner/web/js/lib/utils.js create mode 100644 src/planner/web/js/sse.js create mode 100644 src/planner/web/js/store.js create mode 100644 src/planner/web/server-types.ts create mode 100644 src/planner/web/server.ts diff --git a/src/planner/web/ARCHITECTURE.md b/src/planner/web/ARCHITECTURE.md new file mode 100644 index 0000000..8731e36 --- /dev/null +++ b/src/planner/web/ARCHITECTURE.md @@ -0,0 +1,143 @@ +# Web UI Architecture + +Single-page dashboard served by `server.ts`. Pushes state via SSE; receives +user input via POST. Built with Preact + Zustand — see +`plans/2026-03-16-preact-zustand-rewrite.md` for the full decision record. + +--- + +## Directory layout + +``` +server.ts HTTP server, SSE push, WebServerHandle API +server-types.ts Shared TypeScript types +html/index.html Shell —

+ module script, no static skeleton +css/ Four unchanged stylesheets (variables, layout, components, animations) +dist/app.js Compiled bundle — generated, not committed +js/ + app.jsx Entry: render(), connectSSE(), heartbeat interval + store.js Zustand store (single source of truth) + sse.js SSE connection + store updates + lib/utils.js formatTokens, formatElapsed, shortenModel + lib/api.js submitAnswers, submitReview (fetch wrappers) + components/ Preact component tree (see §Component tree below) +``` + +--- + +## Build pipeline + +esbuild compiles `js/app.jsx` and all imports into `dist/app.js` (single ESM +bundle, ~44KB raw / ~16KB gzip). + +**The alias flags are mandatory.** zustand v4 imports from `react` internally. +Without aliasing, esbuild bundles the full React 19 runtime (~17KB) alongside +Preact — two competing VDOM reconcilers that cannot share a hook dispatcher. +The aliases redirect those imports to `preact/compat`: + +``` +--alias:react=preact/compat --alias:react-dom=preact/compat +``` + +These appear in both the npm script (`build:web`) and in the `esbuild.build()` +call inside `ensureBundle()` in `server.ts`. If you add them to one, add them +to both. + +**On-demand build:** `ensureBundle()` in `server.ts` runs at the top of +`startWebServer()`. It stats `dist/app.js` against the newest file in `js/` +and rebuilds only when stale. Adds ~100ms on first start; skips on subsequent +starts. No manual build step is needed during development — pi loads extensions +from source, so `startWebServer()` is always the entry point. + +**CI/test path:** `npm run build` runs `build:web` then `tsc`. The tsc step +does not process JSX; it type-checks the TypeScript source only. + +**zustand version:** Pinned to v4 (`^4.5.7`). zustand v5 moved its default +export to `zustand/react`, which imports React at module level and breaks +the esbuild bundle even with the alias. 
+ +--- + +## Data flow + +``` +server.ts ──SSE──► sse.js ──setState──► Zustand store ──selector──► components + │ +user action ◄──fetch── lib/api.js ◄──────────────────────────┘ +``` + +1. `server.ts` pushes SSE events on a 2-second polling tick. +2. `sse.js` registers one `addEventListener` per event type. Each handler + calls `useStore.setState()` — the static method, callable outside + component context. +3. Components subscribe via `useStore(s => s.slice)`. Zustand shallow-merges + `setState` calls and notifies only subscribers whose selected slice changed. + A component reading `s.agents` does not re-render when `s.phase` changes. +4. User actions (form submit, heartbeat) call `lib/api.js` fetch wrappers + which POST to `/api/answer`, `/api/review`, or `/api/heartbeat`. + +`pendingInput` is cleared by the server: a phase transition out of `intake` +clears it in the `phase` handler; `ask-cancelled` / `review-cancelled` clear +it by request ID. + +--- + +## Component tree + +``` +App +├── ProgressBar reads intakeProgress.{subPhase,intakeDone} +├── Header +│ ├── PillStrip reads intakeProgress.{subPhase,intakeDone} +│ └── Timer reads subagent.startedAt, ticks via useEffect interval +├── main.phase-content +│ └── PhaseContent dispatch hub (see below) +├── AgentMonitor reads agents; renders AgentRow per agent +└── Notifications reads notifications; auto-dismisses via useEffect +``` + +**PhaseContent dispatch order:** + +1. `!phase` → `<Loading />` +2. `pendingInput.type === 'ask'` → `<QuestionForm />` +3. `pendingInput.type === 'review'` → `<ReviewForm />` +4. `phase === 'intake'` → dispatches on `intakeProgress.subPhase`: + - `'context'` or null → `<ContextAnalysis />` + - `'explore'` → `<ScoutExploration />` + - `'questions'` or `'spec'` → `<Consolidation />` +5. `phase === 'completed'` → `<Completion />` +6. default → `<Execution />` + +`key={requestId}` on forms forces a full remount when a new request arrives, +resetting local selection state without any explicit cleanup. + +--- + +## Server-side changes + +**`ensureBundle()`** — async function before `startWebServer()` body.
Uses +esbuild JS API via dynamic `await import("esbuild")`. `STATIC_ASSETS` is +constructed inside `startWebServer()` after this call completes (it was at +module scope in the old code; moved because asset loading must follow the build). + +**`intake-progress` SSE event** — denormalized event carrying +`{ subPhase: string | null, intakeDone: boolean }`. Pushed from: +- `startAgentPolling()` — after each `agents` push, if subPhase or intakeDone changed +- `handle.pushPhase()` — updates `intakeDone` on every phase transition + +Replayed in `replayState()` on SSE reconnect. Allows `PhaseContent`, +`PillStrip`, and `ProgressBar` to all subscribe to the same store slice +(`intakeProgress`) rather than using two different mechanisms. + +--- + +## Conventions + +| Convention | Rule | +|---|---| +| JSX attribute | `class`, not `className` (Preact uses HTML attribute names) | +| Hook imports | `import { useState, useEffect } from 'preact/hooks'` | +| Render import | `import { render } from 'preact'` (not `preact/compat`) | +| External setState | `useStore.setState(...)` — static method, works outside components | +| Fragment syntax | `<>…</>` — works because build uses `--jsx=automatic` | +| Zustand merge | `setState` merges shallowly; always replace the full slice, never mutate nested objects | diff --git a/src/planner/web/css/animations.css b/src/planner/web/css/animations.css new file mode 100644 index 0000000..046b2b8 --- /dev/null +++ b/src/planner/web/css/animations.css @@ -0,0 +1,40 @@ +/* CSS-only spinner */ +@keyframes spin { + to { transform: rotate(360deg); } +} + +/* Phase content crossfade */ +@keyframes fade-in { + from { opacity: 0; } + to { opacity: 1; } +} + +.phase-content .phase-inner { + animation: fade-in 250ms ease-out; +} + +/* Sliding text input for "Other" option */ +@keyframes slide-open { + from { max-height: 0; opacity: 0; } + to { max-height: 80px; opacity: 1; } +} + +/* Pill state transitions */ +.pill { + transition: background 200ms ease, color
200ms ease, border-color 200ms ease; +} + +/* Progress bar fill */ +.progress-fill { + transition: width 400ms cubic-bezier(0.4, 0, 0.2, 1); +} + +/* Notification fade-out */ +.notification.fade-out { + animation: fade-out 300ms ease-in forwards; +} + +@keyframes fade-out { + from { opacity: 1; transform: translateY(0); } + to { opacity: 0; transform: translateY(8px); } +} diff --git a/src/planner/web/css/components.css b/src/planner/web/css/components.css new file mode 100644 index 0000000..a81ccc7 --- /dev/null +++ b/src/planner/web/css/components.css @@ -0,0 +1,740 @@ +/* ---- Pill strip ---- */ +.pill-strip { + display: flex; + border-radius: var(--radius-md); + overflow: hidden; + border: 1px solid var(--border); +} + +.pill { + font-family: var(--font-mono); + font-size: var(--font-size-sm); + padding: 3px 10px; + border-right: 1px solid var(--border); + color: var(--text-dim); + background: var(--bg); + transition: background 150ms, color 150ms; + white-space: nowrap; +} + +.pill:last-child { + border-right: none; +} + +.pill.active { + background: var(--blue-border); + color: #fff; + border-color: var(--blue-border); +} + +.pill.done { + background: var(--green-border); + color: #fff; + border-color: var(--green-border); +} + +.pill.done::before { + content: "✓ "; +} + +.pill.active::before { + content: "● "; +} + +/* ---- Badges ---- */ +.badge { + font-family: var(--font-mono); + font-size: var(--font-size-xs); + padding: 1px 6px; + border-radius: 10px; + font-weight: 600; +} + +.badge.done { background: var(--green-border); color: #fff; } +.badge.active { background: var(--blue-border); color: #fff; } +.badge.failed { background: var(--red-border); color: #fff; } + +/* ---- Agent table ---- */ +.agent-table { + width: 100%; + border-collapse: collapse; + font-size: var(--font-size-sm); +} + +.agent-table th { + font-family: var(--font-mono); + font-size: var(--font-size-xs); + color: var(--text-dim); + text-transform: uppercase; + letter-spacing: 0.06em; 
+ padding: 4px 8px; + text-align: left; + border-bottom: 1px solid var(--border); +} + +.agent-table td { + padding: 5px 8px; + vertical-align: top; + border-bottom: 1px solid var(--border-light); +} + +.col-status { width: 24px; text-align: center; } +.col-model { width: 90px; white-space: nowrap; } +.col-parent { width: 90px; white-space: nowrap; } +.col-tokens { width: 60px; text-align: right; white-space: nowrap; } +.col-doing { /* flex */ } + +.agent-status-running { color: var(--blue); } +.agent-status-done { color: var(--green); font-weight: 600; } +.agent-status-failed { color: var(--red); } + +.agent-name-running { color: var(--text); font-weight: 600; font-family: var(--font-mono); } +.agent-name-done { color: var(--green); font-family: var(--font-mono); } +.agent-name-failed { color: var(--red); font-family: var(--font-mono); } + +.agent-model-cell { font-family: var(--font-mono); color: var(--text-muted); } +.agent-parent-cell { font-family: var(--font-mono); color: var(--text-dim); } +.agent-tokens-cell { font-family: var(--font-mono); color: var(--text-muted); } + +.agent-doing-lines { + display: flex; + flex-direction: column; + gap: 1px; +} + +.agent-doing-line { + font-family: var(--font-mono); + font-size: var(--font-size-xs); + color: var(--text-muted); + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; + max-width: 600px; +} + +.agent-doing-line:last-child { + color: var(--text); +} + +/* ---- Card ---- */ +.card { + background: var(--bg-surface); + border: 1px solid var(--border); + border-radius: var(--radius-md); + padding: var(--gap-md) var(--gap-lg); + margin-bottom: var(--gap-md); +} + +.card.card-running { + border-left: 3px solid var(--blue); +} + +.card.card-done { + background: var(--green-bg); + border-color: var(--green-border); +} + +.card.card-failed { + background: var(--red-bg); + border-color: var(--red-border); +} + +.card-header { + display: flex; + align-items: center; + gap: var(--gap-sm); + margin-bottom: 
var(--gap-sm); +} + +.card-title { + font-family: var(--font-mono); + font-weight: 700; + font-size: var(--font-size-lg); + color: var(--text-strong); +} + +.card-role { + margin-left: auto; + font-family: var(--font-mono); + font-size: var(--font-size-sm); + color: var(--text-dim); +} + +.card-body { + font-family: var(--font-sans); + font-size: var(--font-size-lg); + color: var(--text-muted); + line-height: 1.5; +} + +/* ---- Question cards ---- */ +.question-card { + background: var(--bg-surface); + border: 1px solid var(--border); + border-radius: var(--radius-md); + padding: var(--gap-lg); + margin-bottom: var(--gap-lg); +} + +.question-header { + font-family: var(--font-mono); + font-size: var(--font-size-xs); + color: var(--text-dim); + text-transform: uppercase; + letter-spacing: 0.06em; + margin-bottom: var(--gap-sm); +} + +.question-text { + font-family: var(--font-sans); + font-size: 18px; + font-weight: 500; + color: var(--text-strong); + margin-bottom: var(--gap-md); + line-height: 1.5; +} + +.question-multi-hint { + font-family: var(--font-mono); + font-size: var(--font-size-xs); + color: var(--blue); + margin-bottom: var(--gap-sm); +} + +.options-list { + display: flex; + flex-direction: column; + gap: var(--gap-xs); +} + +.option { + display: flex; + align-items: flex-start; + gap: var(--gap-sm); + padding: var(--gap-sm) var(--gap-md); + border: 1px solid var(--border); + border-radius: var(--radius-sm); + background: var(--bg); + cursor: pointer; + transition: border-color 100ms, background 100ms; + user-select: none; +} + +.option:hover { + border-color: var(--text-dim); +} + +.option.selected { + border-color: var(--blue-border); + background: var(--blue-bg); +} + +.option-other { + border-style: dashed; +} + +.radio-dot, .checkbox-dot { + width: 14px; + height: 14px; + border: 2px solid var(--text-ghost); + border-radius: 50%; + flex-shrink: 0; + margin-top: 2px; + transition: border-color 100ms, background 100ms; +} + +.checkbox-dot { + 
border-radius: 3px; +} + +.option.selected .radio-dot, +.option.selected .checkbox-dot { + border-color: var(--blue); + background: var(--blue); +} + +.option.selected .checkbox-dot::after { + content: "✓"; + display: block; + color: #fff; + font-size: 9px; + text-align: center; + line-height: 10px; +} + +.option-text { + font-family: var(--font-sans); + font-size: var(--font-size-lg); + color: var(--text); + flex: 1; +} + +.option-other .option-text { + color: var(--text-dim); +} + +.recommended-badge { + font-family: var(--font-mono); + font-size: var(--font-size-xs); + color: var(--blue); + margin-left: auto; + white-space: nowrap; +} + +.other-input { + display: none; + width: 100%; + margin-top: var(--gap-sm); + padding: var(--gap-sm); + background: var(--bg); + border: 1px solid var(--border); + border-radius: var(--radius-sm); + color: var(--text); + font-family: var(--font-sans); + font-size: var(--font-size-md); + outline: none; +} + +.other-input:focus { + border-color: var(--blue-border); +} + +.other-input.visible { + display: block; + animation: slide-open 150ms ease-out; +} + +/* ---- Form actions ---- */ +.form-actions { + display: flex; + gap: var(--gap-md); + margin-top: var(--gap-xl); + align-items: center; +} + +.form-helper { + font-family: var(--font-mono); + font-size: var(--font-size-sm); + color: var(--text-dim); + margin-left: auto; +} + +.btn { + padding: var(--gap-sm) var(--gap-lg); + border-radius: var(--radius-sm); + font-size: var(--font-size-md); + font-family: var(--font-sans); + cursor: pointer; + border: 1px solid transparent; + transition: opacity 100ms; +} + +.btn:disabled { + opacity: 0.5; + cursor: not-allowed; +} + +.btn-primary { + background: var(--green-border); + color: #fff; + border-color: var(--green-border); +} + +.btn-secondary { + background: transparent; + color: var(--text-muted); + border-color: var(--border); +} + +/* ---- Review checklist ---- */ +.review-story { + display: flex; + align-items: center; + gap: 
var(--gap-md); + padding: var(--gap-sm) var(--gap-md); + border: 1px solid var(--border); + border-radius: var(--radius-sm); + background: var(--bg); + margin-bottom: var(--gap-sm); + cursor: pointer; + user-select: none; +} + +.review-story.checked { + border-color: var(--green-border); + background: var(--green-bg); +} + +.review-story-checkbox { + width: 16px; + height: 16px; + border: 2px solid var(--text-ghost); + border-radius: 3px; + flex-shrink: 0; + transition: border-color 100ms, background 100ms; +} + +.review-story.checked .review-story-checkbox { + border-color: var(--green-border); + background: var(--green-border); +} + +.review-story.checked .review-story-checkbox::after { + content: "✓"; + display: block; + color: #fff; + font-size: 10px; + text-align: center; + line-height: 12px; +} + +.review-story-id { + font-family: var(--font-mono); + font-size: var(--font-size-md); + color: var(--text); + font-weight: 600; +} + +.review-story-title { + font-family: var(--font-sans); + font-size: var(--font-size-md); + color: var(--text-muted); +} + +/* ---- Loading spinner ---- */ +.spinner { + width: 24px; + height: 24px; + border: 2px solid var(--border); + border-top-color: var(--blue); + border-radius: 50%; + animation: spin 800ms linear infinite; +} + +/* ---- Topic card ---- */ +.topic-card { + background: var(--bg-surface); + border: 1px solid var(--border); + border-radius: var(--radius-md); + padding: var(--gap-md) var(--gap-lg); + margin-top: var(--gap-lg); + max-width: 640px; +} + +.topic-label { + font-family: var(--font-mono); + font-size: var(--font-size-xs); + color: var(--text-dim); + text-transform: uppercase; + letter-spacing: 0.08em; + margin-bottom: var(--gap-xs); +} + +.topic-text { + font-family: var(--font-sans); + font-size: var(--font-size-lg); + color: var(--text); + font-style: italic; + line-height: 1.5; +} + +/* ---- Activity feed (context analysis) ---- */ +.activity-feed { + background: var(--bg-surface); + border: 1px solid 
var(--border); + border-radius: var(--radius-md); + padding: var(--gap-md); + margin-top: var(--gap-md); +} + +.activity-line { + display: flex; + gap: var(--gap-sm); + font-family: var(--font-mono); + font-size: var(--font-size-md); + color: var(--text-muted); + padding: 3px 0; +} + +.activity-tool { + color: var(--blue); + min-width: 60px; +} + +/* ---- Phase status messages ---- */ +.phase-status { + font-family: var(--font-sans); + font-size: var(--font-size-lg); + color: var(--text); + margin-bottom: var(--gap-lg); +} + +.phase-heading { + font-family: var(--font-sans); + font-size: 22px; + font-weight: 600; + color: var(--text-strong); + margin-bottom: var(--gap-lg); +} + +/* ---- Summary checklist ---- */ +.summary-list { + background: var(--bg-surface); + border: 1px solid var(--border); + border-radius: var(--radius-md); + padding: var(--gap-md) var(--gap-lg); +} + +.summary-item { + display: flex; + align-items: center; + gap: var(--gap-md); + padding: 4px 0; + font-family: var(--font-sans); + font-size: var(--font-size-md); +} + +.summary-item .icon-done { color: var(--green); } +.summary-item .icon-pending { color: var(--text-dim); } + +/* ---- Notification toasts ---- */ +#notifications { + position: fixed; + bottom: var(--gap-xl); + right: var(--gap-xl); + display: flex; + flex-direction: column; + gap: var(--gap-sm); + z-index: 200; +} + +.notification { + padding: var(--gap-sm) var(--gap-lg); + border-radius: var(--radius-md); + font-family: var(--font-sans); + font-size: var(--font-size-md); + color: #fff; + animation: fade-in 150ms ease-out; +} + +.notification.info { background: var(--blue-border); } +.notification.warning { background: #9a6700; } +.notification.error { background: var(--red-border); } + +/* ---- Count progress indicator ---- */ +.count-progress { + font-family: var(--font-mono); + font-size: var(--font-size-sm); + color: var(--text-dim); + margin-bottom: var(--gap-lg); +} + +/* ---- Context so far section ---- */ 
+.context-section-label { + font-family: var(--font-mono); + font-size: var(--font-size-xs); + color: var(--text-dim); + text-transform: uppercase; + letter-spacing: 0.08em; + margin: var(--gap-lg) 0 var(--gap-sm); +} + +.context-items { + list-style: none; + padding: 0; + margin: 0; +} + +.context-items li { + padding: 3px 0; + font-family: var(--font-sans); + font-size: var(--font-size-md); + color: var(--text-muted); +} + +.context-items li::before { + content: "• "; + color: var(--green); +} + +/* ---- Model config ---- */ +.model-config-tiers { + display: flex; + flex-direction: column; + gap: var(--gap-lg); + margin-top: var(--gap-lg); + margin-bottom: var(--gap-xl); +} + +.model-tier-row { + background: var(--bg-surface); + border: 1px solid var(--border); + border-radius: var(--radius-md); + padding: var(--gap-md) var(--gap-lg); +} + +.model-tier-header { + display: flex; + align-items: center; + gap: var(--gap-sm); + margin-bottom: var(--gap-xs); +} + +.model-tier-label { + font-family: var(--font-mono); + font-size: var(--font-size-lg); + font-weight: 700; + color: var(--text-strong); + text-transform: uppercase; + letter-spacing: 0.06em; +} + +.model-tier-description { + font-family: var(--font-sans); + font-size: var(--font-size-md); + color: var(--text-muted); + line-height: 1.5; + margin: 0 0 var(--gap-md); +} + +.model-tier-input { + width: 100%; + padding: var(--gap-sm) var(--gap-md); + background: var(--bg); + border: 1px solid var(--border); + border-radius: var(--radius-sm); + color: var(--text); + font-family: var(--font-mono); + font-size: var(--font-size-md); + outline: none; + box-sizing: border-box; +} + +.model-tier-input:focus { + border-color: var(--blue-border); +} + +.model-tier-input::placeholder { + color: var(--text-dim); + font-style: italic; +} +.model-tier-select { + width: 100%; + padding: var(--gap-sm) var(--gap-md); + background: var(--bg); + border: 1px solid var(--border); + border-radius: var(--radius-sm); + color: 
var(--text); + font-family: var(--font-mono); + font-size: var(--font-size-md); + outline: none; + box-sizing: border-box; + cursor: pointer; + -webkit-appearance: none; + appearance: none; + background-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='12' height='8'%3E%3Cpath d='M1 1l5 5 5-5' stroke='%23727d8a' stroke-width='1.5' fill='none'/%3E%3C/svg%3E"); + background-repeat: no-repeat; + background-position: right 12px center; + padding-right: 36px; +} + +.model-tier-select:focus { + border-color: var(--blue-border); +} + +.model-tier-select option { + background: var(--bg-surface); + color: var(--text); +} + +.model-tier-select optgroup { + color: var(--text-muted); + font-style: normal; +} + +.model-config-warning { + font-family: var(--font-sans); + font-size: var(--font-size-sm); + color: var(--red); + margin-bottom: var(--gap-md); +} + +/* ---- Settings button ---- */ +.header-right { + display: flex; + align-items: center; + gap: var(--gap-lg); +} + +.settings-btn { + background: none; + border: 1px solid var(--border); + border-radius: var(--radius-sm); + color: var(--text-muted); + font-size: 16px; + padding: 4px 8px; + cursor: pointer; + transition: color 150ms, border-color 150ms; + line-height: 1; +} + +.settings-btn:hover { + color: var(--text-strong); + border-color: var(--text-dim); +} + +/* ---- Activity feed: in-flight + flash ---- */ +@keyframes result-flash { + 0% { background: rgba(126, 231, 135, 0.15); } + 100% { background: transparent; } +} + +.activity-inflight .activity-summary { + color: var(--yellow); +} + +.activity-flash { + animation: result-flash 400ms ease-out; + border-radius: 3px; +} + +.activity-dots { + display: inline-block; + overflow: hidden; + vertical-align: bottom; + animation: dots-anim 1.5s steps(4, end) infinite; + width: 0; + max-width: 18px; +} + +@keyframes dots-anim { + 0% { width: 0; } + 100% { width: 18px; } +} + +/* ---- Agent row: spinner prefix dots ---- */ +@keyframes pulse-dot 
{ + 0%, 100% { opacity: 0.3; } + 50% { opacity: 1; } +} + +.agent-doing-prefix { + display: inline-block; + width: 12px; + text-align: center; + margin-right: 4px; + flex-shrink: 0; +} + +.prefix-done { + color: var(--green); +} + +.prefix-active { + color: var(--blue); + animation: pulse-dot 1s ease-in-out infinite; +} + +.agent-doing-inflight { + color: var(--text) !important; +} diff --git a/src/planner/web/css/layout.css b/src/planner/web/css/layout.css new file mode 100644 index 0000000..b20b374 --- /dev/null +++ b/src/planner/web/css/layout.css @@ -0,0 +1,212 @@ +.app { + display: flex; + flex-direction: column; + height: 100vh; + overflow: hidden; +} + +/* Progress bar — 3px at the very top */ +.progress-bar { + position: fixed; + top: 0; + left: 0; + right: 0; + height: 3px; + background: var(--border); + z-index: 100; +} + +.progress-fill { + height: 100%; + width: 0%; + background: linear-gradient(90deg, var(--green), var(--blue)); + transition: width 400ms cubic-bezier(0.4, 0, 0.2, 1); +} + +/* Header */ +.header { + position: fixed; + top: 3px; /* below progress bar */ + left: 0; + right: 0; + height: var(--header-height); + display: flex; + align-items: center; + justify-content: space-between; + padding: 0 var(--gap-xl); + background: var(--bg); + border-bottom: 1px solid var(--border); + z-index: 50; +} + +.header-left { + display: flex; + align-items: center; + gap: var(--gap-lg); +} + +.logo { + font-family: var(--font-mono); + font-size: 18px; + font-weight: 600; + color: var(--text-strong); + letter-spacing: 0.05em; +} + +.timer { + font-family: var(--font-mono); + font-size: var(--font-size-md); + color: var(--text-muted); +} + +/* Main panel — fills all remaining vertical space */ +.main-panel { + flex: 1 1 0; + min-height: 0; + display: flex; + flex-direction: column; + margin-top: calc(3px + var(--header-height)); +} + +/* Subagent metadata bar */ +.subagent-meta { + flex: 0 0 auto; + display: flex; + align-items: center; + gap: 
var(--gap-lg); + padding: var(--gap-sm) var(--gap-xl); + border-bottom: 1px solid var(--border); + background: var(--bg-surface); + font-family: var(--font-mono); + font-size: var(--font-size-sm); +} + +.meta-role { + color: var(--blue); + font-weight: 600; + text-transform: uppercase; + letter-spacing: 0.06em; +} + +.meta-item { + color: var(--text-muted); +} + +.meta-item::before { + content: '·'; + color: var(--text-ghost); + margin-right: var(--gap-lg); +} + +.meta-tokens { + margin-left: auto; + color: var(--text-dim); +} + +/* Phase content area — scrollable, fills remaining space */ +.phase-content { + flex: 1 1 0; + min-height: 0; + overflow-y: auto; + padding: var(--gap-xl); + display: flex; + flex-direction: column; + align-items: center; +} + +.phase-inner { + width: 100%; + max-width: 960px; +} + +/* Activity feed — fills remaining space in phase-content, scrollable */ +.activity-feed-scroll { + flex: 1 1 0; + min-height: 0; + overflow-y: auto; + padding: var(--gap-md) var(--gap-xl); + /* Subtle fade at top when scrolled */ + mask-image: linear-gradient(to bottom, transparent, black 8px, black); + -webkit-mask-image: linear-gradient(to bottom, transparent, black 8px, black); +} + +.activity-feed-inner { + display: flex; + flex-direction: column; + gap: 1px; +} + +.activity-line { + display: flex; + gap: var(--gap-sm); + font-family: var(--font-mono); + font-size: var(--font-size-sm); + color: var(--text-dim); + padding: 2px 0; + line-height: 1.4; +} + +.activity-line.activity-high { + color: var(--text-muted); +} + +.activity-tool { + color: var(--text-ghost); + min-width: 48px; + flex-shrink: 0; +} + +.activity-high .activity-tool { + color: var(--blue); +} + +.activity-summary { + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; +} + +.activity-detail { + color: var(--text-ghost); + padding-left: 12px; +} + +/* Monitor — sticky bottom, sizes to content */ +.monitor { + flex: 0 0 auto; + max-height: 40vh; + overflow-y: auto; + 
border-top: 1px solid var(--border); + background: var(--bg-elevated); + padding: var(--gap-md) var(--gap-xl); + /* Fade at top edge when scrollable */ + mask-image: linear-gradient(to bottom, transparent, black 12px, black); + -webkit-mask-image: linear-gradient(to bottom, transparent, black 12px, black); +} + +.agent-table-header { + display: flex; + align-items: center; + gap: var(--gap-md); + margin-bottom: var(--gap-sm); +} + +.monitor-label { + font-family: var(--font-mono); + font-size: var(--font-size-xs); + color: var(--text-dim); + text-transform: uppercase; + letter-spacing: 0.08em; +} + +.agent-badges { + display: flex; + gap: var(--gap-xs); +} + +.token-totals { + margin-left: auto; + font-family: var(--font-mono); + font-size: var(--font-size-sm); + color: var(--text-muted); +} diff --git a/src/planner/web/css/variables.css b/src/planner/web/css/variables.css new file mode 100644 index 0000000..3e967aa --- /dev/null +++ b/src/planner/web/css/variables.css @@ -0,0 +1,75 @@ +:root { + /* Background layers */ + --bg: #0d1117; + --bg-surface: #161b22; + --bg-elevated: #0c0f14; + + /* Borders */ + --border: #21262d; + --border-light: #161b22; + + /* Text hierarchy */ + --text: #d6dde5; + --text-strong: #f0f3f6; + --text-muted: #9da7b3; + --text-dim: #727d8a; + --text-ghost: #444d56; + + /* Status colors */ + --green: #7ee787; + --green-bg: rgba(35, 134, 54, 0.06); + --green-border: #238636; + --blue: #58a6ff; + --blue-bg: rgba(31, 111, 235, 0.06); + --blue-border: #1f6feb; + --purple: #d2a8ff; + --orange: #ffa657; + --red: #f85149; + --red-bg: rgba(248, 81, 73, 0.06); + --red-border: #da3633; + --yellow: #e3b341; + --pink: #f778ba; + + /* Typography */ + --font-mono: 'SF Mono', 'JetBrains Mono', 'Cascadia Code', 'Fira Code', monospace; + --font-sans: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif; + + /* Font sizes */ + --font-size-xs: 12px; + --font-size-sm: 13px; + --font-size-md: 15px; + --font-size-lg: 16px; + + /* Spacing */ + --gap-xs: 
4px; + --gap-sm: 8px; + --gap-md: 12px; + --gap-lg: 16px; + --gap-xl: 20px; + + /* Header */ + --header-height: 52px; + + /* Monitor */ + --monitor-min-height: 120px; + + /* Radius */ + --radius-sm: 4px; + --radius-md: 6px; + --radius-lg: 8px; +} + +*, *::before, *::after { + box-sizing: border-box; +} + +html, body { + margin: 0; + padding: 0; + height: 100%; + background: var(--bg); + color: var(--text); + font-family: var(--font-sans); + font-size: var(--font-size-md); + line-height: 1.5; +} diff --git a/src/planner/web/html/index.html b/src/planner/web/html/index.html new file mode 100644 index 0000000..91b9e10 --- /dev/null +++ b/src/planner/web/html/index.html @@ -0,0 +1,17 @@ + + + + + + koan + + + + + + + +
+ + + diff --git a/src/planner/web/js/app.jsx b/src/planner/web/js/app.jsx new file mode 100644 index 0000000..035a254 --- /dev/null +++ b/src/planner/web/js/app.jsx @@ -0,0 +1,17 @@ +import { render } from 'preact' +import { App } from './components/App.jsx' +import { connectSSE } from './sse.js' + +const data = window.__DATA__ +const token = data?.token || new URLSearchParams(location.search).get('session') || '' + +render(, document.getElementById('app')) +connectSSE(token) + +setInterval(() => { + fetch('/api/heartbeat', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ token }), + }).catch(() => {}) +}, 5000) diff --git a/src/planner/web/js/components/ActivityFeed.jsx b/src/planner/web/js/components/ActivityFeed.jsx new file mode 100644 index 0000000..ed71786 --- /dev/null +++ b/src/planner/web/js/components/ActivityFeed.jsx @@ -0,0 +1,76 @@ +import { useRef, useEffect, useState } from 'preact/hooks' +import { useStore } from '../store.js' + +export function ActivityFeed() { + const logs = useStore(s => s.logs) + const containerRef = useRef(null) + const stickRef = useRef(true) + + // Track previous last-line to detect in-flight → completed transitions. + const prevLastRef = useRef(null) + const [flashIndex, setFlashIndex] = useState(-1) + + // Auto-scroll to bottom when new logs arrive, but only if already at bottom. + useEffect(() => { + const el = containerRef.current + if (el && stickRef.current) { + el.scrollTop = el.scrollHeight + } + }, [logs]) + + // Detect when the last line transitions from in-flight to completed and flash it. + useEffect(() => { + const lastLine = logs[logs.length - 1] + if (prevLastRef.current?.inFlight && lastLine && !lastLine.inFlight) { + const idx = logs.length - 1 + setFlashIndex(idx) + setTimeout(() => setFlashIndex(-1), 400) + } + prevLastRef.current = lastLine ? 
{ ...lastLine } : null + }, [logs]) + + function onScroll() { + const el = containerRef.current + if (!el) return + // "At bottom" if within 30px of the end. + stickRef.current = el.scrollTop + el.clientHeight >= el.scrollHeight - 30 + } + + if (logs.length === 0) return null + + return ( +
+
+ {logs.map((line, i) => { + // Only the last line can be in-flight — earlier lines are always done. + const isInFlight = !!line.inFlight && i === logs.length - 1 + const isFlashing = i === flashIndex + const cls = [ + 'activity-line', + line.highValue ? 'activity-high' : '', + isInFlight ? 'activity-inflight' : '', + isFlashing ? 'activity-flash' : '', + ].filter(Boolean).join(' ') + + return ( + <> +
+ {line.tool} + + {line.summary || ''} + {isInFlight && ...} + +
+ {line.details?.map((d, j) => ( +
+ + {d} +
+ ))} + + ) + })} +
+
+ ) +} diff --git a/src/planner/web/js/components/AgentMonitor.jsx b/src/planner/web/js/components/AgentMonitor.jsx new file mode 100644 index 0000000..b87d763 --- /dev/null +++ b/src/planner/web/js/components/AgentMonitor.jsx @@ -0,0 +1,48 @@ +import { useStore } from '../store.js' +import { formatTokens } from '../lib/utils.js' +import { AgentRow } from './AgentRow.jsx' + +export function AgentMonitor() { + const allAgents = useStore(s => s.agents) + // Only show nested subagents (those with a parent), and only running ones + const agents = allAgents.filter(a => a.status === 'running' && a.parent) + const sent = agents.reduce((s, a) => s + (a.tokensSent || 0), 0) + const recv = agents.reduce((s, a) => s + (a.tokensReceived || 0), 0) + + if (agents.length === 0) return null + + // Dynamic lines-per-agent based on count + const maxLines = agents.length <= 3 ? 5 + : agents.length <= 6 ? 3 + : agents.length <= 10 ? 2 + : 1 + + return ( +
+
+ Subagents +
+ {agents.length} +
+ + {(sent > 0 || recv > 0) ? `↑${formatTokens(sent)} ↓${formatTokens(recv)}` : ''} + +
+ + + + + + + + + + + + + {agents.map(a => )} + +
agentmodel↑ sent↓ recvdoing
+
+ ) +} diff --git a/src/planner/web/js/components/AgentRow.jsx b/src/planner/web/js/components/AgentRow.jsx new file mode 100644 index 0000000..8bc678f --- /dev/null +++ b/src/planner/web/js/components/AgentRow.jsx @@ -0,0 +1,40 @@ +import { shortenModel, formatTokens } from '../lib/utils.js' + +export function AgentRow({ agent, maxLines = 5 }) { + const actions = agent.recentActions || [] + const start = Math.max(0, actions.length - maxLines) + + return ( + + ● + {agent.name || agent.id} + {shortenModel(agent.model)} + {formatTokens(agent.tokensSent || 0)} + {formatTokens(agent.tokensReceived || 0)} + + {actions.length > 0 ? ( +
+ {actions.slice(start).map((action, i) => { + // Gracefully handle both old string[] and new object[] formats. + const text = typeof action === 'string' + ? action + : (action.summary ? `${action.tool}: ${action.summary}` : action.tool) + const inFlight = typeof action === 'object' && !!action.inFlight + + return ( +
+ + {inFlight ? '●' : '·'} + + {text} +
+ ) + })} +
+ ) : ( + initializing... + )} + + + ) +} diff --git a/src/planner/web/js/components/App.jsx b/src/planner/web/js/components/App.jsx new file mode 100644 index 0000000..031ae91 --- /dev/null +++ b/src/planner/web/js/components/App.jsx @@ -0,0 +1,39 @@ +import { ProgressBar } from './ProgressBar.jsx' +import { Header } from './Header.jsx' +import { SubagentMeta } from './SubagentMeta.jsx' +import { PhaseContent } from './PhaseContent.jsx' +import { ActivityFeed } from './ActivityFeed.jsx' +import { AgentMonitor } from './AgentMonitor.jsx' +import { Notifications } from './Notifications.jsx' +import { useStore } from '../store.js' + +export function App({ token, topic }) { + const phase = useStore(s => s.phase) + const pending = useStore(s => s.pendingInput) + const showSettings = useStore(s => s.showSettings) + + // When showing interactive content (forms, model config, loading, completion), use scroll layout + // When showing live subagent activity, use fill layout with activity feed + const isInteractive = !phase || pending || showSettings || phase === 'completed' + + return ( +
+ +
+ {isInteractive ? ( +
+
+ +
+
+ ) : ( +
+ + +
+ )} + + +
+ ) +} diff --git a/src/planner/web/js/components/Header.jsx b/src/planner/web/js/components/Header.jsx new file mode 100644 index 0000000..2e3dc9d --- /dev/null +++ b/src/planner/web/js/components/Header.jsx @@ -0,0 +1,24 @@ +import { PillStrip } from './PillStrip.jsx' +import { Timer } from './Timer.jsx' +import { useStore } from '../store.js' + +export function Header() { + return ( +
+
+ + +
+
+ + +
+
+ ) +} diff --git a/src/planner/web/js/components/ModelConfig.jsx b/src/planner/web/js/components/ModelConfig.jsx new file mode 100644 index 0000000..64feb3f --- /dev/null +++ b/src/planner/web/js/components/ModelConfig.jsx @@ -0,0 +1,152 @@ +import { useState, useEffect } from 'preact/hooks' +import { useStore } from '../store.js' + +const TIERS = [ + { + key: 'strong', + label: 'Strong', + description: 'Complex reasoning \u2014 intake analysis, task decomposition, orchestration, and planning. Requires deep understanding of requirements and codebase architecture.', + }, + { + key: 'standard', + label: 'Standard', + description: 'Implementation \u2014 executing planned changes based on well-specified work. Balances capability with cost for coding tasks.', + }, + { + key: 'cheap', + label: 'Cheap', + description: 'Narrow investigations \u2014 codebase scouting and targeted information gathering. Fast and cost-effective for focused questions.', + }, +] + +function groupByProvider(models) { + const groups = {} + for (const m of models) { + if (!groups[m.provider]) groups[m.provider] = [] + groups[m.provider].push(m) + } + // Sort providers alphabetically, models by name within each group + return Object.keys(groups).sort().map(provider => ({ + provider, + models: groups[provider].sort((a, b) => a.name.localeCompare(b.name)), + })) +} + +export function ModelConfig({ token, isGate = false, onClose }) { + const pending = useStore(s => s.pendingInput) + const availableModels = useStore(s => s.availableModels) + const [tiers, setTiers] = useState({ strong: '', standard: '', cheap: '' }) + const [loading, setLoading] = useState(true) + const [saving, setSaving] = useState(false) + + // Load current config on mount + useEffect(() => { + if (isGate && pending?.payload) { + const t = pending.payload + setTiers({ + strong: t?.strong || '', + standard: t?.standard || '', + cheap: t?.cheap || '', + }) + setLoading(false) + return + } + 
fetch(`/api/model-config?session=${encodeURIComponent(token)}`) + .then(r => r.json()) + .then(data => { + if (data.tiers) { + setTiers({ + strong: data.tiers.strong || '', + standard: data.tiers.standard || '', + cheap: data.tiers.cheap || '', + }) + } + setLoading(false) + }) + .catch(() => setLoading(false)) + }, []) + + const handleSave = async () => { + setSaving(true) + const body = { + tiers: { + strong: tiers.strong || null, + standard: tiers.standard || null, + cheap: tiers.cheap || null, + }, + } + if (isGate && pending?.requestId) { + body.requestId = pending.requestId + } + try { + await fetch(`/api/model-config?session=${encodeURIComponent(token)}`, { + method: 'PUT', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify(body), + }) + if (!isGate && onClose) onClose() + } finally { + setSaving(false) + } + } + + const grouped = groupByProvider(availableModels) + + if (loading) { + return ( +
+
+
+ ) + } + + return ( +
+

Model Configuration

+

+ Choose which models to use for each task type. Leave as “Inherited” to use the active model. +

+ +
+ {TIERS.map(tier => ( +
+
+ {tier.label} +
+

{tier.description}

+ +
+ ))} +
+ +
+ {!isGate && ( + + )} + + {isGate && !tiers.strong && !tiers.standard && !tiers.cheap && ( + All models will be inherited from the active model + )} +
+
+ ) +} diff --git a/src/planner/web/js/components/Notifications.jsx b/src/planner/web/js/components/Notifications.jsx new file mode 100644 index 0000000..09ab09d --- /dev/null +++ b/src/planner/web/js/components/Notifications.jsx @@ -0,0 +1,25 @@ +import { useEffect } from 'preact/hooks' +import { useStore } from '../store.js' + +export function Notifications() { + const notifications = useStore(s => s.notifications) + + useEffect(() => { + if (notifications.length === 0) return + const newest = notifications[notifications.length - 1] + const timer = setTimeout(() => { + useStore.setState(s => ({ + notifications: s.notifications.filter(n => n.id !== newest.id), + })) + }, 5000) + return () => clearTimeout(timer) + }, [notifications[notifications.length - 1]?.id]) + + return ( +
+ {notifications.map(n => ( +
{n.message}
+ ))} +
+ ) +} diff --git a/src/planner/web/js/components/PhaseContent.jsx b/src/planner/web/js/components/PhaseContent.jsx new file mode 100644 index 0000000..d552837 --- /dev/null +++ b/src/planner/web/js/components/PhaseContent.jsx @@ -0,0 +1,32 @@ +import { useStore } from '../store.js' +import { Loading } from './phases/Loading.jsx' +import { Completion } from './phases/Completion.jsx' +import { QuestionForm } from './forms/QuestionForm.jsx' +import { ReviewForm } from './forms/ReviewForm.jsx' +import { ModelConfig } from './ModelConfig.jsx' + +export function PhaseContent({ token, topic }) { + const phase = useStore(s => s.phase) + const pending = useStore(s => s.pendingInput) + + // Settings overlay + const showSettings = useStore(s => s.showSettings) + if (showSettings) { + return useStore.setState({ showSettings: false })} /> + } + + // Model config gate (startup) + if (pending?.type === 'model-config') { + return + } + + if (!phase) return + + if (pending?.type === 'ask') return + if (pending?.type === 'review') return + + if (phase === 'completed') return + + // For running phases, App renders ActivityFeed directly — this shouldn't be reached + return null +} diff --git a/src/planner/web/js/components/PillStrip.jsx b/src/planner/web/js/components/PillStrip.jsx new file mode 100644 index 0000000..079a38a --- /dev/null +++ b/src/planner/web/js/components/PillStrip.jsx @@ -0,0 +1,29 @@ +import { useStore } from '../store.js' + +const PHASES = [ + { id: 'intake', label: 'intake' }, + { id: 'decomposition', label: 'decompose' }, + { id: 'review', label: 'review' }, + { id: 'executing', label: 'execute' }, +] + +const PHASE_ORDER = ['intake', 'decomposition', 'review', 'executing', 'completed'] + +export function PillStrip() { + const phase = useStore(s => s.phase) + if (!phase) return null + + const phaseIdx = PHASE_ORDER.indexOf(phase) + + return ( +
+ {PHASES.map(({ id, label }) => { + const pillIdx = PHASE_ORDER.indexOf(id) + const cls = phase === 'completed' || phaseIdx > pillIdx ? 'pill done' + : phase === id ? 'pill active' + : 'pill pending' + return {label} + })} +
+ ) +} diff --git a/src/planner/web/js/components/ProgressBar.jsx b/src/planner/web/js/components/ProgressBar.jsx new file mode 100644 index 0000000..a5f6ab3 --- /dev/null +++ b/src/planner/web/js/components/ProgressBar.jsx @@ -0,0 +1,15 @@ +import { useStore } from '../store.js' + +const PHASE_ORDER = ['intake', 'decomposition', 'review', 'executing', 'completed'] + +export function ProgressBar() { + const phase = useStore(s => s.phase) + const idx = PHASE_ORDER.indexOf(phase || '') + const pct = idx < 0 ? 0 : (idx / (PHASE_ORDER.length - 1)) * 100 + + return ( +
+
+
+ ) +} diff --git a/src/planner/web/js/components/SubagentMeta.jsx b/src/planner/web/js/components/SubagentMeta.jsx new file mode 100644 index 0000000..ad3e117 --- /dev/null +++ b/src/planner/web/js/components/SubagentMeta.jsx @@ -0,0 +1,20 @@ +import { useStore } from '../store.js' +import { shortenModel, formatTokens } from '../lib/utils.js' + +export function SubagentMeta() { + const sub = useStore(s => s.subagent) + if (!sub) return null + + const stepLabel = sub.stepName || (sub.step && sub.totalSteps ? `Step ${sub.step}/${sub.totalSteps}` : null) + + return ( +
+ {sub.role} + {sub.model && {shortenModel(sub.model)}} + {stepLabel && {stepLabel}} + {(sub.tokensSent > 0 || sub.tokensReceived > 0) && ( + ↑{formatTokens(sub.tokensSent || 0)} ↓{formatTokens(sub.tokensReceived || 0)} + )} +
+ ) +} diff --git a/src/planner/web/js/components/Timer.jsx b/src/planner/web/js/components/Timer.jsx new file mode 100644 index 0000000..694d754 --- /dev/null +++ b/src/planner/web/js/components/Timer.jsx @@ -0,0 +1,17 @@ +import { useState, useEffect } from 'preact/hooks' +import { useStore } from '../store.js' +import { formatElapsed } from '../lib/utils.js' + +export function Timer() { + const startedAt = useStore(s => s.subagent?.startedAt) + const [now, setNow] = useState(Date.now()) + + useEffect(() => { + if (!startedAt) return + const id = setInterval(() => setNow(Date.now()), 1000) + return () => clearInterval(id) + }, [startedAt]) + + if (!startedAt) return + return {formatElapsed(now - startedAt)} +} diff --git a/src/planner/web/js/components/forms/QuestionCard.jsx b/src/planner/web/js/components/forms/QuestionCard.jsx new file mode 100644 index 0000000..97a92af --- /dev/null +++ b/src/planner/web/js/components/forms/QuestionCard.jsx @@ -0,0 +1,84 @@ +import { useState } from 'preact/hooks' + +export function QuestionCard({ question, index, total, onSelect }) { + const [selectedIndexes, setSelectedIndexes] = useState(() => new Set()) + const [otherInput, setOtherInput] = useState('') + + const options = question.options || [] + const allOptions = options.map(o => o.label) + const otherIndex = allOptions.findIndex(l => l === 'Other (type your own)') + + function buildSelection(indexes, otherVal) { + if (question.multi) { + const selectedOptions = [] + let customInput + for (const idx of indexes) { + if (idx === otherIndex) { + const val = otherVal.trim() + if (val) customInput = val + } else { + selectedOptions.push(allOptions[idx]) + } + } + return customInput !== undefined ? { selectedOptions, customInput } : { selectedOptions } + } else { + const idx = [...indexes][0] + if (idx === otherIndex) { + const val = otherVal.trim() + return val ? 
{ selectedOptions: [], customInput: val } : null + } + return { selectedOptions: [allOptions[idx]] } + } + } + + function handleSelect(i) { + let next + if (question.multi) { + next = new Set(selectedIndexes) + if (next.has(i)) next.delete(i) + else next.add(i) + } else { + next = new Set([i]) + } + setSelectedIndexes(next) + onSelect(buildSelection(next, otherInput)) + } + + function handleOtherInput(e) { + const val = e.target.value + setOtherInput(val) + if (selectedIndexes.has(otherIndex)) { + onSelect(buildSelection(selectedIndexes, val)) + } + } + + const showOtherInput = otherIndex !== -1 && selectedIndexes.has(otherIndex) + + return ( +
+
{index + 1}/{total} · {question.id}
+ {question.multi &&
select all that apply
} +
{question.question}
+
+ {allOptions.map((label, i) => { + const isSelected = selectedIndexes.has(i) + const isRecommended = i === question.recommended && i !== otherIndex + return ( +
handleSelect(i)}> + + {label} + {isRecommended && recommended} +
+ ) + })} + +
+
+ ) +} diff --git a/src/planner/web/js/components/forms/QuestionForm.jsx b/src/planner/web/js/components/forms/QuestionForm.jsx new file mode 100644 index 0000000..b019463 --- /dev/null +++ b/src/planner/web/js/components/forms/QuestionForm.jsx @@ -0,0 +1,60 @@ +import { useState } from 'preact/hooks' +import { useStore } from '../../store.js' +import { submitAnswers } from '../../lib/api.js' +import { QuestionCard } from './QuestionCard.jsx' + +export function QuestionForm({ token }) { + const { requestId, payload: questions } = useStore(s => s.pendingInput) + const [selections, setSelections] = useState(() => new Array(questions.length).fill(null)) + + const allAnswered = selections.every(s => s !== null && (s.selectedOptions?.length > 0 || s.customInput)) + const answeredCount = selections.filter(s => s !== null && (s.selectedOptions?.length > 0 || s.customInput)).length + + function updateSelection(index, selection) { + setSelections(prev => { + const next = [...prev] + next[index] = selection + return next + }) + } + + function acceptDefaults() { + const answers = questions.map((q) => { + const idx = q.recommended ?? 0 + const label = q.options[idx]?.label + return { questionId: q.id, selectedOptions: label ? [label] : [] } + }) + submitAnswers({ token, requestId, answers }) + } + + function submit() { + const answers = questions.map((q, i) => ({ + questionId: q.id, + ...(selections[i] || { selectedOptions: [] }), + })) + submitAnswers({ token, requestId, answers }) + } + + return ( +
+

A few questions to shape the plan

+
{answeredCount} of {questions.length} answered
+ + {questions.map((q, i) => ( + updateSelection(i, sel)} + /> + ))} + +
+ + + {!allAnswered && {questions.length - answeredCount} remaining} +
+
+ ) +} diff --git a/src/planner/web/js/components/forms/ReviewForm.jsx b/src/planner/web/js/components/forms/ReviewForm.jsx new file mode 100644 index 0000000..ee878e3 --- /dev/null +++ b/src/planner/web/js/components/forms/ReviewForm.jsx @@ -0,0 +1,51 @@ +import { useState } from 'preact/hooks' +import { useStore } from '../../store.js' +import { submitReview } from '../../lib/api.js' + +export function ReviewForm({ token }) { + const { requestId, payload: stories } = useStore(s => s.pendingInput) + const [approved, setApproved] = useState(() => new Set(stories.map(s => s.storyId))) + + function toggle(storyId) { + setApproved(prev => { + const next = new Set(prev) + if (next.has(storyId)) next.delete(storyId) + else next.add(storyId) + return next + }) + } + + function approveAll() { + setApproved(new Set(stories.map(s => s.storyId))) + } + + function submit() { + const approvedList = stories.filter(s => approved.has(s.storyId)).map(s => s.storyId) + const skippedList = stories.filter(s => !approved.has(s.storyId)).map(s => s.storyId) + submitReview({ token, requestId, approved: approvedList, skipped: skippedList }) + } + + return ( +
+

Review story sketches

+

Review stories before execution begins.

+ + {stories.map(story => ( +
toggle(story.storyId)} + > +
+ {story.storyId} + — {story.title} +
+ ))} + +
+ + +
+
+ ) +} diff --git a/src/planner/web/js/components/phases/Completion.jsx b/src/planner/web/js/components/phases/Completion.jsx new file mode 100644 index 0000000..f016bf9 --- /dev/null +++ b/src/planner/web/js/components/phases/Completion.jsx @@ -0,0 +1,23 @@ +import { useStore } from '../../store.js' + +export function Completion() { + const pipelineEnd = useStore(s => s.pipelineEnd) + + return ( +
+

+ {pipelineEnd?.success ? 'Pipeline complete ✓' : 'Pipeline failed'} +

+ {pipelineEnd?.summary && ( +
+
+ + {pipelineEnd.success ? '✓' : '✗'} + + {pipelineEnd.summary} +
+
+ )} +
+ ) +} diff --git a/src/planner/web/js/components/phases/Consolidation.jsx b/src/planner/web/js/components/phases/Consolidation.jsx new file mode 100644 index 0000000..5af7e54 --- /dev/null +++ b/src/planner/web/js/components/phases/Consolidation.jsx @@ -0,0 +1,39 @@ +import { useStore } from '../../store.js' + +export function Consolidation() { + const logs = useStore(s => s.logs) + const scouts = useStore(s => s.scouts) + const scoutCount = scouts.length + + return ( +
+

Writing project specification...

+
+
+ + Context extracted from conversation +
+ {scoutCount > 0 && ( +
+ + {scoutCount} scout{scoutCount !== 1 ? 's' : ''} explored the codebase +
+ )} +
+ + Writing decisions.md... +
+
+ {logs.length > 0 && ( +
+ {logs.slice(-3).map((line, i) => ( +
+ {line.tool} + {line.summary || ''} +
+ ))} +
+ )} +
+ ) +} diff --git a/src/planner/web/js/components/phases/ContextAnalysis.jsx b/src/planner/web/js/components/phases/ContextAnalysis.jsx new file mode 100644 index 0000000..3962b03 --- /dev/null +++ b/src/planner/web/js/components/phases/ContextAnalysis.jsx @@ -0,0 +1,21 @@ +import { useStore } from '../../store.js' + +export function ContextAnalysis() { + const logs = useStore(s => s.logs) + + return ( +
+

Reading your conversation to understand the task...

+ {logs.length > 0 && ( +
+ {logs.slice(-4).map((line, i) => ( +
+ {line.tool} + {line.summary || ''} +
+ ))} +
+ )} +
+ ) +} diff --git a/src/planner/web/js/components/phases/Execution.jsx b/src/planner/web/js/components/phases/Execution.jsx new file mode 100644 index 0000000..5efa4bf --- /dev/null +++ b/src/planner/web/js/components/phases/Execution.jsx @@ -0,0 +1,34 @@ +import { useStore } from '../../store.js' + +export function Execution({ phase }) { + const stories = useStore(s => s.stories) + + const phaseLabel = phase === 'decomposition' ? 'Decomposing into stories...' + : phase === 'review' ? 'Awaiting spec review...' + : phase === 'executing' ? 'Executing stories...' + : `Phase: ${phase}` + + return ( +
+

{phaseLabel}

+ {stories.length > 0 && ( +
+ {stories.map(story => { + const icon = story.status === 'done' ? '✓' + : story.status === 'skipped' ? '—' + : (story.status === 'executing' || story.status === 'planning' || story.status === 'verifying') ? '●' + : '◌' + const iconCls = story.status === 'done' ? 'icon-done' : 'icon-pending' + return ( +
+ {icon} + {story.storyId} + [{story.status}] +
+ ) + })} +
+ )} +
+ ) +} diff --git a/src/planner/web/js/components/phases/Loading.jsx b/src/planner/web/js/components/phases/Loading.jsx new file mode 100644 index 0000000..6dbc4ad --- /dev/null +++ b/src/planner/web/js/components/phases/Loading.jsx @@ -0,0 +1,14 @@ +export function Loading({ topic }) { + return ( +
+
+

Initializing...

+ {topic && ( +
+
YOUR REQUEST
+
{topic}
+
+ )} +
+ ) +} diff --git a/src/planner/web/js/components/phases/ScoutExploration.jsx b/src/planner/web/js/components/phases/ScoutExploration.jsx new file mode 100644 index 0000000..7a287a1 --- /dev/null +++ b/src/planner/web/js/components/phases/ScoutExploration.jsx @@ -0,0 +1,60 @@ +import { useStore } from '../../store.js' + +const COLORS = ['var(--blue)', 'var(--purple)', 'var(--orange)', 'var(--yellow)', 'var(--pink)'] + +export function ScoutExploration() { + const scouts = useStore(s => s.scouts) + + return ( +
+

+ Exploring your codebase with {scouts.length} scout{scouts.length !== 1 ? 's' : ''}… +

+ {scouts.map((scout, i) => ( + + ))} + +
+ ) +} + +function ScoutCard({ scout, color }) { + const cls = scout.status === 'completed' ? 'card card-done' + : scout.status === 'failed' ? 'card card-failed' + : 'card card-running' + const symbol = scout.status === 'completed' ? '✓' : scout.status === 'failed' ? '✗' : '●' + + return ( +
+
+ {symbol} + {scout.id} + {scout.role} +
+
+ {scout.status === 'completed' ? scout.completionSummary + : scout.status === 'failed' ? Scout failed + : {scout.lastAction || 'Starting…'}} +
+
+ ) +} + +function CompletedContext({ scouts }) { + const completed = scouts.filter(s => s.status === 'completed' && s.completionSummary) + if (completed.length === 0) return null + + return ( + <> + +
    + {completed.map(s => ( +
  • + {s.id}: {s.completionSummary?.slice(0, 100)} + {(s.completionSummary?.length ?? 0) > 100 ? '…' : ''} +
  • + ))} +
+
+  )
+}
diff --git a/src/planner/web/js/lib/api.js b/src/planner/web/js/lib/api.js
new file mode 100644
index 0000000..1f98da0
--- /dev/null
+++ b/src/planner/web/js/lib/api.js
@@ -0,0 +1,33 @@
+import { useStore } from '../store.js'
+
+// POST `payload` as JSON to `path`. A 2xx response clears `pendingInput`;
+// a non-2xx response or a network error is logged and the pending input is
+// left in place. Never rejects: the form components call the exported
+// wrappers without awaiting or catching the returned promise.
+async function postInput(path, payload, what) {
+  try {
+    const resp = await fetch(path, {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify(payload),
+    })
+    if (resp.ok) {
+      useStore.setState({ pendingInput: null })
+    } else {
+      console.error(`Failed to submit ${what}:`, await resp.text())
+    }
+  } catch (err) {
+    // fetch() rejects on network failure (and resp.text() can reject too).
+    console.error(`Failed to submit ${what}:`, err)
+  }
+}
+
+// Send the answers for the pending `ask` request identified by `requestId`.
+export async function submitAnswers({ token, requestId, answers }) {
+  await postInput('/api/answer', { token, requestId, answers }, 'answers')
+}
+
+// Send the approved/skipped story ids for the pending `review` request.
+export async function submitReview({ token, requestId, approved, skipped }) {
+  await postInput('/api/review', { token, requestId, approved, skipped }, 'review')
+}
diff --git a/src/planner/web/js/lib/utils.js b/src/planner/web/js/lib/utils.js
new file mode 100644
index 0000000..2a55efe
--- /dev/null
+++ b/src/planner/web/js/lib/utils.js
@@ -0,0 +1,33 @@
+// Format a token count for display: '—' for zero, the plain number below
+// 1000, one decimal of thousands below 10k, whole thousands from 10k up.
+export function formatTokens(n) {
+  if (n === 0) return '—';
+  if (n < 1000) return String(n);
+  const k = n / 1000;
+  const tenths = k.toFixed(1);
+  // Counts just under 10000 (e.g. 9999) round to '10.0'; fold them into the
+  // whole-thousands form so the output is '10k', never '10.0k'.
+  if (k >= 10 || tenths === '10.0') return `${Math.round(k)}k`;
+  return `${tenths}k`;
+}
+
+// Format a millisecond duration as '<minutes>m <seconds>s' with seconds
+// zero-padded to two digits.
+export function formatElapsed(ms) {
+  // Timer.jsx samples `now` when it mounts, so `now - startedAt` is negative
+  // until its first tick if the subagent started later; clamp instead of
+  // rendering '-1m -1s'. Non-finite input is treated as zero as well.
+  const totalSeconds = Number.isFinite(ms) ? Math.max(0, Math.floor(ms / 1000)) : 0;
+  const minutes = Math.floor(totalSeconds / 60);
+  const seconds = totalSeconds % 60;
+  return `${minutes}m ${String(seconds).padStart(2, '0')}s`;
+}
+
+// Reduce a model id to a short display name: the segment after the last '/',
+// with a leading 'claude-' removed. Falsy input yields '—'.
+export function shortenModel(model) {
+  if (!model) return '—';
+  const parts = model.split('/');
+  const name = parts[parts.length - 1] ?? model;
+  return name.replace(/^claude-/, '');
+}
diff --git a/src/planner/web/js/sse.js b/src/planner/web/js/sse.js
new file mode 100644
index 0000000..20b5ae6
--- /dev/null
+++ b/src/planner/web/js/sse.js
@@ -0,0 +1,72 @@
+import { useStore } from './store.js'
+
+// Monotonic id source for notifications created in this module.
+// Notifications.jsx removes entries by id, and Date.now() can return the same
+// value for two notifications created within one millisecond.
+let lastNotificationId = 0
+
+// Build a notification entry whose id is unique within this page load.
+function makeNotification(message, level) {
+  lastNotificationId += 1
+  return { id: lastNotificationId, message, level }
+}
+
+// Open the server-sent-events stream for `token` and mirror each named event
+// into the zustand store. Returns the EventSource.
+export function connectSSE(token) {
+  const es = new EventSource(`/events?session=${encodeURIComponent(token)}`)
+  const set = useStore.setState
+  const notify = (message, level) => set(s => ({
+    notifications: [...s.notifications, makeNotification(message, level)],
+  }))
+
+  // One handler per SSE event name; each receives the JSON-decoded payload.
+  const handlers = {
+    'init': (d) => set({ availableModels: d.availableModels || [] }),
+    // Any phase other than 'intake' also drops the pending input request.
+    phase: (d) => set({ phase: d.phase, ...(d.phase !== 'intake' && { pendingInput: null }) }),
+    'intake-progress': () => {},  // data model preserved server-side; UI unused for now
+    stories: (d) => set({ stories: d.stories }),
+    scouts: (d) => set({ scouts: d.scouts }),
+    agents: (d) => set({ agents: d.agents }),
+    logs: (d) => set({ logs: d.lines, currentToolCallId: d.currentToolCallId ?? null }),
+    subagent: (d) => set({ subagent: d }),
+    'subagent-idle': () => set({ subagent: null }),
+    // Only a successful run moves to 'completed'; a failure keeps the phase.
+    'pipeline-end': (d) => set(s => ({ phase: d.success ? 'completed' : s.phase, pipelineEnd: d })),
+    ask: (d) => set({ pendingInput: { type: 'ask', requestId: d.requestId, payload: d.questions } }),
+    review: (d) => set({ pendingInput: { type: 'review', requestId: d.requestId, payload: d.stories } }),
+    'model-config': (d) => set(s => ({
+      pendingInput: { type: 'model-config', requestId: d.requestId, payload: d.tiers },
+      ...(d.availableModels ? { availableModels: d.availableModels } : {}),
+    })),
+    'model-config-confirmed': () => set(s => s.pendingInput?.type === 'model-config' ? { pendingInput: null } : {}),
+    // A cancellation is ignored unless it names the request currently shown.
+    'ask-cancelled': (d) => set(s => s.pendingInput?.requestId === d.requestId
+      ? { pendingInput: null, notifications: [...s.notifications, makeNotification('The question was cancelled — the subagent has exited.', 'warning')] }
+      : {}),
+    'review-cancelled': (d) => set(s => s.pendingInput?.requestId === d.requestId
+      ? { pendingInput: null, notifications: [...s.notifications, makeNotification('The review was cancelled.', 'warning')] }
+      : {}),
+    notification: (d) => notify(d.message, d.level),
+  }
+
+  for (const [event, handler] of Object.entries(handlers)) {
+    es.addEventListener(event, (e) => {
+      try { handler(JSON.parse(e.data)) }
+      catch (err) { console.error(`[koan] SSE "${event}":`, err) }
+    })
+  }
+
+  // EventSource reconnects on its own and fires `error` for every failed
+  // attempt, so warn once per outage and re-arm when the stream reopens.
+  let outageReported = false
+  es.addEventListener('open', () => { outageReported = false })
+  es.onerror = () => {
+    if (outageReported) return
+    outageReported = true
+    notify('Connection lost — reconnecting…', 'warning')
+  }
+
+  return es
+}
diff --git a/src/planner/web/js/store.js b/src/planner/web/js/store.js
new file mode 100644
index 0000000..99c7b0a
--- /dev/null
+++ b/src/planner/web/js/store.js
@@ -0,0 +1,21 @@
+import { create } from 'zustand'
+
+// Single zustand store for the dashboard. sse.js writes the server-pushed
+// slices; components read them through useStore(selector).
+export const useStore = create(() => ({
+  // Server-pushed state
+  phase: null,
+  stories: [],
+  scouts: [],
+  agents: [],
+  logs: [],               // Array<{ tool, summary, highValue, inFlight }>
+  currentToolCallId: null, // string | null — in-flight tool for the main agent
+  subagent: null,
+  pendingInput: null,     // { type, requestId, payload } | null
+  pipelineEnd: null,      // payload of the 'pipeline-end' event
+  availableModels: [],    // from 'init' / 'model-config' events
+
+  // Client-managed state
+  notifications: [],      // appended by sse.js, expired by Notifications.jsx
+  showSettings: false,
+}))
diff --git a/src/planner/web/server-types.ts b/src/planner/web/server-types.ts
new file mode 100644
index 0000000..6cc8edc
--- /dev/null
+++ b/src/planner/web/server-types.ts
@@ -0,0 +1,273 @@
+// Shared types for the koan web UI: WebServerHandle interface, SSE event
+// types, result types, and ask model types relocated from ask-logic.ts.
+
+import type { LogLine } from "../lib/audit.js";
+import type { EpicPhase, StoryStatus } from "../types.js";
+
+export type { LogLine, EpicPhase, StoryStatus };
+
+// ---------------------------------------------------------------------------
+// Ask model types (relocated from ui/ask/ask-logic.ts)
+// ---------------------------------------------------------------------------
+
+/** Label of the free-text option; a note entered for it becomes `customInput`. */
+export const OTHER_OPTION = "Other (type your own)";
+const RECOMMENDED_OPTION_TAG = " (Recommended)";
+
+export interface AskOption {
+  label: string;
+}
+
+export interface AskQuestion {
+  id: string;
+  question: string;
+  options: AskOption[];
+  multi?: boolean;
+  recommended?: number;
+}
+
+export interface AskSelection {
+  selectedOptions: string[];
+  customInput?: string;
+}
+
+/**
+ * Returns the labels with " (Recommended)" appended to the one at
+ * `recommendedOptionIndex`. The input array is returned unchanged when the
+ * index is missing or out of range; an already-tagged label is not tagged twice.
+ */
+export function appendRecommendedTagToOptionLabels(
+  optionLabels: string[],
+  recommendedOptionIndex?: number,
+): string[] {
+  if (
+    recommendedOptionIndex == null ||
+    recommendedOptionIndex < 0 ||
+    recommendedOptionIndex >= optionLabels.length
+  ) {
+    return optionLabels;
+  }
+  return optionLabels.map((label, idx) => {
+    if (idx !== recommendedOptionIndex) return label;
+    if (label.endsWith(RECOMMENDED_OPTION_TAG)) return label;
+    return `${label}${RECOMMENDED_OPTION_TAG}`;
+  });
+}
+
+/** Strips a trailing " (Recommended)" tag from a label, if present. */
+function removeRecommendedTag(label: string): string {
+  if (!label.endsWith(RECOMMENDED_OPTION_TAG)) return label;
+  return label.slice(0, -RECOMMENDED_OPTION_TAG.length);
+}
+
+/**
+ * Builds the selection for a single-choice question. Choosing OTHER_OPTION
+ * turns a non-empty note into `customInput`; for any other option a non-empty
+ * note is appended to the label as "<label> - <note>".
+ */
+export function buildSingleSelectionResult(selectedOptionLabel: string, note?: string): AskSelection {
+  const normalized = removeRecommendedTag(selectedOptionLabel);
+  const trimmedNote = note?.trim();
+  if (normalized === OTHER_OPTION) {
+    return trimmedNote ? { selectedOptions: [], customInput: trimmedNote } : { selectedOptions: [] };
+  }
+  if (trimmedNote) {
+    return { selectedOptions: [`${normalized} - ${trimmedNote}`] };
+  }
+  return { selectedOptions: [normalized] };
+}
+
+/**
+ * Builds the selection for a multi-choice question from the selected indexes.
+ * Labels are emitted in option order with their note appended when present;
+ * the option at `otherOptionIndex` contributes only its note, as `customInput`.
+ */
+export function buildMultiSelectionResult(
+  optionLabels: string[],
+  selectedOptionIndexes: number[],
+  optionNotes: string[],
+  otherOptionIndex: number,
+): AskSelection {
+  const selected = new Set(selectedOptionIndexes);
+  const selectedOptions: string[] = [];
+  let customInput: string | undefined;
+
+  for (let i = 0; i < optionLabels.length; i++) {
+    if (!selected.has(i)) continue;
+    const label = removeRecommendedTag(optionLabels[i]);
+    const note = optionNotes[i]?.trim();
+    if (i === otherOptionIndex) {
+      if (note) customInput = note;
+      continue;
+    }
+    selectedOptions.push(note ? `${label} - ${note}` : label);
+  }
+
+  return customInput ? { selectedOptions, customInput } : { selectedOptions };
+}
+
+// ---------------------------------------------------------------------------
+// Result types
+// ---------------------------------------------------------------------------
+
+export interface ReviewStory {
+  storyId: string;
+  title: string;
+}
+
+export interface ReviewResult {
+  approved: string[];
+  skipped: string[];
+}
+
+export type AnswerElement = AskSelection & { questionId: string };
+
+export interface AnswerResult {
+  cancelled: boolean;
+  answers: AnswerElement[];
+}
+
+// ---------------------------------------------------------------------------
+// SSE event payload types (server → browser)
+// ---------------------------------------------------------------------------
+
+export interface AvailableModel {
+  id: string;
+  name: string;
+  provider: string;
+}
+
+export interface InitEvent {
+  availableModels: AvailableModel[];
+}
+
+export interface PhaseEvent {
+  phase: EpicPhase;
+}
+
+export interface StoriesEvent {
+  stories: Array<{ storyId: string; status: StoryStatus }>;
+}
+
+export interface SubagentEvent {
+  role: string;
+  storyId?: string;
+  step: number;
+  totalSteps: number;
+  stepName: string;
+  startedAt: number;
+}
+
+export interface SubagentIdleEvent {}
+
+export interface LogsEvent {
+  lines: LogLine[];
+  // Read by the browser's `logs` handler; mirrors the optional pushLogs argument.
+  currentToolCallId?: string | null;
+}
+
+export interface NotificationEvent {
+  message: string;
+  level: "info" | "warning" | "error";
+}
+
+export interface AskEvent {
+  requestId: string;
+  questions: AskQuestion[];
+}
+
+export interface ReviewEvent {
+  requestId: string;
+  stories: ReviewStory[];
+}
+
+export interface AskCancelledEvent {
+  requestId: string;
+}
+
+export interface PipelineEndEvent {
+  success: boolean;
+  summary: string;
+}
+
+export interface ScoutState {
+  id: string;
+  role: string;
+  status: "running" | "completed" | "failed";
+  lastAction: string | null;
+  eventCount: number;
+  model: string | null;
+  completionSummary: string | null;
+  tokensSent: number;
+  tokensReceived: number;
+}
+
+export interface ScoutsEvent {
+  scouts: ScoutState[];
+}
+
+export interface AgentEntry {
+  id: string;
+  name: string;
+  role: string;
+  model: string | null;
+  parent: string | null;
+  status: "running" | "completed" | "failed";
+  tokensSent: number;
+  tokensReceived: number;
+  recentActions: Array<{ tool: string; summary: string; inFlight: boolean }>;
+  subPhase: string | null;
+}
+
+export interface AgentsEvent {
+  agents: AgentEntry[];
+}
+
+export interface ModelConfigEvent {
+  requestId: string;
+  // NOTE(review): the Record type arguments were missing here; string keys
+  // with string-or-null values is an assumption to confirm.
+  tiers: Record<string, string | null> | null;
+  availableModels: AvailableModel[];
+}
+
+// ---------------------------------------------------------------------------
+// WebServerHandle interface
+// ---------------------------------------------------------------------------
+
+/** Control surface of the web UI server: SSE pushes, agent tracking, blocking input requests. */
+export interface WebServerHandle {
+  readonly url: string;
+  readonly port: number;
+
+  // Push methods (fire-and-forget, SSE)
+  pushPhase(phase: EpicPhase): void;
+  pushStories(stories: Array<{ storyId: string; status: StoryStatus }>): void;
+  pushLogs(lines: LogLine[], currentToolCallId?: string | null): void;
+  pushNotification(message: string, level: "info" | "warning" | "error"): void;
+
+  // Observation polling (replaces startActivePolling)
+  trackSubagent(dir: string, role: string, storyId?: string): void;
+  clearSubagent(): void;
+
+  // Agent registration for the flat table
+  registerAgent(info: {
+    id: string;
+    name: string;
+    dir: string;
+    role: string;
+    model: string | null;
+    parent: string | null;
+  }): void;
+  completeAgent(id: string): void;
+
+  // Blocking input methods
+  // NOTE(review): the Promise type arguments were missing on these three
+  // methods; ReviewResult/AnswerResult/void are assumptions to confirm.
+  requestReview(stories: ReviewStory[], signal?: AbortSignal): Promise<ReviewResult>;
+  requestAnswer(questions: AskQuestion[], signal: AbortSignal): Promise<AnswerResult>;
+  requestModelConfig(): Promise<void>;
+
+  // Lifecycle
+  close(): void;
+}
diff --git a/src/planner/web/server.ts b/src/planner/web/server.ts
new file mode 100644
index 0000000..5424178
--- /dev/null
+++ b/src/planner/web/server.ts
@@ -0,0 +1,815 @@
+// Koan web UI HTTP server.
+// Serves the single-page dashboard, pushes state via SSE, and receives
+// user input via POST endpoints. One server per pipeline run; lifecycle
+// owned by koan_plan.execute().
/**
 * Reads a bundled web asset as UTF-8 text.
 *
 * @param relativePath - Path of the asset relative to this module's directory.
 * @returns The file contents, or an empty string when the asset cannot be
 *   read for any reason (the failure is deliberately swallowed).
 */
function loadAsset(relativePath: string): string {
  let contents = "";
  try {
    const absolutePath = path.join(__dirname, relativePath);
    contents = readFileSync(absolutePath, "utf8");
  } catch {
    // Best effort: an unreadable asset is served as empty content.
  }
  return contents;
}
/** Upper bound, in bytes, on a request body accepted by `readBody`. */
const MAX_BODY_SIZE = 1_000_000;

/**
 * Buffers a request body and parses it as JSON.
 *
 * The promise settles exactly once:
 * - resolves with the parsed value when the stream ends with valid JSON;
 * - rejects with `Error("Body too large")` as soon as more than
 *   `MAX_BODY_SIZE` bytes have been received;
 * - rejects with `Error("Invalid JSON body")` when the payload does not parse
 *   (an empty body counts as invalid);
 * - rejects with the stream's own error if the request emits `"error"`.
 *
 * After the promise has settled, further chunks are ignored rather than
 * counted or buffered, and already-buffered chunks are released. The request
 * itself is left untouched so the caller can still write an error response.
 *
 * @param req - Incoming request whose body should be read.
 * @returns The parsed JSON value; callers must validate its shape.
 */
function readBody(req: http.IncomingMessage): Promise<unknown> {
  return new Promise<unknown>((resolve, reject) => {
    const chunks: Buffer[] = [];
    let total = 0;
    let settled = false;

    const fail = (err: Error): void => {
      if (settled) return;
      settled = true;
      chunks.length = 0; // drop buffered data; it will never be parsed
      reject(err);
    };

    req.on("data", (chunk: Buffer) => {
      if (settled) return; // keep draining, but do no further work
      total += chunk.length;
      if (total > MAX_BODY_SIZE) {
        fail(new Error("Body too large"));
        return;
      }
      chunks.push(chunk);
    });

    req.on("end", () => {
      if (settled) return;
      settled = true;
      try {
        resolve(JSON.parse(Buffer.concat(chunks).toString("utf8")));
      } catch {
        reject(new Error("Invalid JSON body"));
      }
    });

    req.on("error", fail);
  });
}
/**
 * Serializes `data` as JSON that is safe to splice into an HTML page.
 *
 * `<`, `>` and `&` are replaced by their `\uXXXX` escapes so the payload can
 * never close the surrounding element or start an entity/comment. The escapes
 * are valid JSON string escapes, so `JSON.parse` of the result yields the
 * original value.
 *
 * @param data - Any JSON-serializable value.
 * @returns The escaped JSON text; `"null"` when `data` has no JSON
 *   representation (for example `undefined`).
 */
function safeInlineJSON(data: unknown): string {
  // JSON.stringify returns undefined for undefined/functions/symbols.
  const json: string | undefined = JSON.stringify(data);
  return (json ?? "null")
    .replace(/</g, "\\u003c")
    .replace(/>/g, "\\u003e")
    .replace(/&/g, "\\u0026");
}
number; + completionSummary: string | null; +} + +// --------------------------------------------------------------------------- +// startWebServer +// --------------------------------------------------------------------------- + +export async function startWebServer(epicDir: string): Promise { + await ensureBundle(); + + // Discover available models from pi's registry + const authStorage = new AuthStorage(); + const modelRegistry = new ModelRegistry(authStorage); + const availableModels = modelRegistry.getAll().map((m) => ({ + id: `${m.provider}/${m.id}`, + name: m.name, + provider: m.provider, + })); + + const STATIC_ASSETS: Map = new Map([ + ["/static/css/variables.css", { content: loadAsset("css/variables.css"), mimeType: "text/css; charset=utf-8" }], + ["/static/css/layout.css", { content: loadAsset("css/layout.css"), mimeType: "text/css; charset=utf-8" }], + ["/static/css/components.css", { content: loadAsset("css/components.css"), mimeType: "text/css; charset=utf-8" }], + ["/static/css/animations.css", { content: loadAsset("css/animations.css"), mimeType: "text/css; charset=utf-8" }], + ["/static/js/app.js", { content: loadAsset("dist/app.js"), mimeType: "application/javascript; charset=utf-8" }], + ]); + + const sessionToken = randomUUID(); + + // Buffered state for SSE replay on reconnect + let currentPhase: EpicPhase | null = null; + let currentStories: Array<{ storyId: string; status: StoryStatus }> = []; + let currentSubagent: unknown | null = null; + let lastLogs: LogLine[] = []; + let pipelineEnd: { success: boolean; summary: string } | null = null; + + // Denormalized intake progress buffer + let currentIntakeProgress: { subPhase: string | null; intakeDone: boolean } = { + subPhase: null, + intakeDone: false, + }; + + // SSE clients + const sseClients = new Set(); + + // Pending inputs (requestReview / requestAnswer / requestModelConfig) + interface PendingEntry { + type: "review" | "ask" | "model-config"; + resolve: (result: unknown) => void; + 
reject: (err: Error) => void; + payload: unknown; + } + const pendingInputs = new Map(); + + // Agent registry + const agents = new Map(); + let spawnCounter = 0; + let completionCounter = 0; + + // Subagent observation polling + let trackingTimer: ReturnType | null = null; + + // --------------------------------------------------------------------------- + // SSE helpers + // --------------------------------------------------------------------------- + + function pushEvent(name: string, payload: unknown): void { + const chunk = `event: ${name}\ndata: ${JSON.stringify(payload)}\n\n`; + for (const client of sseClients) { + try { + client.write(chunk); + } catch { + sseClients.delete(client); + } + } + } + + function replayState(res: http.ServerResponse): void { + const write = (name: string, payload: unknown) => { + try { + res.write(`event: ${name}\ndata: ${JSON.stringify(payload)}\n\n`); + } catch { + // Ignore broken connection + } + }; + + write("init", { availableModels }); + + if (currentPhase) write("phase", { phase: currentPhase }); + if (currentStories.length > 0) write("stories", { stories: currentStories }); + + const agentArray = buildAgentsArray(); + if (agentArray.length > 0) write("agents", { agents: agentArray }); + + const scoutArray = buildScoutsArray(); + if (scoutArray.length > 0) write("scouts", { scouts: scoutArray }); + + if (currentIntakeProgress.subPhase !== null || currentIntakeProgress.intakeDone) { + write("intake-progress", currentIntakeProgress); + } + + if (currentSubagent) write("subagent", currentSubagent); + if (lastLogs.length > 0) write("logs", { lines: lastLogs }); + + for (const [requestId, entry] of pendingInputs) { + if (entry.type === "ask") { + write("ask", { requestId, questions: entry.payload }); + } else if (entry.type === "review") { + write("review", { requestId, stories: entry.payload }); + } else if (entry.type === "model-config") { + write("model-config", entry.payload); + } + } + + if (pipelineEnd !== null) 
/**
 * Produces the agent table sent to the browser in `agents` events.
 *
 * Ordering: running agents first, failed agents last, everything else in
 * between. Within a group, running agents are ordered by spawn order and
 * finished agents by completion order (falling back to spawn order when no
 * completion order has been recorded). Internal-only fields are not included
 * in the returned rows.
 */
function buildAgentsArray(): Array<{
  id: string; name: string; role: string; model: string | null;
  parent: string | null; status: string; tokensSent: number;
  tokensReceived: number; recentActions: Array<{ tool: string; summary: string; inFlight: boolean }>; subPhase: string | null;
}> {
  // 0 = running, 1 = neither running nor failed, 2 = failed.
  const groupOf = (agent: AgentInfoInternal): number => {
    if (agent.status === "running") return 0;
    return agent.status === "failed" ? 2 : 1;
  };
  const positionOf = (agent: AgentInfoInternal): number =>
    agent.status === "running" ? agent.spawnOrder : (agent.completionOrder ?? agent.spawnOrder);

  const ordered = [...agents.values()].sort((left, right) => {
    const leftGroup = groupOf(left);
    const rightGroup = groupOf(right);
    if (leftGroup !== rightGroup) return leftGroup < rightGroup ? -1 : 1;
    return positionOf(left) - positionOf(right);
  });

  return ordered.map(
    ({ id, name, role, model, parent, status, tokensSent, tokensReceived, recentActions, subPhase }) => ({
      id,
      name,
      role,
      model,
      parent,
      status,
      tokensSent,
      tokensReceived,
      recentActions,
      subPhase,
    }),
  );
}
`${l.tool}: ${l.summary}` : l.tool) : null; })() : null, + eventCount: a.eventCount, + model: a.model, + completionSummary: a.completionSummary, + tokensSent: a.tokensSent, + tokensReceived: a.tokensReceived, + })); + } + + // --------------------------------------------------------------------------- + // Agent polling + // --------------------------------------------------------------------------- + + async function pollAgent(agent: AgentInfoInternal): Promise { + try { + const [projection, logs] = await Promise.all([ + readProjection(agent.dir), + readRecentLogs(agent.dir, 5), + ]); + if (projection) { + agent.model = projection.model ?? agent.model; + agent.tokensSent = projection.tokensSent; + agent.tokensReceived = projection.tokensReceived; + agent.eventCount = projection.eventCount; + if (projection.status !== "running") { + agent.status = projection.status; + } + if (agent.role === "intake") { + const hasPendingAsk = Array.from(pendingInputs.values()).some((p) => p.type === "ask"); + const STEP_PHASE: Record = { 0: "context", 1: "context", 2: "explore", 3: "spec" }; + agent.subPhase = hasPendingAsk ? "questions" : (STEP_PHASE[projection.step] ?? 
"spec"); + } + } + if (logs.length > 0) { + agent.recentActions = logs.slice(-5).map((l) => ({ tool: l.tool, summary: l.summary || '', inFlight: l.inFlight })); + } + if (agent.role === "scout" && projection?.completionSummary && !agent.completionSummary) { + agent.completionSummary = projection.completionSummary; + } + } catch { + // Non-fatal + } + } + + function startAgentPolling(agent: AgentInfoInternal): void { + if (agent.pollingTimer) return; + const timer = setInterval(async () => { + await pollAgent(agent); + pushEvent("agents", { agents: buildAgentsArray() }); + if (agent.role === "scout") { + const scouts = buildScoutsArray(); + if (scouts.length > 0) pushEvent("scouts", { scouts }); + } + // Push intake-progress event if the intake agent's sub-phase changed + const intake = Array.from(agents.values()).find(a => a.role === "intake"); + if (intake) { + const next = { subPhase: intake.subPhase, intakeDone: currentPhase !== "intake" && currentPhase !== null }; + if (next.subPhase !== currentIntakeProgress.subPhase || next.intakeDone !== currentIntakeProgress.intakeDone) { + currentIntakeProgress = next; + pushEvent("intake-progress", currentIntakeProgress); + } + } + }, 500); + timer.unref(); + agent.pollingTimer = timer; + } + + function stopAgentPolling(agent: AgentInfoInternal): void { + if (agent.pollingTimer) { + clearInterval(agent.pollingTimer); + agent.pollingTimer = undefined; + } + } + + // --------------------------------------------------------------------------- + // HTTP server + // --------------------------------------------------------------------------- + + const server = http.createServer(async (req, res) => { + try { + const method = req.method ?? "GET"; + const url = new URL(req.url ?? 
"/", "http://127.0.0.1"); + const { pathname } = url; + + if (method === "GET" && pathname === "/") { + const token = url.searchParams.get("session"); + if (token !== sessionToken) { sendText(res, 403, "Invalid session token"); return; } + const topic = await extractTopic(epicDir); + const initialData = safeInlineJSON({ token: sessionToken, topic }); + const html = HTML_TEMPLATE.replace("/* __DATA__ */", initialData); + res.writeHead(200, { "Content-Type": "text/html; charset=utf-8", "Cache-Control": "no-store" }); + res.end(html); + return; + } + + if (method === "GET" && pathname.startsWith("/static/")) { + const asset = STATIC_ASSETS.get(pathname); + if (!asset) { sendText(res, 404, "Not found"); return; } + res.writeHead(200, { "Content-Type": asset.mimeType, "Cache-Control": "no-store" }); + res.end(asset.content); + return; + } + + if (method === "GET" && pathname === "/events") { + const token = url.searchParams.get("session"); + if (token !== sessionToken) { sendText(res, 403, "Invalid session token"); return; } + res.writeHead(200, { + "Content-Type": "text/event-stream", + "Cache-Control": "no-cache, no-transform", + "Connection": "keep-alive", + "X-Accel-Buffering": "no", + }); + res.write(": connected\n\n"); + sseClients.add(res); + replayState(res); + req.on("close", () => { sseClients.delete(res); }); + return; + } + + if (method === "GET" && pathname === "/health") { + sendJson(res, 200, { ok: true }); + return; + } + + if (method === "GET" && pathname === "/api/model-config") { + const config = await loadModelTierConfig(); + sendJson(res, 200, { tiers: config }); + return; + } + + if (method === "PUT" && pathname === "/api/model-config") { + const body = await readBody(req).catch(() => null); + const b = body as { requestId?: string; tiers: Record } | null; + if (!b) { sendJson(res, 400, { ok: false, error: "Invalid body" }); return; } + const { requestId, tiers } = b; + + // Save config if all 3 tiers are non-null non-empty strings + const strong = 
tiers?.strong; + const standard = tiers?.standard; + const cheap = tiers?.cheap; + if (strong && standard && cheap) { + await saveModelTierConfig({ strong, standard, cheap } as ModelTierConfig); + } + + // Resolve the blocking gate if requestId matches + if (requestId) { + const entry = pendingInputs.get(requestId); + if (entry && entry.type === "model-config") { + pendingInputs.delete(requestId); + entry.resolve(undefined); + } + } + + // Push confirmation so client clears pendingInput + pushEvent("model-config-confirmed", {}); + + sendJson(res, 200, { ok: true }); + return; + } + + if (method === "POST" && pathname === "/api/heartbeat") { + const body = await readBody(req).catch(() => null); + const b = body as { token?: string } | null; + if (!b || b.token !== sessionToken) { sendJson(res, 403, { ok: false, error: "Invalid token" }); return; } + sendJson(res, 200, { ok: true }); + return; + } + + if (method === "POST" && pathname === "/api/answer") { + const body = await readBody(req).catch(() => null); + const b = body as { token?: string; requestId?: string; answers?: unknown[] } | null; + if (!b) { sendJson(res, 400, { ok: false, error: "Invalid body" }); return; } + if (b.token !== sessionToken) { sendJson(res, 403, { ok: false, error: "Invalid token" }); return; } + const { requestId, answers } = b; + if (!requestId || !Array.isArray(answers)) { + sendJson(res, 400, { ok: false, error: "Missing requestId or answers" }); return; + } + const pending = pendingInputs.get(requestId); + if (!pending || pending.type !== "ask") { + sendJson(res, 409, { ok: false, error: "No pending ask with this requestId" }); return; + } + const result: AnswerResult = { cancelled: false, answers: answers as AnswerElement[] }; + pending.resolve(result); + pendingInputs.delete(requestId); + sendJson(res, 200, { ok: true }); + return; + } + + if (method === "POST" && pathname === "/api/review") { + const body = await readBody(req).catch(() => null); + const b = body as { token?: 
string; requestId?: string; approved?: string[]; skipped?: string[] } | null; + if (!b) { sendJson(res, 400, { ok: false, error: "Invalid body" }); return; } + if (b.token !== sessionToken) { sendJson(res, 403, { ok: false, error: "Invalid token" }); return; } + const { requestId, approved, skipped } = b; + if (!requestId || !Array.isArray(approved) || !Array.isArray(skipped)) { + sendJson(res, 400, { ok: false, error: "Missing fields" }); return; + } + const pending = pendingInputs.get(requestId); + if (!pending || pending.type !== "review") { + sendJson(res, 409, { ok: false, error: "No pending review with this requestId" }); return; + } + const result: ReviewResult = { approved, skipped }; + pending.resolve(result); + pendingInputs.delete(requestId); + sendJson(res, 200, { ok: true }); + return; + } + + if (method === "POST" && pathname === "/api/cancel") { + const body = await readBody(req).catch(() => null); + const b = body as { token?: string } | null; + if (!b || b.token !== sessionToken) { sendJson(res, 403, { ok: false, error: "Invalid token" }); return; } + pipelineEnd = { success: false, summary: "Cancelled by user" }; + pushEvent("pipeline-end", pipelineEnd); + const err = new Error("Pipeline cancelled by user"); + err.name = "AbortError"; + for (const [, entry] of pendingInputs) entry.reject(err); + pendingInputs.clear(); + sendJson(res, 200, { ok: true }); + return; + } + + sendText(res, 404, "Not found"); + } catch (err) { + const msg = err instanceof Error ? 
err.message : "Server error"; + sendJson(res, 500, { ok: false, error: msg }); + } + }); + + return new Promise((resolve, reject) => { + server.once("error", (err: Error) => { + reject(new Error(`Failed to start koan web server: ${err.message}`)); + }); + + server.listen(0, "127.0.0.1", () => { + const addr = server.address(); + if (!addr || typeof addr === "string") { + reject(new Error("Failed to start koan web server: invalid address")); + return; + } + const { port } = addr; + const url = `http://127.0.0.1:${port}/?session=${sessionToken}`; + + const handle: WebServerHandle = { + url, + port, + + pushPhase(phase: EpicPhase): void { + currentPhase = phase; + pushEvent("phase", { phase }); + currentIntakeProgress = { ...currentIntakeProgress, intakeDone: phase !== "intake" }; + pushEvent("intake-progress", currentIntakeProgress); + }, + + pushStories(stories: Array<{ storyId: string; status: StoryStatus }>): void { + currentStories = stories; + pushEvent("stories", { stories }); + }, + + pushLogs(lines: LogLine[], currentToolCallId?: string | null): void { + lastLogs = lines; + pushEvent("logs", { lines, currentToolCallId: currentToolCallId ?? null }); + }, + + pushNotification(message: string, level: "info" | "warning" | "error"): void { + pushEvent("notification", { message, level }); + }, + + trackSubagent(dir: string, role: string, storyId?: string): void { + if (trackingTimer) { clearInterval(trackingTimer); trackingTimer = null; } + const startedAt = Date.now(); + const timer = setInterval(async () => { + try { + const [projection, logs] = await Promise.all([readProjection(dir), readRecentLogs(dir, 50)]); + if (logs.length > 0) { + lastLogs = logs; + pushEvent("logs", { lines: logs, currentToolCallId: projection?.currentToolCallId ?? 
/**
 * Marks a registered agent as finished and broadcasts the updated tables.
 *
 * Polling for the agent stops immediately. The agent's projection is then
 * read one last time to pick up final token counts and status; an agent
 * whose projection is missing, unreadable, or still says "running" is
 * reported as "failed". Unknown ids are ignored.
 *
 * @param id - Identifier previously passed to `registerAgent`.
 */
completeAgent(id: string): void {
  const agent = agents.get(id);
  if (!agent) return;
  stopAgentPolling(agent);
  void readProjection(agent.dir)
    // A projection that cannot be read is handled like a missing one, so
    // the agent is still finalized instead of staying "running" and the
    // rejection does not go unhandled.
    .catch(() => null)
    .then((projection) => {
      if (projection) {
        agent.tokensSent = projection.tokensSent;
        agent.tokensReceived = projection.tokensReceived;
        agent.status = projection.status !== "running" ? projection.status : "failed";
      } else {
        agent.status = "failed";
      }
      agent.completionOrder = completionCounter++;
      pushEvent("agents", { agents: buildAgentsArray() });
      if (agent.role === "scout") {
        agent.completionSummary = projection?.completionSummary ?? null;
        pushEvent("scouts", { scouts: buildScoutsArray() });
      }
    });
},
/**
 * Asks the operating system to open `url` in the default browser.
 *
 * Uses `open` on macOS, `cmd /c start "" <url>` on Windows and `xdg-open`
 * everywhere else, all executed through `pi.exec`. Launch failures are
 * swallowed: the promise always resolves.
 *
 * @param pi - Extension API used to execute the launcher command.
 * @param url - Address to open.
 */
export async function openBrowser(pi: ExtensionAPI, url: string): Promise<void> {
  try {
    if (process.platform === "darwin") {
      await pi.exec("open", [url]);
    } else if (process.platform === "win32") {
      // The empty string fills `start`'s window-title slot so the URL is not
      // mistaken for a title.
      await pi.exec("cmd", ["/c", "start", "", url]);
    } else {
      await pi.exec("xdg-open", [url]);
    }
  } catch {
    // Non-fatal — URL is always in the tool result
  }
}
/**
 * Renders an option label with its note appended on the same line.
 *
 * The note is first flattened for single-line display: CR, LF and TAB become
 * spaces and the remaining control characters are removed. When the note is
 * blank and is not being edited, the bare label is returned. Otherwise the
 * result is `<label> — note: <note>`; while editing, the note keeps its
 * surrounding whitespace and is followed by a cursor glyph.
 *
 * @param baseOptionLabel - Label of the option without any note.
 * @param rawNote - Note text exactly as typed.
 * @param isEditingNote - Whether the note editor is open for this option.
 * @param maxInlineLabelLength - Optional maximum length (in UTF-16 code
 *   units) of the result. Over-long text is cut with an ellipsis: the tail
 *   is kept while editing (so the cursor stays visible), the head otherwise.
 * @returns The single-line label.
 */
export function buildOptionLabelWithInlineNote(
  baseOptionLabel: string,
  rawNote: string,
  isEditingNote: boolean,
  maxInlineLabelLength?: number,
): string {
  const flattenedNote = rawNote
    .replace(/[\r\n\t]/g, " ")
    .replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, "");

  const noteIsBlank = flattenedNote.trim().length === 0;
  if (noteIsBlank && !isEditingNote) {
    return baseOptionLabel;
  }

  const shownNote = isEditingNote ? `${flattenedNote}▍` : flattenedNote.trim();
  const fullLabel = `${baseOptionLabel} — note: ${shownNote}`;

  if (maxInlineLabelLength == null) {
    return fullLabel;
  }

  const limit = maxInlineLabelLength;
  if (limit <= 0) return "";
  if (fullLabel.length <= limit) return fullLabel;
  if (limit === 1) return "…";

  if (isEditingNote) {
    // Keep the end of the text so the cursor remains in view.
    return `…${fullLabel.slice(-(limit - 1))}`;
  }
  return `${fullLabel.slice(0, limit - 1)}…`;
}
recommendedOptionIndex >= optionCount) return 0; - return recommendedOptionIndex; -} - -export async function askSingleQuestionWithInlineNote( - ui: ExtensionUIContext, - questionInput: SingleQuestionInput, -): Promise { - const baseOptionLabels = questionInput.options.map((option) => option.label); - const optionLabelsWithRecommendedTag = appendRecommendedTagToOptionLabels( - baseOptionLabels, - questionInput.recommended, - ); - const selectableOptionLabels = [...optionLabelsWithRecommendedTag, OTHER_OPTION]; - const initialCursorIndex = resolveInitialCursorIndexFromRecommendedOption( - questionInput.recommended, - optionLabelsWithRecommendedTag.length, - ); - - const result = await ui.custom((tui, theme, _keybindings, done) => { - let cursorOptionIndex = initialCursorIndex; - let isNoteEditorOpen = false; - let cachedRenderedLines: string[] | undefined; - const noteByOptionIndex = new Map(); - - const editorTheme: EditorTheme = { - borderColor: (text) => theme.fg("accent", text), - selectList: { - selectedPrefix: (text) => theme.fg("accent", text), - selectedText: (text) => theme.fg("accent", text), - description: (text) => theme.fg("muted", text), - scrollInfo: (text) => theme.fg("dim", text), - noMatch: (text) => theme.fg("warning", text), - }, - }; - const noteEditor = new Editor(tui, editorTheme); - - const requestUiRerender = () => { - cachedRenderedLines = undefined; - tui.requestRender(); - }; - - const getRawNoteForOption = (optionIndex: number): string => noteByOptionIndex.get(optionIndex) ?? 
""; - const getTrimmedNoteForOption = (optionIndex: number): string => getRawNoteForOption(optionIndex).trim(); - - const loadCurrentNoteIntoEditor = () => { - noteEditor.setText(getRawNoteForOption(cursorOptionIndex)); - }; - - const saveCurrentNoteFromEditor = (value: string) => { - noteByOptionIndex.set(cursorOptionIndex, value); - }; - - const submitCurrentSelection = (selectedOptionLabel: string, note: string) => { - done({ - cancelled: false, - selectedOption: selectedOptionLabel, - note, - }); - }; - - noteEditor.onChange = (value) => { - saveCurrentNoteFromEditor(value); - requestUiRerender(); - }; - - noteEditor.onSubmit = (value) => { - saveCurrentNoteFromEditor(value); - const selectedOptionLabel = selectableOptionLabels[cursorOptionIndex]; - const trimmedNote = value.trim(); - - if (selectedOptionLabel === OTHER_OPTION && !trimmedNote) { - requestUiRerender(); - return; - } - - submitCurrentSelection(selectedOptionLabel, trimmedNote); - }; - - const render = (width: number): string[] => { - if (cachedRenderedLines) return cachedRenderedLines; - - const renderedLines: string[] = []; - const addLine = (line: string) => renderedLines.push(truncateToWidth(line, width)); - - addLine(theme.fg("accent", "─".repeat(width))); - addLine(theme.fg("text", ` ${questionInput.question}`)); - renderedLines.push(""); - - for (let optionIndex = 0; optionIndex < selectableOptionLabels.length; optionIndex++) { - const optionLabel = selectableOptionLabels[optionIndex]; - const isCursorOption = optionIndex === cursorOptionIndex; - const isEditingThisOption = isNoteEditorOpen && isCursorOption; - const cursorPrefixText = isCursorOption ? "→ " : " "; - const cursorPrefix = isCursorOption ? theme.fg("accent", cursorPrefixText) : cursorPrefixText; - const bullet = isCursorOption ? "●" : "○"; - const markerText = `${bullet} `; - const optionColor = isCursorOption ? 
"accent" : "text"; - const prefixWidth = visibleWidth(cursorPrefixText) + visibleWidth(markerText); - const wrappedInlineLabelLines = buildWrappedOptionLabelWithInlineNote( - optionLabel, - getRawNoteForOption(optionIndex), - isEditingThisOption, - Math.max(1, width - prefixWidth), - INLINE_NOTE_WRAP_PADDING, - ); - const continuationPrefix = " ".repeat(prefixWidth); - addLine(`${cursorPrefix}${theme.fg(optionColor, `${markerText}${wrappedInlineLabelLines[0] ?? ""}`)}`); - for (const wrappedLine of wrappedInlineLabelLines.slice(1)) { - addLine(`${continuationPrefix}${theme.fg(optionColor, wrappedLine)}`); - } - } - - renderedLines.push(""); - - if (isNoteEditorOpen) { - addLine(theme.fg("dim", " Typing note inline • Enter submit • Tab/Esc stop editing")); - } else if (getTrimmedNoteForOption(cursorOptionIndex).length > 0) { - addLine(theme.fg("dim", " ↑↓ move • Enter submit • Tab edit note • Esc cancel")); - } else { - addLine(theme.fg("dim", " ↑↓ move • Enter submit • Tab add note • Esc cancel")); - } - - addLine(theme.fg("accent", "─".repeat(width))); - cachedRenderedLines = renderedLines; - return renderedLines; - }; - - const handleInput = (data: string) => { - if (isNoteEditorOpen) { - if (matchesKey(data, Key.tab) || matchesKey(data, Key.escape)) { - isNoteEditorOpen = false; - requestUiRerender(); - return; - } - noteEditor.handleInput(data); - requestUiRerender(); - return; - } - - if (matchesKey(data, Key.up)) { - cursorOptionIndex = Math.max(0, cursorOptionIndex - 1); - requestUiRerender(); - return; - } - if (matchesKey(data, Key.down)) { - cursorOptionIndex = Math.min(selectableOptionLabels.length - 1, cursorOptionIndex + 1); - requestUiRerender(); - return; - } - - if (matchesKey(data, Key.tab)) { - isNoteEditorOpen = true; - loadCurrentNoteIntoEditor(); - requestUiRerender(); - return; - } - - if (matchesKey(data, Key.enter)) { - const selectedOptionLabel = selectableOptionLabels[cursorOptionIndex]; - const trimmedNote = 
getTrimmedNoteForOption(cursorOptionIndex); - - if (selectedOptionLabel === OTHER_OPTION && !trimmedNote) { - isNoteEditorOpen = true; - loadCurrentNoteIntoEditor(); - requestUiRerender(); - return; - } - - submitCurrentSelection(selectedOptionLabel, trimmedNote); - return; - } - - if (matchesKey(data, Key.escape)) { - done({ cancelled: true }); - } - }; - - return { - render, - invalidate: () => { - cachedRenderedLines = undefined; - }, - handleInput, - }; - }); - - if (result.cancelled || !result.selectedOption) { - return { selectedOptions: [] }; - } - - return buildSingleSelectionResult(result.selectedOption, result.note); -} diff --git a/src/planner/ui/ask/ask-logic.ts b/src/planner/ui/ask/ask-logic.ts deleted file mode 100644 index ccdf6fc..0000000 --- a/src/planner/ui/ask/ask-logic.ts +++ /dev/null @@ -1,98 +0,0 @@ -export const OTHER_OPTION = "Other (type your own)"; -const RECOMMENDED_OPTION_TAG = " (Recommended)"; - -export interface AskOption { - label: string; -} - -export interface AskQuestion { - id: string; - question: string; - options: AskOption[]; - multi?: boolean; - recommended?: number; -} - -export interface AskSelection { - selectedOptions: string[]; - customInput?: string; -} - -export function appendRecommendedTagToOptionLabels( - optionLabels: string[], - recommendedOptionIndex?: number, -): string[] { - if ( - recommendedOptionIndex == null || - recommendedOptionIndex < 0 || - recommendedOptionIndex >= optionLabels.length - ) { - return optionLabels; - } - - return optionLabels.map((optionLabel, optionIndex) => { - if (optionIndex !== recommendedOptionIndex) return optionLabel; - if (optionLabel.endsWith(RECOMMENDED_OPTION_TAG)) return optionLabel; - return `${optionLabel}${RECOMMENDED_OPTION_TAG}`; - }); -} - -function removeRecommendedTagFromOptionLabel(optionLabel: string): string { - if (!optionLabel.endsWith(RECOMMENDED_OPTION_TAG)) { - return optionLabel; - } - return optionLabel.slice(0, -RECOMMENDED_OPTION_TAG.length); -} - 
-export function buildSingleSelectionResult(selectedOptionLabel: string, note?: string): AskSelection { - const normalizedSelectedOption = removeRecommendedTagFromOptionLabel(selectedOptionLabel); - const normalizedNote = note?.trim(); - - if (normalizedSelectedOption === OTHER_OPTION) { - if (normalizedNote) { - return { selectedOptions: [], customInput: normalizedNote }; - } - return { selectedOptions: [] }; - } - - if (normalizedNote) { - return { selectedOptions: [`${normalizedSelectedOption} - ${normalizedNote}`] }; - } - - return { selectedOptions: [normalizedSelectedOption] }; -} - -export function buildMultiSelectionResult( - optionLabels: string[], - selectedOptionIndexes: number[], - optionNotes: string[], - otherOptionIndex: number, -): AskSelection { - const selectedOptionSet = new Set(selectedOptionIndexes); - const selectedOptions: string[] = []; - let customInput: string | undefined; - - for (let optionIndex = 0; optionIndex < optionLabels.length; optionIndex++) { - if (!selectedOptionSet.has(optionIndex)) continue; - - const optionLabel = removeRecommendedTagFromOptionLabel(optionLabels[optionIndex]); - const optionNote = optionNotes[optionIndex]?.trim(); - - if (optionIndex === otherOptionIndex) { - if (optionNote) customInput = optionNote; - continue; - } - - if (optionNote) { - selectedOptions.push(`${optionLabel} - ${optionNote}`); - } else { - selectedOptions.push(optionLabel); - } - } - - if (customInput) { - return { selectedOptions, customInput }; - } - - return { selectedOptions }; -} diff --git a/src/planner/ui/ask/ask-tabs-ui.ts b/src/planner/ui/ask/ask-tabs-ui.ts deleted file mode 100644 index dd58190..0000000 --- a/src/planner/ui/ask/ask-tabs-ui.ts +++ /dev/null @@ -1,512 +0,0 @@ -import type { ExtensionUIContext } from "@mariozechner/pi-coding-agent"; -import { Editor, type EditorTheme, Key, matchesKey, truncateToWidth, visibleWidth } from "@mariozechner/pi-tui"; -import { - OTHER_OPTION, - appendRecommendedTagToOptionLabels, - 
buildMultiSelectionResult, - buildSingleSelectionResult, - type AskQuestion, - type AskSelection, -} from "./ask-logic.js"; -import { INLINE_NOTE_WRAP_PADDING, buildWrappedOptionLabelWithInlineNote } from "./ask-inline-note.js"; - -interface PreparedQuestion { - id: string; - question: string; - options: string[]; - tabLabel: string; - multi: boolean; - otherOptionIndex: number; -} - -interface TabsUIState { - cancelled: boolean; - selectedOptionIndexesByQuestion: number[][]; - noteByQuestionByOption: string[][]; -} - -export function formatSelectionForSubmitReview(selection: AskSelection, isMulti: boolean): string { - const hasSelectedOptions = selection.selectedOptions.length > 0; - const hasCustomInput = Boolean(selection.customInput); - - if (hasSelectedOptions && hasCustomInput) { - const selectedPart = isMulti - ? `[${selection.selectedOptions.join(", ")}]` - : selection.selectedOptions[0]; - return `${selectedPart} + Other: ${selection.customInput}`; - } - - if (hasCustomInput) { - return `Other: ${selection.customInput}`; - } - - if (hasSelectedOptions) { - return isMulti ? `[${selection.selectedOptions.join(", ")}]` : selection.selectedOptions[0]; - } - - return "(not answered)"; -} - -function clampIndex(index: number | undefined, maxExclusive: number): number { - if (index == null || Number.isNaN(index) || maxExclusive <= 0) return 0; - if (index < 0) return 0; - if (index >= maxExclusive) return maxExclusive - 1; - return index; -} - -function normalizeTabLabel(id: string, fallback: string): string { - const normalized = id.trim().replace(/[_-]+/g, " "); - return normalized.length > 0 ? 
normalized : fallback; -} - -function buildSelectionForQuestion( - question: PreparedQuestion, - selectedOptionIndexes: number[], - noteByOptionIndex: string[], -): AskSelection { - if (selectedOptionIndexes.length === 0) { - return { selectedOptions: [] }; - } - - if (question.multi) { - return buildMultiSelectionResult(question.options, selectedOptionIndexes, noteByOptionIndex, question.otherOptionIndex); - } - - const selectedOptionIndex = selectedOptionIndexes[0]; - const selectedOptionLabel = question.options[selectedOptionIndex] ?? OTHER_OPTION; - const note = noteByOptionIndex[selectedOptionIndex] ?? ""; - return buildSingleSelectionResult(selectedOptionLabel, note); -} - -function isQuestionSelectionValid( - question: PreparedQuestion, - selectedOptionIndexes: number[], - noteByOptionIndex: string[], -): boolean { - if (selectedOptionIndexes.length === 0) return false; - if (!selectedOptionIndexes.includes(question.otherOptionIndex)) return true; - const otherNote = noteByOptionIndex[question.otherOptionIndex]?.trim() ?? 
""; - return otherNote.length > 0; -} - -function createTabsUiStateSnapshot( - cancelled: boolean, - selectedOptionIndexesByQuestion: number[][], - noteByQuestionByOption: string[][], -): TabsUIState { - return { - cancelled, - selectedOptionIndexesByQuestion: selectedOptionIndexesByQuestion.map((indexes) => [...indexes]), - noteByQuestionByOption: noteByQuestionByOption.map((notes) => [...notes]), - }; -} - -function addIndexToSelection(selectedOptionIndexes: number[], optionIndex: number): number[] { - if (selectedOptionIndexes.includes(optionIndex)) return selectedOptionIndexes; - return [...selectedOptionIndexes, optionIndex].sort((a, b) => a - b); -} - -function removeIndexFromSelection(selectedOptionIndexes: number[], optionIndex: number): number[] { - return selectedOptionIndexes.filter((index) => index !== optionIndex); -} - -export async function askQuestionsWithTabs( - ui: ExtensionUIContext, - questions: AskQuestion[], -): Promise<{ cancelled: boolean; selections: AskSelection[] }> { - const preparedQuestions: PreparedQuestion[] = questions.map((question, questionIndex) => { - const baseOptionLabels = question.options.map((option) => option.label); - const optionLabels = [...appendRecommendedTagToOptionLabels(baseOptionLabels, question.recommended), OTHER_OPTION]; - return { - id: question.id, - question: question.question, - options: optionLabels, - tabLabel: normalizeTabLabel(question.id, `Q${questionIndex + 1}`), - multi: question.multi === true, - otherOptionIndex: optionLabels.length - 1, - }; - }); - - const initialCursorOptionIndexByQuestion = preparedQuestions.map((preparedQuestion, questionIndex) => - clampIndex(questions[questionIndex].recommended, preparedQuestion.options.length), - ); - - const result = await ui.custom((tui, theme, _keybindings, done) => { - let activeTabIndex = 0; - let isNoteEditorOpen = false; - let cachedRenderedLines: string[] | undefined; - const cursorOptionIndexByQuestion = [...initialCursorOptionIndexByQuestion]; - 
const selectedOptionIndexesByQuestion = preparedQuestions.map(() => [] as number[]); - const noteByQuestionByOption = preparedQuestions.map((preparedQuestion) => - Array(preparedQuestion.options.length).fill("") as string[], - ); - - const editorTheme: EditorTheme = { - borderColor: (text) => theme.fg("accent", text), - selectList: { - selectedPrefix: (text) => theme.fg("accent", text), - selectedText: (text) => theme.fg("accent", text), - description: (text) => theme.fg("muted", text), - scrollInfo: (text) => theme.fg("dim", text), - noMatch: (text) => theme.fg("warning", text), - }, - }; - const noteEditor = new Editor(tui, editorTheme); - - const submitTabIndex = preparedQuestions.length; - - const requestUiRerender = () => { - cachedRenderedLines = undefined; - tui.requestRender(); - }; - - const getActiveQuestionIndex = (): number | null => { - if (activeTabIndex >= preparedQuestions.length) return null; - return activeTabIndex; - }; - - const getQuestionNote = (questionIndex: number, optionIndex: number): string => - noteByQuestionByOption[questionIndex]?.[optionIndex] ?? 
""; - - const getTrimmedQuestionNote = (questionIndex: number, optionIndex: number): string => - getQuestionNote(questionIndex, optionIndex).trim(); - - const isAllQuestionSelectionsValid = (): boolean => - preparedQuestions.every((preparedQuestion, questionIndex) => - isQuestionSelectionValid( - preparedQuestion, - selectedOptionIndexesByQuestion[questionIndex], - noteByQuestionByOption[questionIndex], - ), - ); - - const openNoteEditorForActiveOption = () => { - const questionIndex = getActiveQuestionIndex(); - if (questionIndex == null) return; - - isNoteEditorOpen = true; - const optionIndex = cursorOptionIndexByQuestion[questionIndex]; - noteEditor.setText(getQuestionNote(questionIndex, optionIndex)); - requestUiRerender(); - }; - - const advanceToNextTabOrSubmit = () => { - activeTabIndex = Math.min(submitTabIndex, activeTabIndex + 1); - }; - - noteEditor.onChange = (value) => { - const questionIndex = getActiveQuestionIndex(); - if (questionIndex == null) return; - const optionIndex = cursorOptionIndexByQuestion[questionIndex]; - noteByQuestionByOption[questionIndex][optionIndex] = value; - requestUiRerender(); - }; - - noteEditor.onSubmit = (value) => { - const questionIndex = getActiveQuestionIndex(); - if (questionIndex == null) return; - - const preparedQuestion = preparedQuestions[questionIndex]; - const optionIndex = cursorOptionIndexByQuestion[questionIndex]; - noteByQuestionByOption[questionIndex][optionIndex] = value; - const trimmedNote = value.trim(); - - if (preparedQuestion.multi) { - if (trimmedNote.length > 0) { - selectedOptionIndexesByQuestion[questionIndex] = addIndexToSelection( - selectedOptionIndexesByQuestion[questionIndex], - optionIndex, - ); - } - if (optionIndex === preparedQuestion.otherOptionIndex && trimmedNote.length === 0) { - requestUiRerender(); - return; - } - isNoteEditorOpen = false; - requestUiRerender(); - return; - } - - selectedOptionIndexesByQuestion[questionIndex] = [optionIndex]; - if (optionIndex === 
preparedQuestion.otherOptionIndex && trimmedNote.length === 0) { - requestUiRerender(); - return; - } - - isNoteEditorOpen = false; - advanceToNextTabOrSubmit(); - requestUiRerender(); - }; - - const renderTabs = (): string => { - const tabParts: string[] = ["← "]; - for (let questionIndex = 0; questionIndex < preparedQuestions.length; questionIndex++) { - const preparedQuestion = preparedQuestions[questionIndex]; - const isActiveTab = questionIndex === activeTabIndex; - const isQuestionValid = isQuestionSelectionValid( - preparedQuestion, - selectedOptionIndexesByQuestion[questionIndex], - noteByQuestionByOption[questionIndex], - ); - const statusIcon = isQuestionValid ? "■" : "□"; - const tabLabel = ` ${statusIcon} ${preparedQuestion.tabLabel} `; - const styledTabLabel = isActiveTab - ? theme.bg("selectedBg", theme.fg("text", tabLabel)) - : theme.fg(isQuestionValid ? "success" : "muted", tabLabel); - tabParts.push(`${styledTabLabel} `); - } - - const isSubmitTabActive = activeTabIndex === submitTabIndex; - const canSubmit = isAllQuestionSelectionsValid(); - const submitLabel = " ✓ Submit "; - const styledSubmitLabel = isSubmitTabActive - ? theme.bg("selectedBg", theme.fg("text", submitLabel)) - : theme.fg(canSubmit ? 
"success" : "dim", submitLabel); - tabParts.push(`${styledSubmitLabel} →`); - return tabParts.join(""); - }; - - const renderSubmitTab = (width: number, renderedLines: string[]): void => { - const addLine = (line: string) => renderedLines.push(truncateToWidth(line, width)); - - addLine(theme.fg("accent", theme.bold(" Review answers"))); - renderedLines.push(""); - - for (let questionIndex = 0; questionIndex < preparedQuestions.length; questionIndex++) { - const preparedQuestion = preparedQuestions[questionIndex]; - const selection = buildSelectionForQuestion( - preparedQuestion, - selectedOptionIndexesByQuestion[questionIndex], - noteByQuestionByOption[questionIndex], - ); - const value = formatSelectionForSubmitReview(selection, preparedQuestion.multi); - const isValid = isQuestionSelectionValid( - preparedQuestion, - selectedOptionIndexesByQuestion[questionIndex], - noteByQuestionByOption[questionIndex], - ); - const statusIcon = isValid ? theme.fg("success", "●") : theme.fg("warning", "○"); - addLine(` ${statusIcon} ${theme.fg("muted", `${preparedQuestion.tabLabel}:`)} ${theme.fg("text", value)}`); - } - - renderedLines.push(""); - if (isAllQuestionSelectionsValid()) { - addLine(theme.fg("success", " Press Enter to submit")); - } else { - const missingQuestions = preparedQuestions - .filter((preparedQuestion, questionIndex) => - !isQuestionSelectionValid( - preparedQuestion, - selectedOptionIndexesByQuestion[questionIndex], - noteByQuestionByOption[questionIndex], - ), - ) - .map((preparedQuestion) => preparedQuestion.tabLabel) - .join(", "); - addLine(theme.fg("warning", ` Complete required answers: ${missingQuestions}`)); - } - addLine(theme.fg("dim", " ←/→ switch tabs • Esc cancel")); - }; - - const renderQuestionTab = (width: number, renderedLines: string[], questionIndex: number): void => { - const addLine = (line: string) => renderedLines.push(truncateToWidth(line, width)); - const preparedQuestion = preparedQuestions[questionIndex]; - const 
cursorOptionIndex = cursorOptionIndexByQuestion[questionIndex]; - const selectedOptionIndexes = selectedOptionIndexesByQuestion[questionIndex]; - - addLine(theme.fg("text", ` ${preparedQuestion.question}`)); - renderedLines.push(""); - - for (let optionIndex = 0; optionIndex < preparedQuestion.options.length; optionIndex++) { - const optionLabel = preparedQuestion.options[optionIndex]; - const isCursorOption = optionIndex === cursorOptionIndex; - const isOptionSelected = selectedOptionIndexes.includes(optionIndex); - const isEditingThisOption = isNoteEditorOpen && isCursorOption; - const cursorPrefixText = isCursorOption ? "→ " : " "; - const cursorPrefix = isCursorOption ? theme.fg("accent", cursorPrefixText) : cursorPrefixText; - const markerText = preparedQuestion.multi - ? `${isOptionSelected ? "[x]" : "[ ]"} ` - : `${isOptionSelected ? "●" : "○"} `; - const optionColor = isCursorOption ? "accent" : isOptionSelected ? "success" : "text"; - const prefixWidth = visibleWidth(cursorPrefixText) + visibleWidth(markerText); - const wrappedInlineLabelLines = buildWrappedOptionLabelWithInlineNote( - optionLabel, - getQuestionNote(questionIndex, optionIndex), - isEditingThisOption, - Math.max(1, width - prefixWidth), - INLINE_NOTE_WRAP_PADDING, - ); - const continuationPrefix = " ".repeat(prefixWidth); - addLine(`${cursorPrefix}${theme.fg(optionColor, `${markerText}${wrappedInlineLabelLines[0] ?? 
""}`)}`); - for (const wrappedLine of wrappedInlineLabelLines.slice(1)) { - addLine(`${continuationPrefix}${theme.fg(optionColor, wrappedLine)}`); - } - } - - renderedLines.push(""); - if (isNoteEditorOpen) { - addLine(theme.fg("dim", " Typing note inline • Enter save note • Tab/Esc stop editing")); - } else { - if (preparedQuestion.multi) { - addLine( - theme.fg( - "dim", - " ↑↓ move • Enter toggle/select • Tab add note • ←/→ switch tabs • Esc cancel", - ), - ); - } else { - addLine( - theme.fg("dim", " ↑↓ move • Enter select • Tab add note • ←/→ switch tabs • Esc cancel"), - ); - } - } - }; - - const render = (width: number): string[] => { - if (cachedRenderedLines) return cachedRenderedLines; - - const renderedLines: string[] = []; - const addLine = (line: string) => renderedLines.push(truncateToWidth(line, width)); - - addLine(theme.fg("accent", "─".repeat(width))); - addLine(` ${renderTabs()}`); - renderedLines.push(""); - - if (activeTabIndex === submitTabIndex) { - renderSubmitTab(width, renderedLines); - } else { - renderQuestionTab(width, renderedLines, activeTabIndex); - } - - addLine(theme.fg("accent", "─".repeat(width))); - cachedRenderedLines = renderedLines; - return renderedLines; - }; - - const handleInput = (data: string) => { - if (isNoteEditorOpen) { - if (matchesKey(data, Key.tab) || matchesKey(data, Key.escape)) { - isNoteEditorOpen = false; - requestUiRerender(); - return; - } - noteEditor.handleInput(data); - requestUiRerender(); - return; - } - - if (matchesKey(data, Key.left)) { - activeTabIndex = (activeTabIndex - 1 + preparedQuestions.length + 1) % (preparedQuestions.length + 1); - requestUiRerender(); - return; - } - - if (matchesKey(data, Key.right)) { - activeTabIndex = (activeTabIndex + 1) % (preparedQuestions.length + 1); - requestUiRerender(); - return; - } - - if (activeTabIndex === submitTabIndex) { - if (matchesKey(data, Key.enter) && isAllQuestionSelectionsValid()) { - done(createTabsUiStateSnapshot(false, 
selectedOptionIndexesByQuestion, noteByQuestionByOption)); - return; - } - if (matchesKey(data, Key.escape)) { - done(createTabsUiStateSnapshot(true, selectedOptionIndexesByQuestion, noteByQuestionByOption)); - } - return; - } - - const questionIndex = activeTabIndex; - const preparedQuestion = preparedQuestions[questionIndex]; - - if (matchesKey(data, Key.up)) { - cursorOptionIndexByQuestion[questionIndex] = Math.max(0, cursorOptionIndexByQuestion[questionIndex] - 1); - requestUiRerender(); - return; - } - - if (matchesKey(data, Key.down)) { - cursorOptionIndexByQuestion[questionIndex] = Math.min( - preparedQuestion.options.length - 1, - cursorOptionIndexByQuestion[questionIndex] + 1, - ); - requestUiRerender(); - return; - } - - if (matchesKey(data, Key.tab)) { - openNoteEditorForActiveOption(); - return; - } - - if (matchesKey(data, Key.enter)) { - const cursorOptionIndex = cursorOptionIndexByQuestion[questionIndex]; - - if (preparedQuestion.multi) { - const currentlySelected = selectedOptionIndexesByQuestion[questionIndex]; - if (currentlySelected.includes(cursorOptionIndex)) { - selectedOptionIndexesByQuestion[questionIndex] = removeIndexFromSelection(currentlySelected, cursorOptionIndex); - } else { - selectedOptionIndexesByQuestion[questionIndex] = addIndexToSelection(currentlySelected, cursorOptionIndex); - } - - if ( - cursorOptionIndex === preparedQuestion.otherOptionIndex && - selectedOptionIndexesByQuestion[questionIndex].includes(cursorOptionIndex) && - getTrimmedQuestionNote(questionIndex, cursorOptionIndex).length === 0 - ) { - openNoteEditorForActiveOption(); - return; - } - - requestUiRerender(); - return; - } - - selectedOptionIndexesByQuestion[questionIndex] = [cursorOptionIndex]; - if ( - cursorOptionIndex === preparedQuestion.otherOptionIndex && - getTrimmedQuestionNote(questionIndex, cursorOptionIndex).length === 0 - ) { - openNoteEditorForActiveOption(); - return; - } - - advanceToNextTabOrSubmit(); - requestUiRerender(); - return; - } - - if 
(matchesKey(data, Key.escape)) { - done(createTabsUiStateSnapshot(true, selectedOptionIndexesByQuestion, noteByQuestionByOption)); - } - }; - - return { - render, - invalidate: () => { - cachedRenderedLines = undefined; - }, - handleInput, - }; - }); - - if (result.cancelled) { - return { - cancelled: true, - selections: preparedQuestions.map(() => ({ selectedOptions: [] } satisfies AskSelection)), - }; - } - - const selections = preparedQuestions.map((preparedQuestion, questionIndex) => - buildSelectionForQuestion( - preparedQuestion, - result.selectedOptionIndexesByQuestion[questionIndex] ?? [], - result.noteByQuestionByOption[questionIndex] ?? Array(preparedQuestion.options.length).fill(""), - ), - ); - - return { cancelled: result.cancelled, selections }; -} diff --git a/src/planner/ui/epic-widget.ts b/src/planner/ui/epic-widget.ts deleted file mode 100644 index 88e9cb7..0000000 --- a/src/planner/ui/epic-widget.ts +++ /dev/null @@ -1,243 +0,0 @@ -// Epic execution status widget. Renders a TUI panel showing: -// - Story list with status icons -// - Active subagent: role, step, elapsed time -// - Recent log tail from the active subagent directory -// - Autonomous decision counter -// -// The driver creates one instance at the start of runEpicPipeline (before intake) -// and calls update() after each state change. Spans the full epic lifecycle (Phase -// A + B), not just story execution. Pure observation layer — never influences routing. -// Self-renders via pi's setWidget API; a 1-second unref'd timer keeps elapsed time fresh. 
- -import type { ExtensionUIContext } from "@mariozechner/pi-coding-agent"; -import type { Theme, ThemeColor } from "@mariozechner/pi-coding-agent"; -import { truncateToWidth, visibleWidth } from "@mariozechner/pi-tui"; - -import type { EpicPhase, StoryStatus } from "../types.js"; -import type { LogLine } from "../lib/audit.js"; - -// -- Types -- - -export interface ActiveSubagentInfo { - role: string; - storyId?: string; - step: number; - totalSteps: number; - stepName: string; - startedAt: number; -} - -export interface EpicWidgetState { - epicId: string; - epicPhase: EpicPhase; - stories: Array<{ storyId: string; status: StoryStatus }>; - activeSubagent: ActiveSubagentInfo | null; - logLines: LogLine[]; -} - -export interface EpicWidgetUpdate { - epicPhase?: EpicPhase; - stories?: Array<{ storyId: string; status: StoryStatus }>; - activeSubagent?: ActiveSubagentInfo | null; - logLines?: LogLine[]; -} - -// -- Constants -- - -const WIDGET_KEY = "koan-epic"; -const PAD = 2; -const MAX_LOG_LINES = 5; - -// Status icons and colors — no escalated status per §11.3.1. -const STATUS_ICON: Record = { - pending: "○", - selected: "◎", - planning: "◐", - executing: "●", - verifying: "◑", - done: "✓", - retry: "↺", - skipped: "—", -}; - -const STATUS_COLOR: Record = { - pending: "muted", - selected: "accent", - planning: "accent", - executing: "accent", - verifying: "accent", - done: "success", - retry: "warning", - skipped: "dim", -}; - -// -- Helpers -- - -function cw(termWidth: number): number { - return Math.max(40, termWidth - PAD * 2); -} - -function line(content: string, termWidth: number, theme: Theme): string { - const w = cw(termWidth); - const inner = clamp(content, w); - return theme.bg("toolPendingBg", " ".repeat(PAD) + inner + " ".repeat(PAD)); -} - -function clamp(text: string, width: number): string { - const truncated = truncateToWidth(text, width, "", false); - const vw = visibleWidth(truncated); - return vw >= width ? 
truncated : truncated + " ".repeat(width - vw); -} - -function formatElapsed(ms: number): string { - const s = Math.floor(ms / 1000); - const h = Math.floor(s / 3600); - const m = Math.floor((s % 3600) / 60); - const sec = s % 60; - if (h > 0) return `${h}h ${String(m).padStart(2, "0")}m`; - return `${m}m ${String(sec).padStart(2, "0")}s`; -} - -// -- Render -- - -function renderHeader(state: EpicWidgetState, theme: Theme, width: number): string { - const elapsed = state.activeSubagent - ? theme.fg("dim", formatElapsed(Date.now() - state.activeSubagent.startedAt)) - : ""; - const title = theme.bold(theme.fg("accent", `Epic · ${state.epicId}`)); - const phaseBadge = theme.fg("muted", ` · ${state.epicPhase}`); - const left = `${title}${phaseBadge}`; - const gap = Math.max(1, width - visibleWidth(left) - visibleWidth(elapsed)); - return clamp(`${left}${" ".repeat(gap)}${elapsed}`, width); -} - -function renderStoryList(state: EpicWidgetState, theme: Theme, width: number): string[] { - if (state.stories.length === 0) { - return [clamp(theme.fg("muted", " No stories yet"), width)]; - } - return state.stories.map(({ storyId, status }) => { - const icon = STATUS_ICON[status] ?? "?"; - const color = STATUS_COLOR[status] ?? "muted"; - const iconStr = theme.fg(color, icon); - const label = status === "executing" || status === "planning" || status === "verifying" - ? theme.bold(theme.fg(color, storyId)) - : theme.fg(color, storyId); - const statusLabel = theme.fg("dim", ` (${status})`); - return clamp(` ${iconStr} ${label}${statusLabel}`, width); - }); -} - -function renderActiveSubagent(state: EpicWidgetState, theme: Theme, width: number): string[] { - const sa = state.activeSubagent; - if (!sa) { - return [clamp(theme.fg("muted", " idle"), width)]; - } - const roleLabel = sa.storyId ? `${sa.role} · ${sa.storyId}` : sa.role; - const stepLabel = sa.totalSteps > 0 - ? `step ${sa.step}/${sa.totalSteps}${sa.stepName ? 
` · ${sa.stepName}` : ""}` - : "starting"; - const elapsedStr = formatElapsed(Date.now() - sa.startedAt); - return [ - clamp(` ${theme.bold(theme.fg("accent", roleLabel))} ${theme.fg("muted", stepLabel)}`, width), - clamp(` ${theme.fg("dim", elapsedStr)}`, width), - ]; -} - -function renderLogTail(state: EpicWidgetState, theme: Theme, width: number): string[] { - const entries = state.logLines.slice(-MAX_LOG_LINES); - if (entries.length === 0) { - return [clamp(theme.fg("dim", " (no log entries)"), width)]; - } - return entries.map((entry) => { - const toolStr = theme.bold(theme.fg("accent", entry.tool)); - const summary = entry.summary.trim(); - const sep = summary ? " " : ""; - return clamp(` ${toolStr}${sep}${theme.fg("muted", summary)}`, width); - }); -} - -function renderDivider(label: string, theme: Theme, width: number): string { - const tag = ` ${label} `; - const tagLen = visibleWidth(tag); - const dashCount = Math.max(0, width - tagLen); - const left = Math.floor(dashCount / 2); - const right = dashCount - left; - return clamp( - `${theme.fg("dim", "─".repeat(left))}${theme.bold(theme.fg("muted", tag))}${theme.fg("dim", "─".repeat(right))}`, - width, - ); -} - -function render(state: EpicWidgetState, theme: Theme, termWidth: number): string[] { - const w = cw(termWidth); - const L = (content: string) => line(content, termWidth, theme); - const lines: string[] = []; - - lines.push(L("")); - lines.push(L(renderHeader(state, theme, w))); - lines.push(L(renderDivider("stories", theme, w))); - for (const l of renderStoryList(state, theme, w)) lines.push(L(l)); - lines.push(L(renderDivider("active", theme, w))); - for (const l of renderActiveSubagent(state, theme, w)) lines.push(L(l)); - lines.push(L(renderDivider("log", theme, w))); - for (const l of renderLogTail(state, theme, w)) lines.push(L(l)); - lines.push(L("")); - - return lines; -} - -// -- EpicWidgetController -- - -export class EpicWidgetController { - private state: EpicWidgetState; - private 
lastHash = ""; - private timer: ReturnType; - private ui: ExtensionUIContext; - - constructor(ui: ExtensionUIContext, epicId: string) { - this.ui = ui; - this.state = { - epicId, - epicPhase: "intake", - stories: [], - activeSubagent: null, - logLines: [], - }; - this.timer = setInterval(() => this.doRender(), 1000); - this.timer.unref(); - this.doRender(); - } - - update(patch: EpicWidgetUpdate): void { - if (patch.epicPhase !== undefined) this.state.epicPhase = patch.epicPhase; - if (patch.stories !== undefined) this.state.stories = patch.stories; - if (patch.activeSubagent !== undefined) this.state.activeSubagent = patch.activeSubagent; - if (patch.logLines !== undefined) this.state.logLines = patch.logLines; - this.doRender(); - } - - destroy(): void { - clearInterval(this.timer); - this.ui.setWidget(WIDGET_KEY, undefined); - } - - private doRender(): void { - const snapshot = { - ...this.state, - stories: this.state.stories.map((s) => ({ ...s })), - logLines: this.state.logLines.map((l) => ({ ...l })), - activeSubagent: this.state.activeSubagent ? { ...this.state.activeSubagent } : null, - }; - const { theme } = this.ui; - - const hashLines = render(snapshot, theme, 0); - const hash = hashLines.join("\n"); - if (hash === this.lastHash) return; - this.lastHash = hash; - - this.ui.setWidget(WIDGET_KEY, (_tui, th) => ({ - render: (width: number) => render(snapshot, th, width), - invalidate: () => {}, - })); - } -} diff --git a/src/planner/ui/spec-review.ts b/src/planner/ui/spec-review.ts deleted file mode 100644 index 9f5e1a3..0000000 --- a/src/planner/ui/spec-review.ts +++ /dev/null @@ -1,152 +0,0 @@ -// Spec review gate: interactive story approval UI. -// Shown after decomposition so the user can approve, or skip individual stories -// before execution begins. Driver blocks until the user confirms. 
-// -// Controls: -// ↑↓ move cursor -// Space toggle selected story between "include" and "skip" -// A approve all (mark all as include) -// Enter confirm and proceed -// Esc confirm current selections and proceed - -import { promises as fs } from "node:fs"; -import * as path from "node:path"; - -import type { ExtensionUIContext } from "@mariozechner/pi-coding-agent"; -import { Key, matchesKey, truncateToWidth, visibleWidth } from "@mariozechner/pi-tui"; - -export interface SpecReviewResult { - approved: string[]; - skipped: string[]; -} - -interface StoryEntry { - storyId: string; - title: string; - include: boolean; -} - -async function readStoryTitle(epicDir: string, storyId: string): Promise { - try { - const raw = await fs.readFile(path.join(epicDir, "stories", storyId, "story.md"), "utf8"); - // Extract first non-empty, non-heading line after a heading, or first heading text. - for (const rawLine of raw.split("\n")) { - const l = rawLine.trim(); - if (!l) continue; - // Strip leading # characters for headings. - const text = l.replace(/^#+\s*/, "").trim(); - if (text) return text.slice(0, 80); - } - return storyId; - } catch { - return storyId; - } -} - -export async function reviewStorySketches( - epicDir: string, - storyIds: string[], - ui: ExtensionUIContext, -): Promise { - if (storyIds.length === 0) { - return { approved: [], skipped: [] }; - } - - // Load story titles asynchronously. - const titles = await Promise.all(storyIds.map((id) => readStoryTitle(epicDir, id))); - const entries: StoryEntry[] = storyIds.map((storyId, i) => ({ - storyId, - title: titles[i] ?? 
storyId, - include: true, - })); - - const result = await ui.custom<{ entries: StoryEntry[] }>((tui, theme, _keybindings, done) => { - let cursor = 0; - let cachedLines: string[] | undefined; - - const requestRender = () => { - cachedLines = undefined; - tui.requestRender(); - }; - - const render = (width: number): string[] => { - if (cachedLines) return cachedLines; - const lines: string[] = []; - const addLine = (l: string) => lines.push(truncateToWidth(l, width)); - - addLine(theme.fg("accent", "─".repeat(width))); - addLine( - ` ${theme.bold(theme.fg("accent", "Spec Review"))} ${theme.fg("muted", `${entries.length} stories`)}`, - ); - addLine(theme.fg("dim", " Review story sketches before execution begins.")); - addLine(""); - - for (let i = 0; i < entries.length; i++) { - const e = entries[i]; - const isCursor = i === cursor; - const prefix = isCursor ? theme.fg("accent", "→ ") : " "; - const checkbox = e.include - ? theme.fg("success", "[✓]") - : theme.fg("dim", "[ ]"); - const label = isCursor - ? theme.bold(theme.fg(e.include ? "text" : "dim", e.storyId)) - : theme.fg(e.include ? 
"text" : "dim", e.storyId); - const titleStr = theme.fg("muted", ` — ${e.title}`); - addLine(`${prefix}${checkbox} ${label}${titleStr}`); - } - - addLine(""); - - const approvedCount = entries.filter((e) => e.include).length; - const skippedCount = entries.length - approvedCount; - addLine( - ` ${theme.fg("success", `${approvedCount} approved`)} ${theme.fg("dim", `${skippedCount} skipped`)}`, - ); - addLine(""); - addLine( - theme.fg("dim", " ↑↓ move • Space toggle • A approve all • Enter confirm • Esc confirm"), - ); - addLine(theme.fg("accent", "─".repeat(width))); - - cachedLines = lines; - return lines; - }; - - const handleInput = (data: string) => { - if (matchesKey(data, Key.up)) { - cursor = Math.max(0, cursor - 1); - requestRender(); - return; - } - if (matchesKey(data, Key.down)) { - cursor = Math.min(entries.length - 1, cursor + 1); - requestRender(); - return; - } - if (data === " ") { - entries[cursor].include = !entries[cursor].include; - requestRender(); - return; - } - if (data === "a" || data === "A") { - for (const e of entries) e.include = true; - requestRender(); - return; - } - if (matchesKey(data, Key.enter) || matchesKey(data, Key.escape)) { - done({ entries: entries.map((e) => ({ ...e })) }); - return; - } - }; - - return { - render, - invalidate: () => { cachedLines = undefined; }, - handleInput, - }; - }); - - const approved = result.entries.filter((e) => e.include).map((e) => e.storyId); - const skipped = result.entries.filter((e) => !e.include).map((e) => e.storyId); - return { approved, skipped }; -} From 43303d59aa7dfc2668cc39d3331d5134c0e7f3f6 Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Wed, 18 Mar 2026 23:48:58 +0700 Subject: [PATCH 057/412] consolidate subagent spawning into single task-driven function --- src/planner/subagent.ts | 297 +++++++++++++--------------------------- 1 file changed, 94 insertions(+), 203 deletions(-) diff --git a/src/planner/subagent.ts b/src/planner/subagent.ts index 99c637d..d722d2d 100644 --- 
a/src/planner/subagent.ts +++ b/src/planner/subagent.ts @@ -1,19 +1,27 @@ -// Subagent spawn helpers. Each public function delegates to spawnSubagent, -// which handles process lifecycle, stdout/stderr routing to disk, and -// exit-code normalization. When a UI context is provided, an IPC responder -// runs concurrently so subagents can ask questions and request scouts. +// Subagent spawn infrastructure. +// +// A single public function, spawnSubagent(), handles all six roles. +// It writes task.json to the subagent directory before spawning (the +// directory-as-contract invariant: the child reads task.json to discover +// its role and parameters — no structured data flows through CLI flags). +// +// The spawn command carries only what pi needs at the OS level: +// pi -p -e {ext} --koan-dir {subagentDir} [--model {model}] "{bootPrompt}" +// +// All tools register unconditionally at init. Task-specific content is +// intentionally absent from spawn prompts: it arrives as step 1 guidance +// returned by the first koan_complete_step call, after the calling pattern +// is established. 
import { spawn } from "node:child_process"; import { createWriteStream } from "node:fs"; import * as path from "node:path"; -import type { ExtensionUIContext } from "@mariozechner/pi-coding-agent"; - import { createLogger, type Logger } from "../utils/logger.js"; -import type { SubagentRole, StepSequence } from "./types.js"; import { resolveModelForRole } from "./model-resolver.js"; import { runIpcResponder, type ScoutSpawnContext } from "./lib/ipc-responder.js"; -import type { ScoutTask } from "./lib/ipc.js"; +import { writeTaskFile, type SubagentTask, type ScoutTask } from "./lib/task.js"; +import type { WebServerHandle } from "./web/server-types.js"; // -- Result type -- @@ -23,66 +31,95 @@ export interface SubagentResult { subagentDir: string; } -// -- Public spawn option types -- +// -- Spawn options -- export interface SpawnOptions { - epicDir: string; - subagentDir: string; cwd: string; extensionPath: string; modelOverride?: string; log?: Logger; - ui?: ExtensionUIContext; + webServer?: WebServerHandle; } -export interface SpawnStoryOptions extends SpawnOptions { - storyId: string; -} +// -- Constants -- -// -- Internal spawn infrastructure -- +// Roles that support koan_request_scouts and therefore need a ScoutSpawnContext +// wired into their IPC responder. +const ROLES_WITH_SCOUT_SUPPORT = new Set([ + "intake", + "decomposer", + "planner", +]); -interface SpawnSubagentOpts { - epicDir: string; - subagentDir: string; - cwd: string; - extensionPath: string; - extraFlags?: string[]; - modelOverride?: string; - ui?: ExtensionUIContext; - // Scout spawning context for the IPC responder. Provided for all non-scout - // subagents that may call koan_request_scouts. - scoutContext?: ScoutSpawnContext; +// -- Private helpers -- + +// The entire spawn prompt. Kept to one sentence deliberately: the LLM must +// call koan_complete_step before seeing any task instructions. Putting task +// content here risks text output + immediate exit on weaker models. 
+function bootPrompt(role: string): string { + return `You are a koan ${role} agent. Call koan_complete_step to receive your instructions.`; +} + +// Builds the ScoutSpawnContext injected into the IPC responder. Scouts spawned +// via this context do not receive a web server — they are narrow investigators +// with no user interaction and no nested IPC. +function makeScoutSpawnContext( + parentRole: string, + epicDir: string, + opts: SpawnOptions, + log: Logger, +): ScoutSpawnContext { + return { + epicDir, + parentRole, + async spawnScout(task: ScoutTask, scoutSubagentDir: string): Promise { + const result = await spawnSubagent(task, scoutSubagentDir, { + cwd: opts.cwd, + extensionPath: opts.extensionPath, + // Deliberately no webServer — scouts are narrow investigators. + log, + }); + return result.exitCode; + }, + }; } -export function buildSpawnArgs( - role: string, - prompt: string, - opts: SpawnSubagentOpts, -): string[] { - return [ +// -- Public API -- + +/** + * Spawn a koan subagent for the given task. + * + * Writes task.json to subagentDir before spawning so the child process can + * read its role and parameters without relying on CLI flags. + */ +export async function spawnSubagent( + task: SubagentTask, + subagentDir: string, + opts: SpawnOptions, +): Promise { + const log = opts.log ?? createLogger("Subagent"); + + await writeTaskFile(subagentDir, task); + + const modelOverride = opts.modelOverride ?? await resolveModelForRole(task.role); + + const scoutContext = ROLES_WITH_SCOUT_SUPPORT.has(task.role) + ? makeScoutSpawnContext(task.role, task.epicDir, opts, log) + : undefined; + + const args = [ "-p", "-e", opts.extensionPath, - "--koan-role", role, - "--koan-epic-dir", opts.epicDir, - "--koan-subagent-dir", opts.subagentDir, - ...(opts.extraFlags ?? []), - ...(opts.modelOverride ? ["--model", opts.modelOverride] : []), - prompt, + "--koan-dir", subagentDir, + ...(modelOverride ? 
["--model", modelOverride] : []), + bootPrompt(task.role), ]; -} -function spawnSubagent( - role: string, - prompt: string, - opts: SpawnSubagentOpts, - log: Logger, -): Promise { - const args = buildSpawnArgs(role, prompt, opts); - log(`Spawning ${role} subagent`, { epicDir: opts.epicDir, subagentDir: opts.subagentDir }); + log(`Spawning ${task.role} subagent`, { subagentDir }); return new Promise((resolve) => { - const stdoutLog = createWriteStream(path.join(opts.subagentDir, "stdout.log"), { flags: "w" }); - const stderrLog = createWriteStream(path.join(opts.subagentDir, "stderr.log"), { flags: "w" }); + const stdoutLog = createWriteStream(path.join(subagentDir, "stdout.log"), { flags: "w" }); + const stderrLog = createWriteStream(path.join(subagentDir, "stderr.log"), { flags: "w" }); const proc = spawn("pi", args, { cwd: opts.cwd, @@ -90,20 +127,12 @@ function spawnSubagent( stdio: ["ignore", "pipe", "pipe"], }); - // Start IPC responder concurrently when a UI context is available. - // The responder polls ipc.json in the subagent directory and routes - // ask-question requests to the ask UI and scout-request requests to - // the scout spawning pool. + // Start IPC responder concurrently when a web server handle is available. let abortIpc: (() => void) | undefined; - if (opts.ui) { + if (opts.webServer) { const ac = new AbortController(); abortIpc = () => ac.abort(); - void runIpcResponder( - opts.subagentDir, - opts.ui, - ac.signal, - opts.scoutContext, - ); + void runIpcResponder(subagentDir, opts.webServer, ac.signal, scoutContext); } let stderr = ""; @@ -122,154 +151,16 @@ function spawnSubagent( stdoutLog.end(); stderrLog.end(); const exitCode = code ?? 
1; - log(`${role} subagent exited`, { exitCode }); - resolve({ exitCode, stderr, subagentDir: opts.subagentDir }); + log(`${task.role} subagent exited`, { exitCode }); + resolve({ exitCode, stderr, subagentDir }); }); proc.on("error", (error) => { abortIpc?.(); stdoutLog.end(); stderrLog.end(); - log(`${role} subagent spawn error`, { error: error.message }); - resolve({ exitCode: 1, stderr: error.message, subagentDir: opts.subagentDir }); + log(`${task.role} subagent spawn error`, { error: error.message }); + resolve({ exitCode: 1, stderr: error.message, subagentDir }); }); }); } - -// -- Scout spawner (injected into IPC responder) -- -// Defined here to avoid circular imports: ipc-responder.ts uses a callback -// type, not a direct import from this module. - -function makeScoutSpawnContext( - opts: SpawnOptions, - log: Logger, -): ScoutSpawnContext { - return { - epicDir: opts.epicDir, - async spawnScout(task: ScoutTask, scoutSubagentDir: string, outputFile: string): Promise { - const scoutModel = await resolveModelForRole("scout"); - const prompt = `${task.prompt}\n\nWrite your findings to: ${outputFile}\nYour investigator role: ${task.role}`; - const result = await spawnSubagent( - "scout", - prompt, - { - epicDir: opts.epicDir, - subagentDir: scoutSubagentDir, - cwd: opts.cwd, - extensionPath: opts.extensionPath, - modelOverride: scoutModel, - // Scouts do not get an IPC responder — they are narrow investigators. - }, - log, - ); - return result.exitCode; - }, - }; -} - -// -- Public spawn functions -- - -// Intake: reads conversation, extracts context, requests scouts, asks user questions. -export async function spawnIntake(opts: SpawnOptions): Promise { - const role: SubagentRole = "intake"; - const log = opts.log ?? createLogger("Subagent"); - const modelOverride = opts.modelOverride ?? 
await resolveModelForRole(role); - const scoutContext = makeScoutSpawnContext(opts, log); - return spawnSubagent( - role, - "Begin the intake phase.", - { ...opts, modelOverride, scoutContext }, - log, - ); -} - -// Scout: answers one narrow codebase question and writes findings to outputFile. -// Note: scouts are spawned by the IPC responder (via makeScoutSpawnContext) when -// a subagent calls koan_request_scouts. This function is also callable directly -// from the driver if needed. -export async function spawnScout( - opts: SpawnOptions & { question: string; role?: string; outputFile: string }, -): Promise { - const subagentRole: SubagentRole = "scout"; - const log = opts.log ?? createLogger("Subagent"); - const modelOverride = opts.modelOverride ?? await resolveModelForRole(subagentRole); - const prompt = [ - opts.question, - opts.role ? `Your investigator role: ${opts.role}` : "", - `Write your findings to: ${opts.outputFile}`, - ].filter(Boolean).join("\n"); - return spawnSubagent(subagentRole, prompt, { ...opts, modelOverride }, log); -} - -// Decomposer: splits the epic into stories. -export async function spawnDecomposer(opts: SpawnOptions): Promise { - const role: SubagentRole = "decomposer"; - const log = opts.log ?? createLogger("Subagent"); - const modelOverride = opts.modelOverride ?? await resolveModelForRole(role); - const scoutContext = makeScoutSpawnContext(opts, log); - return spawnSubagent( - role, - "Begin the decomposition phase.", - { ...opts, modelOverride, scoutContext }, - log, - ); -} - -// Orchestrator: pre-execution or post-execution decision making. -export async function spawnOrchestrator( - opts: SpawnOptions & { stepSequence: StepSequence; storyId?: string }, -): Promise { - const role: SubagentRole = "orchestrator"; - const log = opts.log ?? createLogger("Subagent"); - const modelOverride = opts.modelOverride ?? 
await resolveModelForRole(role); - const extraFlags: string[] = ["--koan-step-sequence", opts.stepSequence]; - if (opts.storyId) { - extraFlags.push("--koan-story-id", opts.storyId); - } - const prompt = `Begin the ${opts.stepSequence} orchestrator phase.`; - return spawnSubagent( - role, - prompt, - { ...opts, extraFlags, modelOverride }, - log, - ); -} - -// Planner: produces a detailed plan for a story. -export async function spawnPlanner(opts: SpawnStoryOptions): Promise { - const role: SubagentRole = "planner"; - const log = opts.log ?? createLogger("Subagent"); - const modelOverride = opts.modelOverride ?? await resolveModelForRole(role); - const extraFlags: string[] = ["--koan-story-id", opts.storyId]; - const scoutContext = makeScoutSpawnContext(opts, log); - const prompt = `Begin the planning phase for story ${opts.storyId}.`; - return spawnSubagent( - role, - prompt, - { ...opts, extraFlags, modelOverride, scoutContext }, - log, - ); -} - -// Executor: implements a story plan. -export async function spawnExecutor( - opts: SpawnStoryOptions & { retryContext?: string }, -): Promise { - const role: SubagentRole = "executor"; - const log = opts.log ?? createLogger("Subagent"); - const modelOverride = opts.modelOverride ?? await resolveModelForRole(role); - const extraFlags: string[] = ["--koan-story-id", opts.storyId]; - if (opts.retryContext) { - extraFlags.push("--koan-retry-context", opts.retryContext); - } - const basePrompt = `Implement the plan for story ${opts.storyId}.`; - const prompt = opts.retryContext - ? 
`${basePrompt}\n\nPrevious attempt failed: ${opts.retryContext}` - : basePrompt; - return spawnSubagent( - role, - prompt, - { ...opts, extraFlags, modelOverride }, - log, - ); -} From 2e6be1206342a65a0a19938bb484f9081baf1433 Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Wed, 18 Mar 2026 23:49:05 +0700 Subject: [PATCH 058/412] rewrite phase dispatch to read from task manifest --- src/planner/phases/dispatch.ts | 99 ++++++++++++---------------------- 1 file changed, 34 insertions(+), 65 deletions(-) diff --git a/src/planner/phases/dispatch.ts b/src/planner/phases/dispatch.ts index b880cc0..5bc63f5 100644 --- a/src/planner/phases/dispatch.ts +++ b/src/planner/phases/dispatch.ts @@ -1,14 +1,15 @@ -// Phase dispatch: detects subagent mode from CLI flags and routes to the -// appropriate phase class based on role. Flags are unavailable at extension -// init (getFlag returns undefined before _buildRuntime), so detection is -// deferred to before_agent_start. +// Phase dispatch: routes a SubagentTask to the appropriate phase class. +// +// Called from koan.ts after readTaskFile() resolves the task manifest. +// There is no flag-parsing here — all task parameters come from task.json. 
+import * as path from "node:path"; import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; import { createLogger, type Logger } from "../../utils/logger.js"; import type { RuntimeContext } from "../lib/runtime-context.js"; import type { EventLog } from "../lib/audit.js"; -import type { SubagentRole, StepSequence } from "../types.js"; +import type { SubagentTask } from "../lib/task.js"; import { IntakePhase } from "./intake/phase.js"; import { ScoutPhase } from "./scout/phase.js"; import { DecomposerPhase } from "./decomposer/phase.js"; @@ -16,109 +17,77 @@ import { OrchestratorPhase } from "./orchestrator/phase.js"; import { PlannerPhase } from "./planner/phase.js"; import { ExecutorPhase } from "./executor/phase.js"; -// -- Config -- - -export interface SubagentConfig { - role: SubagentRole; - epicDir: string; - subagentDir: string; - storyId: string | null; - stepSequence: StepSequence | null; -} - -// -- Detection -- - -// Detects subagent mode by reading flags set via CLI -// (pi -p --koan-role intake --koan-epic-dir /path ...). -// Must be called from before_agent_start or later; flags are -// undefined before _buildRuntime() runs. -export function detectSubagentMode(pi: ExtensionAPI): SubagentConfig | null { - const role = pi.getFlag("koan-role"); - if (!role || typeof role !== "string" || role.trim().length === 0) { - return null; - } - - const epicDir = pi.getFlag("koan-epic-dir"); - const subagentDir = pi.getFlag("koan-subagent-dir"); - const storyId = pi.getFlag("koan-story-id"); - const stepSequence = pi.getFlag("koan-step-sequence"); - - return { - role: role.trim() as SubagentRole, - epicDir: typeof epicDir === "string" ? epicDir.trim() : "", - subagentDir: typeof subagentDir === "string" ? subagentDir.trim() : "", - storyId: typeof storyId === "string" && storyId.trim().length > 0 ? storyId.trim() : null, - stepSequence: typeof stepSequence === "string" && stepSequence.trim().length > 0 - ? 
stepSequence.trim() as StepSequence - : null, - }; -} - -// -- Dispatch -- - export async function dispatchPhase( pi: ExtensionAPI, - config: SubagentConfig, + task: SubagentTask, ctx: RuntimeContext, log?: Logger, eventLog?: EventLog, ): Promise { const logger = log ?? createLogger("Dispatch"); - switch (config.role) { + switch (task.role) { case "intake": { - const phase = new IntakePhase(pi, { epicDir: config.epicDir }, ctx, logger, eventLog); + const phase = new IntakePhase(pi, { epicDir: task.epicDir }, ctx, logger, eventLog); await phase.begin(); break; } + case "scout": { - const phase = new ScoutPhase(pi, { epicDir: config.epicDir }, ctx, logger, eventLog); + // outputFile is relative to subagentDir in the task manifest. + // ScoutPhase receives the resolved absolute path. + const phase = new ScoutPhase(pi, { + epicDir: task.epicDir, + question: task.question, + outputFile: path.join(ctx.subagentDir!, task.outputFile), + investigatorRole: task.investigatorRole, + }, ctx, logger, eventLog); await phase.begin(); break; } + case "decomposer": { - const phase = new DecomposerPhase(pi, { epicDir: config.epicDir }, ctx, logger, eventLog); + const phase = new DecomposerPhase(pi, { epicDir: task.epicDir }, ctx, logger, eventLog); await phase.begin(); break; } + case "orchestrator": { - const stepSequence = config.stepSequence ?? "pre-execution"; const phase = new OrchestratorPhase( pi, - { epicDir: config.epicDir, stepSequence, storyId: config.storyId ?? undefined }, + { epicDir: task.epicDir, stepSequence: task.stepSequence, storyId: task.storyId }, ctx, logger, eventLog, ); await phase.begin(); break; } + case "planner": { - // Fail-fast: missing storyId produces malformed paths like stories//plan/plan.md (§12.4.3). 
- if (!config.storyId) throw new Error("planner phase requires --koan-story-id flag"); const phase = new PlannerPhase( pi, - { epicDir: config.epicDir, storyId: config.storyId }, + { epicDir: task.epicDir, storyId: task.storyId }, ctx, logger, eventLog, ); await phase.begin(); break; } + case "executor": { - // Fail-fast: missing storyId produces malformed paths like stories//plan/plan.md (§12.4.3). - if (!config.storyId) throw new Error("executor phase requires --koan-story-id flag"); - const retryContext = pi.getFlag("koan-retry-context"); const phase = new ExecutorPhase( pi, - { - epicDir: config.epicDir, - storyId: config.storyId, - retryContext: typeof retryContext === "string" && retryContext.length > 0 ? retryContext : undefined, - }, + { epicDir: task.epicDir, storyId: task.storyId, retryContext: task.retryContext }, ctx, logger, eventLog, ); await phase.begin(); break; } - default: - logger("Unknown role", { role: config.role }); + + default: { + // TypeScript narrows task to `never` here — this branch is unreachable + // when all roles are covered above. + const exhaustive: never = task; + logger("Unrecognized role in task manifest", { role: (exhaustive as { role: string }).role }); + break; + } } } From 44403036c05f1cd1fc10581b9ed19de12d1c7ff6 Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Wed, 18 Mar 2026 23:49:15 +0700 Subject: [PATCH 059/412] route IPC responder through web server instead of TUI --- src/planner/lib/ipc-responder.ts | 201 ++++++++++++++++++------------- 1 file changed, 114 insertions(+), 87 deletions(-) diff --git a/src/planner/lib/ipc-responder.ts b/src/planner/lib/ipc-responder.ts index bf9c31a..7d55cee 100644 --- a/src/planner/lib/ipc-responder.ts +++ b/src/planner/lib/ipc-responder.ts @@ -2,29 +2,30 @@ // handles them, and writes responses back. Runs concurrently with subagent // process execution and terminates when the provided AbortSignal fires. 
// -// Supports two request types (§11.2.4): -// "ask" → render ask UI, write answer back +// Supports two request types: +// "ask" → route to web server, write answer back // "scout-request" → spawn scouts via pool(), write findings paths back import { promises as fs } from "node:fs"; import * as path from "node:path"; -import type { ExtensionUIContext } from "@mariozechner/pi-coding-agent"; - import { readIpcFile, writeIpcFile, createAskResponse, createCancelledResponse, type AskAnswerPayload, - type ScoutTask, type AskIpcFile, type ScoutIpcFile, } from "./ipc.js"; +// ipc.ts exports ScoutTask (IPC-level: id/role/prompt for the LLM-facing request); +// task.ts also exports ScoutTask (manifest-level: role/epicDir/question/outputFile/investigatorRole). +// Aliased here to avoid shadowing the ipc.ts type used by ScoutIpcFile fields. +import type { ScoutTask as TaskScoutTask } from "./task.js"; import { pool } from "./pool.js"; -import { askSingleQuestionWithInlineNote } from "../ui/ask/ask-inline-ui.js"; -import { askQuestionsWithTabs } from "../ui/ask/ask-tabs-ui.js"; -import type { AskQuestion, AskSelection } from "../ui/ask/ask-logic.js"; +import { readProjection } from "./audit.js"; +import type { WebServerHandle, AskQuestion, AnswerResult } from "../web/server-types.js"; +import { OTHER_OPTION } from "../web/server-types.js"; const POLL_INTERVAL_MS = 300; @@ -32,91 +33,81 @@ function sleep(ms: number): Promise { return new Promise((resolve) => setTimeout(resolve, ms)); } -// Provided by subagent.ts when starting the IPC responder. Avoids circular -// imports: ipc-responder.ts never imports from subagent.ts. +/** + * Provided by subagent.ts when starting the IPC responder. Avoids circular + * imports: ipc-responder.ts never imports from subagent.ts. + * + * `spawnScout` does not accept an `outputFile` argument — the output path is + * part of the task manifest (task.json). 
The responder writes `outputFile` + * into the ScoutTask before handing it to `spawnScout`, then resolves the + * absolute path via `path.join(subagentDir, scoutTask.outputFile)` itself. + */ export interface ScoutSpawnContext { epicDir: string; + // The role of the subagent that requested scouting (intake, decomposer, planner). + // Used for UI attribution when registering scouts with the web server. + parentRole: string; // Spawns a single scout; returns exit code. - spawnScout: (task: ScoutTask, scoutSubagentDir: string, outputFile: string) => Promise; + spawnScout: (task: TaskScoutTask, scoutSubagentDir: string) => Promise; } -// Handles a pending ask request: renders UI, writes response. +// Handles a pending ask request: routes to web server, writes response. async function handleAskRequest( subagentDir: string, ipc: AskIpcFile, - ui: ExtensionUIContext, + webServer: WebServerHandle, signal: AbortSignal, ): Promise { const { payload } = ipc; const questions: AskQuestion[] = payload.questions.map((q) => ({ id: q.id, question: q.question, - options: q.options, + options: q.options.map((o) => ({ label: o.label })), multi: q.multi, recommended: q.recommended, })); - let cancelled = false; - let answers: AskAnswerPayload["answers"] = []; - - if (questions.length === 1) { - const q = questions[0]; - const selection = await askSingleQuestionWithInlineNote(ui, { - question: q.question, - options: q.options, - recommended: q.recommended, - }); - - // ask UI components do not accept an AbortSignal — they block until the - // user interacts even after the subagent exits. Check after return to - // prevent writing a stale answer to a dead subagent's IPC file. 
- if (signal.aborted) { - const current = await readIpcFile(subagentDir); - if (current !== null && current.type === "ask" && current.response === null && current.id === ipc.id) { - await writeIpcFile(subagentDir, { ...current, response: createCancelledResponse(ipc.id) }); - } - return; - } - - cancelled = selection.selectedOptions.length === 0 && !selection.customInput; - if (!cancelled) { - answers = [{ - id: q.id, - selectedOptions: selection.selectedOptions, - customInput: selection.customInput, - }]; - } - } else { - const result = await askQuestionsWithTabs(ui, questions); + // Append "Other" option to each question before presenting to the user. + const withOther: AskQuestion[] = questions.map((q) => ({ + ...q, + options: [...q.options, { label: OTHER_OPTION }], + })); - if (signal.aborted) { + let result: AnswerResult; + try { + result = await webServer.requestAnswer(withOther, signal); + } catch (err: unknown) { + if (err instanceof Error && (err.name === "AbortError" || signal.aborted)) { const current = await readIpcFile(subagentDir); if (current !== null && current.type === "ask" && current.response === null && current.id === ipc.id) { await writeIpcFile(subagentDir, { ...current, response: createCancelledResponse(ipc.id) }); } return; } + throw err; + } - cancelled = result.cancelled; - if (!cancelled) { - answers = questions.map((q, i) => { - const sel: AskSelection = result.selections[i] ?? { selectedOptions: [] }; - const entry: AskAnswerPayload["answers"][number] = { - id: q.id, - selectedOptions: sel.selectedOptions, - }; - if (sel.customInput !== undefined) { - entry.customInput = sel.customInput; - } - return entry; - }); + if (result.cancelled) { + const current = await readIpcFile(subagentDir); + if (current !== null && current.type === "ask" && current.response === null && current.id === ipc.id) { + await writeIpcFile(subagentDir, { ...current, response: createCancelledResponse(ipc.id) }); } + return; } - const response = cancelled - ? 
createCancelledResponse(ipc.id) - : createAskResponse(ipc.id, { answers }); + const answers: AskAnswerPayload["answers"] = result.answers.map((a) => { + const entry: AskAnswerPayload["answers"][number] = { + id: a.questionId, + selectedOptions: a.selectedOptions, + }; + if (a.customInput !== undefined) { + entry.customInput = a.customInput; + } + return entry; + }); + const response = createAskResponse(ipc.id, { answers }); + // Re-read and validate before writing — idempotence guard against stale requests. const current = await readIpcFile(subagentDir); if (current !== null && current.type === "ask" && current.response === null && current.id === ipc.id) { await writeIpcFile(subagentDir, { ...current, response }); @@ -128,40 +119,82 @@ async function handleScoutRequest( subagentDir: string, ipc: ScoutIpcFile, scoutCtx: ScoutSpawnContext, + webServer: WebServerHandle | undefined, signal: AbortSignal, ): Promise { - const { scouts, id } = ipc; - const { epicDir } = scoutCtx; + const { scouts: ipcScouts, id } = ipc; const findings: string[] = []; const failures: string[] = []; - // Each scout writes to ${subagentDir}/output.md — output is scoped to the - // scout's own directory, avoiding collisions. Compute subagentDir once and - // derive outputFile from it (never call Date.now() twice for the same entry). - const scoutEntries = scouts.map((task) => { - const scoutDir = path.join(epicDir, "subagents", `scout-${task.id}-${Date.now()}`); - return { task, subagentDir: scoutDir, outputFile: path.join(scoutDir, "output.md") }; + // Compute per-scout directories. Scout dirs live under the epic's subagents/ + // directory so they appear in the standard directory layout. 
+ const scoutEntries = ipcScouts.map((ipcTask) => { + const scoutDir = path.join(scoutCtx.epicDir, "subagents", `scout-${ipcTask.id}-${Date.now()}`); + return { ipcTask, subagentDir: scoutDir }; }); - const taskIds = scoutEntries.map((t) => t.task.id); + // Register scouts with the web server before spawning so the UI shows them + // immediately rather than waiting for the first audit poll. + if (webServer) { + for (const entry of scoutEntries) { + webServer.registerAgent({ + id: entry.ipcTask.id, + name: entry.ipcTask.id, + dir: entry.subagentDir, + role: "scout", + model: null, + parent: scoutCtx.parentRole, + }); + } + } + + const taskIds = scoutEntries.map((t) => t.ipcTask.id); await pool( taskIds, - 4, // up to 4 concurrent scouts + 4, async (taskId) => { if (signal.aborted) return { exitCode: 1, stderr: "aborted", subagentDir: "" }; - const entry = scoutEntries.find((t) => t.task.id === taskId)!; + + const entry = scoutEntries.find((t) => t.ipcTask.id === taskId)!; await fs.mkdir(entry.subagentDir, { recursive: true }); - const exitCode = await scoutCtx.spawnScout(entry.task, entry.subagentDir, entry.outputFile); + + // Construct the task manifest for this scout. The IPC-level ipcTask carries + // id/role/prompt (LLM-facing); the task manifest carries the full SubagentTask + // fields the scout process needs. + const scoutTask: TaskScoutTask = { + role: "scout", + epicDir: scoutCtx.epicDir, + question: entry.ipcTask.prompt, + outputFile: "findings.md", // relative — ScoutPhase resolves to absolute + investigatorRole: entry.ipcTask.role, + }; + + const exitCode = await scoutCtx.spawnScout(scoutTask, entry.subagentDir); + + // Derive success from the JSON audit projection, not from file existence. + // A scout can write a partial findings.md and then crash. 
+ let succeeded = false; if (exitCode === 0) { - findings.push(entry.outputFile); + const projection = await readProjection(entry.subagentDir); + succeeded = projection?.status === "completed"; + } + + const absoluteOutputFile = path.join(entry.subagentDir, scoutTask.outputFile); + if (succeeded) { + findings.push(absoluteOutputFile); } else { failures.push(taskId); } + + if (webServer) { + webServer.completeAgent(taskId); + } + return { exitCode, stderr: "", subagentDir: entry.subagentDir }; }, ); - // Write response back to the ipc file. + // Re-read and validate before writing response — idempotence guard. const current = await readIpcFile(subagentDir); if (current !== null && current.type === "scout-request" && current.response === null && current.id === id) { const updated: ScoutIpcFile = { ...current, response: { findings, failures } }; @@ -169,13 +202,9 @@ async function handleScoutRequest( } } -// Runs the parent-side IPC poll loop for a single subagent directory. -// Routes to ask UI or scout spawning based on request type. -// Terminates when `signal` is aborted. Errors are swallowed — transient -// filesystem issues must not crash the parent session. export async function runIpcResponder( subagentDir: string, - ui: ExtensionUIContext, + webServer: WebServerHandle, signal: AbortSignal, scoutContext?: ScoutSpawnContext, ): Promise { @@ -188,14 +217,12 @@ export async function runIpcResponder( if (ipc === null || ipc.response !== null) continue; if (ipc.type === "ask") { - await handleAskRequest(subagentDir, ipc, ui, signal); + await handleAskRequest(subagentDir, ipc, webServer, signal); } else if (ipc.type === "scout-request" && scoutContext) { - await handleScoutRequest(subagentDir, ipc, scoutContext, signal); + await handleScoutRequest(subagentDir, ipc, scoutContext, webServer, signal); } - // Unknown type: ignore (forward-compatibility) } catch { - // Swallow all errors — transient filesystem or UI issues must not - // abort the parent session. 
+ // Swallow all errors — transient filesystem issues must not abort the parent session. } } } From 26467a8442116abb166fd620ac1e46104a3abf12 Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Wed, 18 Mar 2026 23:49:22 +0700 Subject: [PATCH 060/412] replace TUI widget with web server in pipeline driver --- src/planner/driver.ts | 492 ++++++++++++++++++------------------------ 1 file changed, 206 insertions(+), 286 deletions(-) diff --git a/src/planner/driver.ts b/src/planner/driver.ts index 5d03543..0216637 100644 --- a/src/planner/driver.ts +++ b/src/planner/driver.ts @@ -1,8 +1,14 @@ // Epic pipeline driver — deterministic coordinator for the full epic lifecycle. // Reads JSON state and exit codes; applies routing rules. Never parses markdown. // Per AGENTS.md: driver owns .json state; LLMs own .md files. +// +// Spawn pattern used throughout: spawnSubagent(task, subagentDir, opts). +// epicDir is part of the task (written to task.json) rather than SpawnOptions +// because it is subagent configuration, not process infrastructure. SpawnOptions +// holds only what the OS-level spawn needs: cwd, extensionPath, model, webServer. 
-import type { ExtensionUIContext } from "@mariozechner/pi-coding-agent"; +import { promises as fs } from "node:fs"; +import * as path from "node:path"; import { loadEpicState, @@ -14,18 +20,29 @@ import { ensureStoryDirectory, discoverStoryIds, } from "./epic/state.js"; -import { - spawnIntake, - spawnDecomposer, - spawnOrchestrator, - spawnPlanner, - spawnExecutor, -} from "./subagent.js"; +import { spawnSubagent, type SpawnOptions } from "./subagent.js"; import type { Logger } from "../utils/logger.js"; import type { StoryState } from "./epic/types.js"; -import { readRecentLogs, readProjection } from "./lib/audit.js"; -import { EpicWidgetController } from "./ui/epic-widget.js"; -import { reviewStorySketches } from "./ui/spec-review.js"; +import type { WebServerHandle, ReviewStory } from "./web/server-types.js"; + +// --------------------------------------------------------------------------- +// readStoryTitle +// --------------------------------------------------------------------------- + +async function readStoryTitle(epicDir: string, storyId: string): Promise { + try { + const raw = await fs.readFile(path.join(epicDir, "stories", storyId, "story.md"), "utf8"); + for (const rawLine of raw.split("\n")) { + const l = rawLine.trim(); + if (!l) continue; + const text = l.replace(/^#+\s*/, "").trim(); + if (text) return text.slice(0, 80); + } + return storyId; + } catch { + return storyId; + } +} // --------------------------------------------------------------------------- // Routing @@ -37,15 +54,9 @@ interface RoutingDecision { error?: string; } -// Simplified routing — no escalation path per §11.3.1 and §11.6.3. -// Retry budget exhaustion is handled inside the retry case (skip + notify). function routeFromState(stories: StoryState[], log: Logger): RoutingDecision { - // Priority order: - // 1. Any story with status 'retry'? → check budget, then re-execute or skip - // 2. Any story with status 'selected'? → execute it - // 3. All stories terminal? 
→ complete - // 4. None of the above → error - + // retry is checked before selected — a story queued for retry takes + // precedence over a newly selected story. const retry = stories.find((s) => s.status === "retry"); if (retry) { log("Routing: retry", { storyId: retry.storyId }); @@ -58,6 +69,7 @@ function routeFromState(stories: StoryState[], log: Logger): RoutingDecision { return { action: "execute", storyId: selected.storyId }; } + // Terminal states are exactly "done" and "skipped". const terminal = new Set(["done", "skipped"]); const allTerminal = stories.every((s) => terminal.has(s.status)); if (allTerminal && stories.length > 0) { @@ -71,47 +83,6 @@ function routeFromState(stories: StoryState[], log: Logger): RoutingDecision { }; } -// --------------------------------------------------------------------------- -// Active widget polling (§11.6.1) -// --------------------------------------------------------------------------- - -// Starts a 2s polling interval that reads the active subagent's projection -// and log tail, then updates the widget. Interval is unref'd so it does not -// prevent process exit. -function startActivePolling( - activeSubagentDir: string, - widget: EpicWidgetController, - startedAt: number, - role: string, - storyId?: string, -): () => void { - const timer = setInterval(async () => { - try { - const [projection, logs] = await Promise.all([ - readProjection(activeSubagentDir), - readRecentLogs(activeSubagentDir), - ]); - widget.update({ logLines: logs }); - if (projection) { - widget.update({ - activeSubagent: { - role, - storyId, - step: projection.step, - totalSteps: projection.totalSteps, - stepName: projection.stepName, - startedAt, - }, - }); - } - } catch { - // Non-fatal — polling is best-effort. 
- } - }, 2000); - timer.unref(); - return () => clearInterval(timer); -} - // --------------------------------------------------------------------------- // Phase A helpers // --------------------------------------------------------------------------- @@ -121,22 +92,20 @@ async function runIntake( cwd: string, extensionPath: string, log: Logger, - ui: ExtensionUIContext | null, - widget: EpicWidgetController | null, + webServer: WebServerHandle | null, ): Promise { const subagentDir = await ensureSubagentDirectory(epicDir, "intake"); - const startedAt = Date.now(); - let stopPolling: (() => void) | undefined; - if (widget) { - widget.update({ activeSubagent: { role: "intake", step: 0, totalSteps: 3, stepName: "", startedAt } }); - stopPolling = startActivePolling(subagentDir, widget, startedAt, "intake"); - } - const result = await spawnIntake({ epicDir, subagentDir, cwd, extensionPath, log, ui: ui ?? undefined }); - stopPolling?.(); - if (widget) { - const logs = await readRecentLogs(subagentDir); - widget.update({ logLines: logs, activeSubagent: null }); - } + webServer?.registerAgent({ id: "intake", name: "intake", dir: subagentDir, role: "intake", model: null, parent: null }); + webServer?.trackSubagent(subagentDir, "intake"); + + const result = await spawnSubagent( + { role: "intake", epicDir }, + subagentDir, + { cwd, extensionPath, log, webServer: webServer ?? 
undefined }, + ); + + webServer?.clearSubagent(); + webServer?.completeAgent("intake"); if (result.exitCode !== 0) { log("Intake failed", { exitCode: result.exitCode }); return false; @@ -149,22 +118,20 @@ async function runDecomposer( cwd: string, extensionPath: string, log: Logger, - ui: ExtensionUIContext | null, - widget: EpicWidgetController | null, + webServer: WebServerHandle | null, ): Promise { const subagentDir = await ensureSubagentDirectory(epicDir, "decomposer"); - const startedAt = Date.now(); - let stopPolling: (() => void) | undefined; - if (widget) { - widget.update({ activeSubagent: { role: "decomposer", step: 0, totalSteps: 2, stepName: "", startedAt } }); - stopPolling = startActivePolling(subagentDir, widget, startedAt, "decomposer"); - } - const result = await spawnDecomposer({ epicDir, subagentDir, cwd, extensionPath, log, ui: ui ?? undefined }); - stopPolling?.(); - if (widget) { - const logs = await readRecentLogs(subagentDir); - widget.update({ logLines: logs, activeSubagent: null }); - } + webServer?.registerAgent({ id: "decomposer", name: "decomposer", dir: subagentDir, role: "decomposer", model: null, parent: null }); + webServer?.trackSubagent(subagentDir, "decomposer"); + + const result = await spawnSubagent( + { role: "decomposer", epicDir }, + subagentDir, + { cwd, extensionPath, log, webServer: webServer ?? undefined }, + ); + + webServer?.clearSubagent(); + webServer?.completeAgent("decomposer"); if (result.exitCode !== 0) { log("Decomposer failed", { exitCode: result.exitCode }); return false; @@ -182,88 +149,61 @@ async function runStoryExecution( extensionPath: string, storyId: string, log: Logger, - ui: ExtensionUIContext | null, - widget: EpicWidgetController | null, + webServer: WebServerHandle | null, ): Promise { + const opts: SpawnOptions = { cwd, extensionPath, log, webServer: webServer ?? undefined }; + // 1. Set status to 'planning'. 
const story = await loadStoryState(epicDir, storyId); - await saveStoryState(epicDir, storyId, { - ...story, - status: "planning", - updatedAt: new Date().toISOString(), - }); + await saveStoryState(epicDir, storyId, { ...story, status: "planning", updatedAt: new Date().toISOString() }); // 2. Spawn planner. const plannerDir = await ensureSubagentDirectory(epicDir, `planner-${storyId}`); - const plannerStarted = Date.now(); - let stopPolling: (() => void) | undefined; - if (widget) { - widget.update({ - activeSubagent: { role: "planner", storyId, step: 0, totalSteps: 3, stepName: "", startedAt: plannerStarted }, - }); - stopPolling = startActivePolling(plannerDir, widget, plannerStarted, "planner", storyId); - } + const plannerId = `planner-${storyId}`; + webServer?.registerAgent({ id: plannerId, name: `planner-${storyId}`, dir: plannerDir, role: "planner", model: null, parent: null }); + webServer?.trackSubagent(plannerDir, "planner", storyId); - const planResult = await spawnPlanner({ epicDir, subagentDir: plannerDir, cwd, extensionPath, storyId, log, ui: ui ?? undefined }); - stopPolling?.(); + const planResult = await spawnSubagent({ role: "planner", epicDir, storyId }, plannerDir, opts); - if (widget) { - const logs = await readRecentLogs(plannerDir); - widget.update({ logLines: logs }); - } + webServer?.clearSubagent(); + webServer?.completeAgent(plannerId); if (planResult.exitCode !== 0) { + // Planner failed — skip executor, proceed directly to post-execution + // orchestrator so it can make a routing decision (retry or skip). 
log("Planner failed — skipping executor, proceeding to post-execution orchestrator", { storyId, exitCode: planResult.exitCode, }); const s2 = await loadStoryState(epicDir, storyId); - await saveStoryState(epicDir, storyId, { - ...s2, - status: "verifying", - updatedAt: new Date().toISOString(), - }); + await saveStoryState(epicDir, storyId, { ...s2, status: "verifying", updatedAt: new Date().toISOString() }); const postDir = await ensureSubagentDirectory(epicDir, `orchestrator-post-${storyId}`); - const orchStarted = Date.now(); - if (widget) { - widget.update({ activeSubagent: { role: "orchestrator", storyId, step: 0, totalSteps: 4, stepName: "", startedAt: orchStarted } }); - stopPolling = startActivePolling(postDir, widget, orchStarted, "orchestrator", storyId); - } + const postId = `orchestrator-post-${storyId}`; + webServer?.registerAgent({ id: postId, name: `orchestrator-post-${storyId}`, dir: postDir, role: "orchestrator", model: null, parent: null }); + webServer?.trackSubagent(postDir, "orchestrator", storyId); - await spawnOrchestrator({ epicDir, subagentDir: postDir, cwd, extensionPath, stepSequence: "post-execution", storyId, log, ui: ui ?? undefined }); - stopPolling?.(); + await spawnSubagent({ role: "orchestrator", epicDir, stepSequence: "post-execution", storyId }, postDir, opts); - if (widget) { - const logs = await readRecentLogs(postDir); - widget.update({ logLines: logs }); - } + webServer?.clearSubagent(); + webServer?.completeAgent(postId); return; } // 3. Set status to 'executing'. const s3 = await loadStoryState(epicDir, storyId); - await saveStoryState(epicDir, storyId, { - ...s3, - status: "executing", - updatedAt: new Date().toISOString(), - }); + await saveStoryState(epicDir, storyId, { ...s3, status: "executing", updatedAt: new Date().toISOString() }); // 4. Spawn executor. 
const execDir = await ensureSubagentDirectory(epicDir, `executor-${storyId}`); - const execStarted = Date.now(); - if (widget) { - widget.update({ activeSubagent: { role: "executor", storyId, step: 0, totalSteps: 2, stepName: "", startedAt: execStarted } }); - stopPolling = startActivePolling(execDir, widget, execStarted, "executor", storyId); - } + const execId = `executor-${storyId}`; + webServer?.registerAgent({ id: execId, name: `executor-${storyId}`, dir: execDir, role: "executor", model: null, parent: null }); + webServer?.trackSubagent(execDir, "executor", storyId); - const execResult = await spawnExecutor({ epicDir, subagentDir: execDir, cwd, extensionPath, storyId, log, ui: ui ?? undefined }); - stopPolling?.(); + const execResult = await spawnSubagent({ role: "executor", epicDir, storyId }, execDir, opts); - if (widget) { - const logs = await readRecentLogs(execDir); - widget.update({ logLines: logs }); - } + webServer?.clearSubagent(); + webServer?.completeAgent(execId); if (execResult.exitCode !== 0) { log("Executor failed", { storyId, exitCode: execResult.exitCode }); @@ -271,33 +211,20 @@ async function runStoryExecution( // 5. Set status to 'verifying'. const s4 = await loadStoryState(epicDir, storyId); - await saveStoryState(epicDir, storyId, { - ...s4, - status: "verifying", - updatedAt: new Date().toISOString(), - }); + await saveStoryState(epicDir, storyId, { ...s4, status: "verifying", updatedAt: new Date().toISOString() }); - // 6. Spawn orchestrator (post-execution) — writes verdict to story state. + // 6. Spawn orchestrator (post-execution). 
const postDir = await ensureSubagentDirectory(epicDir, `orchestrator-post-${storyId}`); - const orchStarted = Date.now(); - if (widget) { - widget.update({ activeSubagent: { role: "orchestrator", storyId, step: 0, totalSteps: 4, stepName: "", startedAt: orchStarted } }); - stopPolling = startActivePolling(postDir, widget, orchStarted, "orchestrator", storyId); - } + const postId = `orchestrator-post-${storyId}`; + webServer?.registerAgent({ id: postId, name: `orchestrator-post-${storyId}`, dir: postDir, role: "orchestrator", model: null, parent: null }); + webServer?.trackSubagent(postDir, "orchestrator", storyId); - await spawnOrchestrator({ epicDir, subagentDir: postDir, cwd, extensionPath, stepSequence: "post-execution", storyId, log, ui: ui ?? undefined }); - stopPolling?.(); + await spawnSubagent({ role: "orchestrator", epicDir, stepSequence: "post-execution", storyId }, postDir, opts); - if (widget) { - const logs = await readRecentLogs(postDir); - widget.update({ logLines: logs }); - } + webServer?.clearSubagent(); + webServer?.completeAgent(postId); } -// retryCount is the 1-based retry attempt number (1 for first retry, 2 for -// second, etc.). It is included in directory names so each retry gets its own -// isolated stdout.log and events.jsonl, preventing directory collision when -// DEFAULT_MAX_RETRIES > 1. async function runStoryReexecution( epicDir: string, cwd: string, @@ -306,54 +233,42 @@ async function runStoryReexecution( retryCount: number, failureContext: string | undefined, log: Logger, - ui: ExtensionUIContext | null, - widget: EpicWidgetController | null, + webServer: WebServerHandle | null, ): Promise { + const opts: SpawnOptions = { cwd, extensionPath, log, webServer: webServer ?? 
undefined }; + const execDir = await ensureSubagentDirectory(epicDir, `executor-${storyId}-retry-${retryCount}`); - const execStarted = Date.now(); - let stopPolling: (() => void) | undefined; - if (widget) { - widget.update({ activeSubagent: { role: "executor", storyId, step: 0, totalSteps: 2, stepName: "retry", startedAt: execStarted } }); - stopPolling = startActivePolling(execDir, widget, execStarted, "executor", storyId); - } + const execId = `executor-${storyId}-retry-${retryCount}`; + webServer?.registerAgent({ id: execId, name: `executor-${storyId}-retry-${retryCount}`, dir: execDir, role: "executor", model: null, parent: null }); + webServer?.trackSubagent(execDir, "executor", storyId); - await spawnExecutor({ epicDir, subagentDir: execDir, cwd, extensionPath, storyId, retryContext: failureContext, log, ui: ui ?? undefined }); - stopPolling?.(); + // retryContext flows from koan_retry_story's failure_summary into the task + // manifest, where the executor reads it from step 1 guidance. 
+ await spawnSubagent({ role: "executor", epicDir, storyId, retryContext: failureContext }, execDir, opts); - if (widget) { - const logs = await readRecentLogs(execDir); - widget.update({ logLines: logs }); - } + webServer?.clearSubagent(); + webServer?.completeAgent(execId); const story = await loadStoryState(epicDir, storyId); - await saveStoryState(epicDir, storyId, { - ...story, - status: "verifying", - updatedAt: new Date().toISOString(), - }); + await saveStoryState(epicDir, storyId, { ...story, status: "verifying", updatedAt: new Date().toISOString() }); const postDir = await ensureSubagentDirectory(epicDir, `orchestrator-post-${storyId}-retry-${retryCount}`); - const orchStarted = Date.now(); - if (widget) { - widget.update({ activeSubagent: { role: "orchestrator", storyId, step: 0, totalSteps: 4, stepName: "", startedAt: orchStarted } }); - stopPolling = startActivePolling(postDir, widget, orchStarted, "orchestrator", storyId); - } + const postId = `orchestrator-post-${storyId}-retry-${retryCount}`; + webServer?.registerAgent({ id: postId, name: `orchestrator-post-${storyId}-retry-${retryCount}`, dir: postDir, role: "orchestrator", model: null, parent: null }); + webServer?.trackSubagent(postDir, "orchestrator", storyId); - await spawnOrchestrator({ epicDir, subagentDir: postDir, cwd, extensionPath, stepSequence: "post-execution", storyId, log, ui: ui ?? 
undefined }); - stopPolling?.(); + await spawnSubagent({ role: "orchestrator", epicDir, stepSequence: "post-execution", storyId }, postDir, opts); - if (widget) { - const logs = await readRecentLogs(postDir); - widget.update({ logLines: logs }); - } + webServer?.clearSubagent(); + webServer?.completeAgent(postId); } -async function refreshWidgetStories(epicDir: string, widget: EpicWidgetController): Promise { +async function refreshWebServerStories(epicDir: string, webServer: WebServerHandle): Promise { try { const stories = await loadAllStoryStates(epicDir); - widget.update({ stories: stories.map((s) => ({ storyId: s.storyId, status: s.status })) }); + webServer.pushStories(stories.map((s) => ({ storyId: s.storyId, status: s.status }))); } catch { - // Non-fatal — widget update is best-effort. + // Non-fatal } } @@ -362,43 +277,42 @@ async function runStoryLoop( cwd: string, extensionPath: string, log: Logger, - ui: ExtensionUIContext | null, - widget: EpicWidgetController | null, + webServer: WebServerHandle | null, ): Promise<{ success: boolean; summary: string }> { { - - // 2. Spawn orchestrator (pre-execution) — selects first story. + // 1. Spawn orchestrator (pre-execution) — selects first story. 
const preDir = await ensureSubagentDirectory(epicDir, "orchestrator-pre"); - const preStarted = Date.now(); - let stopPolling: (() => void) | undefined; - if (widget) { - widget.update({ activeSubagent: { role: "orchestrator", step: 0, totalSteps: 2, stepName: "pre-execution", startedAt: preStarted } }); - stopPolling = startActivePolling(preDir, widget, preStarted, "orchestrator"); - } + const preId = "orchestrator-pre"; + webServer?.registerAgent({ id: preId, name: "orchestrator-pre", dir: preDir, role: "orchestrator", model: null, parent: null }); + webServer?.trackSubagent(preDir, "orchestrator"); + + const preResult = await spawnSubagent( + { role: "orchestrator", epicDir, stepSequence: "pre-execution" }, + preDir, + { cwd, extensionPath, log, webServer: webServer ?? undefined }, + ); - const preResult = await spawnOrchestrator({ epicDir, subagentDir: preDir, cwd, extensionPath, stepSequence: "pre-execution", log, ui: ui ?? undefined }); - stopPolling?.(); + webServer?.clearSubagent(); + webServer?.completeAgent(preId); if (preResult.exitCode !== 0) { return { success: false, summary: "Pre-execution orchestrator failed" }; } - if (widget) await refreshWidgetStories(epicDir, widget); + if (webServer) await refreshWebServerStories(epicDir, webServer); - // 3. Story execution loop — route until terminal state. + // 2. Story execution loop — route until terminal state. 
while (true) { const stories = await loadAllStoryStates(epicDir); - if (widget) { - widget.update({ stories: stories.map((s) => ({ storyId: s.storyId, status: s.status })) }); - } + webServer?.pushStories(stories.map((s) => ({ storyId: s.storyId, status: s.status }))); const routing = routeFromState(stories, log); switch (routing.action) { case "execute": { const storyId = routing.storyId as string; - await runStoryExecution(epicDir, cwd, extensionPath, storyId, log, ui, widget); - if (widget) await refreshWidgetStories(epicDir, widget); + await runStoryExecution(epicDir, cwd, extensionPath, storyId, log, webServer); + if (webServer) await refreshWebServerStories(epicDir, webServer); break; } @@ -406,7 +320,6 @@ async function runStoryLoop( const storyId = routing.storyId as string; const story = stories.find((s) => s.storyId === storyId) as StoryState; - // Retry budget exhaustion: skip + notify per §11.6.3. if (story.retryCount >= story.maxRetries) { log("Retry budget exhausted, skipping story", { storyId, retryCount: story.retryCount }); await saveStoryState(epicDir, storyId, { @@ -415,9 +328,11 @@ async function runStoryLoop( skipReason: `Retry budget exhausted after ${story.retryCount} attempt(s). Last failure: ${story.failureSummary ?? "(none recorded)"}`, updatedAt: new Date().toISOString(), }); - ui?.notify(`Story ${storyId} skipped after ${story.retryCount} failed attempt(s).`, "warning"); - if (widget) await refreshWidgetStories(epicDir, widget); - // Continue loop — other stories may still be runnable. 
+ webServer?.pushNotification( + `Story ${storyId} skipped after ${story.retryCount} failed attempt(s).`, + "warning", + ); + if (webServer) await refreshWebServerStories(epicDir, webServer); continue; } @@ -427,15 +342,14 @@ async function runStoryLoop( retryCount: story.retryCount + 1, updatedAt: new Date().toISOString(), }); - await runStoryReexecution(epicDir, cwd, extensionPath, storyId, story.retryCount + 1, story.failureSummary, log, ui, widget); - if (widget) await refreshWidgetStories(epicDir, widget); + await runStoryReexecution(epicDir, cwd, extensionPath, storyId, story.retryCount + 1, story.failureSummary, log, webServer); + if (webServer) await refreshWebServerStories(epicDir, webServer); break; } case "complete": { const done = stories.filter((s) => s.status === "done").length; const skipped = stories.filter((s) => s.status === "skipped").length; - if (widget) widget.update({ activeSubagent: null }); return { success: true, summary: `Epic complete: ${done} done, ${skipped} skipped` }; } @@ -450,90 +364,96 @@ async function runStoryLoop( // Public API // --------------------------------------------------------------------------- -export async function runEpicPipeline( +export async function runPipeline( epicDir: string, cwd: string, extensionPath: string, log: Logger, - ui: ExtensionUIContext | null, + webServer: WebServerHandle | null, ): Promise<{ success: boolean; summary: string }> { - // Widget created at pipeline start — spans the full epic lifecycle (Phase A + B). - // Widget is an observation layer: receives one-way update() calls, never - // influences routing decisions. const epicState = await loadEpicState(epicDir); - const widget = ui ? new EpicWidgetController(ui, epicState.epicId) : null; - try { - // Phase A: Epic Creation. 
- ui?.notify("Starting intake...", "info"); - await saveEpicState(epicDir, { ...epicState, phase: "intake" }); - if (widget) widget.update({ epicPhase: "intake" }); + // Model config gate — blocks until user confirms model selection in the web UI. + if (webServer) { + await webServer.requestModelConfig(); + } - const intakeOk = await runIntake(epicDir, cwd, extensionPath, log, ui, widget); - if (!intakeOk) return { success: false, summary: "Intake phase failed" }; + // Phase A: Epic Creation. + webServer?.pushNotification("Starting intake...", "info"); + await saveEpicState(epicDir, { ...epicState, phase: "intake" }); + webServer?.pushPhase("intake"); - const afterIntake = await loadEpicState(epicDir); - await saveEpicState(epicDir, { ...afterIntake, phase: "decomposition" }); - if (widget) widget.update({ epicPhase: "decomposition" }); + const intakeOk = await runIntake(epicDir, cwd, extensionPath, log, webServer); + if (!intakeOk) return { success: false, summary: "Intake phase failed" }; - const decompOk = await runDecomposer(epicDir, cwd, extensionPath, log, ui, widget); - if (!decompOk) return { success: false, summary: "Decomposition phase failed" }; + const afterIntake = await loadEpicState(epicDir); + await saveEpicState(epicDir, { ...afterIntake, phase: "decomposition" }); + webServer?.pushPhase("decomposition"); - // Discover stories by scanning the filesystem — per AGENTS.md invariant, - // LLMs write markdown files only. The decomposer wrote stories/{id}/story.md - // files; the driver scans to discover IDs and populates epic-state.json. 
- const storyIds = await discoverStoryIds(epicDir); - log("Discovered story IDs", { count: storyIds.length, ids: storyIds }); + const decompOk = await runDecomposer(epicDir, cwd, extensionPath, log, webServer); + if (!decompOk) return { success: false, summary: "Decomposition phase failed" }; - for (const storyId of storyIds) { - await ensureStoryDirectory(epicDir, storyId); - } + // Discover stories by scanning the filesystem — the decomposer LLM wrote + // story.md files using the write tool; the driver discovers them here and + // populates the JSON story list (never asks the LLM to update JSON directly). + const storyIds = await discoverStoryIds(epicDir); + log("Discovered story IDs", { count: storyIds.length, ids: storyIds }); - const afterDecomp = await loadEpicState(epicDir); - await saveEpicState(epicDir, { ...afterDecomp, stories: storyIds, phase: "review" }); - if (widget) { - widget.update({ epicPhase: "review" }); - const initialStories = await loadAllStoryStates(epicDir); - widget.update({ stories: initialStories.map((s) => ({ storyId: s.storyId, status: s.status })) }); - } + for (const storyId of storyIds) { + await ensureStoryDirectory(epicDir, storyId); + } - // Spec review gate — present story sketches for human approval if UI is available. - if (ui && storyIds.length > 0) { - ui.notify("Decomposition complete. 
Review story sketches...", "info"); - const reviewResult = await reviewStorySketches(epicDir, storyIds, ui); - log("Spec review complete", { approved: reviewResult.approved.length, skipped: reviewResult.skipped.length }); - - for (const skippedId of reviewResult.skipped) { - const skippedStory = await loadStoryState(epicDir, skippedId); - await saveStoryState(epicDir, skippedId, { - ...skippedStory, - status: "skipped", - skipReason: "Removed during spec review", - updatedAt: new Date().toISOString(), - }); - } + const afterDecomp = await loadEpicState(epicDir); + await saveEpicState(epicDir, { ...afterDecomp, stories: storyIds, phase: "review" }); + webServer?.pushPhase("review"); + + if (webServer) { + const initialStories = await loadAllStoryStates(epicDir); + webServer.pushStories(initialStories.map((s) => ({ storyId: s.storyId, status: s.status }))); + } - const reviewedState = await loadEpicState(epicDir); - await saveEpicState(epicDir, { ...reviewedState, stories: storyIds }); - } else { - log("Spec review gate: auto-approving (no UI or no stories)"); + // Spec review gate — present story sketches for human approval. + // Auto-approves when no web server is running (CI/headless mode). + if (webServer && storyIds.length > 0) { + webServer.pushNotification("Decomposition complete. Review story sketches...", "info"); + + const titles = await Promise.all(storyIds.map((id) => readStoryTitle(epicDir, id))); + const reviewStories: ReviewStory[] = storyIds.map((storyId, i) => ({ + storyId, + title: titles[i] ?? 
storyId, + })); + + const reviewResult = await webServer.requestReview(reviewStories); + log("Spec review complete", { approved: reviewResult.approved.length, skipped: reviewResult.skipped.length }); + + for (const skippedId of reviewResult.skipped) { + const skippedStory = await loadStoryState(epicDir, skippedId); + await saveStoryState(epicDir, skippedId, { + ...skippedStory, + status: "skipped", + skipReason: "Removed during spec review", + updatedAt: new Date().toISOString(), + }); } - // Phase B: Execution. - const beforeExec = await loadEpicState(epicDir); - await saveEpicState(epicDir, { ...beforeExec, phase: "executing" }); - if (widget) widget.update({ epicPhase: "executing" }); + const reviewedState = await loadEpicState(epicDir); + await saveEpicState(epicDir, { ...reviewedState, stories: storyIds }); + } else { + log("Spec review gate: auto-approving (no web server or no stories)"); + } - const result = await runStoryLoop(epicDir, cwd, extensionPath, log, ui, widget); + // Phase B: Execution. 
+ const beforeExec = await loadEpicState(epicDir); + await saveEpicState(epicDir, { ...beforeExec, phase: "executing" }); + webServer?.pushPhase("executing"); - if (result.success) { - const afterExec = await loadEpicState(epicDir); - await saveEpicState(epicDir, { ...afterExec, phase: "completed" }); - if (widget) widget.update({ epicPhase: "completed" }); - } + const result = await runStoryLoop(epicDir, cwd, extensionPath, log, webServer); - return result; - } finally { - widget?.destroy(); + if (result.success) { + const afterExec = await loadEpicState(epicDir); + await saveEpicState(epicDir, { ...afterExec, phase: "completed" }); + webServer?.pushPhase("completed"); } + + return result; } From 7c766f147751866a90c99c2f460966ca8a5e3155 Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Wed, 18 Mar 2026 23:49:30 +0700 Subject: [PATCH 061/412] wire extension entry point to task manifest and web dashboard --- extensions/koan.ts | 214 ++++++++++++++++++++++----------------------- 1 file changed, 104 insertions(+), 110 deletions(-) diff --git a/extensions/koan.ts b/extensions/koan.ts index 24e0efc..5a5a999 100644 --- a/extensions/koan.ts +++ b/extensions/koan.ts @@ -1,23 +1,32 @@ -// Entry point for the koan pi extension. Serves dual roles: parent session -// (registers koan_plan tool and /koan commands) and subagent mode (dispatches -// to phase workflow via CLI flags). All tools register unconditionally at init; -// phases restrict access via tool_call blocking at runtime. +// Entry point for the koan pi extension. Serves dual roles: // -// RuntimeContext replaces the three separate mutable refs (PlanRef, -// SubagentRef, WorkflowDispatch) used in the previous design. +// Parent session mode — registers the koan_plan tool and /koan commands. +// Subagent mode — reads task.json from --koan-dir, dispatches to +// the appropriate phase workflow. 
+// +// All tools register unconditionally at init; phases restrict access at +// runtime via the tool_call permission fence in BasePhase. +// +// RuntimeContext is a mutable carrier set once during before_agent_start. +// Tools register at init (before flags are available) and read ctx at +// call time — the mutable-ref pattern decouples static registration from +// dynamic phase routing. import * as path from "node:path"; import { Type } from "@sinclair/typebox"; import type { ExtensionAPI, ExtensionContext } from "@mariozechner/pi-coding-agent"; -import { detectSubagentMode, dispatchPhase } from "../src/planner/phases/dispatch.js"; +import { dispatchPhase } from "../src/planner/phases/dispatch.js"; import { registerAllTools, createRuntimeContext } from "../src/planner/tools/index.js"; import { createLogger, setLogDir } from "../src/utils/logger.js"; -import { EventLog, extractToolEvent } from "../src/planner/lib/audit.js"; +import { EventLog, extractToolCall, extractToolResult } from "../src/planner/lib/audit.js"; +import { readTaskFile } from "../src/planner/lib/task.js"; import { openKoanConfig } from "../src/planner/ui/config/menu.js"; import { createEpicDirectory } from "../src/planner/epic/state.js"; import { exportConversation } from "../src/planner/conversation.js"; -import { runEpicPipeline } from "../src/planner/driver.js"; +import { runPipeline } from "../src/planner/driver.js"; +import { startWebServer, openBrowser } from "../src/planner/web/server.js"; +import { registerTruncationOverride } from "../src/planner/lib/truncation-override.js"; function currentModelId(ctx: ExtensionContext): string | null { const model = ctx.model; @@ -28,92 +37,97 @@ function currentModelId(ctx: ExtensionContext): string | null { export default function koan(pi: ExtensionAPI): void { const log = createLogger("Koan"); - // -- Flags -- - pi.registerFlag("koan-role", { - description: "Koan subagent role", - type: "string", - default: "", - }); - 
pi.registerFlag("koan-epic-dir", { - description: "Koan epic directory path", - type: "string", - default: "", - }); - pi.registerFlag("koan-subagent-dir", { - description: "Koan subagent working directory", - type: "string", - default: "", - }); - pi.registerFlag("koan-story-id", { - description: "Current story ID for per-story subagents", - type: "string", - default: "", - }); - pi.registerFlag("koan-step-sequence", { - description: "Orchestrator step sequence (pre-execution or post-execution)", - type: "string", - default: "", - }); - pi.registerFlag("koan-retry-context", { - description: "Failure context from previous execution attempt", + // Single flag: the subagent directory path. The child reads task.json from + // this directory to discover its role and task parameters — no structured + // data flows through CLI flags. + pi.registerFlag("koan-dir", { + description: "Subagent working directory (internal — set by parent before spawn)", type: "string", default: "", }); - // RuntimeContext: single mutable object that carries epicDir, subagentDir, - // and the active onCompleteStep handler. Replaces the old PlanRef + - // SubagentRef + WorkflowDispatch triple. const ctx = createRuntimeContext(); registerAllTools(pi, ctx); - + // Registered unconditionally — applies in both parent and subagent mode. + // Self-guards: no-op when bash output fits within pi's default limits. + // Must precede before_agent_start so the audit tool_result handler (which + // registers later, inside before_agent_start) sees the original event and + // does not interfere with the replacement content we return. + registerTruncationOverride(pi); + + // Dispatch happens exactly once per session (guard prevents re-entry on + // subsequent before_agent_start calls, which pi may emit on reconnect). 
let dispatched = false; pi.on("before_agent_start", async (_event, extCtx) => { if (dispatched) return; dispatched = true; - const config = detectSubagentMode(pi); - if (config) { - // Populate RuntimeContext from CLI flags. - if (config.epicDir) { - ctx.epicDir = config.epicDir; - } - - let eventLog: EventLog | undefined; - if (config.subagentDir) { - ctx.subagentDir = config.subagentDir; - eventLog = new EventLog( - config.subagentDir, - config.role, - config.role, - currentModelId(extCtx), - ); - await eventLog.open(); - - pi.on("tool_result", (event) => { - void eventLog!.append(extractToolEvent(event as { - toolName: string; - input: Record; - content: Array<{ type: string; text?: string }>; - isError: boolean; - })); - }); + const dirFlag = pi.getFlag("koan-dir"); + if (!dirFlag || typeof dirFlag !== "string" || dirFlag.trim().length === 0) { + // No --koan-dir flag: running as parent session, not as a subagent. + return; + } - pi.on("session_shutdown", () => { - void eventLog!.close(); + const subagentDir = dirFlag.trim(); + + // task.json was written by the parent before spawning this process. + // Throws if missing or malformed — that is a programming error, not a user error. 
+ const task = await readTaskFile(subagentDir); + + ctx.epicDir = task.epicDir; + ctx.subagentDir = subagentDir; + + const eventLog = new EventLog( + subagentDir, + task.role, + task.role, + currentModelId(extCtx), + ); + await eventLog.open(); + + pi.on("tool_call", (event) => { + void eventLog.append(extractToolCall(event as { + toolCallId: string; + toolName: string; + input: Record; + })); + }); + + pi.on("tool_result", (event) => { + void eventLog.append(extractToolResult(event as { + toolCallId: string; + toolName: string; + input: Record; + content: Array<{ type: string; text?: string }>; + isError: boolean; + })); + }); + + pi.on("turn_end", (event) => { + const msg = event.message as { + role: string; + usage?: { input: number; output: number; cacheRead: number; cacheWrite: number }; + }; + if (msg.role === "assistant" && msg.usage) { + void eventLog.append({ + kind: "usage", + input: msg.usage.input, + output: msg.usage.output, + cacheRead: msg.usage.cacheRead, + cacheWrite: msg.usage.cacheWrite, }); } + }); - await dispatchPhase(pi, config, ctx, log, eventLog); - } + pi.on("session_shutdown", () => { + void eventLog.close(); + }); + + await dispatchPhase(pi, task, ctx, log, eventLog); }); // -- koan_plan tool -- - // Requires an interactive terminal session: subagents use koan_ask_question - // and koan_request_scouts, which are answered by the IPC responder running - // in the parent session. Without a UI, no IPC responder starts and any - // subagent calling those tools will poll ipc.json forever, hanging the - // pipeline permanently. pi.registerTool({ name: "koan_plan", label: "Plan", @@ -130,33 +144,27 @@ export default function koan(pi: ExtensionAPI): void { ].join("\n"), parameters: Type.Object({}), async execute(_toolCallId, _params, _signal, _onUpdate, extCtx) { - // koan_plan requires an interactive terminal session. 
Subagents use - // koan_ask_question and koan_request_scouts, which are answered by the - // IPC responder that only starts when a UI is present. Without a UI, - // subagents would poll ipc.json forever and the pipeline would hang. - if (!extCtx.hasUI) { - return { - content: [{ type: "text" as const, text: "koan_plan requires an interactive terminal session." }], - details: undefined, - }; - } - const epicInfo = await createEpicDirectory("", extCtx.cwd); ctx.epicDir = epicInfo.directory; setLogDir(epicInfo.directory); - await exportConversation(extCtx.sessionManager, epicInfo.directory); - log("Conversation exported", { epicDir: epicInfo.directory }); - const extensionPath = path.resolve(import.meta.dirname, "koan.ts"); - const ui = extCtx.hasUI ? extCtx.ui : null; - const result = await runEpicPipeline(epicInfo.directory, extCtx.cwd, extensionPath, log, ui); + const server = await startWebServer(epicInfo.directory); + try { + await openBrowser(pi, server.url); + await exportConversation(extCtx.sessionManager, epicInfo.directory); + log("Conversation exported", { epicDir: epicInfo.directory }); - return { - content: [{ type: "text" as const, text: result.summary }], - details: undefined, - }; + const result = await runPipeline(epicInfo.directory, extCtx.cwd, extensionPath, log, server); + + return { + content: [{ type: "text" as const, text: `Dashboard: ${server.url}\n\n${result.summary}` }], + details: undefined, + }; + } finally { + server.close(); + } }, }); @@ -174,18 +182,4 @@ export default function koan(pi: ExtensionAPI): void { } }, }); - - pi.registerCommand("koan-execute", { - description: "Execute a koan plan", - handler: async (_args, extCtx) => { - extCtx.ui.notify("Execution mode is not yet implemented.", "warning"); - }, - }); - - pi.registerCommand("koan-status", { - description: "Show koan workflow status", - handler: async (_args, extCtx) => { - extCtx.ui.notify("Status: idle", "info"); - }, - }); } From 6b1faeab165619326e1f94fde8530b7f8d2a3c7d 
Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Wed, 18 Mar 2026 23:49:39 +0700 Subject: [PATCH 062/412] update AGENTS.md with six core architecture invariants --- AGENTS.md | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 64 insertions(+), 5 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 24d8ad4..ea5ff9f 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1,7 +1,66 @@ -# Koan Architecture Invariant +# Koan Architecture Invariants -LLMs write **markdown files only**. LLMs communicate with the driver through **tool calls only**. -The driver maintains `.json` state files internally — no LLM ever reads or writes a `.json` file. +Full architecture documentation: **[docs/architecture.md](docs/architecture.md)** -Example: orchestrator calls `koan_complete_story(story_id)` → tool code writes `state.json` + `status.md` → -driver reads `state.json` to route next action. The orchestrator never touches `state.json` directly. +Spoke documents: +- [docs/subagents.md](docs/subagents.md) — spawn lifecycle, task manifest, step-first workflow, permissions +- [docs/ipc.md](docs/ipc.md) — file-based IPC protocol, scout spawning, question routing +- [docs/state.md](docs/state.md) — driver/LLM boundary, epic and story state, routing rules + +--- + +The six core invariants (see architecture.md for full detail + pitfalls): + +## 1. File Boundary + +LLMs write **markdown files only**. The driver maintains **JSON state files** +internally — no LLM ever reads or writes a `.json` file. Tool code bridges +both worlds. + +## 2. Step-First Workflow Pattern (critical) + +Every subagent is a `pi -p` process. Once the LLM produces text without a tool +call, the process exits — there is no stdin to recover. + +**The first thing any subagent does is call `koan_complete_step`.** The spawn +prompt contains *only* this directive. The tool returns step 1 instructions. +This establishes the calling pattern before the LLM sees complex instructions. 
+ +``` +Boot prompt: "You are a koan {role} agent. Call koan_complete_step to receive your instructions." + ↓ LLM calls koan_complete_step (step 0 → 1 transition) +Tool returns: Step 1 instructions (rich context, task details, guidance) + ↓ LLM does work... + ↓ LLM calls koan_complete_step +Tool returns: Step 2 instructions (or "Phase complete.") +``` + +## 3. Driver Determinism + +The driver reads JSON state files and exit codes, applies routing rules, and +spawns the next subagent. It never makes judgment calls or parses free-text. + +## 4. Default-Deny Permissions + +Every tool call passes through a role-based permission fence. Unknown roles +and tools are blocked. Planning roles can only write inside the epic directory. + +## 5. Need-to-Know Prompts + +Boot prompt is one sentence. System prompt has role identity, no task details. +Task details arrive via step 1 guidance after the tool-calling pattern is +established. + +## 6. Directory-as-Contract + +The subagent directory is the sole interface between parent and child. +Three well-known JSON files: + +| File | Writer | Reader | Purpose | +|------|--------|--------|---------| +| `task.json` | Parent (before spawn) | Child (once, at startup) | What to do | +| `state.json` | Child (continuously) | Parent (polling) | What has been done | +| `ipc.json` | Both (request/response) | Both (polling) | What is needed right now | + +No structured configuration flows through CLI flags. The spawn command carries +only the directory path. 
From 454635f624bd9867b97a56895919fb9e74fac731 Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Thu, 19 Mar 2026 20:47:29 +0700 Subject: [PATCH 063/412] audit infrastructure for thinking and confidence events --- extensions/koan.ts | 15 ++++ src/planner/lib/audit.ts | 167 +++++++++++++++++++++++++++++++++++++-- 2 files changed, 174 insertions(+), 8 deletions(-) diff --git a/extensions/koan.ts b/extensions/koan.ts index 5a5a999..c5c6cae 100644 --- a/extensions/koan.ts +++ b/extensions/koan.ts @@ -86,6 +86,9 @@ export default function koan(pi: ExtensionAPI): void { ); await eventLog.open(); + // Make the event log available to tools (e.g. koan_set_confidence) via ctx. + ctx.eventLog = eventLog; + pi.on("tool_call", (event) => { void eventLog.append(extractToolCall(event as { toolCallId: string; @@ -108,6 +111,7 @@ export default function koan(pi: ExtensionAPI): void { const msg = event.message as { role: string; usage?: { input: number; output: number; cacheRead: number; cacheWrite: number }; + content?: Array<{ type: string; thinking?: string }>; }; if (msg.role === "assistant" && msg.usage) { void eventLog.append({ @@ -118,6 +122,17 @@ export default function koan(pi: ExtensionAPI): void { cacheWrite: msg.usage.cacheWrite, }); } + if (msg.role === "assistant" && Array.isArray(msg.content)) { + for (const block of msg.content) { + if (block.type === "thinking" && typeof block.thinking === "string" && block.thinking.length > 0) { + void eventLog.append({ + kind: "thinking", + text: block.thinking, + chars: block.thinking.length, + }); + } + } + } }); pi.on("session_shutdown", () => { diff --git a/src/planner/lib/audit.ts b/src/planner/lib/audit.ts index 86f9ebb..91ebabe 100644 --- a/src/planner/lib/audit.ts +++ b/src/planner/lib/audit.ts @@ -75,6 +75,30 @@ export interface UsageEvent extends EventBase { cacheWrite: number; } +export interface ThinkingEvent extends EventBase { + kind: "thinking"; + // Truncated thinking content (first 2000 chars for log size). 
+ text: string; + // Original length before truncation. + chars: number; +} + +export interface ConfidenceChangeEvent extends EventBase { + kind: "confidence_change"; + // The confidence level declared by the intake agent via koan_set_confidence. + level: "exploring" | "low" | "medium" | "high" | "certain"; + // Which iteration of the Scout→Deliberate→Reflect loop this was declared in. + iteration: number; +} + +export interface IterationStartEvent extends EventBase { + kind: "iteration_start"; + // The new iteration number (incremented from the previous Reflect step). + iteration: number; + // Maximum allowed iterations before the loop is forced to exit. + maxIterations: number; +} + export type AuditEvent = | ToolCallEvent | ToolResultEvent @@ -82,7 +106,10 @@ export type AuditEvent = | StepTransitionEvent | PhaseEndEvent | HeartbeatEvent - | UsageEvent; + | UsageEvent + | ThinkingEvent + | ConfidenceChangeEvent + | IterationStartEvent; // Distributive Omit — distributes over union members so object literals // with fields specific to one member are accepted. @@ -111,6 +138,13 @@ export interface Projection { completionSummary: string | null; tokensSent: number; tokensReceived: number; + // Timestamp of the most recent tool_result event; used to track thinking gaps. + lastToolResultAt: string | null; + // Intake-specific: the most recent confidence level declared by koan_set_confidence. + // Null for non-intake subagents or before any confidence is declared. + intakeConfidence: "exploring" | "low" | "medium" | "high" | "certain" | null; + // Intake-specific: the current loop iteration (1-based). Zero for non-intake. 
+ intakeIteration: number; } // -- Correlated tool invocations -- @@ -355,6 +389,7 @@ export function fold(s: Projection, e: AuditEvent): Projection { ...base, lastAction: summarizeResult(e), currentToolCallId: null, + lastToolResultAt: e.ts, }; case "heartbeat": @@ -366,6 +401,22 @@ export function fold(s: Projection, e: AuditEvent): Projection { tokensSent: s.tokensSent + e.input, tokensReceived: s.tokensReceived + e.output, }; + + case "thinking": + return base; + + case "confidence_change": + return { + ...base, + intakeConfidence: e.level, + intakeIteration: e.iteration, + }; + + case "iteration_start": + return { + ...base, + intakeIteration: e.iteration, + }; } } @@ -404,6 +455,9 @@ export class EventLog { completionSummary: null, tokensSent: 0, tokensReceived: 0, + lastToolResultAt: null, + intakeConfidence: null, + intakeIteration: 0, }; } @@ -462,6 +516,22 @@ export class EventLog { } as Omit); } + async emitConfidenceChange(level: ConfidenceChangeEvent["level"], iteration: number): Promise { + await this.append({ + kind: "confidence_change", + level, + iteration, + } as Omit); + } + + async emitIterationStart(iteration: number, maxIterations: number): Promise { + await this.append({ + kind: "iteration_start", + iteration, + maxIterations, + } as Omit); + } + async close(): Promise { if (this.heartbeat) { clearInterval(this.heartbeat); @@ -507,6 +577,10 @@ export interface LogLine { highValue: boolean; inFlight: boolean; details?: string[]; + // Timestamp used by thinking entries to drive the live elapsed timer. + ts?: string; + // Expandable content body: thinking text, tool output, etc. + body?: string; } interface ToolShape { @@ -548,35 +622,101 @@ export async function readRecentLogs(dir: string, count = 8): Promise // Builds a chronological log by walking events in order and emitting // one LogLine per tool invocation (at result time, or at call time if -// still in-flight) plus lifecycle events. +// still in-flight) plus lifecycle events. 
Inserts thinking lines to +// represent gaps between visible events where the LLM is reasoning. function buildChronologicalLog(events: AuditEvent[], count: number): LogLine[] { const pendingCalls = new Map }>(); const lines: LogLine[] = []; + let thinkingStartTs: string | null = null; + // Index of the last thinking line pushed to `lines`. Thinking events fire + // AFTER the turn's tool_result (message_update is a post-turn event), so the + // text belongs to the PREVIOUS thinking gap, not the current one. We + // retroactively set body on the already-emitted line. + let lastThinkingIdx = -1; + let phaseEnded = false; for (const e of events) { if (e.kind === "heartbeat" || e.kind === "usage") continue; + if (e.kind === "confidence_change" || e.kind === "iteration_start") continue; + + if (e.kind === "thinking") { + // Retroactive: this text is from the turn that just completed. + // Overwrite (not append) — later message_update events have more + // complete content, so the last one wins. + if (lastThinkingIdx >= 0) { + lines[lastThinkingIdx].body = e.text; + } + continue; + } if (e.kind === "tool_call") { - // Stash tool name + input for when the result arrives (or for - // in-flight rendering if no result appears by end of loop). 
+ // Before a visible tool_call, insert a completed thinking line if gap ≥ 1s + if (e.tool !== "koan_complete_step" && thinkingStartTs) { + const gapMs = new Date(e.ts).getTime() - new Date(thinkingStartTs).getTime(); + if (gapMs >= 1000) { + lines.push({ + tool: "thinking", + summary: formatThinkingDuration(gapMs), + highValue: false, + inFlight: false, + }); + lastThinkingIdx = lines.length - 1; + } + thinkingStartTs = null; + } pendingCalls.set(e.toolCallId, { tool: e.tool, input: e.input }); continue; } if (e.kind === "tool_result") { - if (e.tool === "koan_complete_step") continue; + if (e.tool === "koan_complete_step") { + pendingCalls.delete(e.toolCallId); + continue; + } const call = pendingCalls.get(e.toolCallId); lines.push(formatPairedResult(e, call?.input ?? {})); pendingCalls.delete(e.toolCallId); + thinkingStartTs = e.ts; continue; } - // Lifecycle event. - lines.push(formatLifecycleEvent(e)); + if ( + e.kind === "phase_start" || + e.kind === "step_transition" || + e.kind === "phase_end" + ) { + // Flush any pending thinking gap before the lifecycle line. + if (thinkingStartTs) { + const gapMs = new Date(e.ts).getTime() - new Date(thinkingStartTs).getTime(); + if (gapMs >= 1000) { + lines.push({ + tool: "thinking", + summary: formatThinkingDuration(gapMs), + highValue: false, + inFlight: false, + }); + lastThinkingIdx = lines.length - 1; + } + thinkingStartTs = null; + } + if (e.kind === "phase_end") phaseEnded = true; + lines.push(formatLifecycleEvent(e)); + thinkingStartTs = e.ts; + } + } + + // Currently-thinking indicator: all tools completed, phase still running + if (thinkingStartTs && pendingCalls.size === 0 && !phaseEnded) { + lines.push({ + tool: "thinking", + summary: "", + highValue: false, + inFlight: true, + ts: thinkingStartTs, + }); } // Emit remaining calls without results as in-flight lines. - // The ActivityFeed renders the last in-flight line with animated dots. 
for (const [, call] of pendingCalls) { if (call.tool === "koan_complete_step") continue; lines.push(formatInFlightCall(call.tool, call.input)); @@ -636,6 +776,14 @@ function responseSize(response: string[]): string { return textStats(response.join("\n")); } +function formatThinkingDuration(ms: number): string { + const sec = Math.round(ms / 1000); + if (sec < 60) return `${sec}s`; + const min = Math.floor(sec / 60); + const remSec = sec % 60; + return remSec > 0 ? `${min}m ${remSec}s` : `${min}m`; +} + function truncateUnicode(text: string, maxChars: number): string { const chars = Array.from(text); if (chars.length <= maxChars) return text; @@ -830,3 +978,6 @@ function formatLifecycleEvent(e: PhaseStartEvent | StepTransitionEvent | PhaseEn return { tool: "phase", summary: e.detail ? `${e.outcome} · ${e.detail}` : e.outcome, highValue: false, inFlight: false }; } } + +// formatToolInvocation is kept for callers outside buildChronologicalLog. +void formatToolInvocation; From 081529e943cd69c8b582b3f2762de604ab93358b Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Thu, 19 Mar 2026 20:47:48 +0700 Subject: [PATCH 064/412] intake phase 5-step confidence loop --- src/planner/lib/permissions.ts | 26 ++ src/planner/lib/runtime-context.ts | 26 ++ src/planner/phases/base-phase.ts | 94 ++++- src/planner/phases/decomposer/prompts.ts | 5 +- src/planner/phases/intake/phase.ts | 133 ++++++- src/planner/phases/intake/prompts.ts | 364 +++++++++++++----- src/planner/phases/orchestrator/prompts.ts | 8 +- src/planner/phases/planner/prompts.ts | 6 +- src/planner/tools/confidence.ts | 76 ++++ src/planner/tools/index.ts | 2 + .../js/components/phases/Consolidation.jsx | 2 +- 11 files changed, 619 insertions(+), 123 deletions(-) create mode 100644 src/planner/tools/confidence.ts diff --git a/src/planner/lib/permissions.ts b/src/planner/lib/permissions.ts index 058be7c..2a5bf6e 100644 --- a/src/planner/lib/permissions.ts +++ b/src/planner/lib/permissions.ts @@ -33,6 +33,7 @@ export const 
ROLE_PERMISSIONS: ReadonlyMap> = new Ma "koan_complete_step", "koan_ask_question", "koan_request_scouts", + "koan_set_confidence", "edit", "write", ]), @@ -99,17 +100,42 @@ export const ROLE_PERMISSIONS: ReadonlyMap> = new Ma // Executor has unrestricted write access (must implement stories in the codebase). const PLANNING_ROLES = new Set(["intake", "scout", "decomposer", "orchestrator", "planner"]); +// STEP_1_BLOCKED_TOOLS: tools disallowed during the intake Extract step (step 1). +// Step 1 is read-only comprehension. Blocking these tools here provides a +// mechanical enforcement layer on top of the prompt-level prohibition, ensuring +// the LLM cannot frontload scouting or question-asking before understanding +// the conversation. +const STEP_1_BLOCKED_TOOLS = new Set([ + "koan_request_scouts", + "koan_ask_question", + "koan_set_confidence", + "write", + "edit", +]); + export function checkPermission( role: string, toolName: string, epicDir?: string, toolArgs?: Record, + intakeStep?: number, ): { allowed: boolean; reason?: string } { // Read tools are always allowed — check before role map lookup. if (READ_TOOLS.has(toolName)) { return { allowed: true }; } + // Intake step 1 (Extract) is read-only: block all side-effecting tools so + // the LLM cannot frontload scouting or question-asking before it has read + // and understood the conversation. + if (role === "intake" && intakeStep === 1 && STEP_1_BLOCKED_TOOLS.has(toolName)) { + return { + allowed: false, + reason: `${toolName} is not available during the Extract step (step 1). ` + + "Complete koan_complete_step first to advance to the Scout step.", + }; + } + // Unknown role: blocked under default-deny policy. 
if (!ROLE_PERMISSIONS.has(role)) { log("Unknown role blocked", { role, toolName }); diff --git a/src/planner/lib/runtime-context.ts b/src/planner/lib/runtime-context.ts index 5019bb1..1138f88 100644 --- a/src/planner/lib/runtime-context.ts +++ b/src/planner/lib/runtime-context.ts @@ -5,10 +5,32 @@ // onCompleteStep return value: // string → next step's formatted prompt (tool returns it to the LLM) // null → phase is complete (tool returns "Phase complete.") +// +// intakeConfidence: set by koan_set_confidence during the intake Reflect step. +// IntakePhase reads this in getNextStep() to decide whether to loop or advance. +// Reset to null after each loop-back to enforce re-assessment each iteration. +// +// intakeStep: current step number, kept in sync by IntakePhase.onStepUpdated(). +// The permission fence reads this to block side-effecting tools during the +// read-only Extract step (step 1). +// +// intakeIteration: current loop iteration (1-based), kept in sync by IntakePhase. +// The confidence tool uses this when emitting confidence_change audit events. +// +// eventLog: the active EventLog for the current subagent session. Set during +// before_agent_start after the log file is opened. Tools that need to emit +// audit events (e.g. koan_set_confidence) read this at call time. 
+ +import type { EventLog } from "./audit.js"; + export interface RuntimeContext { epicDir: string | null; subagentDir: string | null; onCompleteStep: ((thoughts: string) => Promise<string | null>) | null; + intakeConfidence: "exploring" | "low" | "medium" | "high" | "certain" | null; + intakeStep: number; + intakeIteration: number; + eventLog: EventLog | null; } export function createRuntimeContext(): RuntimeContext { @@ -16,5 +38,9 @@ export function createRuntimeContext(): RuntimeContext { epicDir: null, subagentDir: null, onCompleteStep: null, + intakeConfidence: null, + intakeStep: 0, + intakeIteration: 1, + eventLog: null, }; } diff --git a/src/planner/phases/base-phase.ts b/src/planner/phases/base-phase.ts index 62bf1a7..6830e20 100644 --- a/src/planner/phases/base-phase.ts +++ b/src/planner/phases/base-phase.ts @@ -11,11 +11,25 @@ // transitions 0→1 and returns step 1 guidance (just-in-time delivery). // Subsequent calls advance through steps until the phase completes. // +// Non-linear step progression: +// Subclasses may override getNextStep() to implement loops or conditional +// transitions. getNextStep() MUST be pure — it only returns the next step +// number. Side effects that accompany a loop decision (state resets, counter +// increments, event emission) belong in onLoopBack(), which handleStepComplete +// calls whenever getNextStep() returns a step number less than the current one. +// +// The default implementation is strictly linear: each step advances to the +// next, and the final step (totalSteps) signals completion by returning null. +// IntakePhase overrides both getNextStep() and onLoopBack() to loop steps 2–4 +// until the confidence gate is satisfied. 
+// // Lifecycle: // constructor → registerHandlers() (hooks event listeners) // begin() → activates phase at step 0, arms onCompleteStep, emits phase_start // handleStepComplete(0) → returns step 1 guidance, emits step_transition(1) -// handleStepComplete(N) → returns step N+1 guidance, or null when done +// handleStepComplete(N) → calls getNextStep(N) to determine next step, +// calls onLoopBack() on backward transitions, +// returns guidance or null when done import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; @@ -50,6 +64,23 @@ export abstract class BasePhase { this.registerHandlers(); } + // -- Non-linear progression hook -- + // + // Returns the step number to transition to after `currentStep` completes, + // or null to signal phase completion. Subclasses override this to implement + // confidence loops, conditional branches, or any other non-linear flow. + // + // MUST be pure: do not mutate state or emit events here. Side effects that + // accompany a loop-back (counter increments, state resets, event emission) + // belong in onLoopBack(), which handleStepComplete calls after this method + // returns a backward step number. + // + // Default: linear progression. The step after totalSteps is null (done). + protected getNextStep(currentStep: number): number | null { + if (currentStep === this.totalSteps) return null; + return currentStep + 1; + } + // -- Event handler registration -- private registerHandlers(): void { @@ -71,8 +102,15 @@ export abstract class BasePhase { event.toolName, this.ctx.epicDir ?? undefined, event.input as Record, + this.ctx.intakeStep, ); if (!perm.allowed) { + void this.eventLog?.append({ + kind: "tool_result", + toolCallId: event.toolCallId, + tool: event.toolName, + error: true, + }); return { block: true, reason: perm.reason }; } return undefined; @@ -113,13 +151,24 @@ export abstract class BasePhase { // boot prompt. Reward it with step 1 guidance. 
This is the critical moment // that establishes the call→receive→work→call pattern for the session. this.step = 1; + this.onStepUpdated(1); const prompt = formatStep(this.getStepGuidance(1)); await this.eventLog?.emitStepTransition(1, this.getStepName(1), this.totalSteps); this.log("Boot transition", { role: this.role, to: 1 }); return prompt; } - if (this.step === this.totalSteps) { + // Validate pre-conditions before advancing (subclasses may override). + const preError = await this.validateStepCompletion(this.step); + if (preError !== null) { + // Return the error as the tool result — the LLM sees it and must fix + // the pre-condition before calling koan_complete_step again. + return preError; + } + + const nextStep = this.getNextStep(this.step); + + if (nextStep === null) { // Phase complete — return null signals koan_complete_step to reply "Phase complete." this.active = false; this.ctx.onCompleteStep = null; @@ -128,12 +177,49 @@ export abstract class BasePhase { return null; } - // Advance to next step. const prev = this.step; - this.step = prev + 1; + this.step = nextStep; + + // If the step went backward (loop-back), give the subclass a chance to + // perform side effects before the new step's guidance is delivered: + // resetting state, incrementing counters, emitting events. This keeps + // getNextStep() pure — it only decides where to go, not what to do there. + if (nextStep < prev) { + await this.onLoopBack(prev, nextStep); + } + + this.onStepUpdated(nextStep); const prompt = formatStep(this.getStepGuidance(this.step)); await this.eventLog?.emitStepTransition(this.step, this.getStepName(this.step), this.totalSteps); this.log("Step transition", { role: this.role, from: prev, to: this.step }); return prompt; } + + // -- Overridable hooks -- + + // Called whenever this.step is updated (including loop-backs). Subclasses + // use this to sync ctx fields (e.g., intakeStep) with the current step. 
+ // eslint-disable-next-line @typescript-eslint/no-unused-vars + protected onStepUpdated(_step: number): void { + // Default: no-op. + } + + // Called when a loop-back occurs (nextStep < previousStep), after this.step + // has been updated but before onStepUpdated() and getStepGuidance() run. + // Subclasses use this to perform side effects that accompany the loop decision + // — resetting state, incrementing counters, emitting events — separate from + // the pure getNextStep() query. The hook is async so event emission can be + // properly awaited, preserving event order in events.jsonl. + // eslint-disable-next-line @typescript-eslint/no-unused-vars + protected async onLoopBack(_from: number, _to: number): Promise<void> { + // Default: no-op. + } + + // Called before advancing from the given step. Return null to allow + // advancement, or an error string to block it (returned as the tool + // result so the LLM sees the message and must fix the pre-condition). + // eslint-disable-next-line @typescript-eslint/no-unused-vars + protected async validateStepCompletion(_step: number): Promise<string | null> { + return null; // Default: no pre-conditions. + } } diff --git a/src/planner/phases/decomposer/prompts.ts b/src/planner/phases/decomposer/prompts.ts index f54b48d..6cbfc8b 100644 --- a/src/planner/phases/decomposer/prompts.ts +++ b/src/planner/phases/decomposer/prompts.ts @@ -35,7 +35,7 @@ This format is sortable and human-readable. - MUST NOT include implementation details (specific functions, algorithms, data structures). - MUST NOT make decisions that require user input. Those belong to intake. -- MUST NOT invent scope not present in context.md or decisions.md. +- MUST NOT invent scope not present in context.md. - MUST produce one story sketch per deliverable unit of work. - SHOULD keep stories small: prefer 4–8 stories over 1–2 large ones. - SHOULD order stories so foundational work (types, interfaces, data models) comes first. 
@@ -69,8 +69,7 @@ export function decomposerStepGuidance(step: number): StepGuidance { "## Files to read", "", "From the epic directory:", - "- `context.md` — structured requirements extracted from the conversation", - "- `decisions.md` — user answers to clarifying questions", + "- `context.md` — intake analysis: conversation context, codebase findings, and user decisions", "", "If scout reports were referenced in your initial instructions above, read them now.", "If no scout reports were mentioned, proceed without them.", diff --git a/src/planner/phases/intake/phase.ts b/src/planner/phases/intake/phase.ts index 5ef4d79..f7becf0 100644 --- a/src/planner/phases/intake/phase.ts +++ b/src/planner/phases/intake/phase.ts @@ -1,6 +1,31 @@ -// Intake phase: reads conversation, extracts context, requests scouts, -// identifies gaps, asks user questions, writes context.md and decisions.md. -// Three-step sequence per §11.2.2. +// Intake phase: reads conversation, scouts codebase, asks clarifying questions, +// and writes context.md — the sole input for all downstream phases. +// +// Five-step workflow with a confidence-gated loop: +// +// Step 1 (Extract) — read-only comprehension of conversation.jsonl +// Step 2 (Scout) — dispatch codebase scouts for targeted exploration +// Step 3 (Deliberate) — enumerate knowns/unknowns, ask user questions +// Step 4 (Reflect) — self-verify completeness, set confidence level +// Step 5 (Synthesize) — write context.md from all accumulated findings +// +// Steps 2–4 form the confidence loop. After Reflect, getNextStep() checks +// ctx.intakeConfidence: +// - If "certain" or max iterations reached → return 5 (Synthesize) +// - Otherwise → return 2 (Scout), triggering a loop-back +// +// getNextStep() is pure — it only returns the next step number. All side effects +// that accompany a loop-back (confidence reset, iteration increment, event emission) +// live in onLoopBack(), which BasePhase calls after detecting a backward transition. 
+// This keeps the two concerns separate and makes getNextStep() safe to reason about. +// +// The loop enforces that koan_set_confidence is called before koan_complete_step +// in Reflect via validateStepCompletion(). Confidence is reset to null in onLoopBack() +// so each iteration requires a fresh assessment. +// +// Step 1 is read-only: the permission fence blocks koan_request_scouts, +// koan_ask_question, koan_set_confidence, write, and edit during that step, +// enforced via ctx.intakeStep which is kept in sync via onStepUpdated(). import * as path from "node:path"; import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; @@ -14,7 +39,15 @@ import type { StepGuidance } from "../../lib/step.js"; export class IntakePhase extends BasePhase { protected readonly role = "intake"; - protected readonly totalSteps = 3; + protected readonly totalSteps = 5; + + // Maximum number of Scout→Deliberate→Reflect iterations before forcing exit + // to Synthesize regardless of confidence level. + private static readonly MAX_ITERATIONS = 4; + + // Current loop iteration (1-based). Starts at 1 for the initial pass through + // steps 2–4; incremented in onLoopBack() each time the loop continues. + private iteration = 1; private readonly conversationPath: string; @@ -34,10 +67,98 @@ export class IntakePhase extends BasePhase { } protected getStepName(step: number): string { - return INTAKE_STEP_NAMES[step] ?? `Step ${step}`; + const base = INTAKE_STEP_NAMES[step] ?? `Step ${step}`; + // Annotate loop steps with the iteration number so the UI shows + // e.g. "Scout (round 2)" instead of just "Scout". 
+ if (step >= 2 && step <= 4 && this.iteration > 1) { + return `${base} (round ${this.iteration})`; + } + return base; } protected getStepGuidance(step: number): StepGuidance { - return intakeStepGuidance(step, this.conversationPath); + return intakeStepGuidance(step, this.conversationPath, this.iteration); + } + + // -- Non-linear progression: pure query, no side effects -- + // + // Step 4 (Reflect) is the loop gate. Returns 2 (Scout) to loop back, or 5 + // (Synthesize) to exit. Side effects for the loop-back case (iteration + // increment, confidence reset, event emission) live in onLoopBack(). + protected getNextStep(currentStep: number): number | null { + if (currentStep === 4) { + const confidence = this.ctx.intakeConfidence; + const isExhausted = this.iteration >= IntakePhase.MAX_ITERATIONS; + + if (confidence === "certain" || isExhausted) { + if (isExhausted && confidence !== "certain") { + this.log("Max iterations reached — forcing exit to Synthesize", { + iteration: this.iteration, + confidence, + }); + } + return 5; + } + + // Signal loop-back. onLoopBack() handles the side effects. + return 2; + } + + // Step 5 (Synthesize) is the final step. + if (currentStep === 5) return null; + + // All other steps: linear progression. + return currentStep + 1; + } + + // -- Loop-back side effects -- + // + // Called by BasePhase after getNextStep() returns a backward step number. + // Increments the iteration counter, resets confidence so the next Reflect + // step requires a fresh assessment, and emits the iteration_start event. + // Properly awaited so the event appears in correct sequence in events.jsonl. 
+ protected override async onLoopBack(_from: number, _to: number): Promise<void> { + this.iteration++; + this.ctx.intakeConfidence = null; + this.ctx.intakeIteration = this.iteration; + await this.eventLog?.emitIterationStart(this.iteration, IntakePhase.MAX_ITERATIONS); + this.log("Confidence loop: iterating", { newIteration: this.iteration }); + } + + // -- Pre-condition enforcement for Reflect (step 4) -- + // + // The LLM must call koan_set_confidence before koan_complete_step during + // the Reflect step. If it hasn't, we return an error message that the LLM + // sees as the tool result — it must fix the pre-condition before retrying. + protected async validateStepCompletion(step: number): Promise<string | null> { + if (step === 4 && this.ctx.intakeConfidence === null) { + return "You must call koan_set_confidence before completing the Reflect step. " + + "Assess your confidence level based on the verification questions you answered, " + + "then call koan_set_confidence, then call koan_complete_step."; + } + return null; + } + + // -- Sync ctx fields whenever the active step changes -- + // + // ctx.intakeStep is read by the permission fence to block side-effecting tools + // during the read-only Extract step (step 1). + // + // iteration_start is emitted here for iteration 1 when Scout (step 2) is first + // entered. Subsequent iterations emit iteration_start via onLoopBack(). This + // ensures the web UI always knows which iteration is active from the moment + // scouting begins, not just after the first confidence assessment. + // + // The void on emitIterationStart is intentional: onStepUpdated is synchronous. + // EventLog.append() serializes all appends via an internal promise queue, so + // this event is enqueued before the emitStepTransition that follows in + // handleStepComplete, preserving correct order in events.jsonl. 
+ protected override onStepUpdated(step: number): void { + this.ctx.intakeStep = step; + this.ctx.intakeIteration = this.iteration; + + if (step === 2 && this.iteration === 1) { + void this.eventLog?.emitIterationStart(1, IntakePhase.MAX_ITERATIONS); + } } } diff --git a/src/planner/phases/intake/prompts.ts b/src/planner/phases/intake/prompts.ts index 0e13445..de1525e 100644 --- a/src/planner/phases/intake/prompts.ts +++ b/src/planner/phases/intake/prompts.ts @@ -1,174 +1,334 @@ -// Intake phase prompts — 3-step sequence per §11.2.2: -// Step 1: Context extraction (read conversation → write context.md) -// Step 2: Codebase scouting (call koan_request_scouts with targeted questions) -// Step 3: Gap analysis + questions (review findings → ask user → write decisions.md) +// Intake phase prompts — 5-step workflow with a confidence-gated loop. +// +// Step 1 (Extract) — read-only comprehension of conversation.jsonl +// Step 2 (Scout) — dispatch codebase scouts for targeted exploration +// Step 3 (Deliberate) — enumerate knowns/unknowns, formulate & ask questions +// Step 4 (Reflect) — self-verify completeness, declare confidence level +// Step 5 (Synthesize) — write context.md from all accumulated findings +// +// Steps 2–4 repeat until the LLM declares "certain" confidence (or max +// iterations are exhausted). The iteration parameter is threaded through +// intakeStepGuidance() to produce iteration-aware prompts for steps 2–4: +// first-iteration guidance focuses on initial exploration; subsequent +// iterations focus on narrowing remaining gaps from the previous reflection. +// +// Design note — Prompt Chaining over Stepwise: +// Each step has exactly one cognitive goal (scout / deliberate / reflect). +// This prevents the "simulated refinement" anti-pattern where a monolithic +// prompt causes the model to artificially downgrade its draft quality to +// manufacture visible improvement. 
+ Separate koan_complete_step calls enforce +// genuinely isolated reasoning for each phase of the loop. import type { StepGuidance } from "../../lib/step.js"; export const INTAKE_STEP_NAMES: Record<number, string> = { - 1: "Context Extraction", - 2: "Codebase Scouting", - 3: "Gap Analysis & Questions", + 1: "Extract", + 2: "Scout", + 3: "Deliberate", + 4: "Reflect", + 5: "Synthesize", }; export function intakeSystemPrompt(): string { - return `You are an intake analyst for a coding task planner. You read a conversation history, extract structured context, explore the codebase via scouts, and ask the user targeted clarifying questions grounded in both the conversation and what actually exists in the codebase. + return `You are an intake analyst for a coding task planner. You read a conversation history, explore the codebase, and ask the user targeted questions until you have complete context for planning. + +Your output — a single context.md file — is the sole foundation for all downstream work. Every story boundary, every implementation plan, and every line of code written downstream depends on the quality and completeness of this file. Gaps here compound into wrong plans and wrong code. ## Your role -You extract and organize information. You do NOT plan, design, or implement. +You extract, verify, and organize information. You do NOT plan, design, or implement. -## Strict rules — violations invalidate your output +## Strict rules -- MUST NOT infer decisions that were not explicitly stated in the conversation. +- MUST NOT infer decisions not explicitly stated in the conversation. - MUST NOT add architectural opinions or suggest approaches. -- MUST NOT summarize, paraphrase, or analyze code beyond extracting factual references. -- MUST NOT produce implementation recommendations of any kind. -- MUST only capture what was explicitly said. If something is unclear, note it as an unresolved question. -- MUST ask at most 8 questions total. Prioritize the most important gaps. 
+- MUST NOT produce implementation recommendations. +- MUST capture only what was explicitly said. If unclear, mark it as unresolved. - SHOULD prefer multiple-choice questions when the answer space is bounded. -- SHOULD ask open-ended questions only when the space of valid answers is genuinely unbounded. -- SHOULD ask questions grounded in what you found in the codebase (e.g., "the codebase uses X — should this story follow the same pattern or switch to Y?"). +- SHOULD ground questions in codebase findings. + +## Workflow -## Output files +You work in a loop: scout the codebase, think through what you know, ask the user questions, then verify your understanding. You repeat until you are certain the decomposer has everything it needs. -You write two files, both inside the epic directory: +## Output -1. **context.md** — structured extraction of what was said in the conversation. -2. **decisions.md** — answers to the questions you asked the user. +One file: **context.md** in the epic directory. -## Tools available +## Tools -- All read tools (read, bash, grep, glob, find, ls) — for reading the conversation and codebase. -- \`koan_request_scouts\` — to request parallel codebase exploration. -- \`koan_ask_question\` — to ask the user clarifying questions via IPC. -- \`write\` / \`edit\` — for writing output files inside the epic directory only. -- \`koan_complete_step\` — to signal step completion with your findings.`; +- Read tools (read, bash, grep, glob, find, ls) — reading the conversation and codebase. +- \`koan_request_scouts\` — request parallel codebase exploration. +- \`koan_ask_question\` — ask the user clarifying questions. +- \`koan_set_confidence\` — declare your confidence level. +- \`write\` / \`edit\` — for writing context.md (final step only). 
+- \`koan_complete_step\` — signal step completion.`; } -export function intakeStepGuidance(step: number, conversationPath?: string): StepGuidance { +export function intakeStepGuidance(step: number, conversationPath?: string, iteration = 1): StepGuidance { switch (step) { + // ------------------------------------------------------------------------- + // Step 1: Extract — read the conversation, build a mental model. + // + // This step is intentionally read-only. The permission fence blocks + // koan_request_scouts, koan_ask_question, koan_set_confidence, write, and + // edit during step 1 so that comprehension cannot be short-circuited by + // premature action. + // ------------------------------------------------------------------------- case 1: return { title: INTAKE_STEP_NAMES[1], instructions: [ - "Read the conversation file and extract structured context into `context.md`.", + "Read the conversation file. Build a thorough mental model of what is being requested.", "", conversationPath ? `Conversation file: ${conversationPath}` : "Conversation file: locate `conversation.jsonl` in the epic directory.", "", - "The conversation file is JSONL (JSON Lines). Each line is a JSON object.", - "Look for entries with type 'message' and role 'user' or 'assistant' for content.", - "Ignore internal session entries (header, compaction, etc.).", - "", - "Write `context.md` to the epic directory with these exact sections:", - "", - "## Topic", - "One paragraph describing what is being built or changed. Use only information explicitly stated in the conversation.", - "", - "## File References", - "List every file, directory, or module mentioned in the conversation. One item per line.", - "If none were mentioned, write: (none mentioned)", + "The file is JSONL. 
Each line is a JSON object.", + "Read entries with type 'message' and role 'user' or 'assistant'.", + "Ignore internal entries (header, compaction, etc.).", "", - "## Decisions Made", - "List every decision that was explicitly stated and agreed upon. Format: `- [decision text]`", - "A decision must be explicitly stated — do not infer from context.", - "If none were made, write: (none recorded)", + "## What to internalize", "", - "## Constraints", - "List every explicit constraint: technical, timeline, compatibility, budget, etc.", - "If none were stated, write: (none stated)", + "As you read, track these categories:", + "- **Topic**: What is being built or changed?", + "- **File references**: Every file, directory, or module mentioned.", + "- **Decisions already made**: Only those explicitly stated and agreed upon.", + "- **Constraints**: Technical, timeline, compatibility requirements.", + "- **Gaps**: Questions raised but unanswered. Things unclear or unstated that would affect story boundaries.", "", - "## Unresolved Questions", - "List every question raised in the conversation that was NOT answered.", - "Also list any gaps you observe — things that must be known before planning can proceed.", - "Format: `- [question or gap description]`", + "## Rules for this step", "", - "Be faithful to the conversation. Do not invent context.", + "- Do NOT call koan_request_scouts, koan_ask_question, koan_set_confidence, write, or edit.", + "- This step is read-only. Understand the conversation before acting on it.", + "- Be faithful to what was said. Do not invent context or infer unstated decisions.", + "- If the conversation references specific files or systems, note them — you will scout those next.", ], }; + // ------------------------------------------------------------------------- + // Step 2: Scout — dispatch codebase investigators. 
+ // + // Iteration-aware: first iteration explores based on the conversation; + // subsequent iterations follow up on gaps from the previous Reflect step. + // This is a focused step — do NOT ask the user questions here. + // ------------------------------------------------------------------------- case 2: return { title: INTAKE_STEP_NAMES[2], instructions: [ - "Based on the file references and topic in context.md, identify what needs codebase exploration.", + iteration === 1 + ? "Based on your reading of the conversation, identify areas of the codebase that need exploration." + : "Based on gaps identified in your previous reflection, identify follow-up areas to explore.", "", - "Use `koan_request_scouts` to gather codebase context before asking the user questions.", - "This grounds the questions in what actually exists — preventing questions the codebase already answers.", + "## What to scout", "", - "## When to scout", + "Use `koan_request_scouts` to dispatch parallel codebase investigators.", + "Each scout answers one narrow question. Formulate 1–5 scout tasks.", "", - "Scout when context.md mentions:", - "- Specific files, modules, or packages that should be verified or understood.", - "- Integration points with existing code (APIs, databases, auth, etc.).", - "- Areas where the user's assumptions may not match the codebase (e.g., 'we use React' but you should verify).", - "", - "Formulate 1–5 focused scout tasks. Each scout answers one narrow question.", - "", - "## Scout task format", + "Scout when:", + "- The conversation references specific files, modules, or systems.", + "- Integration points with existing code need verification (APIs, databases, auth).", + "- User assumptions about the codebase might not match reality.", + ...(iteration > 1 ? 
["- Previous scout findings raised new questions or revealed unexpected patterns."] : []), "", "Each scout needs:", - "- id: short kebab-case identifier (e.g., 'auth-setup', 'api-structure')", - "- role: a focused investigator role (e.g., 'auth system auditor', 'API structure analyst')", - "- prompt: exactly what to find (e.g., 'Find all auth-related files and identify which auth library is used')", + "- id: short kebab-case identifier (e.g., 'auth-setup')", + "- role: investigator focus (e.g., 'authentication auditor')", + "- prompt: what to find (e.g., 'Find all auth middleware in src/ and identify the auth library used')", "", "## If no scouting is needed", "", - "If context.md has no file references and the topic is purely conceptual (no codebase inspection needed),", - "skip scouting and call koan_complete_step with: 'Scouting skipped — no codebase references in context.'", + "If the topic is purely conceptual and no codebase inspection is needed, skip scouting.", + "Do NOT ask the user questions in this step — that happens in Deliberate.", ], }; + // ------------------------------------------------------------------------- + // Step 3: Deliberate — enumerate knowns/unknowns, ask questions. + // + // Thread-of-Thought technique: explicitly walking through each area before + // formulating questions prevents asking things already answered and surfaces + // gaps that would otherwise be missed. + // + // Iteration-aware: first iteration covers all areas; subsequent iterations + // focus on new information and updated understanding. + // ------------------------------------------------------------------------- case 3: return { title: INTAKE_STEP_NAMES[3], instructions: [ - "Review `context.md` and scout findings together. Identify gaps. Ask the user. 
Write `decisions.md`.", + "Before asking questions, explicitly enumerate what you know and what you don't.", + "This grounds your questions in reality and prevents asking things already answered.", + "", + "## Phase A: Recite what you know", + "", + "Walk through each area relevant to the task and state what you have learned.", + "Use this structure for each area:", + "", + " **[Area name]** (e.g., 'Authentication', 'Database schema', 'API endpoints')", + " - Known: [what the conversation and/or scouts established]", + " - Unknown: [what remains unclear or unverified]", + " - Source: [conversation / scout findings / user answer from round N]", "", - "## Gap identification criteria", + iteration === 1 + ? "Cover every area relevant to the task. Be thorough — gaps you miss here become gaps in the final output." + : "Focus on areas where new information arrived since last round. Re-state updated understanding.", "", - "Ask about a gap if:", - "- The answer materially changes WHAT is built (scope, features, API shape).", - "- The answer materially changes HOW the work is sequenced (dependencies, ordering).", - "- Without the answer, the decomposer cannot split the work into stories.", - "- Scout findings reveal a contradiction with what the user described (e.g., user said 'we use Postgres' but scout found SQLite).", + "## Phase B: Formulate and ask questions", "", - "Do NOT ask about:", - "- Implementation choices (those belong to the planner role).", - "- Things the scout findings already answered.", - "- Nice-to-have clarifications that don't change the plan.", + "Review your 'Unknown' items. For each, decide:", + "- Can a follow-up scout answer this? → Note it for the next scout round.", + "- Must the user decide this? → Include it in your questions.", + "- Is this an implementation detail the planner should decide? 
→ Skip it.", "", - "## Asking questions", + "Ask about a gap ONLY if:", + "- It materially changes WHAT is built (scope, features, API shape).", + "- It materially changes HOW work is sequenced (dependencies, ordering).", + "- Without the answer, story boundaries cannot be determined.", + "- Scout findings contradict what the user described.", "", - "Use `koan_ask_question` to send questions to the user. Maximum 8 questions.", + "Use `koan_ask_question`. Limit: 5 questions per round.", "Prefer multiple-choice when the answer space is bounded.", - "Reference scout findings in questions when relevant: 'The codebase uses X — should this follow the same pattern?'", + "Ground questions in specific findings: 'Scout found X — should this story follow the same pattern?'", + "", + "## If no questions are needed", + "", + "If all 'Unknown' items are either implementation details or answerable by follow-up scouts,", + "you may skip asking questions. Your recitation of knowns/unknowns is still required.", + ], + }; + + // ------------------------------------------------------------------------- + // Step 4: Reflect — verify completeness, declare confidence. + // + // Chain-of-Verification (CoVe) technique: the LLM generates its own + // verification questions and answers them using only gathered evidence + // (not intuition). This surfaces gaps that casual self-assessment misses. + // + // Metacognitive structure: understand → judge → critique → decide → assess. + // The "certain" level has a contrastive definition (positive checklist + + // "you are NOT certain if" list) to prevent premature exits from the loop. + // + // REQUIRED: koan_set_confidence must be called before koan_complete_step. + // The phase handler enforces this — koan_complete_step will be rejected + // with an error message if confidence has not been set. 
+ // ------------------------------------------------------------------------- + case 4: + return { + title: INTAKE_STEP_NAMES[4], + instructions: [ + "Verify the completeness of your understanding before deciding whether to continue or stop.", + "This step is pure verification — do not scout or ask questions here.", + "", + "## Step 1: Verification questions", + "", + "Generate 3–5 questions that test whether your understanding is complete.", + "Frame them from the decomposer's perspective — the decomposer must split this work into stories.", + "", + "Example verification questions:", + "- 'Could I define the boundary between story 1 and story 2 right now?'", + "- 'If the user's codebase uses pattern X (per scout), does our understanding account for that?'", + "- 'Are there any user decisions that could split one story into two or merge two into one?'", + "", + "## Step 2: Answer each question", + "", + "Answer each verification question using ONLY evidence you have:", + "- Direct quotes or facts from the conversation", + "- Specific findings from scouts", + "- Explicit answers from the user", + "", + "If you cannot answer a verification question with evidence, that is a gap.", + "", + "## Step 3: Assess confidence", + "", + "Based on your verification answers, call `koan_set_confidence`.", + "", + "**certain** — all verification questions answered with evidence. The decomposer can define every story boundary.", + "**high** — most questions answered. Remaining unknowns would not change story structure.", + "**medium** — broad shape understood, but specific boundaries or sequencing decisions are unclear.", + "**low** — major gaps remain. 
Cannot define story boundaries.", + "**exploring** — have not yet scouted or asked questions.", + "", + "### Certain means ALL of these are true:", + "- Topic and scope are unambiguous.", + "- Codebase architecture relevant to the task is understood.", + "- All user decisions affecting story boundaries have been made.", + "- No question you could ask would change the number, order, or scope of stories.", + "", + "### You are NOT certain if:", + "- A scout revealed something surprising that needs follow-up.", + "- A user answer raised a new question you haven't explored.", + "- You skipped scouting an area that might affect story boundaries.", + "- You're unsure whether two pieces of work should be one story or two.", + "", + "## Step 4: If not certain, plan the next round", + "", + "If confidence < certain, briefly note:", + "- What gaps remain?", + "- Should the next round focus on scouting, asking, or both?", + "- What specific areas need follow-up?", + "", + "This plan will guide your next Scout step.", + ], + invokeAfter: [ + "WHEN DONE: First call koan_set_confidence, then call koan_complete_step.", + "You MUST call koan_set_confidence before koan_complete_step — step completion will be rejected without it.", + "Do NOT call koan_complete_step until you have worked through all four steps above.", + ].join("\n"), + }; + + // ------------------------------------------------------------------------- + // Step 5: Synthesize — write context.md. + // + // This step runs once, after the confidence loop exits. The LLM consolidates + // everything gathered across all iterations into a single structured file. + // + // A pre-write verification checklist ensures the output serves the + // decomposer's needs: if any checklist question cannot be answered, it must + // be noted in Open Items rather than silently omitted. 
+ // ------------------------------------------------------------------------- + case 5: + return { + title: INTAKE_STEP_NAMES[5], + instructions: [ + "Write `context.md` to the epic directory.", + "This file is the sole input for all downstream phases. Write it carefully.", + "", + "## Required sections", + "", + "### Topic", + "One paragraph: what is being built or changed. Facts from the conversation only.", + "", + "### Codebase Findings", + "Key findings from scouts: architecture, patterns, existing code, integration points.", + "Organize by area, not by scout task or iteration.", + "If no scouts were needed: (no codebase exploration was needed)", "", - "## Writing decisions.md", + "### Decisions", + "Every question asked and the user's answer, across all rounds.", + "Format: **Q: [question]** / A: [answer]", + "If no questions were needed: (no questions were needed — context was sufficient)", "", - "After the user responds, write `decisions.md` to the epic directory:", + "### Constraints", + "All constraints discovered: from conversation, from codebase (scouts), from user answers.", + "If none: (none identified)", "", - "## Answers", - "For each question asked, record the question and the user's answer.", - "Format:", - "```", - "**Q: [question text]**", - "A: [user's answer]", - "```", + "### Open Items", + "Anything unresolved. Should be empty or near-empty if confidence was 'certain'.", + "If none: (none)", "", - "## Remaining Unknowns", - "List any gaps that remain unresolved. 
If none: write (none)", + "## Pre-write verification", "", - "If there were no meaningful gaps, write:", - "`## Answers\\n(no questions were needed — context and codebase survey were sufficient)`", + "Before writing, verify context.md answers these questions (the decomposer needs them):", + "- What is the top-level goal?", + "- What are the distinct deliverable units of work?", + "- What existing code does this touch and how is it structured?", + "- What decisions constrain how the work is split?", + "- Are there dependencies between work units?", "", - "Then call `koan_complete_step` with a brief summary:", - "- File references found", - "- Scouts requested and key findings", - "- Questions asked and answered", - "- Any remaining unknowns", + "If you cannot answer any of these from what you've gathered, note it in Open Items.", ], }; diff --git a/src/planner/phases/orchestrator/prompts.ts b/src/planner/phases/orchestrator/prompts.ts index efafd41..942ee78 100644 --- a/src/planner/phases/orchestrator/prompts.ts +++ b/src/planner/phases/orchestrator/prompts.ts @@ -41,7 +41,7 @@ You are a decision-maker. You read content, apply judgment, and direct the workf - **Verification**: Running the checks defined in a story's verify.md to determine whether the implementation is correct. - **Verdict**: Declaring the outcome of a story's execution — success or retry with feedback. - **Story selection**: Choosing which story executes next based on the dependency graph and current epic state. -- **Learning propagation**: When you discover something during verification, update remaining story.md files and decisions.md. Mark every autonomous update with \`[autonomous]\`. +- **Learning propagation**: When you discover something during verification, update remaining story.md files and the Decisions section of context.md. Mark every autonomous update with \`[autonomous]\`. 
- **User communication**: When you encounter genuine ambiguity or need human judgment, call \`koan_ask_question\`. After getting the answer, decide what to do (retry with new context, skip, etc.) and call the appropriate tool. ## When to ask the user @@ -92,7 +92,7 @@ export function orchestratorPreStepGuidance(step: number): StepGuidance { "## What to read", "", "1. Read `epic.md` in the epic directory — understand the overall goal and scope.", - "2. Read `decisions.md` in the epic directory — understand decisions that shape execution.", + "2. Read the Decisions section of `context.md` in the epic directory — understand decisions that shape execution.", "3. Read each `story.md` file for every story in the epic — understand what each story builds and depends on.", "", "## What to analyze", @@ -223,7 +223,7 @@ export function orchestratorPostStepGuidance(step: number, storyId?: string): St return { title: ORCHESTRATOR_POST_STEP_NAMES[3], instructions: [ - "Propagate lessons from this story's execution to remaining stories and the decisions log.", + "Propagate lessons from this story's execution to remaining stories and the Decisions section of context.md.", "", "## What to propagate", "", @@ -240,7 +240,7 @@ export function orchestratorPostStepGuidance(step: number, storyId?: string): St "1. Read its `story.md`.", "2. 
Add a `## [autonomous] Propagated Context` section with the relevant information.", "", - "Update `decisions.md` if a new decision was made or an existing one was invalidated.", + "Update the Decisions section of `context.md` if a new decision was made or an existing one was invalidated.", "Add `[autonomous]` prefix to any autonomous additions.", "", "If no propagation is needed, skip file updates and proceed.", diff --git a/src/planner/phases/planner/prompts.ts b/src/planner/phases/planner/prompts.ts index 1b6a9e0..bff6125 100644 --- a/src/planner/phases/planner/prompts.ts +++ b/src/planner/phases/planner/prompts.ts @@ -11,7 +11,7 @@ export function plannerSystemPrompt(): string { ## Your role -You read stories, codebase artifacts, and scout reports, then produce three output files: a step-by-step plan, a curated code context file, and a verification checklist. You do NOT write code. You do NOT make design decisions beyond what the story and decisions log specify. +You read stories, codebase artifacts, and scout reports, then produce three output files: a step-by-step plan, a curated code context file, and a verification checklist. You do NOT write code. You do NOT make design decisions beyond what the story and context.md specify. ## What you produce @@ -74,7 +74,7 @@ export function plannerStepGuidance(step: number, storyId: string): StepGuidance "## What to read", "", `1. Read \`stories/${storyId}/story.md\` in the epic directory — understand exactly what this story must accomplish, its acceptance criteria, and any noted constraints or dependencies.`, - "2. Read `decisions.md` in the epic directory — understand the architectural decisions and open questions that apply to this story. If a decision is marked as unresolved, check whether it blocks this story.", + "2. Read `context.md` in the epic directory — understand the scope, codebase findings, constraints, and decisions that apply to this story. 
If a decision is marked as unresolved, check whether it blocks this story.", "3. Read the scout reports returned by `koan_request_scouts` for current codebase context.", "", "## What to analyze", @@ -93,7 +93,7 @@ export function plannerStepGuidance(step: number, storyId: string): StepGuidance "- The list of files that will be modified or created", "- The sequence you plan for the steps (high-level)", "- Any risks or unresolved questions you identified", - "- Whether any open decisions in decisions.md block this story", + "- Whether any open decisions in context.md block this story", ], }; diff --git a/src/planner/tools/confidence.ts b/src/planner/tools/confidence.ts new file mode 100644 index 0000000..f293614 --- /dev/null +++ b/src/planner/tools/confidence.ts @@ -0,0 +1,76 @@ +// koan_set_confidence tool — intake phase confidence gate. +// +// Called by the intake agent during the Reflect step (step 4) to declare its +// current confidence that sufficient context has been gathered for the +// decomposer to split the work into stories. +// +// The IntakePhase reads ctx.intakeConfidence in getNextStep() to decide +// whether to loop back to Scout (step 2) or advance to Synthesize (step 5). +// Confidence is reset to null at every loop-back, so each Reflect step +// requires a fresh assessment — carry-over from a previous iteration is +// not possible. +// +// Confidence changes are appended to events.jsonl via the EventLog. The +// web server polls state.json (the folded projection) and can push SSE events +// to the UI when the intakeConfidence or intakeIteration fields change. + +import { Type } from "@sinclair/typebox"; +import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; + +import type { RuntimeContext } from "../lib/runtime-context.js"; + +// All valid confidence levels, ordered from least to most confident. 
+export type ConfidenceLevel = "exploring" | "low" | "medium" | "high" | "certain"; + +const CONFIDENCE_TOOL_DESCRIPTION = ` +Declare your current confidence that you have gathered sufficient context for the decomposer to split the work into stories. + +Call this BEFORE koan_complete_step during the Reflect step. Required — step completion will be rejected without it. + +Levels (from lowest to highest): +- exploring: Just started. Have not yet scouted or asked questions. +- low: Major gaps. Cannot define story boundaries. +- medium: Broad shape understood, specific boundaries unclear. +- high: Scope, boundaries, key decisions understood. Minor unknowns remain that would not change story structure. +- certain: Decomposer has everything it needs. No question would change story boundaries. +`.trim(); + +export function registerConfidenceTool(pi: ExtensionAPI, ctx: RuntimeContext): void { + pi.registerTool({ + name: "koan_set_confidence", + label: "Set intake confidence", + description: CONFIDENCE_TOOL_DESCRIPTION, + parameters: Type.Object({ + level: Type.Union( + [ + Type.Literal("exploring"), + Type.Literal("low"), + Type.Literal("medium"), + Type.Literal("high"), + Type.Literal("certain"), + ], + { description: "Your current confidence level (exploring | low | medium | high | certain)" }, + ), + }), + async execute(_toolCallId, params) { + const { level } = params as { level: ConfidenceLevel }; + + // Store on context so IntakePhase.getNextStep() can read it at step completion. + ctx.intakeConfidence = level; + + // Emit a confidence_change audit event. The EventLog folds it into + // state.json (updating intakeConfidence and intakeIteration fields), + // which the web server polls to push SSE events to the UI. + if (ctx.eventLog) { + // ctx.intakeIteration is set by IntakePhase.onStepUpdated() when each step + // is entered, so it always reflects the current iteration at tool call time. 
+ await ctx.eventLog.emitConfidenceChange(level, ctx.intakeIteration); + } + + return { + content: [{ type: "text" as const, text: `Confidence set to ${level}.` }], + details: undefined, + }; + }, + }); +} diff --git a/src/planner/tools/index.ts b/src/planner/tools/index.ts index 6383a34..51f62fe 100644 --- a/src/planner/tools/index.ts +++ b/src/planner/tools/index.ts @@ -8,6 +8,7 @@ import type { RuntimeContext } from "../lib/runtime-context.js"; import { registerWorkflowTools } from "./workflow.js"; import { registerOrchestratorTools } from "./orchestrator.js"; import { registerAskTools } from "./ask.js"; +import { registerConfidenceTool } from "./confidence.js"; export type { RuntimeContext } from "../lib/runtime-context.js"; export { createRuntimeContext } from "../lib/runtime-context.js"; @@ -16,4 +17,5 @@ export function registerAllTools(pi: ExtensionAPI, ctx: RuntimeContext): void { registerWorkflowTools(pi, ctx); registerOrchestratorTools(pi, ctx); registerAskTools(pi, ctx); + registerConfidenceTool(pi, ctx); } diff --git a/src/planner/web/js/components/phases/Consolidation.jsx b/src/planner/web/js/components/phases/Consolidation.jsx index 5af7e54..96c3c51 100644 --- a/src/planner/web/js/components/phases/Consolidation.jsx +++ b/src/planner/web/js/components/phases/Consolidation.jsx @@ -21,7 +21,7 @@ export function Consolidation() { )}
- Writing decisions.md... + Writing context.md...
{logs.length > 0 && ( From 88f8f42bdf3b16f76e1b974988fb10911cb13f7f Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Thu, 19 Mar 2026 20:48:05 +0700 Subject: [PATCH 065/412] thinking cards in activity feed with centered layout --- src/planner/web/css/animations.css | 22 +++ src/planner/web/css/layout.css | 70 ++++++++- .../web/js/components/ActivityFeed.jsx | 146 ++++++++++++++---- 3 files changed, 211 insertions(+), 27 deletions(-) diff --git a/src/planner/web/css/animations.css b/src/planner/web/css/animations.css index 046b2b8..d5a4680 100644 --- a/src/planner/web/css/animations.css +++ b/src/planner/web/css/animations.css @@ -38,3 +38,25 @@ from { opacity: 1; transform: translateY(0); } to { opacity: 0; transform: translateY(8px); } } + +/* Thinking indicator */ +@keyframes thinking-pulse { + 0%, 100% { opacity: 0.3; } + 50% { opacity: 1; } +} + +.thinking-dot { + animation: thinking-pulse 1.5s ease-in-out infinite; +} + +.thinking-timer { + color: var(--text-muted); + font-variant-numeric: tabular-nums; + margin-left: 0.4em; +} + +.agent-doing-thinking { + color: var(--text-muted); +} + + diff --git a/src/planner/web/css/layout.css b/src/planner/web/css/layout.css index b20b374..43e5214 100644 --- a/src/planner/web/css/layout.css +++ b/src/planner/web/css/layout.css @@ -133,7 +133,75 @@ .activity-feed-inner { display: flex; flex-direction: column; - gap: 1px; + gap: 2px; + max-width: 960px; + margin: 0 auto; +} + +/* ---- Activity cards (thinking, future: tool results) ---- */ + +.activity-card { + background: var(--bg-surface); + border: 1px solid var(--border); + border-radius: var(--radius-md); + margin: var(--gap-xs) 0; + overflow: hidden; +} + +.activity-card-active { + border-color: var(--blue-border); +} + +.activity-card-header { + display: flex; + justify-content: space-between; + align-items: center; + padding: var(--gap-xs) var(--gap-md); + font-family: var(--font-mono); + font-size: var(--font-size-sm); +} + +.activity-card-tool { + color: 
var(--text-muted); +} + +.activity-card-thinking .activity-card-tool { + color: var(--purple); +} + +.activity-card-meta { + color: var(--text-dim); + font-size: var(--font-size-xs); +} + +.activity-card-body { + padding: 0 var(--gap-md) var(--gap-sm); + font-family: var(--font-mono); + font-size: var(--font-size-xs); + color: var(--text-dim); + white-space: pre-wrap; + word-break: break-word; + line-height: 1.5; +} + +.activity-card-body:not(.expanded) { + display: -webkit-box; + -webkit-line-clamp: 3; + -webkit-box-orient: vertical; + overflow: hidden; +} + +.activity-card-more { + padding: 2px var(--gap-md) var(--gap-sm); + font-family: var(--font-mono); + font-size: var(--font-size-xs); + color: var(--blue); + cursor: pointer; + user-select: none; +} + +.activity-card-more:hover { + color: var(--text-strong); } .activity-line { diff --git a/src/planner/web/js/components/ActivityFeed.jsx b/src/planner/web/js/components/ActivityFeed.jsx index ed71786..f3c5c50 100644 --- a/src/planner/web/js/components/ActivityFeed.jsx +++ b/src/planner/web/js/components/ActivityFeed.jsx @@ -1,6 +1,106 @@ -import { useRef, useEffect, useState } from 'preact/hooks' +import { useRef, useEffect, useState, useCallback } from 'preact/hooks' import { useStore } from '../store.js' +function ThinkingTimer({ since }) { + const [elapsed, setElapsed] = useState(0) + + useEffect(() => { + const start = new Date(since).getTime() + const tick = () => setElapsed(Math.floor((Date.now() - start) / 1000)) + tick() + const id = setInterval(tick, 1000) + return () => clearInterval(id) + }, [since]) + + const text = elapsed < 60 + ? 
`${elapsed}s` + : `${Math.floor(elapsed / 60)}m ${elapsed % 60}s` + + return {text} +} + +/** Card for thinking entries — shows expandable thought content */ +function ThinkingCard({ line, isInFlight, isFlashing }) { + const [expanded, setExpanded] = useState(false) + const bodyRef = useRef(null) + const [isClamped, setIsClamped] = useState(false) + + // Detect whether the body text is actually clamped (more content than visible) + useEffect(() => { + const el = bodyRef.current + if (el) setIsClamped(el.scrollHeight > el.clientHeight + 2) + }, [line.body, expanded]) + + const cls = [ + 'activity-card', + 'activity-card-thinking', + isInFlight ? 'activity-card-active' : '', + isFlashing ? 'activity-flash' : '', + ].filter(Boolean).join(' ') + + return ( +
+
+ thinking + + {isInFlight + ? + : line.summary + } + +
+ {line.body && ( + <> +
+ {line.body} +
+ {(isClamped && !expanded) && ( +
setExpanded(true)}> + show more ▸ +
+ )} + {expanded && ( +
setExpanded(false)}> + show less ▴ +
+ )} + + )} +
+ ) +} + +/** Standard line for tool calls and lifecycle events */ +function ActivityLine({ line, isInFlight, isFlashing }) { + const cls = [ + 'activity-line', + line.highValue ? 'activity-high' : '', + isInFlight ? 'activity-inflight' : '', + isFlashing ? 'activity-flash' : '', + ].filter(Boolean).join(' ') + + return ( + <> +
+ {line.tool} + + {line.summary || ''} + {isInFlight && ...} + +
+ {line.details?.map((d, j) => ( +
+ + {d} +
+ ))} + + ) +} + export function ActivityFeed() { const logs = useStore(s => s.logs) const containerRef = useRef(null) @@ -29,12 +129,11 @@ export function ActivityFeed() { prevLastRef.current = lastLine ? { ...lastLine } : null }, [logs]) - function onScroll() { + const onScroll = useCallback(() => { const el = containerRef.current if (!el) return - // "At bottom" if within 30px of the end. stickRef.current = el.scrollTop + el.clientHeight >= el.scrollHeight - 30 - } + }, []) if (logs.length === 0) return null @@ -42,32 +141,27 @@ export function ActivityFeed() {
{logs.map((line, i) => { - // Only the last line can be in-flight — earlier lines are always done. const isInFlight = !!line.inFlight && i === logs.length - 1 const isFlashing = i === flashIndex - const cls = [ - 'activity-line', - line.highValue ? 'activity-high' : '', - isInFlight ? 'activity-inflight' : '', - isFlashing ? 'activity-flash' : '', - ].filter(Boolean).join(' ') + + if (line.tool === 'thinking') { + return ( + + ) + } return ( - <> -
- {line.tool} - - {line.summary || ''} - {isInFlight && ...} - -
- {line.details?.map((d, j) => ( -
- - {d} -
- ))} - + ) })}
From cc27be69a3c8ea194408ed9bd7f501e5ab224332 Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Thu, 19 Mar 2026 21:00:31 +0700 Subject: [PATCH 066/412] scout queued status, agent monitor UI, configurable concurrency --- src/planner/lib/ipc-responder.ts | 11 ++- src/planner/model-config.ts | 39 ++++++++ src/planner/web/css/components.css | 57 +++++++++-- .../web/js/components/AgentMonitor.jsx | 15 ++- src/planner/web/js/components/AgentRow.jsx | 99 ++++++++++++++----- src/planner/web/js/components/ModelConfig.jsx | 21 ++++ src/planner/web/js/sse.js | 2 +- src/planner/web/server-types.ts | 21 +++- src/planner/web/server.ts | 97 +++++++++++++----- 9 files changed, 292 insertions(+), 70 deletions(-) diff --git a/src/planner/lib/ipc-responder.ts b/src/planner/lib/ipc-responder.ts index 7d55cee..49ff0bd 100644 --- a/src/planner/lib/ipc-responder.ts +++ b/src/planner/lib/ipc-responder.ts @@ -24,6 +24,7 @@ import { import type { ScoutTask as TaskScoutTask } from "./task.js"; import { pool } from "./pool.js"; import { readProjection } from "./audit.js"; +import { loadScoutConcurrency } from "../model-config.js"; import type { WebServerHandle, AskQuestion, AnswerResult } from "../web/server-types.js"; import { OTHER_OPTION } from "../web/server-types.js"; @@ -133,8 +134,9 @@ async function handleScoutRequest( return { ipcTask, subagentDir: scoutDir }; }); - // Register scouts with the web server before spawning so the UI shows them - // immediately rather than waiting for the first audit poll. + // Register scouts with the web server as queued (status: null) so the UI + // shows them immediately. They transition to "running" when the pool picks + // them up and the pi process is actually launched. 
if (webServer) { for (const entry of scoutEntries) { webServer.registerAgent({ @@ -144,18 +146,21 @@ async function handleScoutRequest( role: "scout", model: null, parent: scoutCtx.parentRole, + status: null, }); } } const taskIds = scoutEntries.map((t) => t.ipcTask.id); + const concurrency = await loadScoutConcurrency(); await pool( taskIds, - 4, + concurrency, async (taskId) => { if (signal.aborted) return { exitCode: 1, stderr: "aborted", subagentDir: "" }; const entry = scoutEntries.find((t) => t.ipcTask.id === taskId)!; + webServer?.startAgent(taskId); await fs.mkdir(entry.subagentDir, { recursive: true }); // Construct the task manifest for this scout. The IPC-level ipcTask carries diff --git a/src/planner/model-config.ts b/src/planner/model-config.ts index 80d968a..248d727 100644 --- a/src/planner/model-config.ts +++ b/src/planner/model-config.ts @@ -18,6 +18,7 @@ export type ModelTierConfig = Record; interface KoanConfigFile { modelTiers?: Record; + scoutConcurrency?: number; [key: string]: unknown; } @@ -73,6 +74,44 @@ export async function loadModelTierConfig(): Promise { return result as ModelTierConfig; } +// -- Scout concurrency ------------------------------------------------------- + +const DEFAULT_SCOUT_CONCURRENCY = 8; + +export async function loadScoutConcurrency(): Promise { + try { + const raw = await fs.readFile(CONFIG_PATH, "utf8"); + const parsed = JSON.parse(raw) as KoanConfigFile; + if (typeof parsed.scoutConcurrency === "number" && parsed.scoutConcurrency > 0) { + return parsed.scoutConcurrency; + } + } catch { + // File missing or invalid — use default. 
+ } + return DEFAULT_SCOUT_CONCURRENCY; +} + +export async function saveScoutConcurrency(concurrency: number): Promise { + const configDir = path.dirname(CONFIG_PATH); + await fs.mkdir(configDir, { recursive: true }); + + let existing: KoanConfigFile = {}; + try { + const raw = await fs.readFile(CONFIG_PATH, "utf8"); + existing = JSON.parse(raw) as KoanConfigFile; + } catch { + // Start fresh. + } + + existing.scoutConcurrency = concurrency; + + const tmpPath = `${CONFIG_PATH}.tmp`; + await fs.writeFile(tmpPath, `${JSON.stringify(existing, null, 2)}\n`, "utf8"); + await fs.rename(tmpPath, CONFIG_PATH); +} + +// -- Model tiers (save) ------------------------------------------------------ + export async function saveModelTierConfig(config: ModelTierConfig): Promise { const configDir = path.dirname(CONFIG_PATH); await fs.mkdir(configDir, { recursive: true }); diff --git a/src/planner/web/css/components.css b/src/planner/web/css/components.css index a81ccc7..ce2c191 100644 --- a/src/planner/web/css/components.css +++ b/src/planner/web/css/components.css @@ -58,6 +58,7 @@ .agent-table { width: 100%; border-collapse: collapse; + table-layout: fixed; font-size: var(--font-size-sm); } @@ -78,24 +79,39 @@ border-bottom: 1px solid var(--border-light); } -.col-status { width: 24px; text-align: center; } -.col-model { width: 90px; white-space: nowrap; } -.col-parent { width: 90px; white-space: nowrap; } -.col-tokens { width: 60px; text-align: right; white-space: nowrap; } -.col-doing { /* flex */ } +.col-status { width: 28px; text-align: center; } +.col-agent { width: 170px; } +.col-model { width: 170px; } +.col-tokens { width: 70px; text-align: right; } +.col-doing { /* takes remaining */ } +.agent-table td, +.agent-table th { + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} + +.agent-table td.col-doing { + white-space: normal; +} + +.agent-status-queued { color: var(--text-dim); } .agent-status-running { color: var(--blue); } .agent-status-done { 
color: var(--green); font-weight: 600; } .agent-status-failed { color: var(--red); } +.agent-name-queued { color: var(--text-dim); font-family: var(--font-mono); } .agent-name-running { color: var(--text); font-weight: 600; font-family: var(--font-mono); } .agent-name-done { color: var(--green); font-family: var(--font-mono); } .agent-name-failed { color: var(--red); font-family: var(--font-mono); } .agent-model-cell { font-family: var(--font-mono); color: var(--text-muted); } -.agent-parent-cell { font-family: var(--font-mono); color: var(--text-dim); } .agent-tokens-cell { font-family: var(--font-mono); color: var(--text-muted); } +.agent-doing-dim { font-family: var(--font-mono); font-size: var(--font-size-xs); color: var(--text-dim); } +.agent-doing-failed { color: var(--red); } + .agent-doing-lines { display: flex; flex-direction: column; @@ -109,7 +125,6 @@ white-space: nowrap; overflow: hidden; text-overflow: ellipsis; - max-width: 600px; } .agent-doing-line:last-child { @@ -304,6 +319,34 @@ animation: slide-open 150ms ease-out; } +/* ---- Config sections ---- */ +.model-config-section { + margin-top: var(--gap-xl); +} + +.model-config-section-heading { + font-size: var(--font-size-lg); + font-weight: 600; + color: var(--text-strong); + margin: 0 0 var(--gap-xs) 0; +} + +.scout-concurrency-input { + width: 80px; + padding: var(--gap-sm) var(--gap-md); + background: var(--bg); + border: 1px solid var(--border); + border-radius: var(--radius-sm); + color: var(--text); + font-family: var(--font-mono); + font-size: var(--font-size-md); +} + +.scout-concurrency-input:focus { + border-color: var(--blue-border); + outline: none; +} + /* ---- Form actions ---- */ .form-actions { display: flex; diff --git a/src/planner/web/js/components/AgentMonitor.jsx b/src/planner/web/js/components/AgentMonitor.jsx index b87d763..e6a7966 100644 --- a/src/planner/web/js/components/AgentMonitor.jsx +++ b/src/planner/web/js/components/AgentMonitor.jsx @@ -4,13 +4,17 @@ import { 
AgentRow } from './AgentRow.jsx' export function AgentMonitor() { const allAgents = useStore(s => s.agents) - // Only show nested subagents (those with a parent), and only running ones - const agents = allAgents.filter(a => a.status === 'running' && a.parent) + const agents = allAgents.filter(a => a.parent) + + // Hide entirely when no agents, or when all are done (batch complete) + const hasActive = agents.some(a => a.status === 'running' || a.status === null) + if (agents.length === 0 || !hasActive) return null + + const running = agents.filter(a => a.status === 'running' || a.status === null).length + const done = agents.filter(a => a.status === 'completed').length const sent = agents.reduce((s, a) => s + (a.tokensSent || 0), 0) const recv = agents.reduce((s, a) => s + (a.tokensReceived || 0), 0) - if (agents.length === 0) return null - // Dynamic lines-per-agent based on count const maxLines = agents.length <= 3 ? 5 : agents.length <= 6 ? 3 @@ -22,7 +26,8 @@ export function AgentMonitor() {
Subagents
- {agents.length} + {running} + {done > 0 && {done}}
{(sent > 0 || recv > 0) ? `↑${formatTokens(sent)} ↓${formatTokens(recv)}` : ''} diff --git a/src/planner/web/js/components/AgentRow.jsx b/src/planner/web/js/components/AgentRow.jsx index 8bc678f..aad1a0b 100644 --- a/src/planner/web/js/components/AgentRow.jsx +++ b/src/planner/web/js/components/AgentRow.jsx @@ -1,40 +1,91 @@ +import { useState, useEffect } from 'preact/hooks' import { shortenModel, formatTokens } from '../lib/utils.js' +function ThinkingTimer({ since }) { + const [elapsed, setElapsed] = useState(0) + + useEffect(() => { + const start = new Date(since).getTime() + const tick = () => setElapsed(Math.floor((Date.now() - start) / 1000)) + tick() + const id = setInterval(tick, 1000) + return () => clearInterval(id) + }, [since]) + + const text = elapsed < 60 + ? `${elapsed}s` + : `${Math.floor(elapsed / 60)}m ${elapsed % 60}s` + + return {text} +} + +const STATUS = { + null: { symbol: '○', statusCls: 'agent-status-queued', nameCls: 'agent-name-queued' }, + running: { symbol: '●', statusCls: 'agent-status-running', nameCls: 'agent-name-running' }, + completed: { symbol: '✓', statusCls: 'agent-status-done', nameCls: 'agent-name-done' }, + failed: { symbol: '✗', statusCls: 'agent-status-failed', nameCls: 'agent-name-failed' }, +} + export function AgentRow({ agent, maxLines = 5 }) { + const s = STATUS[agent.status] || STATUS.running const actions = agent.recentActions || [] const start = Math.max(0, actions.length - maxLines) return ( - ● - {agent.name || agent.id} + {s.symbol} + {agent.name || agent.id} {shortenModel(agent.model)} {formatTokens(agent.tokensSent || 0)} {formatTokens(agent.tokensReceived || 0)} - {actions.length > 0 ? ( -
- {actions.slice(start).map((action, i) => { - // Gracefully handle both old string[] and new object[] formats. - const text = typeof action === 'string' - ? action - : (action.summary ? `${action.tool}: ${action.summary}` : action.tool) - const inFlight = typeof action === 'object' && !!action.inFlight - - return ( -
- - {inFlight ? '●' : '·'} - - {text} -
- ) - })} -
- ) : ( - initializing... - )} + ) } + +function DoingCell({ status, actions, start }) { + if (status === null) return queued + if (status === 'completed') return done + if (status === 'failed') return failed + + // running + if (actions.length === 0) return initializing... + + return ( +
+ {actions.slice(start).map((action, i) => { + const isThinking = typeof action === 'object' && action.tool === 'thinking' + const inFlight = typeof action === 'object' && !!action.inFlight + + if (isThinking) { + return ( +
+ + {inFlight ? '●' : '·'} + + {inFlight + ? <>thinking + : `thought for ${action.summary}` + } +
+ ) + } + + const text = typeof action === 'string' + ? action + : (action.summary ? `${action.tool}: ${action.summary}` : action.tool) + + return ( +
+ + {inFlight ? '●' : '·'} + + {text} +
+ ) + })} +
+ ) +} diff --git a/src/planner/web/js/components/ModelConfig.jsx b/src/planner/web/js/components/ModelConfig.jsx index 64feb3f..f067a46 100644 --- a/src/planner/web/js/components/ModelConfig.jsx +++ b/src/planner/web/js/components/ModelConfig.jsx @@ -32,10 +32,13 @@ function groupByProvider(models) { })) } +const DEFAULT_SCOUT_CONCURRENCY = 8 + export function ModelConfig({ token, isGate = false, onClose }) { const pending = useStore(s => s.pendingInput) const availableModels = useStore(s => s.availableModels) const [tiers, setTiers] = useState({ strong: '', standard: '', cheap: '' }) + const [scoutConcurrency, setScoutConcurrency] = useState(DEFAULT_SCOUT_CONCURRENCY) const [loading, setLoading] = useState(true) const [saving, setSaving] = useState(false) @@ -48,6 +51,7 @@ export function ModelConfig({ token, isGate = false, onClose }) { standard: t?.standard || '', cheap: t?.cheap || '', }) + setScoutConcurrency(t?.scoutConcurrency || DEFAULT_SCOUT_CONCURRENCY) setLoading(false) return } @@ -61,6 +65,7 @@ export function ModelConfig({ token, isGate = false, onClose }) { cheap: data.tiers.cheap || '', }) } + if (data.scoutConcurrency) setScoutConcurrency(data.scoutConcurrency) setLoading(false) }) .catch(() => setLoading(false)) @@ -74,6 +79,7 @@ export function ModelConfig({ token, isGate = false, onClose }) { standard: tiers.standard || null, cheap: tiers.cheap || null, }, + scoutConcurrency, } if (isGate && pending?.requestId) { body.requestId = pending.requestId @@ -132,6 +138,21 @@ export function ModelConfig({ token, isGate = false, onClose }) { ))}
+
+

Scout Concurrency

+

+ Maximum number of scout agents to run in parallel during codebase investigation. +

+ setScoutConcurrency(Math.max(1, Math.min(32, parseInt(e.target.value) || DEFAULT_SCOUT_CONCURRENCY)))} + /> +
+
{!isGate && ( diff --git a/src/planner/web/js/sse.js b/src/planner/web/js/sse.js index 20b5ae6..35d0cb1 100644 --- a/src/planner/web/js/sse.js +++ b/src/planner/web/js/sse.js @@ -18,7 +18,7 @@ export function connectSSE(token) { ask: (d) => set({ pendingInput: { type: 'ask', requestId: d.requestId, payload: d.questions } }), review: (d) => set({ pendingInput: { type: 'review', requestId: d.requestId, payload: d.stories } }), 'model-config': (d) => set(s => ({ - pendingInput: { type: 'model-config', requestId: d.requestId, payload: d.tiers }, + pendingInput: { type: 'model-config', requestId: d.requestId, payload: { ...d.tiers, scoutConcurrency: d.scoutConcurrency } }, ...(d.availableModels ? { availableModels: d.availableModels } : {}), })), 'model-config-confirmed': () => set(s => s.pendingInput?.type === 'model-config' ? { pendingInput: null } : {}), diff --git a/src/planner/web/server-types.ts b/src/planner/web/server-types.ts index 6cc8edc..2812073 100644 --- a/src/planner/web/server-types.ts +++ b/src/planner/web/server-types.ts @@ -171,10 +171,23 @@ export interface PipelineEndEvent { summary: string; } +// Confidence level type for the intake confidence loop. +export type IntakeConfidenceLevel = "exploring" | "low" | "medium" | "high" | "certain" | null; + +export interface IntakeProgressEvent { + subPhase: string | null; + intakeDone: boolean; + // The most recent confidence level declared by koan_set_confidence. + // Null before the first Reflect step completes. + confidence: IntakeConfidenceLevel; + // The current loop iteration (1-based). Zero before the loop begins. 
+ iteration: number; +} + export interface ScoutState { id: string; role: string; - status: "running" | "completed" | "failed"; + status: "running" | "completed" | "failed" | null; lastAction: string | null; eventCount: number; model: string | null; @@ -193,10 +206,10 @@ export interface AgentEntry { role: string; model: string | null; parent: string | null; - status: "running" | "completed" | "failed"; + status: "running" | "completed" | "failed" | null; tokensSent: number; tokensReceived: number; - recentActions: Array<{ tool: string; summary: string; inFlight: boolean }>; + recentActions: Array<{ tool: string; summary: string; inFlight: boolean; ts?: string }>; subPhase: string | null; } @@ -236,7 +249,9 @@ export interface WebServerHandle { role: string; model: string | null; parent: string | null; + status?: "running" | null; }): void; + startAgent(id: string): void; completeAgent(id: string): void; // Blocking input methods diff --git a/src/planner/web/server.ts b/src/planner/web/server.ts index 5424178..075a30a 100644 --- a/src/planner/web/server.ts +++ b/src/planner/web/server.ts @@ -13,7 +13,7 @@ import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; import { AuthStorage, ModelRegistry } from "@mariozechner/pi-coding-agent"; import { readProjection, readRecentLogs } from "../lib/audit.js"; -import { loadModelTierConfig, saveModelTierConfig, type ModelTierConfig } from "../model-config.js"; +import { loadModelTierConfig, saveModelTierConfig, loadScoutConcurrency, saveScoutConcurrency, type ModelTierConfig } from "../model-config.js"; import type { WebServerHandle, AskQuestion, @@ -22,6 +22,7 @@ import type { AnswerResult, AnswerElement, LogLine, + IntakeProgressEvent, } from "./server-types.js"; import type { EpicPhase, StoryStatus } from "../types.js"; @@ -187,10 +188,10 @@ interface AgentInfoInternal { role: string; model: string | null; parent: string | null; - status: "running" | "completed" | "failed"; + status: "running" | "completed" | 
"failed" | null; tokensSent: number; tokensReceived: number; - recentActions: Array<{ tool: string; summary: string; inFlight: boolean }>; + recentActions: Array<{ tool: string; summary: string; inFlight: boolean; ts?: string }>; spawnOrder: number; completionOrder?: number; pollingTimer?: ReturnType; @@ -198,6 +199,9 @@ interface AgentInfoInternal { subPhase: string | null; eventCount: number; completionSummary: string | null; + // Cached most-recent projection from pollAgent(), used by the polling timer + // to read confidence/iteration without issuing a second readProjection call. + lastProjection?: import("../lib/audit.js").Projection; } // --------------------------------------------------------------------------- @@ -233,10 +237,14 @@ export async function startWebServer(epicDir: string): Promise let lastLogs: LogLine[] = []; let pipelineEnd: { success: boolean; summary: string } | null = null; - // Denormalized intake progress buffer - let currentIntakeProgress: { subPhase: string | null; intakeDone: boolean } = { + // Denormalized intake progress buffer. Includes confidence and iteration from + // the intake agent's projection so the UI can visualize loop progress. + // Typed as IntakeProgressEvent so the SSE payload is compile-time verified. 
+ let currentIntakeProgress: IntakeProgressEvent = { subPhase: null, intakeDone: false, + confidence: null, + iteration: 0, }; // SSE clients @@ -294,7 +302,7 @@ export async function startWebServer(epicDir: string): Promise const scoutArray = buildScoutsArray(); if (scoutArray.length > 0) write("scouts", { scouts: scoutArray }); - if (currentIntakeProgress.subPhase !== null || currentIntakeProgress.intakeDone) { + if (currentIntakeProgress.subPhase !== null || currentIntakeProgress.intakeDone || currentIntakeProgress.confidence !== null) { write("intake-progress", currentIntakeProgress); } @@ -320,18 +328,10 @@ export async function startWebServer(epicDir: string): Promise function buildAgentsArray(): Array<{ id: string; name: string; role: string; model: string | null; - parent: string | null; status: string; tokensSent: number; - tokensReceived: number; recentActions: Array<{ tool: string; summary: string; inFlight: boolean }>; subPhase: string | null; + parent: string | null; status: string | null; tokensSent: number; + tokensReceived: number; recentActions: Array<{ tool: string; summary: string; inFlight: boolean; ts?: string }>; subPhase: string | null; }> { - const sorted = Array.from(agents.values()).sort((a, b) => { - if (a.status === "running" && b.status !== "running") return -1; - if (b.status === "running" && a.status !== "running") return 1; - if (a.status !== "failed" && b.status === "failed") return -1; - if (b.status !== "failed" && a.status === "failed") return 1; - const aOrder = a.status === "running" ? a.spawnOrder : (a.completionOrder ?? a.spawnOrder); - const bOrder = b.status === "running" ? b.spawnOrder : (b.completionOrder ?? 
b.spawnOrder); - return aOrder - bOrder; - }); + const sorted = Array.from(agents.values()).sort((a, b) => a.spawnOrder - b.spawnOrder); return sorted.map((a) => ({ id: a.id, name: a.name, @@ -347,7 +347,7 @@ export async function startWebServer(epicDir: string): Promise } function buildScoutsArray(): Array<{ - id: string; role: string; status: string; lastAction: string | null; + id: string; role: string; status: string | null; lastAction: string | null; eventCount: number; model: string | null; completionSummary: string | null; tokensSent: number; tokensReceived: number; }> { @@ -381,17 +381,31 @@ export async function startWebServer(epicDir: string): Promise agent.tokensSent = projection.tokensSent; agent.tokensReceived = projection.tokensReceived; agent.eventCount = projection.eventCount; + // Cache the latest projection so polling timers can read confidence/iteration + // without issuing a second readProjection call for the same agent. + agent.lastProjection = projection; if (projection.status !== "running") { agent.status = projection.status; } if (agent.role === "intake") { const hasPendingAsk = Array.from(pendingInputs.values()).some((p) => p.type === "ask"); - const STEP_PHASE: Record = { 0: "context", 1: "context", 2: "explore", 3: "spec" }; - agent.subPhase = hasPendingAsk ? "questions" : (STEP_PHASE[projection.step] ?? "spec"); + // Map intake step numbers to display sub-phase names. + // Steps 2-4 repeat across iterations; show "questions" when user input is pending. + const STEP_PHASE: Record = { + 0: "extract", 1: "extract", + 2: "scout", 3: "deliberate", 4: "reflect", + 5: "synthesize", + }; + agent.subPhase = hasPendingAsk ? "questions" : (STEP_PHASE[projection.step] ?? 
"reflect"); } } if (logs.length > 0) { - agent.recentActions = logs.slice(-5).map((l) => ({ tool: l.tool, summary: l.summary || '', inFlight: l.inFlight })); + agent.recentActions = logs.slice(-5).map((l) => ({ + tool: l.tool, + summary: l.summary || '', + inFlight: l.inFlight, + ...(l.ts ? { ts: l.ts } : {}), + })); } if (agent.role === "scout" && projection?.completionSummary && !agent.completionSummary) { agent.completionSummary = projection.completionSummary; @@ -413,8 +427,21 @@ export async function startWebServer(epicDir: string): Promise // Push intake-progress event if the intake agent's sub-phase changed const intake = Array.from(agents.values()).find(a => a.role === "intake"); if (intake) { - const next = { subPhase: intake.subPhase, intakeDone: currentPhase !== "intake" && currentPhase !== null }; - if (next.subPhase !== currentIntakeProgress.subPhase || next.intakeDone !== currentIntakeProgress.intakeDone) { + // Use the projection already read by pollAgent (cached on agent.lastProjection) + // to avoid a redundant readProjection call for the same file in the same tick. + const intakeProjection = intake.lastProjection ?? null; + const next: IntakeProgressEvent = { + subPhase: intake.subPhase, + intakeDone: currentPhase !== "intake" && currentPhase !== null, + confidence: intakeProjection?.intakeConfidence ?? null, + iteration: intakeProjection?.intakeIteration ?? 
0, + }; + const changed = + next.subPhase !== currentIntakeProgress.subPhase || + next.intakeDone !== currentIntakeProgress.intakeDone || + next.confidence !== currentIntakeProgress.confidence || + next.iteration !== currentIntakeProgress.iteration; + if (changed) { currentIntakeProgress = next; pushEvent("intake-progress", currentIntakeProgress); } @@ -489,7 +516,7 @@ export async function startWebServer(epicDir: string): Promise if (method === "PUT" && pathname === "/api/model-config") { const body = await readBody(req).catch(() => null); - const b = body as { requestId?: string; tiers: Record } | null; + const b = body as { requestId?: string; tiers: Record; scoutConcurrency?: number } | null; if (!b) { sendJson(res, 400, { ok: false, error: "Invalid body" }); return; } const { requestId, tiers } = b; @@ -501,6 +528,11 @@ export async function startWebServer(epicDir: string): Promise await saveModelTierConfig({ strong, standard, cheap } as ModelTierConfig); } + // Save scout concurrency + if (typeof b.scoutConcurrency === "number" && b.scoutConcurrency > 0) { + await saveScoutConcurrency(b.scoutConcurrency); + } + // Resolve the blocking gate if requestId matches if (requestId) { const entry = pendingInputs.get(requestId); @@ -664,10 +696,11 @@ export async function startWebServer(epicDir: string): Promise registerAgent(info: { id: string; name: string; dir: string; role: string; model: string | null; parent: string | null; + status?: "running" | null; }): void { const agent: AgentInfoInternal = { ...info, - status: "running", + status: info.status ?? 
"running", tokensSent: 0, tokensReceived: 0, recentActions: [], @@ -677,11 +710,20 @@ export async function startWebServer(epicDir: string): Promise completionSummary: null, }; agents.set(info.id, agent); - startAgentPolling(agent); + if (agent.status === "running") startAgentPolling(agent); pushEvent("agents", { agents: buildAgentsArray() }); if (info.role === "scout") pushEvent("scouts", { scouts: buildScoutsArray() }); }, + startAgent(id: string): void { + const agent = agents.get(id); + if (!agent || agent.status !== null) return; + agent.status = "running"; + startAgentPolling(agent); + pushEvent("agents", { agents: buildAgentsArray() }); + if (agent.role === "scout") pushEvent("scouts", { scouts: buildScoutsArray() }); + }, + completeAgent(id: string): void { const agent = agents.get(id); if (!agent) return; @@ -768,7 +810,8 @@ export async function startWebServer(epicDir: string): Promise async requestModelConfig(): Promise { const requestId = randomUUID(); const config = await loadModelTierConfig(); - const payload = { requestId, tiers: config, availableModels }; + const scoutConcurrency = await loadScoutConcurrency(); + const payload = { requestId, tiers: config, scoutConcurrency, availableModels }; return new Promise((resolve, reject) => { pendingInputs.set(requestId, { type: "model-config" as const, From 1161c4e8ccb37a95276707292a1579d7bc4b700a Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Thu, 19 Mar 2026 21:00:38 +0700 Subject: [PATCH 067/412] architecture documentation --- AGENTS.md | 8 + docs/architecture.md | 289 +++++++++++++++++++++++++++++++ docs/intake-loop.md | 388 ++++++++++++++++++++++++++++++++++++++++++ docs/ipc.md | 320 ++++++++++++++++++++++++++++++++++ docs/state.md | 298 ++++++++++++++++++++++++++++++++ docs/subagents.md | 397 +++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 1700 insertions(+) create mode 100644 docs/architecture.md create mode 100644 docs/intake-loop.md create mode 100644 docs/ipc.md create mode 100644 
docs/state.md create mode 100644 docs/subagents.md diff --git a/AGENTS.md b/AGENTS.md index ea5ff9f..d0bcee7 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -6,6 +6,7 @@ Spoke documents: - [docs/subagents.md](docs/subagents.md) — spawn lifecycle, task manifest, step-first workflow, permissions - [docs/ipc.md](docs/ipc.md) — file-based IPC protocol, scout spawning, question routing - [docs/state.md](docs/state.md) — driver/LLM boundary, epic and story state, routing rules +- [docs/intake-loop.md](docs/intake-loop.md) — confidence-gated loop, non-linear step progression, prompt engineering --- @@ -35,6 +36,10 @@ Tool returns: Step 1 instructions (rich context, task details, guidance) Tool returns: Step 2 instructions (or "Phase complete.") ``` +Step progression is normally linear, but subclasses may override `getNextStep()` +to implement non-linear flows. The intake phase loops steps 2–4 until a +confidence gate is satisfied. See [docs/intake-loop.md](docs/intake-loop.md). + ## 3. Driver Determinism The driver reads JSON state files and exit codes, applies routing rules, and @@ -45,6 +50,9 @@ spawns the next subagent. It never makes judgment calls or parses free-text. Every tool call passes through a role-based permission fence. Unknown roles and tools are blocked. Planning roles can only write inside the epic directory. +The fence also supports step-level gating for individual roles: the intake +phase blocks side-effecting tools during its read-only Extract step (step 1). + ## 5. Need-to-Know Prompts Boot prompt is one sentence. System prompt has role identity, no task details. diff --git a/docs/architecture.md b/docs/architecture.md new file mode 100644 index 0000000..ad5c34d --- /dev/null +++ b/docs/architecture.md @@ -0,0 +1,289 @@ +# Koan Architecture + +Koan is a deterministic pipeline that spawns isolated LLM subagents to plan and +execute complex coding tasks. This document captures the design invariants, +principles, and pitfalls that govern the codebase. 
+ +**Spoke documents** cover subsystems in depth: + +- [Subagents](./subagents.md) — spawn lifecycle, boot protocol, step-first + workflow, phase dispatch, permissions, model tiers +- [IPC](./ipc.md) — file-based inter-process communication between parent and + subagent, scout spawning, question routing +- [State & Driver](./state.md) — the driver/LLM boundary, JSON vs markdown + ownership, epic and story state, routing rules +- [Intake Loop](./intake-loop.md) — confidence-gated investigation loop, + non-linear step progression, prompt engineering principles + +--- + +## Core Invariants + +These are load-bearing rules. Violating any one of them breaks the system in +ways that are difficult to diagnose. + +### 1. File boundary + +LLMs write **markdown files only**. The driver maintains **JSON state files** +internally — no LLM ever reads or writes a `.json` file. + +Tool code bridges both worlds: orchestrator tools write JSON state (for the +driver) and templated `status.md` (for LLMs). The driver reads JSON and exit +codes; it never parses markdown. + +``` +Orchestrator calls koan_complete_story(story_id) + → tool code writes state.json + status.md + → driver reads state.json to route next action + → LLM reads status.md if it needs to reference the decision +``` + +**Why:** If an LLM writes JSON, schema drift and parse errors become runtime +failures in the deterministic driver. Markdown is forgiving; JSON is not. + +### 2. Step-first workflow + +Every subagent is a `pi -p` process. Once the LLM produces text without a tool +call, the process exits — there is no stdin to recover. The entire workflow +depends on the LLM calling `koan_complete_step` reliably. + +**The first thing any subagent does is call `koan_complete_step`.** The spawn +prompt contains *only* this directive. The tool returns step 1 instructions. +This establishes the calling pattern before the LLM sees complex instructions. + +``` +Boot prompt: "You are a koan {role} agent. 
Call koan_complete_step to receive your instructions." + ↓ LLM calls koan_complete_step (step 0 → 1 transition) +Tool returns: Step 1 instructions (rich context, task details, guidance) + ↓ LLM does work... + ↓ LLM calls koan_complete_step +Tool returns: Step 2 instructions (or "Phase complete.") +``` + +Three reinforcement mechanisms make this robust across model capability levels: + +| Mechanism | Where | Why | +|-----------|-------|-----| +| **Primacy** | Boot prompt is the LLM's very first message | First action = tool call, at the top of conversation history | +| **Recency** | `formatStep()` appends "WHEN DONE: Call koan_complete_step..." last | LLMs weight end-of-context instructions heavily | +| **Muscle memory** | By step 2+ the LLM has called the tool N times | Pattern is locked in through repetition | + +### 3. Driver determinism + +The driver (`driver.ts`) is a deterministic state machine. It reads JSON state +files and exit codes, applies routing rules, and spawns the next subagent. It +never makes judgment calls, parses free-text output, or adapts to LLM behavior. + +**Routing priority** in the story loop: +1. `retry` status → re-execute (retry takes precedence over new work) +2. `selected` status → plan + execute +3. All stories `done` or `skipped` → epic complete +4. None of the above → error ("orchestrator may have exited without a routing decision") + +### 4. Default-deny permissions + +Every tool call in a subagent passes through a permission fence (`tool_call` +event handler in `BasePhase`). Unknown roles are blocked. Unknown tools are +blocked. Planning roles can only write inside the epic directory. + +The one accepted limitation: `READ_TOOLS` (bash, read, grep, glob, find, ls) +are always allowed because distinguishing "read bash" from "write bash" is +intractable at the permission layer. **Prompt engineering constrains intended +bash use; enforcement does not.** + +### 5. 
Need-to-know prompts + +Each subagent receives only the minimum context for its task: + +- The **boot prompt** is one sentence (role identity + "call koan_complete_step") +- The **system prompt** establishes role identity and rules, but no task details +- **Task details** arrive via step 1 guidance (returned by the first tool call) + +This is not just tidiness — it is load-bearing. A previous design injected +step 1 guidance into the first user message (via a `context` event handler), +but that front-loaded complex instructions before the LLM had established the +`koan_complete_step` calling pattern. Weaker models (haiku) produced text +output and exited without entering the workflow. The `context` event handler +was deliberately removed; step guidance is now delivered exclusively through +`koan_complete_step` return values. + +### 6. Directory-as-contract + +The subagent directory is the **sole interface** between parent and child. +Everything a subagent needs — its task, its communication channel, its +observable state — lives in well-known files inside that directory. + +Three JSON files, three lifecycles: + +| File | Writer | Reader | Lifecycle | +|------|--------|--------|-----------| +| **`task.json`** | Parent (before spawn) | Child (once, at startup) | Write-once, never modified | +| **`state.json`** | Child (continuously) | Parent (polling) | Eagerly materialized audit projection | +| **`ipc.json`** | Both (request/response) | Both (polling) | Temporary — created per request, deleted after response | + +The spawn command carries only the directory path. The child reads `task.json` +to discover its role, epic context, and task-specific parameters. No +structured configuration flows through CLI flags, environment variables, or +other process-level channels. 
+ +``` +# Spawn interface: one koan flag, the rest is pi-level +pi -p -e {extensionPath} --koan-dir {subagentDir} [--model {model}] "{bootPrompt}" +``` + +**Why:** CLI flags are a flat namespace — they cause naming collisions (e.g., +`--koan-role` for pipeline role vs `--koan-scout-role` for investigator +persona), cannot represent nested structure, are visible in process listings, +and are subject to `ARG_MAX` limits for large values like retry context. +Files are structured, inspectable (`cat task.json`), typed, and consistent +with how we already handle runtime communication (IPC) and observation (audit). + +See [subagents.md § Task Manifest](./subagents.md#task-manifest) for the +`task.json` schema and spawn flow. + +--- + +## Atomic Writes + +All persistent writes (JSON state, IPC files, status.md, audit state.json) +use the same pattern: write to a `.tmp` file, then `fs.rename()` to the target. +This prevents partial reads during concurrent access. + +```typescript +const tmp = path.join(dir, "file.tmp"); +await fs.writeFile(tmp, content, "utf8"); +await fs.rename(tmp, target); +``` + +This is not optional — the IPC responder, web server, and audit system all +poll files concurrently. A partial read of `ipc.json` or `state.json` would +cause silent data corruption or spurious errors. + +--- + +## Tool Registration Constraint + +All tools **must** be registered unconditionally at extension init, before +pi's `_buildRuntime()` snapshot. Tools registered after `_buildRuntime()` are +invisible to the LLM. + +CLI flags are unavailable during init (`getFlag()` returns undefined before +`_buildRuntime()` sets flagValues), so conditional registration based on role +is impossible. Instead: + +1. All tools register at init, reading from the mutable `RuntimeContext` at call time +2. `BasePhase.registerHandlers()` adds a `tool_call` event listener that checks permissions per-role at runtime +3. 
The `RuntimeContext` is populated later, during `before_agent_start` + +This is the **mutable-ref pattern**: static registration, dynamic dispatch. + +--- + +## Pitfalls + +Lessons learned from previous failures. Check new changes against these. + +### Don't put task content in spawn prompts + +The boot prompt must be exactly one sentence: role identity + "call +koan_complete_step". Putting task content (file paths, instructions, context) +risks the LLM producing text output on the first turn and exiting. This has +happened with haiku-class models and is not recoverable. + +### Don't inject step guidance via the `context` event + +A `context` event handler that injects step 1 guidance into the first user +message was tried and removed. It creates the same problem as putting content +in the spawn prompt — the LLM sees complex instructions before establishing +the tool-calling pattern. + +### Don't add `escalated` as a story status + +Escalation is handled via `koan_ask_question` (IPC → web server → user +answers → IPC response). A separate `escalated` status was tried and created +a dead routing path — the driver had nowhere clean to send it without +duplicating the ask UI flow that IPC already handles. + +### Don't add `scouting` as an epic phase + +Scouts run inside the IPC responder during intake/decomposer/planner phases, +not as a top-level driver phase. Adding `scouting` to `EpicPhase` would imply +a driver state that never exists, creating dead code paths. + +### Don't rely on file existence for scout success + +Scout success is derived from the JSON projection (`readProjection()` → +`status === "completed"`), not from checking whether `findings.md` exists. +A scout can write a partial findings file and then crash — file existence is +not proof of completion. + +### Don't write state.json from outside state.ts / tool code + +The state module (`epic/state.ts`) and orchestrator tools are the only +writers of JSON state. 
`status.md` writes belong exclusively in +`tools/orchestrator.ts`. Mixing these responsibilities violates the file +boundary invariant. + +### Don't remove the eager-call guard from the koan_complete_step tool description + +The tool description says "DO NOT call this tool until the step instructions +explicitly tell you to." Without this guard, aggressive models call +`koan_complete_step` immediately after receiving step guidance, skipping +the actual work. + +### Don't assume bash is restricted per role + +`bash` is in `READ_TOOLS` and always allowed. The permission layer cannot +distinguish a read-bash from a write-bash. Prompt engineering is the only +constraint. Do not assume bash calls are blocked for planning roles. + +### Don't rely on prompt instructions alone to restrict step behavior + +Prompt instructions can be ignored by the LLM. The intake phase learned this +the hard way: the original 3-step design told the LLM not to scout in step 1, +but the LLM frontloaded all work into step 1 anyway, causing duplicate scout +requests in later steps. + +Mechanical enforcement is required for any behavior that is critical to +correctness. Use the permission fence (`checkPermission` with `intakeStep`) to +block tools that must not be used in a given step. Use +`validateStepCompletion()` to block step advancement when required pre-calls +have not been made. Prompts express intent; enforcement catches non-compliance. + +See [intake-loop.md § Step-Aware Permission Gating](./intake-loop.md#step-aware-permission-gating). + +### Don't parse free-text for loop control decisions + +Confidence (the gate that controls the intake loop) is a structured enum +value set via a dedicated tool call, not a sentiment extracted from the LLM's +`thoughts` text. The driver determinism invariant prohibits parsing free-text +for routing decisions. Any loop gate must flow through a typed tool parameter +and a structured context field. 
+ +### Don't put side effects in getNextStep() + +`getNextStep()` must be a pure query — it returns the next step number and +nothing else. Putting state mutations, counter increments, or event emission +inside `getNextStep()` violates this contract and makes the method unsafe to +reason about (e.g., a test that calls `getNextStep()` to inspect the decision +should not trigger side effects). + +Side effects that accompany a loop-back belong in `onLoopBack()`, which +`BasePhase` calls after detecting a backward transition: + +``` +BAD: getNextStep(4) { this.iteration++; this.ctx.confidence = null; return 2; } +GOOD: getNextStep(4) { return 2; } + onLoopBack(4, 2) { this.iteration++; this.ctx.confidence = null; } +``` + +The `onLoopBack()` hook is async and properly awaited, ensuring event +emission (`emitIterationStart`) is correctly sequenced in `events.jsonl`. + +### Don't pass structured data through CLI flags + +If information is needed by a subagent, write it to `task.json` in the +subagent directory before spawning. CLI flags are for bootstrap only (locating +the directory). Structured data in flags creates flat-namespace collisions, +size limits, and an uninspectable interface. The directory-as-contract +invariant exists specifically to prevent this. diff --git a/docs/intake-loop.md b/docs/intake-loop.md new file mode 100644 index 0000000..47a4563 --- /dev/null +++ b/docs/intake-loop.md @@ -0,0 +1,388 @@ +# Intake Loop Design + +How the intake phase implements a confidence-gated investigation loop, and the +prompt engineering principles that govern it. + +> Parent doc: [architecture.md](./architecture.md) +> Related: [subagents.md § Step-First Workflow](./subagents.md#step-first-workflow-basephase) + +--- + +## Overview + +The intake phase is the most consequential subagent in the pipeline. Its +single output — `context.md` — is the sole input for all downstream phases. 
+Every story boundary, every implementation plan, and every line of code +produced downstream depends on the completeness and accuracy of that file. +Gaps in `context.md` compound: a missed decision becomes a wrong story +boundary becomes a wrong plan becomes wrong code. + +This weight justifies a more elaborate workflow than other phases. Rather than +a fixed sequence of steps, intake runs a **confidence-gated loop**: the LLM +scouts the codebase, enumerates what it knows, asks the user questions, and +then explicitly self-verifies its understanding. The loop repeats until the +LLM declares it is "certain" the decomposer has everything it needs. + +### Step structure + +| Step | Name | Runs | Purpose | +|------|------|------|---------| +| 1 | Extract | 1× | Read `conversation.jsonl`. No side effects. | +| 2 | Scout | 1–4× | Dispatch codebase investigators. | +| 3 | Deliberate | 1–4× | Enumerate knowns/unknowns, ask user questions. | +| 4 | Reflect | 1–4× | Self-verify completeness, declare confidence. | +| 5 | Synthesize | 1× | Write `context.md`. | + +Steps 2–4 form the loop. Each call to `koan_complete_step` during these steps +either returns the next step in sequence or loops back from step 4 to step 2. +Steps 1 and 5 execute exactly once. + +--- + +## Non-Linear Step Progression + +### `getNextStep()` hook + +The `BasePhase` class previously used a hardcoded linear counter: +`step+1` until `totalSteps`, then `null` (done). This was extended with a +`getNextStep(currentStep)` hook that subclasses override to implement +non-linear flows. + +```typescript +// Default: strictly linear. +protected getNextStep(currentStep: number): number | null { + if (currentStep === this.totalSteps) return null; + return currentStep + 1; +} +``` + +`IntakePhase` overrides this to implement the confidence gate: + +```typescript +// Pure query — returns where to go, does not mutate state. 
+protected getNextStep(currentStep: number): number | null { + if (currentStep === 4) { // Reflect step + if (confidence === "certain" || isExhausted) { + return 5; // → Synthesize + } + return 2; // → Scout (loop back) + } + if (currentStep === 5) return null; // Synthesize → done + return currentStep + 1; // linear for steps 1–3 +} + +// Side effects of the loop-back decision live here, not in getNextStep(). +protected override async onLoopBack(_from: number, _to: number): Promise<void> { + this.iteration++; + this.ctx.intakeConfidence = null; // reset for next round + await this.eventLog?.emitIterationStart(this.iteration, MAX_ITERATIONS); +} +``` + +`getNextStep()` is a **pure query** — it only decides where to go. All side +effects (counter increments, state resets, event emission) belong in +`onLoopBack()`, which `BasePhase.handleStepComplete()` calls whenever +`getNextStep()` returns a step number less than the current one. This +separation makes `getNextStep()` safe to reason about and test in isolation. + +All other phase classes inherit the default linear behavior. The hook localizes +non-linear logic to the one class that needs it without touching other phases. + +**Why not a separate loop-phase class?** The `BasePhase` machinery (boot +transition, permission fence, event logging, step formatting) is the same +regardless of whether progression is linear or not. A hook is cheaper than a +new abstraction tier and does not require refactoring the six existing phase +classes. + +### `totalSteps` semantics with a loop + +For `IntakePhase`, `totalSteps = 5` reflects the number of distinct step +definitions, not the number of `koan_complete_step` calls. The loop may +execute steps 2–4 up to four times, producing up to 1 + (3 × 4) + 1 = 14 +calls in the worst case. The `step_transition` event carries both the step +number and the iteration-annotated step name (e.g., "Scout (round 3)") so the +UI can distinguish loop iterations. 
+ +--- + +## The Confidence Gate + +### Why a separate tool, not a parameter + +An earlier design considered adding `confidence` as an optional parameter to +`koan_complete_step`. This was rejected for two reasons: + +1. **Optional parameters are skippable.** LLMs frequently omit optional + parameters, especially when under token pressure. A separate tool call is + harder to skip accidentally — the LLM must make an explicit decision. + +2. **`koan_complete_step` is shared across all phases.** Adding confidence to + it would either bloat the parameter schema for roles that never set + confidence, or require conditional schema logic that the permission fence + cannot express cleanly. A dedicated `koan_set_confidence` tool, restricted + to the intake role via `ROLE_PERMISSIONS`, keeps the boundary clean. + +### Mandatory enforcement via `validateStepCompletion()` + +`BasePhase` exposes a `validateStepCompletion(step)` hook that runs before +`getNextStep()`. It returns null to allow advancement or an error string that +is returned as the `koan_complete_step` tool result — the LLM sees it and +must fix the pre-condition before retrying. + +`IntakePhase` uses this to enforce that `koan_set_confidence` was called in +the Reflect step: + +```typescript +protected async validateStepCompletion(step: number): Promise<string | null> { + if (step === 4 && this.ctx.intakeConfidence === null) { + return "You must call koan_set_confidence before completing the Reflect step. ..."; + } + return null; +} +``` + +This is mechanical enforcement on top of the prompt-level instruction. If the +LLM ignores the prompt and calls `koan_complete_step` without first calling +`koan_set_confidence`, it receives an error and must comply. + +### Confidence reset on loop-back + +When `getNextStep()` returns step 2 (loop-back), `BasePhase` detects the +backward transition and calls `onLoopBack()`. `IntakePhase.onLoopBack()` +resets `ctx.intakeConfidence = null`. 
This ensures that in the next Reflect +step, the LLM must call `koan_set_confidence` again — carry-over from the +previous iteration is not possible. + +Without the reset, an LLM that set confidence to "high" in iteration 1 could +call `koan_complete_step` in iteration 2's Reflect step without reassessing, +and `validateStepCompletion` would let it through. + +**Note:** The audit projection's `intakeConfidence` field is updated only when +a `confidence_change` event is appended (i.e., when `koan_set_confidence` is +called). Between loop-back and the next Reflect step, the projection still +shows the previous iteration's confidence level. This is intentional: the +projection reflects the last declared state, not the reset internal state. The +UI reads the projection, so it shows the previous confidence until a new one +is declared. + +### Maximum iterations + +The loop is bounded at 4 iterations (`IntakePhase.MAX_ITERATIONS`). When +exhausted, `getNextStep()` returns step 5 (Synthesize) instead of step 2. +`IntakePhase` logs a warning when this forced exit occurs. This prevents +infinite loops if the LLM consistently declares non-certain confidence. + +--- + +## Step-Aware Permission Gating + +### Why step 1 is mechanically read-only + +Step 1 (Extract) should only read the conversation. Before this redesign, step +isolation was enforced only through prompt instructions ("do NOT call +koan_request_scouts in this step"). The LLM frequently violated this by +frontloading all work into step 1, leading to duplicate scout requests in +later steps. 
+ +The new design adds a mechanical layer: `checkPermission()` accepts an +optional `intakeStep` parameter and blocks a defined set of tools when +`role === "intake" && intakeStep === 1`: + +``` +koan_request_scouts, koan_ask_question, koan_set_confidence, write, edit +``` + +The current step is propagated via `ctx.intakeStep`, kept in sync by the +`onStepUpdated()` hook in `IntakePhase`: + +```typescript +protected onStepUpdated(step: number): void { + this.ctx.intakeStep = step; + this.ctx.intakeIteration = this.iteration; +} +``` + +`BasePhase.handleStepComplete()` calls `onStepUpdated()` on every step +transition (including loop-backs), so `ctx.intakeStep` always reflects the +current active step at tool call time. + +### Prompt + enforcement is not redundant + +The prompt still tells the LLM not to use side-effecting tools in step 1. +The permission gate is a fallback that catches prompt non-compliance. Together: +the prompt prevents the behavior; the gate catches it when the prompt fails. +Neither alone is sufficient — the prompt can be ignored; the gate with no +prompt would produce confusing "blocked" errors with no context for the LLM. + +--- + +## Audit Events and SSE Propagation + +Two new audit event types support UI visualization of confidence and iteration: + +| Event | Emitted by | When | +|-------|-----------|------| +| `confidence_change` | `koan_set_confidence` tool | Every call to koan_set_confidence | +| `iteration_start` | `IntakePhase.onLoopBack()` + `onStepUpdated()` | At every loop iteration start: `onLoopBack` for iterations 2+, `onStepUpdated` for iteration 1 | + +Both events are folded into the `state.json` projection: + +- `confidence_change` → `intakeConfidence`, `intakeIteration` +- `iteration_start` → `intakeIteration` + +The web server polls `state.json` every 500ms for each active agent. When it +detects a change in `intakeConfidence` or `intakeIteration`, it pushes an +`intake-progress` SSE event to connected browser clients. 
The event payload +includes both the `confidence` string and the `iteration` number, allowing the +UI to render a progress visualization without maintaining its own state. + +The `confidence_change` event requires `ctx.eventLog` to be set. This is +populated in `extensions/koan.ts` during `before_agent_start`, after +`eventLog.open()`. The confidence tool reads `ctx.eventLog` at call time +(mutable-ref pattern) — no reference is needed at registration time. + +--- + +## Prompt Engineering Principles + +The intake loop prompts apply several techniques from the prompting literature. +This section records the reasoning so future changes don't inadvertently remove +mechanisms that address specific failure modes. + +### Prompt Chaining over Stepwise (Scout / Deliberate / Reflect as separate steps) + +A monolithic "investigate" step — containing scouting, deliberation, and +reflection in sequence within a single prompt — was rejected in favor of three +separate `koan_complete_step` calls. + +The risk with a monolithic step is **simulated refinement**: the LLM +artificially degrades its initial output to manufacture visible improvement. +When draft, critique, and refine happen in one cognitive context, the model +sandbags the draft to make its self-correction look meaningful. When each +phase is a separate tool call with a distinct cognitive goal, the model must +genuinely complete each phase before seeing the next instruction. There is no +opportunity to pre-plan the "improvement" because the next step's instructions +are not yet visible. + +This is why Scout, Deliberate, and Reflect are separate steps rather than +phases within a single step. + +### Thread-of-Thought in Deliberate (explicit enumeration before questions) + +The Deliberate step instructs the LLM to walk through each area relevant to +the task and explicitly state what is known, unknown, and its source — before +formulating questions. 
This is the Thread-of-Thought pattern: "walk through +this context in manageable parts step by step, summarizing and analyzing as we +go." + +Without this enumeration, the LLM tends to ask questions based on what +immediately comes to mind rather than what is actually unknown. Gaps that are +not top-of-mind are missed. Forcing explicit enumeration of knowns and unknowns +before question formulation surfaces those gaps and prevents asking questions +the conversation or scouts already answered. + +The enumeration also has a secondary benefit in iteration 2+: it forces the +LLM to re-state updated understanding before forming follow-up questions, +preventing the "lost in the middle" problem where findings from early scout +tool results are effectively forgotten by the time questions are formulated. + +### Chain-of-Verification in Reflect (evidence-grounded self-assessment) + +The Reflect step instructs the LLM to generate 3–5 verification questions +framed from the decomposer's perspective, then answer each using only concrete +evidence (quotes from conversation, specific scout findings, explicit user +answers). Verification questions that cannot be answered with evidence identify +gaps. This is the Chain-of-Verification (CoVe) pattern. + +The framing matters: "from the decomposer's perspective" anchors the LLM's +self-assessment to the actual consumer of its output. Without this framing, the +LLM tends to ask generic comprehension questions ("do I understand the topic?") +rather than boundary-defining questions ("could I define the scope of story 1 +vs story 2 right now?"). Generic questions produce generic assessments; +boundary-specific questions surface the gaps that actually matter downstream. + +This is explicitly NOT intrinsic self-correction, which degrades reasoning +performance when no external feedback source is available. 
The LLM is not +being asked to critique its reasoning — it is being asked to generate specific +verification questions and answer them against gathered evidence. The evidence +is external (conversation, scouts, user answers), not the LLM's own reasoning. + +### Contrastive confidence definitions (preventing premature "certain") + +The Reflect step provides two contrastive definitions of the "certain" +confidence level: + +- **Positive:** "certain means ALL of these are true" (four specific + conditions about scope, codebase knowledge, user decisions, and story + immutability) +- **Negative:** "you are NOT certain if" (four failure modes that preclude + certainty) + +This is the Contrastive Chain-of-Thought pattern. A single positive definition +("certain means you have everything you need") leaves the LLM to interpret what +"everything" means — and LLMs tend to interpret this charitably, setting +confidence to "certain" prematurely to exit the loop faster (token-saving +behavior). The negative examples make the failure modes concrete and explicit, +raising the bar for claiming certainty. + +### Iteration-aware guidance (first iteration vs. refinement) + +Steps 2 (Scout) and 3 (Deliberate) produce different instruction text for +the first iteration vs. subsequent iterations. First-iteration Scout says: +"Based on your reading of the conversation..." Subsequent Scout says: "Based +on gaps identified in your previous reflection..." + +This is context reframing. The first iteration is an initial exploration; the +second iteration is a targeted follow-up. If both iterations received the same +prompt, the LLM would repeat its initial exploration rather than narrowing in +on the gaps surfaced by reflection. The iteration number is passed as a +parameter to `intakeStepGuidance()`, which branches on it to produce the +appropriate framing. 
+ +--- + +## Pitfalls + +### Don't put confidence in koan_complete_step's `thoughts` parameter + +`thoughts` is for internal chain-of-thought reasoning. A previous design +considered parsing confidence from the thoughts string. This violates the +driver determinism invariant: the driver never parses free-text. Confidence +must flow through a structured tool call with a typed parameter. + +### Don't rely on the Reflect prompt alone to enforce koan_set_confidence + +The Reflect step prompt ends with "WHEN DONE: First call koan_set_confidence, +then call koan_complete_step." This is a prompt instruction and can be ignored. +The `validateStepCompletion()` hook is the mechanical enforcement layer. Both +must be present: the prompt tells the LLM what to do; the hook catches +non-compliance. + +### Don't remove the confidence null-reset on loop-back + +The null-reset lives in `onLoopBack()` in `IntakePhase`. When looping from +step 4 → step 2, `ctx.intakeConfidence` must be set to null. Without this +reset, the `validateStepCompletion()` check in the next Reflect step sees the +old confidence value and allows `koan_complete_step` through without the LLM +calling `koan_set_confidence` again. + +The reset must happen in `onLoopBack()`, not in `getNextStep()`. Placing it +in `getNextStep()` would make the query impure — see +[architecture.md § Don't put side effects in getNextStep()](./architecture.md#dont-put-side-effects-in-getnextstep). + +### Don't add koan_set_confidence to non-intake roles + +`koan_set_confidence` is gated to the intake role via `ROLE_PERMISSIONS`. If +it were available to other roles, they could set `ctx.intakeConfidence` +spuriously, affecting the intake loop's behavior if intake is running +concurrently (which it isn't currently, but could be in the future). + +### Don't skip `ctx.intakeStep` sync in onStepUpdated + +The permission gate reads `ctx.intakeStep` at tool call time. 
If +`onStepUpdated()` were not called on loop-back (step 4 → step 2), step 2 +would execute with `ctx.intakeStep = 4`, and the step-1 gate would not fire +(step 4 ≠ 1). The step 1 gate is specifically `intakeStep === 1`. Only step 1 +needs gating, so the only critical sync is the boot → step 1 transition. But +keeping `ctx.intakeStep` accurate at all times makes the invariant easier to +reason about and avoids subtle bugs if the gating logic is ever extended. diff --git a/docs/ipc.md b/docs/ipc.md new file mode 100644 index 0000000..a2de236 --- /dev/null +++ b/docs/ipc.md @@ -0,0 +1,320 @@ +# IPC Protocol + +File-based inter-process communication between parent and subagent processes. + +> Parent doc: [architecture.md](./architecture.md) +> +> `ipc.json` is one of three well-known files in the subagent directory. +> See [architecture.md § Directory-as-contract](./architecture.md#6-directory-as-contract) +> for how it relates to `task.json` (input) and `state.json` (observation). + +--- + +## Overview + +Subagent `pi -p` processes cannot communicate with the parent via stdin (it is +`"ignore"`). Instead, they share a single `ipc.json` file in the subagent +directory. The subagent writes a request; the parent polls, handles it, and +writes the response back. The subagent polls for the response. 
+ +``` +subagent: writeIpcFile(dir, { response: null }) ← atomic write creates request +subagent: poll loop (500ms): readIpcFile(dir) ← blocks LLM turn +parent: poll loop (300ms): readIpcFile(dir) ← detects request +parent: handles request (web server or scout pool) ← does work +parent: writeIpcFile(dir, { ..., response: data }) ← atomic write with response +subagent: readIpcFile → response !== null ← breaks poll loop +subagent: deleteIpcFile(dir) ← cleanup +``` + +### Why file-based IPC + +- **Cross-process simplicity** — no socket management, no connection lifecycle +- **Debuggable** — `cat ipc.json` shows the current state +- **Atomic via rename** — tmp file → `fs.rename()` prevents partial reads +- **Cross-platform** — no POSIX-specific constructs + +### Constraints + +- **One request at a time** per subagent directory. Tools check + `ipcFileExists(dir)` before writing and return an error if a request is + already pending. +- **Polling, not push** — inherent latency of poll intervals (300ms parent, + 500ms subagent). +- **The subagent's LLM turn is blocked** while polling. The tool's `execute` + function is in a `sleep(500)` loop — the LLM cannot do other work until + the response arrives. + +--- + +## Message Types + +The protocol supports exactly two request types, discriminated by the `type` +field: + +### `ask` — User questions + +The subagent needs human input. The request contains questions with options; +the response contains the user's selections. + +```typescript +interface AskIpcFile { + type: "ask"; + id: string; // UUID, for response correlation + createdAt: string; + payload: { + questions: Array<{ + id: string; + question: string; + options: Array<{ label: string }>; + multi?: boolean; + recommended?: number; // 0-indexed + }>; + }; + response: AskResponse | null; // null = pending, non-null = answered +} +``` + +### `scout-request` — Parallel codebase exploration + +The subagent needs codebase context. 
The request contains scout task +definitions; the response contains file paths to findings. + +```typescript +interface ScoutIpcFile { + type: "scout-request"; + id: string; + createdAt: string; + scouts: Array<{ + id: string; // e.g., "auth-patterns" + role: string; // e.g., "security auditor" + prompt: string; // e.g., "Find all auth middleware in src/" + }>; + response: { findings: string[]; failures: string[] } | null; +} +``` + +--- + +## Atomic Writes + +All IPC file operations use atomic tmp-rename: + +```typescript +// Write: .ipc.tmp.json → rename → ipc.json +async function writeIpcFile(dir, data) { + const tmp = path.join(dir, ".ipc.tmp.json"); + const target = path.join(dir, "ipc.json"); + await fs.writeFile(tmp, JSON.stringify(data, null, 2) + "\n", "utf8"); + await fs.rename(tmp, target); +} + +// Read: returns null on missing file OR parse error +// Parse errors are treated as "not ready" — handles partial writes on non-POSIX systems +async function readIpcFile(dir): Promise<IpcFile | null> { + try { + const raw = await fs.readFile(path.join(dir, "ipc.json"), "utf8"); + return JSON.parse(raw); + } catch { + return null; + } +} + +// Delete: removes both ipc.json and .ipc.tmp.json, swallows ENOENT +async function deleteIpcFile(dir) { ... } +``` + +--- + +## Poll Timing + +| Poller | Interval | Purpose | +|--------|----------|---------| +| **Parent IPC responder** | 300ms | Detect subagent requests quickly | +| **Subagent tool** | 500ms | Wait for parent response | +| **Web server agent polling** | 500ms | Update agent status in UI | + +The parent polls slightly faster than the subagent to ensure it picks up +requests promptly. Both intervals are low enough for interactive feel. + +--- + +## Parent-Side IPC Responder + +`runIpcResponder()` starts concurrently with the child process (when a web +server handle is available) and terminates when the `AbortSignal` fires +(child process exit → abort). 
+ +``` +while (!signal.aborted) { + sleep(300ms) + ipc = readIpcFile(subagentDir) + if ipc === null or ipc.response !== null → continue + if ipc.type === "ask" → handleAskRequest(...) + if ipc.type === "scout-request" → handleScoutRequest(...) +} +``` + +### Error handling + +The poll loop swallows **all** errors. Transient filesystem issues (e.g., +file being renamed) must not abort the parent session. The next poll cycle +will pick up the file successfully. + +### Idempotence guard + +Before writing a response, the responder re-reads `ipc.json` and validates: +- The file still exists +- The `type` matches the expected request type +- The `id` matches the original request ID +- `response` is still `null` + +This prevents writing a response to a stale or replaced request. + +### Circular import avoidance + +The IPC responder needs to spawn scouts, but importing from `subagent.ts` +would create a circular dependency. Instead, `subagent.ts` injects a +`ScoutSpawnContext` interface at startup: + +```typescript +interface ScoutSpawnContext { + epicDir: string; + spawnScout(task: ScoutTask, scoutDir: string, outputFile: string): Promise; +} +``` + +--- + +## Ask Flow + +``` +intake-llm calls koan_ask_question({ questions: [...] }) + → tool writes AskIpcFile { type: "ask", response: null } + → tool enters 500ms poll loop (LLM turn blocked) + +ipc-responder detects { type: "ask", response: null } + → appends "Other" option to each question + → calls webServer.requestAnswer(questions, signal) + → creates Promise in pendingInputs map + → SSE "ask" event → browser renders QuestionForm + → user fills form, clicks Submit + → POST /api/answer → resolves Promise + → maps answers to AskAnswerPayload + → writes AskResponse to ipc.json (atomic) + +tool poll detects response !== null + → breaks loop + → deleteIpcFile(dir) + → formats answers as structured text + → returns to LLM +``` + +The "Other" option is appended server-side — the LLM never includes it. 
On +the result side, `removeRecommendedTag()` strips the ` (Recommended)` display +suffix before building selection results. + +--- + +## Scout Flow + +``` +intake-llm calls koan_request_scouts({ scouts: [...] }) + → tool writes ScoutIpcFile { type: "scout-request", response: null } + → tool enters 500ms poll loop (LLM turn blocked) + +ipc-responder detects { type: "scout-request", response: null } + → computes scoutDir + outputFile for each task + → webServer.registerAgent(...) for each scout (UI tracking) + → pool(taskIds, concurrency=4, worker): + for each scout (up to 4 concurrent): + → mkdir(scoutDir, { recursive: true }) + → spawnScout(task, scoutDir, outputFile) + → full subagent lifecycle: boot → step 1 → work → complete → exit + → readProjection(scoutDir) → check status === "completed" + → if succeeded: findings.push(outputFile) + → if failed: failures.push(taskId) + → webServer.completeAgent(taskId) + → writes ScoutResponse { findings: [paths], failures: [ids] } to ipc.json + +tool poll detects response !== null + → breaks loop + → deleteIpcFile(dir) + → reads each findings.md file verbatim (inline, not just paths) + → returns concatenated content to LLM +``` + +### Scout pool behavior + +The pool uses a semaphore with limit 4. All scouts are submitted to +`Promise.all` simultaneously; the semaphore gates actual execution. The pool: + +- **Runs all items to completion** regardless of individual failures +- **Reports progress** via optional callback (done/total/active/queued) +- **Does not implement timeouts** — timeout logic belongs in the worker closure + +### Scout success determination + +Scout success is derived from the JSON audit projection, not file existence: + +```typescript +const projection = await readProjection(scoutDir); +succeeded = projection?.status === "completed"; +``` + +A scout can write a partial `findings.md` and then crash. File existence is +not proof of completion. 
+ +### Failed scouts are non-fatal + +The tool result tells the LLM: +`"Failed scouts (non-fatal, proceed without them): task-id-1, task-id-2"` + +The LLM must proceed with whatever findings are available. + +--- + +## Audit Integration + +The audit system (`lib/audit.ts`) runs inside each subagent process and +provides the observability bridge between subagent work and parent/UI polling. + +### Event-sourced design + +- `events.jsonl` — append-only truth (one JSON object per line) +- `state.json` — eagerly materialized projection, written atomically after + every event + +The parent polls `state.json` (cheap file read) instead of parsing the event +log. `fold()` is a pure function so the projection can be rebuilt from the raw +log for testing and crash recovery. + +### Event types + +| Event | Trigger | Key data | +|-------|---------|----------| +| `phase_start` | `BasePhase.begin()` | totalSteps | +| `step_transition` | `handleStepComplete()` | step number, name, total | +| `tool_call` | pi `tool_call` hook | toolCallId, name, input | +| `tool_result` | pi `tool_result` hook | toolCallId, summarized metrics (not full content) | +| `usage` | pi `turn_end` hook | input/output/cacheRead/cacheWrite tokens | +| `heartbeat` | 10s timer | (keeps `updatedAt` fresh during long tool calls) | +| `phase_end` | phase completion | "completed" | + +### Projection fields consumed by parent + +| Field | Consumer | Purpose | +|-------|----------|---------| +| `status` | IPC responder, web server | Scout success, agent completion | +| `step` | Web server | Intake sub-phase derivation | +| `currentToolCallId` | Web server | "doing X" vs "done with X" in UI | +| `completionSummary` | Web server | Scout card summary (500-char prefix of `thoughts`) | +| `tokensSent/Received` | Web server | Token usage display | +| `model` | Web server | Model display | + +### Serialization + +`EventLog.append()` calls are serialized via a promise chain. 
The heartbeat +timer and `tool_result` handler both call `append()` concurrently — without +serialization, two `writeState()` calls race on the shared `.tmp.json` file, +causing ENOENT on rename. diff --git a/docs/state.md b/docs/state.md new file mode 100644 index 0000000..e959595 --- /dev/null +++ b/docs/state.md @@ -0,0 +1,298 @@ +# State & Driver + +How the driver manages epic and story state, routes between phases, and +enforces the file boundary invariant. + +> Parent doc: [architecture.md](./architecture.md) + +--- + +## The File Boundary in Practice + +The driver writes JSON; LLMs write markdown. Tool code bridges both. + +| Actor | Reads | Writes | +|-------|-------|--------| +| **Driver** | `.json` state files, exit codes | `.json` state files | +| **LLM** | `.md` files, codebase files | `.md` files (output) | +| **Tool code** | `.json` state (to validate) | `.json` state + `.md` status (both) | + +### Why state.ts must not write markdown + +The state module (`epic/state.ts`) reads and writes JSON only. Putting +`writeStatusMarkdown()` there would make one module responsible for both +communication channels. `status.md` writes belong exclusively in +`tools/orchestrator.ts`, which bridges the two worlds by writing JSON state +(for the driver) and templated markdown (for LLMs) in the same operation. + +### Filesystem-driven story discovery + +Story IDs are discovered by scanning `stories/*/story.md`, not by reading a +driver-maintained JSON list. The decomposer LLM creates `story.md` files using +the `write` tool — it has no reason to know the JSON state format. Requiring +it to update `epic-state.json` would force an LLM to write JSON, violating the +core invariant. + +The driver discovers what the LLM created by scanning, then populates the JSON +story list itself. + +--- + +## Epic State + +`epic-state.json` in the epic directory root. Tracks the current pipeline +phase and the list of story IDs. 
+ +```typescript +interface EpicState { + phase: EpicPhase; // intake → decomposition → review → executing → completed + stories: string[]; // populated by driver after filesystem scan +} +``` + +### Epic phases + +| Phase | What happens | +|-------|-------------| +| `intake` | Intake subagent reads conversation, scouts codebase, asks user questions | +| `decomposition` | Decomposer subagent splits work into stories | +| `review` | User reviews story sketches in web UI (approve/remove) | +| `executing` | Story loop: orchestrator → planner → executor → orchestrator → next | +| `completed` | All stories done or skipped | + +**`scouting` is intentionally absent.** Scouts run inside the IPC responder +during intake/decomposer/planner phases, not as a top-level phase. Adding it +would imply a driver state that never exists. + +--- + +## Story State + +One `state.json` per story in `stories/{storyId}/`. + +```typescript +interface StoryState { + storyId: string; + status: StoryStatus; + retryCount: number; + maxRetries: number; // default: 2 + failureSummary?: string; // set by koan_retry_story + skipReason?: string; // set by koan_skip_story or driver on budget exhaustion + updatedAt: string; +} +``` + +### Story status lifecycle + +``` +pending ──→ selected ──→ planning ──→ executing ──→ verifying ──→ done + │ ↑ │ + │ └──────────── retry ←───────────────────┤ + │ │ + └──→ skipped ←───────────────────────────────────────┘ +``` + +| Status | Set by | Meaning | +|--------|--------|---------| +| `pending` | Driver (initial) | Story exists, not yet started | +| `selected` | Orchestrator (`koan_select_story`) | Chosen for execution | +| `planning` | Driver | Planner subagent is running | +| `executing` | Driver | Executor subagent is running | +| `verifying` | Driver | Post-execution orchestrator is evaluating | +| `done` | Orchestrator (`koan_complete_story`) | Successfully completed | +| `retry` | Orchestrator (`koan_retry_story`) | Failed, queued for re-execution | +| 
`skipped` | Orchestrator (`koan_skip_story`) or Driver | Permanently skipped | + +**Driver-internal states** (`planning`, `executing`, `verifying`) are set by +the driver only. The LLM never writes these — it reads them indirectly via +`status.md`. + +**Orchestrator-driven transitions** (`selected`, `done`, `retry`, `skipped`) +are set by orchestrator tool calls. Each tool validates the source status +before transitioning: + +| Tool | Valid source | Target | +|------|-------------|--------| +| `koan_select_story` | `pending`, `retry` | `selected` | +| `koan_complete_story` | `verifying` | `done` | +| `koan_retry_story` | `verifying` | `retry` | +| `koan_skip_story` | `pending`, `retry` | `skipped` | + +### No `escalated` status + +Escalation is handled via `koan_ask_question` — the orchestrator asks the user +a question through IPC, gets an answer, then decides `retry` or `skip`. A +separate `escalated` status was tried and created a dead routing path. + +### Retry budget + +Each story starts with `maxRetries: 2`. When the driver sees `status: "retry"`, +it increments `retryCount` and re-executes. When `retryCount >= maxRetries`, +the driver sets the story to `skipped`: + +``` +skipReason: "Retry budget exhausted after N attempt(s). Last failure: {failureSummary}" +``` + +The `failureSummary` field flows from `koan_retry_story` (the orchestrator +writes a concrete description of what went wrong) to `retryContext` in the +executor's `task.json` on re-execution. + +--- + +## Driver Routing + +The driver's story loop is a deterministic state machine: + +```typescript +while (true) { + const stories = await loadAllStoryStates(epicDir); + const routing = routeFromState(stories); + + switch (routing.action) { + case "retry": → re-execute story (increment retryCount) + case "execute": → plan + execute story + case "complete": → all stories terminal → exit loop + case "error": → no actionable state → fail + } +} +``` + +**Priority:** `retry` is checked before `selected`. 
A story queued for retry +takes precedence over a newly selected story. + +**Terminal states:** exactly `done` and `skipped`. The epic is complete when +every story is in a terminal state. + +**Error state:** If no story is `retry` or `selected` and not all are terminal, +the driver reports: "orchestrator may have exited without a routing decision." + +### Story execution pipeline + +For each story selected for execution: + +``` +Driver sets status → planning + → spawn planner subagent + → if planner fails: skip executor, go to post-execution orchestrator +Driver sets status → executing + → spawn executor subagent +Driver sets status → verifying + → spawn orchestrator (post-execution) + → orchestrator decides: koan_complete_story / koan_retry_story / koan_skip_story +``` + +### Planner failure fallthrough + +When the planner exits with a non-zero exit code, the driver skips the executor +and proceeds directly to the post-execution orchestrator. This gives the +orchestrator a chance to make a routing decision (retry, skip) rather than +leaving the story in a dead state. + +### Model config gate + +When a web server is available, the pipeline blocks at startup until the user +confirms model tier selection. This happens before any subagent spawns. + +### Spec review gate + +After decomposition, story sketches are presented for human review in the web +UI. The user can approve or remove stories. Removed stories get +`status: "skipped"`, `skipReason: "Removed during spec review"`. When no web +server is running, the gate auto-approves. 
+ +--- + +## Atomic Writes + +All state writes use atomic tmp-file + rename: + +```typescript +async function atomicWriteJson(filePath: string, data: unknown): Promise<void> { + const tmp = `${filePath}.tmp`; + await fs.writeFile(tmp, JSON.stringify(data, null, 2) + "\n", "utf8"); + await fs.rename(tmp, filePath); +} +``` + +This applies to: +- `epic-state.json` (driver) +- `stories/{id}/state.json` (driver + orchestrator tools) +- `stories/{id}/status.md` (orchestrator tools) +- `subagents/{label}/task.json` (driver, before spawn) +- `subagents/{label}/state.json` (audit projection) +- `subagents/{label}/ipc.json` (both sides) + +--- + +## Epic Directory Structure + +``` +{epicDir}/ + epic-state.json # Epic phase + story list + conversation.jsonl # Exported conversation (input to intake) + context.md # Written by intake (conversation, codebase findings, decisions) + stories/ + {storyId}/ + story.md # Written by decomposer + state.json # Story lifecycle state + status.md # Templated status for LLM consumption + plan/ + plan.md # Written by planner + subagents/ + intake/ + task.json # Task manifest + state.json # Audit projection + events.jsonl # Audit log + stdout.log, stderr.log + decomposer/ + ... + scout-{id}-{timestamp}/ + task.json + findings.md # Scout output + ... + planner-{storyId}/ + ... + executor-{storyId}/ + ... + orchestrator-pre/ + ... + orchestrator-post-{storyId}/ + ... +``` + +--- + +## Audit Projection (`state.json`) + +Each subagent writes a `state.json` (the "projection") to its directory. The +projection is an eagerly-materialized summary of the subagent's current state, +updated atomically after every audit event. The web server polls it to push +SSE events to the UI without having to replay the full `events.jsonl`. 
+ +Key projection fields common to all roles: + +| Field | Type | Meaning | +|-------|------|---------| +| `phase` | string | Overall phase name (e.g., "intake", "decomposition") | +| `step` | number | Current step index within the phase | +| `stepName` | string | Human-readable step label (e.g., "Scout (round 2)") | +| `tokensSent` | number | Cumulative tokens in | +| `tokensReceived` | number | Cumulative tokens out | + +Intake-specific fields (zero/null for all other roles): + +| Field | Type | Meaning | +|-------|------|---------| +| `intakeConfidence` | `"exploring"\|"low"\|"medium"\|"high"\|"certain"\|null` | Last confidence level declared by `koan_set_confidence`. Null until first declaration; retains last value between loop iterations (not reset in projection on loop-back). | +| `intakeIteration` | number | Current loop iteration (1-based). Updated by `confidence_change` and `iteration_start` events. Zero for non-intake subagents. | + +**Note on `intakeConfidence` and loop-back:** When `getNextStep()` decides to +loop from Reflect (step 4) back to Scout (step 2), it resets +`ctx.intakeConfidence = null` internally. This internal reset is NOT +propagated to the projection immediately — the projection retains the +previous iteration's confidence level until the next `koan_set_confidence` +call emits a `confidence_change` event. The UI therefore shows the last +declared confidence between iterations, which is intentional: it reflects +the most recent authoritative assessment rather than showing a transient +null state. diff --git a/docs/subagents.md b/docs/subagents.md new file mode 100644 index 0000000..5278690 --- /dev/null +++ b/docs/subagents.md @@ -0,0 +1,397 @@ +# Subagents + +How koan spawns, manages, and terminates LLM subagent processes. + +> Parent doc: [architecture.md](./architecture.md) + +--- + +## Task Manifest + +Every subagent starts as a generic `pi -p` process with one koan-specific +input: a directory path. 
The koan extension reads `task.json` from that +directory to learn what kind of subagent it is, what epic it belongs to, and +what work to perform. + +### `task.json` schema + +The manifest is a discriminated union on the `role` field. Common fields +(`role`, `epicDir`) appear on every variant; role-specific fields are nested +naturally rather than flattened into a shared namespace. + +```typescript +// Common to all subagents +interface SubagentTaskBase { + role: SubagentRole; + epicDir: string; +} + +// Role-specific variants +interface IntakeTask extends SubagentTaskBase { + role: "intake"; +} + +interface ScoutTask extends SubagentTaskBase { + role: "scout"; + question: string; // What to investigate + outputFile: string; // Where to write findings (relative to subagentDir) + investigatorRole: string; // Persona for the scout ("security auditor", etc.) +} + +interface DecomposerTask extends SubagentTaskBase { + role: "decomposer"; +} + +interface OrchestratorTask extends SubagentTaskBase { + role: "orchestrator"; + stepSequence: "pre-execution" | "post-execution"; + storyId?: string; +} + +interface PlannerTask extends SubagentTaskBase { + role: "planner"; + storyId: string; +} + +interface ExecutorTask extends SubagentTaskBase { + role: "executor"; + storyId: string; + retryContext?: string; // Failure summary from previous attempt +} + +type SubagentTask = + | IntakeTask | ScoutTask | DecomposerTask + | OrchestratorTask | PlannerTask | ExecutorTask; +``` + +### Lifecycle + +`task.json` is **write-once, read-once**: + +1. Parent calls `ensureSubagentDirectory()` → creates the directory +2. Parent writes `task.json` (atomic: tmp + rename) +3. Parent spawns `pi -p --koan-dir {subagentDir} ...` +4. Child extension reads `task.json` at startup → dispatches to phase +5. `task.json` is never modified after spawn + +This makes every subagent directory **self-describing** and **inspectable** +after the fact. 
`cat task.json` shows exactly what the subagent was asked +to do. + +### Why not CLI flags + +The previous design passed task configuration as 9 CLI flags +(`--koan-role`, `--koan-epic-dir`, `--koan-subagent-dir`, +`--koan-story-id`, `--koan-step-sequence`, `--koan-retry-context`, +`--koan-scout-question`, `--koan-scout-output-file`, `--koan-scout-role`). + +Problems this caused: + +| Problem | Example | +|---------|---------| +| **Flat namespace collision** | `--koan-role` (pipeline role: "scout") vs `--koan-scout-role` (investigator persona: "security auditor") — two unrelated concepts sharing a prefix | +| **Unstructured** | Role-specific fields mixed with common fields; `extraFlags: string[]` escape hatch needed for extensibility | +| **Size limits** | `--koan-retry-context` carries multi-paragraph failure summaries — visible in `ps aux`, subject to `ARG_MAX` | +| **Uninspectable** | After a crash, reconstructing what a subagent was asked to do requires parsing process arguments from logs | +| **Inconsistent** | Runtime communication uses files (ipc.json); observation uses files (state.json); but task input used CLI args | + +--- + +## Spawn Flow + +### Parent side + +``` +driver: ensureSubagentDirectory(epicDir, label) → subagentDir +driver: write task.json to subagentDir (atomic) +driver: webServer.registerAgent(...) 
+driver: webServer.trackSubagent(subagentDir, role) +driver: spawnSubagent(task, subagentDir, opts) + → resolves model for role (3-tier: strong/standard/cheap) + → builds CLI args: pi -p -e ext --koan-dir dir [--model model] "boot prompt" + → spawn("pi", args, { cwd, stdio: ["ignore", "pipe", "pipe"] }) + → captures stdout/stderr to subagentDir/stdout.log, stderr.log + → starts IPC responder concurrently (if webServer available) + → waits for proc.on("close") + → aborts IPC responder + → returns { exitCode, stderr, subagentDir } +driver: webServer.clearSubagent() +driver: webServer.completeAgent(id) +driver: checks exitCode, routes to next phase +``` + +### Child side + +``` +pi -p starts with koan extension +koan.ts init: + → registers --koan-dir flag + → creates RuntimeContext { epicDir: null, subagentDir: null, onCompleteStep: null } + → registerAllTools(pi, ctx) — all tools, unconditionally + +before_agent_start fires (after _buildRuntime snapshot): + → reads --koan-dir flag + → reads task.json from dir → SubagentTask (typed, validated) + → sets ctx.epicDir = task.epicDir, ctx.subagentDir = dir + → opens EventLog (audit trail) + → wires pi event hooks (tool_call, tool_result, turn_end, session_shutdown) + → dispatchPhase(pi, task, ctx): + → matches task.role → instantiates phase class → phase.begin() + +phase.begin(): + → step = 0, active = true + → ctx.onCompleteStep = handleStepComplete + +LLM receives boot prompt: + "You are a koan {role} agent. Call koan_complete_step to receive your instructions." +``` + +### Boot prompt + +``` +"You are a koan {role} agent. Call koan_complete_step to receive your instructions." +``` + +One sentence. No task content. The role name is included for primacy — it +anchors the LLM's identity before it receives any instructions. Task-specific +parameters live in `task.json` and flow into step guidance via the phase class. 
+ +### Fail-fast guards + +`dispatchPhase` validates required `task.json` fields before instantiating: + +| Role | Required fields | Failure if missing | +|------|----------------|-------------------| +| scout | `question`, `outputFile` | Step 1 guidance has no assignment → LLM outputs confused text → exits | +| planner | `storyId` | Malformed paths like `stories//plan/plan.md` | +| executor | `storyId` | Same path issue | + +--- + +## Step-First Workflow (BasePhase) + +`BasePhase` is the abstract superclass for all six phase classes. It manages: + +- **Step counter** — starts at 0 (boot state), increments monotonically +- **System prompt injection** — via `before_agent_start` event handler +- **Permission fence** — via `tool_call` event handler (default-deny) +- **Step transition** — via `handleStepComplete()` callback + +### Step progression state machine + +``` +begin() → step=0, active=true, arms ctx.onCompleteStep + +LLM calls koan_complete_step: + step == 0 → step=1, return formatStep(getStepGuidance(1)) [boot transition] + otherwise → validateStepCompletion(step) [pre-condition check] + → nextStep = getNextStep(step) [pure: decides where to go] + nextStep == null → active=false, return null → "Phase complete." [done] + nextStep < prev → onLoopBack(prev, nextStep) [side effects of loop] + nextStep != null → onStepUpdated(nextStep) [sync ctx fields] + → step=nextStep, return formatStep(getStepGuidance(nextStep)) [advance] +``` + +`BasePhase` provides three overridable hooks for non-linear flows: + +| Hook | Purpose | Default | +|------|---------|---------| +| `getNextStep(step)` | Returns next step number or null (done). **Must be pure.** | Linear: step+1, null at totalSteps | +| `onLoopBack(from, to)` | Side effects of backward transitions: state resets, counter increments, event emission. Async — properly awaited. | no-op | +| `validateStepCompletion(step)` | Pre-condition check before advancing. 
Returns null to allow or an error string to block (returned as tool result so LLM can fix it). | null (always allow) | + +`IntakePhase` overrides all three to implement a confidence-gated loop over +steps 2–4. See [intake-loop.md](./intake-loop.md) for details. + +Key invariants: +- **`getNextStep()` is pure** — it only returns a step number. Mutation belongs in `onLoopBack()`. +- **`step_transition` is NOT emitted at `begin()`** — it fires when step 1 + guidance is first returned, so the event log reflects when the LLM actually + begins work. +- **`ctx.onCompleteStep` is nulled on completion** — prevents stale callbacks. +- **Only one phase per RuntimeContext** — `begin()` throws if `ctx.onCompleteStep` + is already occupied. + +### System prompt vs task content + +The system prompt (injected via `before_agent_start`) establishes **role +identity and rules** — who you are, what you must/must not do, what output +files you produce, what tools you have. It deliberately omits task details. + +Task details arrive as **step guidance** — the return value of +`koan_complete_step` — after the LLM has already established the tool-calling +pattern. This separation is load-bearing (see +[architecture pitfalls](./architecture.md#pitfalls)). + +### formatStep structure + +Every step guidance string has the same structure: + +``` +{title} +{"=".repeat(title.length)} + +{instructions} + +WHEN DONE: Call koan_complete_step with your findings in the `thoughts` parameter. +Do NOT call this tool until the work described in this step is finished. +``` + +The invoke-after directive is always **last** (recency reinforcement). Steps +that need the LLM to call a domain tool before `koan_complete_step` (e.g., +`koan_select_story`) can override `invokeAfter`. + +### The `thoughts` parameter + +`thoughts` is **internal chain-of-thought reasoning only**. It is NOT task +output and MUST NOT be treated as such: + +- Task output goes to files (`findings.md`, `context.md`, etc.) 
+- The driver/parent reads those files after the subagent exits +- `thoughts` exists so models that cannot mix text + tool_call in one response + (e.g., GPT-5-codex) can still express reasoning while advancing the workflow +- A 500-char prefix of `thoughts` is captured in the audit projection as + `completionSummary` for scout UI display — this is the only consumer + +--- + +## Permissions + +Default-deny, role-based, enforced at runtime via the `tool_call` event handler +in `BasePhase`. + +### READ_TOOLS (always allowed) + +`bash`, `read`, `grep`, `glob`, `find`, `ls` — allowed for all roles. This is +an accepted limitation: `bash` can write files, but distinguishing read-bash +from write-bash is intractable at the permission layer. Prompt engineering +constrains intended use; enforcement does not. + +### Role permission matrix + +| Role | koan tools | write/edit | notes | +|------|-----------|------------|-------| +| **intake** | `koan_complete_step`, `koan_ask_question`, `koan_request_scouts`, `koan_set_confidence` | path-scoped to epicDir | `koan_set_confidence` blocked in step 1 (Extract) | +| **scout** | `koan_complete_step` | path-scoped to epicDir | No `koan_ask_question` (no user interaction). No `koan_request_scouts` (no nested scouts). 
| +| **decomposer** | `koan_complete_step`, `koan_ask_question`, `koan_request_scouts` | path-scoped to epicDir | — | +| **orchestrator** | `koan_complete_step`, `koan_ask_question`, `koan_select_story`, `koan_complete_story`, `koan_retry_story`, `koan_skip_story` | path-scoped to epicDir | No `koan_request_scouts` — orchestrator uses bash for verification | +| **planner** | `koan_complete_step`, `koan_ask_question`, `koan_request_scouts` | path-scoped to epicDir | — | +| **executor** | `koan_complete_step`, `koan_ask_question` | **unrestricted** | Must modify the actual codebase | + +### Path scoping + +Planning roles (intake, scout, decomposer, orchestrator, planner) can only +`write`/`edit` files inside the epic directory. The permission check resolves +both the tool's `path` argument and the epic directory, then verifies the tool +path starts with the epic path. If `epicDir` or the path argument is missing, +the write is allowed (cannot scope-check without context). + +--- + +## Model Tiers + +Roles map deterministically to 3 tiers: + +| Tier | Roles | Purpose | +|------|-------|---------| +| **strong** | intake, decomposer, orchestrator, planner | Complex reasoning, planning, decomposition | +| **standard** | executor | Code implementation | +| **cheap** | scout | Narrow codebase investigation | + +The user configures which specific model each tier uses via the web UI at +pipeline start (model config gate). If no config exists, `resolveModelForRole` +returns `undefined` and the `--model` flag is omitted, preserving pi's +current active model as the implicit fallback. + +Model tier config is all-or-nothing: all 3 tiers must be present. Partial +configs are treated as absent and logged. 
+ +--- + +## Scout Isolation + +Scouts are deliberately constrained compared to other roles: + +- **No web server handle** — scouts cannot interact with the user or the UI +- **No `koan_ask_question`** — scouts do not ask questions +- **No `koan_request_scouts`** — scouts do not spawn nested scouts +- **No IPC responder** — since there is no web server, no IPC responder runs +- **Single step** — scouts have `totalSteps = 1`; they do one job and exit +- **Cheap model** — scouts use the cheapest available model +- **Parallel execution** — up to 4 scouts run concurrently via bounded pool +- **Non-fatal failures** — a failed scout does not abort the parent; its task + ID is reported in the `failures` array and the LLM is told to proceed + +Scout task parameters (`question`, `outputFile`, `investigatorRole`) live in +the scout's `task.json`. The boot prompt stays minimal; `ScoutPhase` reads the +task manifest and injects the parameters into step 1 guidance. + +--- + +## Subagent Directory Layout + +After a subagent runs, its directory contains: + +``` +{subagentDir}/ + task.json # Input: what to do (written by parent before spawn) + state.json # Output: audit projection (written by child, polled by parent) + events.jsonl # Output: append-only audit log + ipc.json # Transient: runtime communication (created/deleted per request) + stdout.log # Captured stdout from pi -p process + stderr.log # Captured stderr from pi -p process + findings.md # Task output (scouts) + context.md # Task output (intake — conversation, codebase findings, decisions) +``` + +The three JSON files have distinct lifecycles per +[architecture.md § Directory-as-contract](./architecture.md#6-directory-as-contract): + +| File | Writer | Reader | When | +|------|--------|--------|------| +| `task.json` | Parent | Child | Once at startup | +| `state.json` | Child | Parent | Continuous (500ms polling) | +| `ipc.json` | Both | Both | Per-request (created, answered, deleted) | + +--- + +## Web Server 
Integration + +The parent registers each subagent with the web server for UI tracking: + +```typescript +webServer.registerAgent({ id, name, dir, role, model, parent }); +// → starts 500ms polling of audit projection + recent logs +// → SSE "agents" event to browser + +webServer.trackSubagent(dir, role, storyId?); +// → starts 500ms polling for "subagent" + "logs" SSE events + +// ... subagent runs ... + +webServer.clearSubagent(); +// → stops tracking timer, emits SSE "subagent-idle" + +webServer.completeAgent(id); +// → stops polling, final readProjection, emits SSE "agents" with terminal status +``` + +**Dual polling for intake agent:** Both `registerAgent()` and +`trackSubagent()` poll at 500ms. `registerAgent` polling derives the intake +sub-phase for the progress bar: + +| Step | Pending ask? | Sub-phase | +|------|-------------|-----------| +| 1 | — | `"extract"` | +| 2 | — | `"scout"` | +| 3 | yes | `"questions"` | +| 3 | no | `"deliberate"` | +| 4 | — | `"reflect"` | +| 5 | — | `"synthesize"` | + +Steps 2–4 repeat across iterations; the server additionally reads +`intakeConfidence` and `intakeIteration` from the audit projection to populate +the `intake-progress` SSE event for UI visualization. + +This derivation is server-side — the server maps step numbers to sub-phase +names. The LLM does not report its sub-phase. 
From e85281ca2eb72908cdfd66781dedf49007ffc074 Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Thu, 19 Mar 2026 23:24:20 +0700 Subject: [PATCH 068/412] scout dispatch card, monitor centering alignment --- src/planner/lib/audit.ts | 10 ++-- src/planner/web/css/layout.css | 41 +++++++++++++++- .../web/js/components/ActivityFeed.jsx | 41 ++++++++++++++++ .../web/js/components/AgentMonitor.jsx | 48 ++++++++++--------- 4 files changed, 112 insertions(+), 28 deletions(-) diff --git a/src/planner/lib/audit.ts b/src/planner/lib/audit.ts index 91ebabe..424115e 100644 --- a/src/planner/lib/audit.ts +++ b/src/planner/lib/audit.ts @@ -581,6 +581,8 @@ export interface LogLine { ts?: string; // Expandable content body: thinking text, tool output, etc. body?: string; + // Structured scout data for koan_request_scouts cards. + scouts?: Array<{ id: string; role: string }>; } interface ToolShape { @@ -600,7 +602,7 @@ const KOAN_SHAPES: Record = { koan_retry_story: { keys: ["story_id", "failure_summary"], freeform: ["failure_summary"], highValue: true }, koan_skip_story: { keys: ["story_id", "reason"], freeform: ["reason"], highValue: true }, koan_ask_question: { keys: ["questions"], arrays: ["questions"], highValue: true }, - koan_request_scouts: { keys: ["scouts"], arrays: ["scouts"], highValue: true }, + koan_request_scouts: { keys: [], highValue: true }, }; // Reads events.jsonl, correlates tool pairs, and returns structured log entries. @@ -910,10 +912,10 @@ function formatKoanInvocation(inv: ToolInvocation): LogLine { inFlight: inv.inFlight, }; - // Expand koan_request_scouts with per-scout detail lines. + // Structured scout data for the UI card. if (inv.tool === "koan_request_scouts" && Array.isArray(inv.input["scouts"])) { - line.details = (inv.input["scouts"] as Array>).map( - (s) => `${s["id"] ?? "?"} (${s["role"] ?? "agent"})`, + line.scouts = (inv.input["scouts"] as Array>).map( + (s) => ({ id: String(s["id"] ?? "?"), role: String(s["role"] ?? 
"agent") }), ); } diff --git a/src/planner/web/css/layout.css b/src/planner/web/css/layout.css index 43e5214..c3e79f7 100644 --- a/src/planner/web/css/layout.css +++ b/src/planner/web/css/layout.css @@ -200,6 +200,40 @@ user-select: none; } +/* ---- Scout dispatch card ---- */ + +.activity-card-scouts .activity-card-tool { + color: var(--blue); +} + +.scout-list { + display: flex; + flex-wrap: wrap; + gap: var(--gap-xs); + padding: 0 var(--gap-md) var(--gap-sm); +} + +.scout-entry { + display: flex; + align-items: baseline; + gap: var(--gap-sm); + padding: 3px var(--gap-sm); + font-family: var(--font-mono); + font-size: var(--font-size-xs); + background: var(--bg); + border-radius: var(--radius-sm); + border: 1px solid var(--border); +} + +.scout-name { + color: var(--text-muted); + font-weight: 500; +} + +.scout-role { + color: var(--text-ghost); +} + .activity-card-more:hover { color: var(--text-strong); } @@ -239,7 +273,7 @@ padding-left: 12px; } -/* Monitor — sticky bottom, sizes to content */ +/* Monitor — sticky bottom, sizes to content, centered like activity feed */ .monitor { flex: 0 0 auto; max-height: 40vh; @@ -252,6 +286,11 @@ -webkit-mask-image: linear-gradient(to bottom, transparent, black 12px, black); } +.monitor-inner { + max-width: 960px; + margin: 0 auto; +} + .agent-table-header { display: flex; align-items: center; diff --git a/src/planner/web/js/components/ActivityFeed.jsx b/src/planner/web/js/components/ActivityFeed.jsx index f3c5c50..0bc2477 100644 --- a/src/planner/web/js/components/ActivityFeed.jsx +++ b/src/planner/web/js/components/ActivityFeed.jsx @@ -73,6 +73,36 @@ function ThinkingCard({ line, isInFlight, isFlashing }) { ) } +/** Card for koan_request_scouts — shows dispatched scouts with name + role */ +function ScoutCard({ line, isInFlight, isFlashing }) { + const scouts = line.scouts || [] + const cls = [ + 'activity-card', + 'activity-card-scouts', + isInFlight ? 'activity-card-active' : '', + isFlashing ? 
'activity-flash' : '', + ].filter(Boolean).join(' ') + + return ( +
+
+ + dispatching {scouts.length} scout{scouts.length !== 1 ? 's' : ''} + + {isInFlight && } +
+
+ {scouts.map((s, i) => ( +
+ {s.id} + {s.role} +
+ ))} +
+
+ ) +} + /** Standard line for tool calls and lifecycle events */ function ActivityLine({ line, isInFlight, isFlashing }) { const cls = [ @@ -155,6 +185,17 @@ export function ActivityFeed() { ) } + if (line.scouts) { + return ( + + ) + } + return ( -
- Subagents -
- {running} - {done > 0 && {done}} +
+
+ Subagents +
+ {running} + {done > 0 && {done}} +
+ + {(sent > 0 || recv > 0) ? `↑${formatTokens(sent)} ↓${formatTokens(recv)}` : ''} +
- - {(sent > 0 || recv > 0) ? `↑${formatTokens(sent)} ↓${formatTokens(recv)}` : ''} - + + + + + + + + + + + + + {agents.map(a => )} + +
agentmodel↑ sent↓ recvdoing
- - - - - - - - - - - - - {agents.map(a => )} - -
agentmodel↑ sent↓ recvdoing
) } From 71eb4422473fe34a8a9524c931f182e470fd4053 Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Thu, 19 Mar 2026 23:48:30 +0700 Subject: [PATCH 069/412] expandable story cards in review gate --- src/planner/driver.ts | 12 ++- src/planner/web/css/components.css | 95 +++++++++++++++---- .../web/js/components/forms/ReviewForm.jsx | 71 ++++++++++++-- src/planner/web/server-types.ts | 1 + 4 files changed, 151 insertions(+), 28 deletions(-) diff --git a/src/planner/driver.ts b/src/planner/driver.ts index 0216637..f5c9035 100644 --- a/src/planner/driver.ts +++ b/src/planner/driver.ts @@ -417,10 +417,18 @@ export async function runPipeline( if (webServer && storyIds.length > 0) { webServer.pushNotification("Decomposition complete. Review story sketches...", "info"); - const titles = await Promise.all(storyIds.map((id) => readStoryTitle(epicDir, id))); + const storyData = await Promise.all(storyIds.map(async (id) => { + const storyPath = path.join(epicDir, "stories", id, "story.md"); + try { + const raw = await fs.readFile(storyPath, "utf8"); + const title = readStoryTitle(epicDir, id); + return { raw, title: await title }; + } catch { return { raw: "", title: id }; } + })); const reviewStories: ReviewStory[] = storyIds.map((storyId, i) => ({ storyId, - title: titles[i] ?? storyId, + title: storyData[i].title ?? 
storyId, + content: storyData[i].raw, })); const reviewResult = await webServer.requestReview(reviewStories); diff --git a/src/planner/web/css/components.css b/src/planner/web/css/components.css index ce2c191..a112b3f 100644 --- a/src/planner/web/css/components.css +++ b/src/planner/web/css/components.css @@ -390,39 +390,49 @@ } /* ---- Review checklist ---- */ -.review-story { +/* ---- Review story cards ---- */ + +.review-card { + border: 1px solid var(--border); + border-radius: var(--radius-md); + background: var(--bg-surface); + margin-bottom: var(--gap-sm); + overflow: hidden; + transition: border-color 150ms; +} + +.review-card-approved { + border-color: var(--green-border); +} + +.review-card-header { display: flex; align-items: center; gap: var(--gap-md); padding: var(--gap-sm) var(--gap-md); - border: 1px solid var(--border); - border-radius: var(--radius-sm); - background: var(--bg); - margin-bottom: var(--gap-sm); cursor: pointer; user-select: none; } -.review-story.checked { - border-color: var(--green-border); - background: var(--green-bg); +.review-card-checkbox { + flex-shrink: 0; + padding: 2px; } -.review-story-checkbox { +.review-checkbox { width: 16px; height: 16px; border: 2px solid var(--text-ghost); border-radius: 3px; - flex-shrink: 0; transition: border-color 100ms, background 100ms; } -.review-story.checked .review-story-checkbox { +.review-checkbox.checked { border-color: var(--green-border); background: var(--green-border); } -.review-story.checked .review-story-checkbox::after { +.review-checkbox.checked::after { content: "✓"; display: block; color: #fff; @@ -431,17 +441,70 @@ line-height: 12px; } -.review-story-id { +.review-card-title { + flex: 1; + min-width: 0; + display: flex; + align-items: baseline; + gap: var(--gap-sm); +} + +.review-card-id { font-family: var(--font-mono); font-size: var(--font-size-md); color: var(--text); font-weight: 600; + flex-shrink: 0; } -.review-story-title { - font-family: var(--font-sans); - 
font-size: var(--font-size-md); +.review-card-desc { + font-family: var(--font-mono); + font-size: var(--font-size-sm); color: var(--text-muted); + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} + +.review-card-chevron { + font-family: var(--font-mono); + font-size: var(--font-size-sm); + color: var(--text-ghost); + flex-shrink: 0; + width: 16px; + text-align: center; +} + +.review-card-body { + padding: 0 var(--gap-md) var(--gap-sm); + padding-left: calc(var(--gap-md) + 16px + var(--gap-md)); + font-family: var(--font-mono); + font-size: var(--font-size-xs); + color: var(--text-dim); + white-space: pre-wrap; + word-break: break-word; + line-height: 1.5; +} + +.review-card-body:not(.expanded) { + display: -webkit-box; + -webkit-line-clamp: 3; + -webkit-box-orient: vertical; + overflow: hidden; +} + +.review-card-more { + padding: 2px var(--gap-md) var(--gap-sm); + padding-left: calc(var(--gap-md) + 16px + var(--gap-md)); + font-family: var(--font-mono); + font-size: var(--font-size-xs); + color: var(--blue); + cursor: pointer; + user-select: none; +} + +.review-card-more:hover { + color: var(--text-strong); } /* ---- Loading spinner ---- */ diff --git a/src/planner/web/js/components/forms/ReviewForm.jsx b/src/planner/web/js/components/forms/ReviewForm.jsx index ee878e3..9e5839b 100644 --- a/src/planner/web/js/components/forms/ReviewForm.jsx +++ b/src/planner/web/js/components/forms/ReviewForm.jsx @@ -1,7 +1,59 @@ -import { useState } from 'preact/hooks' +import { useState, useRef, useEffect } from 'preact/hooks' import { useStore } from '../../store.js' import { submitReview } from '../../lib/api.js' +function StoryCard({ story, isApproved, onToggle }) { + const [expanded, setExpanded] = useState(false) + const bodyRef = useRef(null) + const [isClamped, setIsClamped] = useState(false) + + useEffect(() => { + const el = bodyRef.current + if (el) setIsClamped(el.scrollHeight > el.clientHeight + 2) + }, [story.content, expanded]) + + function 
handleCheckbox(e) { + e.stopPropagation() + onToggle() + } + + function handleExpand() { + if (story.content) setExpanded(v => !v) + } + + return ( +
+
+
+
+
+
+ {story.storyId} + {story.title} +
+ {story.content && ( + {expanded ? '▾' : '▸'} + )} +
+ {story.content && ( + <> +
+ {story.content} +
+ {!expanded && isClamped && ( +
+ show spec ▸ +
+ )} + + )} +
+ ) +} + export function ReviewForm({ token }) { const { requestId, payload: stories } = useStore(s => s.pendingInput) const [approved, setApproved] = useState(() => new Set(stories.map(s => s.storyId))) @@ -28,18 +80,17 @@ export function ReviewForm({ token }) { return (

Review story sketches

-

Review stories before execution begins.

+

+ Review stories before execution begins. Click a story to inspect its specification. +

{stories.map(story => ( -
toggle(story.storyId)} - > -
- {story.storyId} - — {story.title} -
+ story={story} + isApproved={approved.has(story.storyId)} + onToggle={() => toggle(story.storyId)} + /> ))}
diff --git a/src/planner/web/server-types.ts b/src/planner/web/server-types.ts index 2812073..23aacbb 100644 --- a/src/planner/web/server-types.ts +++ b/src/planner/web/server-types.ts @@ -96,6 +96,7 @@ export function buildMultiSelectionResult( export interface ReviewStory { storyId: string; title: string; + content: string; } export interface ReviewResult { From 2b3afeb19ec621e994e9559a309364b8dddc0c03 Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Fri, 20 Mar 2026 13:35:24 +0700 Subject: [PATCH 070/412] decrease web server polling interval from 500ms to 50ms --- src/planner/web/server.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/planner/web/server.ts b/src/planner/web/server.ts index 075a30a..4943f75 100644 --- a/src/planner/web/server.ts +++ b/src/planner/web/server.ts @@ -446,7 +446,7 @@ export async function startWebServer(epicDir: string): Promise pushEvent("intake-progress", currentIntakeProgress); } } - }, 500); + }, 50); timer.unref(); agent.pollingTimer = timer; } @@ -682,7 +682,7 @@ export async function startWebServer(epicDir: string): Promise pushEvent("subagent", event); } } catch { /* Non-fatal */ } - }, 500); + }, 50); timer.unref(); trackingTimer = timer; }, From 355cb64b2510ef9d51264062e579caf7410ff75d Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Fri, 20 Mar 2026 13:35:35 +0700 Subject: [PATCH 071/412] document event-sourced audit, SSE lifecycle, and general architecture principles --- docs/architecture.md | 147 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 138 insertions(+), 9 deletions(-) diff --git a/docs/architecture.md b/docs/architecture.md index ad5c34d..55cf4eb 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -159,6 +159,7 @@ This is not optional — the IPC responder, web server, and audit system all poll files concurrently. A partial read of `ipc.json` or `state.json` would cause silent data corruption or spurious errors. 
+ --- ## Tool Registration Constraint @@ -177,6 +178,100 @@ is impossible. Instead: This is the **mutable-ref pattern**: static registration, dynamic dispatch. +--- + +## Event-Sourced Audit + +Each subagent maintains an append-only event log (`events.jsonl`) and an +eagerly-materialized projection (`state.json`). This is the observability +layer that drives the web dashboard. + +``` +audit event appended → fold(events) → state.json written atomically +web server polls state.json (50ms) → detects change → pushes SSE event +sse.js handler → Zustand store update → component re-render +``` + +### Rules + +- **`fold()` is pure** — given the same event sequence, it must produce the same + projection. No I/O, no randomness, no side effects inside `fold()`. +- **New event types require a fold handler.** Unknown events are silently ignored + (forward compatibility), but a new event that is not folded contributes nothing + to the projection and will not be visible to the web server or UI. +- **Projection is eagerly materialized.** It is written atomically after every + `append()` call. The web server reads `state.json`, not `events.jsonl`. This + keeps polling cheap (one file read) without needing to replay the log. +- **`append()` calls are serialized.** `EventLog` serializes appends via an + internal promise chain. Concurrent callers (e.g., heartbeat timer and + `tool_result` handler) enqueue without racing on the `.tmp.json` file. + +### Adding new observable state + +When adding a new piece of state that the UI should see, wire all five layers: + +1. **Emit an audit event** — add a typed event and an `emit*()` helper in `lib/audit.ts` +2. **Update `fold()`** — handle the new event type to update the projection field +3. **Update the Projection type** — add the field to the `Projection` interface +4. **Web server polling** — read the new field from the cached projection in the 50ms polling callback and include it in the SSE payload +5. 
**Frontend** — add a handler in `sse.js` and a slice in `store.js` + +All five layers must be present. Missing any one of them produces silent data +loss — the event is appended but never reaches the browser. + +--- + +## SSE Event Lifecycle + +State flows from LLM tool calls to the browser through a five-layer pipeline. +All layers must be wired for a new event type to be visible end-to-end. + +``` +[LLM calls tool] + ↓ +[tool mutates ctx + calls ctx.eventLog.emit*()] ← lib/audit.ts + ↓ +[fold() updates Projection → state.json written atomically] + ↓ +[web server polls state.json every 50ms, detects change] ← web/server.ts + ↓ +[pushEvent(type, payload) → SSE stream → browser] + ↓ +[sse.js addEventListener(type, handler) → useStore.setState()] ← web/js/sse.js + ↓ +[Zustand component selector → React re-render] ← web/js/store.js +``` + +### Concrete example: `koan_set_confidence` + +``` +LLM calls koan_set_confidence({ level: "high" }) + → ctx.intakeConfidence = "high" + → ctx.eventLog.emitConfidenceChange("high", 2) + → append({ kind: "confidence_change", level: "high", iteration: 2 }) + → fold: projection.intakeConfidence = "high", projection.intakeIteration = 2 + → writeState(projection) → state.json + → returns "Confidence set to high." + +web server polling timer fires (50ms) + → pollAgent(intake) → readProjection(dir) → intakeConfidence: "high" + → agent.lastProjection = projection + → intake sub-phase → builds IntakeProgressEvent { confidence: "high", iteration: 2, ... } + → pushEvent("intake-progress", event) → SSE stream + +browser receives "intake-progress" event + → sse.js handler → useStore.setState({ intakeProgress: event }) + → confidence visualization component re-renders +``` + +### Replay on reconnect + +The web server buffers the last value of every stateful SSE event type. On +reconnect, `replayState()` writes all buffered events to the new client. 
This +ensures the browser always has current state after a network drop, without +requiring a full page reload. + + --- ## Pitfalls @@ -239,19 +334,53 @@ constraint. Do not assume bash calls are blocked for planning roles. ### Don't rely on prompt instructions alone to restrict step behavior -Prompt instructions can be ignored by the LLM. The intake phase learned this -the hard way: the original 3-step design told the LLM not to scout in step 1, -but the LLM frontloaded all work into step 1 anyway, causing duplicate scout -requests in later steps. +**The pattern: prompt expresses intent; mechanical gate catches non-compliance. +Neither alone is sufficient.** -Mechanical enforcement is required for any behavior that is critical to -correctness. Use the permission fence (`checkPermission` with `intakeStep`) to -block tools that must not be used in a given step. Use -`validateStepCompletion()` to block step advancement when required pre-calls -have not been made. Prompts express intent; enforcement catches non-compliance. +- **Prompt alone** — the LLM can ignore it. The original 3-step intake design + told the LLM not to scout in step 1; it frontloaded all work into step 1 + anyway, producing duplicate scout requests in later steps. +- **Gate alone** — the LLM receives a cryptic "blocked" error with no context. + It cannot fix the problem if it does not know what it did wrong. 
+ +Three enforcement mechanisms are available — use the appropriate one for the +constraint: + +| Mechanism | What it enforces | How | +|-----------|-----------------|-----| +| **Permission fence** (`checkPermission`) | Which tools a role (or step) can use | Block at `tool_call` event; LLM sees a rejection message | +| **`validateStepCompletion()`** | Required pre-calls before step advancement | Block `koan_complete_step`; LLM sees an error and must comply | +| **Tool description** | Soft guidance on when to call | Cannot be enforced; LLM can ignore it | + +Any behavioral constraint that matters for correctness needs **both** a prompt +instruction (so the LLM knows what to do) and a mechanical gate (so +non-compliance is caught and corrected, not silently propagated). See [intake-loop.md § Step-Aware Permission Gating](./intake-loop.md#step-aware-permission-gating). +### Don't give a step multiple cognitive goals + +Each step should have exactly one cognitive goal. Grouping multiple goals into +a single step ("do A, then B, then C") enables **simulated refinement**: the +LLM artificially downgrades its output for A to manufacture visible improvement +in C. When all three goals are in one step, the model can pre-plan the +"improvement" because it already knows C is coming. + +Separate `koan_complete_step` calls enforce genuinely isolated reasoning: the +LLM must complete each goal before it sees the next goal's instructions. There +is no opportunity to sandbag — the next step's prompt has not arrived yet. + +This is why the intake phase has three loop steps (Scout / Deliberate / Reflect) +rather than a single monolithic "investigate" step. The scout phase follows the +same principle (orient → investigate → verify → report — four distinct goals, +four distinct steps). + +When designing a new phase, each step should answer: "What is the single thing +this step accomplishes?" If the answer requires "and then", split the step. 
+ +See [intake-loop.md § Prompt Chaining over Stepwise](./intake-loop.md#prompt-engineering-principles) +for the detailed rationale. + ### Don't parse free-text for loop control decisions Confidence (the gate that controls the intake loop) is a structured enum From 923c441879d072d9434f7fbb72551eec637baae5 Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Fri, 20 Mar 2026 13:35:46 +0700 Subject: [PATCH 072/412] expand model tier documentation and update polling interval references --- docs/ipc.md | 2 +- docs/subagents.md | 63 +++++++++++++++++++++++++++++++++++------------ 2 files changed, 48 insertions(+), 17 deletions(-) diff --git a/docs/ipc.md b/docs/ipc.md index a2de236..e16d75d 100644 --- a/docs/ipc.md +++ b/docs/ipc.md @@ -132,7 +132,7 @@ async function deleteIpcFile(dir) { ... } |--------|----------|---------| | **Parent IPC responder** | 300ms | Detect subagent requests quickly | | **Subagent tool** | 500ms | Wait for parent response | -| **Web server agent polling** | 500ms | Update agent status in UI | +| **Web server agent polling** | 50ms | Update agent status in UI | The parent polls slightly faster than the subagent to ensure it picks up requests promptly. Both intervals are low enough for interactive feel. diff --git a/docs/subagents.md b/docs/subagents.md index 5278690..dec9ecf 100644 --- a/docs/subagents.md +++ b/docs/subagents.md @@ -290,21 +290,52 @@ the write is allowed (cannot scope-check without context). ## Model Tiers -Roles map deterministically to 3 tiers: +### Why 3 tiers instead of per-role configuration + +Koan has 6 roles, but they cluster into 3 capability bands. 
Configuring 3 +model names is simpler than 6 and matches the natural grouping: + +| Tier | Roles | Why this tier | +|------|-------|--------------| +| **strong** | intake, decomposer, orchestrator, planner | Complex multi-step reasoning: investigating ambiguous requirements, splitting work into stories, verifying correctness, producing precise implementation plans | +| **standard** | executor | Code implementation: reliable tool use and file editing without requiring the deepest reasoning | +| **cheap** | scout | Narrow codebase investigation: reading files, grepping patterns, writing a focused findings report — no deep reasoning needed | + +The mapping is hardcoded in `types.ts` (`ROLE_MODEL_TIER`). Adding a new role +requires updating that map. + +### Configuration + +Model tiers are configured via the web UI at pipeline start (the **model config +gate** fires before any subagent spawns). The user selects one model per tier. +Config is persisted to `~/.koan/config.json` under the `modelTiers` key: + +```json +{ + "modelTiers": { + "strong": "claude-opus-4-5", + "standard": "claude-sonnet-4-5", + "cheap": "claude-haiku-4-5" + }, + "scoutConcurrency": 4 +} +``` + +If no config exists or the config is partial, `resolveModelForRole` returns +`undefined` and the `--model` flag is omitted — pi's current active model +becomes the implicit fallback for all roles. -| Tier | Roles | Purpose | -|------|-------|---------| -| **strong** | intake, decomposer, orchestrator, planner | Complex reasoning, planning, decomposition | -| **standard** | executor | Code implementation | -| **cheap** | scout | Narrow codebase investigation | +Config is **all-or-nothing**: all 3 tiers must be present. Partial configs +are treated as absent and logged. This prevents a half-configured state where +some roles use intended models and others silently fall back. -The user configures which specific model each tier uses via the web UI at -pipeline start (model config gate). 
If no config exists, `resolveModelForRole` -returns `undefined` and the `--model` flag is omitted, preserving pi's -current active model as the implicit fallback. +### Scout concurrency -Model tier config is all-or-nothing: all 3 tiers must be present. Partial -configs are treated as absent and logged. +`scoutConcurrency` (default: 4) controls how many scout subagents run in +parallel via the bounded pool (`lib/pool.ts`). The pool uses an in-process +semaphore: all scout tasks are submitted to `Promise.all` simultaneously; the +semaphore gates actual execution. Increase this for faster scouting on machines +with ample resources; decrease it to reduce peak memory pressure. --- @@ -350,7 +381,7 @@ The three JSON files have distinct lifecycles per | File | Writer | Reader | When | |------|--------|--------|------| | `task.json` | Parent | Child | Once at startup | -| `state.json` | Child | Parent | Continuous (500ms polling) | +| `state.json` | Child | Parent | Continuous (50ms polling) | | `ipc.json` | Both | Both | Per-request (created, answered, deleted) | --- @@ -361,11 +392,11 @@ The parent registers each subagent with the web server for UI tracking: ```typescript webServer.registerAgent({ id, name, dir, role, model, parent }); -// → starts 500ms polling of audit projection + recent logs +// → starts 50ms polling of audit projection + recent logs // → SSE "agents" event to browser webServer.trackSubagent(dir, role, storyId?); -// → starts 500ms polling for "subagent" + "logs" SSE events +// → starts 50ms polling for "subagent" + "logs" SSE events // ... subagent runs ... @@ -377,7 +408,7 @@ webServer.completeAgent(id); ``` **Dual polling for intake agent:** Both `registerAgent()` and -`trackSubagent()` poll at 500ms. `registerAgent` polling derives the intake +`trackSubagent()` poll at 50ms. `registerAgent` polling derives the intake sub-phase for the progress bar: | Step | Pending ask? 
| Sub-phase | From 3373332c5def9dddf90bf8d3704a6062d8f9f745 Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Fri, 20 Mar 2026 13:36:05 +0700 Subject: [PATCH 073/412] update intake-loop polling interval reference to 50ms --- docs/intake-loop.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/intake-loop.md b/docs/intake-loop.md index 47a4563..d8d7629 100644 --- a/docs/intake-loop.md +++ b/docs/intake-loop.md @@ -230,7 +230,7 @@ Both events are folded into the `state.json` projection: - `confidence_change` → `intakeConfidence`, `intakeIteration` - `iteration_start` → `intakeIteration` -The web server polls `state.json` every 500ms for each active agent. When it +The web server polls `state.json` every 50ms for each active agent. When it detects a change in `intakeConfidence` or `intakeIteration`, it pushes an `intake-progress` SSE event to connected browser clients. The event payload includes both the `confidence` string and the `iteration` number, allowing the From 6b1b8bcdf161170d9942f9d9c4b69475548f1577 Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Fri, 20 Mar 2026 13:36:20 +0700 Subject: [PATCH 074/412] rewrite README to reflect current epic pipeline architecture --- README.md | 171 +++++++++++++++++++++++++++++++----------------------- 1 file changed, 99 insertions(+), 72 deletions(-) diff --git a/README.md b/README.md index ac8d11a..7d0c31d 100644 --- a/README.md +++ b/README.md @@ -1,72 +1,99 @@ -# Koan Pi Package - -## Overview - -Koan is an opinionated planning workflow extension for the pi coding agent. It constrains model behavior with deterministic phase orchestration, explicit tool boundaries, and durable file-backed state so planning sessions are repeatable and auditable. - -## Architecture - -The runtime is split into two modes from the same extension entrypoint: - -- **Parent session mode** registers the `koan_plan` MCP tool and the `/koan-execute`, `/koan-status` commands. 
The parent orchestrates the full workflow when `koan_plan` is invoked. -- **Subagent mode** runs role/phase-specific workflows (architect, developer, technical writer, QR decomposer, reviewer, fix mode). - -The parent controls progression through plan design, plan code, plan docs, quality review, and iterative fixes. Subagents are isolated processes that communicate through persisted artifacts (`plan.json`, `qr-*.json`) and audit projections. - -## Invoking the Planner - -Call `koan_plan` as an MCP tool — the LLM invokes it when the user asks to plan a complex task. No parameters are needed: the conversation up to that point is automatically exported to `conversation.jsonl` in the plan directory and becomes planning input. The architect then persists a structured **background context** index via koan tools. - -The planning pipeline runs sequentially: - -1. **plan-design** (architect) — reads `conversation.jsonl`, builds structured **background context** (previous conversation(s) + indexes), explores the codebase, writes `plan.json`. -2. **plan-code** (developer) — reads `plan.json`, populates code intents and changes. -3. **plan-docs** (technical writer) — reads `plan.json` plus the injected background context snippet, and optionally `conversation.jsonl` for rationale gaps; writes documentation entries. - -Each phase is followed by a QR (quality review) block: decompose → parallel verify → fix loop, up to `MAX_FIX_ITERATIONS`. - -### conversation.jsonl + background context - -`conversation.jsonl` is written once at the start of `koan_plan`. It contains the full session branch as JSONL (one JSON object per line — raw pi `SessionManager` entries, not a plain-text transcript). - -The architect categorically analyzes this file and persists compact markdown **background context** via: -- `koan_set_background_context` - -That context is then injected directly into prompts for planning and QR agents, alongside the conversation.jsonl location. 
- -### Prompt + convention sources - -- Subagent system prompts are hard-coded in `src/planner/lib/agent-prompts.ts`. -- Convention docs stay file-based in `resources/conventions` and are surfaced to prompts via `CONVENTIONS_DIR`. - -### Slash commands - -| Command | Description | -|---|---| -| `/koan-execute` | Execute a koan plan (not yet implemented) | -| `/koan-status` | Show current workflow phase | - -## Design Decisions - -Key design choices that shape implementation: - -- **Inversion of control**: TypeScript orchestration code drives agent behavior; models do not self-route workflow steps. -- **Tool-call-driven transitions**: step progression happens via `koan_complete_step` tool calls, not conversational chaining. -- **Default-deny permissions**: each phase explicitly allowlists tools; unknown tool/phase access is blocked. -- **Disk-backed mutations**: planning mutations are immediately persisted with atomic writes instead of deferred finalize steps. -- **Need-to-know prompts**: each subagent only receives the minimum context needed for its task. -- **Injected background context**: each workflow step prompt prepends the same `` snippet containing conversation path + compact markdown context. -- **Ephemeral runtime workspace**: intermediate subagent logs/state live in a mkdtemp workspace and are removed on plan completion and session shutdown. - -## Invariants - -The workflow depends on these invariants: - -- Planning phases must block direct `edit`/`write` tools. -- Tool failures must throw errors (not return soft error payloads). -- Cross-reference integrity in the plan must validate before progression. -- MUST-severity QR failures remain blocking even as lower-severity checks de-escalate in later fix iterations. - -## Boundaries - -Current scope focuses on planning and QR orchestration. `/koan-execute` is intentionally not implemented yet. +# Koan + +Koan is a deterministic planning pipeline for the pi coding agent. 
It takes a +conversation describing a coding task and produces working code — through a +structured sequence of isolated LLM subagents, each with a narrow, auditable +responsibility. + +## How it works + +``` +Conversation + → Intake (confidence-gated investigation loop) + → Decomposer (splits scope into stories) + → Review gate (user approves story list) + → Story loop: + Orchestrator (selects + verifies) → Planner → Executor → repeat + → Done +``` + +Each stage is a separate `pi -p` subprocess. Subagents communicate through +files in a per-session directory, not through shared memory or sockets. The +parent driver reads JSON state and exit codes; it never parses LLM output. + +## Phases + +| Phase | Role | What it does | +|-------|------|-------------| +| **Intake** | `intake` | Reads the conversation, scouts the codebase, asks clarifying questions. Iterates until confident. Writes `context.md`. | +| **Scout** | `scout` | Narrow codebase investigator. Spawned in parallel by intake, decomposer, and planner via `koan_request_scouts`. | +| **Decomposer** | `decomposer` | Reads `context.md`, splits work into story sketches. Each story = one pull request. | +| **Orchestrator** | `orchestrator` | Selects the next story, verifies execution results, routes to retry/done/next. | +| **Planner** | `planner` | Reads a story sketch, writes a step-by-step implementation plan and code context file. | +| **Executor** | `executor` | Follows the plan, modifies the codebase, reports what changed. | + +## Web Dashboard + +Koan serves a local web dashboard at `http://localhost:{port}` during pipeline +execution. 
The dashboard provides: + +- **Activity feed** — real-time tool calls, scout dispatches, thinking traces +- **Agent monitor** — status, token counts, and recent actions for each + running subagent +- **User interaction** — question forms (intake clarifications), review gates + (story approval), model configuration + +The dashboard uses Server-Sent Events for real-time updates. State is polled +from each subagent's audit projection every 50ms. + +## Key Concepts + +**Step-first workflow.** Every subagent's first action is calling +`koan_complete_step`. This forces a tool call before any text output — critical +because `pi -p` processes exit the moment the LLM produces text without a tool +call. Task instructions are delivered as the return value of that first call. + +**Directory-as-contract.** Each subagent gets a directory with `task.json` +(input), `state.json` (live projection), and `events.jsonl` (audit log). The +spawn command carries only the directory path. No structured data flows through +CLI flags. + +**Default-deny permissions.** Every tool call passes through a permission +fence. Roles cannot use tools outside their scope. Planning roles can only +write inside the epic directory. The intake phase's Extract step additionally +blocks scouting and writing tools at the mechanism level. + +**Driver determinism.** The driver (`driver.ts`) reads JSON and exit codes, +applies routing rules, and spawns the next subagent. It never parses markdown +or adapts to LLM behavior. Routing decisions are deterministic. + +## Configuration + +Model tiers and scout concurrency are configured via the web UI at pipeline +start, then saved to `~/.koan/config.json`: + +```json +{ + "modelTiers": { + "strong": "claude-opus-4-5", + "standard": "claude-sonnet-4-5", + "cheap": "claude-haiku-4-5" + }, + "scoutConcurrency": 4 +} +``` + +Roles map to tiers: intake/decomposer/orchestrator/planner → strong, +executor → standard, scout → cheap. 
+ +## Architecture Documentation + +- **[docs/architecture.md](./docs/architecture.md)** — core invariants, + design principles, pitfalls +- **[docs/subagents.md](./docs/subagents.md)** — spawn lifecycle, step-first + workflow, permissions, model tiers +- **[docs/ipc.md](./docs/ipc.md)** — file-based IPC between subagent and parent +- **[docs/state.md](./docs/state.md)** — driver state machine, story lifecycle, + routing rules +- **[docs/intake-loop.md](./docs/intake-loop.md)** — confidence-gated intake + loop, prompt engineering principles From d9f299cfa6c93435062bdd698690da5072cd1180 Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Fri, 20 Mar 2026 13:36:33 +0700 Subject: [PATCH 075/412] wire intake-progress SSE events to frontend store --- src/planner/web/js/sse.js | 14 +++++++++++--- src/planner/web/js/store.js | 1 + 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/src/planner/web/js/sse.js b/src/planner/web/js/sse.js index 35d0cb1..68908f5 100644 --- a/src/planner/web/js/sse.js +++ b/src/planner/web/js/sse.js @@ -6,15 +6,23 @@ export function connectSSE(token) { const handlers = { 'init': (d) => set({ availableModels: d.availableModels || [] }), - phase: (d) => set({ phase: d.phase, ...(d.phase !== 'intake' && { pendingInput: null }) }), - 'intake-progress': () => {}, // data model preserved server-side; UI unused for now + phase: (d) => set({ + phase: d.phase, + // Clear interaction state and intake progress when leaving intake + ...(d.phase !== 'intake' && { pendingInput: null, intakeProgress: null }), + }), + 'intake-progress': (d) => set({ intakeProgress: d }), stories: (d) => set({ stories: d.stories }), scouts: (d) => set({ scouts: d.scouts }), agents: (d) => set({ agents: d.agents }), logs: (d) => set({ logs: d.lines, currentToolCallId: d.currentToolCallId ?? null }), subagent: (d) => set({ subagent: d }), 'subagent-idle': () => set({ subagent: null }), - 'pipeline-end': (d) => set(s => ({ phase: d.success ? 
'completed' : s.phase, pipelineEnd: d })), + 'pipeline-end': (d) => set(s => ({ + phase: d.success ? 'completed' : s.phase, + pipelineEnd: d, + intakeProgress: null, + })), ask: (d) => set({ pendingInput: { type: 'ask', requestId: d.requestId, payload: d.questions } }), review: (d) => set({ pendingInput: { type: 'review', requestId: d.requestId, payload: d.stories } }), 'model-config': (d) => set(s => ({ diff --git a/src/planner/web/js/store.js b/src/planner/web/js/store.js index 99c7b0a..18ad55a 100644 --- a/src/planner/web/js/store.js +++ b/src/planner/web/js/store.js @@ -10,6 +10,7 @@ export const useStore = create((set) => ({ currentToolCallId: null, // string | null — in-flight tool for the main agent subagent: null, pendingInput: null, + intakeProgress: null, // IntakeProgressEvent | null — set during intake phase // Client-only state notifications: [], From 7a1e9826bfc066407df91f5514ecc74135831eaf Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Fri, 20 Mar 2026 13:36:49 +0700 Subject: [PATCH 076/412] add status sidebar with intake confidence visualization, remove orphan phase components --- src/planner/web/ARCHITECTURE.md | 113 +++++++++++----- src/planner/web/css/layout.css | 109 +++++++++++++++ src/planner/web/js/components/App.jsx | 20 ++- .../web/js/components/StatusSidebar.jsx | 126 ++++++++++++++++++ .../js/components/phases/Consolidation.jsx | 39 ------ .../js/components/phases/ContextAnalysis.jsx | 21 --- .../web/js/components/phases/Execution.jsx | 34 ----- .../js/components/phases/ScoutExploration.jsx | 60 --------- 8 files changed, 329 insertions(+), 193 deletions(-) create mode 100644 src/planner/web/js/components/StatusSidebar.jsx delete mode 100644 src/planner/web/js/components/phases/Consolidation.jsx delete mode 100644 src/planner/web/js/components/phases/ContextAnalysis.jsx delete mode 100644 src/planner/web/js/components/phases/Execution.jsx delete mode 100644 src/planner/web/js/components/phases/ScoutExploration.jsx diff --git 
a/src/planner/web/ARCHITECTURE.md b/src/planner/web/ARCHITECTURE.md index 8731e36..b71ea99 100644 --- a/src/planner/web/ARCHITECTURE.md +++ b/src/planner/web/ARCHITECTURE.md @@ -12,7 +12,7 @@ user input via POST. Built with Preact + Zustand — see server.ts HTTP server, SSE push, WebServerHandle API server-types.ts Shared TypeScript types html/index.html Shell —
+ module script, no static skeleton -css/ Four unchanged stylesheets (variables, layout, components, animations) +css/ Four stylesheets (variables, layout, components, animations) dist/app.js Compiled bundle — generated, not committed js/ app.jsx Entry: render(), connectSSE(), heartbeat interval @@ -31,9 +31,9 @@ esbuild compiles `js/app.jsx` and all imports into `dist/app.js` (single ESM bundle, ~44KB raw / ~16KB gzip). **The alias flags are mandatory.** zustand v4 imports from `react` internally. -Without aliasing, esbuild bundles the full React 19 runtime (~17KB) alongside -Preact — two competing VDOM reconcilers that cannot share a hook dispatcher. -The aliases redirect those imports to `preact/compat`: +Without aliasing, esbuild bundles the full React 19 runtime alongside Preact — +two competing VDOM reconcilers that cannot share a hook dispatcher. The aliases +redirect those imports to `preact/compat`: ``` --alias:react=preact/compat --alias:react-dom=preact/compat @@ -46,8 +46,7 @@ to both. **On-demand build:** `ensureBundle()` in `server.ts` runs at the top of `startWebServer()`. It stats `dist/app.js` against the newest file in `js/` and rebuilds only when stale. Adds ~100ms on first start; skips on subsequent -starts. No manual build step is needed during development — pi loads extensions -from source, so `startWebServer()` is always the entry point. +starts. No manual build step is needed during development. **CI/test path:** `npm run build` runs `build:web` then `tsc`. The tsc step does not process JSX; it type-checks the TypeScript source only. @@ -66,19 +65,19 @@ server.ts ──SSE──► sse.js ──setState──► Zustand store user action ◄──fetch── lib/api.js ◄──────────────────────────┘ ``` -1. `server.ts` pushes SSE events on a 2-second polling tick. +1. `server.ts` pushes SSE events on a 50ms polling tick. 2. `sse.js` registers one `addEventListener` per event type. 
Each handler calls `useStore.setState()` — the static method, callable outside component context. 3. Components subscribe via `useStore(s => s.slice)`. Zustand shallow-merges `setState` calls and notifies only subscribers whose selected slice changed. - A component reading `s.agents` does not re-render when `s.phase` changes. 4. User actions (form submit, heartbeat) call `lib/api.js` fetch wrappers which POST to `/api/answer`, `/api/review`, or `/api/heartbeat`. `pendingInput` is cleared by the server: a phase transition out of `intake` clears it in the `phase` handler; `ask-cancelled` / `review-cancelled` clear -it by request ID. +it by request ID. `intakeProgress` is cleared when the phase transitions away +from intake or when the pipeline ends. --- @@ -86,48 +85,96 @@ it by request ID. ``` App -├── ProgressBar reads intakeProgress.{subPhase,intakeDone} +├── ProgressBar reads phase for step-fraction fill ├── Header -│ ├── PillStrip reads intakeProgress.{subPhase,intakeDone} +│ ├── PillStrip reads phase for active/done pill state │ └── Timer reads subagent.startedAt, ticks via useEffect interval -├── main.phase-content +│ +├── (isInteractive) main.main-panel │ └── PhaseContent dispatch hub (see below) -├── AgentMonitor reads agents; renders AgentRow per agent +│ +├── (live) div.live-layout ← row split +│ ├── div.live-main +│ │ └── main.main-panel +│ │ ├── SubagentMeta reads subagent +│ │ └── ActivityFeed reads logs, currentToolCallId +│ └── StatusSidebar reads subagent, phase, intakeProgress +│ +├── AgentMonitor reads agents (hides when none active) └── Notifications reads notifications; auto-dismisses via useEffect ``` +**App layout modes:** + +`isInteractive = !phase || pendingInput || showSettings || phase === 'completed'` + +- **Interactive mode** — `PhaseContent` fills the scrollable area. Used for forms, + loading screen, settings overlay, and completion. +- **Live mode** — `SubagentMeta` + `ActivityFeed` fill the left column. 
+ `StatusSidebar` sits in the right column (200px), showing phase-specific + status that updates as SSE events arrive. + **PhaseContent dispatch order:** -1. `!phase` → `` -2. `pendingInput.type === 'ask'` → `` -3. `pendingInput.type === 'review'` → `` -4. `phase === 'intake'` → dispatches on `intakeProgress.subPhase`: - - `'context'` or null → `` - - `'explore'` → `` - - `'questions'` or `'spec'` → `` -5. `phase === 'completed'` → `` -6. default → `` +1. `showSettings` → `` +2. `pending.type === 'model-config'` → `` +3. `!phase` → `` +4. `pending.type === 'ask'` → `` +5. `pending.type === 'review'` → `` +6. `phase === 'completed'` → `` +7. default → `null` (live mode renders the ActivityFeed instead) `key={requestId}` on forms forces a full remount when a new request arrives, resetting local selection state without any explicit cleanup. --- +## StatusSidebar + +The `StatusSidebar` renders phase-specific context in the right column during +live mode. It reads three store slices: `subagent` (visibility gate), `phase` +(which content to show), and `intakeProgress` (intake-specific data). + +**During intake** (`phase === 'intake' && intakeProgress != null`): +- Confidence meter — 5 segments filled according to level (exploring=0, + low=1, medium=3, high=4, certain=5), with a level-appropriate colour +- Iteration indicator — 4 dots, filled up to the current round +- Sub-phase label — current sub-phase name in purple +- Summary — a static description derived from the sub-phase + +**During other phases** — a simple label and "Phase in progress…" message. +Per-phase rich content (e.g. story progress for `executing`) will be added +as those phases are instrumented. + +--- + +## intake-progress SSE event + +`IntakeProgressEvent { subPhase, intakeDone, confidence, iteration }` is pushed +from the server's 50ms agent-polling tick whenever the intake agent's projection +changes. 
The full pipeline: + +``` +LLM calls koan_set_confidence + → ctx.intakeConfidence set + → confidence_change appended to events.jsonl + → fold() updates state.json projection + → server polls state.json (50ms) → detects change + → pushes intake-progress SSE event + → sse.js: set({ intakeProgress: d }) + → StatusSidebar re-renders with new confidence/iteration +``` + +The event is replayed in `replayState()` on SSE reconnect so the sidebar +recovers its state after a network drop. + +--- + ## Server-side changes **`ensureBundle()`** — async function before `startWebServer()` body. Uses esbuild JS API via dynamic `await import("esbuild")`. `STATIC_ASSETS` is -constructed inside `startWebServer()` after this call completes (it was at -module scope in the old code; moved because asset loading must follow the build). - -**`intake-progress` SSE event** — denormalized event carrying -`{ subPhase: string | null, intakeDone: boolean }`. Pushed from: -- `startAgentPolling()` — after each `agents` push, if subPhase or intakeDone changed -- `handle.pushPhase()` — updates `intakeDone` on every phase transition - -Replayed in `replayState()` on SSE reconnect. Allows `PhaseContent`, -`PillStrip`, and `ProgressBar` to all subscribe to the same store slice -(`intakeProgress`) rather than using two different mechanisms. +constructed inside `startWebServer()` after this call completes. --- diff --git a/src/planner/web/css/layout.css b/src/planner/web/css/layout.css index c3e79f7..ac03c0c 100644 --- a/src/planner/web/css/layout.css +++ b/src/planner/web/css/layout.css @@ -317,3 +317,112 @@ font-size: var(--font-size-sm); color: var(--text-muted); } + +/* ---- Live layout: activity feed + status sidebar ---- */ + +/* Row wrapper that replaces main-panel in live (non-interactive) mode. + * Handles the header offset so inner .main-panel does not need margin-top. 
*/ +.live-layout { + flex: 1 1 0; + min-height: 0; + display: flex; + flex-direction: row; + margin-top: calc(3px + var(--header-height)); +} + +/* Left column — takes all remaining width, scroll contained within. */ +.live-main { + flex: 1 1 0; + min-width: 0; + min-height: 0; + display: flex; + flex-direction: column; +} + +/* Cancel the top margin when main-panel lives inside live-main + * (the live-layout wrapper already provides the header offset). */ +.live-main > .main-panel { + margin-top: 0; +} + +/* ---- Status sidebar ---- */ + +.status-sidebar { + width: 200px; + flex-shrink: 0; + background: var(--bg-elevated); + border-left: 1px solid var(--border); + overflow-y: auto; + padding: var(--gap-md); +} + +.sidebar-heading { + font-family: var(--font-mono); + font-size: 10px; + color: var(--text-dim); + text-transform: uppercase; + letter-spacing: 0.08em; + margin-bottom: var(--gap-md); +} + +.sidebar-section { + margin-bottom: var(--gap-md); +} + +.sidebar-label { + font-family: var(--font-mono); + font-size: 10px; + color: var(--text-ghost); + text-transform: uppercase; + letter-spacing: 0.06em; + margin-bottom: var(--gap-xs); +} + +/* Five-segment confidence bar */ +.sidebar-segments { + display: flex; + gap: 3px; + margin-bottom: var(--gap-xs); +} + +.sidebar-segment { + flex: 1; + height: 6px; + border-radius: 3px; + transition: background 300ms ease; +} + +/* Value line beneath a segment bar or dots */ +.sidebar-value { + font-family: var(--font-mono); + font-size: var(--font-size-xs); + font-weight: 500; + color: var(--text-muted); +} + +/* Four-dot iteration indicator */ +.sidebar-dots { + display: flex; + gap: 4px; + margin-bottom: var(--gap-xs); +} + +.sidebar-dot { + width: 8px; + height: 8px; + border-radius: 50%; + transition: background 300ms ease; +} + +.sidebar-divider { + height: 1px; + background: var(--border); + margin: var(--gap-md) 0; +} + +.sidebar-summary { + font-family: var(--font-mono); + font-size: 11px; + color: var(--text-dim); 
+ line-height: 1.4; +} diff --git a/src/planner/web/js/components/App.jsx b/src/planner/web/js/components/App.jsx index 031ae91..4bb6f71 100644 --- a/src/planner/web/js/components/App.jsx +++ b/src/planner/web/js/components/App.jsx @@ -4,6 +4,7 @@ import { SubagentMeta } from './SubagentMeta.jsx' import { PhaseContent } from './PhaseContent.jsx' import { ActivityFeed } from './ActivityFeed.jsx' import { AgentMonitor } from './AgentMonitor.jsx' +import { StatusSidebar } from './StatusSidebar.jsx' import { Notifications } from './Notifications.jsx' import { useStore } from '../store.js' @@ -12,8 +13,8 @@ export function App({ token, topic }) { const pending = useStore(s => s.pendingInput) const showSettings = useStore(s => s.showSettings) - // When showing interactive content (forms, model config, loading, completion), use scroll layout - // When showing live subagent activity, use fill layout with activity feed + // Interactive mode: forms, settings overlay, loading screen, completion. + // Live mode: active subagent activity feed with status sidebar. const isInteractive = !phase || pending || showSettings || phase === 'completed' return ( @@ -27,10 +28,17 @@ export function App({ token, topic }) {
) : ( -
- - -
+ // Live layout: activity feed on the left, status sidebar on the right. + // The sidebar spans the full height of the content area, independently scrollable. +
+
+
+ + +
+
+ +
)} diff --git a/src/planner/web/js/components/StatusSidebar.jsx b/src/planner/web/js/components/StatusSidebar.jsx new file mode 100644 index 0000000..3a9c315 --- /dev/null +++ b/src/planner/web/js/components/StatusSidebar.jsx @@ -0,0 +1,126 @@ +import { useStore } from '../store.js' + +// Maps confidence level to number of filled segments (out of 5) and accent colour. +const CONFIDENCE_DISPLAY = { + exploring: { segments: 0, color: 'var(--text-ghost)' }, + low: { segments: 1, color: 'var(--red)' }, + medium: { segments: 3, color: 'var(--orange)' }, + high: { segments: 4, color: 'var(--green)' }, + certain: { segments: 5, color: 'var(--green)' }, +} + +// Default summary text per sub-phase shown while the agent is working. +const SUBPHASE_SUMMARY = { + extract: 'Reading conversation to understand the task…', + scout: 'Exploring codebase via parallel scouts…', + deliberate: 'Analyzing findings, preparing questions…', + reflect: 'Verifying completeness of understanding…', + questions: 'Waiting for user response…', + synthesize: 'Writing context.md…', +} + +export function StatusSidebar() { + const subagent = useStore(s => s.subagent) + const phase = useStore(s => s.phase) + const intakeProgress = useStore(s => s.intakeProgress) + + // Only render when there is an active subagent. + if (!subagent) return null + + const isIntake = phase === 'intake' + + return ( + + ) +} + +// -- Intake-specific status: confidence meter, iteration dots, sub-phase, summary -- + +function IntakeStatus({ progress }) { + const { confidence, iteration, subPhase, intakeDone } = progress + const conf = CONFIDENCE_DISPLAY[confidence] ?? CONFIDENCE_DISPLAY.exploring + + return ( + <> + +