recoupable · sweetmantech · May 21, 2026 · May 21, 2026 · May 21, 2026 · May 21, 2026
diff --git a/app/lib/workflows/runAgentStep.ts b/app/lib/workflows/runAgentStep.ts
@@ -1,45 +1,59 @@
 import { streamText, convertToModelMessages, type UIMessage, type UIMessageChunk } from "ai";
 import { gateway } from "@ai-sdk/gateway";
 import { agentCustomInstructions } from "@/lib/chat/agentCustomInstructions";
+import { CHAT_AGENT_STOP_WHEN } from "@/lib/chat/const";
+import { buildAgentTools } from "@/lib/agent/buildAgentTools";
+import type { AgentContext } from "@/lib/agent/tools/AgentContext";
 
 export type RunAgentStepInput = {
   messages: UIMessage[];
   modelId: string;
   writable: WritableStream<UIMessageChunk>;
+  /**
+   * Threaded into `streamText`'s `experimental_context` so each tool's
+   * `execute` callback can read the sandbox state + per-prompt context.
+   */
+  agentContext: AgentContext;
 };
 
 /**
- * One LLM turn in the chat workflow agent loop. Runs as a Vercel Workflow
- * `"use step"` so that:
+ * One LLM turn (with internal tool-call iteration) in the chat workflow.
+ * Runs as a Vercel Workflow `"use step"` so:
  *
  *   - Sandbox-banned APIs (`fetch`, `setTimeout`, `crypto`) are legal inside.
  *   - The result is cached as a single durable event — replays after a crash
- *     do not re-bill the model.
+ *     do not re-bill the model or re-execute tools.
  *
- * Currently emits a plain text response with no tools. Sandbox tools land in
- * the follow-up PR (port `@open-harness/agent` tools + wire via
- * `experimental_context`).
+ * `streamText` drives the tool-call → tool-result → next-LLM-call loop
+ * internally until the `CHAT_AGENT_STOP_WHEN` condition fires. Our outer workflow stays
+ * single-turn for now — multi-turn message threading lands when the rest
+ * of the tool surface ports in a follow-up PR.
  *
- * @param input - Messages + selected model + the workflow's writable stream.
- * @returns finishReason from the model run (for the workflow loop's break condition).
+ * @param input - Messages + selected model + writable stream + agent context.
+ * @returns finishReason from the model run.
  */
 export async function runAgentStep(input: RunAgentStepInput): Promise<{ finishReason: string }> {
   "use step";
 
   console.log("[runAgentStep] start", {
     modelId: input.modelId,
     messageCount: input.messages.length,
+    hasSandboxState: Boolean(input.agentContext.sandbox?.state),
   });
 
   const modelMessages = convertToModelMessages(input.messages);
+  const tools = buildAgentTools();
   const result = streamText({
     model: gateway(input.modelId),
     system: agentCustomInstructions,
     messages: modelMessages,
+    tools,
+    stopWhen: CHAT_AGENT_STOP_WHEN,
+    experimental_context: input.agentContext,
   });
 
-  // Acquire the writer once and release in `finally` — re-acquiring per chunk
-  // (the previous shape) leaked the lock when any write threw.
+  // Acquire the writer once and release in `finally` so a thrown chunk
+  // doesn't leak the lock.
   const writer = input.writable.getWriter();
   try {
     for await (const part of result.toUIMessageStream()) {

diff --git a/app/lib/workflows/runAgentWorkflow.ts b/app/lib/workflows/runAgentWorkflow.ts
@@ -1,12 +1,18 @@
 import { getWritable } from "workflow";
 import type { UIMessage, UIMessageChunk } from "ai";
 import { runAgentStep } from "@/app/lib/workflows/runAgentStep";
+import type { AgentContext } from "@/lib/agent/tools/AgentContext";
 
 export type RunAgentWorkflowInput = {
   messages: UIMessage[];
   chatId: string;
   sessionId: string;
   modelId: string;
+  /**
+   * Threaded into `streamText`'s `experimental_context` so tools (bash et al.)
+   * can read sandbox state + the optional Recoup org id (no credentials).
+   */
+  agentContext: AgentContext;
 };
 
 /**
@@ -15,18 +21,14 @@ export type RunAgentWorkflowInput = {
  * client; this function writes UIMessage chunks into the workflow's writable
  * via `runAgentStep`.
  *
- * Currently runs a SINGLE `runAgentStep` turn. A multi-turn agent loop is
- * unsafe today: each iteration would re-send the original prompt without
- * the assistant's tool-call response in scope, so a `tool-calls` finish
- * reason would loop forever on the same input. The proper multi-turn
- * shape (where the step appends its response to `messages` before the
- * next iteration) lands with the sandbox-tool port in PR 4.
- *
- * Until then, if the model returns `tool-calls` we log a warning and exit
- * — the client receives the partial tool-call chunks but no follow-up turn.
+ * Currently runs a SINGLE `runAgentStep` turn. Tool-call iteration (up to
+ * MAX_TOOL_STEPS) happens INSIDE `streamText` via `stopWhen` — so the
+ * single workflow turn covers the full "user → assistant → tool → tool
+ * result → assistant" cycle without our outer loop having to thread
+ * messages between iterations.
  *
  * WDK constraints honored:
- *   - All I/O (streamText, fetches) lives in `"use step"` functions.
+ *   - All I/O (streamText, sandbox.exec, fetches) lives in `"use step"` functions.
  *   - The workflow body only orchestrates — no fetch / setTimeout / fs / crypto.
  */
 export async function runAgentWorkflow(input: RunAgentWorkflowInput): Promise<void> {
@@ -43,14 +45,8 @@ export async function runAgentWorkflow(input: RunAgentWorkflowInput): Promise<vo
     messages: input.messages,
     modelId: input.modelId,
     writable,
+    agentContext: input.agentContext,
   });
 
-  if (result.finishReason === "tool-calls") {
-    console.warn(
-      "[runAgentWorkflow] model returned tool-calls but tool execution is not wired yet; exiting after 1 turn",
-      { chatId: input.chatId },
-    );
-  } else {
-    console.log("[runAgentWorkflow] finish", { finishReason: result.finishReason });
-  }
+  console.log("[runAgentWorkflow] finish", { finishReason: result.finishReason });
 }
diff --git a/lib/agent/__tests__/buildAgentTools.test.ts b/lib/agent/__tests__/buildAgentTools.test.ts
@@ -0,0 +1,17 @@
+import { describe, it, expect } from "vitest";
+import { buildAgentTools } from "@/lib/agent/buildAgentTools";
+
+describe("buildAgentTools", () => {
+  it("returns a tools record keyed by tool name", () => {
+    const tools = buildAgentTools();
+    expect(tools).toHaveProperty("bash");
+    expect(typeof tools.bash).toBe("object");
+  });
+
+  it("each tool has an inputSchema, description, and execute", () => {
+    const tools = buildAgentTools();
+    expect(tools.bash.inputSchema).toBeDefined();
+    expect(tools.bash.description).toBeDefined();
+    expect(typeof tools.bash.execute).toBe("function");
+  });
+});
diff --git a/lib/agent/buildAgentTools.ts b/lib/agent/buildAgentTools.ts
@@ -0,0 +1,20 @@
+import { bashTool } from "@/lib/agent/tools/bashTool";
+
+/**
+ * Factory for the full agent tool set passed into `streamText({ tools })`.
+ * Each tool reads its sandbox state + Recoup org id from `experimental_context`
+ * at execute time — the factory takes no arguments because the tools are
+ * stateless modulo that context.
+ *
+ * Slim PR 4 exposes only `bash`. The remaining sandbox tools (`read`,
+ * `write`, `grep`, `glob`, `todo`, `task`, `ask_user_question`, `skill`,
+ * `fetch`) port in follow-up PRs and slot into this record one-by-one
+ * without changing the factory signature.
+ */
+export function buildAgentTools() {
+  return {
+    bash: bashTool(),
+  };
+}
+
+export type AgentTools = ReturnType<typeof buildAgentTools>;
diff --git a/lib/agent/tools/AgentContext.ts b/lib/agent/tools/AgentContext.ts
@@ -0,0 +1,34 @@
+import type { VercelState } from "@/lib/sandbox/vercel/state";
+
+/**
+ * Per-tool-call context threaded into the agent via `streamText`'s
+ * `experimental_context`. Mirrors the open-agents `AgentContext` shape
+ * (subset — slim PR 4 ports only the `bash` tool, so context only needs
+ * what `bash` reads).
+ *
+ * Why no `recoupAccessToken` field? A short-lived per-prompt credential
+ * would let sandbox tools (`skill`, the eventual `recoup-api` skill) call
+ * back to recoup-api as the caller. We deliberately omit it here — the
+ * legacy api-key path is too long-lived to expose inside a sandbox where
+ * model-issued bash commands can read env. Proper short-lived token
+ * minting lands alongside the `skill` tool port.
+ */
+export type AgentContext = {
+  /**
+   * Persistable sandbox state. Tools reconnect via `connectVercel(state)` —
+   * we never pass a live `Sandbox` instance through context because
+   * workflow durability requires replay-friendly inputs.
+   */
+  sandbox: {
+    state: VercelState;
+    workingDirectory: string;
+    currentBranch?: string;
+  };
+  /**
+   * Organization UUID when the sandbox was opened against a recoupable
+   * org repo (`org-<slug>-<uuid>`). Forwarded to sandboxed commands as
+   * `RECOUP_ORG_ID` so future `recoup-api` skill calls scope to that org.
+   * Public information — no security risk in exposing.
+   */
+  recoupOrgId?: string;
+};
diff --git a/lib/agent/tools/__tests__/bashTool.test.ts b/lib/agent/tools/__tests__/bashTool.test.ts
@@ -0,0 +1,158 @@
+import { describe, it, expect, vi, beforeEach } from "vitest";
+import { bashTool } from "@/lib/agent/tools/bashTool";
+import { connectVercel } from "@/lib/sandbox/vercel/connect/connectVercel";
+
+vi.mock("@/lib/sandbox/vercel/connect/connectVercel", () => ({
+  connectVercel: vi.fn(),
+}));
+
+const baseContext = {
+  sandbox: { state: { sandboxName: "session-x" }, workingDirectory: "/sandbox/mono" },
+};
+
+function makeSandbox(overrides: Record<string, unknown> = {}) {
+  return {
+    workingDirectory: "/sandbox/mono",
+    exec: vi.fn(),
+    execDetached: vi.fn(),
+    ...overrides,
+  };
+}
+
+beforeEach(() => vi.clearAllMocks());
+
+describe("bashTool.execute", () => {
+  it("executes a command via sandbox.exec in the sandbox's working directory", async () => {
+    const sandbox = makeSandbox({
+      exec: vi.fn().mockResolvedValue({
+        success: true,
+        exitCode: 0,
+        stdout: "README.md\npackage.json",
+        stderr: "",
+        truncated: false,
+      }),
+    });
+    vi.mocked(connectVercel).mockResolvedValue(sandbox as never);
+
+    const tool = bashTool();
+    const result = await tool.execute!({ command: "ls" }, {
+      experimental_context: baseContext,
+    } as never);
+    expect(result).toEqual({
+      success: true,
+      exitCode: 0,
+      stdout: "README.md\npackage.json",
+      stderr: "",
+    });
+    expect(sandbox.exec).toHaveBeenCalledWith(
+      "ls",
+      "/sandbox/mono",
+      expect.any(Number),
+      expect.any(Object),
+    );
+  });
+
+  it("includes `truncated: true` in the result when sandbox.exec truncated output", async () => {
+    const sandbox = makeSandbox({
+      exec: vi.fn().mockResolvedValue({
+        success: true,
+        exitCode: 0,
+        stdout: "lots of output",
+        stderr: "",
+        truncated: true,
+      }),
+    });
+    vi.mocked(connectVercel).mockResolvedValue(sandbox as never);
+
+    const tool = bashTool();
+    const result = (await tool.execute!({ command: "find ." }, {
+      experimental_context: baseContext,
+    } as never)) as { truncated?: boolean };
+    expect(result.truncated).toBe(true);
+  });
+
+  it("resolves a workspace-relative cwd against sandbox.workingDirectory", async () => {
+    const sandbox = makeSandbox({
+      exec: vi.fn().mockResolvedValue({
+        success: true,
+        exitCode: 0,
+        stdout: "",
+        stderr: "",
+        truncated: false,
+      }),
+    });
+    vi.mocked(connectVercel).mockResolvedValue(sandbox as never);
+
+    const tool = bashTool();
+    await tool.execute!({ command: "ls", cwd: "apps/web" }, {
+      experimental_context: baseContext,
+    } as never);
+    expect(sandbox.exec).toHaveBeenCalledWith(
+      "ls",
+      "/sandbox/mono/apps/web",
+      expect.any(Number),
+      expect.any(Object),
+    );
+  });
+
+  it("injects RECOUP_ORG_ID into the exec env when present in context", async () => {
+    const sandbox = makeSandbox({
+      exec: vi.fn().mockResolvedValue({
+        success: true,
+        exitCode: 0,
+        stdout: "",
+        stderr: "",
+        truncated: false,
+      }),
+    });
+    vi.mocked(connectVercel).mockResolvedValue(sandbox as never);
+
+    const tool = bashTool();
+    await tool.execute!({ command: "curl example.com" }, {
+      experimental_context: { ...baseContext, recoupOrgId: "org-uuid" },
+    } as never);
+    const opts = sandbox.exec.mock.calls[0]?.[3] as { env?: Record<string, string> };
+    expect(opts.env).toEqual({ RECOUP_ORG_ID: "org-uuid" });
+  });
+
+  it("returns the detached commandId when called with detached:true", async () => {
+    const sandbox = makeSandbox({
+      execDetached: vi.fn().mockResolvedValue({ commandId: "cmd-123" }),
+    });
+    vi.mocked(connectVercel).mockResolvedValue(sandbox as never);
+
+    const tool = bashTool();
+    const result = (await tool.execute!({ command: "npm run dev", detached: true }, {
+      experimental_context: baseContext,
+    } as never)) as { success: boolean; stdout: string };
+    expect(result.success).toBe(true);
+    expect(result.stdout).toMatch(/cmd-123/);
+    expect(sandbox.execDetached).toHaveBeenCalledWith("npm run dev", "/sandbox/mono");
+  });
+
+  it("returns success:false with a descriptive stderr when the sandbox lacks execDetached", async () => {
+    const sandbox = makeSandbox({ execDetached: undefined });
+    vi.mocked(connectVercel).mockResolvedValue(sandbox as never);
+
+    const tool = bashTool();
+    const result = (await tool.execute!({ command: "npm run dev", detached: true }, {
+      experimental_context: baseContext,
+    } as never)) as { success: boolean; stderr: string };
+    expect(result.success).toBe(false);
+    expect(result.stderr).toMatch(/detached mode is not supported/i);
+  });
+
+  it("does NOT inject env vars on detached execs", async () => {
+    const sandbox = makeSandbox({
+      execDetached: vi.fn().mockResolvedValue({ commandId: "cmd-1" }),
+    });
+    vi.mocked(connectVercel).mockResolvedValue(sandbox as never);
+
+    const tool = bashTool();
+    await tool.execute!({ command: "npm run dev", detached: true }, {
+      experimental_context: { ...baseContext, recoupOrgId: "org-uuid" },
+    } as never);
+    // execDetached signature is (command, cwd) — no env arg.
+    expect(sandbox.execDetached.mock.calls[0]).toHaveLength(2);
+  });
+});
diff --git a/lib/agent/tools/__tests__/buildRecoupExecEnv.test.ts b/lib/agent/tools/__tests__/buildRecoupExecEnv.test.ts
@@ -0,0 +1,31 @@
+import { describe, it, expect } from "vitest";
+import { buildRecoupExecEnv } from "@/lib/agent/tools/buildRecoupExecEnv";
+
+const baseSandbox = { state: { sandboxName: "x" }, workingDirectory: "/sandbox/mono" };
+
+describe("buildRecoupExecEnv", () => {
+  it("returns undefined when no context", () => {
+    expect(buildRecoupExecEnv(undefined)).toBeUndefined();
+    expect(buildRecoupExecEnv(null)).toBeUndefined();
+    expect(buildRecoupExecEnv("not-a-context")).toBeUndefined();
+  });
+
+  it("returns undefined when context has no recoupOrgId", () => {
+    expect(buildRecoupExecEnv({ sandbox: baseSandbox })).toBeUndefined();
+  });
+
+  it("injects RECOUP_ORG_ID when present in context", () => {
+    const env = buildRecoupExecEnv({ sandbox: baseSandbox, recoupOrgId: "org-uuid" });
+    expect(env).toEqual({ RECOUP_ORG_ID: "org-uuid" });
+  });
+
+  it("ignores empty-string recoupOrgId", () => {
+    const env = buildRecoupExecEnv({ sandbox: baseSandbox, recoupOrgId: "" });
+    expect(env).toBeUndefined();
+  });
+
+  it("returns undefined when the input is not a valid AgentContext shape", () => {
+    expect(buildRecoupExecEnv({ recoupOrgId: "org-uuid" })).toBeUndefined();
+    expect(buildRecoupExecEnv({ sandbox: null, recoupOrgId: "org-uuid" })).toBeUndefined();
+  });
+});