Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 24 additions & 10 deletions app/lib/workflows/runAgentStep.ts
Original file line number Diff line number Diff line change
@@ -1,45 +1,59 @@
import { streamText, convertToModelMessages, type UIMessage, type UIMessageChunk } from "ai";
import { gateway } from "@ai-sdk/gateway";
import { agentCustomInstructions } from "@/lib/chat/agentCustomInstructions";
import { CHAT_AGENT_STOP_WHEN } from "@/lib/chat/const";
import { buildAgentTools } from "@/lib/agent/buildAgentTools";
import type { AgentContext } from "@/lib/agent/tools/AgentContext";

/** Input accepted by a single `runAgentStep` invocation. */
export type RunAgentStepInput = {
/** UI-format chat messages; converted with `convertToModelMessages` before being sent to the model. */
messages: UIMessage[];
/** Model identifier handed to `gateway(...)` to select the model for this turn. */
modelId: string;
/**
* Stream the step acquires a writer on. Presumably each chunk of the run's
* UI message stream is written here — the write loop is outside the
* visible portion of the step; confirm there.
*/
writable: WritableStream<UIMessageChunk>;
/**
* Threaded into `streamText`'s `experimental_context` so each tool's
* `execute` callback can read the sandbox state + per-prompt context.
*/
agentContext: AgentContext;
};

/**
* One LLM turn in the chat workflow agent loop. Runs as a Vercel Workflow
* `"use step"` so that:
* One LLM turn (with internal tool-call iteration) in the chat workflow.
* Runs as a Vercel Workflow `"use step"` so:
*
* - Sandbox-banned APIs (`fetch`, `setTimeout`, `crypto`) are legal inside.
* - The result is cached as a single durable event — replays after a crash
* do not re-bill the model.
* do not re-bill the model or re-execute tools.
*
* Currently emits a plain text response with no tools. Sandbox tools land in
* the follow-up PR (port `@open-harness/agent` tools + wire via
* `experimental_context`).
* `streamText` drives the tool-call → tool-result → next-LLM-call loop
* internally, bounded by the `stopWhen` condition passed below
* (`CHAT_AGENT_STOP_WHEN`) rather than its default. Our outer workflow stays
* single-turn for now — multi-turn message threading lands when the rest
* of the tool surface is ported in a follow-up PR.
*
* @param input - Messages + selected model + the workflow's writable stream.
* @returns finishReason from the model run (for the workflow loop's break condition).
* @param input - Messages + selected model + writable stream + agent context.
* @returns finishReason from the model run.
*/
export async function runAgentStep(input: RunAgentStepInput): Promise<{ finishReason: string }> {
"use step";

console.log("[runAgentStep] start", {
modelId: input.modelId,
messageCount: input.messages.length,
hasSandboxState: Boolean(input.agentContext.sandbox?.state),
});

const modelMessages = convertToModelMessages(input.messages);
const tools = buildAgentTools();
const result = streamText({
model: gateway(input.modelId),
system: agentCustomInstructions,
messages: modelMessages,
tools,
stopWhen: CHAT_AGENT_STOP_WHEN,
experimental_context: input.agentContext,
});

// Acquire the writer once and release in `finally` — re-acquiring per chunk
// (the previous shape) leaked the lock when any write threw.
// Acquire the writer once and release in `finally` so a thrown chunk
// doesn't leak the lock.
const writer = input.writable.getWriter();
try {
for await (const part of result.toUIMessageStream()) {
Expand Down
32 changes: 14 additions & 18 deletions app/lib/workflows/runAgentWorkflow.ts
Original file line number Diff line number Diff line change
@@ -1,12 +1,18 @@
import { getWritable } from "workflow";
import type { UIMessage, UIMessageChunk } from "ai";
import { runAgentStep } from "@/app/lib/workflows/runAgentStep";
import type { AgentContext } from "@/lib/agent/tools/AgentContext";

/** Input accepted by `runAgentWorkflow`. */
export type RunAgentWorkflowInput = {
/** UI-format chat messages; forwarded unchanged to `runAgentStep`. */
messages: UIMessage[];
/** NOTE(review): no reader is visible in this view; presumably identifies the chat for logging/correlation — confirm. */
chatId: string;
/** NOTE(review): no reader is visible in this view; presumably identifies the owning session — confirm. */
sessionId: string;
/** Model identifier; forwarded unchanged to `runAgentStep`. */
modelId: string;
/**
* Threaded into `streamText`'s `experimental_context` so tools (bash et al.)
* can read sandbox state + per-prompt Recoup creds.
*/
agentContext: AgentContext;
};

/**
Expand All @@ -15,18 +21,14 @@ export type RunAgentWorkflowInput = {
* client; this function writes UIMessage chunks into the workflow's writable
* via `runAgentStep`.
*
* Currently runs a SINGLE `runAgentStep` turn. A multi-turn agent loop is
* unsafe today: each iteration would re-send the original prompt without
* the assistant's tool-call response in scope, so a `tool-calls` finish
* reason would loop forever on the same input. The proper multi-turn
* shape (where the step appends its response to `messages` before the
* next iteration) lands with the sandbox-tool port in PR 4.
*
* Until then, if the model returns `tool-calls` we log a warning and exit
* — the client receives the partial tool-call chunks but no follow-up turn.
* Currently runs a SINGLE `runAgentStep` turn. Tool-call iteration (up to
* MAX_TOOL_STEPS) happens INSIDE `streamText` via `stopWhen` — so the
* single workflow turn covers the full "user → assistant → tool → tool
* result → assistant" cycle without our outer loop having to thread
* messages between iterations.
*
* WDK constraints honored:
* - All I/O (streamText, fetches) lives in `"use step"` functions.
* - All I/O (streamText, sandbox.exec, fetches) lives in `"use step"` functions.
* - The workflow body only orchestrates — no fetch / setTimeout / fs / crypto.
*/
export async function runAgentWorkflow(input: RunAgentWorkflowInput): Promise<void> {
Expand All @@ -43,14 +45,8 @@ export async function runAgentWorkflow(input: RunAgentWorkflowInput): Promise<vo
messages: input.messages,
modelId: input.modelId,
writable,
agentContext: input.agentContext,
});

if (result.finishReason === "tool-calls") {
console.warn(
"[runAgentWorkflow] model returned tool-calls but tool execution is not wired yet; exiting after 1 turn",
{ chatId: input.chatId },
);
} else {
console.log("[runAgentWorkflow] finish", { finishReason: result.finishReason });
}
console.log("[runAgentWorkflow] finish", { finishReason: result.finishReason });
}
17 changes: 17 additions & 0 deletions lib/agent/__tests__/buildAgentTools.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import { describe, it, expect } from "vitest";
import { buildAgentTools } from "@/lib/agent/buildAgentTools";

// Shape checks for the tool record produced by `buildAgentTools`.
describe("buildAgentTools", () => {
  it("returns a tools record keyed by tool name", () => {
    const registry = buildAgentTools();

    expect(registry).toHaveProperty("bash");
    expect(typeof registry.bash).toBe("object");
  });

  it("each tool has an inputSchema, description, and execute", () => {
    // Only `bash` is registered today, so it is the only entry inspected.
    const { bash } = buildAgentTools();

    expect(bash.inputSchema).toBeDefined();
    expect(bash.description).toBeDefined();
    expect(typeof bash.execute).toBe("function");
  });
});
20 changes: 20 additions & 0 deletions lib/agent/buildAgentTools.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import { bashTool } from "@/lib/agent/tools/bashTool";

/**
 * Builds the tool record handed to `streamText({ tools })`.
 *
 * The factory is argument-free on purpose: tools hold no state of their own
 * and read what they need (sandbox state and Recoup org id) from
 * `experimental_context` when their `execute` callback runs.
 *
 * Slim PR 4 registers `bash` only. The remaining sandbox tools (`read`,
 * `write`, `grep`, `glob`, `todo`, `task`, `ask_user_question`, `skill`,
 * `fetch`) are added to this record in follow-up PRs, one at a time, with
 * no change to this signature.
 *
 * @returns A record mapping each tool name to its tool definition.
 */
export function buildAgentTools() {
  const bash = bashTool();
  return { bash };
}

/** Tool record type, derived from `buildAgentTools` so it tracks the factory as tools are added. */
export type AgentTools = ReturnType<typeof buildAgentTools>;
34 changes: 34 additions & 0 deletions lib/agent/tools/AgentContext.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import type { VercelState } from "@/lib/sandbox/vercel/state";

/**
* Per-tool-call context threaded into the agent via `streamText`'s
* `experimental_context`. Mirrors the open-agents `AgentContext` shape
* (subset — slim PR 4 ports only the `bash` tool, so context only needs
* what `bash` reads).
*
* Why no `recoupAccessToken` field? A short-lived per-prompt credential
* would let sandbox tools (`skill`, the eventual `recoup-api` skill) call
* back to recoup-api as the caller. We deliberately omit it here — the
* legacy api-key path is too long-lived to expose inside a sandbox where
* model-issued bash commands can read env. Proper short-lived token
* minting lands alongside the `skill` tool port.
*/
export type AgentContext = {
/**
* Persistable sandbox state. Tools reconnect via `connectVercel(state)` —
* we never pass a live `Sandbox` instance through context because
* workflow durability requires replay-friendly inputs.
*/
sandbox: {
/** Serializable handle passed to `connectVercel` to reattach to the sandbox. */
state: VercelState;
/** NOTE(review): presumably the sandbox's root working directory (tests use "/sandbox/mono") — confirm how tools consume it. */
workingDirectory: string;
/** NOTE(review): no reader is visible in this view; presumably the git branch checked out in the sandbox — confirm. */
currentBranch?: string;
};
/**
* Organization UUID when the sandbox was opened against a recoupable
* org repo (`org-<slug>-<uuid>`). Forwarded to sandboxed commands as
* `RECOUP_ORG_ID` so future `recoup-api` skill calls scope to that org.
* Public information — no security risk in exposing.
*/
recoupOrgId?: string;
};
158 changes: 158 additions & 0 deletions lib/agent/tools/__tests__/bashTool.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
import { describe, it, expect, vi, beforeEach } from "vitest";
import { bashTool } from "@/lib/agent/tools/bashTool";
import { connectVercel } from "@/lib/sandbox/vercel/connect/connectVercel";

// Swap the real sandbox connector for a mock so no test opens a real
// connection; each test chooses what `connectVercel` resolves to.
vi.mock("@/lib/sandbox/vercel/connect/connectVercel", () => ({
connectVercel: vi.fn(),
}));

// Context handed to the tool as `experimental_context` in the tests below.
const baseContext = {
sandbox: { state: { sandboxName: "session-x" }, workingDirectory: "/sandbox/mono" },
};

/**
 * Creates a stub sandbox for the tests: a fixed working directory plus fresh
 * `exec` / `execDetached` mocks. Any key in `overrides` replaces the
 * corresponding default.
 */
function makeSandbox(overrides: Record<string, unknown> = {}) {
  const defaults = {
    workingDirectory: "/sandbox/mono",
    exec: vi.fn(),
    execDetached: vi.fn(),
  };
  return { ...defaults, ...overrides };
}

// Clear recorded mock state before every test so call assertions don't see earlier tests' calls.
beforeEach(() => vi.clearAllMocks());

// Behavioural tests for `bashTool().execute` against a stubbed sandbox.
describe("bashTool.execute", () => {
  // A successful foreground `sandbox.exec` result; `overrides` replaces fields.
  const execOutcome = (overrides: Record<string, unknown> = {}) => ({
    success: true,
    exitCode: 0,
    stdout: "",
    stderr: "",
    truncated: false,
    ...overrides,
  });

  // Builds a stub sandbox and makes the mocked `connectVercel` resolve to it.
  const connectStub = (overrides: Record<string, unknown> = {}) => {
    const stub = makeSandbox(overrides);
    vi.mocked(connectVercel).mockResolvedValue(stub as never);
    return stub;
  };

  // Wraps a context in the options object that `execute` receives.
  const withContext = (context: unknown) => ({ experimental_context: context }) as never;

  it("executes a command via sandbox.exec in the sandbox's working directory", async () => {
    const stub = connectStub({
      exec: vi.fn().mockResolvedValue(execOutcome({ stdout: "README.md\npackage.json" })),
    });

    const output = await bashTool().execute!({ command: "ls" }, withContext(baseContext));

    expect(output).toEqual({
      success: true,
      exitCode: 0,
      stdout: "README.md\npackage.json",
      stderr: "",
    });
    expect(stub.exec).toHaveBeenCalledWith(
      "ls",
      "/sandbox/mono",
      expect.any(Number),
      expect.any(Object),
    );
  });

  it("includes `truncated: true` in the result when sandbox.exec truncated output", async () => {
    connectStub({
      exec: vi.fn().mockResolvedValue(execOutcome({ stdout: "lots of output", truncated: true })),
    });

    const output = (await bashTool().execute!(
      { command: "find ." },
      withContext(baseContext),
    )) as { truncated?: boolean };

    expect(output.truncated).toBe(true);
  });

  it("resolves a workspace-relative cwd against sandbox.workingDirectory", async () => {
    const stub = connectStub({ exec: vi.fn().mockResolvedValue(execOutcome()) });

    await bashTool().execute!({ command: "ls", cwd: "apps/web" }, withContext(baseContext));

    expect(stub.exec).toHaveBeenCalledWith(
      "ls",
      "/sandbox/mono/apps/web",
      expect.any(Number),
      expect.any(Object),
    );
  });

  it("injects RECOUP_ORG_ID into the exec env when present in context", async () => {
    const stub = connectStub({ exec: vi.fn().mockResolvedValue(execOutcome()) });

    await bashTool().execute!(
      { command: "curl example.com" },
      withContext({ ...baseContext, recoupOrgId: "org-uuid" }),
    );

    // The fourth positional argument to `exec` is the options bag carrying `env`.
    const execOptions = stub.exec.mock.calls[0]?.[3] as { env?: Record<string, string> };
    expect(execOptions.env).toEqual({ RECOUP_ORG_ID: "org-uuid" });
  });

  it("returns the detached commandId when called with detached:true", async () => {
    const stub = connectStub({
      execDetached: vi.fn().mockResolvedValue({ commandId: "cmd-123" }),
    });

    const output = (await bashTool().execute!(
      { command: "npm run dev", detached: true },
      withContext(baseContext),
    )) as { success: boolean; stdout: string };

    expect(output.success).toBe(true);
    expect(output.stdout).toMatch(/cmd-123/);
    expect(stub.execDetached).toHaveBeenCalledWith("npm run dev", "/sandbox/mono");
  });

  it("returns success:false with a descriptive stderr when the sandbox lacks execDetached", async () => {
    connectStub({ execDetached: undefined });

    const output = (await bashTool().execute!(
      { command: "npm run dev", detached: true },
      withContext(baseContext),
    )) as { success: boolean; stderr: string };

    expect(output.success).toBe(false);
    expect(output.stderr).toMatch(/detached mode is not supported/i);
  });

  it("does NOT inject env vars on detached execs", async () => {
    const stub = connectStub({
      execDetached: vi.fn().mockResolvedValue({ commandId: "cmd-1" }),
    });

    await bashTool().execute!(
      { command: "npm run dev", detached: true },
      withContext({ ...baseContext, recoupOrgId: "org-uuid" }),
    );

    // execDetached signature is (command, cwd) — no env arg.
    expect(stub.execDetached.mock.calls[0]).toHaveLength(2);
  });
});
31 changes: 31 additions & 0 deletions lib/agent/tools/__tests__/buildRecoupExecEnv.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import { describe, it, expect } from "vitest";
import { buildRecoupExecEnv } from "@/lib/agent/tools/buildRecoupExecEnv";

// Sandbox portion of a well-formed context, shared by the cases below.
const baseSandbox = { state: { sandboxName: "x" }, workingDirectory: "/sandbox/mono" };

// Covers which inputs yield an env record and which yield `undefined`.
describe("buildRecoupExecEnv", () => {
  it("returns undefined when no context", () => {
    for (const notAContext of [undefined, null, "not-a-context"]) {
      expect(buildRecoupExecEnv(notAContext)).toBeUndefined();
    }
  });

  it("returns undefined when context has no recoupOrgId", () => {
    const contextWithoutOrg = { sandbox: baseSandbox };
    expect(buildRecoupExecEnv(contextWithoutOrg)).toBeUndefined();
  });

  it("injects RECOUP_ORG_ID when present in context", () => {
    const contextWithOrg = { sandbox: baseSandbox, recoupOrgId: "org-uuid" };
    expect(buildRecoupExecEnv(contextWithOrg)).toEqual({ RECOUP_ORG_ID: "org-uuid" });
  });

  it("ignores empty-string recoupOrgId", () => {
    const contextWithBlankOrg = { sandbox: baseSandbox, recoupOrgId: "" };
    expect(buildRecoupExecEnv(contextWithBlankOrg)).toBeUndefined();
  });

  it("returns undefined when the input is not a valid AgentContext shape", () => {
    // An org id alone is not enough: the `sandbox` member must be present and non-null.
    const malformedInputs = [
      { recoupOrgId: "org-uuid" },
      { sandbox: null, recoupOrgId: "org-uuid" },
    ];
    for (const malformed of malformedInputs) {
      expect(buildRecoupExecEnv(malformed)).toBeUndefined();
    }
  });
});
Loading
Loading