// Tool-definition cache breakpoint: after `runAgentStep` runs, the toolset
// handed to `streamText` must carry the ephemeral `cacheControl` marker on
// its final entry.
it("wraps tools with anthropic cacheControl on the last tool before passing to streamText", async () => {
  const captured: unknown[] = [];
  vi.mocked(streamText).mockReturnValue(makeStreamResult({ metadataCalls: captured }) as never);
  const { stream } = makeWritable();

  await runAgentStep({ ...baseInput, writable: stream } as never);

  // Inspect the first argument of the first `streamText` call.
  const args = vi.mocked(streamText).mock.calls[0]?.[0] as {
    tools: Record<
      string,
      { providerOptions?: { anthropic?: { cacheControl?: { type: string } } } }
    >;
  };
  const toolNames = Object.keys(args.tools);
  expect(toolNames.length).toBeGreaterThan(0);
  const lastTool = args.tools[toolNames[toolNames.length - 1]!]!;
  expect(lastTool.providerOptions?.anthropic?.cacheControl).toEqual({ type: "ephemeral" });
  // Earlier tools should NOT carry the cache-control marker (Anthropic 4-breakpoint limit).
  if (toolNames.length > 1) {
    expect(args.tools[toolNames[0]!]?.providerOptions).toBeUndefined();
  }
});

// Per-step message caching: `runAgentStep` must pass a `prepareStep` callback
// to `streamText`, and that callback must tag only the last message.
it("wires a prepareStep callback that marks the last message with cacheControl", async () => {
  const captured: unknown[] = [];
  vi.mocked(streamText).mockReturnValue(makeStreamResult({ metadataCalls: captured }) as never);
  const { stream } = makeWritable();

  await runAgentStep({ ...baseInput, writable: stream } as never);

  const args = vi.mocked(streamText).mock.calls[0]?.[0] as {
    prepareStep?: (opts: {
      messages: Array<{ role: string; providerOptions?: Record<string, unknown> }>;
      model: unknown;
      steps?: unknown[];
    }) => { messages?: unknown[] } | undefined;
  };
  expect(typeof args.prepareStep).toBe("function");

  // Invoke the captured callback directly with a two-message conversation.
  const anthropicModel = { provider: "anthropic", modelId: "claude-haiku-4.5" } as never;
  const result = args.prepareStep!({
    messages: [
      { role: "user", content: "first" } as never,
      { role: "user", content: "second" } as never,
    ],
    model: anthropicModel,
    steps: [],
  });
  const out = result?.messages as Array<{ providerOptions?: Record<string, unknown> }>;
  expect(out).toBeDefined();
  expect(out[0]?.providerOptions).toBeUndefined();
  expect(out[1]?.providerOptions).toEqual({ anthropic: { cacheControl: { type: "ephemeral" } } });
});
"@/lib/agent/messageMetadata/buildMessageMetadataCallback"; +import { addCacheControlToTools } from "@/lib/agent/contextManagement/addCacheControlToTools"; +import { addCacheControlToMessages } from "@/lib/agent/contextManagement/addCacheControlToMessages"; export type RunAgentStepInput = { messages: UIMessage[]; @@ -48,7 +50,14 @@ export async function runAgentStep(input: RunAgentStepInput): Promise<{ finishRe }); const modelMessages = await convertToModelMessages(input.messages); - const tools = buildAgentTools({ skills: input.agentContext.skills }); + // Mark the last tool with `cacheControl: { type: "ephemeral" }` so + // Anthropic caches the tool-definitions block across the + // conversation. Per-step message caching is wired via `prepareStep` + // below. Mirrors open-agents' `prepareCall` + `prepareStep` split. + const tools = addCacheControlToTools({ + tools: buildAgentTools({ skills: input.agentContext.skills }), + model: input.modelId, + }); // Construct the model here (not in the workflow input) — LanguageModel // instances aren't JSON-serializable and can't ride durable inputs. // Then attach to AgentContext so tools see the same model the parent @@ -74,6 +83,12 @@ export async function runAgentStep(input: RunAgentStepInput): Promise<{ finishRe tools, stopWhen: CHAT_AGENT_STOP_WHEN, experimental_context: agentContext, + // Mark the LAST message with cacheControl on every step so Anthropic + // incrementally caches the conversation prefix. Mirrors open-agents' + // `prepareStep` in `open-harness-agent.ts:100`. 
// Minimal stand-ins for model instances: the suites below only need the
// `provider` / `modelId` fields to differ between the two fixtures.
const anthropicModel = { provider: "anthropic", modelId: "claude-haiku-4.5" } as never;
const openaiModel = { provider: "openai", modelId: "gpt-5" } as never;

/** Fresh three-message conversation per test so mutation checks stay isolated. */
const makeMsgs = () => [
  { role: "user", content: "first" },
  { role: "assistant", content: "ack" },
  { role: "user", content: "second" },
];

describe("addCacheControlToMessages", () => {
  it("returns messages unchanged for non-Anthropic models", () => {
    const messages = makeMsgs();
    const result = addCacheControlToMessages({ messages: messages as never, model: openaiModel });
    expect(result).toEqual(messages);
  });

  it("returns messages unchanged when the array is empty", () => {
    const result = addCacheControlToMessages({ messages: [], model: anthropicModel });
    expect(result).toEqual([]);
  });

  it("marks ONLY the last message with ephemeral cacheControl (per Anthropic guidance)", () => {
    const messages = makeMsgs();
    const result = addCacheControlToMessages({
      messages: messages as never,
      model: anthropicModel,
    }) as Array<{ providerOptions?: { anthropic?: { cacheControl?: { type: string } } } }>;
    expect(result[0]?.providerOptions).toBeUndefined();
    expect(result[1]?.providerOptions).toBeUndefined();
    expect(result[2]?.providerOptions?.anthropic?.cacheControl).toEqual({ type: "ephemeral" });
  });

  it("preserves existing providerOptions on the last message when merging the anthropic marker", () => {
    const messages = [
      { role: "user", content: "first" },
      {
        role: "user",
        content: "second",
        providerOptions: { openai: { foo: "bar" } },
      },
    ];
    const result = addCacheControlToMessages({
      messages: messages as never,
      model: anthropicModel,
    }) as Array<{ providerOptions?: Record<string, unknown> }>;
    expect(result[1]?.providerOptions?.openai).toEqual({ foo: "bar" });
    expect(result[1]?.providerOptions?.anthropic).toEqual({
      cacheControl: { type: "ephemeral" },
    });
  });

  it("does NOT mutate the input messages array", () => {
    const messages = makeMsgs();
    addCacheControlToMessages({ messages: messages as never, model: anthropicModel });
    expect((messages[2] as { providerOptions?: unknown }).providerOptions).toBeUndefined();
  });
});

/** Fresh three-tool toolset per test; `write` is the last key in insertion order. */
const makeTools = () => ({
  bash: { description: "run bash", inputSchema: {} },
  read: { description: "read file", inputSchema: {} },
  write: { description: "write file", inputSchema: {} },
});

describe("addCacheControlToTools", () => {
  it("returns tools unchanged for non-Anthropic models", () => {
    const tools = makeTools();
    const result = addCacheControlToTools({ tools, model: openaiModel });
    expect(result).toEqual(tools);
  });

  it("returns tools unchanged when the toolset is empty", () => {
    const tools = {};
    const result = addCacheControlToTools({ tools, model: anthropicModel });
    expect(result).toEqual({});
  });

  it("marks ONLY the last tool with ephemeral cacheControl (Anthropic's 4-breakpoint limit)", () => {
    const tools = makeTools();
    const result = addCacheControlToTools({ tools, model: anthropicModel }) as Record<
      string,
      { providerOptions?: { anthropic?: { cacheControl?: { type: string } } } }
    >;
    expect(result.bash?.providerOptions).toBeUndefined();
    expect(result.read?.providerOptions).toBeUndefined();
    expect(result.write?.providerOptions?.anthropic?.cacheControl).toEqual({ type: "ephemeral" });
  });

  it("preserves existing providerOptions on the last tool when merging the anthropic marker", () => {
    const tools = {
      a: { description: "a", inputSchema: {} },
      b: {
        description: "b",
        inputSchema: {},
        providerOptions: { openai: { foo: "bar" } },
      },
    } as never;
    const result = addCacheControlToTools({ tools, model: anthropicModel }) as Record<
      string,
      { providerOptions?: Record<string, unknown> }
    >;
    expect(result.b?.providerOptions?.openai).toEqual({ foo: "bar" });
    expect(result.b?.providerOptions?.anthropic).toEqual({ cacheControl: { type: "ephemeral" } });
  });

  it("respects a custom providerOptions override", () => {
    const tools = { only: { description: "x", inputSchema: {} } } as never;
    // Use the documented one-hour cache shape (`type` + `ttl`) as the override
    // fixture; the function under test passes it through untouched.
    const result = addCacheControlToTools({
      tools,
      model: anthropicModel,
      providerOptions: { anthropic: { cacheControl: { type: "ephemeral", ttl: "1h" } } },
    }) as Record<
      string,
      { providerOptions?: { anthropic?: { cacheControl?: { type: string; ttl?: string } } } }
    >;
    expect(result.only?.providerOptions?.anthropic?.cacheControl).toEqual({
      type: "ephemeral",
      ttl: "1h",
    });
  });
});
// Suite for the Anthropic/Claude predicate. Each case feeds either a plain
// model-id string or a `{ provider, modelId }` object and checks the verdict.
describe("isAnthropicModel", () => {
  /** Runs the predicate on an arbitrary fixture (string id or model-like object). */
  const verdict = (candidate: unknown): boolean => isAnthropicModel(candidate as never);

  it("returns true for a string model id containing 'anthropic'", () => {
    const id = "anthropic/claude-haiku-4.5";
    expect(verdict(id)).toBe(true);
  });

  it("returns true for a string model id containing 'claude' (no provider prefix)", () => {
    const id = "claude-3-5-haiku";
    expect(verdict(id)).toBe(true);
  });

  it("returns false for non-Anthropic string model ids", () => {
    for (const id of ["openai/gpt-5.2", "google/gemini-3"]) {
      expect(verdict(id)).toBe(false);
    }
  });

  it("returns true for a model object whose `provider` is 'anthropic'", () => {
    const model = { provider: "anthropic", modelId: "claude-haiku-4.5" };
    expect(verdict(model)).toBe(true);
  });

  it("returns true for a model object whose `provider` contains 'anthropic' (gateway-prefixed)", () => {
    const model = { provider: "gateway.anthropic", modelId: "x" };
    expect(verdict(model)).toBe(true);
  });

  it("returns true for a model object whose `modelId` contains 'anthropic' or 'claude'", () => {
    for (const modelId of ["anthropic/x", "claude-x"]) {
      expect(verdict({ provider: "gateway", modelId })).toBe(true);
    }
  });

  it("returns false for a model object with no anthropic / claude markers", () => {
    const model = { provider: "openai", modelId: "gpt-5" };
    expect(verdict(model)).toBe(false);
  });
});
/** Provider-keyed option bags, e.g. `{ anthropic: { cacheControl: … } }`. */
type ProviderOptions = Record<string, Record<string, JSONValue>>;

/** Marker applied when the caller does not pass its own `providerOptions`. */
const DEFAULT_PROVIDER_OPTIONS: ProviderOptions = {
  anthropic: { cacheControl: { type: "ephemeral" } },
};

/**
 * Combine two provider-option maps provider by provider.
 *
 * A plain top-level spread would replace an existing `anthropic` bag wholesale
 * and drop any keys already set there. Here each provider's bag is merged, with
 * `extra` winning on conflicting keys. Neither argument is mutated.
 */
function mergeProviderOptions(
  existing: ProviderOptions | undefined,
  extra: ProviderOptions,
): ProviderOptions {
  const merged: ProviderOptions = { ...existing };
  for (const [provider, options] of Object.entries(extra)) {
    merged[provider] = { ...merged[provider], ...options };
  }
  return merged;
}

/**
 * Mark the LAST message with `cacheControl: { type: "ephemeral" }` so
 * Anthropic incrementally caches the conversation prefix. Per
 * Anthropic's docs: "Mark the final block of the final message with
 * cache_control so the conversation can be incrementally cached."
 *
 * Port of open-agents' `addCacheControl({messages, model})` overload
 * in `packages/agent/context-management/cache-control.ts`.
 *
 * @param opts.messages Conversation to tag; never mutated.
 * @param opts.model Model id or instance, checked with `isAnthropicModel`.
 * @param opts.providerOptions Options merged into the last message; defaults
 *   to the ephemeral Anthropic marker.
 * @returns The same array reference when the model is not Anthropic or the
 *   array is empty. Otherwise a new array whose last element is a shallow copy
 *   carrying the merged `providerOptions`; earlier elements are the original
 *   message references.
 */
export function addCacheControlToMessages(opts: {
  messages: ModelMessage[];
  model: LanguageModel;
  providerOptions?: ProviderOptions;
}): ModelMessage[] {
  const { messages, model, providerOptions = DEFAULT_PROVIDER_OPTIONS } = opts;

  if (!isAnthropicModel(model)) return messages;
  if (messages.length === 0) return messages;

  const lastIndex = messages.length - 1;
  return messages.map((message, index) =>
    index === lastIndex
      ? {
          ...message,
          // Keep options the message already carries, including other keys in
          // the same provider's bag.
          providerOptions: mergeProviderOptions(
            (message as { providerOptions?: ProviderOptions }).providerOptions,
            providerOptions,
          ),
        }
      : message,
  );
}
/** Provider-keyed option bags, e.g. `{ anthropic: { cacheControl: … } }`. */
type ProviderOptions = Record<string, Record<string, JSONValue>>;

/** Marker applied when the caller does not pass its own `providerOptions`. */
const DEFAULT_PROVIDER_OPTIONS: ProviderOptions = {
  anthropic: { cacheControl: { type: "ephemeral" } },
};

/**
 * Combine two provider-option maps provider by provider.
 *
 * A plain top-level spread would replace an existing `anthropic` bag wholesale
 * and drop any keys already set there. Here each provider's bag is merged, with
 * `extra` winning on conflicting keys. Neither argument is mutated.
 */
function mergeProviderOptions(
  existing: ProviderOptions | undefined,
  extra: ProviderOptions,
): ProviderOptions {
  const merged: ProviderOptions = { ...existing };
  for (const [provider, options] of Object.entries(extra)) {
    merged[provider] = { ...merged[provider], ...options };
  }
  return merged;
}

/**
 * Mark the LAST tool in a toolset with `cacheControl: { type: "ephemeral" }`
 * so Anthropic caches the tool-definitions block across the conversation.
 *
 * Port of open-agents' `addCacheControl({tools, model})` overload in
 * `packages/agent/context-management/cache-control.ts`. Why only the
 * last tool: Anthropic enforces a max of 4 cache breakpoints, and we
 * spend one each on the system prompt + messages, so we conserve by
 * marking just the trailing tool entry.
 *
 * "Last" means last in `Object.entries` order, i.e. the object's own
 * enumerable key order.
 *
 * @param opts.tools Toolset to tag; never mutated.
 * @param opts.model Model id or instance, checked with `isAnthropicModel`.
 * @param opts.providerOptions Options merged into the last tool; defaults to
 *   the ephemeral Anthropic marker.
 * @returns The same object reference when the model is not Anthropic or the
 *   toolset is empty. Otherwise a new object with the same keys in the same
 *   order, whose last value is a shallow copy carrying the merged
 *   `providerOptions`.
 */
export function addCacheControlToTools<T extends ToolSet>(opts: {
  tools: T;
  model: LanguageModel;
  providerOptions?: ProviderOptions;
}): T {
  const { tools, model, providerOptions = DEFAULT_PROVIDER_OPTIONS } = opts;

  if (!isAnthropicModel(model)) return tools;

  const entries = Object.entries(tools);
  if (entries.length === 0) return tools;

  const lastIndex = entries.length - 1;
  return Object.fromEntries(
    entries.map(([name, tool], index) => [
      name,
      index === lastIndex
        ? {
            ...tool,
            // Keep options the tool already carries, including other keys in
            // the same provider's bag.
            providerOptions: mergeProviderOptions(
              (tool as { providerOptions?: ProviderOptions }).providerOptions,
              providerOptions,
            ),
          }
        : tool,
    ]),
  ) as T;
}
/** Substrings that identify Anthropic when found in a provider name. */
const PROVIDER_MARKERS: readonly string[] = ["anthropic"];

/** Substrings that identify Anthropic / Claude when found in a model id. */
const MODEL_ID_MARKERS: readonly string[] = ["anthropic", "claude"];

/** True when `value` is a string containing at least one of `markers`. */
function containsAny(value: unknown, markers: readonly string[]): boolean {
  return typeof value === "string" && markers.some((marker) => value.includes(marker));
}

/**
 * Predicate: is this a Claude / Anthropic model? Drives whether to
 * attach `cacheControl: { type: "ephemeral" }` to messages + tools
 * (Anthropic prompt caching) or leave them untouched.
 *
 * Adapted from open-agents' `isAnthropicModel`
 * (`packages/agent/context-management/cache-control.ts`).
 *
 * Matching is case-sensitive substring search:
 * - string ids match on `"anthropic"` or `"claude"`;
 * - model objects match when `provider` contains `"anthropic"`, or
 *   `modelId` contains `"anthropic"` or `"claude"`.
 *
 * A model object whose `provider` or `modelId` is missing or not a string
 * contributes no match instead of throwing, so a malformed model simply
 * skips caching.
 *
 * @param model String model id (e.g. `"anthropic/claude-haiku-4.5"`) or a
 *   model instance carrying `provider` and `modelId`.
 * @returns `true` when an Anthropic / Claude marker is found.
 */
export function isAnthropicModel(model: LanguageModel): boolean {
  if (typeof model === "string") return containsAny(model, MODEL_ID_MARKERS);

  const fields = model as { provider?: unknown; modelId?: unknown } | null | undefined;
  return (
    containsAny(fields?.provider, PROVIDER_MARKERS) ||
    containsAny(fields?.modelId, MODEL_ID_MARKERS)
  );
}