recoupable · sweetmantech · May 22, 2026 · May 22, 2026 · cubic-dev-ai · May 22, 2026
diff --git a/app/lib/workflows/__tests__/runAgentStep.test.ts b/app/lib/workflows/__tests__/runAgentStep.test.ts
@@ -110,6 +110,59 @@ describe("runAgentStep", () => {
     expect(args.system).toMatch(/workspace-relative paths/);
   });
 
+  it("wraps tools with anthropic cacheControl on the last tool before passing to streamText", async () => {
+    const captured: unknown[] = [];
+    vi.mocked(streamText).mockReturnValue(makeStreamResult({ metadataCalls: captured }) as never);
+    const { stream } = makeWritable();
+
+    await runAgentStep({ ...baseInput, writable: stream } as never);
+
+    const args = vi.mocked(streamText).mock.calls[0]?.[0] as { // options object of the first streamText call
+      tools: Record<
+        string,
+        { providerOptions?: { anthropic?: { cacheControl?: { type: string } } } }
+      >;
+    };
+    const toolNames = Object.keys(args.tools); // key order decides which tool counts as "last"
+    expect(toolNames.length).toBeGreaterThan(0);
+    const lastTool = args.tools[toolNames[toolNames.length - 1]!]!;
+    expect(lastTool.providerOptions?.anthropic?.cacheControl).toEqual({ type: "ephemeral" });
+    // Earlier tools should NOT carry the cache-control marker (Anthropic 4-breakpoint limit); only the first tool is spot-checked.
+    if (toolNames.length > 1) {
+      expect(args.tools[toolNames[0]!]?.providerOptions).toBeUndefined();
+    }
+  });
+
+  it("wires a prepareStep callback that marks the last message with cacheControl", async () => {
+    const captured: unknown[] = [];
+    vi.mocked(streamText).mockReturnValue(makeStreamResult({ metadataCalls: captured }) as never);
+    const { stream } = makeWritable();
+
+    await runAgentStep({ ...baseInput, writable: stream } as never);
+
+    const args = vi.mocked(streamText).mock.calls[0]?.[0] as { // options object of the first streamText call
+      prepareStep?: (opts: {
+        messages: Array<{ role: string; providerOptions?: Record<string, unknown> }>;
+        model: unknown;
+        steps?: unknown[];
+      }) => { messages?: unknown[] } | undefined;
+    };
+    expect(typeof args.prepareStep).toBe("function");
+    const anthropicModel = { provider: "anthropic", modelId: "claude-haiku-4.5" } as never; // stub exposing only provider + modelId
+    const result = args.prepareStep!({ // call the captured callback directly; streamText itself is mocked
+      messages: [
+        { role: "user", content: "first" } as never,
+        { role: "user", content: "second" } as never,
+      ],
+      model: anthropicModel,
+      steps: [],
+    });
+    const out = result?.messages as Array<{ providerOptions?: Record<string, unknown> }>;
+    expect(out).toBeDefined();
+    expect(out[0]?.providerOptions).toBeUndefined(); // earlier message stays unmarked
+    expect(out[1]?.providerOptions).toEqual({ anthropic: { cacheControl: { type: "ephemeral" } } }); // last message gets the marker
+  });
+
   it("the wired callback returns undefined for non-finish-step parts", async () => {
     const captured: unknown[] = [];
     vi.mocked(streamText).mockReturnValue(makeStreamResult({ metadataCalls: captured }) as never);

diff --git a/app/lib/workflows/runAgentStep.ts b/app/lib/workflows/runAgentStep.ts
@@ -6,6 +6,8 @@ import { CHAT_AGENT_STOP_WHEN } from "@/lib/chat/const";
 import { buildAgentTools } from "@/lib/agent/buildAgentTools";
 import type { AgentContext, DurableAgentContext } from "@/lib/agent/tools/AgentContext";
 import { buildMessageMetadataCallback } from "@/lib/agent/messageMetadata/buildMessageMetadataCallback";
+import { addCacheControlToTools } from "@/lib/agent/contextManagement/addCacheControlToTools";
+import { addCacheControlToMessages } from "@/lib/agent/contextManagement/addCacheControlToMessages";
 
 export type RunAgentStepInput = {
   messages: UIMessage[];
@@ -48,7 +50,14 @@ export async function runAgentStep(input: RunAgentStepInput): Promise<{ finishRe
   });
 
   const modelMessages = await convertToModelMessages(input.messages);
-  const tools = buildAgentTools({ skills: input.agentContext.skills });
+  // Mark the last tool with `cacheControl: { type: "ephemeral" }` so
+  // Anthropic caches the tool-definitions block across the
+  // conversation. Per-step message caching is wired via `prepareStep`
+  // below. Mirrors open-agents' `prepareCall` + `prepareStep` split.
+  const tools = addCacheControlToTools({
+    tools: buildAgentTools({ skills: input.agentContext.skills }),
+    model: input.modelId,
+  });
   // Construct the model here (not in the workflow input) — LanguageModel
   // instances aren't JSON-serializable and can't ride durable inputs.
   // Then attach to AgentContext so tools see the same model the parent
@@ -74,6 +83,12 @@ export async function runAgentStep(input: RunAgentStepInput): Promise<{ finishRe
     tools,
     stopWhen: CHAT_AGENT_STOP_WHEN,
     experimental_context: agentContext,
+    // Mark the LAST message with cacheControl on every step so Anthropic
+    // incrementally caches the conversation prefix. Mirrors open-agents'
+    // `prepareStep` in `open-harness-agent.ts:100`.
+    prepareStep: ({ messages, model }) => ({
+      messages: addCacheControlToMessages({ messages, model }),
+    }),
   });
 
   // Acquire the writer once and release in `finally` so a thrown chunk

diff --git a/lib/agent/contextManagement/__tests__/addCacheControlToMessages.test.ts b/lib/agent/contextManagement/__tests__/addCacheControlToMessages.test.ts
@@ -0,0 +1,60 @@
+import { describe, it, expect } from "vitest";
+import { addCacheControlToMessages } from "@/lib/agent/contextManagement/addCacheControlToMessages";
+
+const anthropicModel = { provider: "anthropic", modelId: "claude-haiku-4.5" } as never; // stub model used for the marking cases
+const openaiModel = { provider: "openai", modelId: "gpt-5" } as never; // stub model used for the no-op case
+
+const makeMsgs = () => [ // builds a fresh array on every call, so tests never share message objects
+  { role: "user", content: "first" },
+  { role: "assistant", content: "ack" },
+  { role: "user", content: "second" }, // index 2: the last message, the one expected to be marked
+];
+
+describe("addCacheControlToMessages", () => { // covers: no-op paths, last-message marking, option merging, input immutability
+  it("returns messages unchanged for non-Anthropic models", () => {
+    const messages = makeMsgs();
+    const result = addCacheControlToMessages({ messages: messages as never, model: openaiModel });
+    expect(result).toEqual(messages); // deep equality: content is unchanged (identity is not asserted)
+  });
+
+  it("returns messages unchanged when the array is empty", () => {
+    const result = addCacheControlToMessages({ messages: [], model: anthropicModel });
+    expect(result).toEqual([]);
+  });
+
+  it("marks ONLY the last message with ephemeral cacheControl (per Anthropic guidance)", () => {
+    const messages = makeMsgs();
+    const result = addCacheControlToMessages({
+      messages: messages as never,
+      model: anthropicModel,
+    }) as Array<{ providerOptions?: { anthropic?: { cacheControl?: { type: string } } } }>;
+    expect(result[0]?.providerOptions).toBeUndefined();
+    expect(result[1]?.providerOptions).toBeUndefined();
+    expect(result[2]?.providerOptions?.anthropic?.cacheControl).toEqual({ type: "ephemeral" });
+  });
+
+  it("preserves existing providerOptions on the last message when merging the anthropic marker", () => {
+    const messages = [
+      { role: "user", content: "first" },
+      {
+        role: "user",
+        content: "second",
+        providerOptions: { openai: { foo: "bar" } }, // pre-existing options under a different provider key only
+      },
+    ];
+    const result = addCacheControlToMessages({
+      messages: messages as never,
+      model: anthropicModel,
+    }) as Array<{ providerOptions?: Record<string, unknown> }>;
+    expect(result[1]?.providerOptions?.openai).toEqual({ foo: "bar" });
+    expect(result[1]?.providerOptions?.anthropic).toEqual({
+      cacheControl: { type: "ephemeral" },
+    });
+  });
+
+  it("does NOT mutate the input messages array", () => {
+    const messages = makeMsgs();
+    addCacheControlToMessages({ messages: messages as never, model: anthropicModel }); // return value deliberately ignored
+    expect((messages[2] as { providerOptions?: unknown }).providerOptions).toBeUndefined(); // the original last message must stay unmarked
+  });
+});
diff --git a/lib/agent/contextManagement/__tests__/addCacheControlToTools.test.ts b/lib/agent/contextManagement/__tests__/addCacheControlToTools.test.ts
@@ -0,0 +1,63 @@
+import { describe, it, expect } from "vitest";
+import { addCacheControlToTools } from "@/lib/agent/contextManagement/addCacheControlToTools";
+
+const anthropicModel = { provider: "anthropic", modelId: "claude-haiku-4.5" } as never; // stub model used for the marking cases
+const openaiModel = { provider: "openai", modelId: "gpt-5" } as never; // stub model used for the no-op case
+
+const makeTools = () => ({ // builds a fresh toolset on every call, so tests never share tool objects
+  bash: { description: "run bash", inputSchema: {} },
+  read: { description: "read file", inputSchema: {} },
+  write: { description: "write file", inputSchema: {} }, // last key: the tool expected to be marked
+});
+
+describe("addCacheControlToTools", () => { // covers: no-op paths, last-tool marking, option merging, custom marker override
+  it("returns tools unchanged for non-Anthropic models", () => {
+    const tools = makeTools();
+    const result = addCacheControlToTools({ tools, model: openaiModel });
+    expect(result).toEqual(tools); // deep equality: content is unchanged (identity is not asserted)
+  });
+
+  it("returns tools unchanged when the toolset is empty", () => {
+    const tools = {};
+    const result = addCacheControlToTools({ tools, model: anthropicModel });
+    expect(result).toEqual({});
+  });
+
+  it("marks ONLY the last tool with ephemeral cacheControl (Anthropic's 4-breakpoint limit)", () => {
+    const tools = makeTools();
+    const result = addCacheControlToTools({ tools, model: anthropicModel }) as Record<
+      string,
+      { providerOptions?: { anthropic?: { cacheControl?: { type: string } } } }
+    >;
+    expect(result.bash?.providerOptions).toBeUndefined();
+    expect(result.read?.providerOptions).toBeUndefined();
+    expect(result.write?.providerOptions?.anthropic?.cacheControl).toEqual({ type: "ephemeral" });
+  });
+
+  it("preserves existing providerOptions on the last tool when merging the anthropic marker", () => {
+    const tools = {
+      a: { description: "a", inputSchema: {} },
+      b: {
+        description: "b",
+        inputSchema: {},
+        providerOptions: { openai: { foo: "bar" } }, // pre-existing options under a different provider key only
+      },
+    } as never;
+    const result = addCacheControlToTools({ tools, model: anthropicModel }) as Record<
+      string,
+      { providerOptions?: Record<string, unknown> }
+    >;
+    expect(result.b?.providerOptions?.openai).toEqual({ foo: "bar" });
+    expect(result.b?.providerOptions?.anthropic).toEqual({ cacheControl: { type: "ephemeral" } });
+  });
+
+  it("respects a custom providerOptions override", () => {
+    const tools = { only: { description: "x", inputSchema: {} } } as never;
+    const result = addCacheControlToTools({
+      tools,
+      model: anthropicModel,
+      providerOptions: { anthropic: { cacheControl: { type: "ephemeral_1h" } } }, // caller-supplied marker instead of the default
+    }) as Record<string, { providerOptions?: { anthropic?: { cacheControl?: { type: string } } } }>;
+    expect(result.only?.providerOptions?.anthropic?.cacheControl).toEqual({ type: "ephemeral_1h" });
+  });
+});
diff --git a/lib/agent/contextManagement/__tests__/isAnthropicModel.test.ts b/lib/agent/contextManagement/__tests__/isAnthropicModel.test.ts
@@ -0,0 +1,36 @@
+import { describe, it, expect } from "vitest";
+import { isAnthropicModel } from "@/lib/agent/contextManagement/isAnthropicModel";
+
+describe("isAnthropicModel", () => { // covers string ids and { provider, modelId } objects, positive and negative
+  it("returns true for a string model id containing 'anthropic'", () => {
+    expect(isAnthropicModel("anthropic/claude-haiku-4.5" as never)).toBe(true);
+  });
+
+  it("returns true for a string model id containing 'claude' (no provider prefix)", () => {
+    expect(isAnthropicModel("claude-3-5-haiku" as never)).toBe(true);
+  });
+
+  it("returns false for non-Anthropic string model ids", () => {
+    expect(isAnthropicModel("openai/gpt-5.2" as never)).toBe(false);
+    expect(isAnthropicModel("google/gemini-3" as never)).toBe(false);
+  });
+
+  it("returns true for a model object whose `provider` is 'anthropic'", () => {
+    expect(isAnthropicModel({ provider: "anthropic", modelId: "claude-haiku-4.5" } as never)).toBe(
+      true,
+    );
+  });
+
+  it("returns true for a model object whose `provider` contains 'anthropic' (gateway-prefixed)", () => {
+    expect(isAnthropicModel({ provider: "gateway.anthropic", modelId: "x" } as never)).toBe(true); // modelId carries no marker here
+  });
+
+  it("returns true for a model object whose `modelId` contains 'anthropic' or 'claude'", () => {
+    expect(isAnthropicModel({ provider: "gateway", modelId: "anthropic/x" } as never)).toBe(true); // provider carries no marker here
+    expect(isAnthropicModel({ provider: "gateway", modelId: "claude-x" } as never)).toBe(true);
+  });
+
+  it("returns false for a model object with no anthropic / claude markers", () => {
+    expect(isAnthropicModel({ provider: "openai", modelId: "gpt-5" } as never)).toBe(false);
+  });
+});
diff --git a/lib/agent/contextManagement/addCacheControlToMessages.ts b/lib/agent/contextManagement/addCacheControlToMessages.ts
@@ -0,0 +1,44 @@
+import type { JSONValue, LanguageModel, ModelMessage } from "ai";
+import { isAnthropicModel } from "@/lib/agent/contextManagement/isAnthropicModel";
+
+type ProviderOptions = Record<string, Record<string, JSONValue>>; // provider name -> options for that provider
+
+const DEFAULT_PROVIDER_OPTIONS: ProviderOptions = { // marker used when the caller passes no `providerOptions`
+  anthropic: { cacheControl: { type: "ephemeral" } },
+};
+
+/**
+ * Mark the LAST message with `cacheControl: { type: "ephemeral" }` (or the
+ * caller-supplied `providerOptions`) so Anthropic incrementally caches the
+ * conversation prefix. Port of open-agents' `addCacheControl({messages,
+ * model})` overload in `packages/agent/context-management/cache-control.ts`.
+ *
+ * The marker is merged per provider key: options the last message already
+ * carries (other providers, or other `anthropic` settings) are kept, and
+ * `providerOptions` wins only where the same option name is set twice.
+ *
+ * Non-Anthropic models and empty arrays return the input as-is. The input
+ * is never mutated — otherwise a new array with a copied last message.
+ */
+export function addCacheControlToMessages(opts: {
+  messages: ModelMessage[];
+  model: LanguageModel;
+  providerOptions?: ProviderOptions;
+}): ModelMessage[] {
+  const { messages, model, providerOptions = DEFAULT_PROVIDER_OPTIONS } = opts;
+
+  if (!isAnthropicModel(model)) return messages;
+  if (messages.length === 0) return messages;
+
+  const lastIndex = messages.length - 1;
+  return messages.map((message, index) => {
+    if (index !== lastIndex) return message;
+    // Merge one level below the provider key; a flat spread would drop siblings.
+    const existing = (message as { providerOptions?: ProviderOptions }).providerOptions;
+    const merged: ProviderOptions = { ...existing };
+    for (const [provider, options] of Object.entries(providerOptions)) {
+      merged[provider] = { ...existing?.[provider], ...options };
+    }
+    return { ...message, providerOptions: merged };
+  });
+}
diff --git a/lib/agent/contextManagement/addCacheControlToTools.ts b/lib/agent/contextManagement/addCacheControlToTools.ts
@@ -0,0 +1,50 @@
+import type { JSONValue, LanguageModel, ToolSet } from "ai";
+import { isAnthropicModel } from "@/lib/agent/contextManagement/isAnthropicModel";
+
+type ProviderOptions = Record<string, Record<string, JSONValue>>; // provider name -> options for that provider
+
+const DEFAULT_PROVIDER_OPTIONS: ProviderOptions = { // marker used when the caller passes no `providerOptions`
+  anthropic: { cacheControl: { type: "ephemeral" } },
+};
+
+/**
+ * Mark the LAST tool in a toolset with `cacheControl: { type: "ephemeral" }`
+ * (or the caller-supplied `providerOptions`) so Anthropic caches the
+ * tool-definitions block across the conversation. Port of open-agents'
+ * `addCacheControl({tools, model})` overload in
+ * `packages/agent/context-management/cache-control.ts`.
+ *
+ * Only the trailing tool is marked because Anthropic enforces a max of 4
+ * cache breakpoints and we spend one each on the system prompt + messages.
+ *
+ * The marker is merged per provider key, so options that tool already has
+ * (including other `anthropic` settings) are kept. Non-Anthropic models and
+ * empty toolsets get the input object back unchanged.
+ */
+export function addCacheControlToTools<T extends ToolSet>(opts: {
+  tools: T;
+  model: LanguageModel;
+  providerOptions?: ProviderOptions;
+}): T {
+  const { tools, model, providerOptions = DEFAULT_PROVIDER_OPTIONS } = opts;
+
+  if (!isAnthropicModel(model)) return tools;
+
+  const entries = Object.entries(tools);
+  if (entries.length === 0) return tools;
+
+  const lastIndex = entries.length - 1;
+  return Object.fromEntries(
+    entries.map(([name, t], index) => {
+      if (index !== lastIndex) return [name, t];
+      // Merge one level below the provider key; a flat spread would replace
+      // the tool's whole `anthropic` object and drop its other settings.
+      const existing = (t as { providerOptions?: ProviderOptions }).providerOptions;
+      const merged: ProviderOptions = { ...existing };
+      for (const [provider, options] of Object.entries(providerOptions)) {
+        merged[provider] = { ...existing?.[provider], ...options };
+      }
+      return [name, { ...t, providerOptions: merged }];
+    }),
+  ) as T;
+}
diff --git a/lib/agent/contextManagement/isAnthropicModel.ts b/lib/agent/contextManagement/isAnthropicModel.ts
@@ -0,0 +1,26 @@
+import type { LanguageModel } from "ai";
+
+/**
+ * Decide whether `model` refers to a Claude / Anthropic model. Callers use
+ * the answer to either attach `cacheControl: { type: "ephemeral" }` to
+ * messages + tools (Anthropic prompt caching) or leave them untouched.
+ *
+ * Matching is a case-sensitive substring test, with the same rules as
+ * open-agents' `isAnthropicModel`
+ * (`packages/agent/context-management/cache-control.ts`):
+ * - string ids (e.g. `"anthropic/claude-haiku-4.5"`) match on "anthropic"
+ *   or "claude";
+ * - `LanguageModel` instances (e.g. from `gateway("anthropic/claude-...")`)
+ *   match when `provider` contains "anthropic", or `modelId` contains
+ *   "anthropic" or "claude".
+ */
+export function isAnthropicModel(model: LanguageModel): boolean {
+  const namesAnthropic = (text: string): boolean => text.includes("anthropic");
+
+  if (typeof model === "string") {
+    return namesAnthropic(model) || model.includes("claude");
+  }
+  // Provider is matched on "anthropic" only; "claude" is a model-id marker.
+  if (namesAnthropic(model.provider)) return true;
+  return namesAnthropic(model.modelId) || model.modelId.includes("claude");
+}