Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 102 additions & 0 deletions app/lib/workflows/__tests__/runAgentStep.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
import { describe, it, expect, vi, beforeEach } from "vitest";
import { streamText } from "ai";
import { runAgentStep } from "@/app/lib/workflows/runAgentStep";

// Partially mock the `ai` package: every real export is kept, but
// `streamText` is swapped for a bare `vi.fn()` so each test can install
// its own return value with `mockReturnValue`.
vi.mock("ai", async () => {
const actual = await vi.importActual<typeof import("ai")>("ai");
return { ...actual, streamText: vi.fn() };
});

// Avoid pulling in real gateway / fetch surface.
// The stub `gateway` factory returns a plain object that carries the
// requested modelId plus a `__mock` tag instead of a real model instance.
vi.mock("@ai-sdk/gateway", () => ({
gateway: vi.fn((modelId: string) => ({ modelId, __mock: "gateway" })),
}));

/**
 * Builds a stand-in for the object `streamText` returns.
 *
 * - `toUIMessageStream` is a mock that records the `messageMetadata`
 *   option it was called with (into `opts.metadataCalls` when supplied,
 *   otherwise into a private array) and returns an async iterable that
 *   yields a `start` part followed by a `finish` part.
 * - `finishReason` is a promise that resolves to `"stop"`.
 */
function makeStreamResult(opts?: { metadataCalls?: Array<unknown> }) {
  const recorded = opts?.metadataCalls ?? [];

  // The fixed two-part stream every call to toUIMessageStream hands back.
  async function* emitParts() {
    yield { type: "start" };
    yield { type: "finish" };
  }

  const toUIMessageStream = vi.fn((streamOpts: { messageMetadata?: unknown }) => {
    // Remember what was wired in so the test can inspect it afterwards.
    recorded.push(streamOpts.messageMetadata);
    return emitParts();
  });

  return {
    toUIMessageStream,
    finishReason: Promise.resolve("stop"),
  };
}

/**
 * Creates a `WritableStream` whose sink appends every chunk it receives
 * to an array, and returns both so a test can pass the stream in and
 * inspect what was written.
 */
function makeWritable() {
  const received: unknown[] = [];
  return {
    stream: new WritableStream({
      write: (chunk) => {
        received.push(chunk);
      },
    }),
    written: received,
  };
}

// Minimal runAgentStep input shared by the tests below. Each test spreads
// it and adds its own `writable` stream before calling runAgentStep.
const baseInput = {
messages: [
{
id: "m1",
// `as const` keeps `role` / `type` as literal types instead of `string`.
role: "user" as const,
parts: [{ type: "text" as const, text: "hi" }],
},
],
// The finish-step test asserts this exact id comes back in the metadata.
modelId: "anthropic/claude-haiku-4.5",
agentContext: {
sandbox: { state: { type: "vercel" }, workingDirectory: "/sandbox/mono" },
},
};

describe("runAgentStep", () => {
  // Runs a single runAgentStep call against a mocked `streamText` result
  // and returns every `messageMetadata` value that the step handed to
  // `toUIMessageStream`.
  async function runAndCollectMetadata(): Promise<unknown[]> {
    const captured: unknown[] = [];
    const fakeResult = makeStreamResult({ metadataCalls: captured });
    vi.mocked(streamText).mockReturnValue(fakeResult as never);
    const { stream } = makeWritable();

    await runAgentStep({ ...baseInput, writable: stream } as never);

    return captured;
  }

  beforeEach(() => {
    vi.clearAllMocks();
  });

  it("wires a messageMetadata callback into toUIMessageStream", async () => {
    const captured = await runAndCollectMetadata();

    expect(captured).toHaveLength(1);
    expect(typeof captured[0]).toBe("function");
  });

  it("the wired callback emits modelId on finish-step parts", async () => {
    type FinishStepArgs = {
      part: { type: string; usage?: unknown; finishReason?: string };
    };
    const [wired] = await runAndCollectMetadata();
    const cb = wired as (args: FinishStepArgs) => { modelId?: string } | undefined;

    const finishStep: FinishStepArgs = {
      part: {
        type: "finish-step",
        usage: { inputTokens: 10, outputTokens: 5 },
        finishReason: "stop",
      },
    };
    const meta = cb(finishStep);

    expect(meta).toBeDefined();
    expect(meta?.modelId).toBe("anthropic/claude-haiku-4.5");
  });

  it("the wired callback returns undefined for non-finish-step parts", async () => {
    const [wired] = await runAndCollectMetadata();
    const cb = wired as (args: { part: { type: string } }) => unknown;

    // Neither of these part types should produce metadata.
    for (const type of ["text-delta", "start"]) {
      expect(cb({ part: { type } })).toBeUndefined();
    }
  });
});
10 changes: 8 additions & 2 deletions app/lib/workflows/runAgentStep.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import { agentCustomInstructions } from "@/lib/chat/agentCustomInstructions";
import { CHAT_AGENT_STOP_WHEN } from "@/lib/chat/const";
import { buildAgentTools } from "@/lib/agent/buildAgentTools";
import type { AgentContext, DurableAgentContext } from "@/lib/agent/tools/AgentContext";
import { buildMessageMetadataCallback } from "@/lib/agent/messageMetadata/buildMessageMetadataCallback";

export type RunAgentStepInput = {
messages: UIMessage[];
Expand Down Expand Up @@ -45,7 +46,7 @@ export async function runAgentStep(input: RunAgentStepInput): Promise<{ finishRe
hasSandboxState: Boolean(input.agentContext.sandbox?.state),
});

const modelMessages = convertToModelMessages(input.messages);
const modelMessages = await convertToModelMessages(input.messages);
const tools = buildAgentTools({ skills: input.agentContext.skills });
// Construct the model here (not in the workflow input) — LanguageModel
// instances aren't JSON-serializable and can't ride durable inputs.
Expand All @@ -69,7 +70,12 @@ export async function runAgentStep(input: RunAgentStepInput): Promise<{ finishRe
// doesn't leak the lock.
const writer = input.writable.getWriter();
try {
for await (const part of result.toUIMessageStream()) {
// `messageMetadata` emits {modelId, usage, cost} chunks the UI
// renders as model/cost badges. Mirrors open-agents' chat workflow
// shape so sandbox.recoupable.com sees the same metadata when cut
// over to api's /api/chat/workflow.
const messageMetadata = buildMessageMetadataCallback({ modelId: input.modelId });
for await (const part of result.toUIMessageStream({ messageMetadata })) {
await writer.write(part);
}
} finally {
Expand Down
29 changes: 29 additions & 0 deletions lib/agent/messageMetadata/AgentMessageMetadata.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import type { FinishReason, LanguageModelUsage } from "ai";
import type { AgentStepFinishMetadata } from "@/lib/agent/messageMetadata/AgentStepFinishMetadata";

/**
* Metadata emitted on each assistant turn via the `messageMetadata`
* callback in `runAgentStep`. Mirrors open-agents'
* `apps/web/app/types.ts:WebAgentMessageMetadata` byte-for-byte so the
* sandbox.recoupable.com UI can render model/cost/usage badges when
* cut over to api's `/api/chat/workflow`. Now that api ships
* `ai@^6.0.190`, `LanguageModelUsage` is the same flat-shape type
* open-agents has been using — no shape conversion needed.
*/
export type AgentMessageMetadata = {
/** Model the client requested (e.g. user selection in the UI). */
selectedModelId?: string;
/** Model actually used for the call (may differ from selected under gateway fallback). */
modelId?: string;
/** Usage from the most recent `finish-step`. */
lastStepUsage?: LanguageModelUsage;
/** Cumulative usage across every step in this message. */
totalMessageUsage?: LanguageModelUsage;
/** Gateway-reported cost of the most recent step, in USD. */
lastStepCost?: number;
/** Cumulative gateway-reported cost across every step of the message, in USD. */
totalMessageCost?: number;
/** Finish reason of the most recent `finish-step`. */
lastStepFinishReason?: FinishReason;
/**
 * NOTE(review): presumably the provider's raw (un-normalized) finish
 * reason for the most recent step — confirm against the callback that
 * populates it.
 */
lastStepRawFinishReason?: string;
/** One entry per `finish-step`, in the order the steps finished. */
stepFinishReasons?: AgentStepFinishMetadata[];
};
11 changes: 11 additions & 0 deletions lib/agent/messageMetadata/AgentStepFinishMetadata.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
import type { FinishReason } from "ai";

/**
* Per-finish-step record kept on the assistant message so the UI can
* render a finish-reason history. Mirrors open-agents'
* `WebAgentStepFinishMetadata` in `apps/web/app/types.ts`.
*/
export type AgentStepFinishMetadata = {
/** Finish reason recorded for this step. */
finishReason: FinishReason;
/**
 * NOTE(review): presumably the provider's raw finish-reason string for
 * this step — confirm against the code that builds these records.
 */
rawFinishReason?: string;
};
18 changes: 18 additions & 0 deletions lib/agent/messageMetadata/GatewayProviderMetadata.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
/**
* Shape of the Vercel AI Gateway entry in `providerMetadata`.
* Mirrors open-agents' `apps/web/app/workflows/gateway-metadata.ts`.
*
* The gateway surfaces per-step cost information alongside routing
* diagnostics. We only consume the `cost` field; other fields are
* documented for reference and forward-compat.
*/
export interface GatewayProviderMetadata {
/** The gateway's own namespace inside `providerMetadata`. */
gateway: {
/**
 * Cost of the step as a decimal string (e.g. "0.025"). This is the
 * only field consumed; it is parsed to a number before being surfaced
 * as `lastStepCost` / `totalMessageCost`.
 */
cost?: string;
// The remaining fields are not read by this codebase. Their exact
// meaning is not established here — consult the gateway documentation
// before relying on them.
marketCost?: string;
inferenceCost?: string;
inputInferenceCost?: string;
outputInferenceCost?: string;
generationId?: string;
};
}
49 changes: 49 additions & 0 deletions lib/agent/messageMetadata/__tests__/addLanguageModelUsage.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import { describe, it, expect } from "vitest";
import { addLanguageModelUsage } from "@/lib/agent/messageMetadata/addLanguageModelUsage";

describe("addLanguageModelUsage", () => {
  it("sums basic input/output/total tokens", () => {
    const { inputTokens, outputTokens, totalTokens } = addLanguageModelUsage(
      { inputTokens: 100, outputTokens: 50, totalTokens: 150 },
      { inputTokens: 200, outputTokens: 75, totalTokens: 275 },
    );

    expect(inputTokens).toBe(300);
    expect(outputTokens).toBe(125);
    expect(totalTokens).toBe(425);
  });

  it("sums nested cache token details", () => {
    // Partial usage shapes: only the fields under test are populated.
    const firstStep = {
      inputTokens: 100,
      outputTokens: 50,
      inputTokenDetails: { cacheReadTokens: 10, cacheWriteTokens: 5, noCacheTokens: 85 },
    };
    const secondStep = {
      inputTokens: 200,
      outputTokens: 75,
      inputTokenDetails: { cacheReadTokens: 20, cacheWriteTokens: 15, noCacheTokens: 165 },
    };

    const summed = addLanguageModelUsage(firstStep as never, secondStep as never);
    const inputDetails = summed.inputTokenDetails;

    expect(inputDetails?.cacheReadTokens).toBe(30);
    expect(inputDetails?.cacheWriteTokens).toBe(20);
    expect(inputDetails?.noCacheTokens).toBe(250);
  });

  it("returns undefined for fields missing on both inputs", () => {
    const inputOnlyA = { inputTokens: 100 };
    const inputOnlyB = { inputTokens: 200 };

    const { outputTokens, totalTokens } = addLanguageModelUsage(
      inputOnlyA as never,
      inputOnlyB as never,
    );

    expect(outputTokens).toBeUndefined();
    expect(totalTokens).toBeUndefined();
  });

  it("treats missing field on one side as 0", () => {
    const withOutput = { inputTokens: 100, outputTokens: 50 };
    const withoutOutput = { inputTokens: 200 };

    const summed = addLanguageModelUsage(withOutput as never, withoutOutput as never);

    expect(summed.outputTokens).toBe(50);
  });
});
27 changes: 27 additions & 0 deletions lib/agent/messageMetadata/__tests__/addTokenCounts.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import { describe, it, expect } from "vitest";
import { addTokenCounts } from "@/lib/agent/messageMetadata/addTokenCounts";

describe("addTokenCounts", () => {
  it("returns undefined when both inputs are undefined", () => {
    const sum = addTokenCounts(undefined, undefined);
    expect(sum).toBeUndefined();
  });

  it("returns undefined when both inputs are null", () => {
    // null is outside the declared parameter type, hence the casts.
    const sum = addTokenCounts(null as never, null as never);
    expect(sum).toBeUndefined();
  });

  it("sums two numbers", () => {
    const sum = addTokenCounts(100, 50);
    expect(sum).toBe(150);
  });

  it("treats undefined on one side as 0", () => {
    const leftOnly = addTokenCounts(100, undefined);
    expect(leftOnly).toBe(100);

    const rightOnly = addTokenCounts(undefined, 50);
    expect(rightOnly).toBe(50);
  });

  it("handles zero correctly (not confused with undefined)", () => {
    const zeroPlusValue = addTokenCounts(0, 50);
    expect(zeroPlusValue).toBe(50);

    const zeroPlusZero = addTokenCounts(0, 0);
    expect(zeroPlusZero).toBe(0);

    const zeroPlusMissing = addTokenCounts(0, undefined);
    expect(zeroPlusMissing).toBe(0);
  });
});
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
import { describe, it, expect } from "vitest";
import { buildMessageMetadataCallback } from "@/lib/agent/messageMetadata/buildMessageMetadataCallback";

// Model id handed to buildMessageMetadataCallback in every test below; the
// first finish-step test expects it back as both `modelId` and `selectedModelId`.
const MODEL_ID = "anthropic/claude-haiku-4.5";

// `ai@^6.0.190` uses the flat LanguageModelUsage shape — same as the
// open-agents UI consumes — so the callback passes usage through
// without any shape conversion.
/**
 * Builds a `finish-step` stream part for the tests.
 *
 * Defaults: 100 input tokens, 50 output tokens, finish reason
 * `"tool-calls"`. `providerMetadata` is `{ gateway: { cost } }` only when
 * a non-empty `cost` is given, otherwise `undefined`. The result is cast
 * to `never` so it can be passed wherever the callback's part type is
 * expected.
 */
function finishStepPart(opts: {
  inputTokens?: number;
  outputTokens?: number;
  cost?: string;
  finishReason?: string;
}) {
  const promptTokens = opts.inputTokens ?? 100;
  const completionTokens = opts.outputTokens ?? 50;

  const usage = {
    inputTokens: promptTokens,
    outputTokens: completionTokens,
    totalTokens: promptTokens + completionTokens,
    inputTokenDetails: {
      noCacheTokens: promptTokens,
      cacheReadTokens: undefined,
      cacheWriteTokens: undefined,
    },
    outputTokenDetails: {
      textTokens: completionTokens,
      reasoningTokens: undefined,
    },
  };

  // Stays undefined unless the caller supplied a (truthy) cost string.
  let providerMetadata: { gateway: { cost: string } } | undefined;
  if (opts.cost) {
    providerMetadata = { gateway: { cost: opts.cost } };
  }

  return {
    type: "finish-step",
    usage,
    providerMetadata,
    finishReason: opts.finishReason ?? "tool-calls",
  } as never;
}

describe("buildMessageMetadataCallback", () => {
  // Each test gets its own callback so accumulated state never leaks across tests.
  const freshCallback = () => buildMessageMetadataCallback({ modelId: MODEL_ID });

  it("returns undefined for non-finish-step parts (start, text-delta, tool-call, etc.)", () => {
    const cb = freshCallback();

    const textDelta = cb({ part: { type: "text-delta", delta: "hi" } as never });
    expect(textDelta).toBeUndefined();

    const start = cb({ part: { type: "start" } as never });
    expect(start).toBeUndefined();

    const toolCall = cb({ part: { type: "tool-call", toolName: "bash" } as never });
    expect(toolCall).toBeUndefined();
  });

  it("emits modelId + selectedModelId + usage on the first finish-step", () => {
    const cb = freshCallback();
    const firstStep = finishStepPart({ inputTokens: 100, outputTokens: 50 });

    const meta = cb({ part: firstStep });

    const expectedUsage = { inputTokens: 100, outputTokens: 50 };
    expect(meta).toMatchObject({
      modelId: MODEL_ID,
      selectedModelId: MODEL_ID,
      lastStepUsage: expectedUsage,
      totalMessageUsage: expectedUsage,
    });
  });

  it("emits cost when the gateway provider metadata includes it", () => {
    const cb = freshCallback();
    const pricedStep = finishStepPart({ cost: "0.025" });

    const meta = cb({ part: pricedStep });

    expect(meta).toMatchObject({ lastStepCost: 0.025, totalMessageCost: 0.025 });
  });

  it("omits cost fields when the gateway did not report one", () => {
    const cb = freshCallback();
    const unpricedStep = finishStepPart({});

    const meta = cb({ part: unpricedStep }) as Record<string, unknown>;

    expect(meta.lastStepCost).toBeUndefined();
    expect(meta.totalMessageCost).toBeUndefined();
  });

  it("accumulates usage AND cost across multiple finish-step calls", () => {
    const cb = freshCallback();
    const stepOne = finishStepPart({ inputTokens: 100, outputTokens: 50, cost: "0.01" });
    const stepTwo = finishStepPart({ inputTokens: 200, outputTokens: 75, cost: "0.03" });

    cb({ part: stepOne });
    const meta = cb({ part: stepTwo });

    expect(meta).toMatchObject({
      lastStepUsage: { inputTokens: 200, outputTokens: 75 },
      totalMessageUsage: { inputTokens: 300, outputTokens: 125 },
      lastStepCost: 0.03,
      totalMessageCost: 0.04,
    });
  });

  it("records lastStepFinishReason and stepFinishReasons history", () => {
    const cb = freshCallback();
    const toolCallsStep = finishStepPart({ finishReason: "tool-calls" });
    const stopStep = finishStepPart({ finishReason: "stop" });

    cb({ part: toolCallsStep });
    const meta = cb({ part: stopStep });

    expect(meta).toMatchObject({
      lastStepFinishReason: "stop",
      stepFinishReasons: [{ finishReason: "tool-calls" }, { finishReason: "stop" }],
    });
  });
});
28 changes: 28 additions & 0 deletions lib/agent/messageMetadata/__tests__/extractGatewayCost.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import { describe, it, expect } from "vitest";
import { extractGatewayCost } from "@/lib/agent/messageMetadata/extractGatewayCost";

describe("extractGatewayCost", () => {
  it("returns undefined when providerMetadata is missing", () => {
    const cost = extractGatewayCost(undefined);
    expect(cost).toBeUndefined();
  });

  it("returns undefined when there is no `gateway` namespace", () => {
    const otherProviderOnly = { openai: { foo: "bar" } };
    const cost = extractGatewayCost(otherProviderOnly as never);
    expect(cost).toBeUndefined();
  });

  it("returns undefined when `gateway.cost` is missing", () => {
    const emptyGateway = { gateway: {} };
    const cost = extractGatewayCost(emptyGateway as never);
    expect(cost).toBeUndefined();
  });

  it("parses a numeric string cost", () => {
    const metadata = { gateway: { cost: "0.0420" } };
    const cost = extractGatewayCost(metadata as never);
    expect(cost).toBe(0.042);
  });

  it("returns undefined when cost is non-numeric", () => {
    const metadata = { gateway: { cost: "not-a-number" } };
    const cost = extractGatewayCost(metadata as never);
    expect(cost).toBeUndefined();
  });

  it("returns undefined when cost is a number (gateway should send strings)", () => {
    const metadata = { gateway: { cost: 0.05 } };
    const cost = extractGatewayCost(metadata as never);
    expect(cost).toBeUndefined();
  });
});
Loading
Loading