/**
 * A single pricing tier inside a model's cost-catalog entry.
 *
 * Every price is expressed in USD per million tokens, matching
 * models.dev. All fields are optional because a catalog entry may be
 * partial.
 */
export interface AvailableModelCostTier {
  /** Price applied to cached input tokens (cache reads). */
  cache_read?: number;
  /** Price applied to input tokens. */
  input?: number;
  /** Price applied to output tokens. */
  output?: number;
}

/**
 * Shape of the per-model cost catalog used for token-based credit
 * estimation: the base tier plus an optional long-context override.
 *
 * Mirrors open-agents' `apps/web/lib/models.ts:AvailableModelCost` so
 * the same `estimateModelUsageCost` math runs against either catalog
 * without shape conversion. api's current gateway/models.dev pipeline
 * emits only `{ input, output }` (see
 * `lib/ai/parseModelsDevMetadata.ts:ModelsDevMetadata`); the richer
 * `cache_read` and `context_over_200k` fields stay in the type so a
 * future catalog expansion (or a hand-edited override) is picked up
 * automatically by the estimator.
 */
export interface AvailableModelCost extends AvailableModelCostTier {
  /** Tier override for turns whose input exceeds 200k tokens. */
  context_over_200k?: AvailableModelCostTier;
}
// Reset mock state before every test so call counts and resolved values
// from one case never leak into the next.
beforeEach(() => {
  vi.clearAllMocks();
  // Default catalog: empty (forces token-estimate path to fall through to 1c).
  vi.mocked(getAvailableModels).mockResolvedValue([]);
});

// Usage with no tokens at all; used where only the gateway cost matters.
const ZERO_USAGE = { inputTokens: 0, cachedInputTokens: 0, outputTokens: 0 };

// Suite for computeCreditsDeductedCents: gateway-cost path first, then the
// token-estimate fallback, then the 1c floor, then catalog lookup by id.
describe("computeCreditsDeductedCents", () => {
  describe("gateway cost path (preferred)", () => {
    it("returns gateway cost in cents when gatewayCostUsd is a positive number", async () => {
      // $0.0074 → 0.74c → rounds to 1c (the minimum-charge bump is not what produces the 1 here)
      expect(
        await computeCreditsDeductedCents(ZERO_USAGE, "anthropic/claude-haiku-4.5", 0.0074),
      ).toBe(1);
      // $0.42 → 42c
      expect(
        await computeCreditsDeductedCents(ZERO_USAGE, "anthropic/claude-haiku-4.5", 0.42),
      ).toBe(42);
    });

    it("rounds the gateway cost to the nearest cent", async () => {
      // $0.123 → 12.3c → 12c
      expect(await computeCreditsDeductedCents(ZERO_USAGE, "model", 0.123)).toBe(12);
      // $0.126 → 12.6c → 13c
      expect(await computeCreditsDeductedCents(ZERO_USAGE, "model", 0.126)).toBe(13);
    });

    it("returns at least 1 when gateway cost rounds to 0", async () => {
      // $0.0001 → 0.01c → would round to 0, must bump to 1
      expect(await computeCreditsDeductedCents(ZERO_USAGE, "model", 0.0001)).toBe(1);
    });

    it("does NOT call the catalog when gateway cost is usable", async () => {
      await computeCreditsDeductedCents(ZERO_USAGE, "model", 0.05);
      expect(getAvailableModels).not.toHaveBeenCalled();
    });
  });

  // Each case feeds an unusable gateway cost and expects the catalog-priced
  // estimate instead: 1M in @ $1/M + 1M out @ $4/M → $5 → 500c.
  describe("falls back to token-based estimate when gateway cost is unusable", () => {
    it("when gatewayCostUsd is undefined", async () => {
      vi.mocked(getAvailableModels).mockResolvedValue([
        { id: "model-x", cost: { input: 1, output: 4 } } as never,
      ]);
      // 1M in + 1M out → $5 → 500c
      expect(
        await computeCreditsDeductedCents(
          { inputTokens: 1_000_000, cachedInputTokens: 0, outputTokens: 1_000_000 },
          "model-x",
          undefined,
        ),
      ).toBe(500);
    });

    it("when gatewayCostUsd is 0", async () => {
      vi.mocked(getAvailableModels).mockResolvedValue([
        { id: "model-x", cost: { input: 1, output: 4 } } as never,
      ]);
      expect(
        await computeCreditsDeductedCents(
          { inputTokens: 1_000_000, cachedInputTokens: 0, outputTokens: 1_000_000 },
          "model-x",
          0,
        ),
      ).toBe(500);
    });

    it("when gatewayCostUsd is negative (corrupted/upstream bug)", async () => {
      vi.mocked(getAvailableModels).mockResolvedValue([
        { id: "model-x", cost: { input: 1, output: 4 } } as never,
      ]);
      expect(
        await computeCreditsDeductedCents(
          { inputTokens: 1_000_000, cachedInputTokens: 0, outputTokens: 1_000_000 },
          "model-x",
          -1,
        ),
      ).toBe(500);
    });

    it("when gatewayCostUsd is NaN (not Number.isFinite)", async () => {
      vi.mocked(getAvailableModels).mockResolvedValue([
        { id: "model-x", cost: { input: 1, output: 4 } } as never,
      ]);
      expect(
        await computeCreditsDeductedCents(
          { inputTokens: 1_000_000, cachedInputTokens: 0, outputTokens: 1_000_000 },
          "model-x",
          Number.NaN,
        ),
      ).toBe(500);
    });

    it("when gatewayCostUsd is Infinity", async () => {
      vi.mocked(getAvailableModels).mockResolvedValue([
        { id: "model-x", cost: { input: 1, output: 4 } } as never,
      ]);
      expect(
        await computeCreditsDeductedCents(
          { inputTokens: 1_000_000, cachedInputTokens: 0, outputTokens: 1_000_000 },
          "model-x",
          Number.POSITIVE_INFINITY,
        ),
      ).toBe(500);
    });
  });

  // Every case here has no usable price, so the expected charge is the 1c floor.
  describe("estimate fallbacks (also: never charge zero on success)", () => {
    it("returns 1 when modelId is not in the catalog", async () => {
      vi.mocked(getAvailableModels).mockResolvedValue([
        { id: "other-model", cost: { input: 1, output: 4 } } as never,
      ]);
      expect(
        await computeCreditsDeductedCents(
          { inputTokens: 1000, cachedInputTokens: 0, outputTokens: 1000 },
          "model-x",
          undefined,
        ),
      ).toBe(1);
    });

    it("returns 1 when the catalog has no cost for the model", async () => {
      vi.mocked(getAvailableModels).mockResolvedValue([{ id: "model-x" } as never]);
      expect(
        await computeCreditsDeductedCents(
          { inputTokens: 1000, cachedInputTokens: 0, outputTokens: 1000 },
          "model-x",
          undefined,
        ),
      ).toBe(1);
    });

    it("returns 1 when getAvailableModels rejects", async () => {
      vi.mocked(getAvailableModels).mockRejectedValue(new Error("gateway down"));
      expect(
        await computeCreditsDeductedCents(
          { inputTokens: 1000, cachedInputTokens: 0, outputTokens: 1000 },
          "model-x",
          undefined,
        ),
      ).toBe(1);
    });

    it("returns 1 when token estimate rounds to 0 (very tiny usage)", async () => {
      vi.mocked(getAvailableModels).mockResolvedValue([
        { id: "model-x", cost: { input: 0.0001, output: 0.0001 } } as never,
      ]);
      // 2 tokens @ $0.0001/M → $0.0000000002 → 0.00000002c → rounds to 0 → bumps to 1c minimum
      expect(
        await computeCreditsDeductedCents(
          { inputTokens: 1, cachedInputTokens: 0, outputTokens: 1 },
          "model-x",
          undefined,
        ),
      ).toBe(1);
    });
  });

  describe("model lookup", () => {
    it("matches modelId exactly (provider/model form)", async () => {
      vi.mocked(getAvailableModels).mockResolvedValue([
        { id: "anthropic/claude-haiku-4.5", cost: { input: 1, output: 4 } } as never,
        { id: "openai/gpt-5", cost: { input: 10, output: 40 } } as never,
      ]);
      // Pick haiku: 1M in + 1M out @ haiku rates → $5 → 500c
      // (the gpt-5 entry would give $50 → 5000c, so 500 proves the right row was used)
      expect(
        await computeCreditsDeductedCents(
          { inputTokens: 1_000_000, cachedInputTokens: 0, outputTokens: 1_000_000 },
          "anthropic/claude-haiku-4.5",
          undefined,
        ),
      ).toBe(500);
    });
  });
});
// Flat catalog entry shared by most cases below.
const baseCost = { input: 1, output: 4 }; // $1/M in, $4/M out

// Suite for estimateModelUsageCost: unpriceable inputs, base-tier math
// (including clamping), then the context_over_200k tier selection.
describe("estimateModelUsageCost", () => {
  describe("guard rails", () => {
    it("returns undefined when cost catalog entry is missing", () => {
      expect(
        estimateModelUsageCost(
          { inputTokens: 1000, cachedInputTokens: 0, outputTokens: 500 },
          undefined,
        ),
      ).toBeUndefined();
    });

    it("returns undefined when input price is missing", () => {
      expect(
        estimateModelUsageCost(
          { inputTokens: 1000, cachedInputTokens: 0, outputTokens: 500 },
          { output: 4 },
        ),
      ).toBeUndefined();
    });

    it("returns undefined when output price is missing", () => {
      expect(
        estimateModelUsageCost(
          { inputTokens: 1000, cachedInputTokens: 0, outputTokens: 500 },
          { input: 1 },
        ),
      ).toBeUndefined();
    });
  });

  describe("base tier (≤200k input tokens)", () => {
    it("computes uncached input + output cost in USD", () => {
      // 1_000_000 in @ $1/M + 1_000_000 out @ $4/M = $5
      expect(
        estimateModelUsageCost(
          {
            inputTokens: 1_000_000,
            cachedInputTokens: 0,
            outputTokens: 1_000_000,
          },
          baseCost,
        ),
      ).toBe(5);
    });

    it("applies cache_read price for cachedInputTokens portion when present", () => {
      // 100k cached @ $0.10/M + 100k uncached @ $1/M + 100k out @ $4/M
      // = 0.01 + 0.10 + 0.40 = $0.51
      const cost = { input: 1, output: 4, cache_read: 0.1 };
      expect(
        estimateModelUsageCost(
          { inputTokens: 200_000, cachedInputTokens: 100_000, outputTokens: 100_000 },
          cost,
        ),
      ).toBeCloseTo(0.51, 6);
    });

    it("falls back to input price when cache_read is undefined (cached tokens billed at full price)", () => {
      // 100k cached @ $1/M + 100k uncached @ $1/M + 100k out @ $4/M
      // = 0.10 + 0.10 + 0.40 = $0.60
      expect(
        estimateModelUsageCost(
          { inputTokens: 200_000, cachedInputTokens: 100_000, outputTokens: 100_000 },
          baseCost,
        ),
      ).toBeCloseTo(0.6, 6);
    });

    it("clamps negative cachedInputTokens to 0", () => {
      // Cached clamps to 0, so all 1M input tokens bill at $1/M = $1.
      expect(
        estimateModelUsageCost(
          { inputTokens: 1_000_000, cachedInputTokens: -50_000, outputTokens: 0 },
          baseCost,
        ),
      ).toBe(1);
    });

    it("clamps cachedInputTokens > inputTokens so uncached doesn't go negative", () => {
      // cached=200_000 but input=100_000 — uncached must clamp to 0 (not -100_000).
      // 200_000 cached @ $1/M (no cache_read, falls back to input) + 0 out = $0.20.
      // Without the Math.max guard, this would underbill: a negative uncached count
      // times the input price would subtract from the cached charge.
      expect(
        estimateModelUsageCost(
          { inputTokens: 100_000, cachedInputTokens: 200_000, outputTokens: 0 },
          baseCost,
        ),
      ).toBeCloseTo(0.2, 6);
    });

    it("clamps negative outputTokens to 0", () => {
      // Output clamps to 0, leaving only 1M input tokens @ $1/M = $1.
      expect(
        estimateModelUsageCost(
          { inputTokens: 1_000_000, cachedInputTokens: 0, outputTokens: -1000 },
          baseCost,
        ),
      ).toBe(1);
    });
  });

  describe("context_over_200k tier", () => {
    // Base tier plus a long-context override that doubles both prices.
    const tieredCost = {
      input: 1,
      output: 4,
      context_over_200k: { input: 2, output: 8 },
    };

    it("uses context_over_200k tier when inputTokens exceeds 200k", () => {
      // 300_000 in @ $2/M + 100_000 out @ $8/M = 0.60 + 0.80 = $1.40
      expect(
        estimateModelUsageCost(
          { inputTokens: 300_000, cachedInputTokens: 0, outputTokens: 100_000 },
          tieredCost,
        ),
      ).toBeCloseTo(1.4, 6);
    });

    it("does NOT use context_over_200k tier when inputTokens is exactly 200k", () => {
      // boundary check — must be strictly > 200k
      // 200_000 @ $1/M + 0 out = $0.20
      expect(
        estimateModelUsageCost(
          { inputTokens: 200_000, cachedInputTokens: 0, outputTokens: 0 },
          tieredCost,
        ),
      ).toBeCloseTo(0.2, 6);
    });

    it("ignores context_over_200k when both input and output overrides are missing", () => {
      // only cache_read is set in the override — should NOT trigger the tier swap
      const cost = {
        input: 1,
        output: 4,
        context_over_200k: { cache_read: 0.5 },
      };
      // Treated as base tier — 300k @ $1/M + 0 out = $0.30
      expect(
        estimateModelUsageCost(
          { inputTokens: 300_000, cachedInputTokens: 0, outputTokens: 0 },
          cost,
        ),
      ).toBeCloseTo(0.3, 6);
    });

    it("falls back to base tier input when context_over_200k.input is missing", () => {
      const cost = {
        input: 1,
        output: 4,
        context_over_200k: { output: 8 }, // only output overridden
      };
      // 300k in @ $1/M (fallback) + 100k out @ $8/M = 0.30 + 0.80 = $1.10
      expect(
        estimateModelUsageCost(
          { inputTokens: 300_000, cachedInputTokens: 0, outputTokens: 100_000 },
          cost,
        ),
      ).toBeCloseTo(1.1, 6);
    });
  });
});
/** Smallest amount a successful turn may be charged, in cents. */
const MIN_CHARGE_CENTS = 1;

/**
 * Converts a USD amount into a billable integer cent amount.
 *
 * @param usd Candidate USD amount from either pricing path.
 * @returns Rounded cents (never below the minimum charge), or
 *   `undefined` when `usd` is not a finite number greater than zero.
 */
function toBillableCents(usd: number | undefined): number | undefined {
  // NaN and ±Infinity are "number" too; without the finiteness check they
  // would flow through Math.round/Math.max and come out as NaN/Infinity cents.
  if (typeof usd !== "number" || !Number.isFinite(usd) || usd <= 0) {
    return undefined;
  }
  return Math.max(MIN_CHARGE_CENTS, Math.round(usd * 100));
}

/**
 * Per-turn credit charge in cents (minimum 1).
 *
 * Mirrors open-agents'
 * `apps/web/lib/credits/compute-credits-deducted-cents.ts` so the same
 * billing math runs on both sides of the chat cutover. Resolution order:
 *
 *   1. Gateway-reported cost (`gatewayCostUsd`), used directly when it
 *      is a finite number greater than zero. The catalog is not
 *      consulted in this case.
 *   2. Token-based estimate against the model catalog's `cost` entry,
 *      found by exact `id` match in `getAvailableModels()`.
 *   3. 1c floor when no usable price is available — every successful
 *      turn moves the wallet by at least 1c so a transient catalog
 *      outage can't make a turn free.
 *
 * Errors from the catalog fetch or the estimate are logged and treated
 * as path #3, so this function does not reject. An estimate that is
 * missing, non-finite (NaN/Infinity) or not positive is also treated as
 * path #3, which keeps the result a real integer.
 *
 * @param usage Token counts for the turn (matches AI SDK's `LanguageModelUsage`).
 * @param modelId Fully qualified gateway id (e.g. `anthropic/claude-haiku-4.5`).
 * @param gatewayCostUsd Gateway-reported total USD cost for the turn,
 *   when available. Callers without one fall through to the token estimate.
 * @returns Integer cent amount, ≥ 1.
 */
export async function computeCreditsDeductedCents(
  usage: {
    inputTokens: number;
    cachedInputTokens: number;
    outputTokens: number;
  },
  modelId: string,
  gatewayCostUsd?: number,
): Promise<number> {
  const gatewayCents = toBillableCents(gatewayCostUsd);
  if (gatewayCents !== undefined) {
    return gatewayCents;
  }

  try {
    const models = await getAvailableModels();
    const model = models.find(m => m.id === modelId) as { cost?: AvailableModelCost } | undefined;
    const estimatedUsd = estimateModelUsageCost(usage, model?.cost);
    return toBillableCents(estimatedUsd) ?? MIN_CHARGE_CENTS;
  } catch (error) {
    // Best-effort by design: an accounting hiccup must not fail the caller.
    console.error("Failed to compute credits from usage:", error);
    return MIN_CHARGE_CENTS;
  }
}
/**
 * Selects the pricing tier that applies to `usage`.
 *
 * The long-context override (`cost.context_over_200k`) is used only when
 * the turn has more than 200k input tokens AND the override sets at
 * least one of `input` / `output`. An override that carries nothing but
 * `cache_read` does not count as a tier swap, so the base entry is
 * returned unchanged. Fields absent from the override fall back to the
 * base entry, which keeps partial overrides valid.
 *
 * @param usage Only `inputTokens` is read, to test the 200k threshold.
 * @param cost Catalog entry for the model, if any.
 * @returns `undefined` when there is no catalog entry; the merged
 *   long-context tier when the override applies; otherwise `cost` itself.
 */
function resolveCostTier(
  usage: { inputTokens: number },
  cost: AvailableModelCost | undefined,
): AvailableModelCostTier | undefined {
  if (!cost) return undefined;

  const longContext = cost.context_over_200k;
  const overridesPrice =
    typeof longContext?.input === "number" || typeof longContext?.output === "number";
  // Kept as a positive `>` test so a NaN token count selects the base tier.
  const exceedsThreshold = usage.inputTokens > 200_000;

  if (!longContext || !overridesPrice || !exceedsThreshold) {
    return cost;
  }

  const input = longContext.input ?? cost.input;
  const output = longContext.output ?? cost.output;
  const cache_read = longContext.cache_read ?? cost.cache_read;
  return { input, output, cache_read };
}
/**
 * Normalizes a reported token count for pricing.
 *
 * @param value Raw count from the usage object.
 * @returns `value` when it is a finite number, otherwise 0.
 */
function finiteTokenCount(value: number): number {
  // A missing count at runtime (`undefined`) or NaN/Infinity must not
  // poison the sum: any arithmetic with NaN yields NaN.
  return Number.isFinite(value) ? value : 0;
}

/**
 * Token-based estimate of a turn's USD cost, used as a fallback when
 * the gateway hasn't reported an actual cost.
 *
 * Ports `apps/web/lib/models.ts:estimateModelUsageCost` from
 * open-agents so the same per-turn math runs on both sides during the
 * cutover. Prices are USD per million tokens. Cached input tokens are
 * billed at `cache_read` when the selected tier has a finite value for
 * it and at the input price otherwise; the remaining (uncached) input
 * tokens are billed at the input price.
 *
 * Token counts are clamped so nothing can bill negatively: cached and
 * output counts are floored at 0, and the uncached count is floored at
 * 0 when cached exceeds input. Non-finite counts are treated as 0.
 *
 * Returns `undefined` when the catalog can't price this model (missing
 * cost entry, or an input/output price that is missing or non-finite)
 * so the caller knows to use a different fallback.
 *
 * @param usage Token counts for the turn (mirrors AI SDK's `LanguageModelUsage`).
 * @param cost Per-model catalog entry. `undefined` short-circuits to `undefined`.
 * @returns USD cost, or `undefined` if not priceable.
 */
export function estimateModelUsageCost(
  usage: {
    inputTokens: number;
    cachedInputTokens: number;
    outputTokens: number;
  },
  cost: AvailableModelCost | undefined,
): number | undefined {
  const costTier = resolveCostTier(usage, cost);
  const inputPrice = costTier?.input;
  const outputPrice = costTier?.output;
  if (
    typeof inputPrice !== "number" ||
    typeof outputPrice !== "number" ||
    !Number.isFinite(inputPrice) ||
    !Number.isFinite(outputPrice)
  ) {
    return undefined;
  }

  const tierCacheRead = costTier?.cache_read;
  const cacheReadPrice =
    typeof tierCacheRead === "number" && Number.isFinite(tierCacheRead)
      ? tierCacheRead
      : inputPrice;

  const cachedInputTokens = Math.max(0, finiteTokenCount(usage.cachedInputTokens));
  // Floor at 0: a cached count larger than the input count would otherwise
  // produce a negative uncached count and subtract from the charge.
  const uncachedInputTokens = Math.max(
    0,
    finiteTokenCount(usage.inputTokens) - cachedInputTokens,
  );
  const outputTokens = Math.max(0, finiteTokenCount(usage.outputTokens));

  return (
    (uncachedInputTokens * inputPrice) / TOKENS_PER_MILLION +
    (cachedInputTokens * cacheReadPrice) / TOKENS_PER_MILLION +
    (outputTokens * outputPrice) / TOKENS_PER_MILLION
  );
}