-
Notifications
You must be signed in to change notification settings - Fork 9
feat(credits): port computeCreditsDeductedCents from open-agents (step 2/6) #611
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Closed
Closed
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,25 @@ | ||
| /** | ||
| * Shape of the per-model cost catalog used for token-based credit estimation. | ||
| * | ||
| * Mirrors open-agents' | ||
| * `apps/web/lib/models.ts:AvailableModelCost` so the same | ||
| * `estimateModelUsageCost` math runs against either catalog without | ||
| * shape conversion. This API's current gateway/models.dev pipeline emits | ||
| * only `{ input, output }` (see | ||
| * `lib/ai/parseModelsDevMetadata.ts:ModelsDevMetadata`); the richer | ||
| * `cache_read` and `context_over_200k` fields are kept in the type so | ||
| * a future catalog expansion (or a hand-edited override) gets picked | ||
| * up automatically by the estimator. | ||
| * | ||
| * All token-cost units are USD per million tokens, matching | ||
| * models.dev. Every field is optional because a catalog entry may carry | ||
| * only part of the pricing. | ||
| */ | ||
| export interface AvailableModelCostTier { | ||
| input?: number; /* price for input tokens that were not served from cache */ | ||
| output?: number; /* price for output tokens */ | ||
| cache_read?: number; /* price for cached input tokens; the estimator tests expect the `input` price to apply when this is unset */ | ||
| } | ||
|
|
||
| export interface AvailableModelCost extends AvailableModelCostTier { | ||
| context_over_200k?: AvailableModelCostTier; /* optional price overrides for long contexts; the estimator tests apply them only when inputTokens is strictly above 200k */ | ||
| } |
183 changes: 183 additions & 0 deletions
183
lib/credits/__tests__/computeCreditsDeductedCents.test.ts
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,183 @@ | ||
| import { describe, it, expect, vi, beforeEach } from "vitest"; | ||
| import { computeCreditsDeductedCents } from "@/lib/credits/computeCreditsDeductedCents"; | ||
| import { getAvailableModels } from "@/lib/ai/getAvailableModels"; | ||
|
|
||
| vi.mock("@/lib/ai/getAvailableModels", () => ({ /* swap the catalog loader for a mock so each test decides what the catalog contains */ | ||
| getAvailableModels: vi.fn(), | ||
| })); | ||
|
|
||
| beforeEach(() => { | ||
| vi.clearAllMocks(); | ||
| // Default catalog is empty, so the token-estimate path finds no pricing and falls through to the 1c minimum. | ||
| vi.mocked(getAvailableModels).mockResolvedValue([]); | ||
| }); | ||
|
|
||
| const ZERO_USAGE = { inputTokens: 0, cachedInputTokens: 0, outputTokens: 0 }; /* all-zero usage, passed by the gateway-cost tests alongside an explicit gatewayCostUsd */ | ||
|
|
||
| describe("computeCreditsDeductedCents", () => { | ||
| describe("gateway cost path (preferred)", () => { | ||
| it("returns gateway cost in cents when gatewayCostUsd is a positive number", async () => { | ||
| // $0.0074 → 0.74c → rounds to the nearest cent = 1c (no 1c-minimum bump needed) | ||
| expect( | ||
| await computeCreditsDeductedCents(ZERO_USAGE, "anthropic/claude-haiku-4.5", 0.0074), | ||
| ).toBe(1); | ||
| // $0.42 → 42c | ||
| expect( | ||
| await computeCreditsDeductedCents(ZERO_USAGE, "anthropic/claude-haiku-4.5", 0.42), | ||
| ).toBe(42); | ||
| }); | ||
|
|
||
| it("rounds the gateway cost to the nearest cent", async () => { | ||
| // $0.123 → 12.3c → 12c | ||
| expect(await computeCreditsDeductedCents(ZERO_USAGE, "model", 0.123)).toBe(12); | ||
| // $0.126 → 12.6c → 13c | ||
| expect(await computeCreditsDeductedCents(ZERO_USAGE, "model", 0.126)).toBe(13); | ||
| }); | ||
|
|
||
| it("returns at least 1 when gateway cost rounds to 0", async () => { | ||
| // $0.0001 → 0.01c → would round to 0, must bump to 1 | ||
| expect(await computeCreditsDeductedCents(ZERO_USAGE, "model", 0.0001)).toBe(1); | ||
| }); | ||
|
|
||
| it("does NOT call the catalog when gateway cost is usable", async () => { | ||
| await computeCreditsDeductedCents(ZERO_USAGE, "model", 0.05); | ||
| expect(getAvailableModels).not.toHaveBeenCalled(); | ||
| }); | ||
| }); | ||
|
|
||
| describe("falls back to token-based estimate when gateway cost is unusable", () => { | ||
| it("when gatewayCostUsd is undefined", async () => { | ||
| vi.mocked(getAvailableModels).mockResolvedValue([ | ||
| { id: "model-x", cost: { input: 1, output: 4 } } as never, | ||
| ]); | ||
| // 1M in + 1M out → $5 → 500c | ||
| expect( | ||
| await computeCreditsDeductedCents( | ||
| { inputTokens: 1_000_000, cachedInputTokens: 0, outputTokens: 1_000_000 }, | ||
| "model-x", | ||
| undefined, | ||
| ), | ||
| ).toBe(500); | ||
| }); | ||
|
|
||
| it("when gatewayCostUsd is 0", async () => { | ||
| vi.mocked(getAvailableModels).mockResolvedValue([ | ||
| { id: "model-x", cost: { input: 1, output: 4 } } as never, | ||
| ]); | ||
| expect( | ||
| await computeCreditsDeductedCents( | ||
| { inputTokens: 1_000_000, cachedInputTokens: 0, outputTokens: 1_000_000 }, | ||
| "model-x", | ||
| 0, | ||
| ), | ||
| ).toBe(500); /* 1M in @ $1/M + 1M out @ $4/M → $5 → 500c */ | ||
| }); | ||
|
|
||
| it("when gatewayCostUsd is negative (corrupted/upstream bug)", async () => { | ||
| vi.mocked(getAvailableModels).mockResolvedValue([ | ||
| { id: "model-x", cost: { input: 1, output: 4 } } as never, | ||
| ]); | ||
| expect( | ||
| await computeCreditsDeductedCents( | ||
| { inputTokens: 1_000_000, cachedInputTokens: 0, outputTokens: 1_000_000 }, | ||
| "model-x", | ||
| -1, | ||
| ), | ||
| ).toBe(500); /* 1M in @ $1/M + 1M out @ $4/M → $5 → 500c */ | ||
| }); | ||
|
|
||
| it("when gatewayCostUsd is NaN (not Number.isFinite)", async () => { | ||
| vi.mocked(getAvailableModels).mockResolvedValue([ | ||
| { id: "model-x", cost: { input: 1, output: 4 } } as never, | ||
| ]); | ||
| expect( | ||
| await computeCreditsDeductedCents( | ||
| { inputTokens: 1_000_000, cachedInputTokens: 0, outputTokens: 1_000_000 }, | ||
| "model-x", | ||
| Number.NaN, | ||
| ), | ||
| ).toBe(500); /* 1M in @ $1/M + 1M out @ $4/M → $5 → 500c */ | ||
| }); | ||
|
|
||
| it("when gatewayCostUsd is Infinity", async () => { | ||
| vi.mocked(getAvailableModels).mockResolvedValue([ | ||
| { id: "model-x", cost: { input: 1, output: 4 } } as never, | ||
| ]); | ||
| expect( | ||
| await computeCreditsDeductedCents( | ||
| { inputTokens: 1_000_000, cachedInputTokens: 0, outputTokens: 1_000_000 }, | ||
| "model-x", | ||
| Number.POSITIVE_INFINITY, | ||
| ), | ||
| ).toBe(500); /* 1M in @ $1/M + 1M out @ $4/M → $5 → 500c */ | ||
| }); | ||
| }); | ||
|
|
||
| describe("estimate fallbacks (also: never charge zero on success)", () => { | ||
| it("returns 1 when modelId is not in the catalog", async () => { | ||
| vi.mocked(getAvailableModels).mockResolvedValue([ | ||
| { id: "other-model", cost: { input: 1, output: 4 } } as never, | ||
| ]); | ||
| expect( | ||
| await computeCreditsDeductedCents( | ||
| { inputTokens: 1000, cachedInputTokens: 0, outputTokens: 1000 }, | ||
| "model-x", | ||
| undefined, | ||
| ), | ||
| ).toBe(1); | ||
| }); | ||
|
|
||
| it("returns 1 when the catalog has no cost for the model", async () => { | ||
| vi.mocked(getAvailableModels).mockResolvedValue([{ id: "model-x" } as never]); | ||
| expect( | ||
| await computeCreditsDeductedCents( | ||
| { inputTokens: 1000, cachedInputTokens: 0, outputTokens: 1000 }, | ||
| "model-x", | ||
| undefined, | ||
| ), | ||
| ).toBe(1); | ||
| }); | ||
|
|
||
| it("returns 1 when getAvailableModels rejects", async () => { | ||
| vi.mocked(getAvailableModels).mockRejectedValue(new Error("gateway down")); | ||
| expect( | ||
| await computeCreditsDeductedCents( | ||
| { inputTokens: 1000, cachedInputTokens: 0, outputTokens: 1000 }, | ||
| "model-x", | ||
| undefined, | ||
| ), | ||
| ).toBe(1); | ||
| }); | ||
|
|
||
| it("returns 1 when token estimate rounds to 0 (very tiny usage)", async () => { | ||
| vi.mocked(getAvailableModels).mockResolvedValue([ | ||
| { id: "model-x", cost: { input: 0.0001, output: 0.0001 } } as never, | ||
| ]); | ||
| // ~$0.0000002 → 0.00002c → bumps to 1c minimum | ||
| expect( | ||
| await computeCreditsDeductedCents( | ||
| { inputTokens: 1, cachedInputTokens: 0, outputTokens: 1 }, | ||
| "model-x", | ||
| undefined, | ||
| ), | ||
| ).toBe(1); | ||
| }); | ||
| }); | ||
|
|
||
| describe("model lookup", () => { | ||
| it("matches modelId exactly (provider/model form)", async () => { | ||
| vi.mocked(getAvailableModels).mockResolvedValue([ | ||
| { id: "anthropic/claude-haiku-4.5", cost: { input: 1, output: 4 } } as never, | ||
| { id: "openai/gpt-5", cost: { input: 10, output: 40 } } as never, | ||
| ]); | ||
| // Pick haiku: 1M in + 1M out @ haiku rates → $5 → 500c | ||
| expect( | ||
| await computeCreditsDeductedCents( | ||
| { inputTokens: 1_000_000, cachedInputTokens: 0, outputTokens: 1_000_000 }, | ||
| "anthropic/claude-haiku-4.5", | ||
| undefined, | ||
| ), | ||
| ).toBe(500); | ||
| }); | ||
| }); | ||
| }); | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,165 @@ | ||
| import { describe, it, expect } from "vitest"; | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. P3: Custom agent: Enforce Clear Code Style and Maintainability Practices. This new test file exceeds the repository’s 100-line limit. Prompt for AI agents |
||
| import { estimateModelUsageCost } from "@/lib/credits/estimateModelUsageCost"; | ||
|
|
||
| const baseCost = { input: 1, output: 4 }; // base-tier prices in USD per million tokens: $1 input, $4 output | ||
|
|
||
| describe("estimateModelUsageCost", () => { | ||
| describe("guard rails", () => { | ||
| it("returns undefined when cost catalog entry is missing", () => { | ||
| expect( | ||
| estimateModelUsageCost( | ||
| { inputTokens: 1000, cachedInputTokens: 0, outputTokens: 500 }, | ||
| undefined, | ||
| ), | ||
| ).toBeUndefined(); | ||
| }); | ||
|
|
||
| it("returns undefined when input price is missing", () => { | ||
| expect( | ||
| estimateModelUsageCost( | ||
| { inputTokens: 1000, cachedInputTokens: 0, outputTokens: 500 }, | ||
| { output: 4 }, | ||
| ), | ||
| ).toBeUndefined(); | ||
| }); | ||
|
|
||
| it("returns undefined when output price is missing", () => { | ||
| expect( | ||
| estimateModelUsageCost( | ||
| { inputTokens: 1000, cachedInputTokens: 0, outputTokens: 500 }, | ||
| { input: 1 }, | ||
| ), | ||
| ).toBeUndefined(); | ||
| }); | ||
| }); | ||
|
|
||
| describe("base tier (≤200k input tokens)", () => { | ||
| it("computes uncached input + output cost in USD", () => { | ||
| // 1_000_000 in @ $1/M + 1_000_000 out @ $4/M = $5 | ||
| expect( | ||
| estimateModelUsageCost( | ||
| { | ||
| inputTokens: 1_000_000, | ||
| cachedInputTokens: 0, | ||
| outputTokens: 1_000_000, | ||
| }, | ||
| baseCost, | ||
| ), | ||
| ).toBe(5); | ||
| }); | ||
|
|
||
| it("applies cache_read price for cachedInputTokens portion when present", () => { | ||
| // 100k cached @ $0.10/M + 100k uncached @ $1/M + 100k out @ $4/M | ||
| // = 0.01 + 0.10 + 0.40 = $0.51 | ||
| const cost = { input: 1, output: 4, cache_read: 0.1 }; | ||
| expect( | ||
| estimateModelUsageCost( | ||
| { inputTokens: 200_000, cachedInputTokens: 100_000, outputTokens: 100_000 }, | ||
| cost, | ||
| ), | ||
| ).toBeCloseTo(0.51, 6); | ||
| }); | ||
|
|
||
| it("falls back to input price when cache_read is undefined (cached tokens billed at full price)", () => { | ||
| // 100k cached @ $1/M + 100k uncached @ $1/M + 100k out @ $4/M | ||
| // = 0.10 + 0.10 + 0.40 = $0.60 | ||
| expect( | ||
| estimateModelUsageCost( | ||
| { inputTokens: 200_000, cachedInputTokens: 100_000, outputTokens: 100_000 }, | ||
| baseCost, | ||
| ), | ||
| ).toBeCloseTo(0.6, 6); | ||
| }); | ||
|
|
||
| it("clamps negative cachedInputTokens to 0", () => { | ||
| expect( | ||
| estimateModelUsageCost( | ||
| { inputTokens: 1_000_000, cachedInputTokens: -50_000, outputTokens: 0 }, | ||
| baseCost, | ||
| ), | ||
| ).toBe(1); /* negative cached count treated as 0: 1M uncached in @ $1/M = $1 */ | ||
| }); | ||
|
|
||
| it("clamps cachedInputTokens > inputTokens so uncached doesn't go negative", () => { | ||
| // cached=200_000 but input=100_000 — uncached must clamp to 0 (not -100_000). | ||
| // 200_000 cached @ $1/M (no cache_read, falls back to input) + 0 out = $0.20. | ||
| // Without the Math.max guard, this would underbill: a negative uncached count | ||
| // times the input price would subtract from the cached charge. | ||
| expect( | ||
| estimateModelUsageCost( | ||
| { inputTokens: 100_000, cachedInputTokens: 200_000, outputTokens: 0 }, | ||
| baseCost, | ||
| ), | ||
| ).toBeCloseTo(0.2, 6); | ||
| }); | ||
|
|
||
| it("clamps negative outputTokens to 0", () => { | ||
| expect( | ||
| estimateModelUsageCost( | ||
| { inputTokens: 1_000_000, cachedInputTokens: 0, outputTokens: -1000 }, | ||
| baseCost, | ||
| ), | ||
| ).toBe(1); /* negative output count treated as 0: 1M in @ $1/M = $1 */ | ||
| }); | ||
| }); | ||
|
|
||
| describe("context_over_200k tier", () => { | ||
| const tieredCost = { | ||
| input: 1, | ||
| output: 4, | ||
| context_over_200k: { input: 2, output: 8 }, | ||
| }; | ||
|
|
||
| it("uses context_over_200k tier when inputTokens exceeds 200k", () => { | ||
| // 300_000 in @ $2/M + 100_000 out @ $8/M = 0.60 + 0.80 = $1.40 | ||
| expect( | ||
| estimateModelUsageCost( | ||
| { inputTokens: 300_000, cachedInputTokens: 0, outputTokens: 100_000 }, | ||
| tieredCost, | ||
| ), | ||
| ).toBeCloseTo(1.4, 6); | ||
| }); | ||
|
|
||
| it("does NOT use context_over_200k tier when inputTokens is exactly 200k", () => { | ||
| // boundary check — must be strictly > 200k | ||
| // 200_000 @ $1/M + 0 out = $0.20 | ||
| expect( | ||
| estimateModelUsageCost( | ||
| { inputTokens: 200_000, cachedInputTokens: 0, outputTokens: 0 }, | ||
| tieredCost, | ||
| ), | ||
| ).toBeCloseTo(0.2, 6); | ||
| }); | ||
|
|
||
| it("ignores context_over_200k when both input and output overrides are missing", () => { | ||
| // only cache_read is set in the override — should NOT trigger the tier swap | ||
| const cost = { | ||
| input: 1, | ||
| output: 4, | ||
| context_over_200k: { cache_read: 0.5 }, | ||
| }; | ||
| // Treated as base tier — 300k @ $1/M + 0 out = $0.30 | ||
| expect( | ||
| estimateModelUsageCost( | ||
| { inputTokens: 300_000, cachedInputTokens: 0, outputTokens: 0 }, | ||
| cost, | ||
| ), | ||
| ).toBeCloseTo(0.3, 6); | ||
| }); | ||
|
|
||
| it("falls back to base tier input when context_over_200k.input is missing", () => { | ||
| const cost = { | ||
| input: 1, | ||
| output: 4, | ||
| context_over_200k: { output: 8 }, // only output overridden | ||
| }; | ||
| // 300k in @ $1/M (fallback) + 100k out @ $8/M = 0.30 + 0.80 = $1.10 | ||
| expect( | ||
| estimateModelUsageCost( | ||
| { inputTokens: 300_000, cachedInputTokens: 0, outputTokens: 100_000 }, | ||
| cost, | ||
| ), | ||
| ).toBeCloseTo(1.1, 6); | ||
| }); | ||
| }); | ||
| }); | ||
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
P2: Custom agent: Enforce Clear Code Style and Maintainability Practices
Test file exceeds the repository’s 100-line limit and combines multiple concerns in one module.
Prompt for AI agents