Skip to content

Commit 7e17c87

Browse files
committed
feat(cost-calculator): prompt caching support
1 parent f8e9bc8 commit 7e17c87

File tree

2 files changed

+70
-1
lines changed

2 files changed

+70
-1
lines changed

cost-calculator.test.ts

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,31 @@ describe("CostCalculator", () => {
3535
expect(cost.outputCost).toEqual(75);
3636
});
3737

38+
// Checks that cache-creation and cache-read tokens are echoed back, priced
// separately from regular input/output tokens, and included in the total
// for the opus-4 model.
test("should calculate cost with cache tokens for opus-4 model", () => {
  const testModel = calculator
    .listModels()
    .find((m) => m.includes("claude-opus-4-20250514"));

  // Narrow to `string` with a real runtime check instead of asserting
  // definedness and then silencing the compiler with `as string`.
  if (testModel === undefined) {
    throw new Error(
      "claude-opus-4-20250514 not found in calculator.listModels()",
    );
  }

  const tokens = 1_000_000;
  const cost = calculator.calculateCost(
    testModel,
    /* input tokens */ tokens,
    /* output tokens */ tokens,
    /* cache creation tokens */ tokens,
    /* cache read tokens */ tokens,
  );

  // Token counts are passed through unchanged, so exact equality is right.
  expect(cost.inputTokens).toBe(tokens);
  expect(cost.outputTokens).toBe(tokens);
  expect(cost.cacheCreationTokens).toBe(tokens);
  expect(cost.cacheReadTokens).toBe(tokens);

  // Costs are token counts multiplied by fractional per-token prices, so
  // compare with a tolerance rather than exact floating-point equality.
  expect(cost.inputCost).toBeCloseTo(15, 6);
  expect(cost.outputCost).toBeCloseTo(75, 6);
  expect(cost.cacheCreationCost).toBeCloseTo(18.75, 6);
  expect(cost.cacheReadCost).toBeCloseTo(1.5, 6);
  expect(cost.totalCost).toBeCloseTo(15 + 75 + 18.75 + 1.5, 6);
});
62+
3863
test("should calculate cost for a sonnet-4 model", () => {
3964
// Using sonnet-4 model as an example
4065
// https://www.anthropic.com/pricing
@@ -57,6 +82,33 @@ describe("CostCalculator", () => {
5782
expect(cost.outputCost).toEqual(15);
5883
});
5984

85+
// Checks that cache-creation and cache-read tokens are echoed back, priced
// separately from regular input/output tokens, and included in the total
// for the sonnet-4 model.
test("should calculate cost with cache tokens for sonnet-4 model", () => {
  const testModel = calculator
    .listModels()
    .find((m) => m.includes("claude-sonnet-4-20250514"));

  // Narrow to `string` with a real runtime check instead of asserting
  // definedness and then silencing the compiler with `as string`.
  if (testModel === undefined) {
    throw new Error(
      "claude-sonnet-4-20250514 not found in calculator.listModels()",
    );
  }

  const tokens = 1_000_000;
  const cost = calculator.calculateCost(
    testModel,
    /* input tokens */ tokens,
    /* output tokens */ tokens,
    /* cache creation tokens */ tokens,
    /* cache read tokens */ tokens,
  );

  // Token counts are passed through unchanged, so exact equality is right.
  expect(cost.inputTokens).toBe(tokens);
  expect(cost.outputTokens).toBe(tokens);
  expect(cost.cacheCreationTokens).toBe(tokens);
  expect(cost.cacheReadTokens).toBe(tokens);

  // Costs are token counts multiplied by fractional per-token prices
  // (0.3 is not exactly representable in binary floating point), so compare
  // with a tolerance rather than exact equality.
  expect(cost.inputCost).toBeCloseTo(3, 6);
  expect(cost.outputCost).toBeCloseTo(15, 6);
  expect(cost.cacheCreationCost).toBeCloseTo(3.75, 6);
  expect(cost.cacheReadCost).toBeCloseTo(0.3, 6);
  expect(cost.totalCost).toBeCloseTo(3 + 15 + 3.75 + 0.3, 6);
});
111+
60112
test("should throw error for unknown model", () => {
61113
expect(() => {
62114
calculator.calculateCost("unknown-model", 1000);

cost-calculator.ts

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,12 @@ import { LiteLLMModelPricesSchema } from "./types";
55
// Field validators for a cost-calculation result. Every entry is a plain
// number; key order is kept as: token counts, per-category costs, total.
const costCalculationFields = {
  // Token counts, one per billing category.
  inputTokens: v.number(),
  outputTokens: v.number(),
  cacheCreationTokens: v.number(),
  cacheReadTokens: v.number(),

  // Cost attributed to each of the token categories above.
  inputCost: v.number(),
  outputCost: v.number(),
  cacheCreationCost: v.number(),
  cacheReadCost: v.number(),

  // Overall cost of the calculation.
  totalCost: v.number(),
};

/**
 * Object schema for a cost-calculation result, built from the field
 * validators above.
 */
const CostCalculationSchema = v.object(costCalculationFields);
1216

@@ -35,6 +39,8 @@ export class CostCalculator {
3539
modelName: string,
3640
inputTokens: number,
3741
outputTokens = 0,
42+
cacheCreationTokens = 0,
43+
cacheReadTokens = 0,
3844
): CostCalculation {
3945
const modelSpec = this.modelPrices[modelName];
4046

@@ -48,13 +54,24 @@ export class CostCalculator {
4854

4955
const inputCost = inputTokens * modelSpec.input_cost_per_token;
5056
const outputCost = outputTokens * modelSpec.output_cost_per_token;
51-
const totalCost = inputCost + outputCost;
57+
const cacheCreationCost =
58+
cacheCreationTokens *
59+
(modelSpec.cache_creation_input_token_cost ||
60+
modelSpec.input_cost_per_token);
61+
const cacheReadCost =
62+
cacheReadTokens * (modelSpec.cache_read_input_token_cost || 0);
63+
const totalCost =
64+
inputCost + outputCost + cacheCreationCost + cacheReadCost;
5265

5366
return {
5467
inputTokens,
5568
outputTokens,
69+
cacheCreationTokens,
70+
cacheReadTokens,
5671
inputCost,
5772
outputCost,
73+
cacheCreationCost,
74+
cacheReadCost,
5875
totalCost,
5976
};
6077
}

0 commit comments

Comments
 (0)