Skip to content

Commit 00b1865

Browse files
feat(cost): split cache creation pricing by duration (#1221)
* feat(cost): split cache creation pricing by 5m/1h duration

  Claude Code JSONL includes a `cache_creation` breakdown with `ephemeral_5m_input_tokens` and `ephemeral_1h_input_tokens`. Previously all cache creation tokens were priced at the 5-minute rate (1.25x base input), but Claude Code predominantly uses 1-hour caching (2x base input).

  - Parse `cache_creation.ephemeral_5m_input_tokens` and `ephemeral_1h_input_tokens` from JSONL usage records
  - Price 5m tokens at existing cache_create rate, 1h tokens at 2x base input (matching Anthropic pricing)
  - Fall back to flat `cache_creation_input_tokens` when breakdown is absent (older records, non-Claude agents)
  - Fix token count aggregation to include breakdown tokens

  Closes #899

* chore: add .npmrc to .gitignore

* chore: fix whitespace in pullfrog workflow

* chore: suppress zizmor unpinned-uses in pullfrog workflow

* style: single space before zizmor comment

* test(cost): cover cache creation duration pricing

  Add focused regression coverage for Claude Code cache creation duration handling. The tests pin the three important cases from #899: records with 5-minute and 1-hour cache creation breakdowns use separate rates, older records without the breakdown keep the flat cache creation fallback, and serde parses the nested cache_creation usage fields.

  Update the cost mode guide so the documented formula matches the new 5m/1h calculation. Also make cache_creation_token_count borrow self and name the 1-hour multiplier so future pricing changes have a single obvious constant to inspect.

---------

Co-authored-by: pullfrog[bot] <226033991+pullfrog[bot]@users.noreply.github.com>
Co-authored-by: ryoppippi <1560508+ryoppippi@users.noreply.github.com>
1 parent 199ee29 commit 00b1865

27 files changed

Lines changed: 196 additions & 9 deletions

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ report.[0-9]_.[0-9]_.[0-9]_.[0-9]_.json
4444
.direnv
4545
!.envrc
4646
.pre-commit-config.yaml
47+
.npmrc
4748

4849
# generated local agent skills
4950
.claude/skills

docs/guide/cost-modes.md

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,8 @@ When calculating costs from tokens, ccusage uses:
186186
type TokenCosts = {
187187
input: number; // Input tokens
188188
output: number; // Output tokens
189-
cacheCreate: number; // Cache creation tokens
189+
cacheCreate5m: number; // 5-minute cache creation tokens
190+
cacheCreate1h: number; // 1-hour cache creation tokens
190191
cacheRead: number; // Cache read tokens
191192
};
192193
```
@@ -197,10 +198,15 @@ type TokenCosts = {
197198
totalCost =
198199
inputTokens * inputPrice +
199200
outputTokens * outputPrice +
200-
cacheCreateTokens * cacheCreatePrice +
201+
cacheCreate5mTokens * cacheCreatePrice +
202+
cacheCreate1hTokens * inputPrice * 2 +
201203
cacheReadTokens * cacheReadPrice;
202204
```
203205

206+
When Claude Code records do not include the `cache_creation` duration
207+
breakdown, ccusage falls back to pricing `cache_creation_input_tokens` at the
208+
standard cache creation rate.
209+
204210
### Pre-calculated Costs
205211

206212
Claude Code provides `costUSD` values in JSONL files:

rust/crates/ccusage/src/adapter/amp/parser.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ fn parse_ledger_events(
7676
cache_creation_input_tokens: cache.0,
7777
cache_read_input_tokens: cache.1,
7878
speed: None,
79+
cache_creation: None,
7980
};
8081
let total_tokens = json_value_u64(tokens.get("total"));
8182
let (usage, extra_total_tokens) = apply_total_token_fallback(usage, 0, total_tokens);
@@ -109,6 +110,7 @@ fn parse_ledger_events(
109110
.usage
110111
.output_tokens
111112
.saturating_add(extra_total_tokens),
113+
cache_creation: None,
112114
..data.message.usage
113115
},
114116
..data.message.clone()
@@ -179,6 +181,7 @@ fn parse_message_usage(
179181
cache_creation_input_tokens: json_value_u64(usage.get("cacheCreationInputTokens")),
180182
cache_read_input_tokens: json_value_u64(usage.get("cacheReadInputTokens")),
181183
speed: None,
184+
cache_creation: None,
182185
};
183186
let total_tokens = json_value_u64(usage.get("totalTokens"));
184187
let (usage_raw, extra_total_tokens) =
@@ -218,6 +221,7 @@ fn parse_message_usage(
218221
.usage
219222
.output_tokens
220223
.saturating_add(extra_total_tokens),
224+
cache_creation: None,
221225
..data.message.usage
222226
},
223227
..data.message.clone()

rust/crates/ccusage/src/adapter/claude/daily.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -358,7 +358,7 @@ fn is_valid_daily_usage_entry(data: &DailyUsageEntry) -> bool {
358358
fn daily_usage_token_total(entry: &DailyLoadedEntry) -> u64 {
359359
entry.usage.input_tokens
360360
+ entry.usage.output_tokens
361-
+ entry.usage.cache_creation_input_tokens
361+
+ entry.usage.cache_creation_token_count()
362362
+ entry.usage.cache_read_input_tokens
363363
}
364364

@@ -474,7 +474,7 @@ impl DailyAccumulator {
474474
let breakdown = &mut self.breakdowns[index];
475475
breakdown.input_tokens += entry.usage.input_tokens;
476476
breakdown.output_tokens += entry.usage.output_tokens;
477-
breakdown.cache_creation_tokens += entry.usage.cache_creation_input_tokens;
477+
breakdown.cache_creation_tokens += entry.usage.cache_creation_token_count();
478478
breakdown.cache_read_tokens += entry.usage.cache_read_input_tokens;
479479
breakdown.cost += entry.cost;
480480
if entry.missing_pricing_model.is_some() {
@@ -594,6 +594,7 @@ mod tests {
594594
cache_creation_input_tokens: 0,
595595
cache_read_input_tokens: fixture.cache_read_tokens,
596596
speed: None,
597+
cache_creation: None,
597598
},
598599
cost: 0.0,
599600
model: Some("claude-sonnet-4-20250514".to_string()),

rust/crates/ccusage/src/adapter/claude/mod.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,7 @@ fn usage_token_total(data: &UsageEntry) -> u64 {
216216
let usage = data.message.usage;
217217
usage.input_tokens
218218
+ usage.output_tokens
219-
+ usage.cache_creation_input_tokens
219+
+ usage.cache_creation_token_count()
220220
+ usage.cache_read_input_tokens
221221
}
222222

@@ -765,6 +765,7 @@ mod tests {
765765
cache_creation_input_tokens: 0,
766766
cache_read_input_tokens: fixture.cache_read_tokens,
767767
speed: None,
768+
cache_creation: None,
768769
},
769770
model: Some("claude-sonnet-4-20250514".to_string()),
770771
id: Some(fixture.message_id.to_string()),

rust/crates/ccusage/src/adapter/codebuff/loader.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,7 @@ mod tests {
193193
cache_creation_input_tokens: 20,
194194
cache_read_input_tokens: 10,
195195
speed: None,
196+
cache_creation: None,
196197
},
197198
model: Some("claude-sonnet-4-20250514".to_string()),
198199
id: Some("message-a".to_string()),

rust/crates/ccusage/src/adapter/codebuff/parser.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ pub(super) fn load_chat_file(path: &Path) -> Result<Vec<CodebuffEntry>> {
8888
cache_creation_input_tokens: usage.cache_creation_input_tokens,
8989
cache_read_input_tokens: usage.cache_read_input_tokens,
9090
speed: None,
91+
cache_creation: None,
9192
},
9293
extra_total_tokens: usage.extra_total_tokens,
9394
dedup_key,
@@ -251,6 +252,7 @@ pub(super) fn parse_usage_object(value: Option<&Value>) -> AssistantUsage {
251252
cache_creation_input_tokens: usage.cache_creation_input_tokens,
252253
cache_read_input_tokens: usage.cache_read_input_tokens,
253254
speed: None,
255+
cache_creation: None,
254256
};
255257
let (raw_usage, extra_total_tokens) =
256258
apply_total_token_fallback(raw_usage, usage.extra_total_tokens, total_tokens);
@@ -396,6 +398,7 @@ pub(super) fn calculate_codebuff_cost(entry: &CodebuffEntry, pricing: &PricingMa
396398
.usage
397399
.output_tokens
398400
.saturating_add(entry.extra_total_tokens),
401+
cache_creation: None,
399402
..entry.usage
400403
};
401404
let raw = calculate_cost_for_usage(
@@ -429,6 +432,7 @@ pub(super) fn missing_codebuff_pricing(
429432
.usage
430433
.output_tokens
431434
.saturating_add(entry.extra_total_tokens),
435+
cache_creation: None,
432436
..entry.usage
433437
};
434438
let mut candidates = vec![entry.model.clone()];

rust/crates/ccusage/src/adapter/copilot/loader.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,9 +59,11 @@ fn usage_entry_to_loaded(
5959
cache_creation_input_tokens: entry.cache_creation_tokens,
6060
cache_read_input_tokens: entry.cache_read_tokens,
6161
speed: None,
62+
cache_creation: None,
6263
};
6364
let cost_usage = TokenUsageRaw {
6465
output_tokens: entry.output_tokens + entry.reasoning_output_tokens,
66+
cache_creation: None,
6567
..usage
6668
};
6769
let data = UsageEntry {

rust/crates/ccusage/src/adapter/copilot/parser.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,7 @@ fn to_candidate(
161161
cache_creation_input_tokens: cache_creation,
162162
cache_read_input_tokens: cache_read,
163163
speed: None,
164+
cache_creation: None,
164165
};
165166
let (usage, reasoning) = apply_total_token_fallback(usage, reasoning, total);
166167
if crate::total_usage_tokens(usage) + reasoning == 0 {

rust/crates/ccusage/src/adapter/droid/loader.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,7 @@ mod tests {
248248
cache_creation_input_tokens: 20,
249249
cache_read_input_tokens: 10,
250250
speed: None,
251+
cache_creation: None,
251252
},
252253
model: Some("claude-sonnet-4".to_string()),
253254
id: Some("droid:session-a".to_string()),

0 commit comments

Comments
 (0)