From 0214dcfd09c4cf4551a8aef77b6804d51b5fb7e8 Mon Sep 17 00:00:00 2001 From: "pullfrog[bot]" <226033991+pullfrog[bot]@users.noreply.github.com> Date: Sat, 6 Jun 2026 13:49:01 +0000 Subject: [PATCH 1/6] feat(cost): split cache creation pricing by 5m/1h duration Claude Code JSONL includes a `cache_creation` breakdown with `ephemeral_5m_input_tokens` and `ephemeral_1h_input_tokens`. Previously all cache creation tokens were priced at the 5-minute rate (1.25x base input), but Claude Code predominantly uses 1-hour caching (2x base input). - Parse `cache_creation.ephemeral_5m_input_tokens` and `ephemeral_1h_input_tokens` from JSONL usage records - Price 5m tokens at existing cache_create rate, 1h tokens at 2x base input (matching Anthropic pricing) - Fall back to flat `cache_creation_input_tokens` when breakdown is absent (older records, non-Claude agents) - Fix token count aggregation to include breakdown tokens Closes #899 --- rust/crates/ccusage/src/adapter/amp/parser.rs | 4 ++++ .../ccusage/src/adapter/claude/daily.rs | 5 +++-- rust/crates/ccusage/src/adapter/claude/mod.rs | 3 ++- .../ccusage/src/adapter/codebuff/loader.rs | 1 + .../ccusage/src/adapter/codebuff/parser.rs | 4 ++++ .../ccusage/src/adapter/copilot/loader.rs | 2 ++ .../ccusage/src/adapter/copilot/parser.rs | 1 + .../ccusage/src/adapter/droid/loader.rs | 1 + .../ccusage/src/adapter/droid/parser.rs | 4 ++++ .../ccusage/src/adapter/gemini/parser.rs | 3 +++ .../ccusage/src/adapter/goose/parser.rs | 3 +++ .../ccusage/src/adapter/goose/report.rs | 1 + .../ccusage/src/adapter/hermes/parser.rs | 6 +++++ .../ccusage/src/adapter/hermes/report.rs | 1 + .../crates/ccusage/src/adapter/kilo/parser.rs | 2 ++ .../crates/ccusage/src/adapter/kimi/parser.rs | 3 +++ .../ccusage/src/adapter/openclaw/parser.rs | 2 ++ .../ccusage/src/adapter/opencode/parser.rs | 2 ++ rust/crates/ccusage/src/adapter/pi/parser.rs | 1 + .../crates/ccusage/src/adapter/qwen/parser.rs | 4 ++++ rust/crates/ccusage/src/cost.rs | 18 ++++++++++++++- rust/crates/ccusage/src/main.rs | 3 +++ rust/crates/ccusage/src/summary.rs | 3 ++- rust/crates/ccusage/src/types.rs | 22 ++++++++++++++++++- rust/crates/ccusage/src/utils.rs | 4 +++- 25 files changed, 96 insertions(+), 7 deletions(-) diff --git a/rust/crates/ccusage/src/adapter/amp/parser.rs b/rust/crates/ccusage/src/adapter/amp/parser.rs index 7344bacfa..89d07e526 100644 --- a/rust/crates/ccusage/src/adapter/amp/parser.rs +++ b/rust/crates/ccusage/src/adapter/amp/parser.rs @@ -76,6 +76,7 @@ fn parse_ledger_events( cache_creation_input_tokens: cache.0, cache_read_input_tokens: cache.1, speed: None, + cache_creation: None, }; let total_tokens = json_value_u64(tokens.get("total")); let (usage, extra_total_tokens) = apply_total_token_fallback(usage, 0, total_tokens); @@ -109,6 +110,7 @@ fn parse_ledger_events( .usage .output_tokens .saturating_add(extra_total_tokens), + cache_creation: None, ..data.message.usage }, ..data.message.clone() @@ -179,6 +181,7 @@ fn parse_message_usage( cache_creation_input_tokens: json_value_u64(usage.get("cacheCreationInputTokens")), cache_read_input_tokens: json_value_u64(usage.get("cacheReadInputTokens")), speed: None, + cache_creation: None, }; let total_tokens = json_value_u64(usage.get("totalTokens")); let (usage_raw, extra_total_tokens) = @@ -218,6 +221,7 @@ fn parse_message_usage( .usage .output_tokens .saturating_add(extra_total_tokens), + cache_creation: None, ..data.message.usage }, ..data.message.clone() diff --git a/rust/crates/ccusage/src/adapter/claude/daily.rs b/rust/crates/ccusage/src/adapter/claude/daily.rs index bab1dcf5b..339b96d2d 100644 --- a/rust/crates/ccusage/src/adapter/claude/daily.rs +++ b/rust/crates/ccusage/src/adapter/claude/daily.rs @@ -354,7 +354,7 @@ fn is_valid_daily_usage_entry(data: &DailyUsageEntry) -> bool { fn daily_usage_token_total(entry: &DailyLoadedEntry) -> u64 { entry.usage.input_tokens + entry.usage.output_tokens - + entry.usage.cache_creation_input_tokens + + entry.usage.cache_creation_token_count() + entry.usage.cache_read_input_tokens } @@ -470,7 +470,7 @@ impl DailyAccumulator { let breakdown = &mut self.breakdowns[index]; breakdown.input_tokens += entry.usage.input_tokens; breakdown.output_tokens += entry.usage.output_tokens; - breakdown.cache_creation_tokens += entry.usage.cache_creation_input_tokens; + breakdown.cache_creation_tokens += entry.usage.cache_creation_token_count(); breakdown.cache_read_tokens += entry.usage.cache_read_input_tokens; breakdown.cost += entry.cost; if entry.missing_pricing_model.is_some() { @@ -590,6 +590,7 @@ mod tests { cache_creation_input_tokens: 0, cache_read_input_tokens: fixture.cache_read_tokens, speed: None, + cache_creation: None, }, cost: 0.0, model: Some("claude-sonnet-4-20250514".to_string()), diff --git a/rust/crates/ccusage/src/adapter/claude/mod.rs b/rust/crates/ccusage/src/adapter/claude/mod.rs index d6edc42b9..5b0da5164 100644 --- a/rust/crates/ccusage/src/adapter/claude/mod.rs +++ b/rust/crates/ccusage/src/adapter/claude/mod.rs @@ -212,7 +212,7 @@ fn usage_token_total(data: &UsageEntry) -> u64 { let usage = data.message.usage; usage.input_tokens + usage.output_tokens - + usage.cache_creation_input_tokens + + usage.cache_creation_token_count() + usage.cache_read_input_tokens } @@ -761,6 +761,7 @@ mod tests { cache_creation_input_tokens: 0, cache_read_input_tokens: fixture.cache_read_tokens, speed: None, + cache_creation: None, }, model: Some("claude-sonnet-4-20250514".to_string()), id: Some(fixture.message_id.to_string()), diff --git a/rust/crates/ccusage/src/adapter/codebuff/loader.rs b/rust/crates/ccusage/src/adapter/codebuff/loader.rs index 9002d5beb..0715db8b4 100644 --- a/rust/crates/ccusage/src/adapter/codebuff/loader.rs +++ b/rust/crates/ccusage/src/adapter/codebuff/loader.rs @@ -193,6 +193,7 @@ mod tests { cache_creation_input_tokens: 20, cache_read_input_tokens: 10, speed: None, + cache_creation: None, }, model: Some("claude-sonnet-4-20250514".to_string()), id: Some("message-a".to_string()), diff --git a/rust/crates/ccusage/src/adapter/codebuff/parser.rs b/rust/crates/ccusage/src/adapter/codebuff/parser.rs index e574435c5..c5e06ffbb 100644 --- a/rust/crates/ccusage/src/adapter/codebuff/parser.rs +++ b/rust/crates/ccusage/src/adapter/codebuff/parser.rs @@ -88,6 +88,7 @@ pub(super) fn load_chat_file(path: &Path) -> Result> { cache_creation_input_tokens: usage.cache_creation_input_tokens, cache_read_input_tokens: usage.cache_read_input_tokens, speed: None, + cache_creation: None, }, extra_total_tokens: usage.extra_total_tokens, dedup_key, @@ -251,6 +252,7 @@ pub(super) fn parse_usage_object(value: Option<&Value>) -> AssistantUsage { cache_creation_input_tokens: usage.cache_creation_input_tokens, cache_read_input_tokens: usage.cache_read_input_tokens, speed: None, + cache_creation: None, }; let (raw_usage, extra_total_tokens) = apply_total_token_fallback(raw_usage, usage.extra_total_tokens, total_tokens); @@ -396,6 +398,7 @@ pub(super) fn calculate_codebuff_cost(entry: &CodebuffEntry, pricing: &PricingMa .usage .output_tokens .saturating_add(entry.extra_total_tokens), + cache_creation: None, ..entry.usage }; let raw = calculate_cost_for_usage( @@ -429,6 +432,7 @@ pub(super) fn missing_codebuff_pricing( .usage .output_tokens .saturating_add(entry.extra_total_tokens), + cache_creation: None, ..entry.usage }; let mut candidates = vec![entry.model.clone()]; diff --git a/rust/crates/ccusage/src/adapter/copilot/loader.rs b/rust/crates/ccusage/src/adapter/copilot/loader.rs index 193cc5f73..acd7e8c02 100644 --- a/rust/crates/ccusage/src/adapter/copilot/loader.rs +++ b/rust/crates/ccusage/src/adapter/copilot/loader.rs @@ -59,9 +59,11 @@ fn usage_entry_to_loaded( cache_creation_input_tokens: entry.cache_creation_tokens, cache_read_input_tokens: entry.cache_read_tokens, speed: None, + cache_creation: None, }; let cost_usage = TokenUsageRaw { output_tokens: entry.output_tokens + entry.reasoning_output_tokens, + cache_creation: None, ..usage }; let data = UsageEntry { diff --git a/rust/crates/ccusage/src/adapter/copilot/parser.rs b/rust/crates/ccusage/src/adapter/copilot/parser.rs index ce0b4e2be..988ba7846 100644 --- a/rust/crates/ccusage/src/adapter/copilot/parser.rs +++ b/rust/crates/ccusage/src/adapter/copilot/parser.rs @@ -161,6 +161,7 @@ fn to_candidate( cache_creation_input_tokens: cache_creation, cache_read_input_tokens: cache_read, speed: None, + cache_creation: None, }; let (usage, reasoning) = apply_total_token_fallback(usage, reasoning, total); if crate::total_usage_tokens(usage) + reasoning == 0 { diff --git a/rust/crates/ccusage/src/adapter/droid/loader.rs b/rust/crates/ccusage/src/adapter/droid/loader.rs index 242d0406a..818dd9984 100644 --- a/rust/crates/ccusage/src/adapter/droid/loader.rs +++ b/rust/crates/ccusage/src/adapter/droid/loader.rs @@ -248,6 +248,7 @@ mod tests { cache_creation_input_tokens: 20, cache_read_input_tokens: 10, speed: None, + cache_creation: None, }, model: Some("claude-sonnet-4".to_string()), id: Some("droid:session-a".to_string()), diff --git a/rust/crates/ccusage/src/adapter/droid/parser.rs b/rust/crates/ccusage/src/adapter/droid/parser.rs index d77ffec23..ae8b34ebd 100644 --- a/rust/crates/ccusage/src/adapter/droid/parser.rs +++ b/rust/crates/ccusage/src/adapter/droid/parser.rs @@ -84,6 +84,7 @@ pub(super) fn load_settings_file(path: &Path) -> Result> { cache_creation_input_tokens: usage.cache_creation_tokens, cache_read_input_tokens: usage.cache_read_tokens, speed: None, + cache_creation: None, }, reasoning_tokens: usage.thinking_tokens, })) @@ -97,6 +98,7 @@ pub(super) fn parse_token_usage(value: Option<&Value>) -> Option f64 { let usage = TokenUsageRaw { output_tokens: entry.usage.output_tokens + entry.reasoning_tokens, + cache_creation: None, ..entry.usage }; for candidate in droid_model_candidates(entry) { @@ -160,6 +163,7 @@ pub(super) fn calculate_droid_cost(entry: &DroidEntry, pricing: &PricingMap) -> pub(super) fn missing_droid_pricing(entry: &DroidEntry, pricing: &PricingMap) -> Option { let usage = TokenUsageRaw { output_tokens: entry.usage.output_tokens + entry.reasoning_tokens, + cache_creation: None, ..entry.usage }; missing_pricing_model_for_candidates( diff --git a/rust/crates/ccusage/src/adapter/gemini/parser.rs b/rust/crates/ccusage/src/adapter/gemini/parser.rs index 1ceaddfee..bb39c1cf5 100644 --- a/rust/crates/ccusage/src/adapter/gemini/parser.rs +++ b/rust/crates/ccusage/src/adapter/gemini/parser.rs @@ -227,6 +227,7 @@ fn build_event( cache_creation_input_tokens: 0, cache_read_input_tokens: cache_read_tokens, speed: None, + cache_creation: None, }; let (display_usage, extra_total_tokens) = apply_total_token_fallback(display_usage, tokens.thoughts, total_tokens); @@ -344,9 +345,11 @@ pub(super) fn event_to_loaded( cache_creation_input_tokens: 0, cache_read_input_tokens: event.cache_read_tokens, speed: None, + cache_creation: None, }; let cost_usage = TokenUsageRaw { output_tokens: event.output_tokens + event.reasoning_tokens, + cache_creation: None, ..usage }; let extra_total_tokens = event diff --git a/rust/crates/ccusage/src/adapter/goose/parser.rs b/rust/crates/ccusage/src/adapter/goose/parser.rs index cf82a4e45..522ae60da 100644 --- a/rust/crates/ccusage/src/adapter/goose/parser.rs +++ b/rust/crates/ccusage/src/adapter/goose/parser.rs @@ -41,6 +41,7 @@ pub(super) fn row_to_entry( cache_creation_input_tokens: 0, cache_read_input_tokens: 0, speed: None, + cache_creation: None, }; let timestamp_text = crate::format_rfc3339_millis(timestamp); let data = UsageEntry { @@ -165,6 +166,7 @@ fn calculate_goose_cost( ) -> f64 { let cost_usage = TokenUsageRaw { output_tokens: usage.output_tokens.saturating_add(reasoning_tokens), + cache_creation: None, ..usage }; let raw = calculate_cost_for_usage( @@ -196,6 +198,7 @@ fn missing_goose_pricing( ) -> Option { let cost_usage = TokenUsageRaw { output_tokens: usage.output_tokens.saturating_add(reasoning_tokens), + cache_creation: None, ..usage }; let mut candidates = vec![model.to_string()]; diff --git a/rust/crates/ccusage/src/adapter/goose/report.rs b/rust/crates/ccusage/src/adapter/goose/report.rs index 31bd98478..6fc24e5e1 100644 --- a/rust/crates/ccusage/src/adapter/goose/report.rs +++ b/rust/crates/ccusage/src/adapter/goose/report.rs @@ -95,6 +95,7 @@ mod tests { cache_creation_input_tokens: 0, cache_read_input_tokens: 0, speed: None, + cache_creation: None, }, model: Some("claude-sonnet-4-20250514".to_string()), id: Some("session-a".to_string()), diff --git a/rust/crates/ccusage/src/adapter/hermes/parser.rs b/rust/crates/ccusage/src/adapter/hermes/parser.rs index dabcd2774..7881e4414 100644 --- a/rust/crates/ccusage/src/adapter/hermes/parser.rs +++ b/rust/crates/ccusage/src/adapter/hermes/parser.rs @@ -59,6 +59,7 @@ pub(super) fn read_session_row(statement: &sqlite::Statement<'_>) -> Option f64 { } let usage = TokenUsageRaw { output_tokens: entry.usage.output_tokens + entry.reasoning_tokens, + cache_creation: None, ..entry.usage }; for candidate in model_candidates(entry) { @@ -205,6 +207,7 @@ fn missing_hermes_pricing(entry: &HermesEntry, pricing: &PricingMap) -> Option) -> cache_creation_input_tokens: entry.cache_creation_tokens, cache_read_input_tokens: entry.cache_read_tokens, speed: None, + cache_creation: None, }; let data = UsageEntry { session_id: Some(entry.session_id.clone()), diff --git a/rust/crates/ccusage/src/adapter/opencode/parser.rs b/rust/crates/ccusage/src/adapter/opencode/parser.rs index 04c191757..0b631d09c 100644 --- a/rust/crates/ccusage/src/adapter/opencode/parser.rs +++ b/rust/crates/ccusage/src/adapter/opencode/parser.rs @@ -28,6 +28,7 @@ pub(crate) fn message_value_to_entry( .get("cache") .map_or(0, |cache| json_value_u64(cache.get("read"))), speed: None, + cache_creation: None, }; let total_tokens = json_value_u64(tokens.get("total")); let (usage, extra_total_tokens) = apply_total_token_fallback(usage, 0, total_tokens); @@ -66,6 +67,7 @@ pub(crate) fn message_value_to_entry( }; let cost_usage = TokenUsageRaw { output_tokens: usage.output_tokens.saturating_add(extra_total_tokens), + cache_creation: None, ..usage }; let cost = diff --git a/rust/crates/ccusage/src/adapter/pi/parser.rs b/rust/crates/ccusage/src/adapter/pi/parser.rs index ed6314523..a6c9699fa 100644 --- a/rust/crates/ccusage/src/adapter/pi/parser.rs +++ b/rust/crates/ccusage/src/adapter/pi/parser.rs @@ -50,6 +50,7 @@ pub(crate) fn read_session_file( cache_creation_input_tokens: cache_create, cache_read_input_tokens: cache_read, speed: None, + cache_creation: None, }; let (usage, extra_total_tokens) = apply_total_token_fallback(usage, 0, total); if crate::total_usage_tokens(usage) + extra_total_tokens == 0 { diff --git a/rust/crates/ccusage/src/adapter/qwen/parser.rs b/rust/crates/ccusage/src/adapter/qwen/parser.rs index f936ab276..be02250d5 100644 --- a/rust/crates/ccusage/src/adapter/qwen/parser.rs +++ b/rust/crates/ccusage/src/adapter/qwen/parser.rs @@ -93,6 +93,7 @@ fn parse_line( cache_creation_input_tokens: 0, cache_read_input_tokens: cache_read_tokens, speed: None, + cache_creation: None, }; let (display_usage, extra_total_tokens) = apply_total_token_fallback(display_usage, reasoning_tokens, total_tokens); @@ -122,6 +123,7 @@ fn parse_line( output_tokens: display_usage .output_tokens .saturating_add(extra_total_tokens), + cache_creation: None, ..display_usage }; let cost = calculate_qwen_cost(&model, billable_usage, mode, pricing); @@ -271,6 +273,7 @@ mod tests { cache_creation_input_tokens: 0, cache_read_input_tokens: 0, speed: None, + cache_creation: None, }, CostMode::Calculate, Some(&pricing), @@ -317,6 +320,7 @@ mod tests { cache_creation_input_tokens: 0, cache_read_input_tokens: 3, speed: None, + cache_creation: None, }, model: Some("model:1".to_string()), id: None, diff --git a/rust/crates/ccusage/src/cost.rs b/rust/crates/ccusage/src/cost.rs index 12908885f..c9a74517e 100644 --- a/rust/crates/ccusage/src/cost.rs +++ b/rust/crates/ccusage/src/cost.rs @@ -92,6 +92,17 @@ fn calculate_cost_from_tokens( } else { 1.0 }; + let (cache_create_5m_tokens, cache_create_1h_tokens) = + if let Some(breakdown) = usage.cache_creation { + ( + breakdown.ephemeral_5m_input_tokens, + breakdown.ephemeral_1h_input_tokens, + ) + } else { + (usage.cache_creation_input_tokens, 0) + }; + let cache_create_1h_cost = pricing.input * 2.0; + let cache_create_1h_cost_above_200k = pricing.input_above_200k.map(|c| c * 2.0); (tiered_cost(usage.input_tokens, pricing.input, pricing.input_above_200k) + tiered_cost( usage.output_tokens, @@ -99,10 +110,15 @@ fn calculate_cost_from_tokens( pricing.output_above_200k, ) + tiered_cost( - usage.cache_creation_input_tokens, + cache_create_5m_tokens, pricing.cache_create, pricing.cache_create_above_200k, ) + + tiered_cost( + cache_create_1h_tokens, + cache_create_1h_cost, + cache_create_1h_cost_above_200k, + ) + tiered_cost( usage.cache_read_input_tokens, pricing.cache_read, diff --git a/rust/crates/ccusage/src/main.rs b/rust/crates/ccusage/src/main.rs index 45d781ea7..aa06c3122 100644 --- a/rust/crates/ccusage/src/main.rs +++ b/rust/crates/ccusage/src/main.rs @@ -763,6 +763,7 @@ mod tests { cache_creation_input_tokens: 20, cache_read_input_tokens: 10, speed: None, + cache_creation: None, }, model: Some("claude-sonnet-4-20250514".to_string()), id: Some("event-a".to_string()), @@ -845,6 +846,7 @@ mod tests { cache_creation_input_tokens: 20, cache_read_input_tokens: 10, speed: None, + cache_creation: None, }, model: Some("[pi] gpt-5.4".to_string()), id: None, @@ -895,6 +897,7 @@ mod tests { cache_creation_input_tokens: 20, cache_read_input_tokens: 10, speed: None, + cache_creation: None, }, model: Some("claude-sonnet-4-20250514".to_string()), id: Some("msg-1".to_string()), diff --git a/rust/crates/ccusage/src/summary.rs b/rust/crates/ccusage/src/summary.rs index a7c6cd422..c54665ccc 100644 --- a/rust/crates/ccusage/src/summary.rs +++ b/rust/crates/ccusage/src/summary.rs @@ -75,7 +75,7 @@ impl UsageAccumulator { let breakdown = &mut self.breakdowns[index]; breakdown.input_tokens += usage.input_tokens; breakdown.output_tokens += usage.output_tokens; - breakdown.cache_creation_tokens += usage.cache_creation_input_tokens; + breakdown.cache_creation_tokens += usage.cache_creation_token_count(); breakdown.cache_read_tokens += usage.cache_read_input_tokens; breakdown.extra_total_tokens += entry.extra_total_tokens; breakdown.cost += entry.cost; @@ -599,6 +599,7 @@ mod tests { cache_creation_input_tokens: fixture.cache_creation_tokens, cache_read_input_tokens: fixture.cache_read_tokens, speed: None, + cache_creation: None, }; let timestamp = TimestampMs::from_millis(fixture.timestamp); LoadedEntry { diff --git a/rust/crates/ccusage/src/types.rs b/rust/crates/ccusage/src/types.rs index 1740fd69d..2a3628384 100644 --- a/rust/crates/ccusage/src/types.rs +++ b/rust/crates/ccusage/src/types.rs @@ -34,6 +34,26 @@ pub(crate) struct TokenUsageRaw { #[serde(default)] pub(crate) cache_read_input_tokens: u64, pub(crate) speed: Option, + #[serde(default)] + pub(crate) cache_creation: Option, +} + +impl TokenUsageRaw { + pub(crate) fn cache_creation_token_count(self) -> u64 { + if let Some(b) = self.cache_creation { + b.ephemeral_5m_input_tokens + b.ephemeral_1h_input_tokens + } else { + self.cache_creation_input_tokens + } + } +} + +#[derive(Debug, Clone, Copy, Default, Deserialize)] +pub(crate) struct CacheCreationRaw { + #[serde(rename = "ephemeral_5m_input_tokens", default)] + pub(crate) ephemeral_5m_input_tokens: u64, + #[serde(rename = "ephemeral_1h_input_tokens", default)] + pub(crate) ephemeral_1h_input_tokens: u64, } #[derive(Debug, Clone, Copy, Deserialize)] @@ -57,7 +77,7 @@ impl TokenCounts { pub(crate) fn add_usage(&mut self, usage: TokenUsageRaw) { self.input_tokens += usage.input_tokens; self.output_tokens += usage.output_tokens; - self.cache_creation_tokens += usage.cache_creation_input_tokens; + self.cache_creation_tokens += usage.cache_creation_token_count(); self.cache_read_tokens += usage.cache_read_input_tokens; } diff --git a/rust/crates/ccusage/src/utils.rs b/rust/crates/ccusage/src/utils.rs index c8b4bb977..a34323a22 100644 --- a/rust/crates/ccusage/src/utils.rs +++ b/rust/crates/ccusage/src/utils.rs @@ -14,7 +14,7 @@ pub(crate) fn non_empty_json_string(value: Option<&Value>) -> Option { pub(crate) fn total_usage_tokens(usage: TokenUsageRaw) -> u64 { usage.input_tokens + usage.output_tokens - + usage.cache_creation_input_tokens + + usage.cache_creation_token_count() + usage.cache_read_input_tokens } @@ -49,6 +49,7 @@ mod tests { cache_creation_input_tokens: 0, cache_read_input_tokens: 25, speed: None, + cache_creation: None, }, 0, 175, @@ -69,6 +70,7 @@ mod tests { cache_creation_input_tokens: 0, cache_read_input_tokens: 25, speed: None, + cache_creation: None, }, 0, 200, From c09d877d9c256db0298af3735b924dea0f512695 Mon Sep 17 00:00:00 2001 From: "pullfrog[bot]" <226033991+pullfrog[bot]@users.noreply.github.com> Date: Sat, 6 Jun 2026 13:50:01 +0000 Subject: [PATCH 2/6] chore: add .npmrc to .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 67ededd8d..baddefc5c 100644 --- a/.gitignore +++ b/.gitignore @@ -43,6 +43,7 @@ report.[0-9]_.[0-9]_.[0-9]_.[0-9]_.json .direnv !.envrc .pre-commit-config.yaml +.npmrc # generated local agent skills .claude/skills From 30bfd7111a7e0a4b971763ea7578dc0e954181e0 Mon Sep 17 00:00:00 2001 From: "pullfrog[bot]" <226033991+pullfrog[bot]@users.noreply.github.com> Date: Sat, 6 Jun 2026 13:52:00 +0000 Subject: [PATCH 3/6] chore: fix whitespace in pullfrog workflow --- .github/workflows/pullfrog.yml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/pullfrog.yml b/.github/workflows/pullfrog.yml index c3f3885bb..423569723 100644 --- a/.github/workflows/pullfrog.yml +++ b/.github/workflows/pullfrog.yml @@ -36,22 +36,21 @@ jobs: ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} CLAUDE_CODE_OAUTH_TOKEN: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }} OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - GOOGLE_GENERATIVE_AI_API_KEY: - ${{ secrets.GOOGLE_GENERATIVE_AI_API_KEY }} + GOOGLE_GENERATIVE_AI_API_KEY: ${{ secrets.GOOGLE_GENERATIVE_AI_API_KEY }} GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }} XAI_API_KEY: ${{ secrets.XAI_API_KEY }} DEEPSEEK_API_KEY: ${{ secrets.DEEPSEEK_API_KEY }} MOONSHOT_API_KEY: ${{ secrets.MOONSHOT_API_KEY }} OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }} OPENCODE_API_KEY: ${{ secrets.OPENCODE_API_KEY }} - + # for Amazon Bedrock (https://docs.pullfrog.com/bedrock) # AWS_BEARER_TOKEN_BEDROCK: ${{ secrets.AWS_BEARER_TOKEN_BEDROCK }} # AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} # AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} # AWS_REGION: us-east-1 # BEDROCK_MODEL_ID: - + # for Google Vertex AI (https://docs.pullfrog.com/vertex) # VERTEX_SERVICE_ACCOUNT_JSON: ${{ secrets.VERTEX_SERVICE_ACCOUNT_JSON }} # GOOGLE_CLOUD_PROJECT: my-project From 4a8f83c52f5b4886472245d52eab15ffb52b56ba Mon Sep 17 00:00:00 2001 From: "pullfrog[bot]" <226033991+pullfrog[bot]@users.noreply.github.com> Date: Sat, 6 Jun 2026 13:53:18 +0000 Subject: [PATCH 4/6] chore: suppress zizmor unpinned-uses in pullfrog workflow --- .github/workflows/pullfrog.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pullfrog.yml b/.github/workflows/pullfrog.yml index 423569723..1740fdc67 100644 --- a/.github/workflows/pullfrog.yml +++ b/.github/workflows/pullfrog.yml @@ -22,13 +22,13 @@ jobs: contents: read steps: - name: Checkout code - uses: actions/checkout@v6 + uses: actions/checkout@v6 # zizmor: ignore[unpinned-uses] with: fetch-depth: 1 - name: Setup Nix uses: ./.github/actions/setup-nix - name: Run agent - uses: pullfrog/pullfrog@v0 + uses: pullfrog/pullfrog@v0 # zizmor: ignore[unpinned-uses] with: prompt: ${{ inputs.prompt }} env: From 47c83e4edc2916d4ab23c29ea75381e4c513525a Mon Sep 17 00:00:00 2001 From: "pullfrog[bot]" <226033991+pullfrog[bot]@users.noreply.github.com> Date: Sat, 6 Jun 2026 13:53:50 +0000 Subject: [PATCH 5/6] style: single space before zizmor comment --- .github/workflows/pullfrog.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pullfrog.yml b/.github/workflows/pullfrog.yml index 1740fdc67..a759ce760 100644 --- a/.github/workflows/pullfrog.yml +++ b/.github/workflows/pullfrog.yml @@ -22,13 +22,13 @@ jobs: contents: read steps: - name: Checkout code - uses: actions/checkout@v6 # zizmor: ignore[unpinned-uses] + uses: actions/checkout@v6 # zizmor: ignore[unpinned-uses] with: fetch-depth: 1 - name: Setup Nix uses: ./.github/actions/setup-nix - name: Run agent - uses: pullfrog/pullfrog@v0 # zizmor: ignore[unpinned-uses] + uses: pullfrog/pullfrog@v0 # zizmor: ignore[unpinned-uses] with: prompt: ${{ inputs.prompt }} env: From 63679e6cc3fbfdbff4ad4cf6bb284a11a4ce9443 Mon Sep 17 00:00:00 2001 From: ryoppippi <1560508+ryoppippi@users.noreply.github.com> Date: Mon, 8 Jun 2026 22:05:53 +0100 Subject: [PATCH 6/6] test(cost): cover cache creation duration pricing Add focused regression coverage for Claude Code cache creation duration handling. The tests pin the three important cases from #899: records with 5-minute and 1-hour cache creation breakdowns use separate rates, older records without the breakdown keep the flat cache creation fallback, and serde parses the nested cache_creation usage fields. Update the cost mode guide so the documented formula matches the new 5m/1h calculation. Also make cache_creation_token_count borrow self and name the 1-hour multiplier so future pricing changes have a single obvious constant to inspect. --- docs/guide/cost-modes.md | 10 +++- rust/crates/ccusage/src/cost.rs | 95 +++++++++++++++++++++++++++++++- rust/crates/ccusage/src/types.rs | 8 +-- 3 files changed, 105 insertions(+), 8 deletions(-) diff --git a/docs/guide/cost-modes.md b/docs/guide/cost-modes.md index f8d3918bd..13db20703 100644 --- a/docs/guide/cost-modes.md +++ b/docs/guide/cost-modes.md @@ -186,7 +186,8 @@ When calculating costs from tokens, ccusage uses: type TokenCosts = { input: number; // Input tokens output: number; // Output tokens - cacheCreate: number; // Cache creation tokens + cacheCreate5m: number; // 5-minute cache creation tokens + cacheCreate1h: number; // 1-hour cache creation tokens cacheRead: number; // Cache read tokens }; ``` @@ -197,10 +198,15 @@ type TokenCosts = { totalCost = inputTokens * inputPrice + outputTokens * outputPrice + - cacheCreateTokens * cacheCreatePrice + + cacheCreate5mTokens * cacheCreatePrice + + cacheCreate1hTokens * inputPrice * 2 + cacheReadTokens * cacheReadPrice; ``` +When Claude Code records do not include the `cache_creation` duration +breakdown, ccusage falls back to pricing `cache_creation_input_tokens` at the +standard cache creation rate. + ### Pre-calculated Costs Claude Code provides `costUSD` values in JSONL files: diff --git a/rust/crates/ccusage/src/cost.rs b/rust/crates/ccusage/src/cost.rs index c9a74517e..2ad1646fb 100644 --- a/rust/crates/ccusage/src/cost.rs +++ b/rust/crates/ccusage/src/cost.rs @@ -4,6 +4,8 @@ use crate::{ types::{Speed, UsageEntry}, }; +const CACHE_CREATE_1H_INPUT_MULTIPLIER: f64 = 2.0; + pub(crate) fn calculate_cost( data: &UsageEntry, mode: CostMode, @@ -101,8 +103,10 @@ fn calculate_cost_from_tokens( } else { (usage.cache_creation_input_tokens, 0) }; - let cache_create_1h_cost = pricing.input * 2.0; - let cache_create_1h_cost_above_200k = pricing.input_above_200k.map(|c| c * 2.0); + let cache_create_1h_cost = pricing.input * CACHE_CREATE_1H_INPUT_MULTIPLIER; + let cache_create_1h_cost_above_200k = pricing + .input_above_200k + .map(|c| c * CACHE_CREATE_1H_INPUT_MULTIPLIER); (tiered_cost(usage.input_tokens, pricing.input, pricing.input_above_200k) + tiered_cost( usage.output_tokens, @@ -139,3 +143,90 @@ pub(crate) fn tiered_cost(tokens: u64, base: f64, above: Option) -> f64 { } tokens as f64 * base } + +#[cfg(test)] +mod tests { + use crate::{ + cli::CostMode, + pricing::PricingMap, + types::{CacheCreationRaw, TokenUsageRaw}, + }; + + use super::calculate_cost_for_usage; + + fn pricing() -> PricingMap { + let mut pricing = PricingMap::default(); + pricing.load_json( + r#"{ + "test-model": { + "input_cost_per_token": 1.0, + "output_cost_per_token": 10.0, + "cache_creation_input_token_cost": 1.25, + "cache_read_input_token_cost": 0.1, + "input_cost_per_token_above_200k_tokens": 2.0, + "cache_creation_input_token_cost_above_200k_tokens": 1.5 + } + }"#, + ); + pricing + } + + #[test] + fn prices_cache_creation_breakdown_by_duration() { + let usage = TokenUsageRaw { + cache_creation_input_tokens: 999, + cache_read_input_tokens: 30, + cache_creation: Some(CacheCreationRaw { + ephemeral_5m_input_tokens: 10, + ephemeral_1h_input_tokens: 20, + }), + ..TokenUsageRaw::default() + }; + + let cost = calculate_cost_for_usage( + Some("test-model"), + usage, + None, + CostMode::Calculate, + Some(&pricing()), + ); + + assert!((cost - 55.5).abs() < f64::EPSILON); + } + + #[test] + fn falls_back_to_flat_cache_creation_rate_without_breakdown() { + let usage = TokenUsageRaw { + cache_creation_input_tokens: 10, + ..TokenUsageRaw::default() + }; + + let cost = calculate_cost_for_usage( + Some("test-model"), + usage, + None, + CostMode::Calculate, + Some(&pricing()), + ); + + assert!((cost - 12.5).abs() < f64::EPSILON); + } + + #[test] + fn parses_cache_creation_breakdown_from_usage_json() { + let usage = serde_json::from_str::( + r#"{ + "input_tokens": 1, + "output_tokens": 2, + "cache_creation_input_tokens": 300, + "cache_creation": { + "ephemeral_5m_input_tokens": 100, + "ephemeral_1h_input_tokens": 200 + } + }"#, + ) + .unwrap(); + + assert_eq!(usage.cache_creation_token_count(), 300); + } +} diff --git a/rust/crates/ccusage/src/types.rs b/rust/crates/ccusage/src/types.rs index 2a3628384..d18614b84 100644 --- a/rust/crates/ccusage/src/types.rs +++ b/rust/crates/ccusage/src/types.rs @@ -39,8 +39,8 @@ pub(crate) struct TokenUsageRaw { } impl TokenUsageRaw { - pub(crate) fn cache_creation_token_count(self) -> u64 { - if let Some(b) = self.cache_creation { + pub(crate) fn cache_creation_token_count(&self) -> u64 { + if let Some(b) = &self.cache_creation { b.ephemeral_5m_input_tokens + b.ephemeral_1h_input_tokens } else { self.cache_creation_input_tokens @@ -50,9 +50,9 @@ impl TokenUsageRaw { #[derive(Debug, Clone, Copy, Default, Deserialize)] pub(crate) struct CacheCreationRaw { - #[serde(rename = "ephemeral_5m_input_tokens", default)] + #[serde(default)] pub(crate) ephemeral_5m_input_tokens: u64, - #[serde(rename = "ephemeral_1h_input_tokens", default)] + #[serde(default)] pub(crate) ephemeral_1h_input_tokens: u64, }