Skip to content

Commit 8a52395

Browse files
authored
fix(codex): dedupe goal rollout token events (#1237)
* fix(codex): dedupe copied rollout token events — Align the streaming Codex group aggregation dedupe key with the event loader so copied token_count rows from goal or rollout session files are counted once instead of once per session file. This prevents daily and monthly table reports from double-counting copied Codex history while preserving the same token-tuple-based duplicate detection already used by JSON/all-agent loading paths.
* test(codex): cover rollout dedupe threading modes — Extend the copied rollout token regression test across both local and parallel Codex aggregation paths. This addresses CodeRabbit feedback and keeps the dedupe contract covered for each SharedArgs single_thread mode.
* fix(codex): preserve session report dedupe scope — Keep Codex rollout token dedupe across files for daily, weekly, and monthly reports while restoring session-scoped dedupe keys for session reports. The previous dedupe key intentionally removed session_id to collapse copied root/goal rollout token_count rows, but that same global key ran before session grouping and could drop a distinct session bucket when two session files had identical token usage rows. Session reports now include the session hash in the key, while aggregate period reports use a zeroed session component (hash and length both 0) to retain cross-session dedupe. Adds a regression test verifying that identical token usage rows in two session files remain visible as two session groups across both single-threaded and parallel aggregation.
1 parent 884514a commit 8a52395

2 files changed

Lines changed: 109 additions & 10 deletions

File tree

rust/crates/ccusage/src/adapter/codex/aggregate.rs

Lines changed: 104 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -237,7 +237,7 @@ fn add_event_to_groups(
237237
};
238238
let timestamp = parse_ts_timestamp(&event.timestamp)
239239
.ok_or_else(|| crate::cli_error(format!("Invalid Codex timestamp: {}", event.timestamp)))?;
240-
if !insert_event_key(event, timestamp, model, seen) {
240+
if !insert_event_key(event, timestamp, model, kind, seen) {
241241
return Ok(());
242242
}
243243
add_deduped_event_to_groups(event, model, timestamp, kind, timezone, shared, groups)
@@ -257,7 +257,7 @@ fn add_event_to_groups_local(
257257
.ok_or_else(|| crate::cli_error(format!("Invalid Codex timestamp: {}", event.timestamp)))?;
258258
if !aggregation
259259
.seen
260-
.insert(codex_event_key(event, timestamp, model))
260+
.insert(codex_event_key(event, timestamp, model, kind))
261261
{
262262
return Ok(());
263263
}
@@ -341,9 +341,10 @@ fn insert_event_key(
341341
event: &CodexTokenUsageEvent,
342342
timestamp: crate::TimestampMs,
343343
model: &str,
344+
kind: AgentReportKind,
344345
seen: &CodexDedupeShards,
345346
) -> bool {
346-
let key = codex_event_key(event, timestamp, model);
347+
let key = codex_event_key(event, timestamp, model, kind);
347348
let mut hasher = FxHasher::default();
348349
key.hash(&mut hasher);
349350
let shard_index = hasher.finish() as usize % seen.len();
@@ -354,10 +355,16 @@ fn codex_event_key(
354355
event: &CodexTokenUsageEvent,
355356
timestamp: crate::TimestampMs,
356357
model: &str,
358+
kind: AgentReportKind,
357359
) -> CodexEventKey {
360+
let (session_hash, session_len) = if kind == AgentReportKind::Session {
361+
(hash_text(&event.session_id), event.session_id.len())
362+
} else {
363+
(0, 0)
364+
};
358365
(
359-
hash_text(&event.session_id),
360-
event.session_id.len(),
366+
session_hash,
367+
session_len,
361368
timestamp,
362369
hash_text(model),
363370
model.len(),
@@ -463,6 +470,98 @@ mod tests {
463470

464471
use crate::adapter::codex::paths::CodexUsageSource;
465472

473+
#[test]
474+
fn dedupes_copied_token_usage_across_session_files() {
475+
let usage_line = json!({
476+
"timestamp": "2026-05-29T08:01:00.000Z",
477+
"type": "event_msg",
478+
"payload": {
479+
"type": "token_count",
480+
"info": {
481+
"model": "gpt-5.2",
482+
"last_token_usage": {
483+
"input_tokens": 1_000,
484+
"cached_input_tokens": 100,
485+
"output_tokens": 200,
486+
"reasoning_output_tokens": 20,
487+
"total_tokens": 1_200,
488+
},
489+
},
490+
},
491+
})
492+
.to_string();
493+
let fixture = fs_fixture!({
494+
"sessions/root.jsonl": &usage_line,
495+
"sessions/goal.jsonl": &usage_line,
496+
});
497+
for single_thread in [true, false] {
498+
let shared = SharedArgs {
499+
single_thread,
500+
timezone: Some("UTC".to_string()),
501+
..SharedArgs::default()
502+
};
503+
504+
let groups = load_groups_from_directory(
505+
&fixture.path("sessions"),
506+
&shared,
507+
AgentReportKind::Daily,
508+
)
509+
.unwrap();
510+
511+
assert_eq!(groups.len(), 1);
512+
let group = groups.get("2026-05-29").unwrap();
513+
assert_eq!(group.input_tokens, 1_000);
514+
assert_eq!(group.cached_input_tokens, 100);
515+
assert_eq!(group.output_tokens, 200);
516+
assert_eq!(group.reasoning_output_tokens, 20);
517+
assert_eq!(group.total_tokens, 1_200);
518+
}
519+
}
520+
521+
#[test]
522+
fn keeps_matching_token_usage_in_distinct_session_groups() {
523+
let usage_line = json!({
524+
"timestamp": "2026-05-29T08:01:00.000Z",
525+
"type": "event_msg",
526+
"payload": {
527+
"type": "token_count",
528+
"info": {
529+
"model": "gpt-5.2",
530+
"last_token_usage": {
531+
"input_tokens": 1_000,
532+
"cached_input_tokens": 100,
533+
"output_tokens": 200,
534+
"reasoning_output_tokens": 20,
535+
"total_tokens": 1_200,
536+
},
537+
},
538+
},
539+
})
540+
.to_string();
541+
let fixture = fs_fixture!({
542+
"sessions/root.jsonl": &usage_line,
543+
"sessions/goal.jsonl": &usage_line,
544+
});
545+
for single_thread in [true, false] {
546+
let shared = SharedArgs {
547+
single_thread,
548+
timezone: Some("UTC".to_string()),
549+
..SharedArgs::default()
550+
};
551+
552+
let groups = load_groups_from_directory(
553+
&fixture.path("sessions"),
554+
&shared,
555+
AgentReportKind::Session,
556+
)
557+
.unwrap();
558+
559+
assert_eq!(groups.len(), 2);
560+
assert_eq!(groups["root"].input_tokens, 1_000);
561+
assert_eq!(groups["goal"].input_tokens, 1_000);
562+
}
563+
}
564+
466565
#[test]
467566
fn aggregates_active_copy_when_archived_file_has_same_relative_path() {
468567
let active_usage = [

rust/crates/ccusage/src/adapter/codex/mod.rs

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -96,7 +96,7 @@ mod tests {
9696
}
9797

9898
#[test]
99-
fn keeps_matching_grouped_codex_usage_events_from_distinct_sessions() {
99+
fn dedupes_matching_grouped_codex_usage_events_from_distinct_sessions() {
100100
let usage_line = r#"{"timestamp":"2026-01-02T00:00:00.000Z","type":"event_msg","payload":{"type":"token_count","info":{"model":"gpt-5","last_token_usage":{"input_tokens":100,"cached_input_tokens":10,"output_tokens":50,"reasoning_output_tokens":0,"total_tokens":150}}}}"#;
101101
let fixture = fs_fixture!({
102102
"sessions/session-a.jsonl": usage_line,
@@ -113,10 +113,10 @@ mod tests {
113113

114114
assert_eq!(groups.len(), 1);
115115
let group = groups.get("2026-01-02").unwrap();
116-
assert_eq!(group.input_tokens, 200);
117-
assert_eq!(group.cached_input_tokens, 20);
118-
assert_eq!(group.output_tokens, 100);
119-
assert_eq!(group.total_tokens, 300);
116+
assert_eq!(group.input_tokens, 100);
117+
assert_eq!(group.cached_input_tokens, 10);
118+
assert_eq!(group.output_tokens, 50);
119+
assert_eq!(group.total_tokens, 150);
120120
}
121121

122122
#[test]

0 commit comments

Comments
 (0)