From e07469082f6b6244e73bf35e9bfa910c5fe09508 Mon Sep 17 00:00:00 2001
From: Ahmed Ibrahim
Date: Fri, 21 Nov 2025 12:05:29 -0800
Subject: [PATCH 1/9] encrypt

---
 codex-rs/Cargo.lock                                |   1 +
 codex-rs/core/src/codex.rs                         |  22 ++-
 codex-rs/core/src/context_manager/history.rs       |  47 ++++++
 .../core/src/context_manager/history_tests.rs      |  31 ++++
 codex-rs/core/src/state/session.rs                 |   4 +
 codex-rs/core/src/truncate.rs                      |   2 +-
 codex-rs/core/tests/common/Cargo.toml              |   1 +
 codex-rs/core/tests/common/responses.rs            |   7 +
 codex-rs/core/tests/suite/compact.rs               | 150 ++++++++++++++++++
 9 files changed, 251 insertions(+), 14 deletions(-)

diff --git a/codex-rs/Cargo.lock b/codex-rs/Cargo.lock
index c891e76fa7..e17bf80510 100644
--- a/codex-rs/Cargo.lock
+++ b/codex-rs/Cargo.lock
@@ -1774,6 +1774,7 @@ version = "0.0.0"
 dependencies = [
  "anyhow",
  "assert_cmd",
+ "base64",
  "codex-core",
  "codex-protocol",
  "notify",
diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs
index 45e3d87aca..041d0869e0 100644
--- a/codex-rs/core/src/codex.rs
+++ b/codex-rs/core/src/codex.rs
@@ -642,6 +642,11 @@ impl Session {
         format!("auto-compact-{id}")
     }
 
+    async fn get_total_token_usage(&self) -> i64 {
+        let state = self.state.lock().await;
+        state.get_total_token_usage()
+    }
+
     async fn record_initial_history(&self, conversation_history: InitialHistory) {
         let turn_context = self.new_turn(SessionSettingsUpdate::default()).await;
         match conversation_history {
@@ -1891,19 +1896,14 @@ pub(crate) async fn run_task(
     {
         Ok(turn_output) => {
             let TurnRunResult {
-                processed_items,
-                total_token_usage,
+                processed_items, ..
             } = turn_output;
             let limit = turn_context
                 .client
                 .get_auto_compact_token_limit()
                 .unwrap_or(i64::MAX);
-            let total_usage_tokens = total_token_usage
-                .as_ref()
-                .map(TokenUsage::tokens_in_context_window);
-            let token_limit_reached = total_usage_tokens
-                .map(|tokens| tokens >= limit)
-                .unwrap_or(false);
+            let total_usage_tokens = sess.get_total_token_usage().await;
+            let token_limit_reached = total_usage_tokens >= limit;
 
             let (responses, items_to_record_in_conversation_history) =
                 process_items(processed_items, &sess, &turn_context).await;
@@ -2094,7 +2094,6 @@ pub struct ProcessedResponseItem {
 #[derive(Debug)]
 struct TurnRunResult {
     processed_items: Vec<ProcessedResponseItem>,
-    total_token_usage: Option<TokenUsage>,
 }
 #[allow(clippy::too_many_arguments)]
@@ -2267,10 +2266,7 @@ async fn try_run_turn(
         sess.send_event(&turn_context, msg).await;
     }
 
-    let result = TurnRunResult {
-        processed_items,
-        total_token_usage: token_usage.clone(),
-    };
+    let result = TurnRunResult { processed_items };
 
     return Ok(result);
 }
diff --git a/codex-rs/core/src/context_manager/history.rs b/codex-rs/core/src/context_manager/history.rs
index 8eefcbf855..72e260be50 100644
--- a/codex-rs/core/src/context_manager/history.rs
+++ b/codex-rs/core/src/context_manager/history.rs
@@ -2,6 +2,7 @@ use crate::codex::TurnContext;
 use crate::context_manager::normalize;
 use crate::truncate::TruncationPolicy;
 use crate::truncate::approx_token_count;
+use crate::truncate::approx_tokens_from_byte_count;
 use crate::truncate::truncate_function_output_items_with_policy;
 use crate::truncate::truncate_text;
 use codex_protocol::models::FunctionCallOutputPayload;
@@ -119,6 +120,52 @@ impl ContextManager {
         );
     }
 
+    fn get_non_last_reasoning_items_tokens(&self) -> usize {
+        // Estimate tokens for encrypted reasoning items before the last user message.
+        let Some(last_user_index) = self
+            .items
+            .iter()
+            .rposition(|item| matches!(item, ResponseItem::Message { role, ..
} if role == "user")) + else { + return 0usize; + }; + + let total_reasoning_bytes = self + .items + .iter() + .take(last_user_index) + .filter_map(|item| { + if let ResponseItem::Reasoning { + encrypted_content: Some(content), + .. + } = item + { + Some(content.len()) + } else { + None + } + }) + .fold(0usize, |acc, len| { + let decoded_bytes = len + .saturating_mul(3) + .checked_div(4) + .unwrap_or(0) + .saturating_sub(550); + acc.saturating_add(decoded_bytes) + }); + + let token_estimate = approx_tokens_from_byte_count(total_reasoning_bytes); + token_estimate as usize + } + + pub(crate) fn get_total_token_usage(&self) -> i64 { + self.token_info + .as_ref() + .map(|info| info.last_token_usage.total_tokens) + .unwrap_or(0) + .saturating_add(self.get_non_last_reasoning_items_tokens() as i64) + } + /// This function enforces a couple of invariants on the in-memory history: /// 1. every call (function/custom) has a corresponding output entry /// 2. every output has a corresponding call entry diff --git a/codex-rs/core/src/context_manager/history_tests.rs b/codex-rs/core/src/context_manager/history_tests.rs index 1a01604a70..d6e53bd323 100644 --- a/codex-rs/core/src/context_manager/history_tests.rs +++ b/codex-rs/core/src/context_manager/history_tests.rs @@ -56,6 +56,17 @@ fn reasoning_msg(text: &str) -> ResponseItem { } } +fn reasoning_with_encrypted_content(len: usize) -> ResponseItem { + ResponseItem::Reasoning { + id: String::new(), + summary: vec![ReasoningItemReasoningSummary::SummaryText { + text: "summary".to_string(), + }], + content: None, + encrypted_content: Some("a".repeat(len)), + } +} + fn truncate_exec_output(content: &str) -> String { truncate::truncate_text(content, TruncationPolicy::Tokens(EXEC_FORMAT_MAX_TOKENS)) } @@ -112,6 +123,26 @@ fn filters_non_api_messages() { ); } +#[test] +fn non_last_reasoning_tokens_return_zero_when_no_user_messages() { + let history = create_history_with_items(vec![reasoning_with_encrypted_content(800)]); + + assert_eq!(history.get_non_last_reasoning_items_tokens(), 0); +} + +#[test] +fn non_last_reasoning_tokens_ignore_entries_after_last_user() { + let history = create_history_with_items(vec![ + reasoning_with_encrypted_content(800), + user_msg("first"), + reasoning_with_encrypted_content(1_000), + user_msg("second"), + reasoning_with_encrypted_content(2_000), + ]); + + assert_eq!(history.get_non_last_reasoning_items_tokens(), 63); +} + #[test] fn get_history_for_prompt_drops_ghost_commits() { let items = vec![ResponseItem::GhostSnapshot { diff --git a/codex-rs/core/src/state/session.rs b/codex-rs/core/src/state/session.rs index 2dfa5199f1..caebac6b86 100644 --- a/codex-rs/core/src/state/session.rs +++ b/codex-rs/core/src/state/session.rs @@ -74,4 +74,8 @@ impl SessionState { pub(crate) fn set_token_usage_full(&mut self, context_window: i64) { self.history.set_token_usage_full(context_window); } + + pub(crate) fn get_total_token_usage(&self) -> i64 { + self.history.get_total_token_usage() + } } diff --git a/codex-rs/core/src/truncate.rs b/codex-rs/core/src/truncate.rs index bf883c0612..6e38ef6986 100644 --- a/codex-rs/core/src/truncate.rs +++ b/codex-rs/core/src/truncate.rs @@ -296,7 +296,7 @@ fn approx_bytes_for_tokens(tokens: usize) -> usize { tokens.saturating_mul(APPROX_BYTES_PER_TOKEN) } -fn approx_tokens_from_byte_count(bytes: usize) -> u64 { +pub(crate) fn approx_tokens_from_byte_count(bytes: usize) -> u64 { let bytes_u64 = bytes as u64; bytes_u64.saturating_add((APPROX_BYTES_PER_TOKEN as u64).saturating_sub(1)) / 
(APPROX_BYTES_PER_TOKEN as u64) diff --git a/codex-rs/core/tests/common/Cargo.toml b/codex-rs/core/tests/common/Cargo.toml index 094f33a26b..4c47fbb527 100644 --- a/codex-rs/core/tests/common/Cargo.toml +++ b/codex-rs/core/tests/common/Cargo.toml @@ -9,6 +9,7 @@ path = "lib.rs" [dependencies] anyhow = { workspace = true } assert_cmd = { workspace = true } +base64 = { workspace = true } codex-core = { workspace = true } codex-protocol = { workspace = true } notify = { workspace = true } diff --git a/codex-rs/core/tests/common/responses.rs b/codex-rs/core/tests/common/responses.rs index b84e966399..7cc59f26ef 100644 --- a/codex-rs/core/tests/common/responses.rs +++ b/codex-rs/core/tests/common/responses.rs @@ -2,6 +2,7 @@ use std::sync::Arc; use std::sync::Mutex; use anyhow::Result; +use base64::Engine; use serde_json::Value; use wiremock::BodyPrintLimit; use wiremock::Match; @@ -297,12 +298,18 @@ pub fn ev_reasoning_item(id: &str, summary: &[&str], raw_content: &[&str]) -> Va .map(|text| serde_json::json!({"type": "summary_text", "text": text})) .collect(); + let overhead = "b".repeat(550); + let raw_content_joined = raw_content.join(""); + let encrypted_content = + base64::engine::general_purpose::STANDARD.encode(overhead + raw_content_joined.as_str()); + let mut event = serde_json::json!({ "type": "response.output_item.done", "item": { "type": "reasoning", "id": id, "summary": summary_entries, + "encrypted_content": encrypted_content, } }); diff --git a/codex-rs/core/tests/suite/compact.rs b/codex-rs/core/tests/suite/compact.rs index 1324d3edb9..49d9c9fdcf 100644 --- a/codex-rs/core/tests/suite/compact.rs +++ b/codex-rs/core/tests/suite/compact.rs @@ -1879,3 +1879,153 @@ async fn auto_compact_triggers_after_function_call_over_95_percent_usage() { "auto compact request should include the summarization prompt after exceeding 95% (limit {limit})" ); } + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn auto_compact_counts_encrypted_reasoning_before_last_user() { + skip_if_no_network!(); + + let server = start_mock_server().await; + + let first_user = "COUNT_PRE_LAST_REASONING"; + let second_user = "TRIGGER_COMPACT_AT_LIMIT"; + + let pre_last_encrypted = "a".repeat(2_400); + let post_last_encrypted = "b".repeat(4_000); + + let pre_last_reasoning = serde_json::json!({ + "type": "response.output_item.done", + "item": { + "type": "reasoning", + "id": "pre-reasoning", + "summary": [{"type": "summary_text", "text": "pre"}], + "encrypted_content": pre_last_encrypted, + } + }); + + let post_last_reasoning = serde_json::json!({ + "type": "response.output_item.done", + "item": { + "type": "reasoning", + "id": "post-reasoning", + "summary": [{"type": "summary_text", "text": "post"}], + "encrypted_content": post_last_encrypted, + } + }); + + // First turn seeds encrypted reasoning before the eventual last user. + let first_turn = sse(vec![pre_last_reasoning, ev_completed_with_tokens("r1", 10)]); + + // Second turn adds a reasoning item after the last user (should be ignored by the estimate) + // and reports low token usage, so compaction only triggers if the pre-last reasoning is counted. 
+ let second_turn = sse(vec![ + post_last_reasoning, + ev_completed_with_tokens("r2", 80), + ]); + + let auto_summary_payload = auto_summary(AUTO_SUMMARY_TEXT); + let auto_compact_turn = sse(vec![ + ev_assistant_message("m3", &auto_summary_payload), + ev_completed_with_tokens("r3", 1), + ]); + + let resume_turn = sse(vec![ + ev_assistant_message("m4", FINAL_REPLY), + ev_completed_with_tokens("r4", 1), + ]); + + let first_matcher = move |req: &wiremock::Request| { + let body = std::str::from_utf8(&req.body).unwrap_or(""); + body.contains(first_user) && !body_contains_text(body, SUMMARIZATION_PROMPT) + }; + mount_sse_once_match(&server, first_matcher, first_turn).await; + + let second_matcher = move |req: &wiremock::Request| { + let body = std::str::from_utf8(&req.body).unwrap_or(""); + body.contains(first_user) + && body.contains(second_user) + && !body_contains_text(body, SUMMARIZATION_PROMPT) + }; + mount_sse_once_match(&server, second_matcher, second_turn).await; + + let compact_matcher = |req: &wiremock::Request| { + let body = std::str::from_utf8(&req.body).unwrap_or(""); + body_contains_text(body, SUMMARIZATION_PROMPT) + }; + let auto_compact_mock = mount_sse_once_match(&server, compact_matcher, auto_compact_turn).await; + + let resume_marker = auto_summary_payload.clone(); + let resume_matcher = move |req: &wiremock::Request| { + let body = std::str::from_utf8(&req.body).unwrap_or(""); + body.contains(&resume_marker) && !body_contains_text(body, SUMMARIZATION_PROMPT) + }; + let resume_mock = mount_sse_once_match(&server, resume_matcher, resume_turn).await; + + let model_provider = ModelProviderInfo { + base_url: Some(format!("{}/v1", server.uri())), + ..built_in_model_providers()["openai"].clone() + }; + + let home = TempDir::new().unwrap(); + let mut config = load_default_config_for_test(&home); + config.model_provider = model_provider; + set_test_compact_prompt(&mut config); + config.model_auto_compact_token_limit = Some(300); + + let conversation_manager = ConversationManager::with_auth(CodexAuth::from_api_key("dummy")); + let codex = conversation_manager + .new_conversation(config) + .await + .unwrap() + .conversation; + + codex + .submit(Op::UserInput { + items: vec![UserInput::Text { + text: first_user.into(), + }], + }) + .await + .unwrap(); + wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await; + + codex + .submit(Op::UserInput { + items: vec![UserInput::Text { + text: second_user.into(), + }], + }) + .await + .unwrap(); + wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await; + + let requests = server.received_requests().await.unwrap(); + assert_eq!( + requests.len(), + 4, + "second turn should trigger an auto compact and follow-up resume" + ); + + let auto_compact_index = requests + .iter() + .position(|req| { + let body = std::str::from_utf8(&req.body).unwrap_or(""); + body_contains_text(body, SUMMARIZATION_PROMPT) + }) + .expect("auto compact request missing"); + assert_eq!( + auto_compact_index, 2, + "auto compact should be the third request (after two user turns)" + ); + + let resume_index = requests + .iter() + .position(|req| { + let body = std::str::from_utf8(&req.body).unwrap_or(""); + body.contains(&auto_summary_payload) && !body_contains_text(body, SUMMARIZATION_PROMPT) + }) + .expect("resume request missing after auto compact"); + assert_eq!(resume_index, 3, "resume request should follow auto compact"); + + assert_eq!(auto_compact_mock.requests().len(), 1); + assert_eq!(resume_mock.requests().len(), 1); +} From 
1009560500776e1538754a9fb3f80536cd1f6d11 Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Fri, 21 Nov 2025 14:11:59 -0800 Subject: [PATCH 2/9] test --- codex-rs/core/tests/suite/compact.rs | 181 +++++++++------------------ 1 file changed, 62 insertions(+), 119 deletions(-) diff --git a/codex-rs/core/tests/suite/compact.rs b/codex-rs/core/tests/suite/compact.rs index 49d9c9fdcf..696e5da313 100644 --- a/codex-rs/core/tests/suite/compact.rs +++ b/codex-rs/core/tests/suite/compact.rs @@ -1889,143 +1889,86 @@ async fn auto_compact_counts_encrypted_reasoning_before_last_user() { let first_user = "COUNT_PRE_LAST_REASONING"; let second_user = "TRIGGER_COMPACT_AT_LIMIT"; - let pre_last_encrypted = "a".repeat(2_400); - let post_last_encrypted = "b".repeat(4_000); - - let pre_last_reasoning = serde_json::json!({ - "type": "response.output_item.done", - "item": { - "type": "reasoning", - "id": "pre-reasoning", - "summary": [{"type": "summary_text", "text": "pre"}], - "encrypted_content": pre_last_encrypted, - } - }); - - let post_last_reasoning = serde_json::json!({ - "type": "response.output_item.done", - "item": { - "type": "reasoning", - "id": "post-reasoning", - "summary": [{"type": "summary_text", "text": "post"}], - "encrypted_content": post_last_encrypted, - } - }); - - // First turn seeds encrypted reasoning before the eventual last user. - let first_turn = sse(vec![pre_last_reasoning, ev_completed_with_tokens("r1", 10)]); + let pre_last_reasoning_content = "a".repeat(2_400); + let post_last_reasoning_content = "b".repeat(4_000); - // Second turn adds a reasoning item after the last user (should be ignored by the estimate) - // and reports low token usage, so compaction only triggers if the pre-last reasoning is counted. + let first_turn = sse(vec![ + ev_reasoning_item("pre-reasoning", &["pre"], &[&pre_last_reasoning_content]), + ev_completed_with_tokens("r1", 10), + ]); let second_turn = sse(vec![ - post_last_reasoning, + ev_reasoning_item("post-reasoning", &["post"], &[&post_last_reasoning_content]), ev_completed_with_tokens("r2", 80), ]); let auto_summary_payload = auto_summary(AUTO_SUMMARY_TEXT); - let auto_compact_turn = sse(vec![ - ev_assistant_message("m3", &auto_summary_payload), - ev_completed_with_tokens("r3", 1), - ]); - - let resume_turn = sse(vec![ - ev_assistant_message("m4", FINAL_REPLY), - ev_completed_with_tokens("r4", 1), - ]); - - let first_matcher = move |req: &wiremock::Request| { - let body = std::str::from_utf8(&req.body).unwrap_or(""); - body.contains(first_user) && !body_contains_text(body, SUMMARIZATION_PROMPT) - }; - mount_sse_once_match(&server, first_matcher, first_turn).await; - - let second_matcher = move |req: &wiremock::Request| { - let body = std::str::from_utf8(&req.body).unwrap_or(""); - body.contains(first_user) - && body.contains(second_user) - && !body_contains_text(body, SUMMARIZATION_PROMPT) - }; - mount_sse_once_match(&server, second_matcher, second_turn).await; - - let compact_matcher = |req: &wiremock::Request| { - let body = std::str::from_utf8(&req.body).unwrap_or(""); - body_contains_text(body, SUMMARIZATION_PROMPT) - }; - let auto_compact_mock = mount_sse_once_match(&server, compact_matcher, auto_compact_turn).await; - - let resume_marker = auto_summary_payload.clone(); - let resume_matcher = move |req: &wiremock::Request| { - let body = std::str::from_utf8(&req.body).unwrap_or(""); - body.contains(&resume_marker) && !body_contains_text(body, SUMMARIZATION_PROMPT) - }; - let resume_mock = mount_sse_once_match(&server, resume_matcher, 
resume_turn).await; - - let model_provider = ModelProviderInfo { - base_url: Some(format!("{}/v1", server.uri())), - ..built_in_model_providers()["openai"].clone() - }; - - let home = TempDir::new().unwrap(); - let mut config = load_default_config_for_test(&home); - config.model_provider = model_provider; - set_test_compact_prompt(&mut config); - config.model_auto_compact_token_limit = Some(300); - - let conversation_manager = ConversationManager::with_auth(CodexAuth::from_api_key("dummy")); - let codex = conversation_manager - .new_conversation(config) - .await - .unwrap() - .conversation; + let request_log = mount_sse_sequence( + &server, + vec![ + // Turn 1: reasoning before last user (should count). + first_turn, + // Turn 2: reasoning after last user (should be ignored for compaction). + second_turn, + // Turn 3: auto compact, only if the pre-last reasoning was counted. + sse(vec![ + ev_assistant_message("m3", &auto_summary_payload), + ev_completed_with_tokens("r3", 1), + ]), + // Turn 4: resume after compaction. + sse(vec![ + ev_assistant_message("m4", FINAL_REPLY), + ev_completed_with_tokens("r4", 1), + ]), + ], + ) + .await; - codex - .submit(Op::UserInput { - items: vec![UserInput::Text { - text: first_user.into(), - }], + let codex = test_codex() + .with_config(|config| { + set_test_compact_prompt(config); + config.model_auto_compact_token_limit = Some(300); }) + .build(&server) .await - .unwrap(); - wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await; + .expect("build codex") + .codex; - codex - .submit(Op::UserInput { - items: vec![UserInput::Text { - text: second_user.into(), - }], - }) - .await - .unwrap(); - wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await; + for user in [first_user, second_user] { + codex + .submit(Op::UserInput { + items: vec![UserInput::Text { text: user.into() }], + }) + .await + .unwrap(); + wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await; + } - let requests = server.received_requests().await.unwrap(); + let requests = request_log.requests(); assert_eq!( requests.len(), 4, "second turn should trigger an auto compact and follow-up resume" ); - let auto_compact_index = requests - .iter() - .position(|req| { - let body = std::str::from_utf8(&req.body).unwrap_or(""); - body_contains_text(body, SUMMARIZATION_PROMPT) - }) - .expect("auto compact request missing"); - assert_eq!( - auto_compact_index, 2, - "auto compact should be the third request (after two user turns)" + // If we double-counted the post-last reasoning, compaction would fire here. + let second_request_body = requests[1].body_json().to_string(); + assert!( + !body_contains_text(&second_request_body, SUMMARIZATION_PROMPT), + "summarization should not be requested before evaluating the second turn's usage" ); - let resume_index = requests - .iter() - .position(|req| { - let body = std::str::from_utf8(&req.body).unwrap_or(""); - body.contains(&auto_summary_payload) && !body_contains_text(body, SUMMARIZATION_PROMPT) - }) - .expect("resume request missing after auto compact"); - assert_eq!(resume_index, 3, "resume request should follow auto compact"); + // Compaction should happen only after considering pre-last encrypted reasoning. 
+ let auto_compact_body = requests[2].body_json().to_string(); + assert!( + body_contains_text(&auto_compact_body, SUMMARIZATION_PROMPT), + "auto compact should be the third request (after two user turns)" + ); - assert_eq!(auto_compact_mock.requests().len(), 1); - assert_eq!(resume_mock.requests().len(), 1); + // Final request resumes normal flow. + let resume_body = requests[3].body_json().to_string(); + assert!( + body_contains_text(&resume_body, FINAL_REPLY) + || body_contains_text(&resume_body, &auto_summary_payload), + "resume request should follow auto compact" + ); } From a348a0853061a4bbc1b5997440fb7e76cdeea2f4 Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Fri, 21 Nov 2025 14:19:10 -0800 Subject: [PATCH 3/9] tests --- codex-rs/core/tests/suite/compact.rs | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/codex-rs/core/tests/suite/compact.rs b/codex-rs/core/tests/suite/compact.rs index 696e5da313..e53ff2e1a8 100644 --- a/codex-rs/core/tests/suite/compact.rs +++ b/codex-rs/core/tests/suite/compact.rs @@ -481,22 +481,33 @@ async fn multiple_auto_compact_per_task_runs_after_token_limit_hit() { // mock responses from the model + let reasoning_response_1 = ev_reasoning_item("m1", &["I will create a react app"], &[]); + // first chunk of work let model_reasoning_response_1_sse = sse(vec![ - ev_reasoning_item("m1", &["I will create a react app"], &[]), + reasoning_response_1.clone(), ev_local_shell_call("r1-shell", "completed", vec!["echo", "make-react"]), ev_completed_with_tokens("r1", token_count_used), ]); + let encrypted_content_1 = reasoning_response_1["item"]["encrypted_content"] + .as_str() + .unwrap(); + // first compaction response let model_compact_response_1_sse = sse(vec![ ev_assistant_message("m2", first_summary_text), ev_completed_with_tokens("r2", token_count_used_after_compaction), ]); + let reasoning_response_2 = ev_reasoning_item("m3", &["I will create a node app"], &[]); + let encrypted_content_2 = reasoning_response_2["item"]["encrypted_content"] + .as_str() + .unwrap(); + // second chunk of work let model_reasoning_response_2_sse = sse(vec![ - ev_reasoning_item("m3", &["I will create a node app"], &[]), + reasoning_response_2.clone(), ev_local_shell_call("r3-shell", "completed", vec!["echo", "make-node"]), ev_completed_with_tokens("r3", token_count_used), ]); @@ -507,6 +518,11 @@ async fn multiple_auto_compact_per_task_runs_after_token_limit_hit() { ev_completed_with_tokens("r4", token_count_used_after_compaction), ]); + let reasoning_response_3 = ev_reasoning_item("m6", &["I will create a python app"], &[]); + let encrypted_content_3 = reasoning_response_3["item"]["encrypted_content"] + .as_str() + .unwrap(); + // third chunk of work let model_reasoning_response_3_sse = sse(vec![ ev_reasoning_item("m6", &["I will create a python app"], &[]), @@ -635,7 +651,7 @@ async fn multiple_auto_compact_per_task_runs_after_token_limit_hit() { }, { "content": null, - "encrypted_content": null, + "encrypted_content": encrypted_content_1, "summary": [ { "text": "I will create a react app", @@ -745,7 +761,7 @@ async fn multiple_auto_compact_per_task_runs_after_token_limit_hit() { }, { "content": null, - "encrypted_content": null, + "encrypted_content": encrypted_content_2, "summary": [ { "text": "I will create a node app", @@ -855,7 +871,7 @@ async fn multiple_auto_compact_per_task_runs_after_token_limit_hit() { }, { "content": null, - "encrypted_content": null, + "encrypted_content": encrypted_content_3, "summary": [ { "text": "I will 
create a python app",

From fbd7dfba95be0c38cbe0c1d2d9c12ca58db3e0b1 Mon Sep 17 00:00:00 2001
From: Ahmed Ibrahim
Date: Fri, 21 Nov 2025 14:23:42 -0800
Subject: [PATCH 4/9] tests

---
 codex-rs/core/src/context_manager/history_tests.rs | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/codex-rs/core/src/context_manager/history_tests.rs b/codex-rs/core/src/context_manager/history_tests.rs
index d6e53bd323..942029fc7c 100644
--- a/codex-rs/core/src/context_manager/history_tests.rs
+++ b/codex-rs/core/src/context_manager/history_tests.rs
@@ -139,7 +139,9 @@ fn non_last_reasoning_tokens_ignore_entries_after_last_user() {
         user_msg("second"),
         reasoning_with_encrypted_content(2_000),
     ]);
-
+    // first: (800 * 0.75 - 550) / 4 = 12.5 tokens
+    // second: (1000 * 0.75 - 550) / 4 = 50 tokens
+    // first + second = 62.5, rounded up to 63
     assert_eq!(history.get_non_last_reasoning_items_tokens(), 63);
 }
 

From 56610f3e1904a7691eda742c11bec3822292c786 Mon Sep 17 00:00:00 2001
From: Ahmed Ibrahim
Date: Fri, 21 Nov 2025 14:24:31 -0800
Subject: [PATCH 5/9] tests

---
 codex-rs/core/src/codex.rs           | 4 +---
 codex-rs/core/tests/suite/compact.rs | 7 +++----
 2 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs
index 041d0869e0..5bb07983bb 100644
--- a/codex-rs/core/src/codex.rs
+++ b/codex-rs/core/src/codex.rs
@@ -1895,9 +1895,7 @@ pub(crate) async fn run_task(
         .await
     {
         Ok(turn_output) => {
-            let TurnRunResult {
-                processed_items, ..
-            } = turn_output;
+            let TurnRunResult { processed_items } = turn_output;
             let limit = turn_context
                 .client
                 .get_auto_compact_token_limit()
diff --git a/codex-rs/core/tests/suite/compact.rs b/codex-rs/core/tests/suite/compact.rs
index e53ff2e1a8..c6e30981e7 100644
--- a/codex-rs/core/tests/suite/compact.rs
+++ b/codex-rs/core/tests/suite/compact.rs
@@ -482,6 +482,9 @@ async fn multiple_auto_compact_per_task_runs_after_token_limit_hit() {
     // mock responses from the model
 
     let reasoning_response_1 = ev_reasoning_item("m1", &["I will create a react app"], &[]);
+    let encrypted_content_1 = reasoning_response_1["item"]["encrypted_content"]
+        .as_str()
+        .unwrap();
 
     // first chunk of work
     let model_reasoning_response_1_sse = sse(vec![
@@ -490,10 +493,6 @@ async fn multiple_auto_compact_per_task_runs_after_token_limit_hit() {
         ev_completed_with_tokens("r1", token_count_used),
     ]);
 
-    let encrypted_content_1 = reasoning_response_1["item"]["encrypted_content"]
-        .as_str()
-        .unwrap();
-
     // first compaction response
     let model_compact_response_1_sse = sse(vec![
         ev_assistant_message("m2", first_summary_text),

From 2439e932be525c828b561182d200299511a3b2d5 Mon Sep 17 00:00:00 2001
From: Ahmed Ibrahim
Date: Fri, 21 Nov 2025 18:05:52 -0800
Subject: [PATCH 6/9] feedback

---
 codex-rs/core/src/codex.rs                   | 15 ++++-----------
 codex-rs/core/src/context_manager/history.rs | 18 ++++++++++--------
 2 files changed, 14 insertions(+), 19 deletions(-)

diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs
index 5bb07983bb..6f56c09446 100644
--- a/codex-rs/core/src/codex.rs
+++ b/codex-rs/core/src/codex.rs
@@ -1895,7 +1895,7 @@ pub(crate) async fn run_task(
         .await
     {
         Ok(turn_output) => {
-            let TurnRunResult { processed_items } = turn_output;
+            let processed_items = turn_output;
             let limit = turn_context
                 .client
                 .get_auto_compact_token_limit()
@@ -1958,7 +1958,7 @@ async fn run_turn(
     turn_diff_tracker: SharedTurnDiffTracker,
     input: Vec<ResponseItem>,
     cancellation_token: CancellationToken,
-) -> CodexResult<TurnRunResult> {
+) -> CodexResult<Vec<ProcessedResponseItem>> {
     let mcp_tools = sess
         .mcp_connection_manager
@@ -2089,11 +2089,6 @@ pub struct ProcessedResponseItem {
     pub response: Option<ResponseInputItem>,
 }
 
-#[derive(Debug)]
-struct TurnRunResult {
-    processed_items: Vec<ProcessedResponseItem>,
-}
-
 #[allow(clippy::too_many_arguments)]
 async fn try_run_turn(
     router: Arc<ToolRouter>,
@@ -2102,7 +2097,7 @@ async fn try_run_turn(
     turn_diff_tracker: SharedTurnDiffTracker,
     prompt: &Prompt,
     cancellation_token: CancellationToken,
-) -> CodexResult<TurnRunResult> {
+) -> CodexResult<Vec<ProcessedResponseItem>> {
     let rollout_item = RolloutItem::TurnContext(TurnContextItem {
         cwd: turn_context.cwd.clone(),
         approval_policy: turn_context.approval_policy,
@@ -2264,9 +2259,7 @@ async fn try_run_turn(
         sess.send_event(&turn_context, msg).await;
     }
 
-    let result = TurnRunResult { processed_items };
-
-    return Ok(result);
+    return Ok(processed_items);
 }
         ResponseEvent::OutputTextDelta(delta) => {
             // In review child threads, suppress assistant text deltas; the
diff --git a/codex-rs/core/src/context_manager/history.rs b/codex-rs/core/src/context_manager/history.rs
index 72e260be50..a6d6907d90 100644
--- a/codex-rs/core/src/context_manager/history.rs
+++ b/codex-rs/core/src/context_manager/history.rs
@@ -145,19 +145,21 @@ impl ContextManager {
                     None
                 }
             })
-            .fold(0usize, |acc, len| {
-                let decoded_bytes = len
-                    .saturating_mul(3)
-                    .checked_div(4)
-                    .unwrap_or(0)
-                    .saturating_sub(550);
-                acc.saturating_add(decoded_bytes)
-            });
+            .map(Self::estimate_reasoning_length)
+            .fold(0usize, usize::saturating_add);
 
         let token_estimate = approx_tokens_from_byte_count(total_reasoning_bytes);
         token_estimate as usize
     }
 
+    fn estimate_reasoning_length(encoded_len: usize) -> usize {
+        encoded_len
+            .saturating_mul(3)
+            .checked_div(4)
+            .unwrap_or(0)
+            .saturating_sub(550)
+    }
+
     pub(crate) fn get_total_token_usage(&self) -> i64 {
         self.token_info
             .as_ref()

From cda98cc2b5a3a613c700873ffe0464039615a509 Mon Sep 17 00:00:00 2001
From: Ahmed Ibrahim
Date: Fri, 21 Nov 2025 18:22:33 -0800
Subject: [PATCH 7/9] feedback

---
 codex-rs/core/tests/suite/compact.rs | 78 +++++++++++++++++-----------
 1 file changed, 47 insertions(+), 31 deletions(-)

diff --git a/codex-rs/core/tests/suite/compact.rs b/codex-rs/core/tests/suite/compact.rs
index c6e30981e7..aa74ec8978 100644
--- a/codex-rs/core/tests/suite/compact.rs
+++ b/codex-rs/core/tests/suite/compact.rs
@@ -7,6 +7,7 @@ use codex_core::built_in_model_providers;
 use codex_core::compact::SUMMARIZATION_PROMPT;
 use codex_core::compact::SUMMARY_PREFIX;
 use codex_core::config::Config;
+use codex_core::features::Feature;
 use codex_core::protocol::EventMsg;
 use codex_core::protocol::Op;
 use codex_core::protocol::RolloutItem;
@@ -27,6 +28,7 @@ use core_test_support::responses::ev_assistant_message;
 use core_test_support::responses::ev_completed;
 use core_test_support::responses::ev_completed_with_tokens;
 use core_test_support::responses::ev_function_call;
+use core_test_support::responses::mount_compact_json_once;
 use core_test_support::responses::mount_sse_once;
 use core_test_support::responses::mount_sse_once_match;
 use core_test_support::responses::mount_sse_sequence;
@@ -1915,8 +1917,11 @@ async fn auto_compact_counts_encrypted_reasoning_before_last_user() {
         ev_reasoning_item("post-reasoning", &["post"], &[&post_last_reasoning_content]),
         ev_completed_with_tokens("r2", 80),
     ]);
+    let resume_turn = sse(vec![
+        ev_assistant_message("m4", FINAL_REPLY),
+        ev_completed_with_tokens("r4", 1),
+    ]);
 
-    let auto_summary_payload = auto_summary(AUTO_SUMMARY_TEXT);
     let request_log = mount_sse_sequence(
         &server,
         vec![
@@ -1924,31 +1929,35 @@ async fn 
auto_compact_counts_encrypted_reasoning_before_last_user() { first_turn, // Turn 2: reasoning after last user (should be ignored for compaction). second_turn, - // Turn 3: auto compact, only if the pre-last reasoning was counted. - sse(vec![ - ev_assistant_message("m3", &auto_summary_payload), - ev_completed_with_tokens("r3", 1), - ]), - // Turn 4: resume after compaction. - sse(vec![ - ev_assistant_message("m4", FINAL_REPLY), - ev_completed_with_tokens("r4", 1), - ]), + // Turn 3: resume after remote compaction. + resume_turn, ], ) .await; + let compacted_history = vec![codex_protocol::models::ResponseItem::Message { + id: None, + role: "assistant".to_string(), + content: vec![codex_protocol::models::ContentItem::OutputText { + text: "REMOTE_COMPACT_SUMMARY".to_string(), + }], + }]; + let compact_mock = + mount_compact_json_once(&server, serde_json::json!({ "output": compacted_history })).await; + let codex = test_codex() + .with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing()) .with_config(|config| { set_test_compact_prompt(config); config.model_auto_compact_token_limit = Some(300); + config.features.enable(Feature::RemoteCompaction); }) .build(&server) .await .expect("build codex") .codex; - for user in [first_user, second_user] { + for (idx, user) in [first_user, second_user].into_iter().enumerate() { codex .submit(Op::UserInput { items: vec![UserInput::Text { text: user.into() }], @@ -1956,34 +1965,41 @@ async fn auto_compact_counts_encrypted_reasoning_before_last_user() { .await .unwrap(); wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await; + + if idx == 0 { + assert!( + compact_mock.requests().is_empty(), + "remote compaction should not run after the first turn" + ); + } } + let compact_requests = compact_mock.requests(); + assert_eq!( + compact_requests.len(), + 1, + "remote compaction should run once after the second turn" + ); + assert_eq!( + compact_requests[0].path(), + "/v1/responses/compact", + "remote compaction should hit the compact endpoint" + ); + let requests = request_log.requests(); assert_eq!( requests.len(), - 4, - "second turn should trigger an auto compact and follow-up resume" + 3, + "conversation should include two user turns and a post-compaction resume" ); - - // If we double-counted the post-last reasoning, compaction would fire here. let second_request_body = requests[1].body_json().to_string(); assert!( - !body_contains_text(&second_request_body, SUMMARIZATION_PROMPT), - "summarization should not be requested before evaluating the second turn's usage" + !second_request_body.contains("REMOTE_COMPACT_SUMMARY"), + "second turn should not include compacted history" ); - - // Compaction should happen only after considering pre-last encrypted reasoning. - let auto_compact_body = requests[2].body_json().to_string(); - assert!( - body_contains_text(&auto_compact_body, SUMMARIZATION_PROMPT), - "auto compact should be the third request (after two user turns)" - ); - - // Final request resumes normal flow. 
-    let resume_body = requests[3].body_json().to_string();
+    let resume_body = requests[2].body_json().to_string();
     assert!(
-        body_contains_text(&resume_body, FINAL_REPLY)
-            || body_contains_text(&resume_body, &auto_summary_payload),
-        "resume request should follow auto compact"
+        resume_body.contains("REMOTE_COMPACT_SUMMARY") || resume_body.contains(FINAL_REPLY),
+        "resume request should follow remote compact and use compacted history"
     );
 }

From a2c3bc00996c85290382b071317d39ad4181951c Mon Sep 17 00:00:00 2001
From: Ahmed Ibrahim
Date: Fri, 21 Nov 2025 18:38:36 -0800
Subject: [PATCH 8/9] const

---
 codex-rs/core/src/context_manager/history.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/codex-rs/core/src/context_manager/history.rs b/codex-rs/core/src/context_manager/history.rs
index a6d6907d90..035d0d2867 100644
--- a/codex-rs/core/src/context_manager/history.rs
+++ b/codex-rs/core/src/context_manager/history.rs
@@ -157,7 +157,7 @@ impl ContextManager {
             .saturating_mul(3)
             .checked_div(4)
             .unwrap_or(0)
-            .saturating_sub(550)
+            .saturating_sub(650)
     }
 
     pub(crate) fn get_total_token_usage(&self) -> i64 {

From 313c01ad6ad019d2d696f93badea8f23fc9be528 Mon Sep 17 00:00:00 2001
From: Ahmed Ibrahim
Date: Fri, 21 Nov 2025 18:50:06 -0800
Subject: [PATCH 9/9] const

---
 codex-rs/core/src/context_manager/history_tests.rs | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/codex-rs/core/src/context_manager/history_tests.rs b/codex-rs/core/src/context_manager/history_tests.rs
index 942029fc7c..cb59c97231 100644
--- a/codex-rs/core/src/context_manager/history_tests.rs
+++ b/codex-rs/core/src/context_manager/history_tests.rs
@@ -133,16 +133,16 @@ fn non_last_reasoning_tokens_return_zero_when_no_user_messages() {
 #[test]
 fn non_last_reasoning_tokens_ignore_entries_after_last_user() {
     let history = create_history_with_items(vec![
-        reasoning_with_encrypted_content(800),
+        reasoning_with_encrypted_content(900),
         user_msg("first"),
         reasoning_with_encrypted_content(1_000),
         user_msg("second"),
         reasoning_with_encrypted_content(2_000),
     ]);
-    // first: (800 * 0.75 - 550) / 4 = 12.5 tokens
-    // second: (1000 * 0.75 - 550) / 4 = 50 tokens
-    // first + second = 62.5, rounded up to 63
-    assert_eq!(history.get_non_last_reasoning_items_tokens(), 63);
+    // first: (900 * 0.75 - 650) / 4 = 6.25 tokens
+    // second: (1000 * 0.75 - 650) / 4 = 25 tokens
+    // first + second = 31.25, rounded up to 32
+    assert_eq!(history.get_non_last_reasoning_items_tokens(), 32);
 }
 
 #[test]
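Note on the arithmetic this series converges on, as a self-contained sketch. Assumptions: the 4-bytes-per-token constant mirrors approx_tokens_from_byte_count, the 650-byte envelope matches PATCH 8/9, and all function and constant names here are illustrative rather than the crate's own.

    // Base64 decodes to roughly 3/4 of the encoded length; a fixed envelope of
    // non-content bytes per encrypted reasoning item is assumed and subtracted.
    const APPROX_BYTES_PER_TOKEN: u64 = 4;
    const REASONING_OVERHEAD_BYTES: usize = 650;

    // Approximate decoded payload bytes for one encrypted_content string.
    fn estimate_reasoning_bytes(encoded_len: usize) -> usize {
        encoded_len
            .saturating_mul(3)
            .checked_div(4)
            .unwrap_or(0)
            .saturating_sub(REASONING_OVERHEAD_BYTES)
    }

    // Ceiling division from bytes to tokens, as in approx_tokens_from_byte_count.
    fn approx_tokens(bytes: usize) -> u64 {
        (bytes as u64).saturating_add(APPROX_BYTES_PER_TOKEN - 1) / APPROX_BYTES_PER_TOKEN
    }

    fn main() {
        // Mirrors the final history test: encoded lengths 900 and 1_000 precede
        // the last user message; bytes are summed first, then converted once.
        let total: usize = [900usize, 1_000]
            .into_iter()
            .map(estimate_reasoning_bytes)
            .sum();
        assert_eq!(total, 25 + 100);
        assert_eq!(approx_tokens(total), 32); // ceil(125 / 4)
    }

Summing bytes before converting matters: converting each item separately and rounding per item would yield 7 + 25 = 32 here only by coincidence; the implementation rounds exactly once, on the total.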