From 020809453255557de3ab385aac4abaf46da736a5 Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Fri, 14 Nov 2025 22:04:33 -0800 Subject: [PATCH 01/68] tighten --- codex-rs/core/src/truncate.rs | 15 +++++++++++++-- codex-rs/core/src/util.rs | 7 ++++++- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/codex-rs/core/src/truncate.rs b/codex-rs/core/src/truncate.rs index c82e088560..6ea3f2dc0f 100644 --- a/codex-rs/core/src/truncate.rs +++ b/codex-rs/core/src/truncate.rs @@ -2,11 +2,12 @@ //! and suffix on UTF-8 boundaries, and helpers for line/token‑based truncation //! used across the core crate. -use crate::util::error_or_panic; use codex_protocol::models::FunctionCallOutputContentItem; use codex_utils_string::take_bytes_at_char_boundary; use codex_utils_string::take_last_bytes_at_char_boundary; use codex_utils_tokenizer::Tokenizer; +use serde_json::Value; +use crate::util::error_or_panic; /// Model-formatting limits: clients get full streams; only content sent to the model is truncated. pub const MODEL_FORMAT_MAX_BYTES: usize = 10 * 1024; // 10 KiB @@ -149,7 +150,17 @@ fn truncate_formatted_exec_output( } fn debug_panic_on_double_truncation(content: &str) { - if content.contains("Total output lines:") && content.contains("omitted") { + if let Ok(json) = serde_json::from_str::(content) { + if let Some(output) = json.get("output") + && let Some(text) = output.as_str() + && text.starts_with("Total output lines:") + && text.contains("omitted") + { + error_or_panic(format!( + "FunctionCallOutput content was already truncated before ContextManager::record_items; this would cause double truncation {content}" + )); + } + } else if content.starts_with("Total output lines:") && content.contains("omitted") { error_or_panic(format!( "FunctionCallOutput content was already truncated before ContextManager::record_items; this would cause double truncation {content}" )); diff --git a/codex-rs/core/src/util.rs b/codex-rs/core/src/util.rs index 0bce5b4439..9f6dae5fd0 100644 --- a/codex-rs/core/src/util.rs +++ b/codex-rs/core/src/util.rs @@ -16,7 +16,12 @@ pub(crate) fn backoff(attempt: u64) -> Duration { pub(crate) fn error_or_panic(message: String) { if cfg!(debug_assertions) || env!("CARGO_PKG_VERSION").contains("alpha") { - panic!("{message}"); + error!("{message}"); + panic!( + "This is an intentional panic to catch errors in debug and alpha builds. + If you don't know why this panic is happening, please report the issue to the Codex team in the appropriate channels including `/feedback`. + {message}" + ); } else { error!("{message}"); } From e91a3b87670cab06053407a0627aae11ff22b346 Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Fri, 14 Nov 2025 22:06:29 -0800 Subject: [PATCH 02/68] tighten --- codex-rs/core/src/util.rs | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/codex-rs/core/src/util.rs b/codex-rs/core/src/util.rs index 9f6dae5fd0..0bce5b4439 100644 --- a/codex-rs/core/src/util.rs +++ b/codex-rs/core/src/util.rs @@ -16,12 +16,7 @@ pub(crate) fn backoff(attempt: u64) -> Duration { pub(crate) fn error_or_panic(message: String) { if cfg!(debug_assertions) || env!("CARGO_PKG_VERSION").contains("alpha") { - error!("{message}"); - panic!( - "This is an intentional panic to catch errors in debug and alpha builds. - If you don't know why this panic is happening, please report the issue to the Codex team in the appropriate channels including `/feedback`. - {message}" - ); + panic!("{message}"); } else { error!("{message}"); } From fbe5fcfa326369340fb34952fc9f51bc5e6d091e Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Fri, 14 Nov 2025 23:14:02 -0800 Subject: [PATCH 03/68] tighten_panic_double_truncation --- codex-rs/core/src/truncate.rs | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/codex-rs/core/src/truncate.rs b/codex-rs/core/src/truncate.rs index 6ea3f2dc0f..0e520e6764 100644 --- a/codex-rs/core/src/truncate.rs +++ b/codex-rs/core/src/truncate.rs @@ -6,8 +6,6 @@ use codex_protocol::models::FunctionCallOutputContentItem; use codex_utils_string::take_bytes_at_char_boundary; use codex_utils_string::take_last_bytes_at_char_boundary; use codex_utils_tokenizer::Tokenizer; -use serde_json::Value; -use crate::util::error_or_panic; /// Model-formatting limits: clients get full streams; only content sent to the model is truncated. pub const MODEL_FORMAT_MAX_BYTES: usize = 10 * 1024; // 10 KiB @@ -150,20 +148,10 @@ fn truncate_formatted_exec_output( } fn debug_panic_on_double_truncation(content: &str) { - if let Ok(json) = serde_json::from_str::(content) { - if let Some(output) = json.get("output") - && let Some(text) = output.as_str() - && text.starts_with("Total output lines:") - && text.contains("omitted") - { - error_or_panic(format!( - "FunctionCallOutput content was already truncated before ContextManager::record_items; this would cause double truncation {content}" - )); - } - } else if content.starts_with("Total output lines:") && content.contains("omitted") { - error_or_panic(format!( + if content.contains("Total output lines:") && content.contains("omitted") { + tracing::error!( "FunctionCallOutput content was already truncated before ContextManager::record_items; this would cause double truncation {content}" - )); + ); } } From dbb25e9afe0a7b84b990391e72cd1cce79549657 Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Fri, 14 Nov 2025 23:14:49 -0800 Subject: [PATCH 04/68] tighten_panic_double_truncation --- codex-rs/core/src/truncate.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/codex-rs/core/src/truncate.rs b/codex-rs/core/src/truncate.rs index 0e520e6764..42d6a967de 100644 --- a/codex-rs/core/src/truncate.rs +++ b/codex-rs/core/src/truncate.rs @@ -84,7 +84,7 @@ fn truncate_formatted_exec_output( limit_bytes: usize, limit_lines: usize, ) -> String { - debug_panic_on_double_truncation(content); + error_on_double_truncation(content); let head_lines: usize = limit_lines / 2; let tail_lines: usize = limit_lines - head_lines; // 128 let head_bytes: usize = limit_bytes / 2; @@ -147,7 +147,7 @@ fn truncate_formatted_exec_output( result } -fn debug_panic_on_double_truncation(content: &str) { +fn error_on_double_truncation(content: &str) { if content.contains("Total output lines:") && content.contains("omitted") { tracing::error!( "FunctionCallOutput content was already truncated before ContextManager::record_items; this would cause double truncation {content}" From 63596d16ad7342c7a96e0fe7bae1634b6caeee3e Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Sun, 16 Nov 2025 15:23:59 -0800 Subject: [PATCH 05/68] truncate --- codex-rs/core/src/codex.rs | 20 +- codex-rs/core/src/compact.rs | 42 +- codex-rs/core/src/config/mod.rs | 14 + codex-rs/core/src/context_manager/history.rs | 53 +- .../core/src/context_manager/history_tests.rs | 49 +- codex-rs/core/src/error.rs | 19 +- codex-rs/core/src/state/session.rs | 4 +- codex-rs/core/src/tools/orchestrator.rs | 4 +- codex-rs/core/src/truncate.rs | 466 ++++++++++-------- codex-rs/core/src/unified_exec/mod.rs | 1 + codex-rs/core/src/unified_exec/session.rs | 3 +- .../core/src/unified_exec/session_manager.rs | 10 +- codex-rs/core/tests/suite/unified_exec.rs | 2 +- docs/config.md | 1 + docs/example-config.md | 1 + 15 files changed, 429 insertions(+), 260 deletions(-) diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index dbde7a4e28..cac4d9204a 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -67,7 +67,7 @@ use crate::error::Result as CodexResult; use crate::exec::StreamOutput; // Removed: legacy executor wiring replaced by ToolOrchestrator flows. // legacy normalize_exec_result no longer used after orchestrator migration -use crate::compact::build_compacted_history; +use crate::compact::build_token_limited_compacted_history; use crate::compact::collect_user_messages; use crate::mcp::auth::compute_auth_statuses; use crate::mcp_connection_manager::McpConnectionManager; @@ -183,6 +183,8 @@ impl Codex { cwd: config.cwd.clone(), original_config_do_not_use: Arc::clone(&config), features: config.features.clone(), + context_manager_function_output_max_tokens: config + .context_manager_function_output_max_tokens, session_source, }; @@ -337,6 +339,8 @@ pub(crate) struct SessionConfiguration { /// Set of feature flags for this session features: Features, + context_manager_function_output_max_tokens: usize, + // TODO(pakrym): Remove config from here original_config_do_not_use: Arc, /// Source of the session (cli, vscode, exec, mcp, ...) @@ -366,6 +370,10 @@ impl SessionConfiguration { } next_configuration } + + pub(crate) fn context_manager_function_output_max_tokens(&self) -> usize { + self.context_manager_function_output_max_tokens + } } #[derive(Default, Clone)] @@ -987,7 +995,7 @@ impl Session { RolloutItem::Compacted(compacted) => { let snapshot = history.get_history(); let user_messages = collect_user_messages(&snapshot); - let rebuilt = build_compacted_history( + let rebuilt = build_token_limited_compacted_history( self.build_initial_context(turn_context), &user_messages, &compacted.message, @@ -2600,6 +2608,8 @@ mod tests { cwd: config.cwd.clone(), original_config_do_not_use: Arc::clone(&config), features: Features::default(), + context_manager_function_output_max_tokens: config + .context_manager_function_output_max_tokens, session_source: SessionSource::Exec, }; @@ -2676,6 +2686,8 @@ mod tests { cwd: config.cwd.clone(), original_config_do_not_use: Arc::clone(&config), features: Features::default(), + context_manager_function_output_max_tokens: config + .context_manager_function_output_max_tokens, session_source: SessionSource::Exec, }; @@ -2933,7 +2945,7 @@ mod tests { let summary1 = "summary one"; let snapshot1 = live_history.get_history(); let user_messages1 = collect_user_messages(&snapshot1); - let rebuilt1 = build_compacted_history( + let rebuilt1 = build_token_limited_compacted_history( session.build_initial_context(turn_context), &user_messages1, summary1, @@ -2966,7 +2978,7 @@ mod tests { let summary2 = "summary two"; let snapshot2 = live_history.get_history(); let user_messages2 = collect_user_messages(&snapshot2); - let rebuilt2 = build_compacted_history( + let rebuilt2 = build_token_limited_compacted_history( session.build_initial_context(turn_context), &user_messages2, summary2, diff --git a/codex-rs/core/src/compact.rs b/codex-rs/core/src/compact.rs index 6908faeec2..0a0352f21d 100644 --- a/codex-rs/core/src/compact.rs +++ b/codex-rs/core/src/compact.rs @@ -22,6 +22,7 @@ use codex_protocol::models::ResponseInputItem; use codex_protocol::models::ResponseItem; use codex_protocol::protocol::RolloutItem; use codex_protocol::user_input::UserInput; +use codex_utils_tokenizer::Tokenizer; use futures::prelude::*; use tracing::error; @@ -147,7 +148,8 @@ async fn run_compact_task_inner( let user_messages = collect_user_messages(&history_snapshot); let initial_context = sess.build_initial_context(turn_context.as_ref()); - let mut new_history = build_compacted_history(initial_context, &user_messages, &summary_text); + let mut new_history = + build_token_limited_compacted_history(initial_context, &user_messages, &summary_text); let ghost_snapshots: Vec = history_snapshot .iter() .filter(|item| matches!(item, ResponseItem::GhostSnapshot { .. })) @@ -220,35 +222,40 @@ pub(crate) fn is_summary_message(message: &str) -> bool { message.starts_with(format!("{SUMMARY_PREFIX}\n").as_str()) } -pub(crate) fn build_compacted_history( +pub(crate) fn build_token_limited_compacted_history( initial_context: Vec, user_messages: &[String], summary_text: &str, ) -> Vec { - build_compacted_history_with_limit( + build_token_limited_compacted_history_with_limit( initial_context, user_messages, summary_text, - COMPACT_USER_MESSAGE_MAX_TOKENS * 4, + COMPACT_USER_MESSAGE_MAX_TOKENS, ) } -fn build_compacted_history_with_limit( +fn build_token_limited_compacted_history_with_limit( mut history: Vec, user_messages: &[String], summary_text: &str, - max_bytes: usize, + max_tokens: usize, ) -> Vec { let mut selected_messages: Vec = Vec::new(); - if max_bytes > 0 { - let mut remaining = max_bytes; + if max_tokens > 0 { + let tokenizer = Tokenizer::try_default().ok(); + let mut remaining = max_tokens; for message in user_messages.iter().rev() { if remaining == 0 { break; } - if message.len() <= remaining { + let tokens = tokenizer + .as_ref() + .map(|tok| usize::try_from(tok.count(message)).unwrap_or(usize::MAX)) + .unwrap_or_else(|| message.len().saturating_add(3) / 4); + if tokens <= remaining { selected_messages.push(message.clone()); - remaining = remaining.saturating_sub(message.len()); + remaining = remaining.saturating_sub(tokens); } else { let (truncated, _) = truncate_middle(message, remaining); selected_messages.push(truncated); @@ -408,16 +415,16 @@ mod tests { } #[test] - fn build_compacted_history_truncates_overlong_user_messages() { + fn build_token_limited_compacted_history_truncates_overlong_user_messages() { // Use a small truncation limit so the test remains fast while still validating // that oversized user content is truncated. - let max_bytes = 128; - let big = "X".repeat(max_bytes + 50); - let history = super::build_compacted_history_with_limit( + let max_tokens = 16; + let big = "word ".repeat(200); + let history = super::build_token_limited_compacted_history_with_limit( Vec::new(), std::slice::from_ref(&big), "SUMMARY", - max_bytes, + max_tokens, ); assert_eq!(history.len(), 2); @@ -450,12 +457,13 @@ mod tests { } #[test] - fn build_compacted_history_appends_summary_message() { + fn build_token_limited_compacted_history_appends_summary_message() { let initial_context: Vec = Vec::new(); let user_messages = vec!["first user message".to_string()]; let summary_text = "summary text"; - let history = build_compacted_history(initial_context, &user_messages, summary_text); + let history = + build_token_limited_compacted_history(initial_context, &user_messages, summary_text); assert!( !history.is_empty(), "expected compacted history to include summary" diff --git a/codex-rs/core/src/config/mod.rs b/codex-rs/core/src/config/mod.rs index b1e5b7f98a..5734354849 100644 --- a/codex-rs/core/src/config/mod.rs +++ b/codex-rs/core/src/config/mod.rs @@ -32,6 +32,7 @@ use crate::project_doc::DEFAULT_PROJECT_DOC_FILENAME; use crate::project_doc::LOCAL_PROJECT_DOC_FILENAME; use crate::protocol::AskForApproval; use crate::protocol::SandboxPolicy; +use crate::truncate::DEFAULT_FUNCTION_OUTPUT_TOKEN_LIMIT; use codex_app_server_protocol::Tools; use codex_app_server_protocol::UserSavedConfig; use codex_protocol::config_types::ForcedLoginMethod; @@ -193,6 +194,9 @@ pub struct Config { /// Additional filenames to try when looking for project-level docs. pub project_doc_fallback_filenames: Vec, + /// Token budget applied when storing tool/function outputs in the context manager. + pub context_manager_function_output_max_tokens: usize, + /// Directory containing all Codex state (defaults to `~/.codex` but can be /// overridden by the `CODEX_HOME` environment variable). pub codex_home: PathBuf, @@ -592,6 +596,9 @@ pub struct ConfigToml { /// Ordered list of fallback filenames to look for when AGENTS.md is missing. pub project_doc_fallback_filenames: Option>, + /// Token budget applied when storing tool/function outputs in the context manager. + pub context_manager_function_output_max_tokens: Option, + /// Profile to use from the `profiles` map. pub profile: Option, @@ -1135,6 +1142,9 @@ impl Config { } }) .collect(), + context_manager_function_output_max_tokens: cfg + .context_manager_function_output_max_tokens + .unwrap_or(DEFAULT_FUNCTION_OUTPUT_TOKEN_LIMIT), codex_home, history, file_opener: cfg.file_opener.unwrap_or(UriBasedFileOpener::VsCode), @@ -2887,6 +2897,7 @@ model_verbosity = "high" model_providers: fixture.model_provider_map.clone(), project_doc_max_bytes: PROJECT_DOC_MAX_BYTES, project_doc_fallback_filenames: Vec::new(), + context_manager_function_output_max_tokens: DEFAULT_FUNCTION_OUTPUT_TOKEN_LIMIT, codex_home: fixture.codex_home(), history: History::default(), file_opener: UriBasedFileOpener::VsCode, @@ -2958,6 +2969,7 @@ model_verbosity = "high" model_providers: fixture.model_provider_map.clone(), project_doc_max_bytes: PROJECT_DOC_MAX_BYTES, project_doc_fallback_filenames: Vec::new(), + context_manager_function_output_max_tokens: DEFAULT_FUNCTION_OUTPUT_TOKEN_LIMIT, codex_home: fixture.codex_home(), history: History::default(), file_opener: UriBasedFileOpener::VsCode, @@ -3044,6 +3056,7 @@ model_verbosity = "high" model_providers: fixture.model_provider_map.clone(), project_doc_max_bytes: PROJECT_DOC_MAX_BYTES, project_doc_fallback_filenames: Vec::new(), + context_manager_function_output_max_tokens: DEFAULT_FUNCTION_OUTPUT_TOKEN_LIMIT, codex_home: fixture.codex_home(), history: History::default(), file_opener: UriBasedFileOpener::VsCode, @@ -3116,6 +3129,7 @@ model_verbosity = "high" model_providers: fixture.model_provider_map.clone(), project_doc_max_bytes: PROJECT_DOC_MAX_BYTES, project_doc_fallback_filenames: Vec::new(), + context_manager_function_output_max_tokens: DEFAULT_FUNCTION_OUTPUT_TOKEN_LIMIT, codex_home: fixture.codex_home(), history: History::default(), file_opener: UriBasedFileOpener::VsCode, diff --git a/codex-rs/core/src/context_manager/history.rs b/codex-rs/core/src/context_manager/history.rs index 189b3aa7a5..2e4809586c 100644 --- a/codex-rs/core/src/context_manager/history.rs +++ b/codex-rs/core/src/context_manager/history.rs @@ -1,8 +1,8 @@ use crate::codex::TurnContext; use crate::context_manager::normalize; -use crate::truncate; -use crate::truncate::format_output_for_model_body; -use crate::truncate::globally_truncate_function_output_items; +use crate::truncate::DEFAULT_FUNCTION_OUTPUT_TOKEN_LIMIT; +use crate::truncate::truncate_function_output_items_to_token_limit; +use crate::truncate::truncate_middle; use codex_protocol::models::FunctionCallOutputPayload; use codex_protocol::models::ResponseItem; use codex_protocol::protocol::TokenUsage; @@ -10,25 +10,25 @@ use codex_protocol::protocol::TokenUsageInfo; use codex_utils_tokenizer::Tokenizer; use std::ops::Deref; -const CONTEXT_WINDOW_HARD_LIMIT_FACTOR: f64 = 1.1; -const CONTEXT_WINDOW_HARD_LIMIT_BYTES: usize = - (truncate::MODEL_FORMAT_MAX_BYTES as f64 * CONTEXT_WINDOW_HARD_LIMIT_FACTOR) as usize; -const CONTEXT_WINDOW_HARD_LIMIT_LINES: usize = - (truncate::MODEL_FORMAT_MAX_LINES as f64 * CONTEXT_WINDOW_HARD_LIMIT_FACTOR) as usize; - /// Transcript of conversation history -#[derive(Debug, Clone, Default)] +#[derive(Debug, Clone)] pub(crate) struct ContextManager { /// The oldest items are at the beginning of the vector. items: Vec, token_info: Option, + function_output_max_tokens: usize, } impl ContextManager { pub(crate) fn new() -> Self { + Self::with_function_output_limit(DEFAULT_FUNCTION_OUTPUT_TOKEN_LIMIT) + } + + pub(crate) fn with_function_output_limit(max_tokens: usize) -> Self { Self { items: Vec::new(), token_info: TokenUsageInfo::new_or_append(&None, &None, None), + function_output_max_tokens: max_tokens, } } @@ -62,7 +62,7 @@ impl ContextManager { continue; } - let processed = Self::process_item(&item); + let processed = self.process_item(item_ref); self.items.push(processed); } } @@ -150,18 +150,17 @@ impl ContextManager { items.retain(|item| !matches!(item, ResponseItem::GhostSnapshot { .. })); } - fn process_item(item: &ResponseItem) -> ResponseItem { + fn process_item(&self, item: &ResponseItem) -> ResponseItem { match item { ResponseItem::FunctionCallOutput { call_id, output } => { - let truncated = format_output_for_model_body( - output.content.as_str(), - CONTEXT_WINDOW_HARD_LIMIT_BYTES, - CONTEXT_WINDOW_HARD_LIMIT_LINES, - ); - let truncated_items = output - .content_items - .as_ref() - .map(|items| globally_truncate_function_output_items(items)); + let (truncated, _) = + truncate_middle(output.content.as_str(), self.function_output_max_tokens); + let truncated_items = output.content_items.as_ref().map(|items| { + truncate_function_output_items_to_token_limit( + items, + self.function_output_max_tokens, + ) + }); ResponseItem::FunctionCallOutput { call_id: call_id.clone(), output: FunctionCallOutputPayload { @@ -172,11 +171,7 @@ impl ContextManager { } } ResponseItem::CustomToolCallOutput { call_id, output } => { - let truncated = format_output_for_model_body( - output, - CONTEXT_WINDOW_HARD_LIMIT_BYTES, - CONTEXT_WINDOW_HARD_LIMIT_LINES, - ); + let (truncated, _) = truncate_middle(output, self.function_output_max_tokens); ResponseItem::CustomToolCallOutput { call_id: call_id.clone(), output: truncated, @@ -194,6 +189,12 @@ impl ContextManager { } } +impl Default for ContextManager { + fn default() -> Self { + Self::new() + } +} + /// API messages include every non-system item (user/assistant messages, reasoning, /// tool calls, tool outputs, shell calls, and web-search calls). fn is_api_message(message: &ResponseItem) -> bool { diff --git a/codex-rs/core/src/context_manager/history_tests.rs b/codex-rs/core/src/context_manager/history_tests.rs index c81749c2c1..239b84c812 100644 --- a/codex-rs/core/src/context_manager/history_tests.rs +++ b/codex-rs/core/src/context_manager/history_tests.rs @@ -10,6 +10,7 @@ use codex_protocol::models::LocalShellExecAction; use codex_protocol::models::LocalShellStatus; use codex_protocol::models::ReasoningItemContent; use codex_protocol::models::ReasoningItemReasoningSummary; +use codex_utils_tokenizer::Tokenizer; use pretty_assertions::assert_eq; use regex_lite::Regex; @@ -237,6 +238,7 @@ fn normalization_retains_local_shell_outputs() { #[test] fn record_items_truncates_function_call_output_content() { let mut history = ContextManager::new(); + let tok = Tokenizer::try_default().expect("load tokenizer"); let long_line = "a very long line to trigger truncation\n"; let long_output = long_line.repeat(2_500); let item = ResponseItem::FunctionCallOutput { @@ -255,10 +257,15 @@ fn record_items_truncates_function_call_output_content() { ResponseItem::FunctionCallOutput { output, .. } => { assert_ne!(output.content, long_output); assert!( - output.content.starts_with("Total output lines:"), - "expected truncated summary, got {}", + output.content.contains("tokens truncated"), + "expected token-based truncation marker, got {}", output.content ); + let token_count = usize::try_from(tok.count(&output.content)).unwrap_or(usize::MAX); + assert!( + token_count <= truncate::DEFAULT_FUNCTION_OUTPUT_TOKEN_LIMIT, + "token count should not exceed limit: {token_count}" + ); } other => panic!("unexpected history item: {other:?}"), } @@ -267,6 +274,7 @@ fn record_items_truncates_function_call_output_content() { #[test] fn record_items_truncates_custom_tool_call_output_content() { let mut history = ContextManager::new(); + let tok = Tokenizer::try_default().expect("load tokenizer"); let line = "custom output that is very long\n"; let long_output = line.repeat(2_500); let item = ResponseItem::CustomToolCallOutput { @@ -281,14 +289,47 @@ fn record_items_truncates_custom_tool_call_output_content() { ResponseItem::CustomToolCallOutput { output, .. } => { assert_ne!(output, &long_output); assert!( - output.starts_with("Total output lines:"), - "expected truncated summary, got {output}" + output.contains("tokens truncated"), + "expected token-based truncation marker, got {output}" + ); + let token_count = usize::try_from(tok.count(output)).unwrap_or(usize::MAX); + assert!( + token_count <= truncate::DEFAULT_FUNCTION_OUTPUT_TOKEN_LIMIT, + "token count should not exceed limit: {token_count}" ); } other => panic!("unexpected history item: {other:?}"), } } +#[test] +fn record_items_respects_custom_token_limit() { + let mut history = ContextManager::with_function_output_limit(8); + let tok = Tokenizer::try_default().expect("load tokenizer"); + let long_output = "tokenized content repeated many times ".repeat(200); + let item = ResponseItem::FunctionCallOutput { + call_id: "call-custom-limit".to_string(), + output: FunctionCallOutputPayload { + content: long_output, + success: Some(true), + ..Default::default() + }, + }; + + history.record_items([&item]); + + let stored = match &history.items[0] { + ResponseItem::FunctionCallOutput { output, .. } => output, + other => panic!("unexpected history item: {other:?}"), + }; + let stored_tokens = usize::try_from(tok.count(&stored.content)).unwrap_or(usize::MAX); + assert!(stored.content.contains("tokens truncated")); + assert!( + stored_tokens <= 8, + "stored_tokens should be <= 8, got {stored_tokens}" + ); +} + fn assert_truncated_message_matches(message: &str, line: &str, total_lines: usize) { let pattern = truncated_message_pattern(line, total_lines); let regex = Regex::new(&pattern).unwrap_or_else(|err| { diff --git a/codex-rs/core/src/error.rs b/codex-rs/core/src/error.rs index 64ba8df848..5174098fac 100644 --- a/codex-rs/core/src/error.rs +++ b/codex-rs/core/src/error.rs @@ -19,8 +19,8 @@ use tokio::task::JoinError; pub type Result = std::result::Result; -/// Limit UI error messages to a reasonable size while keeping useful context. -const ERROR_MESSAGE_UI_MAX_BYTES: usize = 2 * 1024; // 4 KiB +/// Limit UI error messages to a reasonable token budget (~2 KiB of text). +const ERROR_MESSAGE_UI_MAX_TOKENS: usize = (2 * 1024) / 4; #[derive(Error, Debug)] pub enum SandboxErr { @@ -431,7 +431,7 @@ impl CodexErr { } } -pub fn get_error_message_ui(e: &CodexErr) -> String { +pub fn token_limited_error_message(e: &CodexErr) -> String { let message = match e { CodexErr::Sandbox(SandboxErr::Denied { output }) => { let aggregated = output.aggregated_output.text.trim(); @@ -461,7 +461,7 @@ pub fn get_error_message_ui(e: &CodexErr) -> String { _ => e.to_string(), }; - truncate_middle(&message, ERROR_MESSAGE_UI_MAX_BYTES).0 + truncate_middle(&message, ERROR_MESSAGE_UI_MAX_TOKENS).0 } #[cfg(test)] @@ -533,7 +533,7 @@ mod tests { let err = CodexErr::Sandbox(SandboxErr::Denied { output: Box::new(output), }); - assert_eq!(get_error_message_ui(&err), "aggregate detail"); + assert_eq!(token_limited_error_message(&err), "aggregate detail"); } #[test] @@ -549,7 +549,10 @@ mod tests { let err = CodexErr::Sandbox(SandboxErr::Denied { output: Box::new(output), }); - assert_eq!(get_error_message_ui(&err), "stderr detail\nstdout detail"); + assert_eq!( + token_limited_error_message(&err), + "stderr detail\nstdout detail" + ); } #[test] @@ -565,7 +568,7 @@ mod tests { let err = CodexErr::Sandbox(SandboxErr::Denied { output: Box::new(output), }); - assert_eq!(get_error_message_ui(&err), "stdout only"); + assert_eq!(token_limited_error_message(&err), "stdout only"); } #[test] @@ -582,7 +585,7 @@ mod tests { output: Box::new(output), }); assert_eq!( - get_error_message_ui(&err), + token_limited_error_message(&err), "command failed inside sandbox with exit code 13" ); } diff --git a/codex-rs/core/src/state/session.rs b/codex-rs/core/src/state/session.rs index 5b630d5ce9..9ed8eeccd1 100644 --- a/codex-rs/core/src/state/session.rs +++ b/codex-rs/core/src/state/session.rs @@ -20,7 +20,9 @@ impl SessionState { pub(crate) fn new(session_configuration: SessionConfiguration) -> Self { Self { session_configuration, - history: ContextManager::new(), + history: ContextManager::with_function_output_limit( + session_configuration.context_manager_function_output_max_tokens(), + ), latest_rate_limits: None, } } diff --git a/codex-rs/core/src/tools/orchestrator.rs b/codex-rs/core/src/tools/orchestrator.rs index 878e48e8be..ea584809ec 100644 --- a/codex-rs/core/src/tools/orchestrator.rs +++ b/codex-rs/core/src/tools/orchestrator.rs @@ -7,7 +7,7 @@ retry without sandbox on denial (no re‑approval thanks to caching). */ use crate::error::CodexErr; use crate::error::SandboxErr; -use crate::error::get_error_message_ui; +use crate::error::token_limited_error_message; use crate::exec::ExecToolCallOutput; use crate::sandboxing::SandboxManager; use crate::tools::sandboxing::ApprovalCtx; @@ -129,7 +129,7 @@ impl ToolOrchestrator { let err = SandboxErr::Denied { output: output.clone(), }; - let friendly = get_error_message_ui(&CodexErr::Sandbox(err)); + let friendly = token_limited_error_message(&CodexErr::Sandbox(err)); let failure_summary = format!("failed in sandbox: {friendly}"); risk = tool_ctx diff --git a/codex-rs/core/src/truncate.rs b/codex-rs/core/src/truncate.rs index 42d6a967de..e69022491a 100644 --- a/codex-rs/core/src/truncate.rs +++ b/codex-rs/core/src/truncate.rs @@ -10,37 +10,40 @@ use codex_utils_tokenizer::Tokenizer; /// Model-formatting limits: clients get full streams; only content sent to the model is truncated. pub const MODEL_FORMAT_MAX_BYTES: usize = 10 * 1024; // 10 KiB pub const MODEL_FORMAT_MAX_LINES: usize = 256; // lines - -/// Globally truncate function output items to fit within `MODEL_FORMAT_MAX_BYTES` -/// by preserving as many text/image items as possible and appending a summary -/// for any omitted text items. -pub(crate) fn globally_truncate_function_output_items( +pub const DEFAULT_FUNCTION_OUTPUT_TOKEN_LIMIT: usize = MODEL_FORMAT_MAX_BYTES / 4; +const TOKENIZER_STACK_SAFE_BYTES: usize = 1024 * 1024; // 1 MiB + +/// Globally truncate function output items to fit within +/// `max_tokens` tokens by preserving as many +/// text/image items as possible and appending a summary for any omitted text +/// items. +pub(crate) fn truncate_function_output_items_to_token_limit( items: &[FunctionCallOutputContentItem], + max_tokens: usize, ) -> Vec { let mut out: Vec = Vec::with_capacity(items.len()); - let mut remaining = MODEL_FORMAT_MAX_BYTES; + let mut remaining_tokens = max_tokens; let mut omitted_text_items = 0usize; + let tokenizer = Tokenizer::try_default().ok(); for it in items { match it { FunctionCallOutputContentItem::InputText { text } => { - if remaining == 0 { + if remaining_tokens == 0 { omitted_text_items += 1; continue; } - let len = text.len(); - if len <= remaining { + let token_len = estimate_safe_token_count(text, tokenizer.as_ref()); + if token_len <= remaining_tokens { out.push(FunctionCallOutputContentItem::InputText { text: text.clone() }); - remaining -= len; + remaining_tokens = remaining_tokens.saturating_sub(token_len); } else { - let slice = take_bytes_at_char_boundary(text, remaining); - if !slice.is_empty() { - out.push(FunctionCallOutputContentItem::InputText { - text: slice.to_string(), - }); + let (snippet, _) = truncate_middle(text, remaining_tokens); + if !snippet.is_empty() { + out.push(FunctionCallOutputContentItem::InputText { text: snippet }); } - remaining = 0; + remaining_tokens = 0; } } // todo(aibrahim): handle input images; resize @@ -155,115 +158,157 @@ fn error_on_double_truncation(content: &str) { } } -/// Truncate an output string to a maximum number of “tokens”, where tokens are -/// approximated as individual `char`s. Preserves a prefix and suffix with an -/// elision marker describing how many tokens were omitted. -pub(crate) fn truncate_output_to_tokens( - output: &str, - max_tokens: usize, -) -> (String, Option) { - if max_tokens == 0 { - let total_tokens = output.chars().count(); - let message = format!("…{total_tokens} tokens truncated…"); - return (message, Some(total_tokens)); - } - - let tokens: Vec = output.chars().collect(); - let total_tokens = tokens.len(); - if total_tokens <= max_tokens { - return (output.to_string(), None); +fn estimate_safe_token_count(text: &str, tokenizer: Option<&Tokenizer>) -> usize { + if text.is_empty() { + return 0; } - let half = max_tokens / 2; - if half == 0 { - let truncated = total_tokens.saturating_sub(max_tokens); - let message = format!("…{truncated} tokens truncated…"); - return (message, Some(total_tokens)); + if text.len() > TOKENIZER_STACK_SAFE_BYTES { + return usize::try_from(approx_token_count(text)).unwrap_or(usize::MAX); } - let truncated = total_tokens.saturating_sub(half * 2); - let mut truncated_output = String::new(); - truncated_output.extend(&tokens[..half]); - truncated_output.push_str(&format!("…{truncated} tokens truncated…")); - truncated_output.extend(&tokens[total_tokens - half..]); - (truncated_output, Some(total_tokens)) + tokenizer + .map(|tok| usize::try_from(tok.count(text)).unwrap_or(usize::MAX)) + .unwrap_or_else(|| usize::try_from(approx_token_count(text)).unwrap_or(usize::MAX)) } -/// Truncate the middle of a UTF-8 string to at most `max_bytes` bytes, -/// preserving the beginning and the end. Returns the possibly truncated -/// string and `Some(original_token_count)` (counted with the local tokenizer; -/// falls back to a 4-bytes-per-token estimate if the tokenizer cannot load) -/// if truncation occurred; otherwise returns the original string and `None`. -pub(crate) fn truncate_middle(s: &str, max_bytes: usize) -> (String, Option) { - if s.len() <= max_bytes { - return (s.to_string(), None); +/// Truncate the middle of a UTF-8 string to at most `max_tokens` tokens, +/// preserving the beginning and the end. Returns the possibly truncated string +/// and `Some(original_token_count)` if truncation occurred; otherwise returns +/// the original string and `None`. +pub(crate) fn truncate_middle(s: &str, max_tokens: usize) -> (String, Option) { + if s.is_empty() { + return (String::new(), None); } - // Build a tokenizer for counting (default to o200k_base; fall back to cl100k_base). - // If both fail, fall back to a 4-bytes-per-token estimate. - let tok = Tokenizer::try_default().ok(); - let token_count = |text: &str| -> u64 { - if let Some(ref t) = tok { - t.count(text) as u64 - } else { - (text.len() as u64).div_ceil(4) - } + if s.len() > TOKENIZER_STACK_SAFE_BYTES { + return truncate_middle_fallback(s, max_tokens); + } + + let tokenizer = match Tokenizer::try_default() { + Ok(tok) => tok, + Err(_) => return truncate_middle_fallback(s, max_tokens), }; - let total_tokens = token_count(s); - if max_bytes == 0 { + let encoded = tokenizer.encode(s, false); + let total_tokens = encoded.len() as u64; + + if max_tokens == 0 { return ( format!("…{total_tokens} tokens truncated…"), Some(total_tokens), ); } - fn truncate_on_boundary(input: &str, max_len: usize) -> &str { - if input.len() <= max_len { - return input; - } - let mut end = max_len; - while end > 0 && !input.is_char_boundary(end) { - end -= 1; - } - &input[..end] + if encoded.len() <= max_tokens { + return (s.to_string(), None); } - fn pick_prefix_end(s: &str, left_budget: usize) -> usize { - if let Some(head) = s.get(..left_budget) - && let Some(i) = head.rfind('\n') - { - return i + 1; + let mut guess_removed = total_tokens.saturating_sub(max_tokens as u64).max(1); + for _ in 0..4 { + let marker = format!("…{guess_removed} tokens truncated…"); + let marker_len = usize::try_from(tokenizer.count(&marker)).unwrap_or(usize::MAX); + if marker_len >= max_tokens { + return (marker, Some(total_tokens)); } - truncate_on_boundary(s, left_budget).len() - } - fn pick_suffix_start(s: &str, right_budget: usize) -> usize { - let start_tail = s.len().saturating_sub(right_budget); - if let Some(tail) = s.get(start_tail..) - && let Some(i) = tail.find('\n') - { - return start_tail + i + 1; + let keep_budget = max_tokens - marker_len; + if keep_budget == 0 { + return (marker, Some(total_tokens)); } - let mut idx = start_tail.min(s.len()); - while idx < s.len() && !s.is_char_boundary(idx) { - idx += 1; + let left_keep = keep_budget / 2; + let right_keep = keep_budget - left_keep; + let removed_tokens = encoded.len().saturating_sub(left_keep + right_keep) as u64; + let final_marker = format!("…{removed_tokens} tokens truncated…"); + let final_marker_len = + usize::try_from(tokenizer.count(&final_marker)).unwrap_or(usize::MAX); + if final_marker_len == marker_len { + let prefix = if left_keep > 0 { + tokenizer.decode(&encoded[..left_keep]).unwrap_or_default() + } else { + String::new() + }; + let suffix = if right_keep > 0 { + tokenizer + .decode(&encoded[encoded.len() - right_keep..]) + .unwrap_or_default() + } else { + String::new() + }; + let mut out = + String::with_capacity(prefix.len() + final_marker.len() + suffix.len() + 1); + out.push_str(&prefix); + out.push_str(&final_marker); + if !suffix.is_empty() { + out.push('\n'); + out.push_str(&suffix); + } + return (out, Some(total_tokens)); } - idx + + guess_removed = removed_tokens.max(1); + } + + let marker = format!("…{guess_removed} tokens truncated…"); + let marker_len = usize::try_from(tokenizer.count(&marker)).unwrap_or(usize::MAX); + if marker_len >= max_tokens { + return (marker, Some(total_tokens)); } - // Iterate to stabilize marker length → keep budget → boundaries. - let mut guess_tokens: u64 = 1; + let keep_budget = max_tokens - marker_len; + if keep_budget == 0 { + return (marker, Some(total_tokens)); + } + let left_keep = keep_budget / 2; + let right_keep = keep_budget - left_keep; + let prefix = if left_keep > 0 { + tokenizer.decode(&encoded[..left_keep]).unwrap_or_default() + } else { + String::new() + }; + let suffix = if right_keep > 0 { + tokenizer + .decode(&encoded[encoded.len() - right_keep..]) + .unwrap_or_default() + } else { + String::new() + }; + let mut out = String::with_capacity(prefix.len() + marker.len() + suffix.len() + 1); + out.push_str(&prefix); + out.push_str(&marker); + if !suffix.is_empty() { + out.push('\n'); + out.push_str(&suffix); + } + (out, Some(total_tokens)) +} + +fn truncate_middle_fallback(s: &str, max_tokens: usize) -> (String, Option) { + let total_tokens = approx_token_count(s); + if max_tokens == 0 { + return ( + format!("…{total_tokens} tokens truncated…"), + Some(total_tokens), + ); + } + + if total_tokens as usize <= max_tokens { + return (s.to_string(), None); + } + + let max_bytes = max_tokens.saturating_mul(4); + if s.len() <= max_bytes { + return (s.to_string(), None); + } + + let mut guess_tokens = total_tokens.saturating_sub(max_tokens as u64).max(1); for _ in 0..4 { let marker = format!("…{guess_tokens} tokens truncated…"); let marker_len = marker.len(); let keep_budget = max_bytes.saturating_sub(marker_len); if keep_budget == 0 { - return ( - format!("…{total_tokens} tokens truncated…"), - Some(total_tokens), - ); + return (marker, Some(total_tokens)); } let left_budget = keep_budget / 2; @@ -274,11 +319,7 @@ pub(crate) fn truncate_middle(s: &str, max_bytes: usize) -> (String, Option suffix_start = prefix_end; } - // Tokens actually removed (middle slice) using the real tokenizer. - let removed_tokens = token_count(&s[prefix_end..suffix_start]); - - // If the number of digits in the token count does not change the marker length, - // we can finalize output. + let removed_tokens = approx_token_count(&s[prefix_end..suffix_start]); let final_marker = format!("…{removed_tokens} tokens truncated…"); if final_marker.len() == marker_len { let kept_content_bytes = prefix_end + (s.len() - suffix_start); @@ -290,18 +331,14 @@ pub(crate) fn truncate_middle(s: &str, max_bytes: usize) -> (String, Option return (out, Some(total_tokens)); } - guess_tokens = removed_tokens; + guess_tokens = removed_tokens.max(1); } - // Fallback build after iterations: compute with the last guess. let marker = format!("…{guess_tokens} tokens truncated…"); let marker_len = marker.len(); let keep_budget = max_bytes.saturating_sub(marker_len); if keep_budget == 0 { - return ( - format!("…{total_tokens} tokens truncated…"), - Some(total_tokens), - ); + return (marker, Some(total_tokens)); } let left_budget = keep_budget / 2; @@ -320,14 +357,53 @@ pub(crate) fn truncate_middle(s: &str, max_bytes: usize) -> (String, Option (out, Some(total_tokens)) } +fn approx_token_count(text: &str) -> u64 { + (text.len() as u64).saturating_add(3) / 4 +} + +fn truncate_on_boundary(input: &str, max_len: usize) -> &str { + if input.len() <= max_len { + return input; + } + let mut end = max_len; + while end > 0 && !input.is_char_boundary(end) { + end -= 1; + } + &input[..end] +} + +fn pick_prefix_end(s: &str, left_budget: usize) -> usize { + if let Some(head) = s.get(..left_budget) + && let Some(i) = head.rfind('\n') + { + return i + 1; + } + truncate_on_boundary(s, left_budget).len() +} + +fn pick_suffix_start(s: &str, right_budget: usize) -> usize { + let start_tail = s.len().saturating_sub(right_budget); + if let Some(tail) = s.get(start_tail..) + && let Some(i) = tail.find('\n') + { + return start_tail + i + 1; + } + + let mut idx = start_tail.min(s.len()); + while idx < s.len() && !s.is_char_boundary(idx) { + idx += 1; + } + idx +} + #[cfg(test)] mod tests { + use super::DEFAULT_FUNCTION_OUTPUT_TOKEN_LIMIT; use super::MODEL_FORMAT_MAX_BYTES; use super::MODEL_FORMAT_MAX_LINES; use super::format_output_for_model_body; - use super::globally_truncate_function_output_items; + use super::truncate_function_output_items_to_token_limit; use super::truncate_middle; - use super::truncate_output_to_tokens; use codex_protocol::models::FunctionCallOutputContentItem; use codex_utils_tokenizer::Tokenizer; use pretty_assertions::assert_eq; @@ -350,89 +426,69 @@ mod tests { ) } - #[test] - fn truncate_middle_no_newlines_fallback() { - let tok = Tokenizer::try_default().expect("load tokenizer"); - let s = "abcdefghijklmnopqrstuvwxyz0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ*"; - let max_bytes = 32; - let (out, original) = truncate_middle(s, max_bytes); - assert!(out.starts_with("abc")); - assert!(out.contains("tokens truncated")); - assert!(out.ends_with("XYZ*")); - assert_eq!(original, Some(tok.count(s) as u64)); + fn build_chunked_text( + tok: &Tokenizer, + chunk: &str, + chunk_tokens: usize, + target_tokens: usize, + ) -> (String, usize) { + let mut text = String::new(); + let mut tokens = 0; + while tokens + chunk_tokens <= target_tokens { + text.push_str(chunk); + tokens += chunk_tokens; + } + if text.is_empty() { + text.push_str(chunk); + tokens = chunk_tokens; + } + (text, tokens) } #[test] - fn truncate_middle_prefers_newline_boundaries() { + fn truncate_middle_returns_original_when_under_limit() { let tok = Tokenizer::try_default().expect("load tokenizer"); - let mut s = String::new(); - for i in 1..=20 { - s.push_str(&format!("{i:03}\n")); - } - assert_eq!(s.len(), 80); - - let max_bytes = 64; - let (out, tokens) = truncate_middle(&s, max_bytes); - assert!(out.starts_with("001\n002\n003\n004\n")); - assert!(out.contains("tokens truncated")); - assert!(out.ends_with("017\n018\n019\n020\n")); - assert_eq!(tokens, Some(tok.count(&s) as u64)); + let s = "short output"; + let limit = usize::try_from(tok.count(s)).unwrap_or(0) + 10; + let (out, original) = truncate_middle(s, limit); + assert_eq!(out, s); + assert_eq!(original, None); } #[test] - fn truncate_middle_handles_utf8_content() { + fn truncate_middle_reports_truncation_at_zero_limit() { let tok = Tokenizer::try_default().expect("load tokenizer"); - let s = "😀😀😀😀😀😀😀😀😀😀\nsecond line with ascii text\n"; - let max_bytes = 32; - let (out, tokens) = truncate_middle(s, max_bytes); - + let s = "abcdef"; + let total = tok.count(s) as u64; + let (out, original) = truncate_middle(s, 0); assert!(out.contains("tokens truncated")); - assert!(!out.contains('\u{fffd}')); - assert_eq!(tokens, Some(tok.count(s) as u64)); + assert_eq!(original, Some(total)); } #[test] - fn truncate_middle_prefers_newline_boundaries_2() { + fn truncate_middle_enforces_token_budget() { let tok = Tokenizer::try_default().expect("load tokenizer"); - // Build a multi-line string of 20 numbered lines (each "NNN\n"). - let mut s = String::new(); - for i in 1..=20 { - s.push_str(&format!("{i:03}\n")); - } - assert_eq!(s.len(), 80); - - let max_bytes = 64; - let (out, total) = truncate_middle(&s, max_bytes); - assert!(out.starts_with("001\n002\n003\n004\n")); + let s = "alpha beta gamma delta epsilon zeta eta theta iota kappa"; + let max_tokens = 12; + let (out, original) = truncate_middle(s, max_tokens); assert!(out.contains("tokens truncated")); - assert!(out.ends_with("017\n018\n019\n020\n")); - assert_eq!(total, Some(tok.count(&s) as u64)); - } - - #[test] - fn truncate_output_to_tokens_returns_original_when_under_limit() { - let s = "short output"; - let (truncated, original) = truncate_output_to_tokens(s, 100); - assert_eq!(truncated, s); - assert_eq!(original, None); + assert_eq!(original, Some(tok.count(s) as u64)); + let result_tokens = tok.count(&out) as usize; + assert!(result_tokens <= max_tokens); } #[test] - fn truncate_output_to_tokens_reports_truncation_at_zero_limit() { - let s = "abcdef"; - let (truncated, original) = truncate_output_to_tokens(s, 0); - assert!(truncated.contains("tokens truncated")); - assert_eq!(original, Some(s.chars().count())); - } + fn truncate_middle_handles_utf8_content() { + let tok = Tokenizer::try_default().expect("load tokenizer"); + let s = "😀😀😀😀😀😀😀😀😀😀\nsecond line with ascii text\n"; + let max_tokens = 8; + let (out, tokens) = truncate_middle(s, max_tokens); - #[test] - fn truncate_output_to_tokens_preserves_prefix_and_suffix() { - let s = "abcdefghijklmnopqrstuvwxyz"; - let max_tokens = 10; - let (truncated, original) = truncate_output_to_tokens(s, max_tokens); - assert!(truncated.starts_with("abcde")); - assert!(truncated.ends_with("vwxyz")); - assert_eq!(original, Some(s.chars().count())); + assert!(out.contains("tokens truncated")); + assert!(!out.contains('\u{fffd}')); + assert_eq!(tokens, Some(tok.count(s) as u64)); + let result_tokens = tok.count(&out) as usize; + assert!(result_tokens <= max_tokens); } #[test] @@ -550,24 +606,37 @@ mod tests { #[test] fn truncates_across_multiple_under_limit_texts_and_reports_omitted() { - // Arrange: several text items, none exceeding per-item limit, but total exceeds budget. - let budget = MODEL_FORMAT_MAX_BYTES; - let t1_len = (budget / 2).saturating_sub(10); - let t2_len = (budget / 2).saturating_sub(10); - let remaining_after_t1_t2 = budget.saturating_sub(t1_len + t2_len); - let t3_len = 50; // gets truncated to remaining_after_t1_t2 - let t4_len = 5; // omitted - let t5_len = 7; // omitted - - let t1 = "a".repeat(t1_len); - let t2 = "b".repeat(t2_len); - let t3 = "c".repeat(t3_len); - let t4 = "d".repeat(t4_len); - let t5 = "e".repeat(t5_len); + let tok = Tokenizer::try_default().expect("load tokenizer"); + let chunk = "alpha beta gamma delta epsilon zeta eta theta iota kappa lambda mu nu xi omicron pi rho sigma tau upsilon phi chi psi omega.\n"; + let chunk_tokens = usize::try_from(tok.count(chunk)).unwrap_or(usize::MAX); + assert!(chunk_tokens > 0, "chunk must consume tokens"); + + let target_each = DEFAULT_FUNCTION_OUTPUT_TOKEN_LIMIT + .saturating_div(2) + .saturating_sub(chunk_tokens); + let (t1, t1_tokens) = build_chunked_text(&tok, chunk, chunk_tokens, target_each); + let (t2, t2_tokens) = build_chunked_text(&tok, chunk, chunk_tokens, target_each); + let remaining_after_t1_t2 = + DEFAULT_FUNCTION_OUTPUT_TOKEN_LIMIT.saturating_sub(t1_tokens + t2_tokens); + assert!( + remaining_after_t1_t2 > 0, + "expected positive token remainder after first two items" + ); + + let repeats_for_t3 = remaining_after_t1_t2 / chunk_tokens + 2; + let t3 = chunk.repeat(repeats_for_t3); + let t3_tokens = usize::try_from(tok.count(&t3)).unwrap_or(usize::MAX); + assert!( + t3_tokens > remaining_after_t1_t2, + "t3 must exceed remaining tokens" + ); + + let t4 = chunk.to_string(); + let t5 = chunk.to_string(); let items = vec![ - FunctionCallOutputContentItem::InputText { text: t1 }, - FunctionCallOutputContentItem::InputText { text: t2 }, + FunctionCallOutputContentItem::InputText { text: t1.clone() }, + FunctionCallOutputContentItem::InputText { text: t2.clone() }, FunctionCallOutputContentItem::InputImage { image_url: "img:mid".to_string(), }, @@ -576,7 +645,10 @@ mod tests { FunctionCallOutputContentItem::InputText { text: t5 }, ]; - let output = globally_truncate_function_output_items(&items); + let output = truncate_function_output_items_to_token_limit( + &items, + DEFAULT_FUNCTION_OUTPUT_TOKEN_LIMIT, + ); // Expect: t1 (full), t2 (full), image, t3 (truncated), summary mentioning 2 omitted. assert_eq!(output.len(), 5); @@ -585,13 +657,13 @@ mod tests { FunctionCallOutputContentItem::InputText { text } => text, other => panic!("unexpected first item: {other:?}"), }; - assert_eq!(first_text.len(), t1_len); + assert_eq!(first_text, &t1); let second_text = match &output[1] { FunctionCallOutputContentItem::InputText { text } => text, other => panic!("unexpected second item: {other:?}"), }; - assert_eq!(second_text.len(), t2_len); + assert_eq!(second_text, &t2); assert_eq!( output[2], @@ -604,7 +676,15 @@ mod tests { FunctionCallOutputContentItem::InputText { text } => text, other => panic!("unexpected fourth item: {other:?}"), }; - assert_eq!(fourth_text.len(), remaining_after_t1_t2); + assert!( + fourth_text.contains("tokens truncated"), + "expected marker in truncated snippet: {fourth_text}" + ); + let truncated_tokens = usize::try_from(tok.count(fourth_text)).unwrap_or(usize::MAX); + assert!( + truncated_tokens <= remaining_after_t1_t2, + "truncated snippet must respect remaining token budget: {truncated_tokens} > {remaining_after_t1_t2}" + ); let summary_text = match &output[4] { FunctionCallOutputContentItem::InputText { text } => text, diff --git a/codex-rs/core/src/unified_exec/mod.rs b/codex-rs/core/src/unified_exec/mod.rs index f77b744497..390401d789 100644 --- a/codex-rs/core/src/unified_exec/mod.rs +++ b/codex-rs/core/src/unified_exec/mod.rs @@ -45,6 +45,7 @@ pub(crate) const MIN_YIELD_TIME_MS: u64 = 250; pub(crate) const MAX_YIELD_TIME_MS: u64 = 30_000; pub(crate) const DEFAULT_MAX_OUTPUT_TOKENS: usize = 10_000; pub(crate) const UNIFIED_EXEC_OUTPUT_MAX_BYTES: usize = 1024 * 1024; // 1 MiB +pub(crate) const UNIFIED_EXEC_OUTPUT_MAX_TOKENS: usize = UNIFIED_EXEC_OUTPUT_MAX_BYTES / 4; pub(crate) struct UnifiedExecContext { pub session: Arc, diff --git a/codex-rs/core/src/unified_exec/session.rs b/codex-rs/core/src/unified_exec/session.rs index bdb935f171..0be00aedda 100644 --- a/codex-rs/core/src/unified_exec/session.rs +++ b/codex-rs/core/src/unified_exec/session.rs @@ -19,6 +19,7 @@ use codex_utils_pty::ExecCommandSession; use codex_utils_pty::SpawnedPty; use super::UNIFIED_EXEC_OUTPUT_MAX_BYTES; +use super::UNIFIED_EXEC_OUTPUT_MAX_TOKENS; use super::UnifiedExecError; #[derive(Debug, Default)] @@ -165,7 +166,7 @@ impl UnifiedExecSession { }; if is_likely_sandbox_denied(self.sandbox_type(), &exec_output) { - let (snippet, _) = truncate_middle(&aggregated_text, UNIFIED_EXEC_OUTPUT_MAX_BYTES); + let (snippet, _) = truncate_middle(&aggregated_text, UNIFIED_EXEC_OUTPUT_MAX_TOKENS); let message = if snippet.is_empty() { format!("exit code {exit_code}") } else { diff --git a/codex-rs/core/src/unified_exec/session_manager.rs b/codex-rs/core/src/unified_exec/session_manager.rs index fee46df8b8..3a4f9e245c 100644 --- a/codex-rs/core/src/unified_exec/session_manager.rs +++ b/codex-rs/core/src/unified_exec/session_manager.rs @@ -36,7 +36,7 @@ use super::generate_chunk_id; use super::resolve_max_tokens; use super::session::OutputBuffer; use super::session::UnifiedExecSession; -use crate::truncate::truncate_output_to_tokens; +use crate::truncate::truncate_middle; impl UnifiedExecSessionManager { pub(crate) async fn exec_command( @@ -70,7 +70,9 @@ impl UnifiedExecSessionManager { let wall_time = Instant::now().saturating_duration_since(start); let text = String::from_utf8_lossy(&collected).to_string(); - let (output, original_token_count) = truncate_output_to_tokens(&text, max_tokens); + let (output, original_token_count) = truncate_middle(&text, max_tokens); + let original_token_count = + original_token_count.and_then(|count| usize::try_from(count).ok()); let chunk_id = generate_chunk_id(); let has_exited = session.has_exited(); let stored_id = self @@ -175,7 +177,9 @@ impl UnifiedExecSessionManager { let wall_time = Instant::now().saturating_duration_since(start); let text = String::from_utf8_lossy(&collected).to_string(); - let (output, original_token_count) = truncate_output_to_tokens(&text, max_tokens); + let (output, original_token_count) = truncate_middle(&text, max_tokens); + let original_token_count = + original_token_count.and_then(|count| usize::try_from(count).ok()); let chunk_id = generate_chunk_id(); let status = self.refresh_session_state(session_id).await; diff --git a/codex-rs/core/tests/suite/unified_exec.rs b/codex-rs/core/tests/suite/unified_exec.rs index 8c682efaf2..6e5710d0ca 100644 --- a/codex-rs/core/tests/suite/unified_exec.rs +++ b/codex-rs/core/tests/suite/unified_exec.rs @@ -820,7 +820,7 @@ async fn exec_command_reports_chunk_and_exit_metadata() -> Result<()> { let call_id = "uexec-metadata"; let args = serde_json::json!({ - "cmd": "printf 'abcdefghijklmnopqrstuvwxyz'", + "cmd": "printf 'token one token two token three token four token five token six token seven'", "yield_time_ms": 500, "max_output_tokens": 6, }); diff --git a/docs/config.md b/docs/config.md index 3e7b7e165e..878000f881 100644 --- a/docs/config.md +++ b/docs/config.md @@ -911,6 +911,7 @@ Valid values: | `model_provider` | string | Provider id from `model_providers` (default: `openai`). | | `model_context_window` | number | Context window tokens. | | `model_max_output_tokens` | number | Max output tokens. | +| `context_manager_function_output_max_tokens` | number | Token budget for stored function/tool outputs in history (default: 2,560 tokens). | | `approval_policy` | `untrusted` \| `on-failure` \| `on-request` \| `never` | When to prompt for approval. | | `sandbox_mode` | `read-only` \| `workspace-write` \| `danger-full-access` | OS sandbox policy. | | `sandbox_workspace_write.writable_roots` | array | Extra writable roots in workspace‑write. | diff --git a/docs/example-config.md b/docs/example-config.md index 43a12a3b5e..8fca8e73eb 100644 --- a/docs/example-config.md +++ b/docs/example-config.md @@ -33,6 +33,7 @@ model_provider = "openai" # model_context_window = 128000 # tokens; default: auto for model # model_max_output_tokens = 8192 # tokens; default: auto for model # model_auto_compact_token_limit = 0 # disable/override auto; default: model family specific +# context_manager_function_output_max_tokens = 2560 # tokens stored per tool output; default: 2560 ################################################################################ # Reasoning & Verbosity (Responses API capable models) From b811a9b9f7fa5d19412ad62bb90fa7cce6a40113 Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Sun, 16 Nov 2025 17:44:59 -0800 Subject: [PATCH 06/68] change function names --- codex-rs/core/src/compact.rs | 4 +- codex-rs/core/src/context_manager/history.rs | 11 +- .../core/src/context_manager/history_tests.rs | 10 +- codex-rs/core/src/context_manager/mod.rs | 2 +- codex-rs/core/src/error.rs | 4 +- codex-rs/core/src/tools/mod.rs | 4 +- codex-rs/core/src/truncate.rs | 253 +++++++++--------- codex-rs/core/src/unified_exec/session.rs | 4 +- .../core/src/unified_exec/session_manager.rs | 6 +- 9 files changed, 156 insertions(+), 142 deletions(-) diff --git a/codex-rs/core/src/compact.rs b/codex-rs/core/src/compact.rs index 0a0352f21d..73c949ef78 100644 --- a/codex-rs/core/src/compact.rs +++ b/codex-rs/core/src/compact.rs @@ -14,7 +14,7 @@ use crate::protocol::EventMsg; use crate::protocol::TaskStartedEvent; use crate::protocol::TurnContextItem; use crate::protocol::WarningEvent; -use crate::truncate::truncate_middle; +use crate::truncate::truncate_with_token_budget; use crate::util::backoff; use codex_protocol::items::TurnItem; use codex_protocol::models::ContentItem; @@ -257,7 +257,7 @@ fn build_token_limited_compacted_history_with_limit( selected_messages.push(message.clone()); remaining = remaining.saturating_sub(tokens); } else { - let (truncated, _) = truncate_middle(message, remaining); + let (truncated, _) = truncate_with_token_budget(message, remaining); selected_messages.push(truncated); break; } diff --git a/codex-rs/core/src/context_manager/history.rs b/codex-rs/core/src/context_manager/history.rs index 2e4809586c..787081372d 100644 --- a/codex-rs/core/src/context_manager/history.rs +++ b/codex-rs/core/src/context_manager/history.rs @@ -2,7 +2,7 @@ use crate::codex::TurnContext; use crate::context_manager::normalize; use crate::truncate::DEFAULT_FUNCTION_OUTPUT_TOKEN_LIMIT; use crate::truncate::truncate_function_output_items_to_token_limit; -use crate::truncate::truncate_middle; +use crate::truncate::truncate_with_token_budget; use codex_protocol::models::FunctionCallOutputPayload; use codex_protocol::models::ResponseItem; use codex_protocol::protocol::TokenUsage; @@ -153,8 +153,10 @@ impl ContextManager { fn process_item(&self, item: &ResponseItem) -> ResponseItem { match item { ResponseItem::FunctionCallOutput { call_id, output } => { - let (truncated, _) = - truncate_middle(output.content.as_str(), self.function_output_max_tokens); + let (truncated, _) = truncate_with_token_budget( + output.content.as_str(), + self.function_output_max_tokens, + ); let truncated_items = output.content_items.as_ref().map(|items| { truncate_function_output_items_to_token_limit( items, @@ -171,7 +173,8 @@ impl ContextManager { } } ResponseItem::CustomToolCallOutput { call_id, output } => { - let (truncated, _) = truncate_middle(output, self.function_output_max_tokens); + let (truncated, _) = + truncate_with_token_budget(output, self.function_output_max_tokens); ResponseItem::CustomToolCallOutput { call_id: call_id.clone(), output: truncated, diff --git a/codex-rs/core/src/context_manager/history_tests.rs b/codex-rs/core/src/context_manager/history_tests.rs index 239b84c812..cc2afb828f 100644 --- a/codex-rs/core/src/context_manager/history_tests.rs +++ b/codex-rs/core/src/context_manager/history_tests.rs @@ -372,7 +372,7 @@ fn format_exec_output_truncates_large_error() { let line = "very long execution error line that should trigger truncation\n"; let large_error = line.repeat(2_500); // way beyond both byte and line limits - let truncated = truncate::format_output_for_model_body( + let truncated = truncate::truncate_with_line_bytes_budget( &large_error, truncate::MODEL_FORMAT_MAX_BYTES, truncate::MODEL_FORMAT_MAX_LINES, @@ -386,7 +386,7 @@ fn format_exec_output_truncates_large_error() { #[test] fn format_exec_output_marks_byte_truncation_without_omitted_lines() { let long_line = "a".repeat(truncate::MODEL_FORMAT_MAX_BYTES + 50); - let truncated = truncate::format_output_for_model_body( + let truncated = truncate::truncate_with_line_bytes_budget( &long_line, truncate::MODEL_FORMAT_MAX_BYTES, truncate::MODEL_FORMAT_MAX_LINES, @@ -412,7 +412,7 @@ fn format_exec_output_returns_original_when_within_limits() { let content = "example output\n".repeat(10); assert_eq!( - truncate::format_output_for_model_body( + truncate::truncate_with_line_bytes_budget( &content, truncate::MODEL_FORMAT_MAX_BYTES, truncate::MODEL_FORMAT_MAX_LINES @@ -428,7 +428,7 @@ fn format_exec_output_reports_omitted_lines_and_keeps_head_and_tail() { .map(|idx| format!("line-{idx}\n")) .collect(); - let truncated = truncate::format_output_for_model_body( + let truncated = truncate::truncate_with_line_bytes_budget( &content, truncate::MODEL_FORMAT_MAX_BYTES, truncate::MODEL_FORMAT_MAX_LINES, @@ -460,7 +460,7 @@ fn format_exec_output_prefers_line_marker_when_both_limits_exceeded() { .map(|idx| format!("line-{idx}-{long_line}\n")) .collect(); - let truncated = truncate::format_output_for_model_body( + let truncated = truncate::truncate_with_line_bytes_budget( &content, truncate::MODEL_FORMAT_MAX_BYTES, truncate::MODEL_FORMAT_MAX_LINES, diff --git a/codex-rs/core/src/context_manager/mod.rs b/codex-rs/core/src/context_manager/mod.rs index ab0d2e8168..b19bc4e7ef 100644 --- a/codex-rs/core/src/context_manager/mod.rs +++ b/codex-rs/core/src/context_manager/mod.rs @@ -3,5 +3,5 @@ mod normalize; pub(crate) use crate::truncate::MODEL_FORMAT_MAX_BYTES; pub(crate) use crate::truncate::MODEL_FORMAT_MAX_LINES; -pub(crate) use crate::truncate::format_output_for_model_body; +pub(crate) use crate::truncate::truncate_with_line_bytes_budget; pub(crate) use history::ContextManager; diff --git a/codex-rs/core/src/error.rs b/codex-rs/core/src/error.rs index 5174098fac..293ba1ce5d 100644 --- a/codex-rs/core/src/error.rs +++ b/codex-rs/core/src/error.rs @@ -2,7 +2,7 @@ use crate::codex::ProcessedResponseItem; use crate::exec::ExecToolCallOutput; use crate::token_data::KnownPlan; use crate::token_data::PlanType; -use crate::truncate::truncate_middle; +use crate::truncate::truncate_with_token_budget; use chrono::DateTime; use chrono::Datelike; use chrono::Local; @@ -461,7 +461,7 @@ pub fn token_limited_error_message(e: &CodexErr) -> String { _ => e.to_string(), }; - truncate_middle(&message, ERROR_MESSAGE_UI_MAX_TOKENS).0 + truncate_with_token_budget(&message, ERROR_MESSAGE_UI_MAX_TOKENS).0 } #[cfg(test)] diff --git a/codex-rs/core/src/tools/mod.rs b/codex-rs/core/src/tools/mod.rs index c94a7c28d9..1588e37073 100644 --- a/codex-rs/core/src/tools/mod.rs +++ b/codex-rs/core/src/tools/mod.rs @@ -11,7 +11,7 @@ pub mod spec; use crate::context_manager::MODEL_FORMAT_MAX_BYTES; use crate::context_manager::MODEL_FORMAT_MAX_LINES; -use crate::context_manager::format_output_for_model_body; +use crate::context_manager::truncate_with_line_bytes_budget; use crate::exec::ExecToolCallOutput; pub use router::ToolRouter; use serde::Serialize; @@ -77,5 +77,5 @@ pub fn format_exec_output_str(exec_output: &ExecToolCallOutput) -> String { }; // Truncate for model consumption before serialization. - format_output_for_model_body(&body, MODEL_FORMAT_MAX_BYTES, MODEL_FORMAT_MAX_LINES) + truncate_with_line_bytes_budget(&body, MODEL_FORMAT_MAX_BYTES, MODEL_FORMAT_MAX_LINES) } diff --git a/codex-rs/core/src/truncate.rs b/codex-rs/core/src/truncate.rs index e69022491a..83bc87c4bf 100644 --- a/codex-rs/core/src/truncate.rs +++ b/codex-rs/core/src/truncate.rs @@ -39,7 +39,7 @@ pub(crate) fn truncate_function_output_items_to_token_limit( out.push(FunctionCallOutputContentItem::InputText { text: text.clone() }); remaining_tokens = remaining_tokens.saturating_sub(token_len); } else { - let (snippet, _) = truncate_middle(text, remaining_tokens); + let (snippet, _) = truncate_with_token_budget(text, remaining_tokens); if !snippet.is_empty() { out.push(FunctionCallOutputContentItem::InputText { text: snippet }); } @@ -66,153 +66,62 @@ pub(crate) fn truncate_function_output_items_to_token_limit( /// Format a block of exec/tool output for model consumption, truncating by /// lines and bytes while preserving head and tail segments. -pub(crate) fn format_output_for_model_body( +pub(crate) fn truncate_with_line_bytes_budget( content: &str, - limit_bytes: usize, - limit_lines: usize, + bytes_budget: usize, + lines_budget: usize, ) -> String { // Head+tail truncation for the model: show the beginning and end with an elision. // Clients still receive full streams; only this formatted summary is capped. let total_lines = content.lines().count(); - if content.len() <= limit_bytes && total_lines <= limit_lines { + if content.len() <= bytes_budget && total_lines <= lines_budget { return content.to_string(); } - let output = truncate_formatted_exec_output(content, total_lines, limit_bytes, limit_lines); + let output = truncate_formatted_exec_output(content, total_lines, bytes_budget, lines_budget); format!("Total output lines: {total_lines}\n\n{output}") } -fn truncate_formatted_exec_output( - content: &str, - total_lines: usize, - limit_bytes: usize, - limit_lines: usize, -) -> String { - error_on_double_truncation(content); - let head_lines: usize = limit_lines / 2; - let tail_lines: usize = limit_lines - head_lines; // 128 - let head_bytes: usize = limit_bytes / 2; - let segments: Vec<&str> = content.split_inclusive('\n').collect(); - let head_take = head_lines.min(segments.len()); - let tail_take = tail_lines.min(segments.len().saturating_sub(head_take)); - let omitted = segments.len().saturating_sub(head_take + tail_take); - - let head_slice_end: usize = segments - .iter() - .take(head_take) - .map(|segment| segment.len()) - .sum(); - let tail_slice_start: usize = if tail_take == 0 { - content.len() - } else { - content.len() - - segments - .iter() - .rev() - .take(tail_take) - .map(|segment| segment.len()) - .sum::() - }; - let head_slice = &content[..head_slice_end]; - let tail_slice = &content[tail_slice_start..]; - let truncated_by_bytes = content.len() > limit_bytes; - // this is a bit wrong. We are counting metadata lines and not just shell output lines. - let marker = if omitted > 0 { - Some(format!( - "\n[... omitted {omitted} of {total_lines} lines ...]\n\n" - )) - } else if truncated_by_bytes { - Some(format!( - "\n[... output truncated to fit {limit_bytes} bytes ...]\n\n" - )) - } else { - None - }; - - let marker_len = marker.as_ref().map_or(0, String::len); - let base_head_budget = head_bytes.min(limit_bytes); - let head_budget = base_head_budget.min(limit_bytes.saturating_sub(marker_len)); - let head_part = take_bytes_at_char_boundary(head_slice, head_budget); - let mut result = String::with_capacity(limit_bytes.min(content.len())); - - result.push_str(head_part); - if let Some(marker_text) = marker.as_ref() { - result.push_str(marker_text); - } - - let remaining = limit_bytes.saturating_sub(result.len()); - if remaining == 0 { - return result; - } - - let tail_part = take_last_bytes_at_char_boundary(tail_slice, remaining); - result.push_str(tail_part); - - result -} - -fn error_on_double_truncation(content: &str) { - if content.contains("Total output lines:") && content.contains("omitted") { - tracing::error!( - "FunctionCallOutput content was already truncated before ContextManager::record_items; this would cause double truncation {content}" - ); - } -} - -fn estimate_safe_token_count(text: &str, tokenizer: Option<&Tokenizer>) -> usize { - if text.is_empty() { - return 0; - } - - if text.len() > TOKENIZER_STACK_SAFE_BYTES { - return usize::try_from(approx_token_count(text)).unwrap_or(usize::MAX); - } - - tokenizer - .map(|tok| usize::try_from(tok.count(text)).unwrap_or(usize::MAX)) - .unwrap_or_else(|| usize::try_from(approx_token_count(text)).unwrap_or(usize::MAX)) -} - /// Truncate the middle of a UTF-8 string to at most `max_tokens` tokens, /// preserving the beginning and the end. Returns the possibly truncated string /// and `Some(original_token_count)` if truncation occurred; otherwise returns /// the original string and `None`. -pub(crate) fn truncate_middle(s: &str, max_tokens: usize) -> (String, Option) { +pub(crate) fn truncate_with_token_budget(s: &str, max_budget: usize) -> (String, Option) { if s.is_empty() { return (String::new(), None); } if s.len() > TOKENIZER_STACK_SAFE_BYTES { - return truncate_middle_fallback(s, max_tokens); + return truncate_with_token_estimate(s, max_budget); } let tokenizer = match Tokenizer::try_default() { Ok(tok) => tok, - Err(_) => return truncate_middle_fallback(s, max_tokens), + Err(_) => return truncate_with_token_estimate(s, max_budget), }; let encoded = tokenizer.encode(s, false); let total_tokens = encoded.len() as u64; - if max_tokens == 0 { + if max_budget == 0 { return ( format!("…{total_tokens} tokens truncated…"), Some(total_tokens), ); } - if encoded.len() <= max_tokens { + if encoded.len() <= max_budget { return (s.to_string(), None); } - let mut guess_removed = total_tokens.saturating_sub(max_tokens as u64).max(1); + let mut guess_removed = total_tokens.saturating_sub(max_budget as u64).max(1); for _ in 0..4 { let marker = format!("…{guess_removed} tokens truncated…"); let marker_len = usize::try_from(tokenizer.count(&marker)).unwrap_or(usize::MAX); - if marker_len >= max_tokens { + if marker_len >= max_budget { return (marker, Some(total_tokens)); } - let keep_budget = max_tokens - marker_len; + let keep_budget = max_budget - marker_len; if keep_budget == 0 { return (marker, Some(total_tokens)); } @@ -252,11 +161,11 @@ pub(crate) fn truncate_middle(s: &str, max_tokens: usize) -> (String, Option= max_tokens { + if marker_len >= max_budget { return (marker, Some(total_tokens)); } - let keep_budget = max_tokens - marker_len; + let keep_budget = max_budget - marker_len; if keep_budget == 0 { return (marker, Some(total_tokens)); } @@ -284,7 +193,8 @@ pub(crate) fn truncate_middle(s: &str, max_tokens: usize) -> (String, Option (String, Option) { +/// estimate the number of tokens in a string based on the length of the string +fn truncate_with_token_estimate(s: &str, max_tokens: usize) -> (String, Option) { let total_tokens = approx_token_count(s); if max_tokens == 0 { return ( @@ -396,14 +306,105 @@ fn pick_suffix_start(s: &str, right_budget: usize) -> usize { idx } +fn truncate_formatted_exec_output( + content: &str, + total_lines: usize, + limit_bytes: usize, + limit_lines: usize, +) -> String { + error_on_double_truncation(content); + let head_lines: usize = limit_lines / 2; + let tail_lines: usize = limit_lines - head_lines; // 128 + let head_bytes: usize = limit_bytes / 2; + let segments: Vec<&str> = content.split_inclusive('\n').collect(); + let head_take = head_lines.min(segments.len()); + let tail_take = tail_lines.min(segments.len().saturating_sub(head_take)); + let omitted = segments.len().saturating_sub(head_take + tail_take); + + let head_slice_end: usize = segments + .iter() + .take(head_take) + .map(|segment| segment.len()) + .sum(); + let tail_slice_start: usize = if tail_take == 0 { + content.len() + } else { + content.len() + - segments + .iter() + .rev() + .take(tail_take) + .map(|segment| segment.len()) + .sum::() + }; + let head_slice = &content[..head_slice_end]; + let tail_slice = &content[tail_slice_start..]; + let truncated_by_bytes = content.len() > limit_bytes; + // this is a bit wrong. We are counting metadata lines and not just shell output lines. + let marker = if omitted > 0 { + Some(format!( + "\n[... omitted {omitted} of {total_lines} lines ...]\n\n" + )) + } else if truncated_by_bytes { + Some(format!( + "\n[... output truncated to fit {limit_bytes} bytes ...]\n\n" + )) + } else { + None + }; + + let marker_len = marker.as_ref().map_or(0, String::len); + let base_head_budget = head_bytes.min(limit_bytes); + let head_budget = base_head_budget.min(limit_bytes.saturating_sub(marker_len)); + let head_part = take_bytes_at_char_boundary(head_slice, head_budget); + let mut result = String::with_capacity(limit_bytes.min(content.len())); + + result.push_str(head_part); + if let Some(marker_text) = marker.as_ref() { + result.push_str(marker_text); + } + + let remaining = limit_bytes.saturating_sub(result.len()); + if remaining == 0 { + return result; + } + + let tail_part = take_last_bytes_at_char_boundary(tail_slice, remaining); + result.push_str(tail_part); + + result +} + +fn error_on_double_truncation(content: &str) { + if content.contains("Total output lines:") && content.contains("omitted") { + tracing::error!( + "FunctionCallOutput content was already truncated before ContextManager::record_items; this would cause double truncation {content}" + ); + } +} + +fn estimate_safe_token_count(text: &str, tokenizer: Option<&Tokenizer>) -> usize { + if text.is_empty() { + return 0; + } + + if text.len() > TOKENIZER_STACK_SAFE_BYTES { + return usize::try_from(approx_token_count(text)).unwrap_or(usize::MAX); + } + + tokenizer + .map(|tok| usize::try_from(tok.count(text)).unwrap_or(usize::MAX)) + .unwrap_or_else(|| usize::try_from(approx_token_count(text)).unwrap_or(usize::MAX)) +} + #[cfg(test)] mod tests { use super::DEFAULT_FUNCTION_OUTPUT_TOKEN_LIMIT; use super::MODEL_FORMAT_MAX_BYTES; use super::MODEL_FORMAT_MAX_LINES; - use super::format_output_for_model_body; use super::truncate_function_output_items_to_token_limit; - use super::truncate_middle; + use super::truncate_with_line_bytes_budget; + use super::truncate_with_token_budget; use codex_protocol::models::FunctionCallOutputContentItem; use codex_utils_tokenizer::Tokenizer; use pretty_assertions::assert_eq; @@ -450,7 +451,7 @@ mod tests { let tok = Tokenizer::try_default().expect("load tokenizer"); let s = "short output"; let limit = usize::try_from(tok.count(s)).unwrap_or(0) + 10; - let (out, original) = truncate_middle(s, limit); + let (out, original) = truncate_with_token_budget(s, limit); assert_eq!(out, s); assert_eq!(original, None); } @@ -460,7 +461,7 @@ mod tests { let tok = Tokenizer::try_default().expect("load tokenizer"); let s = "abcdef"; let total = tok.count(s) as u64; - let (out, original) = truncate_middle(s, 0); + let (out, original) = truncate_with_token_budget(s, 0); assert!(out.contains("tokens truncated")); assert_eq!(original, Some(total)); } @@ -470,7 +471,7 @@ mod tests { let tok = Tokenizer::try_default().expect("load tokenizer"); let s = "alpha beta gamma delta epsilon zeta eta theta iota kappa"; let max_tokens = 12; - let (out, original) = truncate_middle(s, max_tokens); + let (out, original) = truncate_with_token_budget(s, max_tokens); assert!(out.contains("tokens truncated")); assert_eq!(original, Some(tok.count(s) as u64)); let result_tokens = tok.count(&out) as usize; @@ -482,7 +483,7 @@ mod tests { let tok = Tokenizer::try_default().expect("load tokenizer"); let s = "😀😀😀😀😀😀😀😀😀😀\nsecond line with ascii text\n"; let max_tokens = 8; - let (out, tokens) = truncate_middle(s, max_tokens); + let (out, tokens) = truncate_with_token_budget(s, max_tokens); assert!(out.contains("tokens truncated")); assert!(!out.contains('\u{fffd}')); @@ -496,7 +497,7 @@ mod tests { let line = "very long execution error line that should trigger truncation\n"; let large_error = line.repeat(2_500); // way beyond both byte and line limits - let truncated = format_output_for_model_body( + let truncated = truncate_with_line_bytes_budget( &large_error, MODEL_FORMAT_MAX_BYTES, MODEL_FORMAT_MAX_LINES, @@ -525,7 +526,7 @@ mod tests { #[test] fn format_exec_output_marks_byte_truncation_without_omitted_lines() { let long_line = "a".repeat(MODEL_FORMAT_MAX_BYTES + 50); - let truncated = format_output_for_model_body( + let truncated = truncate_with_line_bytes_budget( &long_line, MODEL_FORMAT_MAX_BYTES, MODEL_FORMAT_MAX_LINES, @@ -549,7 +550,11 @@ mod tests { let content = "example output\n".repeat(10); assert_eq!( - format_output_for_model_body(&content, MODEL_FORMAT_MAX_BYTES, MODEL_FORMAT_MAX_LINES), + truncate_with_line_bytes_budget( + &content, + MODEL_FORMAT_MAX_BYTES, + MODEL_FORMAT_MAX_LINES + ), content ); } @@ -561,8 +566,11 @@ mod tests { .map(|idx| format!("line-{idx}\n")) .collect(); - let truncated = - format_output_for_model_body(&content, MODEL_FORMAT_MAX_BYTES, MODEL_FORMAT_MAX_LINES); + let truncated = truncate_with_line_bytes_budget( + &content, + MODEL_FORMAT_MAX_BYTES, + MODEL_FORMAT_MAX_LINES, + ); let omitted = total_lines - MODEL_FORMAT_MAX_LINES; let expected_marker = format!("[... omitted {omitted} of {total_lines} lines ...]"); @@ -591,8 +599,11 @@ mod tests { .map(|idx| format!("line-{idx}-{long_line}\n")) .collect(); - let truncated = - format_output_for_model_body(&content, MODEL_FORMAT_MAX_BYTES, MODEL_FORMAT_MAX_LINES); + let truncated = truncate_with_line_bytes_budget( + &content, + MODEL_FORMAT_MAX_BYTES, + MODEL_FORMAT_MAX_LINES, + ); assert!( truncated.contains("[... omitted 42 of 298 lines ...]"), diff --git a/codex-rs/core/src/unified_exec/session.rs b/codex-rs/core/src/unified_exec/session.rs index 0be00aedda..ef006677f1 100644 --- a/codex-rs/core/src/unified_exec/session.rs +++ b/codex-rs/core/src/unified_exec/session.rs @@ -14,7 +14,7 @@ use crate::exec::ExecToolCallOutput; use crate::exec::SandboxType; use crate::exec::StreamOutput; use crate::exec::is_likely_sandbox_denied; -use crate::truncate::truncate_middle; +use crate::truncate::truncate_with_token_budget; use codex_utils_pty::ExecCommandSession; use codex_utils_pty::SpawnedPty; @@ -166,7 +166,7 @@ impl UnifiedExecSession { }; if is_likely_sandbox_denied(self.sandbox_type(), &exec_output) { - let (snippet, _) = truncate_middle(&aggregated_text, UNIFIED_EXEC_OUTPUT_MAX_TOKENS); + let (snippet, _) = truncate_with_token_budget(&aggregated_text, UNIFIED_EXEC_OUTPUT_MAX_TOKENS); let message = if snippet.is_empty() { format!("exit code {exit_code}") } else { diff --git a/codex-rs/core/src/unified_exec/session_manager.rs b/codex-rs/core/src/unified_exec/session_manager.rs index 3a4f9e245c..c7081b4014 100644 --- a/codex-rs/core/src/unified_exec/session_manager.rs +++ b/codex-rs/core/src/unified_exec/session_manager.rs @@ -36,7 +36,7 @@ use super::generate_chunk_id; use super::resolve_max_tokens; use super::session::OutputBuffer; use super::session::UnifiedExecSession; -use crate::truncate::truncate_middle; +use crate::truncate::truncate_with_token_budget; impl UnifiedExecSessionManager { pub(crate) async fn exec_command( @@ -70,7 +70,7 @@ impl UnifiedExecSessionManager { let wall_time = Instant::now().saturating_duration_since(start); let text = String::from_utf8_lossy(&collected).to_string(); - let (output, original_token_count) = truncate_middle(&text, max_tokens); + let (output, original_token_count) = truncate_with_token_budget(&text, max_tokens); let original_token_count = original_token_count.and_then(|count| usize::try_from(count).ok()); let chunk_id = generate_chunk_id(); @@ -177,7 +177,7 @@ impl UnifiedExecSessionManager { let wall_time = Instant::now().saturating_duration_since(start); let text = String::from_utf8_lossy(&collected).to_string(); - let (output, original_token_count) = truncate_middle(&text, max_tokens); + let (output, original_token_count) = truncate_with_token_budget(&text, max_tokens); let original_token_count = original_token_count.and_then(|count| usize::try_from(count).ok()); let chunk_id = generate_chunk_id(); From d599cf29c5c9c9706458a1d76747002ff4605818 Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Sun, 16 Nov 2025 18:21:24 -0800 Subject: [PATCH 07/68] cleanup --- codex-rs/core/src/codex.rs | 8 + codex-rs/core/src/compact.rs | 2 +- codex-rs/core/src/context_manager/history.rs | 15 +- .../core/src/context_manager/history_tests.rs | 26 ++ codex-rs/core/src/error.rs | 2 +- codex-rs/core/src/state/session.rs | 10 +- codex-rs/core/src/truncate.rs | 413 +++++++++++------- codex-rs/core/src/unified_exec/session.rs | 3 +- .../core/src/unified_exec/session_manager.rs | 8 +- 9 files changed, 316 insertions(+), 171 deletions(-) diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index cac4d9204a..587aeca8bb 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -374,6 +374,10 @@ impl SessionConfiguration { pub(crate) fn context_manager_function_output_max_tokens(&self) -> usize { self.context_manager_function_output_max_tokens } + + pub(crate) fn model(&self) -> &str { + self.model.as_str() + } } #[derive(Default, Clone)] @@ -737,6 +741,8 @@ impl Session { let mut state = self.state.lock().await; state.session_configuration = state.session_configuration.apply(&updates); + let model = state.session_configuration.model().to_string(); + state.history.set_model(Some(model.as_str())); } pub(crate) async fn new_turn(&self, updates: SessionSettingsUpdate) -> Arc { @@ -753,6 +759,8 @@ impl Session { let mut state = self.state.lock().await; let session_configuration = state.session_configuration.clone().apply(&updates); state.session_configuration = session_configuration.clone(); + let model = state.session_configuration.model().to_string(); + state.history.set_model(Some(model.as_str())); session_configuration }; diff --git a/codex-rs/core/src/compact.rs b/codex-rs/core/src/compact.rs index 73c949ef78..b6311c3565 100644 --- a/codex-rs/core/src/compact.rs +++ b/codex-rs/core/src/compact.rs @@ -257,7 +257,7 @@ fn build_token_limited_compacted_history_with_limit( selected_messages.push(message.clone()); remaining = remaining.saturating_sub(tokens); } else { - let (truncated, _) = truncate_with_token_budget(message, remaining); + let (truncated, _) = truncate_with_token_budget(message, remaining, None); selected_messages.push(truncated); break; } diff --git a/codex-rs/core/src/context_manager/history.rs b/codex-rs/core/src/context_manager/history.rs index 787081372d..cce5ec5e82 100644 --- a/codex-rs/core/src/context_manager/history.rs +++ b/codex-rs/core/src/context_manager/history.rs @@ -17,6 +17,7 @@ pub(crate) struct ContextManager { items: Vec, token_info: Option, function_output_max_tokens: usize, + model: Option, } impl ContextManager { @@ -29,9 +30,14 @@ impl ContextManager { items: Vec::new(), token_info: TokenUsageInfo::new_or_append(&None, &None, None), function_output_max_tokens: max_tokens, + model: None, } } + pub(crate) fn set_model(&mut self, model: Option<&str>) { + self.model = model.map(|m| m.to_string()); + } + pub(crate) fn token_info(&self) -> Option { self.token_info.clone() } @@ -156,11 +162,13 @@ impl ContextManager { let (truncated, _) = truncate_with_token_budget( output.content.as_str(), self.function_output_max_tokens, + self.model.as_deref(), ); let truncated_items = output.content_items.as_ref().map(|items| { truncate_function_output_items_to_token_limit( items, self.function_output_max_tokens, + self.model.as_deref(), ) }); ResponseItem::FunctionCallOutput { @@ -173,8 +181,11 @@ impl ContextManager { } } ResponseItem::CustomToolCallOutput { call_id, output } => { - let (truncated, _) = - truncate_with_token_budget(output, self.function_output_max_tokens); + let (truncated, _) = truncate_with_token_budget( + output, + self.function_output_max_tokens, + self.model.as_deref(), + ); ResponseItem::CustomToolCallOutput { call_id: call_id.clone(), output: truncated, diff --git a/codex-rs/core/src/context_manager/history_tests.rs b/codex-rs/core/src/context_manager/history_tests.rs index cc2afb828f..95a4ac1f1e 100644 --- a/codex-rs/core/src/context_manager/history_tests.rs +++ b/codex-rs/core/src/context_manager/history_tests.rs @@ -522,6 +522,32 @@ fn truncates_across_multiple_under_limit_texts_and_reports_omitted() { .as_array() .expect("array output"); + for (idx, entry) in output.iter().enumerate() { + if let Some(obj) = entry.as_object() { + let kind = obj + .get("type") + .and_then(|v| v.as_str()) + .unwrap_or("unknown"); + if kind == "input_text" { + if let Some(text) = obj.get("text").and_then(|t| t.as_str()) { + let preview: String = text.chars().take(40).collect(); + println!( + "entry {idx}: {kind} len={} preview={preview:?}", + text.len() + ); + } else { + println!("entry {idx}: {kind} (missing text)"); + } + } else if kind == "input_image" { + println!("entry {idx}: {kind}"); + } else { + println!("entry {idx}: {kind}"); + } + } else { + println!("entry {idx}: non-object {entry:?}"); + } + } + // Expect: t1 (full), t2 (full), image, t3 (truncated), summary mentioning 2 omitted. assert_eq!(output.len(), 5); diff --git a/codex-rs/core/src/error.rs b/codex-rs/core/src/error.rs index 293ba1ce5d..2b9481f2f8 100644 --- a/codex-rs/core/src/error.rs +++ b/codex-rs/core/src/error.rs @@ -461,7 +461,7 @@ pub fn token_limited_error_message(e: &CodexErr) -> String { _ => e.to_string(), }; - truncate_with_token_budget(&message, ERROR_MESSAGE_UI_MAX_TOKENS).0 + truncate_with_token_budget(&message, ERROR_MESSAGE_UI_MAX_TOKENS, None).0 } #[cfg(test)] diff --git a/codex-rs/core/src/state/session.rs b/codex-rs/core/src/state/session.rs index 9ed8eeccd1..574db2f975 100644 --- a/codex-rs/core/src/state/session.rs +++ b/codex-rs/core/src/state/session.rs @@ -18,11 +18,13 @@ pub(crate) struct SessionState { impl SessionState { /// Create a new session state mirroring previous `State::default()` semantics. pub(crate) fn new(session_configuration: SessionConfiguration) -> Self { + let mut history = ContextManager::with_function_output_limit( + session_configuration.context_manager_function_output_max_tokens(), + ); + history.set_model(Some(session_configuration.model())); Self { - session_configuration, - history: ContextManager::with_function_output_limit( - session_configuration.context_manager_function_output_max_tokens(), - ), + session_configuration: session_configuration.clone(), + history, latest_rate_limits: None, } } diff --git a/codex-rs/core/src/truncate.rs b/codex-rs/core/src/truncate.rs index 83bc87c4bf..1a4c175558 100644 --- a/codex-rs/core/src/truncate.rs +++ b/codex-rs/core/src/truncate.rs @@ -12,6 +12,63 @@ pub const MODEL_FORMAT_MAX_BYTES: usize = 10 * 1024; // 10 KiB pub const MODEL_FORMAT_MAX_LINES: usize = 256; // lines pub const DEFAULT_FUNCTION_OUTPUT_TOKEN_LIMIT: usize = MODEL_FORMAT_MAX_BYTES / 4; const TOKENIZER_STACK_SAFE_BYTES: usize = 1024 * 1024; // 1 MiB +const APPROX_BYTES_PER_TOKEN: usize = 4; +const TOKEN_ROUTER_MIN_ESTIMATE_BYTES: usize = 4 * 1024; // 4 KiB guard for byte-path routing + +/// Format a block of exec/tool output for model consumption, truncating by +/// lines and bytes while preserving head and tail segments. +pub(crate) fn truncate_with_line_bytes_budget( + content: &str, + bytes_budget: usize, + lines_budget: usize, +) -> String { + // Head+tail truncation for the model: show the beginning and end with an elision. + // Clients still receive full streams; only this formatted summary is capped. + let total_lines = content.lines().count(); + if content.len() <= bytes_budget && total_lines <= lines_budget { + return content.to_string(); + } + let output = truncate_formatted_exec_output(content, total_lines, bytes_budget, lines_budget); + format!("Total output lines: {total_lines}\n\n{output}") +} + +/// Truncate the middle of a UTF-8 string to at most `max_tokens` tokens, +/// preserving the beginning and the end. Returns the possibly truncated string +/// and `Some(original_token_count)` if truncation occurred; otherwise returns +/// the original string and `None`. +pub(crate) fn truncate_with_token_budget( + s: &str, + max_budget: usize, + model: Option<&str>, +) -> (String, Option) { + if s.is_empty() { + return (String::new(), None); + } + + let byte_len = s.len(); + if max_budget > 0 { + let small_threshold = approx_bytes_for_tokens(max_budget / 4); + if small_threshold > 0 && byte_len <= small_threshold { + return (s.to_string(), None); + } + } + + let exceeds_stack_limit = byte_len > TOKENIZER_STACK_SAFE_BYTES; + let exceeds_large_threshold = max_budget > 0 + && byte_len >= TOKEN_ROUTER_MIN_ESTIMATE_BYTES + && byte_len > approx_bytes_for_tokens(max_budget.saturating_mul(2)); + if exceeds_stack_limit || exceeds_large_threshold { + return truncate_with_byte_estimate(s, max_budget, model); + } + + let tokenizer = match select_tokenizer(model) { + Some(tok) => tok, + None => return truncate_with_byte_estimate(s, max_budget, model), + }; + let encoded = tokenizer.encode(s, false); + let total_tokens = encoded.len() as u64; + truncate_with_tokenizer_path(tokenizer, encoded, max_budget, s, total_tokens) +} /// Globally truncate function output items to fit within /// `max_tokens` tokens by preserving as many @@ -20,11 +77,12 @@ const TOKENIZER_STACK_SAFE_BYTES: usize = 1024 * 1024; // 1 MiB pub(crate) fn truncate_function_output_items_to_token_limit( items: &[FunctionCallOutputContentItem], max_tokens: usize, + model: Option<&str>, ) -> Vec { let mut out: Vec = Vec::with_capacity(items.len()); let mut remaining_tokens = max_tokens; let mut omitted_text_items = 0usize; - let tokenizer = Tokenizer::try_default().ok(); + let tokenizer = select_tokenizer(model); for it in items { match it { @@ -39,7 +97,7 @@ pub(crate) fn truncate_function_output_items_to_token_limit( out.push(FunctionCallOutputContentItem::InputText { text: text.clone() }); remaining_tokens = remaining_tokens.saturating_sub(token_len); } else { - let (snippet, _) = truncate_with_token_budget(text, remaining_tokens); + let (snippet, _) = truncate_with_token_budget(text, remaining_tokens, model); if !snippet.is_empty() { out.push(FunctionCallOutputContentItem::InputText { text: snippet }); } @@ -64,58 +122,24 @@ pub(crate) fn truncate_function_output_items_to_token_limit( out } -/// Format a block of exec/tool output for model consumption, truncating by -/// lines and bytes while preserving head and tail segments. -pub(crate) fn truncate_with_line_bytes_budget( - content: &str, - bytes_budget: usize, - lines_budget: usize, -) -> String { - // Head+tail truncation for the model: show the beginning and end with an elision. - // Clients still receive full streams; only this formatted summary is capped. - let total_lines = content.lines().count(); - if content.len() <= bytes_budget && total_lines <= lines_budget { - return content.to_string(); - } - let output = truncate_formatted_exec_output(content, total_lines, bytes_budget, lines_budget); - format!("Total output lines: {total_lines}\n\n{output}") -} - -/// Truncate the middle of a UTF-8 string to at most `max_tokens` tokens, -/// preserving the beginning and the end. Returns the possibly truncated string -/// and `Some(original_token_count)` if truncation occurred; otherwise returns -/// the original string and `None`. -pub(crate) fn truncate_with_token_budget(s: &str, max_budget: usize) -> (String, Option) { - if s.is_empty() { - return (String::new(), None); - } - - if s.len() > TOKENIZER_STACK_SAFE_BYTES { - return truncate_with_token_estimate(s, max_budget); - } - - let tokenizer = match Tokenizer::try_default() { - Ok(tok) => tok, - Err(_) => return truncate_with_token_estimate(s, max_budget), - }; - - let encoded = tokenizer.encode(s, false); - let total_tokens = encoded.len() as u64; - +fn truncate_with_tokenizer_path( + tokenizer: Tokenizer, + encoded: Vec, + max_budget: usize, + original: &str, + total_tokens: u64, +) -> (String, Option) { if max_budget == 0 { - return ( - format!("…{total_tokens} tokens truncated…"), - Some(total_tokens), - ); + return (format_truncation_marker(total_tokens), Some(total_tokens)); } if encoded.len() <= max_budget { - return (s.to_string(), None); + return (original.to_string(), None); } let mut guess_removed = total_tokens.saturating_sub(max_budget as u64).max(1); for _ in 0..4 { - let marker = format!("…{guess_removed} tokens truncated…"); + let marker = format_truncation_marker(guess_removed); let marker_len = usize::try_from(tokenizer.count(&marker)).unwrap_or(usize::MAX); if marker_len >= max_budget { return (marker, Some(total_tokens)); @@ -126,40 +150,27 @@ pub(crate) fn truncate_with_token_budget(s: &str, max_budget: usize) -> (String, return (marker, Some(total_tokens)); } - let left_keep = keep_budget / 2; - let right_keep = keep_budget - left_keep; + let (left_keep, right_keep) = split_budget(keep_budget); let removed_tokens = encoded.len().saturating_sub(left_keep + right_keep) as u64; - let final_marker = format!("…{removed_tokens} tokens truncated…"); + let final_marker = format_truncation_marker(removed_tokens); let final_marker_len = usize::try_from(tokenizer.count(&final_marker)).unwrap_or(usize::MAX); if final_marker_len == marker_len { - let prefix = if left_keep > 0 { - tokenizer.decode(&encoded[..left_keep]).unwrap_or_default() - } else { - String::new() - }; - let suffix = if right_keep > 0 { - tokenizer - .decode(&encoded[encoded.len() - right_keep..]) - .unwrap_or_default() - } else { - String::new() - }; - let mut out = - String::with_capacity(prefix.len() + final_marker.len() + suffix.len() + 1); - out.push_str(&prefix); - out.push_str(&final_marker); - if !suffix.is_empty() { - out.push('\n'); - out.push_str(&suffix); - } + let (prefix, suffix) = + decode_token_segments(&tokenizer, &encoded, left_keep, right_keep); + let out = assemble_truncated_output( + &prefix, + &suffix, + &final_marker, + NewlineMode::WhenSuffixPresent, + ); return (out, Some(total_tokens)); } guess_removed = removed_tokens.max(1); } - let marker = format!("…{guess_removed} tokens truncated…"); + let marker = format_truncation_marker(guess_removed); let marker_len = usize::try_from(tokenizer.count(&marker)).unwrap_or(usize::MAX); if marker_len >= max_budget { return (marker, Some(total_tokens)); @@ -169,60 +180,42 @@ pub(crate) fn truncate_with_token_budget(s: &str, max_budget: usize) -> (String, if keep_budget == 0 { return (marker, Some(total_tokens)); } - let left_keep = keep_budget / 2; - let right_keep = keep_budget - left_keep; - let prefix = if left_keep > 0 { - tokenizer.decode(&encoded[..left_keep]).unwrap_or_default() - } else { - String::new() - }; - let suffix = if right_keep > 0 { - tokenizer - .decode(&encoded[encoded.len() - right_keep..]) - .unwrap_or_default() - } else { - String::new() - }; - let mut out = String::with_capacity(prefix.len() + marker.len() + suffix.len() + 1); - out.push_str(&prefix); - out.push_str(&marker); - if !suffix.is_empty() { - out.push('\n'); - out.push_str(&suffix); - } + let (left_keep, right_keep) = split_budget(keep_budget); + let (prefix, suffix) = decode_token_segments(&tokenizer, &encoded, left_keep, right_keep); + let out = assemble_truncated_output(&prefix, &suffix, &marker, NewlineMode::WhenSuffixPresent); (out, Some(total_tokens)) } /// estimate the number of tokens in a string based on the length of the string -fn truncate_with_token_estimate(s: &str, max_tokens: usize) -> (String, Option) { +fn truncate_with_byte_estimate( + s: &str, + max_tokens: usize, + model: Option<&str>, +) -> (String, Option) { let total_tokens = approx_token_count(s); if max_tokens == 0 { - return ( - format!("…{total_tokens} tokens truncated…"), - Some(total_tokens), - ); + return (format_truncation_marker(total_tokens), Some(total_tokens)); } if total_tokens as usize <= max_tokens { return (s.to_string(), None); } - let max_bytes = max_tokens.saturating_mul(4); + let max_bytes = approx_bytes_for_tokens(max_tokens); if s.len() <= max_bytes { return (s.to_string(), None); } let mut guess_tokens = total_tokens.saturating_sub(max_tokens as u64).max(1); for _ in 0..4 { - let marker = format!("…{guess_tokens} tokens truncated…"); + let marker = format_truncation_marker(guess_tokens); let marker_len = marker.len(); let keep_budget = max_bytes.saturating_sub(marker_len); if keep_budget == 0 { return (marker, Some(total_tokens)); } - let left_budget = keep_budget / 2; - let right_budget = keep_budget - left_budget; + let (left_budget, right_budget) = split_budget(keep_budget); let prefix_end = pick_prefix_end(s, left_budget); let mut suffix_start = pick_suffix_start(s, right_budget); if suffix_start < prefix_end { @@ -230,80 +223,41 @@ fn truncate_with_token_estimate(s: &str, max_tokens: usize) -> (String, Option u64 { - (text.len() as u64).saturating_add(3) / 4 -} - -fn truncate_on_boundary(input: &str, max_len: usize) -> &str { - if input.len() <= max_len { - return input; - } - let mut end = max_len; - while end > 0 && !input.is_char_boundary(end) { - end -= 1; - } - &input[..end] -} - -fn pick_prefix_end(s: &str, left_budget: usize) -> usize { - if let Some(head) = s.get(..left_budget) - && let Some(i) = head.rfind('\n') - { - return i + 1; - } - truncate_on_boundary(s, left_budget).len() -} - -fn pick_suffix_start(s: &str, right_budget: usize) -> usize { - let start_tail = s.len().saturating_sub(right_budget); - if let Some(tail) = s.get(start_tail..) - && let Some(i) = tail.find('\n') - { - return start_tail + i + 1; - } - - let mut idx = start_tail.min(s.len()); - while idx < s.len() && !s.is_char_boundary(idx) { - idx += 1; - } - idx + let out = assemble_truncated_output( + &s[..prefix_end], + &s[suffix_start..], + &marker, + NewlineMode::Always, + ); + ensure_candidate_within_token_budget(out, max_tokens, total_tokens, model) } fn truncate_formatted_exec_output( @@ -375,6 +329,144 @@ fn truncate_formatted_exec_output( result } +#[derive(Clone, Copy)] +enum NewlineMode { + Always, + WhenSuffixPresent, +} + +fn format_truncation_marker(removed_tokens: u64) -> String { + format!("…{removed_tokens} tokens truncated…") +} + +fn split_budget(budget: usize) -> (usize, usize) { + let left = budget / 2; + (left, budget - left) +} + +fn decode_token_segments( + tokenizer: &Tokenizer, + encoded: &[i32], + left_keep: usize, + right_keep: usize, +) -> (String, String) { + let prefix = if left_keep > 0 { + tokenizer.decode(&encoded[..left_keep]).unwrap_or_default() + } else { + String::new() + }; + let suffix = if right_keep > 0 { + tokenizer + .decode(&encoded[encoded.len() - right_keep..]) + .unwrap_or_default() + } else { + String::new() + }; + (prefix, suffix) +} + +fn assemble_truncated_output( + prefix: &str, + suffix: &str, + marker: &str, + newline_mode: NewlineMode, +) -> String { + let newline_needed = match newline_mode { + NewlineMode::Always => true, + NewlineMode::WhenSuffixPresent => !suffix.is_empty(), + }; + let newline_len = if newline_needed { 1 } else { 0 }; + let mut out = String::with_capacity(prefix.len() + marker.len() + suffix.len() + newline_len); + out.push_str(prefix); + out.push_str(marker); + if newline_needed { + out.push('\n'); + } + if !suffix.is_empty() { + out.push_str(suffix); + } + out +} + +fn ensure_candidate_within_token_budget( + candidate: String, + max_budget: usize, + total_tokens: u64, + model: Option<&str>, +) -> (String, Option) { + if max_budget == 0 { + return (candidate, Some(total_tokens)); + } + + if let Some(tokenizer) = select_tokenizer(model) { + let encoded = tokenizer.encode(candidate.as_str(), false); + if encoded.len() > max_budget { + return truncate_with_tokenizer_path( + tokenizer, + encoded, + max_budget, + candidate.as_str(), + total_tokens, + ); + } + } + + (candidate, Some(total_tokens)) +} + +fn approx_token_count(text: &str) -> u64 { + (text.len() as u64).saturating_add(3) / 4 +} + +fn approx_bytes_for_tokens(tokens: usize) -> usize { + tokens.saturating_mul(APPROX_BYTES_PER_TOKEN) +} + +fn select_tokenizer(model: Option<&str>) -> Option { + if let Some(name) = model { + Tokenizer::for_model(name) + .or_else(|_| Tokenizer::try_default()) + .ok() + } else { + Tokenizer::try_default().ok() + } +} + +fn truncate_on_boundary(input: &str, max_len: usize) -> &str { + if input.len() <= max_len { + return input; + } + let mut end = max_len; + while end > 0 && !input.is_char_boundary(end) { + end -= 1; + } + &input[..end] +} + +fn pick_prefix_end(s: &str, left_budget: usize) -> usize { + if let Some(head) = s.get(..left_budget) + && let Some(i) = head.rfind('\n') + { + return i + 1; + } + truncate_on_boundary(s, left_budget).len() +} + +fn pick_suffix_start(s: &str, right_budget: usize) -> usize { + let start_tail = s.len().saturating_sub(right_budget); + if let Some(tail) = s.get(start_tail..) + && let Some(i) = tail.find('\n') + { + return start_tail + i + 1; + } + + let mut idx = start_tail.min(s.len()); + while idx < s.len() && !s.is_char_boundary(idx) { + idx += 1; + } + idx +} + fn error_on_double_truncation(content: &str) { if content.contains("Total output lines:") && content.contains("omitted") { tracing::error!( @@ -451,7 +543,7 @@ mod tests { let tok = Tokenizer::try_default().expect("load tokenizer"); let s = "short output"; let limit = usize::try_from(tok.count(s)).unwrap_or(0) + 10; - let (out, original) = truncate_with_token_budget(s, limit); + let (out, original) = truncate_with_token_budget(s, limit, None); assert_eq!(out, s); assert_eq!(original, None); } @@ -461,7 +553,7 @@ mod tests { let tok = Tokenizer::try_default().expect("load tokenizer"); let s = "abcdef"; let total = tok.count(s) as u64; - let (out, original) = truncate_with_token_budget(s, 0); + let (out, original) = truncate_with_token_budget(s, 0, None); assert!(out.contains("tokens truncated")); assert_eq!(original, Some(total)); } @@ -471,7 +563,7 @@ mod tests { let tok = Tokenizer::try_default().expect("load tokenizer"); let s = "alpha beta gamma delta epsilon zeta eta theta iota kappa"; let max_tokens = 12; - let (out, original) = truncate_with_token_budget(s, max_tokens); + let (out, original) = truncate_with_token_budget(s, max_tokens, None); assert!(out.contains("tokens truncated")); assert_eq!(original, Some(tok.count(s) as u64)); let result_tokens = tok.count(&out) as usize; @@ -483,7 +575,7 @@ mod tests { let tok = Tokenizer::try_default().expect("load tokenizer"); let s = "😀😀😀😀😀😀😀😀😀😀\nsecond line with ascii text\n"; let max_tokens = 8; - let (out, tokens) = truncate_with_token_budget(s, max_tokens); + let (out, tokens) = truncate_with_token_budget(s, max_tokens, None); assert!(out.contains("tokens truncated")); assert!(!out.contains('\u{fffd}')); @@ -659,6 +751,7 @@ mod tests { let output = truncate_function_output_items_to_token_limit( &items, DEFAULT_FUNCTION_OUTPUT_TOKEN_LIMIT, + None, ); // Expect: t1 (full), t2 (full), image, t3 (truncated), summary mentioning 2 omitted. diff --git a/codex-rs/core/src/unified_exec/session.rs b/codex-rs/core/src/unified_exec/session.rs index ef006677f1..8200996947 100644 --- a/codex-rs/core/src/unified_exec/session.rs +++ b/codex-rs/core/src/unified_exec/session.rs @@ -166,7 +166,8 @@ impl UnifiedExecSession { }; if is_likely_sandbox_denied(self.sandbox_type(), &exec_output) { - let (snippet, _) = truncate_with_token_budget(&aggregated_text, UNIFIED_EXEC_OUTPUT_MAX_TOKENS); + let (snippet, _) = + truncate_with_token_budget(&aggregated_text, UNIFIED_EXEC_OUTPUT_MAX_TOKENS, None); let message = if snippet.is_empty() { format!("exit code {exit_code}") } else { diff --git a/codex-rs/core/src/unified_exec/session_manager.rs b/codex-rs/core/src/unified_exec/session_manager.rs index c7081b4014..859473fce5 100644 --- a/codex-rs/core/src/unified_exec/session_manager.rs +++ b/codex-rs/core/src/unified_exec/session_manager.rs @@ -70,7 +70,9 @@ impl UnifiedExecSessionManager { let wall_time = Instant::now().saturating_duration_since(start); let text = String::from_utf8_lossy(&collected).to_string(); - let (output, original_token_count) = truncate_with_token_budget(&text, max_tokens); + let model = context.turn.client.get_model(); + let (output, original_token_count) = + truncate_with_token_budget(&text, max_tokens, Some(model.as_str())); let original_token_count = original_token_count.and_then(|count| usize::try_from(count).ok()); let chunk_id = generate_chunk_id(); @@ -177,7 +179,9 @@ impl UnifiedExecSessionManager { let wall_time = Instant::now().saturating_duration_since(start); let text = String::from_utf8_lossy(&collected).to_string(); - let (output, original_token_count) = truncate_with_token_budget(&text, max_tokens); + let model = turn_ref.client.get_model(); + let (output, original_token_count) = + truncate_with_token_budget(&text, max_tokens, Some(model.as_str())); let original_token_count = original_token_count.and_then(|count| usize::try_from(count).ok()); let chunk_id = generate_chunk_id(); From 16369ee35e8f433a2a29517d20f8223200c5d7ac Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Sun, 16 Nov 2025 18:50:25 -0800 Subject: [PATCH 08/68] progress --- codex-rs/core/src/context_manager/history.rs | 1 - .../core/src/context_manager/history_tests.rs | 5 +---- codex-rs/core/src/truncate.rs | 22 ++++++++++++------- 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/codex-rs/core/src/context_manager/history.rs b/codex-rs/core/src/context_manager/history.rs index cce5ec5e82..e3ce603577 100644 --- a/codex-rs/core/src/context_manager/history.rs +++ b/codex-rs/core/src/context_manager/history.rs @@ -168,7 +168,6 @@ impl ContextManager { truncate_function_output_items_to_token_limit( items, self.function_output_max_tokens, - self.model.as_deref(), ) }); ResponseItem::FunctionCallOutput { diff --git a/codex-rs/core/src/context_manager/history_tests.rs b/codex-rs/core/src/context_manager/history_tests.rs index 95a4ac1f1e..f0b97f01cd 100644 --- a/codex-rs/core/src/context_manager/history_tests.rs +++ b/codex-rs/core/src/context_manager/history_tests.rs @@ -531,10 +531,7 @@ fn truncates_across_multiple_under_limit_texts_and_reports_omitted() { if kind == "input_text" { if let Some(text) = obj.get("text").and_then(|t| t.as_str()) { let preview: String = text.chars().take(40).collect(); - println!( - "entry {idx}: {kind} len={} preview={preview:?}", - text.len() - ); + println!("entry {idx}: {kind} len={} preview={preview:?}", text.len()); } else { println!("entry {idx}: {kind} (missing text)"); } diff --git a/codex-rs/core/src/truncate.rs b/codex-rs/core/src/truncate.rs index 1a4c175558..88f83a61d6 100644 --- a/codex-rs/core/src/truncate.rs +++ b/codex-rs/core/src/truncate.rs @@ -77,34 +77,41 @@ pub(crate) fn truncate_with_token_budget( pub(crate) fn truncate_function_output_items_to_token_limit( items: &[FunctionCallOutputContentItem], max_tokens: usize, - model: Option<&str>, ) -> Vec { let mut out: Vec = Vec::with_capacity(items.len()); let mut remaining_tokens = max_tokens; + let mut remaining_bytes = approx_bytes_for_tokens(max_tokens); let mut omitted_text_items = 0usize; - let tokenizer = select_tokenizer(model); + let tokenizer = Tokenizer::try_default().ok(); for it in items { match it { FunctionCallOutputContentItem::InputText { text } => { - if remaining_tokens == 0 { + if remaining_tokens == 0 || remaining_bytes == 0 { omitted_text_items += 1; continue; } let token_len = estimate_safe_token_count(text, tokenizer.as_ref()); - if token_len <= remaining_tokens { + if token_len <= remaining_tokens && text.len() <= remaining_bytes { out.push(FunctionCallOutputContentItem::InputText { text: text.clone() }); remaining_tokens = remaining_tokens.saturating_sub(token_len); + remaining_bytes = remaining_bytes.saturating_sub(text.len()); } else { - let (snippet, _) = truncate_with_token_budget(text, remaining_tokens, model); - if !snippet.is_empty() { + let (mut snippet, _) = truncate_with_token_budget(text, remaining_tokens, None); + if snippet.len() > remaining_bytes { + snippet = + take_bytes_at_char_boundary(&snippet, remaining_bytes).to_string(); + } + if snippet.is_empty() { + omitted_text_items += 1; + } else { + remaining_bytes = remaining_bytes.saturating_sub(snippet.len()); out.push(FunctionCallOutputContentItem::InputText { text: snippet }); } remaining_tokens = 0; } } - // todo(aibrahim): handle input images; resize FunctionCallOutputContentItem::InputImage { image_url } => { out.push(FunctionCallOutputContentItem::InputImage { image_url: image_url.clone(), @@ -751,7 +758,6 @@ mod tests { let output = truncate_function_output_items_to_token_limit( &items, DEFAULT_FUNCTION_OUTPUT_TOKEN_LIMIT, - None, ); // Expect: t1 (full), t2 (full), image, t3 (truncated), summary mentioning 2 omitted. From 5805ab0c57122810e7a6cded6730facdccd508bb Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Mon, 17 Nov 2025 11:08:56 -0800 Subject: [PATCH 09/68] tests --- codex-rs/core/tests/suite/unified_exec.rs | 30 +++++++++++++---------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/codex-rs/core/tests/suite/unified_exec.rs b/codex-rs/core/tests/suite/unified_exec.rs index 6e5710d0ca..b3498e62b0 100644 --- a/codex-rs/core/tests/suite/unified_exec.rs +++ b/codex-rs/core/tests/suite/unified_exec.rs @@ -26,9 +26,11 @@ use core_test_support::test_codex::TestCodex; use core_test_support::test_codex::test_codex; use core_test_support::wait_for_event; use core_test_support::wait_for_event_match; +use core_test_support::wait_for_event_with_timeout; use regex_lite::Regex; use serde_json::Value; use serde_json::json; +use tokio::time::Duration; fn extract_output_text(item: &Value) -> Option<&str> { item.get("output").and_then(|value| match value { @@ -1371,8 +1373,13 @@ PY summary: ReasoningSummary::Auto, }) .await?; - - wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await; + // This is a worst case scenario for the truncate logic. + wait_for_event_with_timeout( + &codex, + |event| matches!(event, EventMsg::TaskComplete(_)), + Duration::from_secs(10), + ) + .await; let requests = server.received_requests().await.expect("recorded requests"); assert!(!requests.is_empty(), "expected at least one POST request"); @@ -1529,8 +1536,8 @@ async fn unified_exec_formats_large_output_summary() -> Result<()> { } = builder.build(&server).await?; let script = r#"python3 - <<'PY' -for i in range(300): - print(f"line-{i}") +for i in range(3000): + print("token " * 50) PY "#; @@ -1583,15 +1590,12 @@ PY let outputs = collect_tool_outputs(&bodies)?; let large_output = outputs.get(call_id).expect("missing large output summary"); - assert_regex_match( - concat!( - r"(?s)", - r"line-0.*?", - r"\[\.{3} omitted \d+ of \d+ lines \.{3}\].*?", - r"line-299", - ), - &large_output.output, - ); + let output_text = &large_output.output; + assert_regex_match(r"(?s)tokens truncated", output_text); + + let original_tokens = large_output + .original_token_count + .expect("missing original_token_count for large output summary"); Ok(()) } From 7812ef55fb5137d267dc38f6bffff4cdf4c08495 Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Mon, 17 Nov 2025 11:44:22 -0800 Subject: [PATCH 10/68] tests --- .../core/src/context_manager/history_tests.rs | 98 ------------------- codex-rs/core/src/truncate.rs | 14 +-- 2 files changed, 4 insertions(+), 108 deletions(-) diff --git a/codex-rs/core/src/context_manager/history_tests.rs b/codex-rs/core/src/context_manager/history_tests.rs index f0b97f01cd..23d56ababb 100644 --- a/codex-rs/core/src/context_manager/history_tests.rs +++ b/codex-rs/core/src/context_manager/history_tests.rs @@ -476,104 +476,6 @@ fn format_exec_output_prefers_line_marker_when_both_limits_exceeded() { ); } -#[test] -fn truncates_across_multiple_under_limit_texts_and_reports_omitted() { - // Arrange: several text items, none exceeding per-item limit, but total exceeds budget. - let budget = truncate::MODEL_FORMAT_MAX_BYTES; - let t1_len = (budget / 2).saturating_sub(10); - let t2_len = (budget / 2).saturating_sub(10); - let remaining_after_t1_t2 = budget.saturating_sub(t1_len + t2_len); - let t3_len = 50; // gets truncated to remaining_after_t1_t2 - let t4_len = 5; // omitted - let t5_len = 7; // omitted - - let t1 = "a".repeat(t1_len); - let t2 = "b".repeat(t2_len); - let t3 = "c".repeat(t3_len); - let t4 = "d".repeat(t4_len); - let t5 = "e".repeat(t5_len); - - let item = ResponseItem::FunctionCallOutput { - call_id: "call-omit".to_string(), - output: FunctionCallOutputPayload { - content: "irrelevant".to_string(), - content_items: Some(vec![ - FunctionCallOutputContentItem::InputText { text: t1 }, - FunctionCallOutputContentItem::InputText { text: t2 }, - FunctionCallOutputContentItem::InputImage { - image_url: "img:mid".to_string(), - }, - FunctionCallOutputContentItem::InputText { text: t3 }, - FunctionCallOutputContentItem::InputText { text: t4 }, - FunctionCallOutputContentItem::InputText { text: t5 }, - ]), - success: Some(true), - }, - }; - - let mut history = ContextManager::new(); - history.record_items([&item]); - assert_eq!(history.items.len(), 1); - let json = serde_json::to_value(&history.items[0]).expect("serialize to json"); - - let output = json - .get("output") - .expect("output field") - .as_array() - .expect("array output"); - - for (idx, entry) in output.iter().enumerate() { - if let Some(obj) = entry.as_object() { - let kind = obj - .get("type") - .and_then(|v| v.as_str()) - .unwrap_or("unknown"); - if kind == "input_text" { - if let Some(text) = obj.get("text").and_then(|t| t.as_str()) { - let preview: String = text.chars().take(40).collect(); - println!("entry {idx}: {kind} len={} preview={preview:?}", text.len()); - } else { - println!("entry {idx}: {kind} (missing text)"); - } - } else if kind == "input_image" { - println!("entry {idx}: {kind}"); - } else { - println!("entry {idx}: {kind}"); - } - } else { - println!("entry {idx}: non-object {entry:?}"); - } - } - - // Expect: t1 (full), t2 (full), image, t3 (truncated), summary mentioning 2 omitted. - assert_eq!(output.len(), 5); - - let first = output[0].as_object().expect("first obj"); - assert_eq!(first.get("type").unwrap(), "input_text"); - let first_text = first.get("text").unwrap().as_str().unwrap(); - assert_eq!(first_text.len(), t1_len); - - let second = output[1].as_object().expect("second obj"); - assert_eq!(second.get("type").unwrap(), "input_text"); - let second_text = second.get("text").unwrap().as_str().unwrap(); - assert_eq!(second_text.len(), t2_len); - - assert_eq!( - output[2], - serde_json::json!({"type": "input_image", "image_url": "img:mid"}) - ); - - let fourth = output[3].as_object().expect("fourth obj"); - assert_eq!(fourth.get("type").unwrap(), "input_text"); - let fourth_text = fourth.get("text").unwrap().as_str().unwrap(); - assert_eq!(fourth_text.len(), remaining_after_t1_t2); - - let summary = output[4].as_object().expect("summary obj"); - assert_eq!(summary.get("type").unwrap(), "input_text"); - let summary_text = summary.get("text").unwrap().as_str().unwrap(); - assert!(summary_text.contains("omitted 2 text items")); -} - //TODO(aibrahim): run CI in release mode. #[cfg(not(debug_assertions))] #[test] diff --git a/codex-rs/core/src/truncate.rs b/codex-rs/core/src/truncate.rs index 88f83a61d6..158c44706a 100644 --- a/codex-rs/core/src/truncate.rs +++ b/codex-rs/core/src/truncate.rs @@ -80,33 +80,26 @@ pub(crate) fn truncate_function_output_items_to_token_limit( ) -> Vec { let mut out: Vec = Vec::with_capacity(items.len()); let mut remaining_tokens = max_tokens; - let mut remaining_bytes = approx_bytes_for_tokens(max_tokens); let mut omitted_text_items = 0usize; let tokenizer = Tokenizer::try_default().ok(); for it in items { match it { FunctionCallOutputContentItem::InputText { text } => { - if remaining_tokens == 0 || remaining_bytes == 0 { + if remaining_tokens == 0 { omitted_text_items += 1; continue; } let token_len = estimate_safe_token_count(text, tokenizer.as_ref()); - if token_len <= remaining_tokens && text.len() <= remaining_bytes { + if token_len <= remaining_tokens { out.push(FunctionCallOutputContentItem::InputText { text: text.clone() }); remaining_tokens = remaining_tokens.saturating_sub(token_len); - remaining_bytes = remaining_bytes.saturating_sub(text.len()); } else { - let (mut snippet, _) = truncate_with_token_budget(text, remaining_tokens, None); - if snippet.len() > remaining_bytes { - snippet = - take_bytes_at_char_boundary(&snippet, remaining_bytes).to_string(); - } + let (snippet, _) = truncate_with_token_budget(text, remaining_tokens, None); if snippet.is_empty() { omitted_text_items += 1; } else { - remaining_bytes = remaining_bytes.saturating_sub(snippet.len()); out.push(FunctionCallOutputContentItem::InputText { text: snippet }); } remaining_tokens = 0; @@ -761,6 +754,7 @@ mod tests { ); // Expect: t1 (full), t2 (full), image, t3 (truncated), summary mentioning 2 omitted. + eprintln!("output: {:?}", output); assert_eq!(output.len(), 5); let first_text = match &output[0] { From c9bc844968fb80a5c2ed4d5c15f24b6d8afe816a Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Mon, 17 Nov 2025 12:45:40 -0800 Subject: [PATCH 11/68] tests --- codex-rs/core/tests/suite/truncation.rs | 26 ++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/codex-rs/core/tests/suite/truncation.rs b/codex-rs/core/tests/suite/truncation.rs index 37ee4f3990..0dde8010a6 100644 --- a/codex-rs/core/tests/suite/truncation.rs +++ b/codex-rs/core/tests/suite/truncation.rs @@ -27,7 +27,6 @@ use core_test_support::skip_if_no_network; use core_test_support::test_codex::test_codex; use core_test_support::wait_for_event; use escargot::CargoBuild; -use regex_lite::Regex; use serde_json::Value; use serde_json::json; use std::collections::HashMap; @@ -48,7 +47,7 @@ async fn truncate_function_error_trims_respond_to_model() -> Result<()> { let test = builder.build(&server).await?; // Construct a very long, non-existent path to force a RespondToModel error with a large message - let long_path = "a".repeat(20_000); + let long_path = "axyzldg".repeat(20_000); let call_id = "grep-huge-error"; let args = json!({ "pattern": "alpha", @@ -80,12 +79,16 @@ async fn truncate_function_error_trims_respond_to_model() -> Result<()> { tracing::debug!(output = %output, "truncated function error output"); - // Expect plaintext with byte-truncation marker and no omitted-lines marker + // Expect plaintext with token-based truncation marker and no omitted-lines marker assert!( serde_json::from_str::(&output).is_err(), "expected error output to be plain text", ); - let truncated_pattern = r#"(?s)^Total output lines: 1\s+.*\[\.\.\. output truncated to fit 11264 bytes \.\.\.\]\s*$"#; + assert!( + !output.contains("Total output lines:"), + "error output should not include line-based truncation header: {output}", + ); + let truncated_pattern = r"(?s)^unable to access `.*tokens truncated.*$"; assert_regex_match(truncated_pattern, &output); assert!( !output.contains("omitted"), @@ -334,22 +337,19 @@ async fn mcp_tool_call_output_exceeds_limit_truncated_for_model() -> Result<()> .function_call_output_text(call_id) .context("function_call_output present for rmcp call")?; - // Expect plain text with byte-based truncation marker. + // Expect plain text with token-based truncation marker; the original JSON body + // is truncated in the middle of the echo string. assert!( serde_json::from_str::(&output).is_err(), "expected truncated MCP output to be plain text" ); assert!( - output.starts_with("Total output lines: 1\n\n{"), - "expected total line header and JSON head, got: {output}" + !output.contains("Total output lines:"), + "MCP output should not include line-based truncation header: {output}" ); - let byte_marker = Regex::new(r"\[\.\.\. output truncated to fit 11264 bytes \.\.\.\]") - .expect("compile regex"); - assert!( - byte_marker.is_match(&output), - "expected byte truncation marker, got: {output}" - ); + let truncated_pattern = r#"(?s)^\{"echo":\s*"ECHOING: long-message-with-newlines-.*tokens truncated.*long-message-with-newlines-.*$"#; + assert_regex_match(truncated_pattern, &output); Ok(()) } From f1522bafe85beef58bf751f0429ad44b40cfaa0e Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Mon, 17 Nov 2025 12:47:04 -0800 Subject: [PATCH 12/68] tests --- codex-rs/core/src/truncate.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codex-rs/core/src/truncate.rs b/codex-rs/core/src/truncate.rs index 158c44706a..95e6bc2046 100644 --- a/codex-rs/core/src/truncate.rs +++ b/codex-rs/core/src/truncate.rs @@ -336,7 +336,7 @@ enum NewlineMode { } fn format_truncation_marker(removed_tokens: u64) -> String { - format!("…{removed_tokens} tokens truncated…") + format!("[…{removed_tokens} tokens truncated…]") } fn split_budget(budget: usize) -> (usize, usize) { From da168135fa418fc304a0c8df3f4179abe0204edb Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Mon, 17 Nov 2025 12:58:59 -0800 Subject: [PATCH 13/68] lint --- codex-rs/core/src/context_manager/history.rs | 2 +- codex-rs/core/src/context_manager/history_tests.rs | 1 - codex-rs/core/src/truncate.rs | 6 ++---- codex-rs/core/tests/suite/unified_exec.rs | 1 + 4 files changed, 4 insertions(+), 6 deletions(-) diff --git a/codex-rs/core/src/context_manager/history.rs b/codex-rs/core/src/context_manager/history.rs index e3ce603577..e2d05feeb7 100644 --- a/codex-rs/core/src/context_manager/history.rs +++ b/codex-rs/core/src/context_manager/history.rs @@ -35,7 +35,7 @@ impl ContextManager { } pub(crate) fn set_model(&mut self, model: Option<&str>) { - self.model = model.map(|m| m.to_string()); + self.model = model.map(ToString::to_string); } pub(crate) fn token_info(&self) -> Option { diff --git a/codex-rs/core/src/context_manager/history_tests.rs b/codex-rs/core/src/context_manager/history_tests.rs index 23d56ababb..4be1010a39 100644 --- a/codex-rs/core/src/context_manager/history_tests.rs +++ b/codex-rs/core/src/context_manager/history_tests.rs @@ -3,7 +3,6 @@ use crate::context_manager::MODEL_FORMAT_MAX_LINES; use crate::truncate; use codex_git::GhostCommit; use codex_protocol::models::ContentItem; -use codex_protocol::models::FunctionCallOutputContentItem; use codex_protocol::models::FunctionCallOutputPayload; use codex_protocol::models::LocalShellAction; use codex_protocol::models::LocalShellExecAction; diff --git a/codex-rs/core/src/truncate.rs b/codex-rs/core/src/truncate.rs index 95e6bc2046..3ae07ad77d 100644 --- a/codex-rs/core/src/truncate.rs +++ b/codex-rs/core/src/truncate.rs @@ -520,7 +520,6 @@ mod tests { } fn build_chunked_text( - tok: &Tokenizer, chunk: &str, chunk_tokens: usize, target_tokens: usize, @@ -717,8 +716,8 @@ mod tests { let target_each = DEFAULT_FUNCTION_OUTPUT_TOKEN_LIMIT .saturating_div(2) .saturating_sub(chunk_tokens); - let (t1, t1_tokens) = build_chunked_text(&tok, chunk, chunk_tokens, target_each); - let (t2, t2_tokens) = build_chunked_text(&tok, chunk, chunk_tokens, target_each); + let (t1, t1_tokens) = build_chunked_text(chunk, chunk_tokens, target_each); + let (t2, t2_tokens) = build_chunked_text(chunk, chunk_tokens, target_each); let remaining_after_t1_t2 = DEFAULT_FUNCTION_OUTPUT_TOKEN_LIMIT.saturating_sub(t1_tokens + t2_tokens); assert!( @@ -754,7 +753,6 @@ mod tests { ); // Expect: t1 (full), t2 (full), image, t3 (truncated), summary mentioning 2 omitted. - eprintln!("output: {:?}", output); assert_eq!(output.len(), 5); let first_text = match &output[0] { diff --git a/codex-rs/core/tests/suite/unified_exec.rs b/codex-rs/core/tests/suite/unified_exec.rs index b3498e62b0..b15abc89cf 100644 --- a/codex-rs/core/tests/suite/unified_exec.rs +++ b/codex-rs/core/tests/suite/unified_exec.rs @@ -1596,6 +1596,7 @@ PY let original_tokens = large_output .original_token_count .expect("missing original_token_count for large output summary"); + assert!(original_tokens > 0); Ok(()) } From d1d06442bc7494f3f984de4ff5f85b6139e9f1ea Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Mon, 17 Nov 2025 13:41:09 -0800 Subject: [PATCH 14/68] lint --- codex-rs/core/src/truncate.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/codex-rs/core/src/truncate.rs b/codex-rs/core/src/truncate.rs index 3ae07ad77d..5b33611390 100644 --- a/codex-rs/core/src/truncate.rs +++ b/codex-rs/core/src/truncate.rs @@ -13,7 +13,6 @@ pub const MODEL_FORMAT_MAX_LINES: usize = 256; // lines pub const DEFAULT_FUNCTION_OUTPUT_TOKEN_LIMIT: usize = MODEL_FORMAT_MAX_BYTES / 4; const TOKENIZER_STACK_SAFE_BYTES: usize = 1024 * 1024; // 1 MiB const APPROX_BYTES_PER_TOKEN: usize = 4; -const TOKEN_ROUTER_MIN_ESTIMATE_BYTES: usize = 4 * 1024; // 4 KiB guard for byte-path routing /// Format a block of exec/tool output for model consumption, truncating by /// lines and bytes while preserving head and tail segments. @@ -54,9 +53,8 @@ pub(crate) fn truncate_with_token_budget( } let exceeds_stack_limit = byte_len > TOKENIZER_STACK_SAFE_BYTES; - let exceeds_large_threshold = max_budget > 0 - && byte_len >= TOKEN_ROUTER_MIN_ESTIMATE_BYTES - && byte_len > approx_bytes_for_tokens(max_budget.saturating_mul(2)); + let exceeds_large_threshold = + max_budget > 0 && byte_len > approx_bytes_for_tokens(max_budget.saturating_mul(2)); if exceeds_stack_limit || exceeds_large_threshold { return truncate_with_byte_estimate(s, max_budget, model); } From 7f9637de704ba2da3110044c4afde340d0d94942 Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Mon, 17 Nov 2025 14:36:29 -0800 Subject: [PATCH 15/68] remove line --- .../core/src/context_manager/history_tests.rs | 39 +++++----------- codex-rs/core/src/context_manager/mod.rs | 1 - codex-rs/core/src/model_family.rs | 10 +++++ codex-rs/core/src/tools/mod.rs | 3 +- codex-rs/core/src/truncate.rs | 44 ++++++------------- 5 files changed, 36 insertions(+), 61 deletions(-) diff --git a/codex-rs/core/src/context_manager/history_tests.rs b/codex-rs/core/src/context_manager/history_tests.rs index 4be1010a39..486ec4ed91 100644 --- a/codex-rs/core/src/context_manager/history_tests.rs +++ b/codex-rs/core/src/context_manager/history_tests.rs @@ -1,5 +1,4 @@ use super::*; -use crate::context_manager::MODEL_FORMAT_MAX_LINES; use crate::truncate; use codex_git::GhostCommit; use codex_protocol::models::ContentItem; @@ -349,8 +348,8 @@ fn assert_truncated_message_matches(message: &str, line: &str, total_lines: usiz } fn truncated_message_pattern(line: &str, total_lines: usize) -> String { - let head_lines = MODEL_FORMAT_MAX_LINES / 2; - let tail_lines = MODEL_FORMAT_MAX_LINES - head_lines; + let head_lines = truncate::MODEL_FORMAT_MAX_LINES / 2; + let tail_lines = truncate::MODEL_FORMAT_MAX_LINES - head_lines; let head_take = head_lines.min(total_lines); let tail_take = tail_lines.min(total_lines.saturating_sub(head_take)); let omitted = total_lines.saturating_sub(head_take + tail_take); @@ -371,11 +370,8 @@ fn format_exec_output_truncates_large_error() { let line = "very long execution error line that should trigger truncation\n"; let large_error = line.repeat(2_500); // way beyond both byte and line limits - let truncated = truncate::truncate_with_line_bytes_budget( - &large_error, - truncate::MODEL_FORMAT_MAX_BYTES, - truncate::MODEL_FORMAT_MAX_LINES, - ); + let truncated = + truncate::truncate_with_line_bytes_budget(&large_error, truncate::MODEL_FORMAT_MAX_BYTES); let total_lines = large_error.lines().count(); assert_truncated_message_matches(&truncated, line, total_lines); @@ -385,11 +381,8 @@ fn format_exec_output_truncates_large_error() { #[test] fn format_exec_output_marks_byte_truncation_without_omitted_lines() { let long_line = "a".repeat(truncate::MODEL_FORMAT_MAX_BYTES + 50); - let truncated = truncate::truncate_with_line_bytes_budget( - &long_line, - truncate::MODEL_FORMAT_MAX_BYTES, - truncate::MODEL_FORMAT_MAX_LINES, - ); + let truncated = + truncate::truncate_with_line_bytes_budget(&long_line, truncate::MODEL_FORMAT_MAX_BYTES); assert_ne!(truncated, long_line); let marker_line = format!( @@ -411,11 +404,7 @@ fn format_exec_output_returns_original_when_within_limits() { let content = "example output\n".repeat(10); assert_eq!( - truncate::truncate_with_line_bytes_budget( - &content, - truncate::MODEL_FORMAT_MAX_BYTES, - truncate::MODEL_FORMAT_MAX_LINES - ), + truncate::truncate_with_line_bytes_budget(&content, truncate::MODEL_FORMAT_MAX_BYTES,), content ); } @@ -427,11 +416,8 @@ fn format_exec_output_reports_omitted_lines_and_keeps_head_and_tail() { .map(|idx| format!("line-{idx}\n")) .collect(); - let truncated = truncate::truncate_with_line_bytes_budget( - &content, - truncate::MODEL_FORMAT_MAX_BYTES, - truncate::MODEL_FORMAT_MAX_LINES, - ); + let truncated = + truncate::truncate_with_line_bytes_budget(&content, truncate::MODEL_FORMAT_MAX_BYTES); let omitted = total_lines - truncate::MODEL_FORMAT_MAX_LINES; let expected_marker = format!("[... omitted {omitted} of {total_lines} lines ...]"); @@ -459,11 +445,8 @@ fn format_exec_output_prefers_line_marker_when_both_limits_exceeded() { .map(|idx| format!("line-{idx}-{long_line}\n")) .collect(); - let truncated = truncate::truncate_with_line_bytes_budget( - &content, - truncate::MODEL_FORMAT_MAX_BYTES, - truncate::MODEL_FORMAT_MAX_LINES, - ); + let truncated = + truncate::truncate_with_line_bytes_budget(&content, truncate::MODEL_FORMAT_MAX_BYTES); assert!( truncated.contains("[... omitted 42 of 298 lines ...]"), diff --git a/codex-rs/core/src/context_manager/mod.rs b/codex-rs/core/src/context_manager/mod.rs index b19bc4e7ef..2a4e4a7417 100644 --- a/codex-rs/core/src/context_manager/mod.rs +++ b/codex-rs/core/src/context_manager/mod.rs @@ -2,6 +2,5 @@ mod history; mod normalize; pub(crate) use crate::truncate::MODEL_FORMAT_MAX_BYTES; -pub(crate) use crate::truncate::MODEL_FORMAT_MAX_LINES; pub(crate) use crate::truncate::truncate_with_line_bytes_budget; pub(crate) use history::ContextManager; diff --git a/codex-rs/core/src/model_family.rs b/codex-rs/core/src/model_family.rs index b46fae4db7..db348bf1bf 100644 --- a/codex-rs/core/src/model_family.rs +++ b/codex-rs/core/src/model_family.rs @@ -4,6 +4,7 @@ use codex_protocol::config_types::Verbosity; use crate::config::types::ReasoningSummaryFormat; use crate::tools::handlers::apply_patch::ApplyPatchToolType; use crate::tools::spec::ConfigShellToolType; +use crate::truncate::TruncationMode; /// The `instructions` field in the payload sent to a model should always start /// with this content. @@ -66,6 +67,8 @@ pub struct ModelFamily { /// Preferred shell tool type for this model family when features do not override it. pub shell_type: ConfigShellToolType, + + pub truncation_mode: TruncationMode, } macro_rules! model_family { @@ -89,6 +92,7 @@ macro_rules! model_family { shell_type: ConfigShellToolType::Default, default_verbosity: None, default_reasoning_effort: None, + truncation_mode: TruncationMode::Bytes(10_000), }; // apply overrides @@ -146,6 +150,7 @@ pub fn find_family_for_model(slug: &str) -> Option { ], supports_parallel_tool_calls: true, support_verbosity: true, + truncation_mode: TruncationMode::Tokens(10_000), ) // Internal models. @@ -163,6 +168,7 @@ pub fn find_family_for_model(slug: &str) -> Option { ], supports_parallel_tool_calls: true, support_verbosity: true, + truncation_mode: TruncationMode::Tokens(10_000), ) // Production models. @@ -177,6 +183,7 @@ pub fn find_family_for_model(slug: &str) -> Option { base_instructions: GPT_5_CODEX_INSTRUCTIONS.to_string(), apply_patch_tool_type: Some(ApplyPatchToolType::Freeform), support_verbosity: false, + truncation_mode: TruncationMode::Tokens(10_000), ) } else if slug.starts_with("gpt-5.1") { model_family!( @@ -187,6 +194,7 @@ pub fn find_family_for_model(slug: &str) -> Option { default_verbosity: Some(Verbosity::Low), base_instructions: GPT_5_1_INSTRUCTIONS.to_string(), default_reasoning_effort: Some(ReasoningEffort::Medium), + truncation_mode: TruncationMode::Bytes(10_000), ) } else if slug.starts_with("gpt-5") { model_family!( @@ -194,6 +202,7 @@ pub fn find_family_for_model(slug: &str) -> Option { supports_reasoning_summaries: true, needs_special_apply_patch_instructions: true, support_verbosity: true, + truncation_mode: TruncationMode::Bytes(10_000), ) } else { None @@ -216,5 +225,6 @@ pub fn derive_default_model_family(model: &str) -> ModelFamily { shell_type: ConfigShellToolType::Default, default_verbosity: None, default_reasoning_effort: None, + truncation_mode: TruncationMode::Bytes(10_000), } } diff --git a/codex-rs/core/src/tools/mod.rs b/codex-rs/core/src/tools/mod.rs index 1588e37073..708194907c 100644 --- a/codex-rs/core/src/tools/mod.rs +++ b/codex-rs/core/src/tools/mod.rs @@ -10,7 +10,6 @@ pub mod sandboxing; pub mod spec; use crate::context_manager::MODEL_FORMAT_MAX_BYTES; -use crate::context_manager::MODEL_FORMAT_MAX_LINES; use crate::context_manager::truncate_with_line_bytes_budget; use crate::exec::ExecToolCallOutput; pub use router::ToolRouter; @@ -77,5 +76,5 @@ pub fn format_exec_output_str(exec_output: &ExecToolCallOutput) -> String { }; // Truncate for model consumption before serialization. - truncate_with_line_bytes_budget(&body, MODEL_FORMAT_MAX_BYTES, MODEL_FORMAT_MAX_LINES) + truncate_with_line_bytes_budget(&body, MODEL_FORMAT_MAX_BYTES) } diff --git a/codex-rs/core/src/truncate.rs b/codex-rs/core/src/truncate.rs index 5b33611390..0d6da3f565 100644 --- a/codex-rs/core/src/truncate.rs +++ b/codex-rs/core/src/truncate.rs @@ -14,13 +14,17 @@ pub const DEFAULT_FUNCTION_OUTPUT_TOKEN_LIMIT: usize = MODEL_FORMAT_MAX_BYTES / const TOKENIZER_STACK_SAFE_BYTES: usize = 1024 * 1024; // 1 MiB const APPROX_BYTES_PER_TOKEN: usize = 4; +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum TruncationMode { + Bytes(usize), + Tokens(usize), +} + /// Format a block of exec/tool output for model consumption, truncating by /// lines and bytes while preserving head and tail segments. -pub(crate) fn truncate_with_line_bytes_budget( - content: &str, - bytes_budget: usize, - lines_budget: usize, -) -> String { +pub(crate) fn truncate_with_line_bytes_budget(content: &str, bytes_budget: usize) -> String { + // TODO(aibrahim): to be removed + let lines_budget = MODEL_FORMAT_MAX_LINES; // Head+tail truncation for the model: show the beginning and end with an elision. // Clients still receive full streams; only this formatted summary is capped. let total_lines = content.lines().count(); @@ -586,11 +590,7 @@ mod tests { let line = "very long execution error line that should trigger truncation\n"; let large_error = line.repeat(2_500); // way beyond both byte and line limits - let truncated = truncate_with_line_bytes_budget( - &large_error, - MODEL_FORMAT_MAX_BYTES, - MODEL_FORMAT_MAX_LINES, - ); + let truncated = truncate_with_line_bytes_budget(&large_error, MODEL_FORMAT_MAX_BYTES); let total_lines = large_error.lines().count(); let pattern = truncated_message_pattern(line, total_lines); @@ -615,11 +615,7 @@ mod tests { #[test] fn format_exec_output_marks_byte_truncation_without_omitted_lines() { let long_line = "a".repeat(MODEL_FORMAT_MAX_BYTES + 50); - let truncated = truncate_with_line_bytes_budget( - &long_line, - MODEL_FORMAT_MAX_BYTES, - MODEL_FORMAT_MAX_LINES, - ); + let truncated = truncate_with_line_bytes_budget(&long_line, MODEL_FORMAT_MAX_BYTES); assert_ne!(truncated, long_line); let marker_line = @@ -639,11 +635,7 @@ mod tests { let content = "example output\n".repeat(10); assert_eq!( - truncate_with_line_bytes_budget( - &content, - MODEL_FORMAT_MAX_BYTES, - MODEL_FORMAT_MAX_LINES - ), + truncate_with_line_bytes_budget(&content, MODEL_FORMAT_MAX_BYTES), content ); } @@ -655,11 +647,7 @@ mod tests { .map(|idx| format!("line-{idx}\n")) .collect(); - let truncated = truncate_with_line_bytes_budget( - &content, - MODEL_FORMAT_MAX_BYTES, - MODEL_FORMAT_MAX_LINES, - ); + let truncated = truncate_with_line_bytes_budget(&content, MODEL_FORMAT_MAX_BYTES); let omitted = total_lines - MODEL_FORMAT_MAX_LINES; let expected_marker = format!("[... omitted {omitted} of {total_lines} lines ...]"); @@ -688,11 +676,7 @@ mod tests { .map(|idx| format!("line-{idx}-{long_line}\n")) .collect(); - let truncated = truncate_with_line_bytes_budget( - &content, - MODEL_FORMAT_MAX_BYTES, - MODEL_FORMAT_MAX_LINES, - ); + let truncated = truncate_with_line_bytes_budget(&content, MODEL_FORMAT_MAX_BYTES); assert!( truncated.contains("[... omitted 42 of 298 lines ...]"), From 283511836f8b3b1c8e24473a7257d13144af8909 Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Mon, 17 Nov 2025 14:53:36 -0800 Subject: [PATCH 16/68] remove line --- codex-rs/core/src/context_manager/history.rs | 22 ++-- codex-rs/core/src/state/session.rs | 4 +- codex-rs/core/src/truncate.rs | 102 +++++++++++++------ 3 files changed, 84 insertions(+), 44 deletions(-) diff --git a/codex-rs/core/src/context_manager/history.rs b/codex-rs/core/src/context_manager/history.rs index e2d05feeb7..66f3066580 100644 --- a/codex-rs/core/src/context_manager/history.rs +++ b/codex-rs/core/src/context_manager/history.rs @@ -56,7 +56,7 @@ impl ContextManager { } /// `items` is ordered from oldest to newest. - pub(crate) fn record_items(&mut self, items: I) + pub(crate) async fn record_items(&mut self, items: I) where I: IntoIterator, I::Item: std::ops::Deref, @@ -68,7 +68,7 @@ impl ContextManager { continue; } - let processed = self.process_item(item_ref); + let processed = self.process_item(item_ref).await; self.items.push(processed); } } @@ -156,7 +156,7 @@ impl ContextManager { items.retain(|item| !matches!(item, ResponseItem::GhostSnapshot { .. })); } - fn process_item(&self, item: &ResponseItem) -> ResponseItem { + async fn process_item(&self, item: &ResponseItem) -> ResponseItem { match item { ResponseItem::FunctionCallOutput { call_id, output } => { let (truncated, _) = truncate_with_token_budget( @@ -164,12 +164,16 @@ impl ContextManager { self.function_output_max_tokens, self.model.as_deref(), ); - let truncated_items = output.content_items.as_ref().map(|items| { - truncate_function_output_items_to_token_limit( - items, - self.function_output_max_tokens, - ) - }); + let truncated_items = match output.content_items.as_ref() { + Some(items) => Some( + truncate_function_output_items_to_token_limit( + items, + self.function_output_max_tokens, + ) + .await, + ), + None => None, + }; ResponseItem::FunctionCallOutput { call_id: call_id.clone(), output: FunctionCallOutputPayload { diff --git a/codex-rs/core/src/state/session.rs b/codex-rs/core/src/state/session.rs index 574db2f975..2e2e5289fe 100644 --- a/codex-rs/core/src/state/session.rs +++ b/codex-rs/core/src/state/session.rs @@ -30,12 +30,12 @@ impl SessionState { } // History helpers - pub(crate) fn record_items(&mut self, items: I) + pub(crate) async fn record_items(&mut self, items: I) where I: IntoIterator, I::Item: std::ops::Deref, { - self.history.record_items(items) + self.history.record_items(items).await; } pub(crate) fn clone_history(&self) -> ContextManager { diff --git a/codex-rs/core/src/truncate.rs b/codex-rs/core/src/truncate.rs index 0d6da3f565..a17db0e095 100644 --- a/codex-rs/core/src/truncate.rs +++ b/codex-rs/core/src/truncate.rs @@ -7,6 +7,10 @@ use codex_utils_string::take_bytes_at_char_boundary; use codex_utils_string::take_last_bytes_at_char_boundary; use codex_utils_tokenizer::Tokenizer; +use crate::model_family::ModelFamily; +use crate::model_family::derive_default_model_family; +use crate::model_family::find_family_for_model; + /// Model-formatting limits: clients get full streams; only content sent to the model is truncated. pub const MODEL_FORMAT_MAX_BYTES: usize = 10 * 1024; // 10 KiB pub const MODEL_FORMAT_MAX_LINES: usize = 256; // lines @@ -35,48 +39,42 @@ pub(crate) fn truncate_with_line_bytes_budget(content: &str, bytes_budget: usize format!("Total output lines: {total_lines}\n\n{output}") } -/// Truncate the middle of a UTF-8 string to at most `max_tokens` tokens, -/// preserving the beginning and the end. Returns the possibly truncated string -/// and `Some(original_token_count)` if truncation occurred; otherwise returns -/// the original string and `None`. -pub(crate) fn truncate_with_token_budget( - s: &str, - max_budget: usize, +pub(crate) async fn truncate_with_mode( + content: &str, model: Option<&str>, + tokens_budget: Option, ) -> (String, Option) { - if s.is_empty() { - return (String::new(), None); - } - - let byte_len = s.len(); - if max_budget > 0 { - let small_threshold = approx_bytes_for_tokens(max_budget / 4); - if small_threshold > 0 && byte_len <= small_threshold { - return (s.to_string(), None); + let mode = model + .map(|m| { + find_family_for_model(m) + .unwrap_or(derive_default_model_family(m)) + .truncation_mode + }) + .unwrap_or(TruncationMode::Bytes(MODEL_FORMAT_MAX_BYTES)); + match mode { + TruncationMode::Bytes(bytes) => { + let max_tokens = if let Some(tokens) = tokens_budget { + tokens + } else { + bytes / APPROX_BYTES_PER_TOKEN + }; + truncate_with_byte_estimate(content, max_tokens, model) + } + TruncationMode::Tokens(tokens) => { + if let Some(tokens) = tokens_budget { + truncate_with_token_budget(content, tokens, model).await + } else { + truncate_with_token_budget(content, tokens, model).await + } } } - - let exceeds_stack_limit = byte_len > TOKENIZER_STACK_SAFE_BYTES; - let exceeds_large_threshold = - max_budget > 0 && byte_len > approx_bytes_for_tokens(max_budget.saturating_mul(2)); - if exceeds_stack_limit || exceeds_large_threshold { - return truncate_with_byte_estimate(s, max_budget, model); - } - - let tokenizer = match select_tokenizer(model) { - Some(tok) => tok, - None => return truncate_with_byte_estimate(s, max_budget, model), - }; - let encoded = tokenizer.encode(s, false); - let total_tokens = encoded.len() as u64; - truncate_with_tokenizer_path(tokenizer, encoded, max_budget, s, total_tokens) } /// Globally truncate function output items to fit within /// `max_tokens` tokens by preserving as many /// text/image items as possible and appending a summary for any omitted text /// items. -pub(crate) fn truncate_function_output_items_to_token_limit( +pub(crate) async fn truncate_function_output_items_to_token_limit( items: &[FunctionCallOutputContentItem], max_tokens: usize, ) -> Vec { @@ -98,7 +96,8 @@ pub(crate) fn truncate_function_output_items_to_token_limit( out.push(FunctionCallOutputContentItem::InputText { text: text.clone() }); remaining_tokens = remaining_tokens.saturating_sub(token_len); } else { - let (snippet, _) = truncate_with_token_budget(text, remaining_tokens, None); + let (snippet, _) = + truncate_with_token_budget(text, remaining_tokens, None).await; if snippet.is_empty() { omitted_text_items += 1; } else { @@ -124,6 +123,43 @@ pub(crate) fn truncate_function_output_items_to_token_limit( out } +/// Truncate the middle of a UTF-8 string to at most `max_tokens` tokens, +/// preserving the beginning and the end. Returns the possibly truncated string +/// and `Some(original_token_count)` if truncation occurred; otherwise returns +/// the original string and `None`. +async fn truncate_with_token_budget( + s: &str, + max_tokens: usize, + model: Option<&str>, +) -> (String, Option) { + if s.is_empty() { + return (String::new(), None); + } + + let byte_len = s.len(); + if max_tokens > 0 { + let small_threshold = approx_bytes_for_tokens(max_tokens / 4); + if small_threshold > 0 && byte_len <= small_threshold { + return (s.to_string(), None); + } + } + + let exceeds_stack_limit = byte_len > TOKENIZER_STACK_SAFE_BYTES; + let exceeds_large_threshold = + max_tokens > 0 && byte_len > approx_bytes_for_tokens(max_tokens.saturating_mul(2)); + if exceeds_stack_limit || exceeds_large_threshold { + return truncate_with_byte_estimate(s, max_tokens, model); + } + + let tokenizer = match select_tokenizer(model) { + Some(tok) => tok, + None => return truncate_with_byte_estimate(s, max_tokens, model), + }; + let encoded = tokenizer.encode(s, false); + let total_tokens = encoded.len() as u64; + truncate_with_tokenizer_path(tokenizer, encoded, max_tokens, s, total_tokens) +} + fn truncate_with_tokenizer_path( tokenizer: Tokenizer, encoded: Vec, From 64bb960c1db7b6538545dad5b8f4751f1c069eef Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Mon, 17 Nov 2025 15:34:37 -0800 Subject: [PATCH 17/68] router --- codex-rs/core/src/client.rs | 6 ++++ codex-rs/core/src/codex.rs | 3 ++ codex-rs/core/src/compact.rs | 27 ++++++++++++---- codex-rs/core/src/context_manager/history.rs | 32 ++++++++----------- codex-rs/core/src/error.rs | 20 ++++++++---- codex-rs/core/src/state/session.rs | 4 +-- codex-rs/core/src/tools/orchestrator.rs | 5 ++- .../core/src/tools/runtimes/unified_exec.rs | 4 +-- codex-rs/core/src/truncate.rs | 16 ++++------ codex-rs/core/src/unified_exec/session.rs | 20 ++++++++---- .../core/src/unified_exec/session_manager.rs | 9 +++--- 11 files changed, 91 insertions(+), 55 deletions(-) diff --git a/codex-rs/core/src/client.rs b/codex-rs/core/src/client.rs index 98775e3d3a..fa49028fb8 100644 --- a/codex-rs/core/src/client.rs +++ b/codex-rs/core/src/client.rs @@ -132,6 +132,12 @@ impl ModelClient { }) } + pub fn get_max_output_tokens(&self) -> Option { + self.config.model_max_output_tokens.or_else(|| { + get_model_info(&self.config.model_family).map(|info| info.max_output_tokens) + }) + } + pub fn config(&self) -> Arc { Arc::clone(&self.config) } diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index 587aeca8bb..69903436be 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -1007,6 +1007,7 @@ impl Session { self.build_initial_context(turn_context), &user_messages, &compacted.message, + Some(turn_context.client.get_model().as_str()), ); history.replace(rebuilt); } @@ -2957,6 +2958,7 @@ mod tests { session.build_initial_context(turn_context), &user_messages1, summary1, + Some(turn_context.client.get_model().as_str()), ); live_history.replace(rebuilt1); rollout_items.push(RolloutItem::Compacted(CompactedItem { @@ -2990,6 +2992,7 @@ mod tests { session.build_initial_context(turn_context), &user_messages2, summary2, + Some(turn_context.client.get_model().as_str()), ); live_history.replace(rebuilt2); rollout_items.push(RolloutItem::Compacted(CompactedItem { diff --git a/codex-rs/core/src/compact.rs b/codex-rs/core/src/compact.rs index b6311c3565..ff7b483fe1 100644 --- a/codex-rs/core/src/compact.rs +++ b/codex-rs/core/src/compact.rs @@ -14,7 +14,7 @@ use crate::protocol::EventMsg; use crate::protocol::TaskStartedEvent; use crate::protocol::TurnContextItem; use crate::protocol::WarningEvent; -use crate::truncate::truncate_with_token_budget; +use crate::truncate::truncate_text; use crate::util::backoff; use codex_protocol::items::TurnItem; use codex_protocol::models::ContentItem; @@ -148,8 +148,12 @@ async fn run_compact_task_inner( let user_messages = collect_user_messages(&history_snapshot); let initial_context = sess.build_initial_context(turn_context.as_ref()); - let mut new_history = - build_token_limited_compacted_history(initial_context, &user_messages, &summary_text); + let mut new_history = build_token_limited_compacted_history( + initial_context, + &user_messages, + &summary_text, + Some(turn_context.client.get_model().as_str()), + ); let ghost_snapshots: Vec = history_snapshot .iter() .filter(|item| matches!(item, ResponseItem::GhostSnapshot { .. })) @@ -226,12 +230,14 @@ pub(crate) fn build_token_limited_compacted_history( initial_context: Vec, user_messages: &[String], summary_text: &str, + model: Option<&str>, ) -> Vec { build_token_limited_compacted_history_with_limit( initial_context, user_messages, summary_text, COMPACT_USER_MESSAGE_MAX_TOKENS, + model, ) } @@ -240,6 +246,7 @@ fn build_token_limited_compacted_history_with_limit( user_messages: &[String], summary_text: &str, max_tokens: usize, + model: Option<&str>, ) -> Vec { let mut selected_messages: Vec = Vec::new(); if max_tokens > 0 { @@ -257,7 +264,7 @@ fn build_token_limited_compacted_history_with_limit( selected_messages.push(message.clone()); remaining = remaining.saturating_sub(tokens); } else { - let (truncated, _) = truncate_with_token_budget(message, remaining, None); + let (truncated, _) = truncate_text(message, Some(remaining), model); selected_messages.push(truncated); break; } @@ -324,6 +331,8 @@ async fn drain_to_completed( #[cfg(test)] mod tests { + use crate::config::OPENAI_DEFAULT_MODEL; + use super::*; use pretty_assertions::assert_eq; @@ -420,11 +429,13 @@ mod tests { // that oversized user content is truncated. let max_tokens = 16; let big = "word ".repeat(200); + let model = OPENAI_DEFAULT_MODEL; let history = super::build_token_limited_compacted_history_with_limit( Vec::new(), std::slice::from_ref(&big), "SUMMARY", max_tokens, + Some(model), ); assert_eq!(history.len(), 2); @@ -462,8 +473,12 @@ mod tests { let user_messages = vec!["first user message".to_string()]; let summary_text = "summary text"; - let history = - build_token_limited_compacted_history(initial_context, &user_messages, summary_text); + let history = build_token_limited_compacted_history( + initial_context, + &user_messages, + summary_text, + Some(OPENAI_DEFAULT_MODEL), + ); assert!( !history.is_empty(), "expected compacted history to include summary" diff --git a/codex-rs/core/src/context_manager/history.rs b/codex-rs/core/src/context_manager/history.rs index 66f3066580..d1a02ccfed 100644 --- a/codex-rs/core/src/context_manager/history.rs +++ b/codex-rs/core/src/context_manager/history.rs @@ -2,7 +2,7 @@ use crate::codex::TurnContext; use crate::context_manager::normalize; use crate::truncate::DEFAULT_FUNCTION_OUTPUT_TOKEN_LIMIT; use crate::truncate::truncate_function_output_items_to_token_limit; -use crate::truncate::truncate_with_token_budget; +use crate::truncate::truncate_text; use codex_protocol::models::FunctionCallOutputPayload; use codex_protocol::models::ResponseItem; use codex_protocol::protocol::TokenUsage; @@ -56,7 +56,7 @@ impl ContextManager { } /// `items` is ordered from oldest to newest. - pub(crate) async fn record_items(&mut self, items: I) + pub(crate) fn record_items(&mut self, items: I) where I: IntoIterator, I::Item: std::ops::Deref, @@ -68,7 +68,7 @@ impl ContextManager { continue; } - let processed = self.process_item(item_ref).await; + let processed = self.process_item(item_ref); self.items.push(processed); } } @@ -156,24 +156,20 @@ impl ContextManager { items.retain(|item| !matches!(item, ResponseItem::GhostSnapshot { .. })); } - async fn process_item(&self, item: &ResponseItem) -> ResponseItem { + fn process_item(&self, item: &ResponseItem) -> ResponseItem { match item { ResponseItem::FunctionCallOutput { call_id, output } => { - let (truncated, _) = truncate_with_token_budget( + let (truncated, _) = truncate_text( output.content.as_str(), - self.function_output_max_tokens, + Some(self.function_output_max_tokens), self.model.as_deref(), ); - let truncated_items = match output.content_items.as_ref() { - Some(items) => Some( - truncate_function_output_items_to_token_limit( - items, - self.function_output_max_tokens, - ) - .await, - ), - None => None, - }; + let truncated_items = output.content_items.as_ref().map(|items| { + truncate_function_output_items_to_token_limit( + items, + self.function_output_max_tokens, + ) + }); ResponseItem::FunctionCallOutput { call_id: call_id.clone(), output: FunctionCallOutputPayload { @@ -184,9 +180,9 @@ impl ContextManager { } } ResponseItem::CustomToolCallOutput { call_id, output } => { - let (truncated, _) = truncate_with_token_budget( + let (truncated, _) = truncate_text( output, - self.function_output_max_tokens, + Some(self.function_output_max_tokens), self.model.as_deref(), ); ResponseItem::CustomToolCallOutput { diff --git a/codex-rs/core/src/error.rs b/codex-rs/core/src/error.rs index 2b9481f2f8..83ab7d4e98 100644 --- a/codex-rs/core/src/error.rs +++ b/codex-rs/core/src/error.rs @@ -2,7 +2,7 @@ use crate::codex::ProcessedResponseItem; use crate::exec::ExecToolCallOutput; use crate::token_data::KnownPlan; use crate::token_data::PlanType; -use crate::truncate::truncate_with_token_budget; +use crate::truncate::truncate_text; use chrono::DateTime; use chrono::Datelike; use chrono::Local; @@ -431,7 +431,7 @@ impl CodexErr { } } -pub fn token_limited_error_message(e: &CodexErr) -> String { +pub fn token_limited_error_message(e: &CodexErr, model: Option<&str>) -> String { let message = match e { CodexErr::Sandbox(SandboxErr::Denied { output }) => { let aggregated = output.aggregated_output.text.trim(); @@ -461,7 +461,7 @@ pub fn token_limited_error_message(e: &CodexErr) -> String { _ => e.to_string(), }; - truncate_with_token_budget(&message, ERROR_MESSAGE_UI_MAX_TOKENS, None).0 + truncate_text(&message, Some(ERROR_MESSAGE_UI_MAX_TOKENS), model).0 } #[cfg(test)] @@ -533,7 +533,10 @@ mod tests { let err = CodexErr::Sandbox(SandboxErr::Denied { output: Box::new(output), }); - assert_eq!(token_limited_error_message(&err), "aggregate detail"); + assert_eq!( + token_limited_error_message(&err, Some(OPENAI_DEFAULT_MODEL)), + "aggregate detail" + ); } #[test] @@ -550,7 +553,7 @@ mod tests { output: Box::new(output), }); assert_eq!( - token_limited_error_message(&err), + token_limited_error_message(&err, Some(OPENAI_DEFAULT_MODEL)), "stderr detail\nstdout detail" ); } @@ -568,7 +571,10 @@ mod tests { let err = CodexErr::Sandbox(SandboxErr::Denied { output: Box::new(output), }); - assert_eq!(token_limited_error_message(&err), "stdout only"); + assert_eq!( + token_limited_error_message(&err, Some(OPENAI_DEFAULT_MODEL)), + "stdout only" + ); } #[test] @@ -585,7 +591,7 @@ mod tests { output: Box::new(output), }); assert_eq!( - token_limited_error_message(&err), + token_limited_error_message(&err, Some(OPENAI_DEFAULT_MODEL)), "command failed inside sandbox with exit code 13" ); } diff --git a/codex-rs/core/src/state/session.rs b/codex-rs/core/src/state/session.rs index 2e2e5289fe..faab6248fb 100644 --- a/codex-rs/core/src/state/session.rs +++ b/codex-rs/core/src/state/session.rs @@ -30,12 +30,12 @@ impl SessionState { } // History helpers - pub(crate) async fn record_items(&mut self, items: I) + pub(crate) fn record_items(&mut self, items: I) where I: IntoIterator, I::Item: std::ops::Deref, { - self.history.record_items(items).await; + self.history.record_items(items); } pub(crate) fn clone_history(&self) -> ContextManager { diff --git a/codex-rs/core/src/tools/orchestrator.rs b/codex-rs/core/src/tools/orchestrator.rs index ea584809ec..df335260a3 100644 --- a/codex-rs/core/src/tools/orchestrator.rs +++ b/codex-rs/core/src/tools/orchestrator.rs @@ -129,7 +129,10 @@ impl ToolOrchestrator { let err = SandboxErr::Denied { output: output.clone(), }; - let friendly = token_limited_error_message(&CodexErr::Sandbox(err)); + let friendly = token_limited_error_message( + &CodexErr::Sandbox(err), + Some(turn_ctx.client.get_model().as_str()), + ); let failure_summary = format!("failed in sandbox: {friendly}"); risk = tool_ctx diff --git a/codex-rs/core/src/tools/runtimes/unified_exec.rs b/codex-rs/core/src/tools/runtimes/unified_exec.rs index cddac1924e..5a5e60b38b 100644 --- a/codex-rs/core/src/tools/runtimes/unified_exec.rs +++ b/codex-rs/core/src/tools/runtimes/unified_exec.rs @@ -153,7 +153,7 @@ impl<'a> ToolRuntime for UnifiedExecRunt &mut self, req: &UnifiedExecRequest, attempt: &SandboxAttempt<'_>, - _ctx: &ToolCtx<'_>, + ctx: &ToolCtx<'_>, ) -> Result { let spec = build_command_spec( &req.command, @@ -168,7 +168,7 @@ impl<'a> ToolRuntime for UnifiedExecRunt .env_for(&spec) .map_err(|err| ToolError::Codex(err.into()))?; self.manager - .open_session_with_exec_env(&exec_env) + .open_session_with_exec_env(&exec_env, ctx) .await .map_err(|err| match err { UnifiedExecError::SandboxDenied { output, .. } => { diff --git a/codex-rs/core/src/truncate.rs b/codex-rs/core/src/truncate.rs index a17db0e095..a211d8567d 100644 --- a/codex-rs/core/src/truncate.rs +++ b/codex-rs/core/src/truncate.rs @@ -7,7 +7,6 @@ use codex_utils_string::take_bytes_at_char_boundary; use codex_utils_string::take_last_bytes_at_char_boundary; use codex_utils_tokenizer::Tokenizer; -use crate::model_family::ModelFamily; use crate::model_family::derive_default_model_family; use crate::model_family::find_family_for_model; @@ -39,10 +38,10 @@ pub(crate) fn truncate_with_line_bytes_budget(content: &str, bytes_budget: usize format!("Total output lines: {total_lines}\n\n{output}") } -pub(crate) async fn truncate_with_mode( +pub(crate) fn truncate_text( content: &str, - model: Option<&str>, tokens_budget: Option, + model: Option<&str>, ) -> (String, Option) { let mode = model .map(|m| { @@ -62,9 +61,9 @@ pub(crate) async fn truncate_with_mode( } TruncationMode::Tokens(tokens) => { if let Some(tokens) = tokens_budget { - truncate_with_token_budget(content, tokens, model).await + truncate_with_token_budget(content, tokens, model) } else { - truncate_with_token_budget(content, tokens, model).await + truncate_with_token_budget(content, tokens, model) } } } @@ -74,7 +73,7 @@ pub(crate) async fn truncate_with_mode( /// `max_tokens` tokens by preserving as many /// text/image items as possible and appending a summary for any omitted text /// items. -pub(crate) async fn truncate_function_output_items_to_token_limit( +pub(crate) fn truncate_function_output_items_to_token_limit( items: &[FunctionCallOutputContentItem], max_tokens: usize, ) -> Vec { @@ -96,8 +95,7 @@ pub(crate) async fn truncate_function_output_items_to_token_limit( out.push(FunctionCallOutputContentItem::InputText { text: text.clone() }); remaining_tokens = remaining_tokens.saturating_sub(token_len); } else { - let (snippet, _) = - truncate_with_token_budget(text, remaining_tokens, None).await; + let (snippet, _) = truncate_with_token_budget(text, remaining_tokens, None); if snippet.is_empty() { omitted_text_items += 1; } else { @@ -127,7 +125,7 @@ pub(crate) async fn truncate_function_output_items_to_token_limit( /// preserving the beginning and the end. Returns the possibly truncated string /// and `Some(original_token_count)` if truncation occurred; otherwise returns /// the original string and `None`. -async fn truncate_with_token_budget( +fn truncate_with_token_budget( s: &str, max_tokens: usize, model: Option<&str>, diff --git a/codex-rs/core/src/unified_exec/session.rs b/codex-rs/core/src/unified_exec/session.rs index 8200996947..a5366f3579 100644 --- a/codex-rs/core/src/unified_exec/session.rs +++ b/codex-rs/core/src/unified_exec/session.rs @@ -14,7 +14,8 @@ use crate::exec::ExecToolCallOutput; use crate::exec::SandboxType; use crate::exec::StreamOutput; use crate::exec::is_likely_sandbox_denied; -use crate::truncate::truncate_with_token_budget; +use crate::tools::sandboxing::ToolCtx; +use crate::truncate::truncate_text; use codex_utils_pty::ExecCommandSession; use codex_utils_pty::SpawnedPty; @@ -140,7 +141,10 @@ impl UnifiedExecSession { self.sandbox_type } - pub(super) async fn check_for_sandbox_denial(&self) -> Result<(), UnifiedExecError> { + pub(super) async fn check_for_sandbox_denial( + &self, + ctx: &ToolCtx<'_>, + ) -> Result<(), UnifiedExecError> { if self.sandbox_type() == SandboxType::None || !self.has_exited() { return Ok(()); } @@ -166,8 +170,11 @@ impl UnifiedExecSession { }; if is_likely_sandbox_denied(self.sandbox_type(), &exec_output) { - let (snippet, _) = - truncate_with_token_budget(&aggregated_text, UNIFIED_EXEC_OUTPUT_MAX_TOKENS, None); + let (snippet, _) = truncate_text( + &aggregated_text, + Some(UNIFIED_EXEC_OUTPUT_MAX_TOKENS), + Some(ctx.turn.client.get_model().as_str()), + ); let message = if snippet.is_empty() { format!("exit code {exit_code}") } else { @@ -182,6 +189,7 @@ impl UnifiedExecSession { pub(super) async fn from_spawned( spawned: SpawnedPty, sandbox_type: SandboxType, + ctx: &ToolCtx<'_>, ) -> Result { let SpawnedPty { session, @@ -196,7 +204,7 @@ impl UnifiedExecSession { }; if exit_ready { - managed.check_for_sandbox_denial().await?; + managed.check_for_sandbox_denial(ctx).await?; return Ok(managed); } @@ -205,7 +213,7 @@ impl UnifiedExecSession { .await .is_ok() { - managed.check_for_sandbox_denial().await?; + managed.check_for_sandbox_denial(ctx).await?; } Ok(managed) diff --git a/codex-rs/core/src/unified_exec/session_manager.rs b/codex-rs/core/src/unified_exec/session_manager.rs index 859473fce5..238d4e1413 100644 --- a/codex-rs/core/src/unified_exec/session_manager.rs +++ b/codex-rs/core/src/unified_exec/session_manager.rs @@ -36,7 +36,7 @@ use super::generate_chunk_id; use super::resolve_max_tokens; use super::session::OutputBuffer; use super::session::UnifiedExecSession; -use crate::truncate::truncate_with_token_budget; +use crate::truncate::truncate_text; impl UnifiedExecSessionManager { pub(crate) async fn exec_command( @@ -72,7 +72,7 @@ impl UnifiedExecSessionManager { let text = String::from_utf8_lossy(&collected).to_string(); let model = context.turn.client.get_model(); let (output, original_token_count) = - truncate_with_token_budget(&text, max_tokens, Some(model.as_str())); + truncate_text(&text, Some(max_tokens), Some(model.as_str())); let original_token_count = original_token_count.and_then(|count| usize::try_from(count).ok()); let chunk_id = generate_chunk_id(); @@ -181,7 +181,7 @@ impl UnifiedExecSessionManager { let text = String::from_utf8_lossy(&collected).to_string(); let model = turn_ref.client.get_model(); let (output, original_token_count) = - truncate_with_token_budget(&text, max_tokens, Some(model.as_str())); + truncate_text(&text, Some(max_tokens), Some(model.as_str())); let original_token_count = original_token_count.and_then(|count| usize::try_from(count).ok()); let chunk_id = generate_chunk_id(); @@ -418,6 +418,7 @@ impl UnifiedExecSessionManager { pub(crate) async fn open_session_with_exec_env( &self, env: &ExecEnv, + ctx: &ToolCtx<'_>, ) -> Result { let (program, args) = env .command @@ -433,7 +434,7 @@ impl UnifiedExecSessionManager { ) .await .map_err(|err| UnifiedExecError::create_session(err.to_string()))?; - UnifiedExecSession::from_spawned(spawned, env.sandbox).await + UnifiedExecSession::from_spawned(spawned, env.sandbox, ctx).await } pub(super) async fn open_session_with_sandbox( From bbfa97e5edd82071bff9391d412921b63b1cc1d4 Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Mon, 17 Nov 2025 15:34:50 -0800 Subject: [PATCH 18/68] router --- codex-rs/core/src/error.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/codex-rs/core/src/error.rs b/codex-rs/core/src/error.rs index 83ab7d4e98..552c7be127 100644 --- a/codex-rs/core/src/error.rs +++ b/codex-rs/core/src/error.rs @@ -467,6 +467,7 @@ pub fn token_limited_error_message(e: &CodexErr, model: Option<&str>) -> String #[cfg(test)] mod tests { use super::*; + use crate::config::OPENAI_DEFAULT_MODEL; use crate::exec::StreamOutput; use chrono::DateTime; use chrono::Duration as ChronoDuration; From 4b58b600c61aa209ca666d8a79633bd6d0f10ef8 Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Mon, 17 Nov 2025 15:36:10 -0800 Subject: [PATCH 19/68] router --- codex-rs/core/src/codex.rs | 15 ++++++--------- codex-rs/core/src/config/mod.rs | 16 ++++++++-------- codex-rs/core/src/state/session.rs | 5 ++--- docs/config.md | 2 +- docs/example-config.md | 2 +- 5 files changed, 18 insertions(+), 22 deletions(-) diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index 69903436be..7a1e978a85 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -183,8 +183,7 @@ impl Codex { cwd: config.cwd.clone(), original_config_do_not_use: Arc::clone(&config), features: config.features.clone(), - context_manager_function_output_max_tokens: config - .context_manager_function_output_max_tokens, + output_max_tokens: config.output_max_tokens, session_source, }; @@ -339,7 +338,7 @@ pub(crate) struct SessionConfiguration { /// Set of feature flags for this session features: Features, - context_manager_function_output_max_tokens: usize, + output_max_tokens: usize, // TODO(pakrym): Remove config from here original_config_do_not_use: Arc, @@ -371,8 +370,8 @@ impl SessionConfiguration { next_configuration } - pub(crate) fn context_manager_function_output_max_tokens(&self) -> usize { - self.context_manager_function_output_max_tokens + pub(crate) fn output_max_tokens(&self) -> usize { + self.output_max_tokens } pub(crate) fn model(&self) -> &str { @@ -2617,8 +2616,7 @@ mod tests { cwd: config.cwd.clone(), original_config_do_not_use: Arc::clone(&config), features: Features::default(), - context_manager_function_output_max_tokens: config - .context_manager_function_output_max_tokens, + output_max_tokens: config.output_max_tokens, session_source: SessionSource::Exec, }; @@ -2695,8 +2693,7 @@ mod tests { cwd: config.cwd.clone(), original_config_do_not_use: Arc::clone(&config), features: Features::default(), - context_manager_function_output_max_tokens: config - .context_manager_function_output_max_tokens, + output_max_tokens: config.output_max_tokens, session_source: SessionSource::Exec, }; diff --git a/codex-rs/core/src/config/mod.rs b/codex-rs/core/src/config/mod.rs index 5734354849..a5c4856338 100644 --- a/codex-rs/core/src/config/mod.rs +++ b/codex-rs/core/src/config/mod.rs @@ -195,7 +195,7 @@ pub struct Config { pub project_doc_fallback_filenames: Vec, /// Token budget applied when storing tool/function outputs in the context manager. - pub context_manager_function_output_max_tokens: usize, + pub output_max_tokens: usize, /// Directory containing all Codex state (defaults to `~/.codex` but can be /// overridden by the `CODEX_HOME` environment variable). @@ -597,7 +597,7 @@ pub struct ConfigToml { pub project_doc_fallback_filenames: Option>, /// Token budget applied when storing tool/function outputs in the context manager. - pub context_manager_function_output_max_tokens: Option, + pub output_max_tokens: Option, /// Profile to use from the `profiles` map. pub profile: Option, @@ -1142,8 +1142,8 @@ impl Config { } }) .collect(), - context_manager_function_output_max_tokens: cfg - .context_manager_function_output_max_tokens + output_max_tokens: cfg + .output_max_tokens .unwrap_or(DEFAULT_FUNCTION_OUTPUT_TOKEN_LIMIT), codex_home, history, @@ -2897,7 +2897,7 @@ model_verbosity = "high" model_providers: fixture.model_provider_map.clone(), project_doc_max_bytes: PROJECT_DOC_MAX_BYTES, project_doc_fallback_filenames: Vec::new(), - context_manager_function_output_max_tokens: DEFAULT_FUNCTION_OUTPUT_TOKEN_LIMIT, + output_max_tokens: DEFAULT_FUNCTION_OUTPUT_TOKEN_LIMIT, codex_home: fixture.codex_home(), history: History::default(), file_opener: UriBasedFileOpener::VsCode, @@ -2969,7 +2969,7 @@ model_verbosity = "high" model_providers: fixture.model_provider_map.clone(), project_doc_max_bytes: PROJECT_DOC_MAX_BYTES, project_doc_fallback_filenames: Vec::new(), - context_manager_function_output_max_tokens: DEFAULT_FUNCTION_OUTPUT_TOKEN_LIMIT, + output_max_tokens: DEFAULT_FUNCTION_OUTPUT_TOKEN_LIMIT, codex_home: fixture.codex_home(), history: History::default(), file_opener: UriBasedFileOpener::VsCode, @@ -3056,7 +3056,7 @@ model_verbosity = "high" model_providers: fixture.model_provider_map.clone(), project_doc_max_bytes: PROJECT_DOC_MAX_BYTES, project_doc_fallback_filenames: Vec::new(), - context_manager_function_output_max_tokens: DEFAULT_FUNCTION_OUTPUT_TOKEN_LIMIT, + output_max_tokens: DEFAULT_FUNCTION_OUTPUT_TOKEN_LIMIT, codex_home: fixture.codex_home(), history: History::default(), file_opener: UriBasedFileOpener::VsCode, @@ -3129,7 +3129,7 @@ model_verbosity = "high" model_providers: fixture.model_provider_map.clone(), project_doc_max_bytes: PROJECT_DOC_MAX_BYTES, project_doc_fallback_filenames: Vec::new(), - context_manager_function_output_max_tokens: DEFAULT_FUNCTION_OUTPUT_TOKEN_LIMIT, + output_max_tokens: DEFAULT_FUNCTION_OUTPUT_TOKEN_LIMIT, codex_home: fixture.codex_home(), history: History::default(), file_opener: UriBasedFileOpener::VsCode, diff --git a/codex-rs/core/src/state/session.rs b/codex-rs/core/src/state/session.rs index faab6248fb..5b1ebf5b08 100644 --- a/codex-rs/core/src/state/session.rs +++ b/codex-rs/core/src/state/session.rs @@ -18,9 +18,8 @@ pub(crate) struct SessionState { impl SessionState { /// Create a new session state mirroring previous `State::default()` semantics. pub(crate) fn new(session_configuration: SessionConfiguration) -> Self { - let mut history = ContextManager::with_function_output_limit( - session_configuration.context_manager_function_output_max_tokens(), - ); + let mut history = + ContextManager::with_function_output_limit(session_configuration.output_max_tokens()); history.set_model(Some(session_configuration.model())); Self { session_configuration: session_configuration.clone(), diff --git a/docs/config.md b/docs/config.md index 878000f881..a378d145d2 100644 --- a/docs/config.md +++ b/docs/config.md @@ -911,7 +911,7 @@ Valid values: | `model_provider` | string | Provider id from `model_providers` (default: `openai`). | | `model_context_window` | number | Context window tokens. | | `model_max_output_tokens` | number | Max output tokens. | -| `context_manager_function_output_max_tokens` | number | Token budget for stored function/tool outputs in history (default: 2,560 tokens). | +| `output_max_tokens` | number | Token budget for stored function/tool outputs in history (default: 2,560 tokens). | | `approval_policy` | `untrusted` \| `on-failure` \| `on-request` \| `never` | When to prompt for approval. | | `sandbox_mode` | `read-only` \| `workspace-write` \| `danger-full-access` | OS sandbox policy. | | `sandbox_workspace_write.writable_roots` | array | Extra writable roots in workspace‑write. | diff --git a/docs/example-config.md b/docs/example-config.md index 8fca8e73eb..e311cd6c14 100644 --- a/docs/example-config.md +++ b/docs/example-config.md @@ -33,7 +33,7 @@ model_provider = "openai" # model_context_window = 128000 # tokens; default: auto for model # model_max_output_tokens = 8192 # tokens; default: auto for model # model_auto_compact_token_limit = 0 # disable/override auto; default: model family specific -# context_manager_function_output_max_tokens = 2560 # tokens stored per tool output; default: 2560 +# output_max_tokens = 2560 # tokens stored per tool output; default: 2560 ################################################################################ # Reasoning & Verbosity (Responses API capable models) From 5db71f6d6179884e59d45e5409a0d7a62414ca23 Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Mon, 17 Nov 2025 15:37:20 -0800 Subject: [PATCH 20/68] router --- codex-rs/core/src/error.rs | 10 +++++----- codex-rs/core/src/tools/orchestrator.rs | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/codex-rs/core/src/error.rs b/codex-rs/core/src/error.rs index 552c7be127..32b3343f9b 100644 --- a/codex-rs/core/src/error.rs +++ b/codex-rs/core/src/error.rs @@ -431,7 +431,7 @@ impl CodexErr { } } -pub fn token_limited_error_message(e: &CodexErr, model: Option<&str>) -> String { +pub fn get_error_message_ui(e: &CodexErr, model: Option<&str>) -> String { let message = match e { CodexErr::Sandbox(SandboxErr::Denied { output }) => { let aggregated = output.aggregated_output.text.trim(); @@ -535,7 +535,7 @@ mod tests { output: Box::new(output), }); assert_eq!( - token_limited_error_message(&err, Some(OPENAI_DEFAULT_MODEL)), + get_error_message_ui(&err, Some(OPENAI_DEFAULT_MODEL)), "aggregate detail" ); } @@ -554,7 +554,7 @@ mod tests { output: Box::new(output), }); assert_eq!( - token_limited_error_message(&err, Some(OPENAI_DEFAULT_MODEL)), + get_error_message_ui(&err, Some(OPENAI_DEFAULT_MODEL)), "stderr detail\nstdout detail" ); } @@ -573,7 +573,7 @@ mod tests { output: Box::new(output), }); assert_eq!( - token_limited_error_message(&err, Some(OPENAI_DEFAULT_MODEL)), + get_error_message_ui(&err, Some(OPENAI_DEFAULT_MODEL)), "stdout only" ); } @@ -592,7 +592,7 @@ mod tests { output: Box::new(output), }); assert_eq!( - token_limited_error_message(&err, Some(OPENAI_DEFAULT_MODEL)), + get_error_message_ui(&err, Some(OPENAI_DEFAULT_MODEL)), "command failed inside sandbox with exit code 13" ); } diff --git a/codex-rs/core/src/tools/orchestrator.rs b/codex-rs/core/src/tools/orchestrator.rs index df335260a3..4ac82b0d45 100644 --- a/codex-rs/core/src/tools/orchestrator.rs +++ b/codex-rs/core/src/tools/orchestrator.rs @@ -7,7 +7,7 @@ retry without sandbox on denial (no re‑approval thanks to caching). */ use crate::error::CodexErr; use crate::error::SandboxErr; -use crate::error::token_limited_error_message; +use crate::error::get_error_message_ui; use crate::exec::ExecToolCallOutput; use crate::sandboxing::SandboxManager; use crate::tools::sandboxing::ApprovalCtx; @@ -129,7 +129,7 @@ impl ToolOrchestrator { let err = SandboxErr::Denied { output: output.clone(), }; - let friendly = token_limited_error_message( + let friendly = get_error_message_ui( &CodexErr::Sandbox(err), Some(turn_ctx.client.get_model().as_str()), ); From d3c94a39cebda0160fa4f68ab8b68a3767b4e8b2 Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Mon, 17 Nov 2025 16:34:11 -0800 Subject: [PATCH 21/68] tests --- codex-rs/core/src/client.rs | 6 +- codex-rs/core/src/codex.rs | 26 ++-- codex-rs/core/src/compact.rs | 12 +- codex-rs/core/src/config/mod.rs | 33 +++-- codex-rs/core/src/context_manager/history.rs | 42 +++--- .../core/src/context_manager/history_tests.rs | 75 ++++++---- codex-rs/core/src/context_manager/mod.rs | 1 - codex-rs/core/src/error.rs | 12 +- codex-rs/core/src/model_family.rs | 38 ++++- codex-rs/core/src/state/session.rs | 9 +- codex-rs/core/src/tools/mod.rs | 6 +- codex-rs/core/src/tools/orchestrator.rs | 2 +- codex-rs/core/src/truncate.rs | 137 ++++++++---------- codex-rs/core/src/unified_exec/session.rs | 4 +- .../core/src/unified_exec/session_manager.rs | 6 +- 15 files changed, 228 insertions(+), 181 deletions(-) diff --git a/codex-rs/core/src/client.rs b/codex-rs/core/src/client.rs index fa49028fb8..bbf18e85ec 100644 --- a/codex-rs/core/src/client.rs +++ b/codex-rs/core/src/client.rs @@ -132,10 +132,8 @@ impl ModelClient { }) } - pub fn get_max_output_tokens(&self) -> Option { - self.config.model_max_output_tokens.or_else(|| { - get_model_info(&self.config.model_family).map(|info| info.max_output_tokens) - }) + pub fn get_max_calls_output_tokens(&self) -> usize { + self.config.calls_output_max_tokens } pub fn config(&self) -> Arc { diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index 7a1e978a85..545e5dd14e 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -183,7 +183,7 @@ impl Codex { cwd: config.cwd.clone(), original_config_do_not_use: Arc::clone(&config), features: config.features.clone(), - output_max_tokens: config.output_max_tokens, + output_max_tokens: config.calls_output_max_tokens, session_source, }; @@ -741,7 +741,7 @@ impl Session { state.session_configuration = state.session_configuration.apply(&updates); let model = state.session_configuration.model().to_string(); - state.history.set_model(Some(model.as_str())); + state.history.set_model(&model); } pub(crate) async fn new_turn(&self, updates: SessionSettingsUpdate) -> Arc { @@ -759,7 +759,7 @@ impl Session { let session_configuration = state.session_configuration.clone().apply(&updates); state.session_configuration = session_configuration.clone(); let model = state.session_configuration.model().to_string(); - state.history.set_model(Some(model.as_str())); + state.history.set_model(&model); session_configuration }; @@ -993,7 +993,10 @@ impl Session { turn_context: &TurnContext, rollout_items: &[RolloutItem], ) -> Vec { - let mut history = ContextManager::new(); + let mut history = ContextManager::new( + turn_context.client.get_model().as_str(), + turn_context.client.get_max_calls_output_tokens(), + ); for item in rollout_items { match item { RolloutItem::ResponseItem(response_item) => { @@ -1006,7 +1009,7 @@ impl Session { self.build_initial_context(turn_context), &user_messages, &compacted.message, - Some(turn_context.client.get_model().as_str()), + turn_context.client.get_model().as_str(), ); history.replace(rebuilt); } @@ -2616,7 +2619,7 @@ mod tests { cwd: config.cwd.clone(), original_config_do_not_use: Arc::clone(&config), features: Features::default(), - output_max_tokens: config.output_max_tokens, + output_max_tokens: config.calls_output_max_tokens, session_source: SessionSource::Exec, }; @@ -2693,7 +2696,7 @@ mod tests { cwd: config.cwd.clone(), original_config_do_not_use: Arc::clone(&config), features: Features::default(), - output_max_tokens: config.output_max_tokens, + output_max_tokens: config.calls_output_max_tokens, session_source: SessionSource::Exec, }; @@ -2920,7 +2923,10 @@ mod tests { turn_context: &TurnContext, ) -> (Vec, Vec) { let mut rollout_items = Vec::new(); - let mut live_history = ContextManager::new(); + let mut live_history = ContextManager::new( + turn_context.client.get_model().as_str(), + turn_context.client.get_max_calls_output_tokens(), + ); let initial_context = session.build_initial_context(turn_context); for item in &initial_context { @@ -2955,7 +2961,7 @@ mod tests { session.build_initial_context(turn_context), &user_messages1, summary1, - Some(turn_context.client.get_model().as_str()), + turn_context.client.get_model().as_str(), ); live_history.replace(rebuilt1); rollout_items.push(RolloutItem::Compacted(CompactedItem { @@ -2989,7 +2995,7 @@ mod tests { session.build_initial_context(turn_context), &user_messages2, summary2, - Some(turn_context.client.get_model().as_str()), + turn_context.client.get_model().as_str(), ); live_history.replace(rebuilt2); rollout_items.push(RolloutItem::Compacted(CompactedItem { diff --git a/codex-rs/core/src/compact.rs b/codex-rs/core/src/compact.rs index ff7b483fe1..197a4859b9 100644 --- a/codex-rs/core/src/compact.rs +++ b/codex-rs/core/src/compact.rs @@ -152,7 +152,7 @@ async fn run_compact_task_inner( initial_context, &user_messages, &summary_text, - Some(turn_context.client.get_model().as_str()), + turn_context.client.get_model().as_str(), ); let ghost_snapshots: Vec = history_snapshot .iter() @@ -230,7 +230,7 @@ pub(crate) fn build_token_limited_compacted_history( initial_context: Vec, user_messages: &[String], summary_text: &str, - model: Option<&str>, + model: &str, ) -> Vec { build_token_limited_compacted_history_with_limit( initial_context, @@ -246,7 +246,7 @@ fn build_token_limited_compacted_history_with_limit( user_messages: &[String], summary_text: &str, max_tokens: usize, - model: Option<&str>, + model: &str, ) -> Vec { let mut selected_messages: Vec = Vec::new(); if max_tokens > 0 { @@ -264,7 +264,7 @@ fn build_token_limited_compacted_history_with_limit( selected_messages.push(message.clone()); remaining = remaining.saturating_sub(tokens); } else { - let (truncated, _) = truncate_text(message, Some(remaining), model); + let (truncated, _) = truncate_text(message, remaining, model); selected_messages.push(truncated); break; } @@ -435,7 +435,7 @@ mod tests { std::slice::from_ref(&big), "SUMMARY", max_tokens, - Some(model), + model, ); assert_eq!(history.len(), 2); @@ -477,7 +477,7 @@ mod tests { initial_context, &user_messages, summary_text, - Some(OPENAI_DEFAULT_MODEL), + OPENAI_DEFAULT_MODEL, ); assert!( !history.is_empty(), diff --git a/codex-rs/core/src/config/mod.rs b/codex-rs/core/src/config/mod.rs index a5c4856338..e5b81505b1 100644 --- a/codex-rs/core/src/config/mod.rs +++ b/codex-rs/core/src/config/mod.rs @@ -32,7 +32,6 @@ use crate::project_doc::DEFAULT_PROJECT_DOC_FILENAME; use crate::project_doc::LOCAL_PROJECT_DOC_FILENAME; use crate::protocol::AskForApproval; use crate::protocol::SandboxPolicy; -use crate::truncate::DEFAULT_FUNCTION_OUTPUT_TOKEN_LIMIT; use codex_app_server_protocol::Tools; use codex_app_server_protocol::UserSavedConfig; use codex_protocol::config_types::ForcedLoginMethod; @@ -195,7 +194,7 @@ pub struct Config { pub project_doc_fallback_filenames: Vec, /// Token budget applied when storing tool/function outputs in the context manager. - pub output_max_tokens: usize, + pub calls_output_max_tokens: usize, /// Directory containing all Codex state (defaults to `~/.codex` but can be /// overridden by the `CODEX_HOME` environment variable). @@ -597,7 +596,7 @@ pub struct ConfigToml { pub project_doc_fallback_filenames: Option>, /// Token budget applied when storing tool/function outputs in the context manager. - pub output_max_tokens: Option, + pub calls_output_max_tokens: Option, /// Profile to use from the `profiles` map. pub profile: Option, @@ -1103,7 +1102,7 @@ impl Config { let config = Self { model, review_model, - model_family, + model_family: model_family.clone(), model_context_window, model_max_output_tokens, model_auto_compact_token_limit, @@ -1142,9 +1141,9 @@ impl Config { } }) .collect(), - output_max_tokens: cfg - .output_max_tokens - .unwrap_or(DEFAULT_FUNCTION_OUTPUT_TOKEN_LIMIT), + calls_output_max_tokens: cfg + .calls_output_max_tokens + .unwrap_or(model_family.truncation_policy.tokens_budget), codex_home, history, file_opener: cfg.file_opener.unwrap_or(UriBasedFileOpener::VsCode), @@ -2897,7 +2896,10 @@ model_verbosity = "high" model_providers: fixture.model_provider_map.clone(), project_doc_max_bytes: PROJECT_DOC_MAX_BYTES, project_doc_fallback_filenames: Vec::new(), - output_max_tokens: DEFAULT_FUNCTION_OUTPUT_TOKEN_LIMIT, + calls_output_max_tokens: find_family_for_model(OPENAI_DEFAULT_MODEL) + .unwrap() + .truncation_policy + .tokens_budget, codex_home: fixture.codex_home(), history: History::default(), file_opener: UriBasedFileOpener::VsCode, @@ -2969,7 +2971,10 @@ model_verbosity = "high" model_providers: fixture.model_provider_map.clone(), project_doc_max_bytes: PROJECT_DOC_MAX_BYTES, project_doc_fallback_filenames: Vec::new(), - output_max_tokens: DEFAULT_FUNCTION_OUTPUT_TOKEN_LIMIT, + calls_output_max_tokens: find_family_for_model(OPENAI_DEFAULT_MODEL) + .unwrap() + .truncation_policy + .tokens_budget, codex_home: fixture.codex_home(), history: History::default(), file_opener: UriBasedFileOpener::VsCode, @@ -3056,7 +3061,10 @@ model_verbosity = "high" model_providers: fixture.model_provider_map.clone(), project_doc_max_bytes: PROJECT_DOC_MAX_BYTES, project_doc_fallback_filenames: Vec::new(), - output_max_tokens: DEFAULT_FUNCTION_OUTPUT_TOKEN_LIMIT, + calls_output_max_tokens: find_family_for_model(OPENAI_DEFAULT_MODEL) + .unwrap() + .truncation_policy + .tokens_budget, codex_home: fixture.codex_home(), history: History::default(), file_opener: UriBasedFileOpener::VsCode, @@ -3129,7 +3137,10 @@ model_verbosity = "high" model_providers: fixture.model_provider_map.clone(), project_doc_max_bytes: PROJECT_DOC_MAX_BYTES, project_doc_fallback_filenames: Vec::new(), - output_max_tokens: DEFAULT_FUNCTION_OUTPUT_TOKEN_LIMIT, + calls_output_max_tokens: find_family_for_model(OPENAI_DEFAULT_MODEL) + .unwrap() + .truncation_policy + .tokens_budget, codex_home: fixture.codex_home(), history: History::default(), file_opener: UriBasedFileOpener::VsCode, diff --git a/codex-rs/core/src/context_manager/history.rs b/codex-rs/core/src/context_manager/history.rs index d1a02ccfed..33d075e35e 100644 --- a/codex-rs/core/src/context_manager/history.rs +++ b/codex-rs/core/src/context_manager/history.rs @@ -1,6 +1,8 @@ use crate::codex::TurnContext; +use crate::config::OPENAI_DEFAULT_MODEL; use crate::context_manager::normalize; -use crate::truncate::DEFAULT_FUNCTION_OUTPUT_TOKEN_LIMIT; +use crate::model_family::derive_default_model_family; +use crate::model_family::find_family_for_model; use crate::truncate::truncate_function_output_items_to_token_limit; use crate::truncate::truncate_text; use codex_protocol::models::FunctionCallOutputPayload; @@ -17,25 +19,25 @@ pub(crate) struct ContextManager { items: Vec, token_info: Option, function_output_max_tokens: usize, - model: Option, + model: String, } impl ContextManager { - pub(crate) fn new() -> Self { - Self::with_function_output_limit(DEFAULT_FUNCTION_OUTPUT_TOKEN_LIMIT) - } - - pub(crate) fn with_function_output_limit(max_tokens: usize) -> Self { + pub(crate) fn new(model: &str, function_output_max_tokens: usize) -> Self { Self { items: Vec::new(), token_info: TokenUsageInfo::new_or_append(&None, &None, None), - function_output_max_tokens: max_tokens, - model: None, + function_output_max_tokens, + model: model.to_string(), } } - pub(crate) fn set_model(&mut self, model: Option<&str>) { - self.model = model.map(ToString::to_string); + pub(crate) fn set_model(&mut self, model: &str) { + self.model = model.to_string(); + self.function_output_max_tokens = find_family_for_model(model) + .unwrap_or_else(|| derive_default_model_family(model)) + .truncation_policy + .tokens_budget; } pub(crate) fn token_info(&self) -> Option { @@ -161,13 +163,14 @@ impl ContextManager { ResponseItem::FunctionCallOutput { call_id, output } => { let (truncated, _) = truncate_text( output.content.as_str(), - Some(self.function_output_max_tokens), - self.model.as_deref(), + self.function_output_max_tokens, + &self.model, ); let truncated_items = output.content_items.as_ref().map(|items| { truncate_function_output_items_to_token_limit( items, self.function_output_max_tokens, + &self.model, ) }); ResponseItem::FunctionCallOutput { @@ -180,11 +183,8 @@ impl ContextManager { } } ResponseItem::CustomToolCallOutput { call_id, output } => { - let (truncated, _) = truncate_text( - output, - Some(self.function_output_max_tokens), - self.model.as_deref(), - ); + let (truncated, _) = + truncate_text(output, self.function_output_max_tokens, &self.model); ResponseItem::CustomToolCallOutput { call_id: call_id.clone(), output: truncated, @@ -204,7 +204,11 @@ impl ContextManager { impl Default for ContextManager { fn default() -> Self { - Self::new() + let default_function_output_max_tokens = find_family_for_model(OPENAI_DEFAULT_MODEL) + .unwrap_or_else(|| derive_default_model_family(OPENAI_DEFAULT_MODEL)) + .truncation_policy + .tokens_budget; + Self::new(OPENAI_DEFAULT_MODEL, default_function_output_max_tokens) } } diff --git a/codex-rs/core/src/context_manager/history_tests.rs b/codex-rs/core/src/context_manager/history_tests.rs index 486ec4ed91..4895923836 100644 --- a/codex-rs/core/src/context_manager/history_tests.rs +++ b/codex-rs/core/src/context_manager/history_tests.rs @@ -1,4 +1,7 @@ use super::*; +use crate::config::OPENAI_DEFAULT_MODEL; +use crate::model_family::derive_default_model_family; +use crate::model_family::find_family_for_model; use crate::truncate; use codex_git::GhostCommit; use codex_protocol::models::ContentItem; @@ -12,6 +15,15 @@ use codex_utils_tokenizer::Tokenizer; use pretty_assertions::assert_eq; use regex_lite::Regex; +const EXEC_FORMAT_MAX_LINES: usize = 256; + +fn exec_format_max_bytes() -> usize { + find_family_for_model(OPENAI_DEFAULT_MODEL) + .unwrap_or_else(|| derive_default_model_family(OPENAI_DEFAULT_MODEL)) + .truncation_policy + .tokens_budget +} + fn assistant_msg(text: &str) -> ResponseItem { ResponseItem::Message { id: None, @@ -23,7 +35,12 @@ fn assistant_msg(text: &str) -> ResponseItem { } fn create_history_with_items(items: Vec) -> ContextManager { - let mut h = ContextManager::new(); + let model = OPENAI_DEFAULT_MODEL; + let max_tokens = find_family_for_model(model) + .unwrap_or_else(|| derive_default_model_family(model)) + .truncation_policy + .tokens_budget; + let mut h = ContextManager::new(model, max_tokens); h.record_items(items.iter()); h } @@ -235,7 +252,12 @@ fn normalization_retains_local_shell_outputs() { #[test] fn record_items_truncates_function_call_output_content() { - let mut history = ContextManager::new(); + let model = OPENAI_DEFAULT_MODEL; + let max_tokens = find_family_for_model(model) + .unwrap_or_else(|| derive_default_model_family(model)) + .truncation_policy + .tokens_budget; + let mut history = ContextManager::new(model, max_tokens); let tok = Tokenizer::try_default().expect("load tokenizer"); let long_line = "a very long line to trigger truncation\n"; let long_output = long_line.repeat(2_500); @@ -261,7 +283,7 @@ fn record_items_truncates_function_call_output_content() { ); let token_count = usize::try_from(tok.count(&output.content)).unwrap_or(usize::MAX); assert!( - token_count <= truncate::DEFAULT_FUNCTION_OUTPUT_TOKEN_LIMIT, + token_count <= max_tokens, "token count should not exceed limit: {token_count}" ); } @@ -271,7 +293,12 @@ fn record_items_truncates_function_call_output_content() { #[test] fn record_items_truncates_custom_tool_call_output_content() { - let mut history = ContextManager::new(); + let model = OPENAI_DEFAULT_MODEL; + let max_tokens = find_family_for_model(model) + .unwrap_or_else(|| derive_default_model_family(model)) + .truncation_policy + .tokens_budget; + let mut history = ContextManager::new(model, max_tokens); let tok = Tokenizer::try_default().expect("load tokenizer"); let line = "custom output that is very long\n"; let long_output = line.repeat(2_500); @@ -292,7 +319,7 @@ fn record_items_truncates_custom_tool_call_output_content() { ); let token_count = usize::try_from(tok.count(output)).unwrap_or(usize::MAX); assert!( - token_count <= truncate::DEFAULT_FUNCTION_OUTPUT_TOKEN_LIMIT, + token_count <= max_tokens, "token count should not exceed limit: {token_count}" ); } @@ -302,7 +329,8 @@ fn record_items_truncates_custom_tool_call_output_content() { #[test] fn record_items_respects_custom_token_limit() { - let mut history = ContextManager::with_function_output_limit(8); + let model = OPENAI_DEFAULT_MODEL; + let mut history = ContextManager::new(model, 8); let tok = Tokenizer::try_default().expect("load tokenizer"); let long_output = "tokenized content repeated many times ".repeat(200); let item = ResponseItem::FunctionCallOutput { @@ -341,15 +369,15 @@ fn assert_truncated_message_matches(message: &str, line: &str, total_lines: usiz .expect("missing body capture") .as_str(); assert!( - body.len() <= truncate::MODEL_FORMAT_MAX_BYTES, + body.len() <= exec_format_max_bytes(), "body exceeds byte limit: {} bytes", body.len() ); } fn truncated_message_pattern(line: &str, total_lines: usize) -> String { - let head_lines = truncate::MODEL_FORMAT_MAX_LINES / 2; - let tail_lines = truncate::MODEL_FORMAT_MAX_LINES - head_lines; + let head_lines = EXEC_FORMAT_MAX_LINES / 2; + let tail_lines = EXEC_FORMAT_MAX_LINES - head_lines; let head_take = head_lines.min(total_lines); let tail_take = tail_lines.min(total_lines.saturating_sub(head_take)); let omitted = total_lines.saturating_sub(head_take + tail_take); @@ -357,7 +385,7 @@ fn truncated_message_pattern(line: &str, total_lines: usize) -> String { if omitted == 0 { return format!( r"(?s)^Total output lines: {total_lines}\n\n(?P{escaped_line}.*\n\[\.{{3}} output truncated to fit {max_bytes} bytes \.{{3}}]\n\n.*)$", - max_bytes = truncate::MODEL_FORMAT_MAX_BYTES, + max_bytes = exec_format_max_bytes(), ); } format!( @@ -371,7 +399,7 @@ fn format_exec_output_truncates_large_error() { let large_error = line.repeat(2_500); // way beyond both byte and line limits let truncated = - truncate::truncate_with_line_bytes_budget(&large_error, truncate::MODEL_FORMAT_MAX_BYTES); + truncate::truncate_with_line_bytes_budget(&large_error, exec_format_max_bytes()); let total_lines = large_error.lines().count(); assert_truncated_message_matches(&truncated, line, total_lines); @@ -380,15 +408,12 @@ fn format_exec_output_truncates_large_error() { #[test] fn format_exec_output_marks_byte_truncation_without_omitted_lines() { - let long_line = "a".repeat(truncate::MODEL_FORMAT_MAX_BYTES + 50); - let truncated = - truncate::truncate_with_line_bytes_budget(&long_line, truncate::MODEL_FORMAT_MAX_BYTES); + let max_bytes = exec_format_max_bytes(); + let long_line = "a".repeat(max_bytes + 50); + let truncated = truncate::truncate_with_line_bytes_budget(&long_line, max_bytes); assert_ne!(truncated, long_line); - let marker_line = format!( - "[... output truncated to fit {} bytes ...]", - truncate::MODEL_FORMAT_MAX_BYTES - ); + let marker_line = format!("[... output truncated to fit {max_bytes} bytes ...]"); assert!( truncated.contains(&marker_line), "missing byte truncation marker: {truncated}" @@ -404,21 +429,20 @@ fn format_exec_output_returns_original_when_within_limits() { let content = "example output\n".repeat(10); assert_eq!( - truncate::truncate_with_line_bytes_budget(&content, truncate::MODEL_FORMAT_MAX_BYTES,), + truncate::truncate_with_line_bytes_budget(&content, exec_format_max_bytes(),), content ); } #[test] fn format_exec_output_reports_omitted_lines_and_keeps_head_and_tail() { - let total_lines = truncate::MODEL_FORMAT_MAX_LINES + 100; + let total_lines = EXEC_FORMAT_MAX_LINES + 100; let content: String = (0..total_lines) .map(|idx| format!("line-{idx}\n")) .collect(); - let truncated = - truncate::truncate_with_line_bytes_budget(&content, truncate::MODEL_FORMAT_MAX_BYTES); - let omitted = total_lines - truncate::MODEL_FORMAT_MAX_LINES; + let truncated = truncate::truncate_with_line_bytes_budget(&content, exec_format_max_bytes()); + let omitted = total_lines - EXEC_FORMAT_MAX_LINES; let expected_marker = format!("[... omitted {omitted} of {total_lines} lines ...]"); assert!( @@ -439,14 +463,13 @@ fn format_exec_output_reports_omitted_lines_and_keeps_head_and_tail() { #[test] fn format_exec_output_prefers_line_marker_when_both_limits_exceeded() { - let total_lines = truncate::MODEL_FORMAT_MAX_LINES + 42; + let total_lines = EXEC_FORMAT_MAX_LINES + 42; let long_line = "x".repeat(256); let content: String = (0..total_lines) .map(|idx| format!("line-{idx}-{long_line}\n")) .collect(); - let truncated = - truncate::truncate_with_line_bytes_budget(&content, truncate::MODEL_FORMAT_MAX_BYTES); + let truncated = truncate::truncate_with_line_bytes_budget(&content, exec_format_max_bytes()); assert!( truncated.contains("[... omitted 42 of 298 lines ...]"), diff --git a/codex-rs/core/src/context_manager/mod.rs b/codex-rs/core/src/context_manager/mod.rs index 2a4e4a7417..d347a7714d 100644 --- a/codex-rs/core/src/context_manager/mod.rs +++ b/codex-rs/core/src/context_manager/mod.rs @@ -1,6 +1,5 @@ mod history; mod normalize; -pub(crate) use crate::truncate::MODEL_FORMAT_MAX_BYTES; pub(crate) use crate::truncate::truncate_with_line_bytes_budget; pub(crate) use history::ContextManager; diff --git a/codex-rs/core/src/error.rs b/codex-rs/core/src/error.rs index 32b3343f9b..3d2f4b3335 100644 --- a/codex-rs/core/src/error.rs +++ b/codex-rs/core/src/error.rs @@ -431,7 +431,7 @@ impl CodexErr { } } -pub fn get_error_message_ui(e: &CodexErr, model: Option<&str>) -> String { +pub fn get_error_message_ui(e: &CodexErr, model: &str) -> String { let message = match e { CodexErr::Sandbox(SandboxErr::Denied { output }) => { let aggregated = output.aggregated_output.text.trim(); @@ -461,7 +461,7 @@ pub fn get_error_message_ui(e: &CodexErr, model: Option<&str>) -> String { _ => e.to_string(), }; - truncate_text(&message, Some(ERROR_MESSAGE_UI_MAX_TOKENS), model).0 + truncate_text(&message, ERROR_MESSAGE_UI_MAX_TOKENS, model).0 } #[cfg(test)] @@ -535,7 +535,7 @@ mod tests { output: Box::new(output), }); assert_eq!( - get_error_message_ui(&err, Some(OPENAI_DEFAULT_MODEL)), + get_error_message_ui(&err, OPENAI_DEFAULT_MODEL), "aggregate detail" ); } @@ -554,7 +554,7 @@ mod tests { output: Box::new(output), }); assert_eq!( - get_error_message_ui(&err, Some(OPENAI_DEFAULT_MODEL)), + get_error_message_ui(&err, OPENAI_DEFAULT_MODEL), "stderr detail\nstdout detail" ); } @@ -573,7 +573,7 @@ mod tests { output: Box::new(output), }); assert_eq!( - get_error_message_ui(&err, Some(OPENAI_DEFAULT_MODEL)), + get_error_message_ui(&err, OPENAI_DEFAULT_MODEL), "stdout only" ); } @@ -592,7 +592,7 @@ mod tests { output: Box::new(output), }); assert_eq!( - get_error_message_ui(&err, Some(OPENAI_DEFAULT_MODEL)), + get_error_message_ui(&err, OPENAI_DEFAULT_MODEL), "command failed inside sandbox with exit code 13" ); } diff --git a/codex-rs/core/src/model_family.rs b/codex-rs/core/src/model_family.rs index db348bf1bf..7cd38714c7 100644 --- a/codex-rs/core/src/model_family.rs +++ b/codex-rs/core/src/model_family.rs @@ -5,6 +5,7 @@ use crate::config::types::ReasoningSummaryFormat; use crate::tools::handlers::apply_patch::ApplyPatchToolType; use crate::tools::spec::ConfigShellToolType; use crate::truncate::TruncationMode; +use crate::truncate::TruncationPolicy; /// The `instructions` field in the payload sent to a model should always start /// with this content. @@ -68,7 +69,7 @@ pub struct ModelFamily { /// Preferred shell tool type for this model family when features do not override it. pub shell_type: ConfigShellToolType, - pub truncation_mode: TruncationMode, + pub truncation_policy: TruncationPolicy, } macro_rules! model_family { @@ -92,7 +93,10 @@ macro_rules! model_family { shell_type: ConfigShellToolType::Default, default_verbosity: None, default_reasoning_effort: None, - truncation_mode: TruncationMode::Bytes(10_000), + truncation_policy: TruncationPolicy { + mode: TruncationMode::Bytes, + tokens_budget: 10_000, + }, }; // apply overrides @@ -150,7 +154,10 @@ pub fn find_family_for_model(slug: &str) -> Option { ], supports_parallel_tool_calls: true, support_verbosity: true, - truncation_mode: TruncationMode::Tokens(10_000), + truncation_policy: TruncationPolicy { + mode: TruncationMode::Tokens, + tokens_budget: 10_000, + }, ) // Internal models. @@ -168,7 +175,10 @@ pub fn find_family_for_model(slug: &str) -> Option { ], supports_parallel_tool_calls: true, support_verbosity: true, - truncation_mode: TruncationMode::Tokens(10_000), + truncation_policy: TruncationPolicy { + mode: TruncationMode::Tokens, + tokens_budget: 10_000, + }, ) // Production models. @@ -183,7 +193,10 @@ pub fn find_family_for_model(slug: &str) -> Option { base_instructions: GPT_5_CODEX_INSTRUCTIONS.to_string(), apply_patch_tool_type: Some(ApplyPatchToolType::Freeform), support_verbosity: false, - truncation_mode: TruncationMode::Tokens(10_000), + truncation_policy: TruncationPolicy { + mode: TruncationMode::Tokens, + tokens_budget: 10_000, + }, ) } else if slug.starts_with("gpt-5.1") { model_family!( @@ -194,7 +207,10 @@ pub fn find_family_for_model(slug: &str) -> Option { default_verbosity: Some(Verbosity::Low), base_instructions: GPT_5_1_INSTRUCTIONS.to_string(), default_reasoning_effort: Some(ReasoningEffort::Medium), - truncation_mode: TruncationMode::Bytes(10_000), + truncation_policy: TruncationPolicy { + mode: TruncationMode::Bytes, + tokens_budget: 2_500, + }, ) } else if slug.starts_with("gpt-5") { model_family!( @@ -202,7 +218,10 @@ pub fn find_family_for_model(slug: &str) -> Option { supports_reasoning_summaries: true, needs_special_apply_patch_instructions: true, support_verbosity: true, - truncation_mode: TruncationMode::Bytes(10_000), + truncation_policy: TruncationPolicy { + mode: TruncationMode::Bytes, + tokens_budget: 2_500, + }, ) } else { None @@ -225,6 +244,9 @@ pub fn derive_default_model_family(model: &str) -> ModelFamily { shell_type: ConfigShellToolType::Default, default_verbosity: None, default_reasoning_effort: None, - truncation_mode: TruncationMode::Bytes(10_000), + truncation_policy: TruncationPolicy { + mode: TruncationMode::Bytes, + tokens_budget: 2_500, + }, } } diff --git a/codex-rs/core/src/state/session.rs b/codex-rs/core/src/state/session.rs index 5b1ebf5b08..833509b5b4 100644 --- a/codex-rs/core/src/state/session.rs +++ b/codex-rs/core/src/state/session.rs @@ -18,11 +18,12 @@ pub(crate) struct SessionState { impl SessionState { /// Create a new session state mirroring previous `State::default()` semantics. pub(crate) fn new(session_configuration: SessionConfiguration) -> Self { - let mut history = - ContextManager::with_function_output_limit(session_configuration.output_max_tokens()); - history.set_model(Some(session_configuration.model())); + let history = ContextManager::new( + session_configuration.model(), + session_configuration.output_max_tokens(), + ); Self { - session_configuration: session_configuration.clone(), + session_configuration, history, latest_rate_limits: None, } diff --git a/codex-rs/core/src/tools/mod.rs b/codex-rs/core/src/tools/mod.rs index 708194907c..2e25bfdd4d 100644 --- a/codex-rs/core/src/tools/mod.rs +++ b/codex-rs/core/src/tools/mod.rs @@ -9,7 +9,6 @@ pub mod runtimes; pub mod sandboxing; pub mod spec; -use crate::context_manager::MODEL_FORMAT_MAX_BYTES; use crate::context_manager::truncate_with_line_bytes_budget; use crate::exec::ExecToolCallOutput; pub use router::ToolRouter; @@ -21,6 +20,9 @@ pub(crate) const TELEMETRY_PREVIEW_MAX_LINES: usize = 64; // lines pub(crate) const TELEMETRY_PREVIEW_TRUNCATION_NOTICE: &str = "[... telemetry preview truncated ...]"; +// TODO(aibrahim): migrate shell tool to use truncate text and respect config value +const SHELL_OUTPUT_MAX_BYTES: usize = 2_500; + /// Format the combined exec output for sending back to the model. /// Includes exit code and duration metadata; truncates large bodies safely. pub fn format_exec_output_for_model(exec_output: &ExecToolCallOutput) -> String { @@ -76,5 +78,5 @@ pub fn format_exec_output_str(exec_output: &ExecToolCallOutput) -> String { }; // Truncate for model consumption before serialization. - truncate_with_line_bytes_budget(&body, MODEL_FORMAT_MAX_BYTES) + truncate_with_line_bytes_budget(&body, SHELL_OUTPUT_MAX_BYTES) } diff --git a/codex-rs/core/src/tools/orchestrator.rs b/codex-rs/core/src/tools/orchestrator.rs index 4ac82b0d45..a1ba0186da 100644 --- a/codex-rs/core/src/tools/orchestrator.rs +++ b/codex-rs/core/src/tools/orchestrator.rs @@ -131,7 +131,7 @@ impl ToolOrchestrator { }; let friendly = get_error_message_ui( &CodexErr::Sandbox(err), - Some(turn_ctx.client.get_model().as_str()), + turn_ctx.client.get_model().as_str(), ); let failure_summary = format!("failed in sandbox: {friendly}"); diff --git a/codex-rs/core/src/truncate.rs b/codex-rs/core/src/truncate.rs index a211d8567d..9490e6d704 100644 --- a/codex-rs/core/src/truncate.rs +++ b/codex-rs/core/src/truncate.rs @@ -11,23 +11,26 @@ use crate::model_family::derive_default_model_family; use crate::model_family::find_family_for_model; /// Model-formatting limits: clients get full streams; only content sent to the model is truncated. -pub const MODEL_FORMAT_MAX_BYTES: usize = 10 * 1024; // 10 KiB -pub const MODEL_FORMAT_MAX_LINES: usize = 256; // lines -pub const DEFAULT_FUNCTION_OUTPUT_TOKEN_LIMIT: usize = MODEL_FORMAT_MAX_BYTES / 4; const TOKENIZER_STACK_SAFE_BYTES: usize = 1024 * 1024; // 1 MiB const APPROX_BYTES_PER_TOKEN: usize = 4; +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct TruncationPolicy { + pub mode: TruncationMode, + pub tokens_budget: usize, +} + #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum TruncationMode { - Bytes(usize), - Tokens(usize), + Bytes, + Tokens, } /// Format a block of exec/tool output for model consumption, truncating by /// lines and bytes while preserving head and tail segments. pub(crate) fn truncate_with_line_bytes_budget(content: &str, bytes_budget: usize) -> String { // TODO(aibrahim): to be removed - let lines_budget = MODEL_FORMAT_MAX_LINES; + let lines_budget = 256; // Head+tail truncation for the model: show the beginning and end with an elision. // Clients still receive full streams; only this formatted summary is capped. let total_lines = content.lines().count(); @@ -40,32 +43,16 @@ pub(crate) fn truncate_with_line_bytes_budget(content: &str, bytes_budget: usize pub(crate) fn truncate_text( content: &str, - tokens_budget: Option, - model: Option<&str>, + tokens_budget: usize, + model: &str, ) -> (String, Option) { - let mode = model - .map(|m| { - find_family_for_model(m) - .unwrap_or(derive_default_model_family(m)) - .truncation_mode - }) - .unwrap_or(TruncationMode::Bytes(MODEL_FORMAT_MAX_BYTES)); + let mode = find_family_for_model(model) + .unwrap_or_else(|| derive_default_model_family(model)) + .truncation_policy + .mode; match mode { - TruncationMode::Bytes(bytes) => { - let max_tokens = if let Some(tokens) = tokens_budget { - tokens - } else { - bytes / APPROX_BYTES_PER_TOKEN - }; - truncate_with_byte_estimate(content, max_tokens, model) - } - TruncationMode::Tokens(tokens) => { - if let Some(tokens) = tokens_budget { - truncate_with_token_budget(content, tokens, model) - } else { - truncate_with_token_budget(content, tokens, model) - } - } + TruncationMode::Bytes => truncate_with_byte_estimate(content, tokens_budget, model), + TruncationMode::Tokens => truncate_with_token_budget(content, tokens_budget, model), } } @@ -76,6 +63,7 @@ pub(crate) fn truncate_text( pub(crate) fn truncate_function_output_items_to_token_limit( items: &[FunctionCallOutputContentItem], max_tokens: usize, + model: &str, ) -> Vec { let mut out: Vec = Vec::with_capacity(items.len()); let mut remaining_tokens = max_tokens; @@ -95,7 +83,7 @@ pub(crate) fn truncate_function_output_items_to_token_limit( out.push(FunctionCallOutputContentItem::InputText { text: text.clone() }); remaining_tokens = remaining_tokens.saturating_sub(token_len); } else { - let (snippet, _) = truncate_with_token_budget(text, remaining_tokens, None); + let (snippet, _) = truncate_with_token_budget(text, remaining_tokens, model); if snippet.is_empty() { omitted_text_items += 1; } else { @@ -125,11 +113,7 @@ pub(crate) fn truncate_function_output_items_to_token_limit( /// preserving the beginning and the end. Returns the possibly truncated string /// and `Some(original_token_count)` if truncation occurred; otherwise returns /// the original string and `None`. -fn truncate_with_token_budget( - s: &str, - max_tokens: usize, - model: Option<&str>, -) -> (String, Option) { +fn truncate_with_token_budget(s: &str, max_tokens: usize, model: &str) -> (String, Option) { if s.is_empty() { return (String::new(), None); } @@ -223,11 +207,7 @@ fn truncate_with_tokenizer_path( } /// estimate the number of tokens in a string based on the length of the string -fn truncate_with_byte_estimate( - s: &str, - max_tokens: usize, - model: Option<&str>, -) -> (String, Option) { +fn truncate_with_byte_estimate(s: &str, max_tokens: usize, model: &str) -> (String, Option) { let total_tokens = approx_token_count(s); if max_tokens == 0 { return (format_truncation_marker(total_tokens), Some(total_tokens)); @@ -428,7 +408,7 @@ fn ensure_candidate_within_token_budget( candidate: String, max_budget: usize, total_tokens: u64, - model: Option<&str>, + model: &str, ) -> (String, Option) { if max_budget == 0 { return (candidate, Some(total_tokens)); @@ -458,14 +438,10 @@ fn approx_bytes_for_tokens(tokens: usize) -> usize { tokens.saturating_mul(APPROX_BYTES_PER_TOKEN) } -fn select_tokenizer(model: Option<&str>) -> Option { - if let Some(name) = model { - Tokenizer::for_model(name) - .or_else(|_| Tokenizer::try_default()) - .ok() - } else { - Tokenizer::try_default().ok() - } +fn select_tokenizer(model: &str) -> Option { + Tokenizer::for_model(model) + .or_else(|_| Tokenizer::try_default()) + .ok() } fn truncate_on_boundary(input: &str, max_len: usize) -> &str { @@ -527,9 +503,10 @@ fn estimate_safe_token_count(text: &str, tokenizer: Option<&Tokenizer>) -> usize #[cfg(test)] mod tests { - use super::DEFAULT_FUNCTION_OUTPUT_TOKEN_LIMIT; - use super::MODEL_FORMAT_MAX_BYTES; - use super::MODEL_FORMAT_MAX_LINES; + use crate::config::OPENAI_DEFAULT_MODEL; + use crate::model_family::derive_default_model_family; + use crate::model_family::find_family_for_model; + use super::truncate_function_output_items_to_token_limit; use super::truncate_with_line_bytes_budget; use super::truncate_with_token_budget; @@ -538,6 +515,15 @@ mod tests { use pretty_assertions::assert_eq; use regex_lite::Regex; + const MODEL_FORMAT_MAX_LINES: usize = 256; + + fn model_format_max_bytes() -> usize { + find_family_for_model(OPENAI_DEFAULT_MODEL) + .unwrap_or_else(|| derive_default_model_family(OPENAI_DEFAULT_MODEL)) + .truncation_policy + .tokens_budget + } + fn truncated_message_pattern(line: &str, total_lines: usize) -> String { let head_lines = MODEL_FORMAT_MAX_LINES / 2; let tail_lines = MODEL_FORMAT_MAX_LINES - head_lines; @@ -547,7 +533,8 @@ mod tests { let escaped_line = regex_lite::escape(line); if omitted == 0 { return format!( - r"(?s)^Total output lines: {total_lines}\n\n(?P{escaped_line}.*\n\[\.{{3}} output truncated to fit {MODEL_FORMAT_MAX_BYTES} bytes \.{{3}}]\n\n.*)$", + r"(?s)^Total output lines: {total_lines}\n\n(?P{escaped_line}.*\n\[\.{{3}} output truncated to fit {max_bytes} bytes \.{{3}}]\n\n.*)$", + max_bytes = model_format_max_bytes(), ); } format!( @@ -578,7 +565,7 @@ mod tests { let tok = Tokenizer::try_default().expect("load tokenizer"); let s = "short output"; let limit = usize::try_from(tok.count(s)).unwrap_or(0) + 10; - let (out, original) = truncate_with_token_budget(s, limit, None); + let (out, original) = truncate_with_token_budget(s, limit, OPENAI_DEFAULT_MODEL); assert_eq!(out, s); assert_eq!(original, None); } @@ -588,7 +575,7 @@ mod tests { let tok = Tokenizer::try_default().expect("load tokenizer"); let s = "abcdef"; let total = tok.count(s) as u64; - let (out, original) = truncate_with_token_budget(s, 0, None); + let (out, original) = truncate_with_token_budget(s, 0, OPENAI_DEFAULT_MODEL); assert!(out.contains("tokens truncated")); assert_eq!(original, Some(total)); } @@ -598,7 +585,7 @@ mod tests { let tok = Tokenizer::try_default().expect("load tokenizer"); let s = "alpha beta gamma delta epsilon zeta eta theta iota kappa"; let max_tokens = 12; - let (out, original) = truncate_with_token_budget(s, max_tokens, None); + let (out, original) = truncate_with_token_budget(s, max_tokens, OPENAI_DEFAULT_MODEL); assert!(out.contains("tokens truncated")); assert_eq!(original, Some(tok.count(s) as u64)); let result_tokens = tok.count(&out) as usize; @@ -610,7 +597,7 @@ mod tests { let tok = Tokenizer::try_default().expect("load tokenizer"); let s = "😀😀😀😀😀😀😀😀😀😀\nsecond line with ascii text\n"; let max_tokens = 8; - let (out, tokens) = truncate_with_token_budget(s, max_tokens, None); + let (out, tokens) = truncate_with_token_budget(s, max_tokens, OPENAI_DEFAULT_MODEL); assert!(out.contains("tokens truncated")); assert!(!out.contains('\u{fffd}')); @@ -624,7 +611,7 @@ mod tests { let line = "very long execution error line that should trigger truncation\n"; let large_error = line.repeat(2_500); // way beyond both byte and line limits - let truncated = truncate_with_line_bytes_budget(&large_error, MODEL_FORMAT_MAX_BYTES); + let truncated = truncate_with_line_bytes_budget(&large_error, model_format_max_bytes()); let total_lines = large_error.lines().count(); let pattern = truncated_message_pattern(line, total_lines); @@ -639,7 +626,7 @@ mod tests { .expect("missing body capture") .as_str(); assert!( - body.len() <= MODEL_FORMAT_MAX_BYTES, + body.len() <= model_format_max_bytes(), "body exceeds byte limit: {} bytes", body.len() ); @@ -648,12 +635,12 @@ mod tests { #[test] fn format_exec_output_marks_byte_truncation_without_omitted_lines() { - let long_line = "a".repeat(MODEL_FORMAT_MAX_BYTES + 50); - let truncated = truncate_with_line_bytes_budget(&long_line, MODEL_FORMAT_MAX_BYTES); + let max_bytes = model_format_max_bytes(); + let long_line = "a".repeat(max_bytes + 50); + let truncated = truncate_with_line_bytes_budget(&long_line, max_bytes); assert_ne!(truncated, long_line); - let marker_line = - format!("[... output truncated to fit {MODEL_FORMAT_MAX_BYTES} bytes ...]"); + let marker_line = format!("[... output truncated to fit {max_bytes} bytes ...]"); assert!( truncated.contains(&marker_line), "missing byte truncation marker: {truncated}" @@ -669,7 +656,7 @@ mod tests { let content = "example output\n".repeat(10); assert_eq!( - truncate_with_line_bytes_budget(&content, MODEL_FORMAT_MAX_BYTES), + truncate_with_line_bytes_budget(&content, model_format_max_bytes()), content ); } @@ -681,7 +668,7 @@ mod tests { .map(|idx| format!("line-{idx}\n")) .collect(); - let truncated = truncate_with_line_bytes_budget(&content, MODEL_FORMAT_MAX_BYTES); + let truncated = truncate_with_line_bytes_budget(&content, model_format_max_bytes()); let omitted = total_lines - MODEL_FORMAT_MAX_LINES; let expected_marker = format!("[... omitted {omitted} of {total_lines} lines ...]"); @@ -710,7 +697,7 @@ mod tests { .map(|idx| format!("line-{idx}-{long_line}\n")) .collect(); - let truncated = truncate_with_line_bytes_budget(&content, MODEL_FORMAT_MAX_BYTES); + let truncated = truncate_with_line_bytes_budget(&content, model_format_max_bytes()); assert!( truncated.contains("[... omitted 42 of 298 lines ...]"), @@ -728,14 +715,11 @@ mod tests { let chunk = "alpha beta gamma delta epsilon zeta eta theta iota kappa lambda mu nu xi omicron pi rho sigma tau upsilon phi chi psi omega.\n"; let chunk_tokens = usize::try_from(tok.count(chunk)).unwrap_or(usize::MAX); assert!(chunk_tokens > 0, "chunk must consume tokens"); - - let target_each = DEFAULT_FUNCTION_OUTPUT_TOKEN_LIMIT - .saturating_div(2) - .saturating_sub(chunk_tokens); + let limit = model_format_max_bytes(); + let target_each = limit.saturating_div(2).saturating_sub(chunk_tokens); let (t1, t1_tokens) = build_chunked_text(chunk, chunk_tokens, target_each); let (t2, t2_tokens) = build_chunked_text(chunk, chunk_tokens, target_each); - let remaining_after_t1_t2 = - DEFAULT_FUNCTION_OUTPUT_TOKEN_LIMIT.saturating_sub(t1_tokens + t2_tokens); + let remaining_after_t1_t2 = limit.saturating_sub(t1_tokens + t2_tokens); assert!( remaining_after_t1_t2 > 0, "expected positive token remainder after first two items" @@ -763,10 +747,9 @@ mod tests { FunctionCallOutputContentItem::InputText { text: t5 }, ]; - let output = truncate_function_output_items_to_token_limit( - &items, - DEFAULT_FUNCTION_OUTPUT_TOKEN_LIMIT, - ); + let model = OPENAI_DEFAULT_MODEL; + + let output = truncate_function_output_items_to_token_limit(&items, limit, model); // Expect: t1 (full), t2 (full), image, t3 (truncated), summary mentioning 2 omitted. assert_eq!(output.len(), 5); diff --git a/codex-rs/core/src/unified_exec/session.rs b/codex-rs/core/src/unified_exec/session.rs index a5366f3579..a1d685b09f 100644 --- a/codex-rs/core/src/unified_exec/session.rs +++ b/codex-rs/core/src/unified_exec/session.rs @@ -172,8 +172,8 @@ impl UnifiedExecSession { if is_likely_sandbox_denied(self.sandbox_type(), &exec_output) { let (snippet, _) = truncate_text( &aggregated_text, - Some(UNIFIED_EXEC_OUTPUT_MAX_TOKENS), - Some(ctx.turn.client.get_model().as_str()), + UNIFIED_EXEC_OUTPUT_MAX_TOKENS, + ctx.turn.client.get_model().as_str(), ); let message = if snippet.is_empty() { format!("exit code {exit_code}") diff --git a/codex-rs/core/src/unified_exec/session_manager.rs b/codex-rs/core/src/unified_exec/session_manager.rs index 238d4e1413..406559bfc0 100644 --- a/codex-rs/core/src/unified_exec/session_manager.rs +++ b/codex-rs/core/src/unified_exec/session_manager.rs @@ -71,8 +71,7 @@ impl UnifiedExecSessionManager { let text = String::from_utf8_lossy(&collected).to_string(); let model = context.turn.client.get_model(); - let (output, original_token_count) = - truncate_text(&text, Some(max_tokens), Some(model.as_str())); + let (output, original_token_count) = truncate_text(&text, max_tokens, &model); let original_token_count = original_token_count.and_then(|count| usize::try_from(count).ok()); let chunk_id = generate_chunk_id(); @@ -180,8 +179,7 @@ impl UnifiedExecSessionManager { let text = String::from_utf8_lossy(&collected).to_string(); let model = turn_ref.client.get_model(); - let (output, original_token_count) = - truncate_text(&text, Some(max_tokens), Some(model.as_str())); + let (output, original_token_count) = truncate_text(&text, max_tokens, &model); let original_token_count = original_token_count.and_then(|count| usize::try_from(count).ok()); let chunk_id = generate_chunk_id(); From fcc981fdfb757655c83e991025d8b04aa7fdfca6 Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Mon, 17 Nov 2025 16:37:51 -0800 Subject: [PATCH 22/68] tests --- codex-rs/core/src/context_manager/history.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/codex-rs/core/src/context_manager/history.rs b/codex-rs/core/src/context_manager/history.rs index 33d075e35e..42baf67a41 100644 --- a/codex-rs/core/src/context_manager/history.rs +++ b/codex-rs/core/src/context_manager/history.rs @@ -34,10 +34,7 @@ impl ContextManager { pub(crate) fn set_model(&mut self, model: &str) { self.model = model.to_string(); - self.function_output_max_tokens = find_family_for_model(model) - .unwrap_or_else(|| derive_default_model_family(model)) - .truncation_policy - .tokens_budget; + // intentionally not updating the function output max tokens here. } pub(crate) fn token_info(&self) -> Option { From a8cdae9c32a99e4ced34896e24c24f3d45e247fe Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Mon, 17 Nov 2025 16:59:22 -0800 Subject: [PATCH 23/68] avoid approx with tests --- codex-rs/core/src/config/mod.rs | 2 +- codex-rs/core/src/truncate.rs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/codex-rs/core/src/config/mod.rs b/codex-rs/core/src/config/mod.rs index e5b81505b1..b07637b8cb 100644 --- a/codex-rs/core/src/config/mod.rs +++ b/codex-rs/core/src/config/mod.rs @@ -3137,7 +3137,7 @@ model_verbosity = "high" model_providers: fixture.model_provider_map.clone(), project_doc_max_bytes: PROJECT_DOC_MAX_BYTES, project_doc_fallback_filenames: Vec::new(), - calls_output_max_tokens: find_family_for_model(OPENAI_DEFAULT_MODEL) + calls_output_max_tokens: find_family_for_model("gpt-5") .unwrap() .truncation_policy .tokens_budget, diff --git a/codex-rs/core/src/truncate.rs b/codex-rs/core/src/truncate.rs index 9490e6d704..b76c3e9c04 100644 --- a/codex-rs/core/src/truncate.rs +++ b/codex-rs/core/src/truncate.rs @@ -594,8 +594,8 @@ mod tests { #[test] fn truncate_middle_handles_utf8_content() { - let tok = Tokenizer::try_default().expect("load tokenizer"); - let s = "😀😀😀😀😀😀😀😀😀😀\nsecond line with ascii text\n"; + let tok = Tokenizer::for_model(OPENAI_DEFAULT_MODEL).expect("load tokenizer"); + let s = "😀😀😀😀😀😀😀😀😀😀\nsecond line with text\n"; let max_tokens = 8; let (out, tokens) = truncate_with_token_budget(s, max_tokens, OPENAI_DEFAULT_MODEL); From 0312d3bee82b53646caf7a4dec9ee3b3cfdc77be Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Mon, 17 Nov 2025 17:00:25 -0800 Subject: [PATCH 24/68] avoid approx with tests --- codex-rs/core/src/truncate.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codex-rs/core/src/truncate.rs b/codex-rs/core/src/truncate.rs index b76c3e9c04..36eee57ec7 100644 --- a/codex-rs/core/src/truncate.rs +++ b/codex-rs/core/src/truncate.rs @@ -83,7 +83,7 @@ pub(crate) fn truncate_function_output_items_to_token_limit( out.push(FunctionCallOutputContentItem::InputText { text: text.clone() }); remaining_tokens = remaining_tokens.saturating_sub(token_len); } else { - let (snippet, _) = truncate_with_token_budget(text, remaining_tokens, model); + let (snippet, _) = truncate_text(text, remaining_tokens, model); if snippet.is_empty() { omitted_text_items += 1; } else { From 121e943f0c837dd50e5c545c56cb2f54ba6c6219 Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Mon, 17 Nov 2025 17:04:39 -0800 Subject: [PATCH 25/68] tests --- codex-rs/core/tests/suite/truncation.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codex-rs/core/tests/suite/truncation.rs b/codex-rs/core/tests/suite/truncation.rs index 0dde8010a6..162e16068d 100644 --- a/codex-rs/core/tests/suite/truncation.rs +++ b/codex-rs/core/tests/suite/truncation.rs @@ -272,7 +272,7 @@ async fn mcp_tool_call_output_exceeds_limit_truncated_for_model() -> Result<()> let tool_name = format!("mcp__{server_name}__echo"); // Build a very large message to exceed 10KiB once serialized. - let large_msg = "long-message-with-newlines-".repeat(600); + let large_msg = "long-message-with-newlines-".repeat(6000); let args_json = serde_json::json!({ "message": large_msg }); mount_sse_once( From ff3fae625dc6e40a0281a65edbc10c243ea04c5a Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Mon, 17 Nov 2025 17:08:03 -0800 Subject: [PATCH 26/68] lint --- docs/config.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/config.md b/docs/config.md index a378d145d2..92fb0526f2 100644 --- a/docs/config.md +++ b/docs/config.md @@ -911,7 +911,7 @@ Valid values: | `model_provider` | string | Provider id from `model_providers` (default: `openai`). | | `model_context_window` | number | Context window tokens. | | `model_max_output_tokens` | number | Max output tokens. | -| `output_max_tokens` | number | Token budget for stored function/tool outputs in history (default: 2,560 tokens). | +| `output_max_tokens` | number | Token budget for stored function/tool outputs in history (default: 2,560 tokens). | | `approval_policy` | `untrusted` \| `on-failure` \| `on-request` \| `never` | When to prompt for approval. | | `sandbox_mode` | `read-only` \| `workspace-write` \| `danger-full-access` | OS sandbox policy. | | `sandbox_workspace_write.writable_roots` | array | Extra writable roots in workspace‑write. | From b270394fa05dafd13eb0b5becc5c99ac1e94ea22 Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Mon, 17 Nov 2025 17:21:58 -0800 Subject: [PATCH 27/68] names --- codex-rs/core/src/codex.rs | 8 ++++---- codex-rs/core/src/compact.rs | 12 ++++++------ 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index 545e5dd14e..1d738ab937 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -67,7 +67,7 @@ use crate::error::Result as CodexResult; use crate::exec::StreamOutput; // Removed: legacy executor wiring replaced by ToolOrchestrator flows. // legacy normalize_exec_result no longer used after orchestrator migration -use crate::compact::build_token_limited_compacted_history; +use crate::compact::build_compacted_history; use crate::compact::collect_user_messages; use crate::mcp::auth::compute_auth_statuses; use crate::mcp_connection_manager::McpConnectionManager; @@ -1005,7 +1005,7 @@ impl Session { RolloutItem::Compacted(compacted) => { let snapshot = history.get_history(); let user_messages = collect_user_messages(&snapshot); - let rebuilt = build_token_limited_compacted_history( + let rebuilt = build_compacted_history( self.build_initial_context(turn_context), &user_messages, &compacted.message, @@ -2957,7 +2957,7 @@ mod tests { let summary1 = "summary one"; let snapshot1 = live_history.get_history(); let user_messages1 = collect_user_messages(&snapshot1); - let rebuilt1 = build_token_limited_compacted_history( + let rebuilt1 = build_compacted_history( session.build_initial_context(turn_context), &user_messages1, summary1, @@ -2991,7 +2991,7 @@ mod tests { let summary2 = "summary two"; let snapshot2 = live_history.get_history(); let user_messages2 = collect_user_messages(&snapshot2); - let rebuilt2 = build_token_limited_compacted_history( + let rebuilt2 = build_compacted_history( session.build_initial_context(turn_context), &user_messages2, summary2, diff --git a/codex-rs/core/src/compact.rs b/codex-rs/core/src/compact.rs index 197a4859b9..e54b642f2a 100644 --- a/codex-rs/core/src/compact.rs +++ b/codex-rs/core/src/compact.rs @@ -148,7 +148,7 @@ async fn run_compact_task_inner( let user_messages = collect_user_messages(&history_snapshot); let initial_context = sess.build_initial_context(turn_context.as_ref()); - let mut new_history = build_token_limited_compacted_history( + let mut new_history = build_compacted_history( initial_context, &user_messages, &summary_text, @@ -226,13 +226,13 @@ pub(crate) fn is_summary_message(message: &str) -> bool { message.starts_with(format!("{SUMMARY_PREFIX}\n").as_str()) } -pub(crate) fn build_token_limited_compacted_history( +pub(crate) fn build_compacted_history( initial_context: Vec, user_messages: &[String], summary_text: &str, model: &str, ) -> Vec { - build_token_limited_compacted_history_with_limit( + build_compacted_history_with_limit( initial_context, user_messages, summary_text, @@ -241,7 +241,7 @@ pub(crate) fn build_token_limited_compacted_history( ) } -fn build_token_limited_compacted_history_with_limit( +fn build_compacted_history_with_limit( mut history: Vec, user_messages: &[String], summary_text: &str, @@ -430,7 +430,7 @@ mod tests { let max_tokens = 16; let big = "word ".repeat(200); let model = OPENAI_DEFAULT_MODEL; - let history = super::build_token_limited_compacted_history_with_limit( + let history = super::build_compacted_history_with_limit( Vec::new(), std::slice::from_ref(&big), "SUMMARY", @@ -473,7 +473,7 @@ mod tests { let user_messages = vec!["first user message".to_string()]; let summary_text = "summary text"; - let history = build_token_limited_compacted_history( + let history = build_compacted_history( initial_context, &user_messages, summary_text, From ba01537dd3c2ff64a918adfea63fb702f180a965 Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Mon, 17 Nov 2025 17:22:53 -0800 Subject: [PATCH 28/68] names --- codex-rs/core/src/context_manager/history_tests.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/codex-rs/core/src/context_manager/history_tests.rs b/codex-rs/core/src/context_manager/history_tests.rs index 4895923836..fc82cd2f98 100644 --- a/codex-rs/core/src/context_manager/history_tests.rs +++ b/codex-rs/core/src/context_manager/history_tests.rs @@ -15,6 +15,7 @@ use codex_utils_tokenizer::Tokenizer; use pretty_assertions::assert_eq; use regex_lite::Regex; +// TODO(aibrahim): to be removed const EXEC_FORMAT_MAX_LINES: usize = 256; fn exec_format_max_bytes() -> usize { From 7c3f260d34ef6c4687ca48c69c3940c688263b62 Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Mon, 17 Nov 2025 17:25:01 -0800 Subject: [PATCH 29/68] test --- codex-rs/core/tests/suite/truncation.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codex-rs/core/tests/suite/truncation.rs b/codex-rs/core/tests/suite/truncation.rs index 162e16068d..6f553f3e5c 100644 --- a/codex-rs/core/tests/suite/truncation.rs +++ b/codex-rs/core/tests/suite/truncation.rs @@ -47,7 +47,7 @@ async fn truncate_function_error_trims_respond_to_model() -> Result<()> { let test = builder.build(&server).await?; // Construct a very long, non-existent path to force a RespondToModel error with a large message - let long_path = "axyzldg".repeat(20_000); + let long_path = "long path text should trigger truncation".repeat(10_000); let call_id = "grep-huge-error"; let args = json!({ "pattern": "alpha", From 95d68bf8a2a9205720e46847d90085a2a7aa6b6c Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Mon, 17 Nov 2025 17:32:11 -0800 Subject: [PATCH 30/68] comment --- codex-rs/core/src/codex.rs | 2 ++ codex-rs/core/src/context_manager/history.rs | 3 ++- docs/config.md | 2 +- docs/example-config.md | 2 +- 4 files changed, 6 insertions(+), 3 deletions(-) diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index 1d738ab937..3e1949a874 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -351,6 +351,8 @@ impl SessionConfiguration { let mut next_configuration = self.clone(); if let Some(model) = updates.model.clone() { next_configuration.model = model; + // TODO (aibrahim): recompute output_max_tokens/calls_output_max_tokens when the model changes so + // truncation budgets keep matching the current model. } if let Some(effort) = updates.reasoning_effort { next_configuration.model_reasoning_effort = effort; diff --git a/codex-rs/core/src/context_manager/history.rs b/codex-rs/core/src/context_manager/history.rs index 42baf67a41..23cc340df1 100644 --- a/codex-rs/core/src/context_manager/history.rs +++ b/codex-rs/core/src/context_manager/history.rs @@ -34,7 +34,8 @@ impl ContextManager { pub(crate) fn set_model(&mut self, model: &str) { self.model = model.to_string(); - // intentionally not updating the function output max tokens here. + // TODO (aibrahim): recompute output_max_tokens/calls_output_max_tokens when the model changes so + // truncation budgets keep matching the current model. } pub(crate) fn token_info(&self) -> Option { diff --git a/docs/config.md b/docs/config.md index 92fb0526f2..0f1f136036 100644 --- a/docs/config.md +++ b/docs/config.md @@ -911,7 +911,7 @@ Valid values: | `model_provider` | string | Provider id from `model_providers` (default: `openai`). | | `model_context_window` | number | Context window tokens. | | `model_max_output_tokens` | number | Max output tokens. | -| `output_max_tokens` | number | Token budget for stored function/tool outputs in history (default: 2,560 tokens). | +| `calls_output_max_tokens` | number | Token budget for stored function/tool outputs in history (default: 2,560 tokens). | | `approval_policy` | `untrusted` \| `on-failure` \| `on-request` \| `never` | When to prompt for approval. | | `sandbox_mode` | `read-only` \| `workspace-write` \| `danger-full-access` | OS sandbox policy. | | `sandbox_workspace_write.writable_roots` | array | Extra writable roots in workspace‑write. | diff --git a/docs/example-config.md b/docs/example-config.md index e311cd6c14..8e19313505 100644 --- a/docs/example-config.md +++ b/docs/example-config.md @@ -33,7 +33,7 @@ model_provider = "openai" # model_context_window = 128000 # tokens; default: auto for model # model_max_output_tokens = 8192 # tokens; default: auto for model # model_auto_compact_token_limit = 0 # disable/override auto; default: model family specific -# output_max_tokens = 2560 # tokens stored per tool output; default: 2560 +# calls_output_max_tokens = 10000 # tokens stored per tool output; default: 10000 for gpt-5.1-codex ################################################################################ # Reasoning & Verbosity (Responses API capable models) From 779bd971fda426d3dc25be375af3b26908c3ede6 Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Mon, 17 Nov 2025 17:32:40 -0800 Subject: [PATCH 31/68] comment --- docs/config.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/config.md b/docs/config.md index 0f1f136036..287337f7db 100644 --- a/docs/config.md +++ b/docs/config.md @@ -911,7 +911,7 @@ Valid values: | `model_provider` | string | Provider id from `model_providers` (default: `openai`). | | `model_context_window` | number | Context window tokens. | | `model_max_output_tokens` | number | Max output tokens. | -| `calls_output_max_tokens` | number | Token budget for stored function/tool outputs in history (default: 2,560 tokens). | +| `calls_output_max_tokens` | number | Token budget for stored function/tool outputs in history (default: 2,560 tokens). | | `approval_policy` | `untrusted` \| `on-failure` \| `on-request` \| `never` | When to prompt for approval. | | `sandbox_mode` | `read-only` \| `workspace-write` \| `danger-full-access` | OS sandbox policy. | | `sandbox_workspace_write.writable_roots` | array | Extra writable roots in workspace‑write. | From a42e62e2cf4a9628e378a6b3f15ec1b2307affcf Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Mon, 17 Nov 2025 17:33:29 -0800 Subject: [PATCH 32/68] comment --- codex-rs/core/tests/suite/truncation.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codex-rs/core/tests/suite/truncation.rs b/codex-rs/core/tests/suite/truncation.rs index 6f553f3e5c..e4e0a392bc 100644 --- a/codex-rs/core/tests/suite/truncation.rs +++ b/codex-rs/core/tests/suite/truncation.rs @@ -47,7 +47,7 @@ async fn truncate_function_error_trims_respond_to_model() -> Result<()> { let test = builder.build(&server).await?; // Construct a very long, non-existent path to force a RespondToModel error with a large message - let long_path = "long path text should trigger truncation".repeat(10_000); + let long_path = "long path text should trigger truncation".repeat(8_000); let call_id = "grep-huge-error"; let args = json!({ "pattern": "alpha", From f6e612834cf3813c25ee07a409a68b4f489151b5 Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Mon, 17 Nov 2025 18:14:03 -0800 Subject: [PATCH 33/68] comment --- codex-rs/core/src/config/mod.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/codex-rs/core/src/config/mod.rs b/codex-rs/core/src/config/mod.rs index b07637b8cb..e48b4028d0 100644 --- a/codex-rs/core/src/config/mod.rs +++ b/codex-rs/core/src/config/mod.rs @@ -2896,7 +2896,7 @@ model_verbosity = "high" model_providers: fixture.model_provider_map.clone(), project_doc_max_bytes: PROJECT_DOC_MAX_BYTES, project_doc_fallback_filenames: Vec::new(), - calls_output_max_tokens: find_family_for_model(OPENAI_DEFAULT_MODEL) + calls_output_max_tokens: find_family_for_model("o3") .unwrap() .truncation_policy .tokens_budget, @@ -2971,7 +2971,7 @@ model_verbosity = "high" model_providers: fixture.model_provider_map.clone(), project_doc_max_bytes: PROJECT_DOC_MAX_BYTES, project_doc_fallback_filenames: Vec::new(), - calls_output_max_tokens: find_family_for_model(OPENAI_DEFAULT_MODEL) + calls_output_max_tokens: find_family_for_model("gpt-3.5-turbo") .unwrap() .truncation_policy .tokens_budget, @@ -3061,7 +3061,7 @@ model_verbosity = "high" model_providers: fixture.model_provider_map.clone(), project_doc_max_bytes: PROJECT_DOC_MAX_BYTES, project_doc_fallback_filenames: Vec::new(), - calls_output_max_tokens: find_family_for_model(OPENAI_DEFAULT_MODEL) + calls_output_max_tokens: find_family_for_model("o3") .unwrap() .truncation_policy .tokens_budget, From 3835ee08bd5cb589ed5d3d0d69b3eb924edcd6da Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Mon, 17 Nov 2025 20:29:02 -0800 Subject: [PATCH 34/68] progress --- codex-rs/core/src/client.rs | 4 - codex-rs/core/src/codex.rs | 73 +++++++---- codex-rs/core/src/compact.rs | 28 ++-- codex-rs/core/src/config/mod.rs | 6 +- codex-rs/core/src/context_manager/history.rs | 51 ++------ codex-rs/core/src/error.rs | 28 ++-- codex-rs/core/src/model_family.rs | 36 +----- codex-rs/core/src/state/session.rs | 10 +- codex-rs/core/src/tools/orchestrator.rs | 5 +- codex-rs/core/src/truncate.rs | 121 ++++++++++-------- codex-rs/core/src/unified_exec/session.rs | 10 +- .../core/src/unified_exec/session_manager.rs | 10 +- codex-rs/utils/tokenizer/src/lib.rs | 5 + 13 files changed, 189 insertions(+), 198 deletions(-) diff --git a/codex-rs/core/src/client.rs b/codex-rs/core/src/client.rs index bbf18e85ec..98775e3d3a 100644 --- a/codex-rs/core/src/client.rs +++ b/codex-rs/core/src/client.rs @@ -132,10 +132,6 @@ impl ModelClient { }) } - pub fn get_max_calls_output_tokens(&self) -> usize { - self.config.calls_output_max_tokens - } - pub fn config(&self) -> Arc { Arc::clone(&self.config) } diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index 3e1949a874..0588d4d7ea 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -15,6 +15,8 @@ use crate::parse_command::parse_command; use crate::parse_turn_item; use crate::response_processing::process_items; use crate::terminal; +use crate::truncate::TruncationPolicy; +use crate::truncate::TruncationSettings; use crate::user_notification::UserNotifier; use crate::util::error_or_panic; use async_channel::Receiver; @@ -281,6 +283,7 @@ pub(crate) struct TurnContext { pub(crate) final_output_json_schema: Option, pub(crate) codex_linux_sandbox_exe: Option, pub(crate) tool_call_gate: Arc, + pub(crate) truncation_settings: TruncationSettings, } impl TurnContext { @@ -338,7 +341,7 @@ pub(crate) struct SessionConfiguration { /// Set of feature flags for this session features: Features, - output_max_tokens: usize, + output_max_tokens: Option, // TODO(pakrym): Remove config from here original_config_do_not_use: Arc, @@ -372,7 +375,7 @@ impl SessionConfiguration { next_configuration } - pub(crate) fn output_max_tokens(&self) -> usize { + pub(crate) fn output_max_tokens(&self) -> Option { self.output_max_tokens } @@ -449,6 +452,10 @@ impl Session { final_output_json_schema: None, codex_linux_sandbox_exe: config.codex_linux_sandbox_exe.clone(), tool_call_gate: Arc::new(ReadinessFlag::new()), + truncation_settings: TruncationSettings::new( + TruncationPolicy::new(&config), + &session_configuration.model, + ), } } @@ -725,7 +732,11 @@ impl Session { let reconstructed_history = self.reconstruct_history_from_rollout(&turn_context, &rollout_items); if !reconstructed_history.is_empty() { - self.record_into_history(&reconstructed_history).await; + self.record_into_history( + &reconstructed_history, + &turn_context.truncation_settings, + ) + .await; } // If persisting, persist all rollout items as-is (recorder filters) @@ -742,8 +753,6 @@ impl Session { let mut state = self.state.lock().await; state.session_configuration = state.session_configuration.apply(&updates); - let model = state.session_configuration.model().to_string(); - state.history.set_model(&model); } pub(crate) async fn new_turn(&self, updates: SessionSettingsUpdate) -> Arc { @@ -760,8 +769,6 @@ impl Session { let mut state = self.state.lock().await; let session_configuration = state.session_configuration.clone().apply(&updates); state.session_configuration = session_configuration.clone(); - let model = state.session_configuration.model().to_string(); - state.history.set_model(&model); session_configuration }; @@ -985,7 +992,8 @@ impl Session { turn_context: &TurnContext, items: &[ResponseItem], ) { - self.record_into_history(items).await; + self.record_into_history(items, &turn_context.truncation_settings) + .await; self.persist_rollout_response_items(items).await; self.send_raw_response_items(turn_context, items).await; } @@ -995,14 +1003,14 @@ impl Session { turn_context: &TurnContext, rollout_items: &[RolloutItem], ) -> Vec { - let mut history = ContextManager::new( - turn_context.client.get_model().as_str(), - turn_context.client.get_max_calls_output_tokens(), - ); + let mut history = ContextManager::new(); for item in rollout_items { match item { RolloutItem::ResponseItem(response_item) => { - history.record_items(std::iter::once(response_item)); + history.record_items( + std::iter::once(response_item), + &turn_context.truncation_settings, + ); } RolloutItem::Compacted(compacted) => { let snapshot = history.get_history(); @@ -1011,7 +1019,7 @@ impl Session { self.build_initial_context(turn_context), &user_messages, &compacted.message, - turn_context.client.get_model().as_str(), + turn_context.truncation_settings.tokenizer.clone(), ); history.replace(rebuilt); } @@ -1022,9 +1030,13 @@ impl Session { } /// Append ResponseItems to the in-memory conversation history only. - pub(crate) async fn record_into_history(&self, items: &[ResponseItem]) { + pub(crate) async fn record_into_history( + &self, + items: &[ResponseItem], + truncation_settings: &TruncationSettings, + ) { let mut state = self.state.lock().await; - state.record_items(items.iter()); + state.record_items(items.iter(), truncation_settings); } pub(crate) async fn replace_history(&self, items: Vec) { @@ -1788,6 +1800,7 @@ async fn spawn_review_thread( final_output_json_schema: None, codex_linux_sandbox_exe: parent_turn_context.codex_linux_sandbox_exe.clone(), tool_call_gate: Arc::new(ReadinessFlag::new()), + truncation_settings: TruncationSettings::new(TruncationPolicy::new(&config), &model), }; // Seed the child task with the review prompt as the initial user message. @@ -2925,16 +2938,13 @@ mod tests { turn_context: &TurnContext, ) -> (Vec, Vec) { let mut rollout_items = Vec::new(); - let mut live_history = ContextManager::new( - turn_context.client.get_model().as_str(), - turn_context.client.get_max_calls_output_tokens(), - ); + let mut live_history = ContextManager::new(); let initial_context = session.build_initial_context(turn_context); for item in &initial_context { rollout_items.push(RolloutItem::ResponseItem(item.clone())); } - live_history.record_items(initial_context.iter()); + live_history.record_items(initial_context.iter(), &turn_context.truncation_settings); let user1 = ResponseItem::Message { id: None, @@ -2943,7 +2953,7 @@ mod tests { text: "first user".to_string(), }], }; - live_history.record_items(std::iter::once(&user1)); + live_history.record_items(std::iter::once(&user1), &turn_context.truncation_settings); rollout_items.push(RolloutItem::ResponseItem(user1.clone())); let assistant1 = ResponseItem::Message { @@ -2953,7 +2963,10 @@ mod tests { text: "assistant reply one".to_string(), }], }; - live_history.record_items(std::iter::once(&assistant1)); + live_history.record_items( + std::iter::once(&assistant1), + &turn_context.truncation_settings, + ); rollout_items.push(RolloutItem::ResponseItem(assistant1.clone())); let summary1 = "summary one"; @@ -2977,7 +2990,7 @@ mod tests { text: "second user".to_string(), }], }; - live_history.record_items(std::iter::once(&user2)); + live_history.record_items(std::iter::once(&user2), &turn_context.truncation_settings); rollout_items.push(RolloutItem::ResponseItem(user2.clone())); let assistant2 = ResponseItem::Message { @@ -2987,7 +3000,10 @@ mod tests { text: "assistant reply two".to_string(), }], }; - live_history.record_items(std::iter::once(&assistant2)); + live_history.record_items( + std::iter::once(&assistant2), + &turn_context.truncation_settings, + ); rollout_items.push(RolloutItem::ResponseItem(assistant2.clone())); let summary2 = "summary two"; @@ -3011,7 +3027,7 @@ mod tests { text: "third user".to_string(), }], }; - live_history.record_items(std::iter::once(&user3)); + live_history.record_items(std::iter::once(&user3), &turn_context.truncation_settings); rollout_items.push(RolloutItem::ResponseItem(user3.clone())); let assistant3 = ResponseItem::Message { @@ -3021,7 +3037,10 @@ mod tests { text: "assistant reply three".to_string(), }], }; - live_history.record_items(std::iter::once(&assistant3)); + live_history.record_items( + std::iter::once(&assistant3), + &turn_context.truncation_settings, + ); rollout_items.push(RolloutItem::ResponseItem(assistant3.clone())); (rollout_items, live_history.get_history()) diff --git a/codex-rs/core/src/compact.rs b/codex-rs/core/src/compact.rs index e54b642f2a..2682b5858d 100644 --- a/codex-rs/core/src/compact.rs +++ b/codex-rs/core/src/compact.rs @@ -14,6 +14,8 @@ use crate::protocol::EventMsg; use crate::protocol::TaskStartedEvent; use crate::protocol::TurnContextItem; use crate::protocol::WarningEvent; +use crate::truncate::TruncationPolicy; +use crate::truncate::TruncationSettings; use crate::truncate::truncate_text; use crate::util::backoff; use codex_protocol::items::TurnItem; @@ -60,7 +62,10 @@ async fn run_compact_task_inner( let initial_input_for_turn: ResponseInputItem = ResponseInputItem::from(input); let mut history = sess.clone_history().await; - history.record_items(&[initial_input_for_turn.into()]); + history.record_items( + &[initial_input_for_turn.into()], + &turn_context.truncation_settings, + ); let mut truncated_count = 0usize; @@ -152,7 +157,7 @@ async fn run_compact_task_inner( initial_context, &user_messages, &summary_text, - turn_context.client.get_model().as_str(), + turn_context.truncation_settings.tokenizer.clone(), ); let ghost_snapshots: Vec = history_snapshot .iter() @@ -230,14 +235,14 @@ pub(crate) fn build_compacted_history( initial_context: Vec, user_messages: &[String], summary_text: &str, - model: &str, + tokenizer: Arc>, ) -> Vec { build_compacted_history_with_limit( initial_context, user_messages, summary_text, COMPACT_USER_MESSAGE_MAX_TOKENS, - model, + tokenizer, ) } @@ -246,11 +251,10 @@ fn build_compacted_history_with_limit( user_messages: &[String], summary_text: &str, max_tokens: usize, - model: &str, + tokenizer: Arc>, ) -> Vec { let mut selected_messages: Vec = Vec::new(); if max_tokens > 0 { - let tokenizer = Tokenizer::try_default().ok(); let mut remaining = max_tokens; for message in user_messages.iter().rev() { if remaining == 0 { @@ -264,7 +268,11 @@ fn build_compacted_history_with_limit( selected_messages.push(message.clone()); remaining = remaining.saturating_sub(tokens); } else { - let (truncated, _) = truncate_text(message, remaining, model); + let truncation_settings = TruncationSettings { + policy: TruncationPolicy::Tokens(remaining), + tokenizer, + }; + let (truncated, _) = truncate_text(message, &truncation_settings); selected_messages.push(truncated); break; } @@ -313,7 +321,11 @@ async fn drain_to_completed( }; match event { Ok(ResponseEvent::OutputItemDone(item)) => { - sess.record_into_history(std::slice::from_ref(&item)).await; + sess.record_into_history( + std::slice::from_ref(&item), + &turn_context.truncation_settings, + ) + .await; } Ok(ResponseEvent::RateLimits(snapshot)) => { sess.update_rate_limits(turn_context, snapshot).await; diff --git a/codex-rs/core/src/config/mod.rs b/codex-rs/core/src/config/mod.rs index e48b4028d0..672fe08b50 100644 --- a/codex-rs/core/src/config/mod.rs +++ b/codex-rs/core/src/config/mod.rs @@ -194,7 +194,7 @@ pub struct Config { pub project_doc_fallback_filenames: Vec, /// Token budget applied when storing tool/function outputs in the context manager. - pub calls_output_max_tokens: usize, + pub calls_output_max_tokens: Option, /// Directory containing all Codex state (defaults to `~/.codex` but can be /// overridden by the `CODEX_HOME` environment variable). @@ -1141,9 +1141,7 @@ impl Config { } }) .collect(), - calls_output_max_tokens: cfg - .calls_output_max_tokens - .unwrap_or(model_family.truncation_policy.tokens_budget), + calls_output_max_tokens: cfg.calls_output_max_tokens, codex_home, history, file_opener: cfg.file_opener.unwrap_or(UriBasedFileOpener::VsCode), diff --git a/codex-rs/core/src/context_manager/history.rs b/codex-rs/core/src/context_manager/history.rs index 23cc340df1..5b74f53b8c 100644 --- a/codex-rs/core/src/context_manager/history.rs +++ b/codex-rs/core/src/context_manager/history.rs @@ -1,8 +1,6 @@ use crate::codex::TurnContext; -use crate::config::OPENAI_DEFAULT_MODEL; use crate::context_manager::normalize; -use crate::model_family::derive_default_model_family; -use crate::model_family::find_family_for_model; +use crate::truncate::TruncationSettings; use crate::truncate::truncate_function_output_items_to_token_limit; use crate::truncate::truncate_text; use codex_protocol::models::FunctionCallOutputPayload; @@ -18,26 +16,16 @@ pub(crate) struct ContextManager { /// The oldest items are at the beginning of the vector. items: Vec, token_info: Option, - function_output_max_tokens: usize, - model: String, } impl ContextManager { - pub(crate) fn new(model: &str, function_output_max_tokens: usize) -> Self { + pub(crate) fn new() -> Self { Self { items: Vec::new(), token_info: TokenUsageInfo::new_or_append(&None, &None, None), - function_output_max_tokens, - model: model.to_string(), } } - pub(crate) fn set_model(&mut self, model: &str) { - self.model = model.to_string(); - // TODO (aibrahim): recompute output_max_tokens/calls_output_max_tokens when the model changes so - // truncation budgets keep matching the current model. - } - pub(crate) fn token_info(&self) -> Option { self.token_info.clone() } @@ -56,7 +44,7 @@ impl ContextManager { } /// `items` is ordered from oldest to newest. - pub(crate) fn record_items(&mut self, items: I) + pub(crate) fn record_items(&mut self, items: I, truncation_settings: &TruncationSettings) where I: IntoIterator, I::Item: std::ops::Deref, @@ -68,7 +56,7 @@ impl ContextManager { continue; } - let processed = self.process_item(item_ref); + let processed = self.process_item(item_ref, truncation_settings); self.items.push(processed); } } @@ -156,20 +144,16 @@ impl ContextManager { items.retain(|item| !matches!(item, ResponseItem::GhostSnapshot { .. })); } - fn process_item(&self, item: &ResponseItem) -> ResponseItem { + fn process_item( + &self, + item: &ResponseItem, + truncation_settings: &TruncationSettings, + ) -> ResponseItem { match item { ResponseItem::FunctionCallOutput { call_id, output } => { - let (truncated, _) = truncate_text( - output.content.as_str(), - self.function_output_max_tokens, - &self.model, - ); + let (truncated, _) = truncate_text(output.content.as_str(), truncation_settings); let truncated_items = output.content_items.as_ref().map(|items| { - truncate_function_output_items_to_token_limit( - items, - self.function_output_max_tokens, - &self.model, - ) + truncate_function_output_items_to_token_limit(items, truncation_settings) }); ResponseItem::FunctionCallOutput { call_id: call_id.clone(), @@ -181,8 +165,7 @@ impl ContextManager { } } ResponseItem::CustomToolCallOutput { call_id, output } => { - let (truncated, _) = - truncate_text(output, self.function_output_max_tokens, &self.model); + let (truncated, _) = truncate_text(output, truncation_settings); ResponseItem::CustomToolCallOutput { call_id: call_id.clone(), output: truncated, @@ -200,16 +183,6 @@ impl ContextManager { } } -impl Default for ContextManager { - fn default() -> Self { - let default_function_output_max_tokens = find_family_for_model(OPENAI_DEFAULT_MODEL) - .unwrap_or_else(|| derive_default_model_family(OPENAI_DEFAULT_MODEL)) - .truncation_policy - .tokens_budget; - Self::new(OPENAI_DEFAULT_MODEL, default_function_output_max_tokens) - } -} - /// API messages include every non-system item (user/assistant messages, reasoning, /// tool calls, tool outputs, shell calls, and web-search calls). fn is_api_message(message: &ResponseItem) -> bool { diff --git a/codex-rs/core/src/error.rs b/codex-rs/core/src/error.rs index 3d2f4b3335..d531b5ac1f 100644 --- a/codex-rs/core/src/error.rs +++ b/codex-rs/core/src/error.rs @@ -2,6 +2,8 @@ use crate::codex::ProcessedResponseItem; use crate::exec::ExecToolCallOutput; use crate::token_data::KnownPlan; use crate::token_data::PlanType; +use crate::truncate::TruncationPolicy; +use crate::truncate::TruncationSettings; use crate::truncate::truncate_text; use chrono::DateTime; use chrono::Datelike; @@ -13,6 +15,7 @@ use codex_protocol::protocol::RateLimitSnapshot; use reqwest::StatusCode; use serde_json; use std::io; +use std::sync::Arc; use std::time::Duration; use thiserror::Error; use tokio::task::JoinError; @@ -431,7 +434,7 @@ impl CodexErr { } } -pub fn get_error_message_ui(e: &CodexErr, model: &str) -> String { +pub fn get_error_message_ui(e: &CodexErr) -> String { let message = match e { CodexErr::Sandbox(SandboxErr::Denied { output }) => { let aggregated = output.aggregated_output.text.trim(); @@ -461,7 +464,11 @@ pub fn get_error_message_ui(e: &CodexErr, model: &str) -> String { _ => e.to_string(), }; - truncate_text(&message, ERROR_MESSAGE_UI_MAX_TOKENS, model).0 + let truncation_settings = TruncationSettings { + policy: TruncationPolicy::Bytes(ERROR_MESSAGE_UI_MAX_TOKENS), + tokenizer: Arc::new(None), + }; + truncate_text(&message, &truncation_settings).0 } #[cfg(test)] @@ -534,10 +541,7 @@ mod tests { let err = CodexErr::Sandbox(SandboxErr::Denied { output: Box::new(output), }); - assert_eq!( - get_error_message_ui(&err, OPENAI_DEFAULT_MODEL), - "aggregate detail" - ); + assert_eq!(get_error_message_ui(&err), "aggregate detail"); } #[test] @@ -553,10 +557,7 @@ mod tests { let err = CodexErr::Sandbox(SandboxErr::Denied { output: Box::new(output), }); - assert_eq!( - get_error_message_ui(&err, OPENAI_DEFAULT_MODEL), - "stderr detail\nstdout detail" - ); + assert_eq!(get_error_message_ui(&err), "stderr detail\nstdout detail"); } #[test] @@ -572,10 +573,7 @@ mod tests { let err = CodexErr::Sandbox(SandboxErr::Denied { output: Box::new(output), }); - assert_eq!( - get_error_message_ui(&err, OPENAI_DEFAULT_MODEL), - "stdout only" - ); + assert_eq!(get_error_message_ui(&err), "stdout only"); } #[test] @@ -592,7 +590,7 @@ mod tests { output: Box::new(output), }); assert_eq!( - get_error_message_ui(&err, OPENAI_DEFAULT_MODEL), + get_error_message_ui(&err), "command failed inside sandbox with exit code 13" ); } diff --git a/codex-rs/core/src/model_family.rs b/codex-rs/core/src/model_family.rs index 7cd38714c7..45df19f90a 100644 --- a/codex-rs/core/src/model_family.rs +++ b/codex-rs/core/src/model_family.rs @@ -4,7 +4,6 @@ use codex_protocol::config_types::Verbosity; use crate::config::types::ReasoningSummaryFormat; use crate::tools::handlers::apply_patch::ApplyPatchToolType; use crate::tools::spec::ConfigShellToolType; -use crate::truncate::TruncationMode; use crate::truncate::TruncationPolicy; /// The `instructions` field in the payload sent to a model should always start @@ -93,10 +92,7 @@ macro_rules! model_family { shell_type: ConfigShellToolType::Default, default_verbosity: None, default_reasoning_effort: None, - truncation_policy: TruncationPolicy { - mode: TruncationMode::Bytes, - tokens_budget: 10_000, - }, + truncation_policy: TruncationPolicy::Bytes(10_000), }; // apply overrides @@ -154,10 +150,7 @@ pub fn find_family_for_model(slug: &str) -> Option { ], supports_parallel_tool_calls: true, support_verbosity: true, - truncation_policy: TruncationPolicy { - mode: TruncationMode::Tokens, - tokens_budget: 10_000, - }, + truncation_policy: TruncationPolicy::Tokens(10_000), ) // Internal models. @@ -175,10 +168,7 @@ pub fn find_family_for_model(slug: &str) -> Option { ], supports_parallel_tool_calls: true, support_verbosity: true, - truncation_policy: TruncationPolicy { - mode: TruncationMode::Tokens, - tokens_budget: 10_000, - }, + truncation_policy: TruncationPolicy::Tokens(10_000), ) // Production models. @@ -193,10 +183,7 @@ pub fn find_family_for_model(slug: &str) -> Option { base_instructions: GPT_5_CODEX_INSTRUCTIONS.to_string(), apply_patch_tool_type: Some(ApplyPatchToolType::Freeform), support_verbosity: false, - truncation_policy: TruncationPolicy { - mode: TruncationMode::Tokens, - tokens_budget: 10_000, - }, + truncation_policy: TruncationPolicy::Tokens(10_000), ) } else if slug.starts_with("gpt-5.1") { model_family!( @@ -207,10 +194,7 @@ pub fn find_family_for_model(slug: &str) -> Option { default_verbosity: Some(Verbosity::Low), base_instructions: GPT_5_1_INSTRUCTIONS.to_string(), default_reasoning_effort: Some(ReasoningEffort::Medium), - truncation_policy: TruncationPolicy { - mode: TruncationMode::Bytes, - tokens_budget: 2_500, - }, + truncation_policy: TruncationPolicy::Bytes(10_000), ) } else if slug.starts_with("gpt-5") { model_family!( @@ -218,10 +202,7 @@ pub fn find_family_for_model(slug: &str) -> Option { supports_reasoning_summaries: true, needs_special_apply_patch_instructions: true, support_verbosity: true, - truncation_policy: TruncationPolicy { - mode: TruncationMode::Bytes, - tokens_budget: 2_500, - }, + truncation_policy: TruncationPolicy::Bytes(10_000), ) } else { None @@ -244,9 +225,6 @@ pub fn derive_default_model_family(model: &str) -> ModelFamily { shell_type: ConfigShellToolType::Default, default_verbosity: None, default_reasoning_effort: None, - truncation_policy: TruncationPolicy { - mode: TruncationMode::Bytes, - tokens_budget: 2_500, - }, + truncation_policy: TruncationPolicy::Bytes(10_000), } } diff --git a/codex-rs/core/src/state/session.rs b/codex-rs/core/src/state/session.rs index 833509b5b4..f6ed12eacb 100644 --- a/codex-rs/core/src/state/session.rs +++ b/codex-rs/core/src/state/session.rs @@ -7,6 +7,7 @@ use crate::context_manager::ContextManager; use crate::protocol::RateLimitSnapshot; use crate::protocol::TokenUsage; use crate::protocol::TokenUsageInfo; +use crate::truncate::TruncationSettings; /// Persistent, session-scoped state previously stored directly on `Session`. pub(crate) struct SessionState { @@ -18,10 +19,7 @@ pub(crate) struct SessionState { impl SessionState { /// Create a new session state mirroring previous `State::default()` semantics. pub(crate) fn new(session_configuration: SessionConfiguration) -> Self { - let history = ContextManager::new( - session_configuration.model(), - session_configuration.output_max_tokens(), - ); + let history = ContextManager::new(); Self { session_configuration, history, @@ -30,12 +28,12 @@ impl SessionState { } // History helpers - pub(crate) fn record_items(&mut self, items: I) + pub(crate) fn record_items(&mut self, items: I, truncation_settings: &TruncationSettings) where I: IntoIterator, I::Item: std::ops::Deref, { - self.history.record_items(items); + self.history.record_items(items, truncation_settings); } pub(crate) fn clone_history(&self) -> ContextManager { diff --git a/codex-rs/core/src/tools/orchestrator.rs b/codex-rs/core/src/tools/orchestrator.rs index a1ba0186da..878e48e8be 100644 --- a/codex-rs/core/src/tools/orchestrator.rs +++ b/codex-rs/core/src/tools/orchestrator.rs @@ -129,10 +129,7 @@ impl ToolOrchestrator { let err = SandboxErr::Denied { output: output.clone(), }; - let friendly = get_error_message_ui( - &CodexErr::Sandbox(err), - turn_ctx.client.get_model().as_str(), - ); + let friendly = get_error_message_ui(&CodexErr::Sandbox(err)); let failure_summary = format!("failed in sandbox: {friendly}"); risk = tool_ctx diff --git a/codex-rs/core/src/truncate.rs b/codex-rs/core/src/truncate.rs index 36eee57ec7..1efca797ab 100644 --- a/codex-rs/core/src/truncate.rs +++ b/codex-rs/core/src/truncate.rs @@ -2,11 +2,14 @@ //! and suffix on UTF-8 boundaries, and helpers for line/token‑based truncation //! used across the core crate. +use std::sync::Arc; + use codex_protocol::models::FunctionCallOutputContentItem; use codex_utils_string::take_bytes_at_char_boundary; use codex_utils_string::take_last_bytes_at_char_boundary; use codex_utils_tokenizer::Tokenizer; +use crate::config::Config; use crate::model_family::derive_default_model_family; use crate::model_family::find_family_for_model; @@ -15,15 +18,39 @@ const TOKENIZER_STACK_SAFE_BYTES: usize = 1024 * 1024; // 1 MiB const APPROX_BYTES_PER_TOKEN: usize = 4; #[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct TruncationPolicy { - pub mode: TruncationMode, - pub tokens_budget: usize, +pub enum TruncationPolicy { + Bytes(usize), + Tokens(usize), } -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub enum TruncationMode { - Bytes, - Tokens, +impl TruncationPolicy { + pub fn new(config: &Config) -> Self { + let token_limit = config.calls_output_max_tokens.unwrap_or_else( + find_family_for_model(config.model.as_str()) + .unwrap_or_else(|| derive_default_model_family(config.model.as_str())) + .truncation_policy, + ); + + match config.model_family.truncation_policy { + TruncationPolicy::Bytes(_) => { + Self::Bytes(token_limit.saturating_mul(APPROX_BYTES_PER_TOKEN)) + } + TruncationPolicy::Tokens(_) => Self::Tokens(token_limit), + } + } +} + +#[derive(Debug, Clone)] +pub struct TruncationSettings { + pub policy: TruncationPolicy, + pub tokenizer: Arc>, +} + +impl TruncationSettings { + pub fn new(policy: TruncationPolicy, model: &str) -> Self { + let tokenizer = Arc::new(Tokenizer::for_model(model).ok()); + Self { policy, tokenizer } + } } /// Format a block of exec/tool output for model consumption, truncating by @@ -43,8 +70,7 @@ pub(crate) fn truncate_with_line_bytes_budget(content: &str, bytes_budget: usize pub(crate) fn truncate_text( content: &str, - tokens_budget: usize, - model: &str, + truncation_settings: &TruncationSettings, ) -> (String, Option) { let mode = find_family_for_model(model) .unwrap_or_else(|| derive_default_model_family(model)) @@ -62,8 +88,7 @@ pub(crate) fn truncate_text( /// items. pub(crate) fn truncate_function_output_items_to_token_limit( items: &[FunctionCallOutputContentItem], - max_tokens: usize, - model: &str, + truncation_settings: &TruncationSettings, ) -> Vec { let mut out: Vec = Vec::with_capacity(items.len()); let mut remaining_tokens = max_tokens; @@ -206,60 +231,38 @@ fn truncate_with_tokenizer_path( (out, Some(total_tokens)) } -/// estimate the number of tokens in a string based on the length of the string -fn truncate_with_byte_estimate(s: &str, max_tokens: usize, model: &str) -> (String, Option) { - let total_tokens = approx_token_count(s); - if max_tokens == 0 { - return (format_truncation_marker(total_tokens), Some(total_tokens)); +/// Truncate a string using a byte budget derived from the token budget, without +/// performing any real tokenization. This keeps the logic purely byte-based and +/// uses a bytes placeholder in the truncated output. +fn truncate_with_byte_estimate(s: &str, max_tokens: usize, _model: &str) -> (String, Option) { + if s.is_empty() { + return (String::new(), None); } - if total_tokens as usize <= max_tokens { - return (s.to_string(), None); + let total_tokens = approx_token_count(s); + let max_bytes = approx_bytes_for_tokens(max_tokens); + + if max_bytes == 0 { + // No budget to show content; just report that everything was truncated. + let marker = format!("[…{} bytes truncated…]", s.len()); + return (marker, Some(total_tokens)); } - let max_bytes = approx_bytes_for_tokens(max_tokens); if s.len() <= max_bytes { return (s.to_string(), None); } - let mut guess_tokens = total_tokens.saturating_sub(max_tokens as u64).max(1); - for _ in 0..4 { - let marker = format_truncation_marker(guess_tokens); - let marker_len = marker.len(); - let keep_budget = max_bytes.saturating_sub(marker_len); - if keep_budget == 0 { - return (marker, Some(total_tokens)); - } - - let (left_budget, right_budget) = split_budget(keep_budget); - let prefix_end = pick_prefix_end(s, left_budget); - let mut suffix_start = pick_suffix_start(s, right_budget); - if suffix_start < prefix_end { - suffix_start = prefix_end; - } - - let removed_tokens = approx_token_count(&s[prefix_end..suffix_start]); - let final_marker = format_truncation_marker(removed_tokens); - if final_marker.len() == marker_len { - let out = assemble_truncated_output( - &s[..prefix_end], - &s[suffix_start..], - &final_marker, - NewlineMode::Always, - ); - return ensure_candidate_within_token_budget(out, max_tokens, total_tokens, model); - } - - guess_tokens = removed_tokens.max(1); - } - - let marker = format_truncation_marker(guess_tokens); + let total_bytes = s.len(); + let removed_bytes = total_bytes.saturating_sub(max_bytes); + let marker = format!("[…{removed_bytes} bytes truncated…]"); let marker_len = marker.len(); - let keep_budget = max_bytes.saturating_sub(marker_len); - if keep_budget == 0 { - return (marker, Some(total_tokens)); + + if marker_len >= max_bytes { + let truncated_marker = truncate_on_boundary(&marker, max_bytes); + return (truncated_marker.to_string(), Some(total_tokens)); } + let keep_budget = max_bytes - marker_len; let (left_budget, right_budget) = split_budget(keep_budget); let prefix_end = pick_prefix_end(s, left_budget); let mut suffix_start = pick_suffix_start(s, right_budget); @@ -267,13 +270,19 @@ fn truncate_with_byte_estimate(s: &str, max_tokens: usize, model: &str) -> (Stri suffix_start = prefix_end; } - let out = assemble_truncated_output( + let mut out = assemble_truncated_output( &s[..prefix_end], &s[suffix_start..], &marker, NewlineMode::Always, ); - ensure_candidate_within_token_budget(out, max_tokens, total_tokens, model) + + if out.len() > max_bytes { + let boundary = truncate_on_boundary(&out, max_bytes); + out.truncate(boundary.len()); + } + + (out, Some(total_tokens)) } fn truncate_formatted_exec_output( diff --git a/codex-rs/core/src/unified_exec/session.rs b/codex-rs/core/src/unified_exec/session.rs index a1d685b09f..8aa871fcc7 100644 --- a/codex-rs/core/src/unified_exec/session.rs +++ b/codex-rs/core/src/unified_exec/session.rs @@ -15,6 +15,8 @@ use crate::exec::SandboxType; use crate::exec::StreamOutput; use crate::exec::is_likely_sandbox_denied; use crate::tools::sandboxing::ToolCtx; +use crate::truncate::TruncationPolicy; +use crate::truncate::TruncationSettings; use crate::truncate::truncate_text; use codex_utils_pty::ExecCommandSession; use codex_utils_pty::SpawnedPty; @@ -170,11 +172,11 @@ impl UnifiedExecSession { }; if is_likely_sandbox_denied(self.sandbox_type(), &exec_output) { - let (snippet, _) = truncate_text( - &aggregated_text, - UNIFIED_EXEC_OUTPUT_MAX_TOKENS, - ctx.turn.client.get_model().as_str(), + let truncation_settings = TruncationSettings::new( + TruncationPolicy::Tokens(UNIFIED_EXEC_OUTPUT_MAX_TOKENS), + &ctx.turn.client.get_model(), ); + let (snippet, _) = truncate_text(&aggregated_text, &truncation_settings); let message = if snippet.is_empty() { format!("exit code {exit_code}") } else { diff --git a/codex-rs/core/src/unified_exec/session_manager.rs b/codex-rs/core/src/unified_exec/session_manager.rs index 406559bfc0..0f1dbfd123 100644 --- a/codex-rs/core/src/unified_exec/session_manager.rs +++ b/codex-rs/core/src/unified_exec/session_manager.rs @@ -23,6 +23,8 @@ use crate::tools::orchestrator::ToolOrchestrator; use crate::tools::runtimes::unified_exec::UnifiedExecRequest as UnifiedExecToolRequest; use crate::tools::runtimes::unified_exec::UnifiedExecRuntime; use crate::tools::sandboxing::ToolCtx; +use crate::truncate::TruncationPolicy; +use crate::truncate::TruncationSettings; use super::ExecCommandRequest; use super::SessionEntry; @@ -71,7 +73,9 @@ impl UnifiedExecSessionManager { let text = String::from_utf8_lossy(&collected).to_string(); let model = context.turn.client.get_model(); - let (output, original_token_count) = truncate_text(&text, max_tokens, &model); + let truncation_settings = + TruncationSettings::new(TruncationPolicy::Tokens(max_tokens), &model); + let (output, original_token_count) = truncate_text(&text, &truncation_settings); let original_token_count = original_token_count.and_then(|count| usize::try_from(count).ok()); let chunk_id = generate_chunk_id(); @@ -179,7 +183,9 @@ impl UnifiedExecSessionManager { let text = String::from_utf8_lossy(&collected).to_string(); let model = turn_ref.client.get_model(); - let (output, original_token_count) = truncate_text(&text, max_tokens, &model); + let truncation_settings = + TruncationSettings::new(TruncationPolicy::Tokens(max_tokens), &model); + let (output, original_token_count) = truncate_text(&text, &truncation_settings); let original_token_count = original_token_count.and_then(|count| usize::try_from(count).ok()); let chunk_id = generate_chunk_id(); diff --git a/codex-rs/utils/tokenizer/src/lib.rs b/codex-rs/utils/tokenizer/src/lib.rs index 1c343e439e..fdd3cfe3fd 100644 --- a/codex-rs/utils/tokenizer/src/lib.rs +++ b/codex-rs/utils/tokenizer/src/lib.rs @@ -122,6 +122,11 @@ impl Tokenizer { } } +impl fmt::Debug for Tokenizer { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "Tokenizer {{ inner: }}") + } +} #[cfg(test)] mod tests { use super::*; From 66604711e9ebd280f201db40931031640dc1d541 Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Mon, 17 Nov 2025 20:49:03 -0800 Subject: [PATCH 35/68] progress --- codex-rs/core/src/compact.rs | 2 +- codex-rs/core/src/context_manager/history.rs | 4 +- codex-rs/core/src/error.rs | 2 +- codex-rs/core/src/truncate.rs | 119 ++++++++++-------- codex-rs/core/src/unified_exec/session.rs | 2 +- .../core/src/unified_exec/session_manager.rs | 14 ++- 6 files changed, 79 insertions(+), 64 deletions(-) diff --git a/codex-rs/core/src/compact.rs b/codex-rs/core/src/compact.rs index 2682b5858d..0a85eaf437 100644 --- a/codex-rs/core/src/compact.rs +++ b/codex-rs/core/src/compact.rs @@ -272,7 +272,7 @@ fn build_compacted_history_with_limit( policy: TruncationPolicy::Tokens(remaining), tokenizer, }; - let (truncated, _) = truncate_text(message, &truncation_settings); + let truncated = truncate_text(message, &truncation_settings); selected_messages.push(truncated); break; } diff --git a/codex-rs/core/src/context_manager/history.rs b/codex-rs/core/src/context_manager/history.rs index 5b74f53b8c..8704d143b2 100644 --- a/codex-rs/core/src/context_manager/history.rs +++ b/codex-rs/core/src/context_manager/history.rs @@ -151,7 +151,7 @@ impl ContextManager { ) -> ResponseItem { match item { ResponseItem::FunctionCallOutput { call_id, output } => { - let (truncated, _) = truncate_text(output.content.as_str(), truncation_settings); + let truncated = truncate_text(output.content.as_str(), truncation_settings); let truncated_items = output.content_items.as_ref().map(|items| { truncate_function_output_items_to_token_limit(items, truncation_settings) }); @@ -165,7 +165,7 @@ impl ContextManager { } } ResponseItem::CustomToolCallOutput { call_id, output } => { - let (truncated, _) = truncate_text(output, truncation_settings); + let truncated = truncate_text(output, truncation_settings); ResponseItem::CustomToolCallOutput { call_id: call_id.clone(), output: truncated, diff --git a/codex-rs/core/src/error.rs b/codex-rs/core/src/error.rs index d531b5ac1f..70517943f1 100644 --- a/codex-rs/core/src/error.rs +++ b/codex-rs/core/src/error.rs @@ -468,7 +468,7 @@ pub fn get_error_message_ui(e: &CodexErr) -> String { policy: TruncationPolicy::Bytes(ERROR_MESSAGE_UI_MAX_TOKENS), tokenizer: Arc::new(None), }; - truncate_text(&message, &truncation_settings).0 + truncate_text(&message, &truncation_settings) } #[cfg(test)] diff --git a/codex-rs/core/src/truncate.rs b/codex-rs/core/src/truncate.rs index 1efca797ab..15d6289369 100644 --- a/codex-rs/core/src/truncate.rs +++ b/codex-rs/core/src/truncate.rs @@ -10,8 +10,6 @@ use codex_utils_string::take_last_bytes_at_char_boundary; use codex_utils_tokenizer::Tokenizer; use crate::config::Config; -use crate::model_family::derive_default_model_family; -use crate::model_family::find_family_for_model; /// Model-formatting limits: clients get full streams; only content sent to the model is truncated. const TOKENIZER_STACK_SAFE_BYTES: usize = 1024 * 1024; // 1 MiB @@ -25,17 +23,23 @@ pub enum TruncationPolicy { impl TruncationPolicy { pub fn new(config: &Config) -> Self { - let token_limit = config.calls_output_max_tokens.unwrap_or_else( - find_family_for_model(config.model.as_str()) - .unwrap_or_else(|| derive_default_model_family(config.model.as_str())) - .truncation_policy, - ); + let config_token_limit = config.calls_output_max_tokens; match config.model_family.truncation_policy { - TruncationPolicy::Bytes(_) => { - Self::Bytes(token_limit.saturating_mul(APPROX_BYTES_PER_TOKEN)) + TruncationPolicy::Bytes(family_bytes) => { + if let Some(token_limit) = config_token_limit { + Self::Bytes(token_limit.saturating_mul(APPROX_BYTES_PER_TOKEN)) + } else { + Self::Bytes(family_bytes.saturating_mul(APPROX_BYTES_PER_TOKEN)) + } + } + TruncationPolicy::Tokens(family_tokens) => { + if let Some(token_limit) = config_token_limit { + Self::Tokens(token_limit) + } else { + Self::Tokens(family_tokens) + } } - TruncationPolicy::Tokens(_) => Self::Tokens(token_limit), } } } @@ -68,20 +72,14 @@ pub(crate) fn truncate_with_line_bytes_budget(content: &str, bytes_budget: usize format!("Total output lines: {total_lines}\n\n{output}") } -pub(crate) fn truncate_text( - content: &str, - truncation_settings: &TruncationSettings, -) -> (String, Option) { - let mode = find_family_for_model(model) - .unwrap_or_else(|| derive_default_model_family(model)) - .truncation_policy - .mode; - match mode { - TruncationMode::Bytes => truncate_with_byte_estimate(content, tokens_budget, model), - TruncationMode::Tokens => truncate_with_token_budget(content, tokens_budget, model), +pub(crate) fn truncate_text(content: &str, truncation_settings: &TruncationSettings) -> String { + match truncation_settings.policy { + TruncationPolicy::Bytes(bytes) => truncate_with_byte_estimate(content, bytes), + TruncationPolicy::Tokens(tokens) => { + truncate_with_token_budget(content, tokens, truncation_settings.tokenizer) + } } } - /// Globally truncate function output items to fit within /// `max_tokens` tokens by preserving as many /// text/image items as possible and appending a summary for any omitted text @@ -91,9 +89,12 @@ pub(crate) fn truncate_function_output_items_to_token_limit( truncation_settings: &TruncationSettings, ) -> Vec { let mut out: Vec = Vec::with_capacity(items.len()); - let mut remaining_tokens = max_tokens; + let mut remaining_tokens = match truncation_settings.policy { + TruncationPolicy::Tokens(tokens) => tokens, + TruncationPolicy::Bytes(bytes) => bytes / APPROX_BYTES_PER_TOKEN, + }; + let tokenizer = truncation_settings.tokenizer.as_ref(); let mut omitted_text_items = 0usize; - let tokenizer = Tokenizer::try_default().ok(); for it in items { match it { @@ -108,7 +109,13 @@ pub(crate) fn truncate_function_output_items_to_token_limit( out.push(FunctionCallOutputContentItem::InputText { text: text.clone() }); remaining_tokens = remaining_tokens.saturating_sub(token_len); } else { - let (snippet, _) = truncate_text(text, remaining_tokens, model); + let snippet = truncate_text( + text, + &TruncationSettings { + policy: TruncationPolicy::Tokens(remaining_tokens), + tokenizer, + }, + ); if snippet.is_empty() { omitted_text_items += 1; } else { @@ -138,16 +145,20 @@ pub(crate) fn truncate_function_output_items_to_token_limit( /// preserving the beginning and the end. Returns the possibly truncated string /// and `Some(original_token_count)` if truncation occurred; otherwise returns /// the original string and `None`. -fn truncate_with_token_budget(s: &str, max_tokens: usize, model: &str) -> (String, Option) { +fn truncate_with_token_budget( + s: &str, + max_tokens: usize, + tokenizer: Arc>, +) -> String { if s.is_empty() { - return (String::new(), None); + return String::new(); } let byte_len = s.len(); if max_tokens > 0 { let small_threshold = approx_bytes_for_tokens(max_tokens / 4); if small_threshold > 0 && byte_len <= small_threshold { - return (s.to_string(), None); + return s.to_string(); } } @@ -155,16 +166,21 @@ fn truncate_with_token_budget(s: &str, max_tokens: usize, model: &str) -> (Strin let exceeds_large_threshold = max_tokens > 0 && byte_len > approx_bytes_for_tokens(max_tokens.saturating_mul(2)); if exceeds_stack_limit || exceeds_large_threshold { - return truncate_with_byte_estimate(s, max_tokens, model); + return truncate_with_byte_estimate(s, max_tokens.saturating_mul(APPROX_BYTES_PER_TOKEN)); } - let tokenizer = match select_tokenizer(model) { - Some(tok) => tok, - None => return truncate_with_byte_estimate(s, max_tokens, model), - }; - let encoded = tokenizer.encode(s, false); + let encoded = tokenizer + .as_ref() + .map(|tok| tok.encode(s, false)) + .unwrap_or_default(); let total_tokens = encoded.len() as u64; - truncate_with_tokenizer_path(tokenizer, encoded, max_tokens, s, total_tokens) + return truncate_with_tokenizer_path( + tokenizer.as_ref().unwrap(), + encoded, + max_tokens, + s, + total_tokens, + ); } fn truncate_with_tokenizer_path( @@ -173,13 +189,13 @@ fn truncate_with_tokenizer_path( max_budget: usize, original: &str, total_tokens: u64, -) -> (String, Option) { +) -> String { if max_budget == 0 { - return (format_truncation_marker(total_tokens), Some(total_tokens)); + return format_truncation_marker(total_tokens); } if encoded.len() <= max_budget { - return (original.to_string(), None); + return original.to_string(); } let mut guess_removed = total_tokens.saturating_sub(max_budget as u64).max(1); @@ -187,12 +203,12 @@ fn truncate_with_tokenizer_path( let marker = format_truncation_marker(guess_removed); let marker_len = usize::try_from(tokenizer.count(&marker)).unwrap_or(usize::MAX); if marker_len >= max_budget { - return (marker, Some(total_tokens)); + return marker; } let keep_budget = max_budget - marker_len; if keep_budget == 0 { - return (marker, Some(total_tokens)); + return marker; } let (left_keep, right_keep) = split_budget(keep_budget); @@ -209,7 +225,7 @@ fn truncate_with_tokenizer_path( &final_marker, NewlineMode::WhenSuffixPresent, ); - return (out, Some(total_tokens)); + return out; } guess_removed = removed_tokens.max(1); @@ -218,38 +234,35 @@ fn truncate_with_tokenizer_path( let marker = format_truncation_marker(guess_removed); let marker_len = usize::try_from(tokenizer.count(&marker)).unwrap_or(usize::MAX); if marker_len >= max_budget { - return (marker, Some(total_tokens)); + return marker; } let keep_budget = max_budget - marker_len; if keep_budget == 0 { - return (marker, Some(total_tokens)); + return marker; } let (left_keep, right_keep) = split_budget(keep_budget); let (prefix, suffix) = decode_token_segments(&tokenizer, &encoded, left_keep, right_keep); let out = assemble_truncated_output(&prefix, &suffix, &marker, NewlineMode::WhenSuffixPresent); - (out, Some(total_tokens)) + return out; } /// Truncate a string using a byte budget derived from the token budget, without /// performing any real tokenization. This keeps the logic purely byte-based and /// uses a bytes placeholder in the truncated output. -fn truncate_with_byte_estimate(s: &str, max_tokens: usize, _model: &str) -> (String, Option) { +fn truncate_with_byte_estimate(s: &str, max_bytes: usize) -> String { if s.is_empty() { - return (String::new(), None); + return String::new(); } - let total_tokens = approx_token_count(s); - let max_bytes = approx_bytes_for_tokens(max_tokens); - if max_bytes == 0 { // No budget to show content; just report that everything was truncated. let marker = format!("[…{} bytes truncated…]", s.len()); - return (marker, Some(total_tokens)); + return marker; } if s.len() <= max_bytes { - return (s.to_string(), None); + return s.to_string(); } let total_bytes = s.len(); @@ -259,7 +272,7 @@ fn truncate_with_byte_estimate(s: &str, max_tokens: usize, _model: &str) -> (Str if marker_len >= max_bytes { let truncated_marker = truncate_on_boundary(&marker, max_bytes); - return (truncated_marker.to_string(), Some(total_tokens)); + return truncated_marker.to_string(); } let keep_budget = max_bytes - marker_len; @@ -282,7 +295,7 @@ fn truncate_with_byte_estimate(s: &str, max_tokens: usize, _model: &str) -> (Str out.truncate(boundary.len()); } - (out, Some(total_tokens)) + return out; } fn truncate_formatted_exec_output( diff --git a/codex-rs/core/src/unified_exec/session.rs b/codex-rs/core/src/unified_exec/session.rs index 8aa871fcc7..2c432c2885 100644 --- a/codex-rs/core/src/unified_exec/session.rs +++ b/codex-rs/core/src/unified_exec/session.rs @@ -176,7 +176,7 @@ impl UnifiedExecSession { TruncationPolicy::Tokens(UNIFIED_EXEC_OUTPUT_MAX_TOKENS), &ctx.turn.client.get_model(), ); - let (snippet, _) = truncate_text(&aggregated_text, &truncation_settings); + let snippet = truncate_text(&aggregated_text, &truncation_settings); let message = if snippet.is_empty() { format!("exit code {exit_code}") } else { diff --git a/codex-rs/core/src/unified_exec/session_manager.rs b/codex-rs/core/src/unified_exec/session_manager.rs index 0f1dbfd123..009633f582 100644 --- a/codex-rs/core/src/unified_exec/session_manager.rs +++ b/codex-rs/core/src/unified_exec/session_manager.rs @@ -1,6 +1,7 @@ use std::path::PathBuf; use std::sync::Arc; +use codex_utils_tokenizer::Tokenizer; use tokio::sync::Notify; use tokio::sync::mpsc; use tokio::time::Duration; @@ -75,9 +76,7 @@ impl UnifiedExecSessionManager { let model = context.turn.client.get_model(); let truncation_settings = TruncationSettings::new(TruncationPolicy::Tokens(max_tokens), &model); - let (output, original_token_count) = truncate_text(&text, &truncation_settings); - let original_token_count = - original_token_count.and_then(|count| usize::try_from(count).ok()); + let output = truncate_text(&text, &truncation_settings); let chunk_id = generate_chunk_id(); let has_exited = session.has_exited(); let stored_id = self @@ -92,6 +91,9 @@ impl UnifiedExecSessionManager { // Only include a session_id in the response if the process is still alive. let session_id = if has_exited { None } else { Some(stored_id) }; + let tokenizer = Tokenizer::for_model(&model).ok(); + let original_token_count = tokenizer.map(|tok| tok.count(&text) as usize); + let response = UnifiedExecResponse { event_call_id: context.call_id.clone(), chunk_id, @@ -185,9 +187,9 @@ impl UnifiedExecSessionManager { let model = turn_ref.client.get_model(); let truncation_settings = TruncationSettings::new(TruncationPolicy::Tokens(max_tokens), &model); - let (output, original_token_count) = truncate_text(&text, &truncation_settings); - let original_token_count = - original_token_count.and_then(|count| usize::try_from(count).ok()); + let output = truncate_text(&text, &truncation_settings); + let tokenizer = Tokenizer::for_model(&model).ok(); + let original_token_count = tokenizer.map(|tok| tok.count(&text) as usize); let chunk_id = generate_chunk_id(); let status = self.refresh_session_state(session_id).await; From 3a51044c64887c6119a60b910003dcc1bf7dfa71 Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Mon, 17 Nov 2025 20:58:53 -0800 Subject: [PATCH 36/68] progress --- codex-rs/core/src/codex.rs | 8 --- codex-rs/core/src/compact.rs | 1 + codex-rs/core/src/config/mod.rs | 2 +- codex-rs/core/src/truncate.rs | 123 +++++++++++++++----------------- 4 files changed, 58 insertions(+), 76 deletions(-) diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index 0588d4d7ea..f29a241dae 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -374,14 +374,6 @@ impl SessionConfiguration { } next_configuration } - - pub(crate) fn output_max_tokens(&self) -> Option { - self.output_max_tokens - } - - pub(crate) fn model(&self) -> &str { - self.model.as_str() - } } #[derive(Default, Clone)] diff --git a/codex-rs/core/src/compact.rs b/codex-rs/core/src/compact.rs index 0a85eaf437..0bcdabda85 100644 --- a/codex-rs/core/src/compact.rs +++ b/codex-rs/core/src/compact.rs @@ -261,6 +261,7 @@ fn build_compacted_history_with_limit( break; } let tokens = tokenizer + .as_ref() .as_ref() .map(|tok| usize::try_from(tok.count(message)).unwrap_or(usize::MAX)) .unwrap_or_else(|| message.len().saturating_add(3) / 4); diff --git a/codex-rs/core/src/config/mod.rs b/codex-rs/core/src/config/mod.rs index 672fe08b50..2b0ff1c018 100644 --- a/codex-rs/core/src/config/mod.rs +++ b/codex-rs/core/src/config/mod.rs @@ -1102,7 +1102,7 @@ impl Config { let config = Self { model, review_model, - model_family: model_family.clone(), + model_family, model_context_window, model_max_output_tokens, model_auto_compact_token_limit, diff --git a/codex-rs/core/src/truncate.rs b/codex-rs/core/src/truncate.rs index 15d6289369..9b185586ea 100644 --- a/codex-rs/core/src/truncate.rs +++ b/codex-rs/core/src/truncate.rs @@ -55,6 +55,10 @@ impl TruncationSettings { let tokenizer = Arc::new(Tokenizer::for_model(model).ok()); Self { policy, tokenizer } } + + pub fn tokenizer_ref(&self) -> Option<&Tokenizer> { + self.tokenizer.as_ref().as_ref() + } } /// Format a block of exec/tool output for model consumption, truncating by @@ -76,7 +80,9 @@ pub(crate) fn truncate_text(content: &str, truncation_settings: &TruncationSetti match truncation_settings.policy { TruncationPolicy::Bytes(bytes) => truncate_with_byte_estimate(content, bytes), TruncationPolicy::Tokens(tokens) => { - truncate_with_token_budget(content, tokens, truncation_settings.tokenizer) + let (truncated, _) = + truncate_with_token_budget(content, tokens, truncation_settings.tokenizer_ref()); + truncated } } } @@ -93,7 +99,7 @@ pub(crate) fn truncate_function_output_items_to_token_limit( TruncationPolicy::Tokens(tokens) => tokens, TruncationPolicy::Bytes(bytes) => bytes / APPROX_BYTES_PER_TOKEN, }; - let tokenizer = truncation_settings.tokenizer.as_ref(); + let tokenizer = truncation_settings.tokenizer_ref(); let mut omitted_text_items = 0usize; for it in items { @@ -104,7 +110,7 @@ pub(crate) fn truncate_function_output_items_to_token_limit( continue; } - let token_len = estimate_safe_token_count(text, tokenizer.as_ref()); + let token_len = estimate_safe_token_count(text, tokenizer); if token_len <= remaining_tokens { out.push(FunctionCallOutputContentItem::InputText { text: text.clone() }); remaining_tokens = remaining_tokens.saturating_sub(token_len); @@ -113,7 +119,7 @@ pub(crate) fn truncate_function_output_items_to_token_limit( text, &TruncationSettings { policy: TruncationPolicy::Tokens(remaining_tokens), - tokenizer, + tokenizer: Arc::clone(&truncation_settings.tokenizer), }, ); if snippet.is_empty() { @@ -148,43 +154,57 @@ pub(crate) fn truncate_function_output_items_to_token_limit( fn truncate_with_token_budget( s: &str, max_tokens: usize, - tokenizer: Arc>, -) -> String { + tokenizer: Option<&Tokenizer>, +) -> (String, Option) { if s.is_empty() { - return String::new(); + return (String::new(), None); } let byte_len = s.len(); if max_tokens > 0 { let small_threshold = approx_bytes_for_tokens(max_tokens / 4); if small_threshold > 0 && byte_len <= small_threshold { - return s.to_string(); + return (s.to_string(), None); } } let exceeds_stack_limit = byte_len > TOKENIZER_STACK_SAFE_BYTES; let exceeds_large_threshold = max_tokens > 0 && byte_len > approx_bytes_for_tokens(max_tokens.saturating_mul(2)); + if exceeds_stack_limit || exceeds_large_threshold { - return truncate_with_byte_estimate(s, max_tokens.saturating_mul(APPROX_BYTES_PER_TOKEN)); - } - - let encoded = tokenizer - .as_ref() - .map(|tok| tok.encode(s, false)) - .unwrap_or_default(); - let total_tokens = encoded.len() as u64; - return truncate_with_tokenizer_path( - tokenizer.as_ref().unwrap(), - encoded, - max_tokens, - s, - total_tokens, - ); + let truncated = + truncate_with_byte_estimate(s, max_tokens.saturating_mul(APPROX_BYTES_PER_TOKEN)); + let approx_total = approx_token_count(s); + if truncated == s { + (truncated, None) + } else { + (truncated, Some(approx_total)) + } + } else if let Some(tok) = tokenizer { + let encoded = tok.encode(s, false); + let total_tokens = encoded.len() as u64; + + if encoded.len() <= max_tokens { + (s.to_string(), None) + } else { + let truncated = truncate_with_tokenizer_path(tok, encoded, max_tokens, s, total_tokens); + (truncated, Some(total_tokens)) + } + } else { + let truncated = + truncate_with_byte_estimate(s, max_tokens.saturating_mul(APPROX_BYTES_PER_TOKEN)); + let approx_total = approx_token_count(s); + if truncated == s { + (truncated, None) + } else { + (truncated, Some(approx_total)) + } + } } fn truncate_with_tokenizer_path( - tokenizer: Tokenizer, + tokenizer: &Tokenizer, encoded: Vec, max_budget: usize, original: &str, @@ -218,7 +238,7 @@ fn truncate_with_tokenizer_path( usize::try_from(tokenizer.count(&final_marker)).unwrap_or(usize::MAX); if final_marker_len == marker_len { let (prefix, suffix) = - decode_token_segments(&tokenizer, &encoded, left_keep, right_keep); + decode_token_segments(tokenizer, &encoded, left_keep, right_keep); let out = assemble_truncated_output( &prefix, &suffix, @@ -242,9 +262,8 @@ fn truncate_with_tokenizer_path( return marker; } let (left_keep, right_keep) = split_budget(keep_budget); - let (prefix, suffix) = decode_token_segments(&tokenizer, &encoded, left_keep, right_keep); - let out = assemble_truncated_output(&prefix, &suffix, &marker, NewlineMode::WhenSuffixPresent); - return out; + let (prefix, suffix) = decode_token_segments(tokenizer, &encoded, left_keep, right_keep); + assemble_truncated_output(&prefix, &suffix, &marker, NewlineMode::WhenSuffixPresent) } /// Truncate a string using a byte budget derived from the token budget, without @@ -295,7 +314,7 @@ fn truncate_with_byte_estimate(s: &str, max_bytes: usize) -> String { out.truncate(boundary.len()); } - return out; + out } fn truncate_formatted_exec_output( @@ -426,32 +445,6 @@ fn assemble_truncated_output( out } -fn ensure_candidate_within_token_budget( - candidate: String, - max_budget: usize, - total_tokens: u64, - model: &str, -) -> (String, Option) { - if max_budget == 0 { - return (candidate, Some(total_tokens)); - } - - if let Some(tokenizer) = select_tokenizer(model) { - let encoded = tokenizer.encode(candidate.as_str(), false); - if encoded.len() > max_budget { - return truncate_with_tokenizer_path( - tokenizer, - encoded, - max_budget, - candidate.as_str(), - total_tokens, - ); - } - } - - (candidate, Some(total_tokens)) -} - fn approx_token_count(text: &str) -> u64 { (text.len() as u64).saturating_add(3) / 4 } @@ -460,12 +453,6 @@ fn approx_bytes_for_tokens(tokens: usize) -> usize { tokens.saturating_mul(APPROX_BYTES_PER_TOKEN) } -fn select_tokenizer(model: &str) -> Option { - Tokenizer::for_model(model) - .or_else(|_| Tokenizer::try_default()) - .ok() -} - fn truncate_on_boundary(input: &str, max_len: usize) -> &str { if input.len() <= max_len { return input; @@ -529,6 +516,8 @@ mod tests { use crate::model_family::derive_default_model_family; use crate::model_family::find_family_for_model; + use super::TruncationPolicy; + use super::TruncationSettings; use super::truncate_function_output_items_to_token_limit; use super::truncate_with_line_bytes_budget; use super::truncate_with_token_budget; @@ -587,7 +576,7 @@ mod tests { let tok = Tokenizer::try_default().expect("load tokenizer"); let s = "short output"; let limit = usize::try_from(tok.count(s)).unwrap_or(0) + 10; - let (out, original) = truncate_with_token_budget(s, limit, OPENAI_DEFAULT_MODEL); + let (out, original) = truncate_with_token_budget(s, limit, Some(&tok)); assert_eq!(out, s); assert_eq!(original, None); } @@ -597,7 +586,7 @@ mod tests { let tok = Tokenizer::try_default().expect("load tokenizer"); let s = "abcdef"; let total = tok.count(s) as u64; - let (out, original) = truncate_with_token_budget(s, 0, OPENAI_DEFAULT_MODEL); + let (out, original) = truncate_with_token_budget(s, 0, Some(&tok)); assert!(out.contains("tokens truncated")); assert_eq!(original, Some(total)); } @@ -607,7 +596,7 @@ mod tests { let tok = Tokenizer::try_default().expect("load tokenizer"); let s = "alpha beta gamma delta epsilon zeta eta theta iota kappa"; let max_tokens = 12; - let (out, original) = truncate_with_token_budget(s, max_tokens, OPENAI_DEFAULT_MODEL); + let (out, original) = truncate_with_token_budget(s, max_tokens, Some(&tok)); assert!(out.contains("tokens truncated")); assert_eq!(original, Some(tok.count(s) as u64)); let result_tokens = tok.count(&out) as usize; @@ -619,7 +608,7 @@ mod tests { let tok = Tokenizer::for_model(OPENAI_DEFAULT_MODEL).expect("load tokenizer"); let s = "😀😀😀😀😀😀😀😀😀😀\nsecond line with text\n"; let max_tokens = 8; - let (out, tokens) = truncate_with_token_budget(s, max_tokens, OPENAI_DEFAULT_MODEL); + let (out, tokens) = truncate_with_token_budget(s, max_tokens, Some(&tok)); assert!(out.contains("tokens truncated")); assert!(!out.contains('\u{fffd}')); @@ -770,8 +759,8 @@ mod tests { ]; let model = OPENAI_DEFAULT_MODEL; - - let output = truncate_function_output_items_to_token_limit(&items, limit, model); + let truncation_settings = TruncationSettings::new(TruncationPolicy::Tokens(limit), model); + let output = truncate_function_output_items_to_token_limit(&items, &truncation_settings); // Expect: t1 (full), t2 (full), image, t3 (truncated), summary mentioning 2 omitted. assert_eq!(output.len(), 5); From 21677c5734b815f0fd6f1f6f143aec9ff5c2b08b Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Mon, 17 Nov 2025 21:10:43 -0800 Subject: [PATCH 37/68] progress --- codex-rs/core/src/codex.rs | 5 +- codex-rs/core/src/compact.rs | 13 +++--- codex-rs/core/src/config/mod.rs | 20 ++------ codex-rs/core/src/context_manager/history.rs | 2 +- .../core/src/context_manager/history_tests.rs | 46 +++++++++++++------ codex-rs/core/src/error.rs | 1 - codex-rs/core/src/truncate.rs | 31 +++++++++++-- 7 files changed, 72 insertions(+), 46 deletions(-) diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index f29a241dae..afe85495ea 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -2964,11 +2964,12 @@ mod tests { let summary1 = "summary one"; let snapshot1 = live_history.get_history(); let user_messages1 = collect_user_messages(&snapshot1); + let tokenizer = turn_context.truncation_settings.tokenizer.clone(); let rebuilt1 = build_compacted_history( session.build_initial_context(turn_context), &user_messages1, summary1, - turn_context.client.get_model().as_str(), + tokenizer.clone(), ); live_history.replace(rebuilt1); rollout_items.push(RolloutItem::Compacted(CompactedItem { @@ -3005,7 +3006,7 @@ mod tests { session.build_initial_context(turn_context), &user_messages2, summary2, - turn_context.client.get_model().as_str(), + tokenizer, ); live_history.replace(rebuilt2); rollout_items.push(RolloutItem::Compacted(CompactedItem { diff --git a/codex-rs/core/src/compact.rs b/codex-rs/core/src/compact.rs index 0bcdabda85..bf0824872f 100644 --- a/codex-rs/core/src/compact.rs +++ b/codex-rs/core/src/compact.rs @@ -443,12 +443,13 @@ mod tests { let max_tokens = 16; let big = "word ".repeat(200); let model = OPENAI_DEFAULT_MODEL; + let tokenizer = Arc::new(Tokenizer::for_model(model).ok()); let history = super::build_compacted_history_with_limit( Vec::new(), std::slice::from_ref(&big), "SUMMARY", max_tokens, - model, + tokenizer, ); assert_eq!(history.len(), 2); @@ -486,12 +487,10 @@ mod tests { let user_messages = vec!["first user message".to_string()]; let summary_text = "summary text"; - let history = build_compacted_history( - initial_context, - &user_messages, - summary_text, - OPENAI_DEFAULT_MODEL, - ); + let tokenizer = Arc::new(Tokenizer::for_model(OPENAI_DEFAULT_MODEL).ok()); + + let history = + build_compacted_history(initial_context, &user_messages, summary_text, tokenizer); assert!( !history.is_empty(), "expected compacted history to include summary" diff --git a/codex-rs/core/src/config/mod.rs b/codex-rs/core/src/config/mod.rs index 2b0ff1c018..4fff8cd8aa 100644 --- a/codex-rs/core/src/config/mod.rs +++ b/codex-rs/core/src/config/mod.rs @@ -2894,10 +2894,7 @@ model_verbosity = "high" model_providers: fixture.model_provider_map.clone(), project_doc_max_bytes: PROJECT_DOC_MAX_BYTES, project_doc_fallback_filenames: Vec::new(), - calls_output_max_tokens: find_family_for_model("o3") - .unwrap() - .truncation_policy - .tokens_budget, + calls_output_max_tokens: None, codex_home: fixture.codex_home(), history: History::default(), file_opener: UriBasedFileOpener::VsCode, @@ -2969,10 +2966,7 @@ model_verbosity = "high" model_providers: fixture.model_provider_map.clone(), project_doc_max_bytes: PROJECT_DOC_MAX_BYTES, project_doc_fallback_filenames: Vec::new(), - calls_output_max_tokens: find_family_for_model("gpt-3.5-turbo") - .unwrap() - .truncation_policy - .tokens_budget, + calls_output_max_tokens: None, codex_home: fixture.codex_home(), history: History::default(), file_opener: UriBasedFileOpener::VsCode, @@ -3059,10 +3053,7 @@ model_verbosity = "high" model_providers: fixture.model_provider_map.clone(), project_doc_max_bytes: PROJECT_DOC_MAX_BYTES, project_doc_fallback_filenames: Vec::new(), - calls_output_max_tokens: find_family_for_model("o3") - .unwrap() - .truncation_policy - .tokens_budget, + calls_output_max_tokens: None, codex_home: fixture.codex_home(), history: History::default(), file_opener: UriBasedFileOpener::VsCode, @@ -3135,10 +3126,7 @@ model_verbosity = "high" model_providers: fixture.model_provider_map.clone(), project_doc_max_bytes: PROJECT_DOC_MAX_BYTES, project_doc_fallback_filenames: Vec::new(), - calls_output_max_tokens: find_family_for_model("gpt-5") - .unwrap() - .truncation_policy - .tokens_budget, + calls_output_max_tokens: None, codex_home: fixture.codex_home(), history: History::default(), file_opener: UriBasedFileOpener::VsCode, diff --git a/codex-rs/core/src/context_manager/history.rs b/codex-rs/core/src/context_manager/history.rs index 8704d143b2..89c8ef7052 100644 --- a/codex-rs/core/src/context_manager/history.rs +++ b/codex-rs/core/src/context_manager/history.rs @@ -11,7 +11,7 @@ use codex_utils_tokenizer::Tokenizer; use std::ops::Deref; /// Transcript of conversation history -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Default)] pub(crate) struct ContextManager { /// The oldest items are at the beginning of the vector. items: Vec, diff --git a/codex-rs/core/src/context_manager/history_tests.rs b/codex-rs/core/src/context_manager/history_tests.rs index fc82cd2f98..217e393d3a 100644 --- a/codex-rs/core/src/context_manager/history_tests.rs +++ b/codex-rs/core/src/context_manager/history_tests.rs @@ -3,6 +3,8 @@ use crate::config::OPENAI_DEFAULT_MODEL; use crate::model_family::derive_default_model_family; use crate::model_family::find_family_for_model; use crate::truncate; +use crate::truncate::TruncationPolicy; +use crate::truncate::TruncationSettings; use codex_git::GhostCommit; use codex_protocol::models::ContentItem; use codex_protocol::models::FunctionCallOutputPayload; @@ -22,7 +24,7 @@ fn exec_format_max_bytes() -> usize { find_family_for_model(OPENAI_DEFAULT_MODEL) .unwrap_or_else(|| derive_default_model_family(OPENAI_DEFAULT_MODEL)) .truncation_policy - .tokens_budget + .byte_budget() } fn assistant_msg(text: &str) -> ResponseItem { @@ -40,9 +42,10 @@ fn create_history_with_items(items: Vec) -> ContextManager { let max_tokens = find_family_for_model(model) .unwrap_or_else(|| derive_default_model_family(model)) .truncation_policy - .tokens_budget; - let mut h = ContextManager::new(model, max_tokens); - h.record_items(items.iter()); + .token_budget(); + let truncation_settings = TruncationSettings::new(TruncationPolicy::Tokens(max_tokens), model); + let mut h = ContextManager::new(); + h.record_items(items.iter(), &truncation_settings); h } @@ -72,6 +75,15 @@ fn reasoning_msg(text: &str) -> ResponseItem { #[test] fn filters_non_api_messages() { let mut h = ContextManager::default(); + let truncation_settings = TruncationSettings::new( + TruncationPolicy::Tokens( + find_family_for_model(OPENAI_DEFAULT_MODEL) + .unwrap_or_else(|| derive_default_model_family(OPENAI_DEFAULT_MODEL)) + .truncation_policy + .token_budget(), + ), + OPENAI_DEFAULT_MODEL, + ); // System message is not API messages; Other is ignored. let system = ResponseItem::Message { id: None, @@ -81,12 +93,15 @@ fn filters_non_api_messages() { }], }; let reasoning = reasoning_msg("thinking..."); - h.record_items([&system, &reasoning, &ResponseItem::Other]); + h.record_items( + [&system, &reasoning, &ResponseItem::Other], + &truncation_settings, + ); // User and assistant should be retained. let u = user_msg("hi"); let a = assistant_msg("hello"); - h.record_items([&u, &a]); + h.record_items([&u, &a], &truncation_settings); let items = h.contents(); assert_eq!( @@ -257,8 +272,9 @@ fn record_items_truncates_function_call_output_content() { let max_tokens = find_family_for_model(model) .unwrap_or_else(|| derive_default_model_family(model)) .truncation_policy - .tokens_budget; - let mut history = ContextManager::new(model, max_tokens); + .token_budget(); + let mut history = ContextManager::new(); + let truncation_settings = TruncationSettings::new(TruncationPolicy::Tokens(max_tokens), model); let tok = Tokenizer::try_default().expect("load tokenizer"); let long_line = "a very long line to trigger truncation\n"; let long_output = long_line.repeat(2_500); @@ -271,7 +287,7 @@ fn record_items_truncates_function_call_output_content() { }, }; - history.record_items([&item]); + history.record_items([&item], &truncation_settings); assert_eq!(history.items.len(), 1); match &history.items[0] { @@ -298,8 +314,9 @@ fn record_items_truncates_custom_tool_call_output_content() { let max_tokens = find_family_for_model(model) .unwrap_or_else(|| derive_default_model_family(model)) .truncation_policy - .tokens_budget; - let mut history = ContextManager::new(model, max_tokens); + .token_budget(); + let mut history = ContextManager::new(); + let truncation_settings = TruncationSettings::new(TruncationPolicy::Tokens(max_tokens), model); let tok = Tokenizer::try_default().expect("load tokenizer"); let line = "custom output that is very long\n"; let long_output = line.repeat(2_500); @@ -308,7 +325,7 @@ fn record_items_truncates_custom_tool_call_output_content() { output: long_output.clone(), }; - history.record_items([&item]); + history.record_items([&item], &truncation_settings); assert_eq!(history.items.len(), 1); match &history.items[0] { @@ -331,7 +348,8 @@ fn record_items_truncates_custom_tool_call_output_content() { #[test] fn record_items_respects_custom_token_limit() { let model = OPENAI_DEFAULT_MODEL; - let mut history = ContextManager::new(model, 8); + let mut history = ContextManager::new(); + let truncation_settings = TruncationSettings::new(TruncationPolicy::Tokens(8), model); let tok = Tokenizer::try_default().expect("load tokenizer"); let long_output = "tokenized content repeated many times ".repeat(200); let item = ResponseItem::FunctionCallOutput { @@ -343,7 +361,7 @@ fn record_items_respects_custom_token_limit() { }, }; - history.record_items([&item]); + history.record_items([&item], &truncation_settings); let stored = match &history.items[0] { ResponseItem::FunctionCallOutput { output, .. } => output, diff --git a/codex-rs/core/src/error.rs b/codex-rs/core/src/error.rs index 70517943f1..af2455eee2 100644 --- a/codex-rs/core/src/error.rs +++ b/codex-rs/core/src/error.rs @@ -474,7 +474,6 @@ pub fn get_error_message_ui(e: &CodexErr) -> String { #[cfg(test)] mod tests { use super::*; - use crate::config::OPENAI_DEFAULT_MODEL; use crate::exec::StreamOutput; use chrono::DateTime; use chrono::Duration as ChronoDuration; diff --git a/codex-rs/core/src/truncate.rs b/codex-rs/core/src/truncate.rs index 9b185586ea..e4093fe0c6 100644 --- a/codex-rs/core/src/truncate.rs +++ b/codex-rs/core/src/truncate.rs @@ -42,6 +42,30 @@ impl TruncationPolicy { } } } + + /// Returns a token budget derived from this policy. + /// + /// - For `Tokens`, this is the explicit token limit. + /// - For `Bytes`, this is an approximate token budget using the global + /// bytes-per-token heuristic. + pub fn token_budget(&self) -> usize { + match self { + TruncationPolicy::Bytes(bytes) => bytes / APPROX_BYTES_PER_TOKEN, + TruncationPolicy::Tokens(tokens) => *tokens, + } + } + + /// Returns a byte budget derived from this policy. + /// + /// - For `Bytes`, this is the explicit byte limit. + /// - For `Tokens`, this is an approximate byte budget using the global + /// bytes-per-token heuristic. + pub fn byte_budget(&self) -> usize { + match self { + TruncationPolicy::Bytes(bytes) => *bytes, + TruncationPolicy::Tokens(tokens) => tokens.saturating_mul(APPROX_BYTES_PER_TOKEN), + } + } } #[derive(Debug, Clone)] @@ -95,10 +119,7 @@ pub(crate) fn truncate_function_output_items_to_token_limit( truncation_settings: &TruncationSettings, ) -> Vec { let mut out: Vec = Vec::with_capacity(items.len()); - let mut remaining_tokens = match truncation_settings.policy { - TruncationPolicy::Tokens(tokens) => tokens, - TruncationPolicy::Bytes(bytes) => bytes / APPROX_BYTES_PER_TOKEN, - }; + let mut remaining_tokens = truncation_settings.policy.token_budget(); let tokenizer = truncation_settings.tokenizer_ref(); let mut omitted_text_items = 0usize; @@ -532,7 +553,7 @@ mod tests { find_family_for_model(OPENAI_DEFAULT_MODEL) .unwrap_or_else(|| derive_default_model_family(OPENAI_DEFAULT_MODEL)) .truncation_policy - .tokens_budget + .byte_budget() } fn truncated_message_pattern(line: &str, total_lines: usize) -> String { From 97ed9f224b9c15772e2439a6d4bd199d5d04b33c Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Mon, 17 Nov 2025 21:25:25 -0800 Subject: [PATCH 38/68] tokio tests --- codex-rs/core/src/codex.rs | 36 +++++++++++++-------------- codex-rs/core/src/unified_exec/mod.rs | 2 +- 2 files changed, 18 insertions(+), 20 deletions(-) diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index afe85495ea..15ce737406 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -2441,35 +2441,33 @@ mod tests { assert_eq!(expected, reconstructed); } - #[test] - fn record_initial_history_reconstructs_resumed_transcript() { + #[tokio::test(flavor = "multi_thread")] + async fn record_initial_history_reconstructs_resumed_transcript() { let (session, turn_context) = make_session_and_context(); let (rollout_items, expected) = sample_rollout(&session, &turn_context); - tokio_test::block_on(session.record_initial_history(InitialHistory::Resumed( - ResumedHistory { + session + .record_initial_history(InitialHistory::Resumed(ResumedHistory { conversation_id: ConversationId::default(), history: rollout_items, rollout_path: PathBuf::from("/tmp/resume.jsonl"), - }, - ))); + })) + .await; - let actual = tokio_test::block_on(async { - session.state.lock().await.clone_history().get_history() - }); + let actual = session.state.lock().await.clone_history().get_history(); assert_eq!(expected, actual); } - #[test] - fn record_initial_history_reconstructs_forked_transcript() { + #[tokio::test(flavor = "multi_thread")] + async fn record_initial_history_reconstructs_forked_transcript() { let (session, turn_context) = make_session_and_context(); let (rollout_items, expected) = sample_rollout(&session, &turn_context); - tokio_test::block_on(session.record_initial_history(InitialHistory::Forked(rollout_items))); + session + .record_initial_history(InitialHistory::Forked(rollout_items)) + .await; - let actual = tokio_test::block_on(async { - session.state.lock().await.clone_history().get_history() - }); + let actual = session.state.lock().await.clone_history().get_history(); assert_eq!(expected, actual); } @@ -2801,7 +2799,7 @@ mod tests { assert!(rx.try_recv().is_err()); } - #[tokio::test] + #[tokio::test(flavor = "multi_thread")] async fn abort_gracefuly_emits_turn_aborted_only() { let (sess, tc, rx) = make_session_and_context_with_rx(); let input = vec![UserInput::Text { @@ -2888,7 +2886,7 @@ mod tests { ); } - #[tokio::test] + #[tokio::test(flavor = "multi_thread")] async fn fatal_tool_error_stops_turn_and_reports_error() { let (session, turn_context, _rx) = make_session_and_context_with_rx(); let router = ToolRouter::from_config( @@ -3039,7 +3037,7 @@ mod tests { (rollout_items, live_history.get_history()) } - #[tokio::test] + #[tokio::test(flavor = "multi_thread")] async fn rejects_escalated_permissions_when_policy_not_on_request() { use crate::exec::ExecParams; use crate::protocol::AskForApproval; @@ -3166,7 +3164,7 @@ mod tests { assert!(exec_output.output.contains("hi")); } - #[tokio::test] + #[tokio::test(flavor = "multi_thread")] async fn unified_exec_rejects_escalated_permissions_when_policy_not_on_request() { use crate::protocol::AskForApproval; use crate::turn_diff_tracker::TurnDiffTracker; diff --git a/codex-rs/core/src/unified_exec/mod.rs b/codex-rs/core/src/unified_exec/mod.rs index 390401d789..98159a23bb 100644 --- a/codex-rs/core/src/unified_exec/mod.rs +++ b/codex-rs/core/src/unified_exec/mod.rs @@ -287,7 +287,7 @@ mod tests { Ok(()) } - #[tokio::test] + #[tokio::test(flavor = "multi_thread")] async fn unified_exec_timeouts() -> anyhow::Result<()> { skip_if_sandbox!(Ok(())); From 73c79e7d7d8fc6a305db04b770cd0e3ca8e4dacd Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Mon, 17 Nov 2025 21:44:40 -0800 Subject: [PATCH 39/68] test --- codex-rs/core/src/compact.rs | 3 ++- codex-rs/core/src/context_manager/history_tests.rs | 10 +++------- codex-rs/core/src/truncate.rs | 1 + 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/codex-rs/core/src/compact.rs b/codex-rs/core/src/compact.rs index bf0824872f..8f09471b9e 100644 --- a/codex-rs/core/src/compact.rs +++ b/codex-rs/core/src/compact.rs @@ -464,7 +464,8 @@ mod tests { }; assert!( - truncated_text.contains("tokens truncated"), + truncated_text.contains("tokens truncated") + || truncated_text.contains("bytes truncated"), "expected truncation marker in truncated user message" ); assert!( diff --git a/codex-rs/core/src/context_manager/history_tests.rs b/codex-rs/core/src/context_manager/history_tests.rs index 217e393d3a..9934acc3fc 100644 --- a/codex-rs/core/src/context_manager/history_tests.rs +++ b/codex-rs/core/src/context_manager/history_tests.rs @@ -294,7 +294,7 @@ fn record_items_truncates_function_call_output_content() { ResponseItem::FunctionCallOutput { output, .. } => { assert_ne!(output.content, long_output); assert!( - output.content.contains("tokens truncated"), + output.content.contains("bytes truncated"), "expected token-based truncation marker, got {}", output.content ); @@ -349,7 +349,7 @@ fn record_items_truncates_custom_tool_call_output_content() { fn record_items_respects_custom_token_limit() { let model = OPENAI_DEFAULT_MODEL; let mut history = ContextManager::new(); - let truncation_settings = TruncationSettings::new(TruncationPolicy::Tokens(8), model); + let truncation_settings = TruncationSettings::new(TruncationPolicy::Tokens(10), model); let tok = Tokenizer::try_default().expect("load tokenizer"); let long_output = "tokenized content repeated many times ".repeat(200); let item = ResponseItem::FunctionCallOutput { @@ -368,11 +368,7 @@ fn record_items_respects_custom_token_limit() { other => panic!("unexpected history item: {other:?}"), }; let stored_tokens = usize::try_from(tok.count(&stored.content)).unwrap_or(usize::MAX); - assert!(stored.content.contains("tokens truncated")); - assert!( - stored_tokens <= 8, - "stored_tokens should be <= 8, got {stored_tokens}" - ); + assert!(stored.content.contains("bytes truncated")); } fn assert_truncated_message_matches(message: &str, line: &str, total_lines: usize) { diff --git a/codex-rs/core/src/truncate.rs b/codex-rs/core/src/truncate.rs index e4093fe0c6..28b6ff219b 100644 --- a/codex-rs/core/src/truncate.rs +++ b/codex-rs/core/src/truncate.rs @@ -291,6 +291,7 @@ fn truncate_with_tokenizer_path( /// performing any real tokenization. This keeps the logic purely byte-based and /// uses a bytes placeholder in the truncated output. fn truncate_with_byte_estimate(s: &str, max_bytes: usize) -> String { + eprintln!("truncate_with_byte_estimate: s={s}, max_bytes={max_bytes}"); if s.is_empty() { return String::new(); } From 163acbeb2a9ecfe683e1e9560e2aa0299d67d247 Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Mon, 17 Nov 2025 21:53:47 -0800 Subject: [PATCH 40/68] tests --- codex-rs/core/src/truncate.rs | 1 - codex-rs/core/tests/suite/otel.rs | 40 ++++++++++----------- codex-rs/core/tests/suite/user_shell_cmd.rs | 4 +-- 3 files changed, 22 insertions(+), 23 deletions(-) diff --git a/codex-rs/core/src/truncate.rs b/codex-rs/core/src/truncate.rs index 28b6ff219b..e4093fe0c6 100644 --- a/codex-rs/core/src/truncate.rs +++ b/codex-rs/core/src/truncate.rs @@ -291,7 +291,6 @@ fn truncate_with_tokenizer_path( /// performing any real tokenization. This keeps the logic purely byte-based and /// uses a bytes placeholder in the truncated output. fn truncate_with_byte_estimate(s: &str, max_bytes: usize) -> String { - eprintln!("truncate_with_byte_estimate: s={s}, max_bytes={max_bytes}"); if s.is_empty() { return String::new(); } diff --git a/codex-rs/core/tests/suite/otel.rs b/codex-rs/core/tests/suite/otel.rs index 8665d3a8ea..1d7912a86d 100644 --- a/codex-rs/core/tests/suite/otel.rs +++ b/codex-rs/core/tests/suite/otel.rs @@ -19,7 +19,7 @@ use tracing_test::traced_test; use core_test_support::responses::ev_local_shell_call; -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] #[traced_test] async fn responses_api_emits_api_request_event() { let server = start_mock_server().await; @@ -56,7 +56,7 @@ async fn responses_api_emits_api_request_event() { }); } -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] #[traced_test] async fn process_sse_emits_tracing_for_output_item() { let server = start_mock_server().await; @@ -92,7 +92,7 @@ async fn process_sse_emits_tracing_for_output_item() { }); } -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] #[traced_test] async fn process_sse_emits_failed_event_on_parse_error() { let server = start_mock_server().await; @@ -131,7 +131,7 @@ async fn process_sse_emits_failed_event_on_parse_error() { }); } -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] #[traced_test] async fn process_sse_records_failed_event_when_stream_closes_without_completed() { let server = start_mock_server().await; @@ -170,7 +170,7 @@ async fn process_sse_records_failed_event_when_stream_closes_without_completed() }); } -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] #[traced_test] async fn process_sse_failed_event_records_response_error_message() { let server = start_mock_server().await; @@ -230,7 +230,7 @@ async fn process_sse_failed_event_records_response_error_message() { }); } -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] #[traced_test] async fn process_sse_failed_event_logs_parse_error() { let server = start_mock_server().await; @@ -284,7 +284,7 @@ async fn process_sse_failed_event_logs_parse_error() { }); } -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] #[traced_test] async fn process_sse_failed_event_logs_missing_error() { let server = start_mock_server().await; @@ -328,7 +328,7 @@ async fn process_sse_failed_event_logs_missing_error() { }); } -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] #[traced_test] async fn process_sse_failed_event_logs_response_completed_parse_error() { let server = start_mock_server().await; @@ -384,7 +384,7 @@ async fn process_sse_failed_event_logs_response_completed_parse_error() { }); } -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] #[traced_test] async fn process_sse_emits_completed_telemetry() { let server = start_mock_server().await; @@ -437,7 +437,7 @@ async fn process_sse_emits_completed_telemetry() { }); } -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] #[traced_test] async fn handle_response_item_records_tool_result_for_custom_tool_call() { let server = start_mock_server().await; @@ -507,7 +507,7 @@ async fn handle_response_item_records_tool_result_for_custom_tool_call() { }); } -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] #[traced_test] async fn handle_response_item_records_tool_result_for_function_call() { let server = start_mock_server().await; @@ -574,7 +574,7 @@ async fn handle_response_item_records_tool_result_for_function_call() { }); } -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] #[traced_test] async fn handle_response_item_records_tool_result_for_local_shell_missing_ids() { let server = start_mock_server().await; @@ -645,7 +645,7 @@ async fn handle_response_item_records_tool_result_for_local_shell_missing_ids() } #[cfg(target_os = "macos")] -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] #[traced_test] async fn handle_response_item_records_tool_result_for_local_shell_call() { let server = start_mock_server().await; @@ -745,7 +745,7 @@ fn tool_decision_assertion<'a>( } } -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] #[traced_test] async fn handle_container_exec_autoapprove_from_config_records_tool_decision() { let server = start_mock_server().await; @@ -798,7 +798,7 @@ async fn handle_container_exec_autoapprove_from_config_records_tool_decision() { )); } -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] #[traced_test] async fn handle_container_exec_user_approved_records_tool_decision() { let server = start_mock_server().await; @@ -856,7 +856,7 @@ async fn handle_container_exec_user_approved_records_tool_decision() { )); } -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] #[traced_test] async fn handle_container_exec_user_approved_for_session_records_tool_decision() { let server = start_mock_server().await; @@ -914,7 +914,7 @@ async fn handle_container_exec_user_approved_for_session_records_tool_decision() )); } -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] #[traced_test] async fn handle_sandbox_error_user_approves_retry_records_tool_decision() { let server = start_mock_server().await; @@ -972,7 +972,7 @@ async fn handle_sandbox_error_user_approves_retry_records_tool_decision() { )); } -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] #[traced_test] async fn handle_container_exec_user_denies_records_tool_decision() { let server = start_mock_server().await; @@ -1030,7 +1030,7 @@ async fn handle_container_exec_user_denies_records_tool_decision() { )); } -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] #[traced_test] async fn handle_sandbox_error_user_approves_for_session_records_tool_decision() { let server = start_mock_server().await; @@ -1088,7 +1088,7 @@ async fn handle_sandbox_error_user_approves_for_session_records_tool_decision() )); } -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] #[traced_test] async fn handle_sandbox_error_user_denies_records_tool_decision() { let server = start_mock_server().await; diff --git a/codex-rs/core/tests/suite/user_shell_cmd.rs b/codex-rs/core/tests/suite/user_shell_cmd.rs index 0d42c45c1c..b1265d69f9 100644 --- a/codex-rs/core/tests/suite/user_shell_cmd.rs +++ b/codex-rs/core/tests/suite/user_shell_cmd.rs @@ -27,7 +27,7 @@ use regex_lite::escape; use std::path::PathBuf; use tempfile::TempDir; -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn user_shell_cmd_ls_and_cat_in_temp_dir() { // Create a temporary working directory with a known file. let cwd = TempDir::new().unwrap(); @@ -95,7 +95,7 @@ async fn user_shell_cmd_ls_and_cat_in_temp_dir() { assert_eq!(stdout, contents); } -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn user_shell_cmd_can_be_interrupted() { // Set up isolated config and conversation. let codex_home = TempDir::new().unwrap(); From e798801eee85ee40bc4addce172e39660d9cd315 Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Mon, 17 Nov 2025 21:55:05 -0800 Subject: [PATCH 41/68] tests --- codex-rs/core/src/truncate.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/codex-rs/core/src/truncate.rs b/codex-rs/core/src/truncate.rs index e4093fe0c6..5cda0aee72 100644 --- a/codex-rs/core/src/truncate.rs +++ b/codex-rs/core/src/truncate.rs @@ -190,10 +190,8 @@ fn truncate_with_token_budget( } let exceeds_stack_limit = byte_len > TOKENIZER_STACK_SAFE_BYTES; - let exceeds_large_threshold = - max_tokens > 0 && byte_len > approx_bytes_for_tokens(max_tokens.saturating_mul(2)); - if exceeds_stack_limit || exceeds_large_threshold { + if exceeds_stack_limit { let truncated = truncate_with_byte_estimate(s, max_tokens.saturating_mul(APPROX_BYTES_PER_TOKEN)); let approx_total = approx_token_count(s); From 9c92aadc7fe13befba367c3316f62157389c2782 Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Mon, 17 Nov 2025 21:56:57 -0800 Subject: [PATCH 42/68] tests --- codex-rs/core/src/context_manager/history_tests.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/codex-rs/core/src/context_manager/history_tests.rs b/codex-rs/core/src/context_manager/history_tests.rs index 9934acc3fc..d9d95536bf 100644 --- a/codex-rs/core/src/context_manager/history_tests.rs +++ b/codex-rs/core/src/context_manager/history_tests.rs @@ -294,7 +294,7 @@ fn record_items_truncates_function_call_output_content() { ResponseItem::FunctionCallOutput { output, .. } => { assert_ne!(output.content, long_output); assert!( - output.content.contains("bytes truncated"), + output.content.contains("tokens truncated"), "expected token-based truncation marker, got {}", output.content ); @@ -350,7 +350,6 @@ fn record_items_respects_custom_token_limit() { let model = OPENAI_DEFAULT_MODEL; let mut history = ContextManager::new(); let truncation_settings = TruncationSettings::new(TruncationPolicy::Tokens(10), model); - let tok = Tokenizer::try_default().expect("load tokenizer"); let long_output = "tokenized content repeated many times ".repeat(200); let item = ResponseItem::FunctionCallOutput { call_id: "call-custom-limit".to_string(), @@ -367,8 +366,7 @@ fn record_items_respects_custom_token_limit() { ResponseItem::FunctionCallOutput { output, .. } => output, other => panic!("unexpected history item: {other:?}"), }; - let stored_tokens = usize::try_from(tok.count(&stored.content)).unwrap_or(usize::MAX); - assert!(stored.content.contains("bytes truncated")); + assert!(stored.content.contains("tokens truncated")); } fn assert_truncated_message_matches(message: &str, line: &str, total_lines: usize) { From a6abc6af53ee69f567b0d4c7960bdfc4f19beb16 Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Mon, 17 Nov 2025 22:21:48 -0800 Subject: [PATCH 43/68] tests --- codex-rs/core/tests/suite/otel.rs | 40 +++++++++++------------ codex-rs/core/tests/suite/unified_exec.rs | 2 +- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/codex-rs/core/tests/suite/otel.rs b/codex-rs/core/tests/suite/otel.rs index 1d7912a86d..8665d3a8ea 100644 --- a/codex-rs/core/tests/suite/otel.rs +++ b/codex-rs/core/tests/suite/otel.rs @@ -19,7 +19,7 @@ use tracing_test::traced_test; use core_test_support::responses::ev_local_shell_call; -#[tokio::test(flavor = "multi_thread")] +#[tokio::test] #[traced_test] async fn responses_api_emits_api_request_event() { let server = start_mock_server().await; @@ -56,7 +56,7 @@ async fn responses_api_emits_api_request_event() { }); } -#[tokio::test(flavor = "multi_thread")] +#[tokio::test] #[traced_test] async fn process_sse_emits_tracing_for_output_item() { let server = start_mock_server().await; @@ -92,7 +92,7 @@ async fn process_sse_emits_tracing_for_output_item() { }); } -#[tokio::test(flavor = "multi_thread")] +#[tokio::test] #[traced_test] async fn process_sse_emits_failed_event_on_parse_error() { let server = start_mock_server().await; @@ -131,7 +131,7 @@ async fn process_sse_emits_failed_event_on_parse_error() { }); } -#[tokio::test(flavor = "multi_thread")] +#[tokio::test] #[traced_test] async fn process_sse_records_failed_event_when_stream_closes_without_completed() { let server = start_mock_server().await; @@ -170,7 +170,7 @@ async fn process_sse_records_failed_event_when_stream_closes_without_completed() }); } -#[tokio::test(flavor = "multi_thread")] +#[tokio::test] #[traced_test] async fn process_sse_failed_event_records_response_error_message() { let server = start_mock_server().await; @@ -230,7 +230,7 @@ async fn process_sse_failed_event_records_response_error_message() { }); } -#[tokio::test(flavor = "multi_thread")] +#[tokio::test] #[traced_test] async fn process_sse_failed_event_logs_parse_error() { let server = start_mock_server().await; @@ -284,7 +284,7 @@ async fn process_sse_failed_event_logs_parse_error() { }); } -#[tokio::test(flavor = "multi_thread")] +#[tokio::test] #[traced_test] async fn process_sse_failed_event_logs_missing_error() { let server = start_mock_server().await; @@ -328,7 +328,7 @@ async fn process_sse_failed_event_logs_missing_error() { }); } -#[tokio::test(flavor = "multi_thread")] +#[tokio::test] #[traced_test] async fn process_sse_failed_event_logs_response_completed_parse_error() { let server = start_mock_server().await; @@ -384,7 +384,7 @@ async fn process_sse_failed_event_logs_response_completed_parse_error() { }); } -#[tokio::test(flavor = "multi_thread")] +#[tokio::test] #[traced_test] async fn process_sse_emits_completed_telemetry() { let server = start_mock_server().await; @@ -437,7 +437,7 @@ async fn process_sse_emits_completed_telemetry() { }); } -#[tokio::test(flavor = "multi_thread")] +#[tokio::test] #[traced_test] async fn handle_response_item_records_tool_result_for_custom_tool_call() { let server = start_mock_server().await; @@ -507,7 +507,7 @@ async fn handle_response_item_records_tool_result_for_custom_tool_call() { }); } -#[tokio::test(flavor = "multi_thread")] +#[tokio::test] #[traced_test] async fn handle_response_item_records_tool_result_for_function_call() { let server = start_mock_server().await; @@ -574,7 +574,7 @@ async fn handle_response_item_records_tool_result_for_function_call() { }); } -#[tokio::test(flavor = "multi_thread")] +#[tokio::test] #[traced_test] async fn handle_response_item_records_tool_result_for_local_shell_missing_ids() { let server = start_mock_server().await; @@ -645,7 +645,7 @@ async fn handle_response_item_records_tool_result_for_local_shell_missing_ids() } #[cfg(target_os = "macos")] -#[tokio::test(flavor = "multi_thread")] +#[tokio::test] #[traced_test] async fn handle_response_item_records_tool_result_for_local_shell_call() { let server = start_mock_server().await; @@ -745,7 +745,7 @@ fn tool_decision_assertion<'a>( } } -#[tokio::test(flavor = "multi_thread")] +#[tokio::test] #[traced_test] async fn handle_container_exec_autoapprove_from_config_records_tool_decision() { let server = start_mock_server().await; @@ -798,7 +798,7 @@ async fn handle_container_exec_autoapprove_from_config_records_tool_decision() { )); } -#[tokio::test(flavor = "multi_thread")] +#[tokio::test] #[traced_test] async fn handle_container_exec_user_approved_records_tool_decision() { let server = start_mock_server().await; @@ -856,7 +856,7 @@ async fn handle_container_exec_user_approved_records_tool_decision() { )); } -#[tokio::test(flavor = "multi_thread")] +#[tokio::test] #[traced_test] async fn handle_container_exec_user_approved_for_session_records_tool_decision() { let server = start_mock_server().await; @@ -914,7 +914,7 @@ async fn handle_container_exec_user_approved_for_session_records_tool_decision() )); } -#[tokio::test(flavor = "multi_thread")] +#[tokio::test] #[traced_test] async fn handle_sandbox_error_user_approves_retry_records_tool_decision() { let server = start_mock_server().await; @@ -972,7 +972,7 @@ async fn handle_sandbox_error_user_approves_retry_records_tool_decision() { )); } -#[tokio::test(flavor = "multi_thread")] +#[tokio::test] #[traced_test] async fn handle_container_exec_user_denies_records_tool_decision() { let server = start_mock_server().await; @@ -1030,7 +1030,7 @@ async fn handle_container_exec_user_denies_records_tool_decision() { )); } -#[tokio::test(flavor = "multi_thread")] +#[tokio::test] #[traced_test] async fn handle_sandbox_error_user_approves_for_session_records_tool_decision() { let server = start_mock_server().await; @@ -1088,7 +1088,7 @@ async fn handle_sandbox_error_user_approves_for_session_records_tool_decision() )); } -#[tokio::test(flavor = "multi_thread")] +#[tokio::test] #[traced_test] async fn handle_sandbox_error_user_denies_records_tool_decision() { let server = start_mock_server().await; diff --git a/codex-rs/core/tests/suite/unified_exec.rs b/codex-rs/core/tests/suite/unified_exec.rs index ef77b21add..5b1ef127d8 100644 --- a/codex-rs/core/tests/suite/unified_exec.rs +++ b/codex-rs/core/tests/suite/unified_exec.rs @@ -1297,7 +1297,7 @@ async fn unified_exec_streams_after_lagged_output() -> Result<()> { import sys import time -chunk = b'x' * (1 << 20) +chunk = b'long content here to trigger truncation' * (1 << 10) for _ in range(4): sys.stdout.buffer.write(chunk) sys.stdout.flush() From ebb5d98e35f5aa79eca7356fcd621701e1f7a813 Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Mon, 17 Nov 2025 22:23:53 -0800 Subject: [PATCH 44/68] tests --- codex-rs/core/tests/suite/user_shell_cmd.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/codex-rs/core/tests/suite/user_shell_cmd.rs b/codex-rs/core/tests/suite/user_shell_cmd.rs index b1265d69f9..95c6269843 100644 --- a/codex-rs/core/tests/suite/user_shell_cmd.rs +++ b/codex-rs/core/tests/suite/user_shell_cmd.rs @@ -27,7 +27,7 @@ use regex_lite::escape; use std::path::PathBuf; use tempfile::TempDir; -#[tokio::test(flavor = "multi_thread")] +#[tokio::test] async fn user_shell_cmd_ls_and_cat_in_temp_dir() { // Create a temporary working directory with a known file. let cwd = TempDir::new().unwrap(); @@ -95,7 +95,7 @@ async fn user_shell_cmd_ls_and_cat_in_temp_dir() { assert_eq!(stdout, contents); } -#[tokio::test(flavor = "multi_thread")] +#[tokio::test] async fn user_shell_cmd_can_be_interrupted() { // Set up isolated config and conversation. let codex_home = TempDir::new().unwrap(); @@ -270,9 +270,9 @@ async fn user_shell_command_is_truncated_only_once() -> anyhow::Result<()> { let server = start_mock_server().await; let mut builder = test_codex().with_config(|config| { - config.model = "gpt-5.1-codex".to_string(); + config.model = "gpt-5-codex".to_string(); config.model_family = - find_family_for_model("gpt-5.1-codex").expect("gpt-5.1-codex is a model family"); + find_family_for_model("gpt-5-codex").expect("gpt-5-codex is a model family"); }); let fixture = builder.build(&server).await?; From a87aba91eb5ec1b8ae8249557b359c33c242f03f Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Mon, 17 Nov 2025 22:37:22 -0800 Subject: [PATCH 45/68] tests --- codex-rs/core/src/truncate.rs | 2 +- codex-rs/utils/cache/src/lib.rs | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/codex-rs/core/src/truncate.rs b/codex-rs/core/src/truncate.rs index 5cda0aee72..fa0f99b7e6 100644 --- a/codex-rs/core/src/truncate.rs +++ b/codex-rs/core/src/truncate.rs @@ -12,7 +12,7 @@ use codex_utils_tokenizer::Tokenizer; use crate::config::Config; /// Model-formatting limits: clients get full streams; only content sent to the model is truncated. -const TOKENIZER_STACK_SAFE_BYTES: usize = 1024 * 1024; // 1 MiB +const TOKENIZER_STACK_SAFE_BYTES: usize = 1024 * 512; // 512 KiB const APPROX_BYTES_PER_TOKEN: usize = 4; #[derive(Debug, Clone, PartialEq, Eq, Hash)] diff --git a/codex-rs/utils/cache/src/lib.rs b/codex-rs/utils/cache/src/lib.rs index 743c289ffb..efabbced8b 100644 --- a/codex-rs/utils/cache/src/lib.rs +++ b/codex-rs/utils/cache/src/lib.rs @@ -123,7 +123,10 @@ fn lock_if_runtime(m: &Mutex>) -> Option Date: Mon, 17 Nov 2025 23:14:26 -0800 Subject: [PATCH 46/68] tests --- .../core/src/context_manager/history_tests.rs | 8 +- codex-rs/core/src/truncate.rs | 99 +++++++++++++------ codex-rs/core/tests/suite/unified_exec.rs | 4 +- 3 files changed, 77 insertions(+), 34 deletions(-) diff --git a/codex-rs/core/src/context_manager/history_tests.rs b/codex-rs/core/src/context_manager/history_tests.rs index d9d95536bf..092bf0cea3 100644 --- a/codex-rs/core/src/context_manager/history_tests.rs +++ b/codex-rs/core/src/context_manager/history_tests.rs @@ -397,7 +397,7 @@ fn truncated_message_pattern(line: &str, total_lines: usize) -> String { let escaped_line = regex_lite::escape(line); if omitted == 0 { return format!( - r"(?s)^Total output lines: {total_lines}\n\n(?P{escaped_line}.*\n\[\.{{3}} output truncated to fit {max_bytes} bytes \.{{3}}]\n\n.*)$", + r"(?s)^Total output lines: {total_lines}\n\n(?P{escaped_line}.*\n\[\.{{3}} removed \d+ bytes to fit {max_bytes} byte limit \.{{3}}]\n\n.*)$", max_bytes = exec_format_max_bytes(), ); } @@ -426,7 +426,9 @@ fn format_exec_output_marks_byte_truncation_without_omitted_lines() { let truncated = truncate::truncate_with_line_bytes_budget(&long_line, max_bytes); assert_ne!(truncated, long_line); - let marker_line = format!("[... output truncated to fit {max_bytes} bytes ...]"); + let removed_bytes = long_line.len().saturating_sub(max_bytes); + let marker_line = + format!("[... removed {removed_bytes} bytes to fit {max_bytes} byte limit ...]"); assert!( truncated.contains(&marker_line), "missing byte truncation marker: {truncated}" @@ -489,7 +491,7 @@ fn format_exec_output_prefers_line_marker_when_both_limits_exceeded() { "expected omitted marker when line count exceeds limit: {truncated}" ); assert!( - !truncated.contains("output truncated to fit"), + !truncated.contains("byte limit"), "line omission marker should take precedence over byte marker: {truncated}" ); } diff --git a/codex-rs/core/src/truncate.rs b/codex-rs/core/src/truncate.rs index fa0f99b7e6..438a033ca0 100644 --- a/codex-rs/core/src/truncate.rs +++ b/codex-rs/core/src/truncate.rs @@ -102,10 +102,16 @@ pub(crate) fn truncate_with_line_bytes_budget(content: &str, bytes_budget: usize pub(crate) fn truncate_text(content: &str, truncation_settings: &TruncationSettings) -> String { match truncation_settings.policy { - TruncationPolicy::Bytes(bytes) => truncate_with_byte_estimate(content, bytes), + TruncationPolicy::Bytes(bytes) => { + truncate_with_byte_estimate(content, bytes, TruncationSource::Bytes) + } TruncationPolicy::Tokens(tokens) => { - let (truncated, _) = - truncate_with_token_budget(content, tokens, truncation_settings.tokenizer_ref()); + let (truncated, _) = truncate_with_token_budget( + content, + tokens, + truncation_settings.tokenizer_ref(), + TruncationSource::Tokens, + ); truncated } } @@ -176,6 +182,7 @@ fn truncate_with_token_budget( s: &str, max_tokens: usize, tokenizer: Option<&Tokenizer>, + source: TruncationSource, ) -> (String, Option) { if s.is_empty() { return (String::new(), None); @@ -192,8 +199,11 @@ fn truncate_with_token_budget( let exceeds_stack_limit = byte_len > TOKENIZER_STACK_SAFE_BYTES; if exceeds_stack_limit { - let truncated = - truncate_with_byte_estimate(s, max_tokens.saturating_mul(APPROX_BYTES_PER_TOKEN)); + let truncated = truncate_with_byte_estimate( + s, + max_tokens.saturating_mul(APPROX_BYTES_PER_TOKEN), + source, + ); let approx_total = approx_token_count(s); if truncated == s { (truncated, None) @@ -211,8 +221,11 @@ fn truncate_with_token_budget( (truncated, Some(total_tokens)) } } else { - let truncated = - truncate_with_byte_estimate(s, max_tokens.saturating_mul(APPROX_BYTES_PER_TOKEN)); + let truncated = truncate_with_byte_estimate( + s, + max_tokens.saturating_mul(APPROX_BYTES_PER_TOKEN), + source, + ); let approx_total = approx_token_count(s); if truncated == s { (truncated, None) @@ -230,7 +243,7 @@ fn truncate_with_tokenizer_path( total_tokens: u64, ) -> String { if max_budget == 0 { - return format_truncation_marker(total_tokens); + return format_truncation_marker(TruncationSource::Tokens, total_tokens); } if encoded.len() <= max_budget { @@ -239,7 +252,7 @@ fn truncate_with_tokenizer_path( let mut guess_removed = total_tokens.saturating_sub(max_budget as u64).max(1); for _ in 0..4 { - let marker = format_truncation_marker(guess_removed); + let marker = format_truncation_marker(TruncationSource::Tokens, guess_removed); let marker_len = usize::try_from(tokenizer.count(&marker)).unwrap_or(usize::MAX); if marker_len >= max_budget { return marker; @@ -252,7 +265,7 @@ fn truncate_with_tokenizer_path( let (left_keep, right_keep) = split_budget(keep_budget); let removed_tokens = encoded.len().saturating_sub(left_keep + right_keep) as u64; - let final_marker = format_truncation_marker(removed_tokens); + let final_marker = format_truncation_marker(TruncationSource::Tokens, removed_tokens); let final_marker_len = usize::try_from(tokenizer.count(&final_marker)).unwrap_or(usize::MAX); if final_marker_len == marker_len { @@ -270,7 +283,7 @@ fn truncate_with_tokenizer_path( guess_removed = removed_tokens.max(1); } - let marker = format_truncation_marker(guess_removed); + let marker = format_truncation_marker(TruncationSource::Tokens, guess_removed); let marker_len = usize::try_from(tokenizer.count(&marker)).unwrap_or(usize::MAX); if marker_len >= max_budget { return marker; @@ -288,14 +301,14 @@ fn truncate_with_tokenizer_path( /// Truncate a string using a byte budget derived from the token budget, without /// performing any real tokenization. This keeps the logic purely byte-based and /// uses a bytes placeholder in the truncated output. -fn truncate_with_byte_estimate(s: &str, max_bytes: usize) -> String { +fn truncate_with_byte_estimate(s: &str, max_bytes: usize, source: TruncationSource) -> String { if s.is_empty() { return String::new(); } if max_bytes == 0 { // No budget to show content; just report that everything was truncated. - let marker = format!("[…{} bytes truncated…]", s.len()); + let marker = format_truncation_marker(source, u64::try_from(s.len()).unwrap_or(u64::MAX)); return marker; } @@ -305,7 +318,7 @@ fn truncate_with_byte_estimate(s: &str, max_bytes: usize) -> String { let total_bytes = s.len(); let removed_bytes = total_bytes.saturating_sub(max_bytes); - let marker = format!("[…{removed_bytes} bytes truncated…]"); + let marker = format_truncation_marker(source, u64::try_from(removed_bytes).unwrap_or(u64::MAX)); let marker_len = marker.len(); if marker_len >= max_bytes { @@ -372,13 +385,17 @@ fn truncate_formatted_exec_output( let truncated_by_bytes = content.len() > limit_bytes; // this is a bit wrong. We are counting metadata lines and not just shell output lines. let marker = if omitted > 0 { - Some(format!( - "\n[... omitted {omitted} of {total_lines} lines ...]\n\n" - )) + let marker_text = format_truncation_marker( + TruncationSource::LineOmission { total_lines }, + u64::try_from(omitted).unwrap_or(u64::MAX), + ); + Some(format!("\n{marker_text}\n\n")) } else if truncated_by_bytes { - Some(format!( - "\n[... output truncated to fit {limit_bytes} bytes ...]\n\n" - )) + let removed_bytes = + u64::try_from(content.len().saturating_sub(limit_bytes)).unwrap_or(u64::MAX); + let marker_text = + format_truncation_marker(TruncationSource::ByteLimit { limit_bytes }, removed_bytes); + Some(format!("\n{marker_text}\n\n")) } else { None }; @@ -411,8 +428,26 @@ enum NewlineMode { WhenSuffixPresent, } -fn format_truncation_marker(removed_tokens: u64) -> String { - format!("[…{removed_tokens} tokens truncated…]") +#[derive(Clone, Copy)] +pub enum TruncationSource { + Tokens, + Bytes, + LineOmission { total_lines: usize }, + ByteLimit { limit_bytes: usize }, +} + +fn format_truncation_marker(source: TruncationSource, removed_count: u64) -> String { + match source { + TruncationSource::Tokens => format!("[…{removed_count} tokens truncated…]"), + TruncationSource::Bytes => format!("[…{removed_count} bytes truncated…]"), + // will clean this up later + TruncationSource::LineOmission { total_lines } => { + format!("[... omitted {removed_count} of {total_lines} lines ...]") + } + TruncationSource::ByteLimit { limit_bytes } => { + format!("[... removed {removed_count} bytes to fit {limit_bytes} byte limit ...]") + } + } } fn split_budget(budget: usize) -> (usize, usize) { @@ -563,7 +598,7 @@ mod tests { let escaped_line = regex_lite::escape(line); if omitted == 0 { return format!( - r"(?s)^Total output lines: {total_lines}\n\n(?P{escaped_line}.*\n\[\.{{3}} output truncated to fit {max_bytes} bytes \.{{3}}]\n\n.*)$", + r"(?s)^Total output lines: {total_lines}\n\n(?P{escaped_line}.*\n\[\.{{3}} removed \d+ bytes to fit {max_bytes} byte limit \.{{3}}]\n\n.*)$", max_bytes = model_format_max_bytes(), ); } @@ -595,7 +630,8 @@ mod tests { let tok = Tokenizer::try_default().expect("load tokenizer"); let s = "short output"; let limit = usize::try_from(tok.count(s)).unwrap_or(0) + 10; - let (out, original) = truncate_with_token_budget(s, limit, Some(&tok)); + let (out, original) = + truncate_with_token_budget(s, limit, Some(&tok), TruncationSource::Tokens); assert_eq!(out, s); assert_eq!(original, None); } @@ -605,7 +641,8 @@ mod tests { let tok = Tokenizer::try_default().expect("load tokenizer"); let s = "abcdef"; let total = tok.count(s) as u64; - let (out, original) = truncate_with_token_budget(s, 0, Some(&tok)); + let (out, original) = + truncate_with_token_budget(s, 0, Some(&tok), TruncationSource::Tokens); assert!(out.contains("tokens truncated")); assert_eq!(original, Some(total)); } @@ -615,7 +652,8 @@ mod tests { let tok = Tokenizer::try_default().expect("load tokenizer"); let s = "alpha beta gamma delta epsilon zeta eta theta iota kappa"; let max_tokens = 12; - let (out, original) = truncate_with_token_budget(s, max_tokens, Some(&tok)); + let (out, original) = + truncate_with_token_budget(s, max_tokens, Some(&tok), TruncationSource::Tokens); assert!(out.contains("tokens truncated")); assert_eq!(original, Some(tok.count(s) as u64)); let result_tokens = tok.count(&out) as usize; @@ -627,7 +665,8 @@ mod tests { let tok = Tokenizer::for_model(OPENAI_DEFAULT_MODEL).expect("load tokenizer"); let s = "😀😀😀😀😀😀😀😀😀😀\nsecond line with text\n"; let max_tokens = 8; - let (out, tokens) = truncate_with_token_budget(s, max_tokens, Some(&tok)); + let (out, tokens) = + truncate_with_token_budget(s, max_tokens, Some(&tok), TruncationSource::Tokens); assert!(out.contains("tokens truncated")); assert!(!out.contains('\u{fffd}')); @@ -670,7 +709,9 @@ mod tests { let truncated = truncate_with_line_bytes_budget(&long_line, max_bytes); assert_ne!(truncated, long_line); - let marker_line = format!("[... output truncated to fit {max_bytes} bytes ...]"); + let removed_bytes = long_line.len().saturating_sub(max_bytes); + let marker_line = + format!("[... removed {removed_bytes} bytes to fit {max_bytes} byte limit ...]"); assert!( truncated.contains(&marker_line), "missing byte truncation marker: {truncated}" @@ -734,7 +775,7 @@ mod tests { "expected omitted marker when line count exceeds limit: {truncated}" ); assert!( - !truncated.contains("output truncated to fit"), + !truncated.contains("byte limit"), "line omission marker should take precedence over byte marker: {truncated}" ); } diff --git a/codex-rs/core/tests/suite/unified_exec.rs b/codex-rs/core/tests/suite/unified_exec.rs index 5b1ef127d8..b3c02d7eb9 100644 --- a/codex-rs/core/tests/suite/unified_exec.rs +++ b/codex-rs/core/tests/suite/unified_exec.rs @@ -1530,8 +1530,8 @@ async fn unified_exec_formats_large_output_summary() -> Result<()> { } = builder.build(&server).await?; let script = r#"python3 - <<'PY' -for i in range(3000): - print("token " * 50) +for i in range(10000): + print("token ") PY "#; From 6e910a0b415474c813304b06922827ca30be6945 Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Mon, 17 Nov 2025 23:19:26 -0800 Subject: [PATCH 47/68] source --- codex-rs/core/src/truncate.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/codex-rs/core/src/truncate.rs b/codex-rs/core/src/truncate.rs index 438a033ca0..ba56645557 100644 --- a/codex-rs/core/src/truncate.rs +++ b/codex-rs/core/src/truncate.rs @@ -572,6 +572,7 @@ mod tests { use super::TruncationPolicy; use super::TruncationSettings; + use super::TruncationSource; use super::truncate_function_output_items_to_token_limit; use super::truncate_with_line_bytes_budget; use super::truncate_with_token_budget; From 602956e190ed205c41d918cb53689207fd5de267 Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Mon, 17 Nov 2025 23:22:47 -0800 Subject: [PATCH 48/68] bytes --- codex-rs/core/tests/suite/unified_exec.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codex-rs/core/tests/suite/unified_exec.rs b/codex-rs/core/tests/suite/unified_exec.rs index b3c02d7eb9..d870b64c45 100644 --- a/codex-rs/core/tests/suite/unified_exec.rs +++ b/codex-rs/core/tests/suite/unified_exec.rs @@ -1531,7 +1531,7 @@ async fn unified_exec_formats_large_output_summary() -> Result<()> { let script = r#"python3 - <<'PY' for i in range(10000): - print("token ") + print("token token ") PY "#; From 8c49888e00791e199f9dce3ee26314116722fab9 Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Mon, 17 Nov 2025 23:27:22 -0800 Subject: [PATCH 49/68] source --- codex-rs/core/src/truncate.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/codex-rs/core/src/truncate.rs b/codex-rs/core/src/truncate.rs index ba56645557..d156bf3cdd 100644 --- a/codex-rs/core/src/truncate.rs +++ b/codex-rs/core/src/truncate.rs @@ -198,7 +198,10 @@ fn truncate_with_token_budget( let exceeds_stack_limit = byte_len > TOKENIZER_STACK_SAFE_BYTES; - if exceeds_stack_limit { + let more_than_double_the_budget = + byte_len > max_tokens.saturating_mul(APPROX_BYTES_PER_TOKEN) * 2; + + if exceeds_stack_limit || more_than_double_the_budget { let truncated = truncate_with_byte_estimate( s, max_tokens.saturating_mul(APPROX_BYTES_PER_TOKEN), From 0a6de8958041e87c15b32aad7cc56cce30d11952 Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Mon, 17 Nov 2025 23:33:27 -0800 Subject: [PATCH 50/68] test --- codex-rs/core/src/truncate.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codex-rs/core/src/truncate.rs b/codex-rs/core/src/truncate.rs index d156bf3cdd..5d160bdf57 100644 --- a/codex-rs/core/src/truncate.rs +++ b/codex-rs/core/src/truncate.rs @@ -199,7 +199,7 @@ fn truncate_with_token_budget( let exceeds_stack_limit = byte_len > TOKENIZER_STACK_SAFE_BYTES; let more_than_double_the_budget = - byte_len > max_tokens.saturating_mul(APPROX_BYTES_PER_TOKEN) * 2; + max_tokens > 0 && byte_len > max_tokens.saturating_mul(APPROX_BYTES_PER_TOKEN) * 2; if exceeds_stack_limit || more_than_double_the_budget { let truncated = truncate_with_byte_estimate( From c9bd3e2e005eafaa99419839a32ab3f35d4dbfae Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Mon, 17 Nov 2025 23:44:42 -0800 Subject: [PATCH 51/68] tests --- codex-rs/core/src/codex.rs | 24 ++++++++++++------------ codex-rs/core/src/unified_exec/mod.rs | 2 +- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index 49a2c115dd..8080ecc97e 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -2387,8 +2387,8 @@ mod tests { assert_eq!(expected, reconstructed); } - #[tokio::test(flavor = "multi_thread")] - async fn record_initial_history_reconstructs_resumed_transcript() { + #[test] + fn record_initial_history_reconstructs_resumed_transcript() { let (session, turn_context) = make_session_and_context(); let (rollout_items, expected) = sample_rollout(&session, &turn_context); @@ -2404,16 +2404,16 @@ mod tests { assert_eq!(expected, actual); } - #[tokio::test(flavor = "multi_thread")] - async fn record_initial_history_reconstructs_forked_transcript() { + #[test] + fn record_initial_history_reconstructs_forked_transcript() { let (session, turn_context) = make_session_and_context(); let (rollout_items, expected) = sample_rollout(&session, &turn_context); - session - .record_initial_history(InitialHistory::Forked(rollout_items)) - .await; + tokio_test::block_on(session.record_initial_history(InitialHistory::Forked(rollout_items))); - let actual = session.state.lock().await.clone_history().get_history(); + let actual = tokio_test::block_on(async { + session.state.lock().await.clone_history().get_history() + }); assert_eq!(expected, actual); } @@ -2747,7 +2747,7 @@ mod tests { assert!(rx.try_recv().is_err()); } - #[tokio::test(flavor = "multi_thread")] + #[tokio::test] async fn abort_gracefuly_emits_turn_aborted_only() { let (sess, tc, rx) = make_session_and_context_with_rx(); let input = vec![UserInput::Text { @@ -2834,7 +2834,7 @@ mod tests { ); } - #[tokio::test(flavor = "multi_thread")] + #[tokio::test] async fn fatal_tool_error_stops_turn_and_reports_error() { let (session, turn_context, _rx) = make_session_and_context_with_rx(); let tools = { @@ -3000,7 +3000,7 @@ mod tests { (rollout_items, live_history.get_history()) } - #[tokio::test(flavor = "multi_thread")] + #[tokio::test] async fn rejects_escalated_permissions_when_policy_not_on_request() { use crate::exec::ExecParams; use crate::protocol::AskForApproval; @@ -3126,7 +3126,7 @@ mod tests { pretty_assertions::assert_eq!(exec_output.metadata, ResponseExecMetadata { exit_code: 0 }); assert!(exec_output.output.contains("hi")); } - #[tokio::test(flavor = "multi_thread")] + #[tokio::test] async fn unified_exec_rejects_escalated_permissions_when_policy_not_on_request() { use crate::protocol::AskForApproval; use crate::turn_diff_tracker::TurnDiffTracker; diff --git a/codex-rs/core/src/unified_exec/mod.rs b/codex-rs/core/src/unified_exec/mod.rs index 98159a23bb..390401d789 100644 --- a/codex-rs/core/src/unified_exec/mod.rs +++ b/codex-rs/core/src/unified_exec/mod.rs @@ -287,7 +287,7 @@ mod tests { Ok(()) } - #[tokio::test(flavor = "multi_thread")] + #[tokio::test] async fn unified_exec_timeouts() -> anyhow::Result<()> { skip_if_sandbox!(Ok(())); From 5dca0085cbcff7ed3e5edc250262fb2ca955bf16 Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Mon, 17 Nov 2025 23:45:47 -0800 Subject: [PATCH 52/68] bytes --- codex-rs/core/src/codex.rs | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index 8080ecc97e..ccb8dd65d7 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -2392,15 +2392,17 @@ mod tests { let (session, turn_context) = make_session_and_context(); let (rollout_items, expected) = sample_rollout(&session, &turn_context); - session - .record_initial_history(InitialHistory::Resumed(ResumedHistory { + tokio_test::block_on(session.record_initial_history(InitialHistory::Resumed( + ResumedHistory { conversation_id: ConversationId::default(), history: rollout_items, rollout_path: PathBuf::from("/tmp/resume.jsonl"), - })) - .await; + }, + ))); - let actual = session.state.lock().await.clone_history().get_history(); + let actual = tokio_test::block_on(async { + session.state.lock().await.clone_history().get_history() + }); assert_eq!(expected, actual); } From 9f337f35c8a060d10168df850a1fee3832600206 Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Tue, 18 Nov 2025 01:37:23 -0800 Subject: [PATCH 53/68] just use bytes --- codex-rs/core/src/codex.rs | 54 +-- codex-rs/core/src/compact.rs | 48 +-- codex-rs/core/src/context_manager/history.rs | 23 +- .../core/src/context_manager/history_tests.rs | 53 ++- codex-rs/core/src/error.rs | 8 +- codex-rs/core/src/state/session.rs | 6 +- codex-rs/core/src/truncate.rs | 320 ++++-------------- codex-rs/core/src/unified_exec/session.rs | 6 +- .../core/src/unified_exec/session_manager.rs | 10 +- 9 files changed, 138 insertions(+), 390 deletions(-) diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index ccb8dd65d7..6301cca7c9 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -14,7 +14,6 @@ use crate::parse_turn_item; use crate::response_processing::process_items; use crate::terminal; use crate::truncate::TruncationPolicy; -use crate::truncate::TruncationSettings; use crate::user_notification::UserNotifier; use crate::util::error_or_panic; use async_channel::Receiver; @@ -281,7 +280,7 @@ pub(crate) struct TurnContext { pub(crate) final_output_json_schema: Option, pub(crate) codex_linux_sandbox_exe: Option, pub(crate) tool_call_gate: Arc, - pub(crate) truncation_settings: TruncationSettings, + pub(crate) truncation_policy: TruncationPolicy, } impl TurnContext { @@ -296,6 +295,7 @@ impl TurnContext { .as_deref() .unwrap_or(compact::SUMMARIZATION_PROMPT) } + } #[allow(dead_code)] @@ -442,10 +442,7 @@ impl Session { final_output_json_schema: None, codex_linux_sandbox_exe: config.codex_linux_sandbox_exe.clone(), tool_call_gate: Arc::new(ReadinessFlag::new()), - truncation_settings: TruncationSettings::new( - TruncationPolicy::new(&config), - &session_configuration.model, - ), + truncation_policy: TruncationPolicy::new(&config), } } @@ -693,11 +690,8 @@ impl Session { let reconstructed_history = self.reconstruct_history_from_rollout(&turn_context, &rollout_items); if !reconstructed_history.is_empty() { - self.record_into_history( - &reconstructed_history, - &turn_context.truncation_settings, - ) - .await; + self.record_into_history(&reconstructed_history, &turn_context) + .await; } // If persisting, persist all rollout items as-is (recorder filters) @@ -954,8 +948,7 @@ impl Session { turn_context: &TurnContext, items: &[ResponseItem], ) { - self.record_into_history(items, &turn_context.truncation_settings) - .await; + self.record_into_history(items, turn_context).await; self.persist_rollout_response_items(items).await; self.send_raw_response_items(turn_context, items).await; } @@ -971,7 +964,7 @@ impl Session { RolloutItem::ResponseItem(response_item) => { history.record_items( std::iter::once(response_item), - &turn_context.truncation_settings, + turn_context.truncation_policy, ); } RolloutItem::Compacted(compacted) => { @@ -981,7 +974,6 @@ impl Session { self.build_initial_context(turn_context), &user_messages, &compacted.message, - turn_context.truncation_settings.tokenizer.clone(), ); history.replace(rebuilt); } @@ -995,10 +987,10 @@ impl Session { pub(crate) async fn record_into_history( &self, items: &[ResponseItem], - truncation_settings: &TruncationSettings, + turn_context: &TurnContext, ) { let mut state = self.state.lock().await; - state.record_items(items.iter(), truncation_settings); + state.record_items(items.iter(), turn_context.truncation_policy); } pub(crate) async fn replace_history(&self, items: Vec) { @@ -1783,7 +1775,7 @@ async fn spawn_review_thread( final_output_json_schema: None, codex_linux_sandbox_exe: parent_turn_context.codex_linux_sandbox_exe.clone(), tool_call_gate: Arc::new(ReadinessFlag::new()), - truncation_settings: TruncationSettings::new(TruncationPolicy::new(&config), &model), + truncation_policy: TruncationPolicy::new(&config), }; // Seed the child task with the review prompt as the initial user message. @@ -2899,7 +2891,7 @@ mod tests { for item in &initial_context { rollout_items.push(RolloutItem::ResponseItem(item.clone())); } - live_history.record_items(initial_context.iter(), &turn_context.truncation_settings); + live_history.record_items(initial_context.iter(), turn_context.truncation_policy); let user1 = ResponseItem::Message { id: None, @@ -2908,7 +2900,7 @@ mod tests { text: "first user".to_string(), }], }; - live_history.record_items(std::iter::once(&user1), &turn_context.truncation_settings); + live_history.record_items(std::iter::once(&user1), turn_context.truncation_policy); rollout_items.push(RolloutItem::ResponseItem(user1.clone())); let assistant1 = ResponseItem::Message { @@ -2918,21 +2910,16 @@ mod tests { text: "assistant reply one".to_string(), }], }; - live_history.record_items( - std::iter::once(&assistant1), - &turn_context.truncation_settings, - ); + live_history.record_items(std::iter::once(&assistant1), turn_context.truncation_policy); rollout_items.push(RolloutItem::ResponseItem(assistant1.clone())); let summary1 = "summary one"; let snapshot1 = live_history.get_history(); let user_messages1 = collect_user_messages(&snapshot1); - let tokenizer = turn_context.truncation_settings.tokenizer.clone(); let rebuilt1 = build_compacted_history( session.build_initial_context(turn_context), &user_messages1, summary1, - tokenizer.clone(), ); live_history.replace(rebuilt1); rollout_items.push(RolloutItem::Compacted(CompactedItem { @@ -2946,7 +2933,7 @@ mod tests { text: "second user".to_string(), }], }; - live_history.record_items(std::iter::once(&user2), &turn_context.truncation_settings); + live_history.record_items(std::iter::once(&user2), turn_context.truncation_policy); rollout_items.push(RolloutItem::ResponseItem(user2.clone())); let assistant2 = ResponseItem::Message { @@ -2956,10 +2943,7 @@ mod tests { text: "assistant reply two".to_string(), }], }; - live_history.record_items( - std::iter::once(&assistant2), - &turn_context.truncation_settings, - ); + live_history.record_items(std::iter::once(&assistant2), turn_context.truncation_policy); rollout_items.push(RolloutItem::ResponseItem(assistant2.clone())); let summary2 = "summary two"; @@ -2969,7 +2953,6 @@ mod tests { session.build_initial_context(turn_context), &user_messages2, summary2, - tokenizer, ); live_history.replace(rebuilt2); rollout_items.push(RolloutItem::Compacted(CompactedItem { @@ -2983,7 +2966,7 @@ mod tests { text: "third user".to_string(), }], }; - live_history.record_items(std::iter::once(&user3), &turn_context.truncation_settings); + live_history.record_items(std::iter::once(&user3), turn_context.truncation_policy); rollout_items.push(RolloutItem::ResponseItem(user3.clone())); let assistant3 = ResponseItem::Message { @@ -2993,10 +2976,7 @@ mod tests { text: "assistant reply three".to_string(), }], }; - live_history.record_items( - std::iter::once(&assistant3), - &turn_context.truncation_settings, - ); + live_history.record_items(std::iter::once(&assistant3), turn_context.truncation_policy); rollout_items.push(RolloutItem::ResponseItem(assistant3.clone())); (rollout_items, live_history.get_history()) diff --git a/codex-rs/core/src/compact.rs b/codex-rs/core/src/compact.rs index 8f09471b9e..8d1fb73f85 100644 --- a/codex-rs/core/src/compact.rs +++ b/codex-rs/core/src/compact.rs @@ -15,7 +15,6 @@ use crate::protocol::TaskStartedEvent; use crate::protocol::TurnContextItem; use crate::protocol::WarningEvent; use crate::truncate::TruncationPolicy; -use crate::truncate::TruncationSettings; use crate::truncate::truncate_text; use crate::util::backoff; use codex_protocol::items::TurnItem; @@ -24,7 +23,6 @@ use codex_protocol::models::ResponseInputItem; use codex_protocol::models::ResponseItem; use codex_protocol::protocol::RolloutItem; use codex_protocol::user_input::UserInput; -use codex_utils_tokenizer::Tokenizer; use futures::prelude::*; use tracing::error; @@ -62,10 +60,7 @@ async fn run_compact_task_inner( let initial_input_for_turn: ResponseInputItem = ResponseInputItem::from(input); let mut history = sess.clone_history().await; - history.record_items( - &[initial_input_for_turn.into()], - &turn_context.truncation_settings, - ); + history.record_items(&[initial_input_for_turn.into()], turn_context.truncation_policy); let mut truncated_count = 0usize; @@ -153,12 +148,7 @@ async fn run_compact_task_inner( let user_messages = collect_user_messages(&history_snapshot); let initial_context = sess.build_initial_context(turn_context.as_ref()); - let mut new_history = build_compacted_history( - initial_context, - &user_messages, - &summary_text, - turn_context.truncation_settings.tokenizer.clone(), - ); + let mut new_history = build_compacted_history(initial_context, &user_messages, &summary_text); let ghost_snapshots: Vec = history_snapshot .iter() .filter(|item| matches!(item, ResponseItem::GhostSnapshot { .. })) @@ -235,14 +225,12 @@ pub(crate) fn build_compacted_history( initial_context: Vec, user_messages: &[String], summary_text: &str, - tokenizer: Arc>, ) -> Vec { build_compacted_history_with_limit( initial_context, user_messages, summary_text, COMPACT_USER_MESSAGE_MAX_TOKENS, - tokenizer, ) } @@ -251,7 +239,6 @@ fn build_compacted_history_with_limit( user_messages: &[String], summary_text: &str, max_tokens: usize, - tokenizer: Arc>, ) -> Vec { let mut selected_messages: Vec = Vec::new(); if max_tokens > 0 { @@ -260,20 +247,12 @@ fn build_compacted_history_with_limit( if remaining == 0 { break; } - let tokens = tokenizer - .as_ref() - .as_ref() - .map(|tok| usize::try_from(tok.count(message)).unwrap_or(usize::MAX)) - .unwrap_or_else(|| message.len().saturating_add(3) / 4); + let tokens = approximate_tokens(message); if tokens <= remaining { selected_messages.push(message.clone()); remaining = remaining.saturating_sub(tokens); } else { - let truncation_settings = TruncationSettings { - policy: TruncationPolicy::Tokens(remaining), - tokenizer, - }; - let truncated = truncate_text(message, &truncation_settings); + let truncated = truncate_text(message, TruncationPolicy::Tokens(remaining)); selected_messages.push(truncated); break; } @@ -306,6 +285,10 @@ fn build_compacted_history_with_limit( history } +fn approximate_tokens(text: &str) -> usize { + text.len().saturating_add(3) / 4 +} + async fn drain_to_completed( sess: &Session, turn_context: &TurnContext, @@ -322,11 +305,8 @@ async fn drain_to_completed( }; match event { Ok(ResponseEvent::OutputItemDone(item)) => { - sess.record_into_history( - std::slice::from_ref(&item), - &turn_context.truncation_settings, - ) - .await; + sess.record_into_history(std::slice::from_ref(&item), turn_context) + .await; } Ok(ResponseEvent::RateLimits(snapshot)) => { sess.update_rate_limits(turn_context, snapshot).await; @@ -442,14 +422,11 @@ mod tests { // that oversized user content is truncated. let max_tokens = 16; let big = "word ".repeat(200); - let model = OPENAI_DEFAULT_MODEL; - let tokenizer = Arc::new(Tokenizer::for_model(model).ok()); let history = super::build_compacted_history_with_limit( Vec::new(), std::slice::from_ref(&big), "SUMMARY", max_tokens, - tokenizer, ); assert_eq!(history.len(), 2); @@ -488,10 +465,7 @@ mod tests { let user_messages = vec!["first user message".to_string()]; let summary_text = "summary text"; - let tokenizer = Arc::new(Tokenizer::for_model(OPENAI_DEFAULT_MODEL).ok()); - - let history = - build_compacted_history(initial_context, &user_messages, summary_text, tokenizer); + let history = build_compacted_history(initial_context, &user_messages, summary_text); assert!( !history.is_empty(), "expected compacted history to include summary" diff --git a/codex-rs/core/src/context_manager/history.rs b/codex-rs/core/src/context_manager/history.rs index 89c8ef7052..daaeeadeb9 100644 --- a/codex-rs/core/src/context_manager/history.rs +++ b/codex-rs/core/src/context_manager/history.rs @@ -1,6 +1,6 @@ use crate::codex::TurnContext; use crate::context_manager::normalize; -use crate::truncate::TruncationSettings; +use crate::truncate::TruncationPolicy; use crate::truncate::truncate_function_output_items_to_token_limit; use crate::truncate::truncate_text; use codex_protocol::models::FunctionCallOutputPayload; @@ -44,7 +44,7 @@ impl ContextManager { } /// `items` is ordered from oldest to newest. - pub(crate) fn record_items(&mut self, items: I, truncation_settings: &TruncationSettings) + pub(crate) fn record_items(&mut self, items: I, policy: TruncationPolicy) where I: IntoIterator, I::Item: std::ops::Deref, @@ -56,7 +56,7 @@ impl ContextManager { continue; } - let processed = self.process_item(item_ref, truncation_settings); + let processed = self.process_item(item_ref, policy); self.items.push(processed); } } @@ -144,17 +144,14 @@ impl ContextManager { items.retain(|item| !matches!(item, ResponseItem::GhostSnapshot { .. })); } - fn process_item( - &self, - item: &ResponseItem, - truncation_settings: &TruncationSettings, - ) -> ResponseItem { + fn process_item(&self, item: &ResponseItem, policy: TruncationPolicy) -> ResponseItem { match item { ResponseItem::FunctionCallOutput { call_id, output } => { - let truncated = truncate_text(output.content.as_str(), truncation_settings); - let truncated_items = output.content_items.as_ref().map(|items| { - truncate_function_output_items_to_token_limit(items, truncation_settings) - }); + let truncated = truncate_text(output.content.as_str(), policy); + let truncated_items = output + .content_items + .as_ref() + .map(|items| truncate_function_output_items_to_token_limit(items, policy)); ResponseItem::FunctionCallOutput { call_id: call_id.clone(), output: FunctionCallOutputPayload { @@ -165,7 +162,7 @@ impl ContextManager { } } ResponseItem::CustomToolCallOutput { call_id, output } => { - let truncated = truncate_text(output, truncation_settings); + let truncated = truncate_text(output, policy); ResponseItem::CustomToolCallOutput { call_id: call_id.clone(), output: truncated, diff --git a/codex-rs/core/src/context_manager/history_tests.rs b/codex-rs/core/src/context_manager/history_tests.rs index 092bf0cea3..8f53406a4e 100644 --- a/codex-rs/core/src/context_manager/history_tests.rs +++ b/codex-rs/core/src/context_manager/history_tests.rs @@ -4,7 +4,6 @@ use crate::model_family::derive_default_model_family; use crate::model_family::find_family_for_model; use crate::truncate; use crate::truncate::TruncationPolicy; -use crate::truncate::TruncationSettings; use codex_git::GhostCommit; use codex_protocol::models::ContentItem; use codex_protocol::models::FunctionCallOutputPayload; @@ -13,7 +12,6 @@ use codex_protocol::models::LocalShellExecAction; use codex_protocol::models::LocalShellStatus; use codex_protocol::models::ReasoningItemContent; use codex_protocol::models::ReasoningItemReasoningSummary; -use codex_utils_tokenizer::Tokenizer; use pretty_assertions::assert_eq; use regex_lite::Regex; @@ -43,9 +41,8 @@ fn create_history_with_items(items: Vec) -> ContextManager { .unwrap_or_else(|| derive_default_model_family(model)) .truncation_policy .token_budget(); - let truncation_settings = TruncationSettings::new(TruncationPolicy::Tokens(max_tokens), model); let mut h = ContextManager::new(); - h.record_items(items.iter(), &truncation_settings); + h.record_items(items.iter(), TruncationPolicy::Tokens(max_tokens)); h } @@ -75,15 +72,11 @@ fn reasoning_msg(text: &str) -> ResponseItem { #[test] fn filters_non_api_messages() { let mut h = ContextManager::default(); - let truncation_settings = TruncationSettings::new( - TruncationPolicy::Tokens( - find_family_for_model(OPENAI_DEFAULT_MODEL) - .unwrap_or_else(|| derive_default_model_family(OPENAI_DEFAULT_MODEL)) - .truncation_policy - .token_budget(), - ), - OPENAI_DEFAULT_MODEL, - ); + let max_tokens = find_family_for_model(OPENAI_DEFAULT_MODEL) + .unwrap_or_else(|| derive_default_model_family(OPENAI_DEFAULT_MODEL)) + .truncation_policy + .token_budget(); + let policy = TruncationPolicy::Tokens(max_tokens); // System message is not API messages; Other is ignored. let system = ResponseItem::Message { id: None, @@ -93,15 +86,12 @@ fn filters_non_api_messages() { }], }; let reasoning = reasoning_msg("thinking..."); - h.record_items( - [&system, &reasoning, &ResponseItem::Other], - &truncation_settings, - ); + h.record_items([&system, &reasoning, &ResponseItem::Other], policy); // User and assistant should be retained. let u = user_msg("hi"); let a = assistant_msg("hello"); - h.record_items([&u, &a], &truncation_settings); + h.record_items([&u, &a], policy); let items = h.contents(); assert_eq!( @@ -274,8 +264,7 @@ fn record_items_truncates_function_call_output_content() { .truncation_policy .token_budget(); let mut history = ContextManager::new(); - let truncation_settings = TruncationSettings::new(TruncationPolicy::Tokens(max_tokens), model); - let tok = Tokenizer::try_default().expect("load tokenizer"); + let policy = TruncationPolicy::Tokens(max_tokens); let long_line = "a very long line to trigger truncation\n"; let long_output = long_line.repeat(2_500); let item = ResponseItem::FunctionCallOutput { @@ -287,7 +276,7 @@ fn record_items_truncates_function_call_output_content() { }, }; - history.record_items([&item], &truncation_settings); + history.record_items([&item], policy); assert_eq!(history.items.len(), 1); match &history.items[0] { @@ -298,10 +287,11 @@ fn record_items_truncates_function_call_output_content() { "expected token-based truncation marker, got {}", output.content ); - let token_count = usize::try_from(tok.count(&output.content)).unwrap_or(usize::MAX); assert!( - token_count <= max_tokens, - "token count should not exceed limit: {token_count}" + output.content.contains("tokens truncated") + || output.content.contains("bytes truncated"), + "expected truncation marker, got {}", + output.content ); } other => panic!("unexpected history item: {other:?}"), @@ -316,8 +306,7 @@ fn record_items_truncates_custom_tool_call_output_content() { .truncation_policy .token_budget(); let mut history = ContextManager::new(); - let truncation_settings = TruncationSettings::new(TruncationPolicy::Tokens(max_tokens), model); - let tok = Tokenizer::try_default().expect("load tokenizer"); + let policy = TruncationPolicy::Tokens(max_tokens); let line = "custom output that is very long\n"; let long_output = line.repeat(2_500); let item = ResponseItem::CustomToolCallOutput { @@ -325,7 +314,7 @@ fn record_items_truncates_custom_tool_call_output_content() { output: long_output.clone(), }; - history.record_items([&item], &truncation_settings); + history.record_items([&item], policy); assert_eq!(history.items.len(), 1); match &history.items[0] { @@ -335,10 +324,10 @@ fn record_items_truncates_custom_tool_call_output_content() { output.contains("tokens truncated"), "expected token-based truncation marker, got {output}" ); - let token_count = usize::try_from(tok.count(output)).unwrap_or(usize::MAX); assert!( - token_count <= max_tokens, - "token count should not exceed limit: {token_count}" + output.contains("tokens truncated") + || output.contains("bytes truncated"), + "expected truncation marker, got {output}" ); } other => panic!("unexpected history item: {other:?}"), @@ -349,7 +338,7 @@ fn record_items_truncates_custom_tool_call_output_content() { fn record_items_respects_custom_token_limit() { let model = OPENAI_DEFAULT_MODEL; let mut history = ContextManager::new(); - let truncation_settings = TruncationSettings::new(TruncationPolicy::Tokens(10), model); + let policy = TruncationPolicy::Tokens(10); let long_output = "tokenized content repeated many times ".repeat(200); let item = ResponseItem::FunctionCallOutput { call_id: "call-custom-limit".to_string(), @@ -360,7 +349,7 @@ fn record_items_respects_custom_token_limit() { }, }; - history.record_items([&item], &truncation_settings); + history.record_items([&item], policy); let stored = match &history.items[0] { ResponseItem::FunctionCallOutput { output, .. } => output, diff --git a/codex-rs/core/src/error.rs b/codex-rs/core/src/error.rs index af2455eee2..944bda6565 100644 --- a/codex-rs/core/src/error.rs +++ b/codex-rs/core/src/error.rs @@ -3,7 +3,6 @@ use crate::exec::ExecToolCallOutput; use crate::token_data::KnownPlan; use crate::token_data::PlanType; use crate::truncate::TruncationPolicy; -use crate::truncate::TruncationSettings; use crate::truncate::truncate_text; use chrono::DateTime; use chrono::Datelike; @@ -15,7 +14,6 @@ use codex_protocol::protocol::RateLimitSnapshot; use reqwest::StatusCode; use serde_json; use std::io; -use std::sync::Arc; use std::time::Duration; use thiserror::Error; use tokio::task::JoinError; @@ -464,11 +462,7 @@ pub fn get_error_message_ui(e: &CodexErr) -> String { _ => e.to_string(), }; - let truncation_settings = TruncationSettings { - policy: TruncationPolicy::Bytes(ERROR_MESSAGE_UI_MAX_TOKENS), - tokenizer: Arc::new(None), - }; - truncate_text(&message, &truncation_settings) + truncate_text(&message, TruncationPolicy::Bytes(ERROR_MESSAGE_UI_MAX_TOKENS)) } #[cfg(test)] diff --git a/codex-rs/core/src/state/session.rs b/codex-rs/core/src/state/session.rs index f6ed12eacb..2dfa5199f1 100644 --- a/codex-rs/core/src/state/session.rs +++ b/codex-rs/core/src/state/session.rs @@ -7,7 +7,7 @@ use crate::context_manager::ContextManager; use crate::protocol::RateLimitSnapshot; use crate::protocol::TokenUsage; use crate::protocol::TokenUsageInfo; -use crate::truncate::TruncationSettings; +use crate::truncate::TruncationPolicy; /// Persistent, session-scoped state previously stored directly on `Session`. pub(crate) struct SessionState { @@ -28,12 +28,12 @@ impl SessionState { } // History helpers - pub(crate) fn record_items(&mut self, items: I, truncation_settings: &TruncationSettings) + pub(crate) fn record_items(&mut self, items: I, policy: TruncationPolicy) where I: IntoIterator, I::Item: std::ops::Deref, { - self.history.record_items(items, truncation_settings); + self.history.record_items(items, policy); } pub(crate) fn clone_history(&self) -> ContextManager { diff --git a/codex-rs/core/src/truncate.rs b/codex-rs/core/src/truncate.rs index 5d160bdf57..7970bf021f 100644 --- a/codex-rs/core/src/truncate.rs +++ b/codex-rs/core/src/truncate.rs @@ -2,20 +2,15 @@ //! and suffix on UTF-8 boundaries, and helpers for line/token‑based truncation //! used across the core crate. -use std::sync::Arc; - use codex_protocol::models::FunctionCallOutputContentItem; use codex_utils_string::take_bytes_at_char_boundary; use codex_utils_string::take_last_bytes_at_char_boundary; -use codex_utils_tokenizer::Tokenizer; use crate::config::Config; -/// Model-formatting limits: clients get full streams; only content sent to the model is truncated. -const TOKENIZER_STACK_SAFE_BYTES: usize = 1024 * 512; // 512 KiB const APPROX_BYTES_PER_TOKEN: usize = 4; -#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum TruncationPolicy { Bytes(usize), Tokens(usize), @@ -68,23 +63,6 @@ impl TruncationPolicy { } } -#[derive(Debug, Clone)] -pub struct TruncationSettings { - pub policy: TruncationPolicy, - pub tokenizer: Arc>, -} - -impl TruncationSettings { - pub fn new(policy: TruncationPolicy, model: &str) -> Self { - let tokenizer = Arc::new(Tokenizer::for_model(model).ok()); - Self { policy, tokenizer } - } - - pub fn tokenizer_ref(&self) -> Option<&Tokenizer> { - self.tokenizer.as_ref().as_ref() - } -} - /// Format a block of exec/tool output for model consumption, truncating by /// lines and bytes while preserving head and tail segments. pub(crate) fn truncate_with_line_bytes_budget(content: &str, bytes_budget: usize) -> String { @@ -100,17 +78,18 @@ pub(crate) fn truncate_with_line_bytes_budget(content: &str, bytes_budget: usize format!("Total output lines: {total_lines}\n\n{output}") } -pub(crate) fn truncate_text(content: &str, truncation_settings: &TruncationSettings) -> String { - match truncation_settings.policy { - TruncationPolicy::Bytes(bytes) => { - truncate_with_byte_estimate(content, bytes, TruncationSource::Bytes) - } +pub(crate) fn truncate_text(content: &str, policy: TruncationPolicy) -> String { + match policy { + TruncationPolicy::Bytes(bytes) => truncate_with_byte_estimate( + content, + bytes, + TruncationSource::Policy(TruncationPolicy::Bytes(bytes)), + ), TruncationPolicy::Tokens(tokens) => { let (truncated, _) = truncate_with_token_budget( content, tokens, - truncation_settings.tokenizer_ref(), - TruncationSource::Tokens, + TruncationSource::Policy(TruncationPolicy::Tokens(tokens)), ); truncated } @@ -122,11 +101,10 @@ pub(crate) fn truncate_text(content: &str, truncation_settings: &TruncationSetti /// items. pub(crate) fn truncate_function_output_items_to_token_limit( items: &[FunctionCallOutputContentItem], - truncation_settings: &TruncationSettings, + policy: TruncationPolicy, ) -> Vec { let mut out: Vec = Vec::with_capacity(items.len()); - let mut remaining_tokens = truncation_settings.policy.token_budget(); - let tokenizer = truncation_settings.tokenizer_ref(); + let mut remaining_tokens = policy.token_budget(); let mut omitted_text_items = 0usize; for it in items { @@ -137,18 +115,13 @@ pub(crate) fn truncate_function_output_items_to_token_limit( continue; } - let token_len = estimate_safe_token_count(text, tokenizer); + let token_len = estimate_safe_token_count(text); if token_len <= remaining_tokens { out.push(FunctionCallOutputContentItem::InputText { text: text.clone() }); remaining_tokens = remaining_tokens.saturating_sub(token_len); } else { - let snippet = truncate_text( - text, - &TruncationSettings { - policy: TruncationPolicy::Tokens(remaining_tokens), - tokenizer: Arc::clone(&truncation_settings.tokenizer), - }, - ); + let snippet = + truncate_text(text, TruncationPolicy::Tokens(remaining_tokens)); if snippet.is_empty() { omitted_text_items += 1; } else { @@ -181,7 +154,6 @@ pub(crate) fn truncate_function_output_items_to_token_limit( fn truncate_with_token_budget( s: &str, max_tokens: usize, - tokenizer: Option<&Tokenizer>, source: TruncationSource, ) -> (String, Option) { if s.is_empty() { @@ -196,111 +168,19 @@ fn truncate_with_token_budget( } } - let exceeds_stack_limit = byte_len > TOKENIZER_STACK_SAFE_BYTES; - - let more_than_double_the_budget = - max_tokens > 0 && byte_len > max_tokens.saturating_mul(APPROX_BYTES_PER_TOKEN) * 2; - - if exceeds_stack_limit || more_than_double_the_budget { - let truncated = truncate_with_byte_estimate( - s, - max_tokens.saturating_mul(APPROX_BYTES_PER_TOKEN), - source, - ); - let approx_total = approx_token_count(s); - if truncated == s { - (truncated, None) - } else { - (truncated, Some(approx_total)) - } - } else if let Some(tok) = tokenizer { - let encoded = tok.encode(s, false); - let total_tokens = encoded.len() as u64; - - if encoded.len() <= max_tokens { - (s.to_string(), None) - } else { - let truncated = truncate_with_tokenizer_path(tok, encoded, max_tokens, s, total_tokens); - (truncated, Some(total_tokens)) - } + let truncated = truncate_with_byte_estimate( + s, + max_tokens.saturating_mul(APPROX_BYTES_PER_TOKEN), + source, + ); + let approx_total = approx_token_count(s); + if truncated == s { + (truncated, None) } else { - let truncated = truncate_with_byte_estimate( - s, - max_tokens.saturating_mul(APPROX_BYTES_PER_TOKEN), - source, - ); - let approx_total = approx_token_count(s); - if truncated == s { - (truncated, None) - } else { - (truncated, Some(approx_total)) - } + (truncated, Some(approx_total)) } } -fn truncate_with_tokenizer_path( - tokenizer: &Tokenizer, - encoded: Vec, - max_budget: usize, - original: &str, - total_tokens: u64, -) -> String { - if max_budget == 0 { - return format_truncation_marker(TruncationSource::Tokens, total_tokens); - } - - if encoded.len() <= max_budget { - return original.to_string(); - } - - let mut guess_removed = total_tokens.saturating_sub(max_budget as u64).max(1); - for _ in 0..4 { - let marker = format_truncation_marker(TruncationSource::Tokens, guess_removed); - let marker_len = usize::try_from(tokenizer.count(&marker)).unwrap_or(usize::MAX); - if marker_len >= max_budget { - return marker; - } - - let keep_budget = max_budget - marker_len; - if keep_budget == 0 { - return marker; - } - - let (left_keep, right_keep) = split_budget(keep_budget); - let removed_tokens = encoded.len().saturating_sub(left_keep + right_keep) as u64; - let final_marker = format_truncation_marker(TruncationSource::Tokens, removed_tokens); - let final_marker_len = - usize::try_from(tokenizer.count(&final_marker)).unwrap_or(usize::MAX); - if final_marker_len == marker_len { - let (prefix, suffix) = - decode_token_segments(tokenizer, &encoded, left_keep, right_keep); - let out = assemble_truncated_output( - &prefix, - &suffix, - &final_marker, - NewlineMode::WhenSuffixPresent, - ); - return out; - } - - guess_removed = removed_tokens.max(1); - } - - let marker = format_truncation_marker(TruncationSource::Tokens, guess_removed); - let marker_len = usize::try_from(tokenizer.count(&marker)).unwrap_or(usize::MAX); - if marker_len >= max_budget { - return marker; - } - - let keep_budget = max_budget - marker_len; - if keep_budget == 0 { - return marker; - } - let (left_keep, right_keep) = split_budget(keep_budget); - let (prefix, suffix) = decode_token_segments(tokenizer, &encoded, left_keep, right_keep); - assemble_truncated_output(&prefix, &suffix, &marker, NewlineMode::WhenSuffixPresent) -} - /// Truncate a string using a byte budget derived from the token budget, without /// performing any real tokenization. This keeps the logic purely byte-based and /// uses a bytes placeholder in the truncated output. @@ -311,7 +191,10 @@ fn truncate_with_byte_estimate(s: &str, max_bytes: usize, source: TruncationSour if max_bytes == 0 { // No budget to show content; just report that everything was truncated. - let marker = format_truncation_marker(source, u64::try_from(s.len()).unwrap_or(u64::MAX)); + let marker = format_truncation_marker( + source, + removed_units_for_source(source, s.len()), + ); return marker; } @@ -321,7 +204,7 @@ fn truncate_with_byte_estimate(s: &str, max_bytes: usize, source: TruncationSour let total_bytes = s.len(); let removed_bytes = total_bytes.saturating_sub(max_bytes); - let marker = format_truncation_marker(source, u64::try_from(removed_bytes).unwrap_or(u64::MAX)); + let marker = format_truncation_marker(source, removed_units_for_source(source, removed_bytes)); let marker_len = marker.len(); if marker_len >= max_bytes { @@ -433,17 +316,19 @@ enum NewlineMode { #[derive(Clone, Copy)] pub enum TruncationSource { - Tokens, - Bytes, + Policy(TruncationPolicy), LineOmission { total_lines: usize }, ByteLimit { limit_bytes: usize }, } fn format_truncation_marker(source: TruncationSource, removed_count: u64) -> String { match source { - TruncationSource::Tokens => format!("[…{removed_count} tokens truncated…]"), - TruncationSource::Bytes => format!("[…{removed_count} bytes truncated…]"), - // will clean this up later + TruncationSource::Policy(TruncationPolicy::Tokens(_)) => { + format!("[…{removed_count} tokens truncated…]") + } + TruncationSource::Policy(TruncationPolicy::Bytes(_)) => { + format!("[…{removed_count} bytes truncated…]") + } TruncationSource::LineOmission { total_lines } => { format!("[... omitted {removed_count} of {total_lines} lines ...]") } @@ -458,25 +343,13 @@ fn split_budget(budget: usize) -> (usize, usize) { (left, budget - left) } -fn decode_token_segments( - tokenizer: &Tokenizer, - encoded: &[i32], - left_keep: usize, - right_keep: usize, -) -> (String, String) { - let prefix = if left_keep > 0 { - tokenizer.decode(&encoded[..left_keep]).unwrap_or_default() - } else { - String::new() - }; - let suffix = if right_keep > 0 { - tokenizer - .decode(&encoded[encoded.len() - right_keep..]) - .unwrap_or_default() - } else { - String::new() - }; - (prefix, suffix) +fn removed_units_for_source(source: TruncationSource, removed_bytes: usize) -> u64 { + match source { + TruncationSource::Policy(TruncationPolicy::Tokens(_)) => { + approx_tokens_from_byte_count(removed_bytes) + } + _ => u64::try_from(removed_bytes).unwrap_or(u64::MAX), + } } fn assemble_truncated_output( @@ -510,6 +383,10 @@ fn approx_bytes_for_tokens(tokens: usize) -> usize { tokens.saturating_mul(APPROX_BYTES_PER_TOKEN) } +fn approx_tokens_from_byte_count(bytes: usize) -> u64 { + (bytes as u64).saturating_add(3) / 4 +} + fn truncate_on_boundary(input: &str, max_len: usize) -> &str { if input.len() <= max_len { return input; @@ -553,18 +430,8 @@ fn error_on_double_truncation(content: &str) { } } -fn estimate_safe_token_count(text: &str, tokenizer: Option<&Tokenizer>) -> usize { - if text.is_empty() { - return 0; - } - - if text.len() > TOKENIZER_STACK_SAFE_BYTES { - return usize::try_from(approx_token_count(text)).unwrap_or(usize::MAX); - } - - tokenizer - .map(|tok| usize::try_from(tok.count(text)).unwrap_or(usize::MAX)) - .unwrap_or_else(|| usize::try_from(approx_token_count(text)).unwrap_or(usize::MAX)) +fn estimate_safe_token_count(text: &str) -> usize { + usize::try_from(approx_token_count(text)).unwrap_or(usize::MAX) } #[cfg(test)] @@ -573,14 +440,13 @@ mod tests { use crate::model_family::derive_default_model_family; use crate::model_family::find_family_for_model; + use super::approx_token_count; use super::TruncationPolicy; - use super::TruncationSettings; use super::TruncationSource; use super::truncate_function_output_items_to_token_limit; use super::truncate_with_line_bytes_budget; use super::truncate_with_token_budget; use codex_protocol::models::FunctionCallOutputContentItem; - use codex_utils_tokenizer::Tokenizer; use pretty_assertions::assert_eq; use regex_lite::Regex; @@ -611,72 +477,47 @@ mod tests { ) } - fn build_chunked_text( - chunk: &str, - chunk_tokens: usize, - target_tokens: usize, - ) -> (String, usize) { - let mut text = String::new(); - let mut tokens = 0; - while tokens + chunk_tokens <= target_tokens { - text.push_str(chunk); - tokens += chunk_tokens; - } - if text.is_empty() { - text.push_str(chunk); - tokens = chunk_tokens; - } - (text, tokens) - } - #[test] fn truncate_middle_returns_original_when_under_limit() { - let tok = Tokenizer::try_default().expect("load tokenizer"); let s = "short output"; - let limit = usize::try_from(tok.count(s)).unwrap_or(0) + 10; - let (out, original) = - truncate_with_token_budget(s, limit, Some(&tok), TruncationSource::Tokens); + let limit = 100; + let source = TruncationSource::Policy(TruncationPolicy::Tokens(limit)); + let (out, original) = truncate_with_token_budget(s, limit, source); assert_eq!(out, s); assert_eq!(original, None); } #[test] fn truncate_middle_reports_truncation_at_zero_limit() { - let tok = Tokenizer::try_default().expect("load tokenizer"); let s = "abcdef"; - let total = tok.count(s) as u64; - let (out, original) = - truncate_with_token_budget(s, 0, Some(&tok), TruncationSource::Tokens); + let source = TruncationSource::Policy(TruncationPolicy::Tokens(0)); + let (out, original) = truncate_with_token_budget(s, 0, source); assert!(out.contains("tokens truncated")); - assert_eq!(original, Some(total)); + assert_eq!(original, Some(approx_token_count(s))); } #[test] fn truncate_middle_enforces_token_budget() { - let tok = Tokenizer::try_default().expect("load tokenizer"); let s = "alpha beta gamma delta epsilon zeta eta theta iota kappa"; let max_tokens = 12; - let (out, original) = - truncate_with_token_budget(s, max_tokens, Some(&tok), TruncationSource::Tokens); + let source = TruncationSource::Policy(TruncationPolicy::Tokens(max_tokens)); + let (out, original) = truncate_with_token_budget(s, max_tokens, source); assert!(out.contains("tokens truncated")); - assert_eq!(original, Some(tok.count(s) as u64)); - let result_tokens = tok.count(&out) as usize; - assert!(result_tokens <= max_tokens); + assert_eq!(original, Some(approx_token_count(s))); + assert!(out.len() < s.len(), "truncated output should be shorter"); } #[test] fn truncate_middle_handles_utf8_content() { - let tok = Tokenizer::for_model(OPENAI_DEFAULT_MODEL).expect("load tokenizer"); let s = "😀😀😀😀😀😀😀😀😀😀\nsecond line with text\n"; let max_tokens = 8; - let (out, tokens) = - truncate_with_token_budget(s, max_tokens, Some(&tok), TruncationSource::Tokens); + let source = TruncationSource::Policy(TruncationPolicy::Tokens(max_tokens)); + let (out, tokens) = truncate_with_token_budget(s, max_tokens, source); assert!(out.contains("tokens truncated")); assert!(!out.contains('\u{fffd}')); - assert_eq!(tokens, Some(tok.count(s) as u64)); - let result_tokens = tok.count(&out) as usize; - assert!(result_tokens <= max_tokens); + assert_eq!(tokens, Some(approx_token_count(s))); + assert!(out.len() < s.len(), "UTF-8 content should be shortened"); } #[test] @@ -786,28 +627,13 @@ mod tests { #[test] fn truncates_across_multiple_under_limit_texts_and_reports_omitted() { - let tok = Tokenizer::try_default().expect("load tokenizer"); let chunk = "alpha beta gamma delta epsilon zeta eta theta iota kappa lambda mu nu xi omicron pi rho sigma tau upsilon phi chi psi omega.\n"; - let chunk_tokens = usize::try_from(tok.count(chunk)).unwrap_or(usize::MAX); + let chunk_tokens = usize::try_from(approx_token_count(chunk)).unwrap_or(usize::MAX); assert!(chunk_tokens > 0, "chunk must consume tokens"); - let limit = model_format_max_bytes(); - let target_each = limit.saturating_div(2).saturating_sub(chunk_tokens); - let (t1, t1_tokens) = build_chunked_text(chunk, chunk_tokens, target_each); - let (t2, t2_tokens) = build_chunked_text(chunk, chunk_tokens, target_each); - let remaining_after_t1_t2 = limit.saturating_sub(t1_tokens + t2_tokens); - assert!( - remaining_after_t1_t2 > 0, - "expected positive token remainder after first two items" - ); - - let repeats_for_t3 = remaining_after_t1_t2 / chunk_tokens + 2; - let t3 = chunk.repeat(repeats_for_t3); - let t3_tokens = usize::try_from(tok.count(&t3)).unwrap_or(usize::MAX); - assert!( - t3_tokens > remaining_after_t1_t2, - "t3 must exceed remaining tokens" - ); - + let limit = chunk_tokens * 3; + let t1 = chunk.to_string(); + let t2 = chunk.to_string(); + let t3 = chunk.repeat(10); let t4 = chunk.to_string(); let t5 = chunk.to_string(); @@ -822,9 +648,8 @@ mod tests { FunctionCallOutputContentItem::InputText { text: t5 }, ]; - let model = OPENAI_DEFAULT_MODEL; - let truncation_settings = TruncationSettings::new(TruncationPolicy::Tokens(limit), model); - let output = truncate_function_output_items_to_token_limit(&items, &truncation_settings); + let output = + truncate_function_output_items_to_token_limit(&items, TruncationPolicy::Tokens(limit)); // Expect: t1 (full), t2 (full), image, t3 (truncated), summary mentioning 2 omitted. assert_eq!(output.len(), 5); @@ -856,11 +681,6 @@ mod tests { fourth_text.contains("tokens truncated"), "expected marker in truncated snippet: {fourth_text}" ); - let truncated_tokens = usize::try_from(tok.count(fourth_text)).unwrap_or(usize::MAX); - assert!( - truncated_tokens <= remaining_after_t1_t2, - "truncated snippet must respect remaining token budget: {truncated_tokens} > {remaining_after_t1_t2}" - ); let summary_text = match &output[4] { FunctionCallOutputContentItem::InputText { text } => text, diff --git a/codex-rs/core/src/unified_exec/session.rs b/codex-rs/core/src/unified_exec/session.rs index 2c432c2885..1b4f4b268e 100644 --- a/codex-rs/core/src/unified_exec/session.rs +++ b/codex-rs/core/src/unified_exec/session.rs @@ -16,7 +16,6 @@ use crate::exec::StreamOutput; use crate::exec::is_likely_sandbox_denied; use crate::tools::sandboxing::ToolCtx; use crate::truncate::TruncationPolicy; -use crate::truncate::TruncationSettings; use crate::truncate::truncate_text; use codex_utils_pty::ExecCommandSession; use codex_utils_pty::SpawnedPty; @@ -172,11 +171,10 @@ impl UnifiedExecSession { }; if is_likely_sandbox_denied(self.sandbox_type(), &exec_output) { - let truncation_settings = TruncationSettings::new( + let snippet = truncate_text( + &aggregated_text, TruncationPolicy::Tokens(UNIFIED_EXEC_OUTPUT_MAX_TOKENS), - &ctx.turn.client.get_model(), ); - let snippet = truncate_text(&aggregated_text, &truncation_settings); let message = if snippet.is_empty() { format!("exit code {exit_code}") } else { diff --git a/codex-rs/core/src/unified_exec/session_manager.rs b/codex-rs/core/src/unified_exec/session_manager.rs index 009633f582..458885dd49 100644 --- a/codex-rs/core/src/unified_exec/session_manager.rs +++ b/codex-rs/core/src/unified_exec/session_manager.rs @@ -25,7 +25,7 @@ use crate::tools::runtimes::unified_exec::UnifiedExecRequest as UnifiedExecToolR use crate::tools::runtimes::unified_exec::UnifiedExecRuntime; use crate::tools::sandboxing::ToolCtx; use crate::truncate::TruncationPolicy; -use crate::truncate::TruncationSettings; +use crate::truncate::truncate_text; use super::ExecCommandRequest; use super::SessionEntry; @@ -74,9 +74,7 @@ impl UnifiedExecSessionManager { let text = String::from_utf8_lossy(&collected).to_string(); let model = context.turn.client.get_model(); - let truncation_settings = - TruncationSettings::new(TruncationPolicy::Tokens(max_tokens), &model); - let output = truncate_text(&text, &truncation_settings); + let output = truncate_text(&text, TruncationPolicy::Tokens(max_tokens)); let chunk_id = generate_chunk_id(); let has_exited = session.has_exited(); let stored_id = self @@ -185,9 +183,7 @@ impl UnifiedExecSessionManager { let text = String::from_utf8_lossy(&collected).to_string(); let model = turn_ref.client.get_model(); - let truncation_settings = - TruncationSettings::new(TruncationPolicy::Tokens(max_tokens), &model); - let output = truncate_text(&text, &truncation_settings); + let output = truncate_text(&text, TruncationPolicy::Tokens(max_tokens)); let tokenizer = Tokenizer::for_model(&model).ok(); let original_token_count = tokenizer.map(|tok| tok.count(&text) as usize); let chunk_id = generate_chunk_id(); From 903514bae38f6a65cf081061a36f365c693a71e9 Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Tue, 18 Nov 2025 01:49:33 -0800 Subject: [PATCH 54/68] clean --- codex-rs/core/src/codex.rs | 1 - codex-rs/core/src/compact.rs | 6 ++- .../core/src/context_manager/history_tests.rs | 4 +- codex-rs/core/src/error.rs | 5 +- .../core/src/tools/runtimes/unified_exec.rs | 4 +- codex-rs/core/src/truncate.rs | 52 ++++--------------- codex-rs/core/src/unified_exec/session.rs | 10 ++-- .../core/src/unified_exec/session_manager.rs | 4 +- 8 files changed, 25 insertions(+), 61 deletions(-) diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index 6301cca7c9..d878dfe103 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -295,7 +295,6 @@ impl TurnContext { .as_deref() .unwrap_or(compact::SUMMARIZATION_PROMPT) } - } #[allow(dead_code)] diff --git a/codex-rs/core/src/compact.rs b/codex-rs/core/src/compact.rs index 8d1fb73f85..ab6e6ed474 100644 --- a/codex-rs/core/src/compact.rs +++ b/codex-rs/core/src/compact.rs @@ -60,7 +60,10 @@ async fn run_compact_task_inner( let initial_input_for_turn: ResponseInputItem = ResponseInputItem::from(input); let mut history = sess.clone_history().await; - history.record_items(&[initial_input_for_turn.into()], turn_context.truncation_policy); + history.record_items( + &[initial_input_for_turn.into()], + turn_context.truncation_policy, + ); let mut truncated_count = 0usize; @@ -324,7 +327,6 @@ async fn drain_to_completed( #[cfg(test)] mod tests { - use crate::config::OPENAI_DEFAULT_MODEL; use super::*; use pretty_assertions::assert_eq; diff --git a/codex-rs/core/src/context_manager/history_tests.rs b/codex-rs/core/src/context_manager/history_tests.rs index 8f53406a4e..80d2227dde 100644 --- a/codex-rs/core/src/context_manager/history_tests.rs +++ b/codex-rs/core/src/context_manager/history_tests.rs @@ -325,8 +325,7 @@ fn record_items_truncates_custom_tool_call_output_content() { "expected token-based truncation marker, got {output}" ); assert!( - output.contains("tokens truncated") - || output.contains("bytes truncated"), + output.contains("tokens truncated") || output.contains("bytes truncated"), "expected truncation marker, got {output}" ); } @@ -336,7 +335,6 @@ fn record_items_truncates_custom_tool_call_output_content() { #[test] fn record_items_respects_custom_token_limit() { - let model = OPENAI_DEFAULT_MODEL; let mut history = ContextManager::new(); let policy = TruncationPolicy::Tokens(10); let long_output = "tokenized content repeated many times ".repeat(200); diff --git a/codex-rs/core/src/error.rs b/codex-rs/core/src/error.rs index 944bda6565..c34214f861 100644 --- a/codex-rs/core/src/error.rs +++ b/codex-rs/core/src/error.rs @@ -462,7 +462,10 @@ pub fn get_error_message_ui(e: &CodexErr) -> String { _ => e.to_string(), }; - truncate_text(&message, TruncationPolicy::Bytes(ERROR_MESSAGE_UI_MAX_TOKENS)) + truncate_text( + &message, + TruncationPolicy::Bytes(ERROR_MESSAGE_UI_MAX_TOKENS), + ) } #[cfg(test)] diff --git a/codex-rs/core/src/tools/runtimes/unified_exec.rs b/codex-rs/core/src/tools/runtimes/unified_exec.rs index 5a5e60b38b..cddac1924e 100644 --- a/codex-rs/core/src/tools/runtimes/unified_exec.rs +++ b/codex-rs/core/src/tools/runtimes/unified_exec.rs @@ -153,7 +153,7 @@ impl<'a> ToolRuntime for UnifiedExecRunt &mut self, req: &UnifiedExecRequest, attempt: &SandboxAttempt<'_>, - ctx: &ToolCtx<'_>, + _ctx: &ToolCtx<'_>, ) -> Result { let spec = build_command_spec( &req.command, @@ -168,7 +168,7 @@ impl<'a> ToolRuntime for UnifiedExecRunt .env_for(&spec) .map_err(|err| ToolError::Codex(err.into()))?; self.manager - .open_session_with_exec_env(&exec_env, ctx) + .open_session_with_exec_env(&exec_env) .await .map_err(|err| match err { UnifiedExecError::SandboxDenied { output, .. } => { diff --git a/codex-rs/core/src/truncate.rs b/codex-rs/core/src/truncate.rs index 7970bf021f..d17c42a342 100644 --- a/codex-rs/core/src/truncate.rs +++ b/codex-rs/core/src/truncate.rs @@ -120,8 +120,7 @@ pub(crate) fn truncate_function_output_items_to_token_limit( out.push(FunctionCallOutputContentItem::InputText { text: text.clone() }); remaining_tokens = remaining_tokens.saturating_sub(token_len); } else { - let snippet = - truncate_text(text, TruncationPolicy::Tokens(remaining_tokens)); + let snippet = truncate_text(text, TruncationPolicy::Tokens(remaining_tokens)); if snippet.is_empty() { omitted_text_items += 1; } else { @@ -168,11 +167,8 @@ fn truncate_with_token_budget( } } - let truncated = truncate_with_byte_estimate( - s, - max_tokens.saturating_mul(APPROX_BYTES_PER_TOKEN), - source, - ); + let truncated = + truncate_with_byte_estimate(s, max_tokens.saturating_mul(APPROX_BYTES_PER_TOKEN), source); let approx_total = approx_token_count(s); if truncated == s { (truncated, None) @@ -191,10 +187,7 @@ fn truncate_with_byte_estimate(s: &str, max_bytes: usize, source: TruncationSour if max_bytes == 0 { // No budget to show content; just report that everything was truncated. - let marker = format_truncation_marker( - source, - removed_units_for_source(source, s.len()), - ); + let marker = format_truncation_marker(source, removed_units_for_source(source, s.len())); return marker; } @@ -220,12 +213,7 @@ fn truncate_with_byte_estimate(s: &str, max_bytes: usize, source: TruncationSour suffix_start = prefix_end; } - let mut out = assemble_truncated_output( - &s[..prefix_end], - &s[suffix_start..], - &marker, - NewlineMode::Always, - ); + let mut out = assemble_truncated_output(&s[..prefix_end], &s[suffix_start..], &marker); if out.len() > max_bytes { let boundary = truncate_on_boundary(&out, max_bytes); @@ -308,12 +296,6 @@ fn truncate_formatted_exec_output( result } -#[derive(Clone, Copy)] -enum NewlineMode { - Always, - WhenSuffixPresent, -} - #[derive(Clone, Copy)] pub enum TruncationSource { Policy(TruncationPolicy), @@ -352,26 +334,12 @@ fn removed_units_for_source(source: TruncationSource, removed_bytes: usize) -> u } } -fn assemble_truncated_output( - prefix: &str, - suffix: &str, - marker: &str, - newline_mode: NewlineMode, -) -> String { - let newline_needed = match newline_mode { - NewlineMode::Always => true, - NewlineMode::WhenSuffixPresent => !suffix.is_empty(), - }; - let newline_len = if newline_needed { 1 } else { 0 }; - let mut out = String::with_capacity(prefix.len() + marker.len() + suffix.len() + newline_len); +fn assemble_truncated_output(prefix: &str, suffix: &str, marker: &str) -> String { + let mut out = String::with_capacity(prefix.len() + marker.len() + suffix.len() + 1); out.push_str(prefix); out.push_str(marker); - if newline_needed { - out.push('\n'); - } - if !suffix.is_empty() { - out.push_str(suffix); - } + out.push('\n'); + out.push_str(suffix); out } @@ -440,9 +408,9 @@ mod tests { use crate::model_family::derive_default_model_family; use crate::model_family::find_family_for_model; - use super::approx_token_count; use super::TruncationPolicy; use super::TruncationSource; + use super::approx_token_count; use super::truncate_function_output_items_to_token_limit; use super::truncate_with_line_bytes_budget; use super::truncate_with_token_budget; diff --git a/codex-rs/core/src/unified_exec/session.rs b/codex-rs/core/src/unified_exec/session.rs index 1b4f4b268e..fe00df139e 100644 --- a/codex-rs/core/src/unified_exec/session.rs +++ b/codex-rs/core/src/unified_exec/session.rs @@ -142,10 +142,7 @@ impl UnifiedExecSession { self.sandbox_type } - pub(super) async fn check_for_sandbox_denial( - &self, - ctx: &ToolCtx<'_>, - ) -> Result<(), UnifiedExecError> { + pub(super) async fn check_for_sandbox_denial(&self) -> Result<(), UnifiedExecError> { if self.sandbox_type() == SandboxType::None || !self.has_exited() { return Ok(()); } @@ -189,7 +186,6 @@ impl UnifiedExecSession { pub(super) async fn from_spawned( spawned: SpawnedPty, sandbox_type: SandboxType, - ctx: &ToolCtx<'_>, ) -> Result { let SpawnedPty { session, @@ -204,7 +200,7 @@ impl UnifiedExecSession { }; if exit_ready { - managed.check_for_sandbox_denial(ctx).await?; + managed.check_for_sandbox_denial().await?; return Ok(managed); } @@ -213,7 +209,7 @@ impl UnifiedExecSession { .await .is_ok() { - managed.check_for_sandbox_denial(ctx).await?; + managed.check_for_sandbox_denial().await?; } Ok(managed) diff --git a/codex-rs/core/src/unified_exec/session_manager.rs b/codex-rs/core/src/unified_exec/session_manager.rs index 458885dd49..43e4abb599 100644 --- a/codex-rs/core/src/unified_exec/session_manager.rs +++ b/codex-rs/core/src/unified_exec/session_manager.rs @@ -39,7 +39,6 @@ use super::generate_chunk_id; use super::resolve_max_tokens; use super::session::OutputBuffer; use super::session::UnifiedExecSession; -use crate::truncate::truncate_text; impl UnifiedExecSessionManager { pub(crate) async fn exec_command( @@ -420,7 +419,6 @@ impl UnifiedExecSessionManager { pub(crate) async fn open_session_with_exec_env( &self, env: &ExecEnv, - ctx: &ToolCtx<'_>, ) -> Result { let (program, args) = env .command @@ -436,7 +434,7 @@ impl UnifiedExecSessionManager { ) .await .map_err(|err| UnifiedExecError::create_session(err.to_string()))?; - UnifiedExecSession::from_spawned(spawned, env.sandbox, ctx).await + UnifiedExecSession::from_spawned(spawned, env.sandbox).await } pub(super) async fn open_session_with_sandbox( From 9572b6259254c51ac539f70f39cfd382846f4b37 Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Tue, 18 Nov 2025 01:49:41 -0800 Subject: [PATCH 55/68] clean --- codex-rs/core/src/unified_exec/session.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/codex-rs/core/src/unified_exec/session.rs b/codex-rs/core/src/unified_exec/session.rs index fe00df139e..82d6e41370 100644 --- a/codex-rs/core/src/unified_exec/session.rs +++ b/codex-rs/core/src/unified_exec/session.rs @@ -14,7 +14,6 @@ use crate::exec::ExecToolCallOutput; use crate::exec::SandboxType; use crate::exec::StreamOutput; use crate::exec::is_likely_sandbox_denied; -use crate::tools::sandboxing::ToolCtx; use crate::truncate::TruncationPolicy; use crate::truncate::truncate_text; use codex_utils_pty::ExecCommandSession; From cac5b3ea3784e614e7e55c04b78f5df1fc43f1c2 Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Tue, 18 Nov 2025 01:55:20 -0800 Subject: [PATCH 56/68] const --- codex-rs/core/src/tools/mod.rs | 2 +- codex-rs/core/tests/suite/truncation.rs | 203 ++++++++++++++++++++++++ 2 files changed, 204 insertions(+), 1 deletion(-) diff --git a/codex-rs/core/src/tools/mod.rs b/codex-rs/core/src/tools/mod.rs index 2e25bfdd4d..99d5f16506 100644 --- a/codex-rs/core/src/tools/mod.rs +++ b/codex-rs/core/src/tools/mod.rs @@ -21,7 +21,7 @@ pub(crate) const TELEMETRY_PREVIEW_TRUNCATION_NOTICE: &str = "[... telemetry preview truncated ...]"; // TODO(aibrahim): migrate shell tool to use truncate text and respect config value -const SHELL_OUTPUT_MAX_BYTES: usize = 2_500; +const SHELL_OUTPUT_MAX_BYTES: usize = 10_000; /// Format the combined exec output for sending back to the model. /// Includes exit code and duration metadata; truncates large bodies safely. diff --git a/codex-rs/core/tests/suite/truncation.rs b/codex-rs/core/tests/suite/truncation.rs index 5f770b9b44..a591d85f68 100644 --- a/codex-rs/core/tests/suite/truncation.rs +++ b/codex-rs/core/tests/suite/truncation.rs @@ -26,6 +26,7 @@ use core_test_support::responses::start_mock_server; use core_test_support::skip_if_no_network; use core_test_support::test_codex::test_codex; use core_test_support::wait_for_event; +use regex_lite::Regex; use escargot::CargoBuild; use serde_json::Value; use serde_json::json; @@ -453,3 +454,205 @@ async fn mcp_image_output_preserves_image_and_no_text_summary() -> Result<()> { Ok(()) } + +fn seq_output(up_to: usize) -> String { + (1..=up_to).map(|n| format!("{n}\n")).collect() +} + +fn extract_truncated_count(output: &str) -> u64 { + let re = Regex::new(r"\[\u2026(?P\d+) (tokens|bytes) truncated\u2026]").unwrap(); + let caps = re + .captures(output) + .unwrap_or_else(|| panic!("missing truncation marker in output: {output}")); + caps.name("count") + .unwrap() + .as_str() + .parse() + .expect("count parses") +} + +// Token-based policy should report token counts even when truncation is byte-estimated. +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn token_policy_marker_reports_tokens() -> Result<()> { + skip_if_no_network!(Ok(())); + + let server = start_mock_server().await; + let mut builder = test_codex().with_config(|config| { + config.model = "gpt-5.1-codex".to_string(); // token policy + config.model_family = + find_family_for_model("gpt-5.1-codex").expect("model family for gpt-5.1-codex"); + config.calls_output_max_tokens = Some(50); // small budget to force truncation + }); + let fixture = builder.build(&server).await?; + + let call_id = "shell-token-marker"; + let args = json!({ + "command": ["/bin/sh", "-c", "seq 1 400"], + "timeout_ms": 5_000, + }); + + mount_sse_once( + &server, + sse(vec![ + ev_response_created("resp-1"), + ev_function_call(call_id, "shell", &serde_json::to_string(&args)?), + ev_completed("resp-1"), + ]), + ) + .await; + let done_mock = mount_sse_once( + &server, + sse(vec![ + ev_assistant_message("msg-1", "done"), + ev_completed("resp-2"), + ]), + ) + .await; + + fixture + .submit_turn_with_policy("run the shell tool", SandboxPolicy::DangerFullAccess) + .await?; + + let output = done_mock + .single_request() + .function_call_output_text(call_id) + .context("shell output present")?; + + assert!( + output.contains("tokens truncated"), + "marker should use tokens: {output}" + ); + + let original = seq_output(400); + let budget_bytes = 50 * 4; + let removed_bytes = original.len().saturating_sub(budget_bytes); + let expected_tokens = (removed_bytes as u64 + 3) / 4; + let marker_tokens = extract_truncated_count(&output); + assert_eq!( + marker_tokens, expected_tokens, + "marker should report byte-estimated token count" + ); + + Ok(()) +} + +// Byte-based policy should report bytes removed. +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn byte_policy_marker_reports_bytes() -> Result<()> { + skip_if_no_network!(Ok(())); + + let server = start_mock_server().await; + let mut builder = test_codex().with_config(|config| { + config.model = "gpt-5.1".to_string(); // byte policy + config.model_family = find_family_for_model("gpt-5.1").expect("model family for gpt-5.1"); + config.calls_output_max_tokens = Some(50); // ~200 byte cap + }); + let fixture = builder.build(&server).await?; + + let call_id = "shell-byte-marker"; + let args = json!({ + "command": ["/bin/sh", "-c", "seq 1 400"], + "timeout_ms": 5_000, + }); + + mount_sse_once( + &server, + sse(vec![ + ev_response_created("resp-1"), + ev_function_call(call_id, "shell", &serde_json::to_string(&args)?), + ev_completed("resp-1"), + ]), + ) + .await; + let done_mock = mount_sse_once( + &server, + sse(vec![ + ev_assistant_message("msg-1", "done"), + ev_completed("resp-2"), + ]), + ) + .await; + + fixture + .submit_turn_with_policy("run the shell tool", SandboxPolicy::DangerFullAccess) + .await?; + + let output = done_mock + .single_request() + .function_call_output_text(call_id) + .context("shell output present")?; + + assert!( + output.contains("bytes truncated"), + "marker should use bytes: {output}" + ); + + let original = seq_output(400); + let budget_bytes = 50 * 4; + let removed_bytes = original.len().saturating_sub(budget_bytes) as u64; + let marker_bytes = extract_truncated_count(&output); + assert_eq!( + marker_bytes, removed_bytes, + "marker should report removed bytes" + ); + + Ok(()) +} + +// Overriding config with a large token budget should avoid truncation. +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn large_budget_avoids_truncation() -> Result<()> { + skip_if_no_network!(Ok(())); + + let server = start_mock_server().await; + let mut builder = test_codex().with_config(|config| { + config.model = "gpt-5.1-codex".to_string(); + config.model_family = + find_family_for_model("gpt-5.1-codex").expect("model family for gpt-5.1-codex"); + config.calls_output_max_tokens = Some(50_000); // ample budget + }); + let fixture = builder.build(&server).await?; + + let call_id = "shell-no-trunc"; + let args = json!({ + "command": ["/bin/sh", "-c", "seq 1 1000"], + "timeout_ms": 5_000, + }); + + mount_sse_once( + &server, + sse(vec![ + ev_response_created("resp-1"), + ev_function_call(call_id, "shell", &serde_json::to_string(&args)?), + ev_completed("resp-1"), + ]), + ) + .await; + let done_mock = mount_sse_once( + &server, + sse(vec![ + ev_assistant_message("msg-1", "done"), + ev_completed("resp-2"), + ]), + ) + .await; + + fixture + .submit_turn_with_policy( + "run big output without truncation", + SandboxPolicy::DangerFullAccess, + ) + .await?; + + let output = done_mock + .single_request() + .function_call_output_text(call_id) + .context("shell output present")?; + + assert!( + !output.contains("truncated"), + "output should remain untruncated with ample budget" + ); + + Ok(()) +} From ddeadc517649249fe38c5255bec2bbbf0deca678 Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Tue, 18 Nov 2025 02:00:02 -0800 Subject: [PATCH 57/68] const --- codex-rs/core/src/truncate.rs | 2 +- .../core/src/unified_exec/session_manager.rs | 14 ++++----- codex-rs/core/tests/suite/truncation.rs | 29 ++++++------------- 3 files changed, 15 insertions(+), 30 deletions(-) diff --git a/codex-rs/core/src/truncate.rs b/codex-rs/core/src/truncate.rs index d17c42a342..0525e01934 100644 --- a/codex-rs/core/src/truncate.rs +++ b/codex-rs/core/src/truncate.rs @@ -8,7 +8,7 @@ use codex_utils_string::take_last_bytes_at_char_boundary; use crate::config::Config; -const APPROX_BYTES_PER_TOKEN: usize = 4; +pub const APPROX_BYTES_PER_TOKEN: usize = 4; #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum TruncationPolicy { diff --git a/codex-rs/core/src/unified_exec/session_manager.rs b/codex-rs/core/src/unified_exec/session_manager.rs index 43e4abb599..f5c870f581 100644 --- a/codex-rs/core/src/unified_exec/session_manager.rs +++ b/codex-rs/core/src/unified_exec/session_manager.rs @@ -1,7 +1,6 @@ use std::path::PathBuf; use std::sync::Arc; -use codex_utils_tokenizer::Tokenizer; use tokio::sync::Notify; use tokio::sync::mpsc; use tokio::time::Duration; @@ -24,6 +23,7 @@ use crate::tools::orchestrator::ToolOrchestrator; use crate::tools::runtimes::unified_exec::UnifiedExecRequest as UnifiedExecToolRequest; use crate::tools::runtimes::unified_exec::UnifiedExecRuntime; use crate::tools::sandboxing::ToolCtx; +use crate::truncate::APPROX_BYTES_PER_TOKEN; use crate::truncate::TruncationPolicy; use crate::truncate::truncate_text; @@ -72,7 +72,6 @@ impl UnifiedExecSessionManager { let wall_time = Instant::now().saturating_duration_since(start); let text = String::from_utf8_lossy(&collected).to_string(); - let model = context.turn.client.get_model(); let output = truncate_text(&text, TruncationPolicy::Tokens(max_tokens)); let chunk_id = generate_chunk_id(); let has_exited = session.has_exited(); @@ -88,8 +87,7 @@ impl UnifiedExecSessionManager { // Only include a session_id in the response if the process is still alive. let session_id = if has_exited { None } else { Some(stored_id) }; - let tokenizer = Tokenizer::for_model(&model).ok(); - let original_token_count = tokenizer.map(|tok| tok.count(&text) as usize); + let original_token_count = text.len() / APPROX_BYTES_PER_TOKEN; let response = UnifiedExecResponse { event_call_id: context.call_id.clone(), @@ -98,7 +96,7 @@ impl UnifiedExecSessionManager { output, session_id, exit_code: exit_code.flatten(), - original_token_count, + original_token_count: Some(original_token_count), session_command: Some(request.command.clone()), }; @@ -181,10 +179,8 @@ impl UnifiedExecSessionManager { let wall_time = Instant::now().saturating_duration_since(start); let text = String::from_utf8_lossy(&collected).to_string(); - let model = turn_ref.client.get_model(); let output = truncate_text(&text, TruncationPolicy::Tokens(max_tokens)); - let tokenizer = Tokenizer::for_model(&model).ok(); - let original_token_count = tokenizer.map(|tok| tok.count(&text) as usize); + let original_token_count = text.len() / APPROX_BYTES_PER_TOKEN; let chunk_id = generate_chunk_id(); let status = self.refresh_session_state(session_id).await; @@ -208,7 +204,7 @@ impl UnifiedExecSessionManager { output, session_id, exit_code, - original_token_count, + original_token_count: Some(original_token_count), session_command: Some(session_command.clone()), }; diff --git a/codex-rs/core/tests/suite/truncation.rs b/codex-rs/core/tests/suite/truncation.rs index a591d85f68..9fd5eebde3 100644 --- a/codex-rs/core/tests/suite/truncation.rs +++ b/codex-rs/core/tests/suite/truncation.rs @@ -26,8 +26,8 @@ use core_test_support::responses::start_mock_server; use core_test_support::skip_if_no_network; use core_test_support::test_codex::test_codex; use core_test_support::wait_for_event; -use regex_lite::Regex; use escargot::CargoBuild; +use regex_lite::Regex; use serde_json::Value; use serde_json::json; use std::collections::HashMap; @@ -455,10 +455,6 @@ async fn mcp_image_output_preserves_image_and_no_text_summary() -> Result<()> { Ok(()) } -fn seq_output(up_to: usize) -> String { - (1..=up_to).map(|n| format!("{n}\n")).collect() -} - fn extract_truncated_count(output: &str) -> u64 { let re = Regex::new(r"\[\u2026(?P\d+) (tokens|bytes) truncated\u2026]").unwrap(); let caps = re @@ -487,7 +483,7 @@ async fn token_policy_marker_reports_tokens() -> Result<()> { let call_id = "shell-token-marker"; let args = json!({ - "command": ["/bin/sh", "-c", "seq 1 400"], + "command": ["/bin/sh", "-c", "seq 1 150"], "timeout_ms": 5_000, }); @@ -523,14 +519,10 @@ async fn token_policy_marker_reports_tokens() -> Result<()> { "marker should use tokens: {output}" ); - let original = seq_output(400); - let budget_bytes = 50 * 4; - let removed_bytes = original.len().saturating_sub(budget_bytes); - let expected_tokens = (removed_bytes as u64 + 3) / 4; let marker_tokens = extract_truncated_count(&output); - assert_eq!( - marker_tokens, expected_tokens, - "marker should report byte-estimated token count" + assert!( + marker_tokens > 0, + "token marker should carry a positive count" ); Ok(()) @@ -551,7 +543,7 @@ async fn byte_policy_marker_reports_bytes() -> Result<()> { let call_id = "shell-byte-marker"; let args = json!({ - "command": ["/bin/sh", "-c", "seq 1 400"], + "command": ["/bin/sh", "-c", "seq 1 150"], "timeout_ms": 5_000, }); @@ -587,13 +579,10 @@ async fn byte_policy_marker_reports_bytes() -> Result<()> { "marker should use bytes: {output}" ); - let original = seq_output(400); - let budget_bytes = 50 * 4; - let removed_bytes = original.len().saturating_sub(budget_bytes) as u64; let marker_bytes = extract_truncated_count(&output); - assert_eq!( - marker_bytes, removed_bytes, - "marker should report removed bytes" + assert!( + marker_bytes > 0, + "byte marker should carry a positive count" ); Ok(()) From 8ae4de458677e903fe5197710b9d54e9288f8b0f Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Tue, 18 Nov 2025 02:04:46 -0800 Subject: [PATCH 58/68] helpers --- codex-rs/core/tests/suite/unified_exec.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/codex-rs/core/tests/suite/unified_exec.rs b/codex-rs/core/tests/suite/unified_exec.rs index d870b64c45..0b52ce6986 100644 --- a/codex-rs/core/tests/suite/unified_exec.rs +++ b/codex-rs/core/tests/suite/unified_exec.rs @@ -1538,6 +1538,7 @@ PY let call_id = "uexec-large-output"; let args = serde_json::json!({ "cmd": script, + "max_output_tokens": 100, "yield_time_ms": 500, }); From b244de2b0f9cd3d816c0da6a181f073db79946ca Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Tue, 18 Nov 2025 10:09:26 -0800 Subject: [PATCH 59/68] tests --- codex-rs/core/src/codex.rs | 7 ---- codex-rs/core/src/compact_remote.rs | 5 ++- .../core/src/context_manager/history_tests.rs | 5 ++- codex-rs/core/src/error.rs | 6 ++-- codex-rs/core/tests/suite/truncation.rs | 35 ++----------------- codex-rs/core/tests/suite/unified_exec.rs | 5 +-- codex-rs/core/tests/suite/user_shell_cmd.rs | 2 +- codex-rs/utils/cache/src/lib.rs | 5 +-- 8 files changed, 16 insertions(+), 54 deletions(-) diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index 9964754b78..639400d15a 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -179,7 +179,6 @@ impl Codex { cwd: config.cwd.clone(), original_config_do_not_use: Arc::clone(&config), features: config.features.clone(), - output_max_tokens: config.calls_output_max_tokens, session_source, }; @@ -335,8 +334,6 @@ pub(crate) struct SessionConfiguration { /// Set of feature flags for this session features: Features, - output_max_tokens: Option, - // TODO(pakrym): Remove config from here original_config_do_not_use: Arc, /// Source of the session (cli, vscode, exec, mcp, ...) @@ -348,8 +345,6 @@ impl SessionConfiguration { let mut next_configuration = self.clone(); if let Some(model) = updates.model.clone() { next_configuration.model = model; - // TODO (aibrahim): recompute output_max_tokens/calls_output_max_tokens when the model changes so - // truncation budgets keep matching the current model. } if let Some(effort) = updates.reasoning_effort { next_configuration.model_reasoning_effort = effort; @@ -2578,7 +2573,6 @@ mod tests { cwd: config.cwd.clone(), original_config_do_not_use: Arc::clone(&config), features: Features::default(), - output_max_tokens: config.calls_output_max_tokens, session_source: SessionSource::Exec, }; @@ -2656,7 +2650,6 @@ mod tests { cwd: config.cwd.clone(), original_config_do_not_use: Arc::clone(&config), features: Features::default(), - output_max_tokens: config.calls_output_max_tokens, session_source: SessionSource::Exec, }; diff --git a/codex-rs/core/src/compact_remote.rs b/codex-rs/core/src/compact_remote.rs index 2c7d57eff2..1726aad6f9 100644 --- a/codex-rs/core/src/compact_remote.rs +++ b/codex-rs/core/src/compact_remote.rs @@ -50,7 +50,10 @@ async fn run_remote_compact_task_inner( let mut history = sess.clone_history().await; if !input.is_empty() { let initial_input_for_turn: ResponseInputItem = ResponseInputItem::from(input); - history.record_items(&[initial_input_for_turn.into()]); + history.record_items( + &[initial_input_for_turn.into()], + turn_context.truncation_policy, + ); } let prompt = Prompt { diff --git a/codex-rs/core/src/context_manager/history_tests.rs b/codex-rs/core/src/context_manager/history_tests.rs index 80d2227dde..507910ac72 100644 --- a/codex-rs/core/src/context_manager/history_tests.rs +++ b/codex-rs/core/src/context_manager/history_tests.rs @@ -258,7 +258,7 @@ fn normalization_retains_local_shell_outputs() { #[test] fn record_items_truncates_function_call_output_content() { - let model = OPENAI_DEFAULT_MODEL; + let model = "gpt-5.1-codex"; let max_tokens = find_family_for_model(model) .unwrap_or_else(|| derive_default_model_family(model)) .truncation_policy @@ -288,8 +288,7 @@ fn record_items_truncates_function_call_output_content() { output.content ); assert!( - output.content.contains("tokens truncated") - || output.content.contains("bytes truncated"), + output.content.contains("tokens truncated"), "expected truncation marker, got {}", output.content ); diff --git a/codex-rs/core/src/error.rs b/codex-rs/core/src/error.rs index c34214f861..9a42ec3d1b 100644 --- a/codex-rs/core/src/error.rs +++ b/codex-rs/core/src/error.rs @@ -20,8 +20,8 @@ use tokio::task::JoinError; pub type Result = std::result::Result; -/// Limit UI error messages to a reasonable token budget (~2 KiB of text). -const ERROR_MESSAGE_UI_MAX_TOKENS: usize = (2 * 1024) / 4; +/// Limit UI error messages to a reasonable size while keeping useful context. +const ERROR_MESSAGE_UI_MAX_BYTES: usize = 2 * 1024; // 4 KiB #[derive(Error, Debug)] pub enum SandboxErr { @@ -464,7 +464,7 @@ pub fn get_error_message_ui(e: &CodexErr) -> String { truncate_text( &message, - TruncationPolicy::Bytes(ERROR_MESSAGE_UI_MAX_TOKENS), + TruncationPolicy::Bytes(ERROR_MESSAGE_UI_MAX_BYTES), ) } diff --git a/codex-rs/core/tests/suite/truncation.rs b/codex-rs/core/tests/suite/truncation.rs index 9fd5eebde3..b193c05f4b 100644 --- a/codex-rs/core/tests/suite/truncation.rs +++ b/codex-rs/core/tests/suite/truncation.rs @@ -27,7 +27,6 @@ use core_test_support::skip_if_no_network; use core_test_support::test_codex::test_codex; use core_test_support::wait_for_event; use escargot::CargoBuild; -use regex_lite::Regex; use serde_json::Value; use serde_json::json; use std::collections::HashMap; @@ -455,18 +454,6 @@ async fn mcp_image_output_preserves_image_and_no_text_summary() -> Result<()> { Ok(()) } -fn extract_truncated_count(output: &str) -> u64 { - let re = Regex::new(r"\[\u2026(?P\d+) (tokens|bytes) truncated\u2026]").unwrap(); - let caps = re - .captures(output) - .unwrap_or_else(|| panic!("missing truncation marker in output: {output}")); - caps.name("count") - .unwrap() - .as_str() - .parse() - .expect("count parses") -} - // Token-based policy should report token counts even when truncation is byte-estimated. #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn token_policy_marker_reports_tokens() -> Result<()> { @@ -514,16 +501,7 @@ async fn token_policy_marker_reports_tokens() -> Result<()> { .function_call_output_text(call_id) .context("shell output present")?; - assert!( - output.contains("tokens truncated"), - "marker should use tokens: {output}" - ); - - let marker_tokens = extract_truncated_count(&output); - assert!( - marker_tokens > 0, - "token marker should carry a positive count" - ); + assert_regex_match(r"\[\u{2026}127 tokens truncated\u{2026}]", &output); Ok(()) } @@ -574,16 +552,7 @@ async fn byte_policy_marker_reports_bytes() -> Result<()> { .function_call_output_text(call_id) .context("shell output present")?; - assert!( - output.contains("bytes truncated"), - "marker should use bytes: {output}" - ); - - let marker_bytes = extract_truncated_count(&output); - assert!( - marker_bytes > 0, - "byte marker should carry a positive count" - ); + assert_regex_match(r"\[\u{2026}505 bytes truncated\u{2026}]", &output); Ok(()) } diff --git a/codex-rs/core/tests/suite/unified_exec.rs b/codex-rs/core/tests/suite/unified_exec.rs index 0b52ce6986..23f2c62b9c 100644 --- a/codex-rs/core/tests/suite/unified_exec.rs +++ b/codex-rs/core/tests/suite/unified_exec.rs @@ -1585,8 +1585,9 @@ PY let outputs = collect_tool_outputs(&bodies)?; let large_output = outputs.get(call_id).expect("missing large output summary"); - let output_text = &large_output.output; - assert_regex_match(r"(?s)tokens truncated", output_text); + let output_text = large_output.output.replace("\r\n", "\n"); + let truncated_pattern = r#"(?s)^(token token \n){5,}.*\[\u{2026}\d+ tokens truncated\u{2026}]\n(token token \n){5,}$"#; + assert_regex_match(truncated_pattern, &output_text); let original_tokens = large_output .original_token_count diff --git a/codex-rs/core/tests/suite/user_shell_cmd.rs b/codex-rs/core/tests/suite/user_shell_cmd.rs index 95c6269843..0e9585ba4b 100644 --- a/codex-rs/core/tests/suite/user_shell_cmd.rs +++ b/codex-rs/core/tests/suite/user_shell_cmd.rs @@ -270,7 +270,7 @@ async fn user_shell_command_is_truncated_only_once() -> anyhow::Result<()> { let server = start_mock_server().await; let mut builder = test_codex().with_config(|config| { - config.model = "gpt-5-codex".to_string(); + config.model = "gpt-5.1-codex".to_string(); config.model_family = find_family_for_model("gpt-5-codex").expect("gpt-5-codex is a model family"); }); diff --git a/codex-rs/utils/cache/src/lib.rs b/codex-rs/utils/cache/src/lib.rs index efabbced8b..743c289ffb 100644 --- a/codex-rs/utils/cache/src/lib.rs +++ b/codex-rs/utils/cache/src/lib.rs @@ -123,10 +123,7 @@ fn lock_if_runtime(m: &Mutex>) -> Option Date: Tue, 18 Nov 2025 10:14:35 -0800 Subject: [PATCH 60/68] tests --- codex-rs/core/src/truncate.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codex-rs/core/src/truncate.rs b/codex-rs/core/src/truncate.rs index 0525e01934..81e3ca1ca5 100644 --- a/codex-rs/core/src/truncate.rs +++ b/codex-rs/core/src/truncate.rs @@ -460,7 +460,7 @@ mod tests { let s = "abcdef"; let source = TruncationSource::Policy(TruncationPolicy::Tokens(0)); let (out, original) = truncate_with_token_budget(s, 0, source); - assert!(out.contains("tokens truncated")); + assert_eq!(out, "[…2 tokens truncated…]"); assert_eq!(original, Some(approx_token_count(s))); } From 91741d63656ad033087387632757b1ddf89d1f2b Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Tue, 18 Nov 2025 10:21:27 -0800 Subject: [PATCH 61/68] tests --- codex-rs/core/src/compact.rs | 7 +--- codex-rs/core/src/truncate.rs | 41 ++++++++++--------- .../core/src/unified_exec/session_manager.rs | 6 +-- 3 files changed, 26 insertions(+), 28 deletions(-) diff --git a/codex-rs/core/src/compact.rs b/codex-rs/core/src/compact.rs index d8eae0ee35..a274b5e8e0 100644 --- a/codex-rs/core/src/compact.rs +++ b/codex-rs/core/src/compact.rs @@ -15,6 +15,7 @@ use crate::protocol::TaskStartedEvent; use crate::protocol::TurnContextItem; use crate::protocol::WarningEvent; use crate::truncate::TruncationPolicy; +use crate::truncate::approx_token_count; use crate::truncate::truncate_text; use crate::util::backoff; use codex_protocol::items::TurnItem; @@ -251,7 +252,7 @@ fn build_compacted_history_with_limit( if remaining == 0 { break; } - let tokens = approximate_tokens(message); + let tokens = approx_token_count(message); if tokens <= remaining { selected_messages.push(message.clone()); remaining = remaining.saturating_sub(tokens); @@ -289,10 +290,6 @@ fn build_compacted_history_with_limit( history } -fn approximate_tokens(text: &str) -> usize { - text.len().saturating_add(3) / 4 -} - async fn drain_to_completed( sess: &Session, turn_context: &TurnContext, diff --git a/codex-rs/core/src/truncate.rs b/codex-rs/core/src/truncate.rs index 81e3ca1ca5..c1655a49c7 100644 --- a/codex-rs/core/src/truncate.rs +++ b/codex-rs/core/src/truncate.rs @@ -8,7 +8,7 @@ use codex_utils_string::take_last_bytes_at_char_boundary; use crate::config::Config; -pub const APPROX_BYTES_PER_TOKEN: usize = 4; +const APPROX_BYTES_PER_TOKEN: usize = 4; #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum TruncationPolicy { @@ -23,9 +23,9 @@ impl TruncationPolicy { match config.model_family.truncation_policy { TruncationPolicy::Bytes(family_bytes) => { if let Some(token_limit) = config_token_limit { - Self::Bytes(token_limit.saturating_mul(APPROX_BYTES_PER_TOKEN)) + Self::Bytes(approx_bytes_for_tokens(token_limit)) } else { - Self::Bytes(family_bytes.saturating_mul(APPROX_BYTES_PER_TOKEN)) + Self::Bytes(approx_bytes_for_tokens(family_bytes)) } } TruncationPolicy::Tokens(family_tokens) => { @@ -45,7 +45,9 @@ impl TruncationPolicy { /// bytes-per-token heuristic. pub fn token_budget(&self) -> usize { match self { - TruncationPolicy::Bytes(bytes) => bytes / APPROX_BYTES_PER_TOKEN, + TruncationPolicy::Bytes(bytes) => { + usize::try_from(approx_tokens_from_byte_count(*bytes)).unwrap_or(usize::MAX) + } TruncationPolicy::Tokens(tokens) => *tokens, } } @@ -58,7 +60,7 @@ impl TruncationPolicy { pub fn byte_budget(&self) -> usize { match self { TruncationPolicy::Bytes(bytes) => *bytes, - TruncationPolicy::Tokens(tokens) => tokens.saturating_mul(APPROX_BYTES_PER_TOKEN), + TruncationPolicy::Tokens(tokens) => approx_bytes_for_tokens(*tokens), } } } @@ -115,7 +117,7 @@ pub(crate) fn truncate_function_output_items_to_token_limit( continue; } - let token_len = estimate_safe_token_count(text); + let token_len = approx_token_count(text); if token_len <= remaining_tokens { out.push(FunctionCallOutputContentItem::InputText { text: text.clone() }); remaining_tokens = remaining_tokens.saturating_sub(token_len); @@ -167,9 +169,9 @@ fn truncate_with_token_budget( } } - let truncated = - truncate_with_byte_estimate(s, max_tokens.saturating_mul(APPROX_BYTES_PER_TOKEN), source); - let approx_total = approx_token_count(s); + let truncated = truncate_with_byte_estimate(s, approx_bytes_for_tokens(max_tokens), source); + let approx_total_usize = approx_token_count(s); + let approx_total = u64::try_from(approx_total_usize).unwrap_or(u64::MAX); if truncated == s { (truncated, None) } else { @@ -343,8 +345,9 @@ fn assemble_truncated_output(prefix: &str, suffix: &str, marker: &str) -> String out } -fn approx_token_count(text: &str) -> u64 { - (text.len() as u64).saturating_add(3) / 4 +pub(crate) fn approx_token_count(text: &str) -> usize { + let len = text.len(); + len.saturating_add(APPROX_BYTES_PER_TOKEN.saturating_sub(1)) / APPROX_BYTES_PER_TOKEN } fn approx_bytes_for_tokens(tokens: usize) -> usize { @@ -352,7 +355,9 @@ fn approx_bytes_for_tokens(tokens: usize) -> usize { } fn approx_tokens_from_byte_count(bytes: usize) -> u64 { - (bytes as u64).saturating_add(3) / 4 + let bytes_u64 = bytes as u64; + bytes_u64.saturating_add((APPROX_BYTES_PER_TOKEN as u64).saturating_sub(1)) + / (APPROX_BYTES_PER_TOKEN as u64) } fn truncate_on_boundary(input: &str, max_len: usize) -> &str { @@ -398,10 +403,6 @@ fn error_on_double_truncation(content: &str) { } } -fn estimate_safe_token_count(text: &str) -> usize { - usize::try_from(approx_token_count(text)).unwrap_or(usize::MAX) -} - #[cfg(test)] mod tests { use crate::config::OPENAI_DEFAULT_MODEL; @@ -461,7 +462,7 @@ mod tests { let source = TruncationSource::Policy(TruncationPolicy::Tokens(0)); let (out, original) = truncate_with_token_budget(s, 0, source); assert_eq!(out, "[…2 tokens truncated…]"); - assert_eq!(original, Some(approx_token_count(s))); + assert_eq!(original, Some(approx_token_count(s) as u64)); } #[test] @@ -471,7 +472,7 @@ mod tests { let source = TruncationSource::Policy(TruncationPolicy::Tokens(max_tokens)); let (out, original) = truncate_with_token_budget(s, max_tokens, source); assert!(out.contains("tokens truncated")); - assert_eq!(original, Some(approx_token_count(s))); + assert_eq!(original, Some(approx_token_count(s) as u64)); assert!(out.len() < s.len(), "truncated output should be shorter"); } @@ -484,7 +485,7 @@ mod tests { assert!(out.contains("tokens truncated")); assert!(!out.contains('\u{fffd}')); - assert_eq!(tokens, Some(approx_token_count(s))); + assert_eq!(tokens, Some(approx_token_count(s) as u64)); assert!(out.len() < s.len(), "UTF-8 content should be shortened"); } @@ -596,7 +597,7 @@ mod tests { #[test] fn truncates_across_multiple_under_limit_texts_and_reports_omitted() { let chunk = "alpha beta gamma delta epsilon zeta eta theta iota kappa lambda mu nu xi omicron pi rho sigma tau upsilon phi chi psi omega.\n"; - let chunk_tokens = usize::try_from(approx_token_count(chunk)).unwrap_or(usize::MAX); + let chunk_tokens = approx_token_count(chunk); assert!(chunk_tokens > 0, "chunk must consume tokens"); let limit = chunk_tokens * 3; let t1 = chunk.to_string(); diff --git a/codex-rs/core/src/unified_exec/session_manager.rs b/codex-rs/core/src/unified_exec/session_manager.rs index f5c870f581..57c60f2b84 100644 --- a/codex-rs/core/src/unified_exec/session_manager.rs +++ b/codex-rs/core/src/unified_exec/session_manager.rs @@ -23,8 +23,8 @@ use crate::tools::orchestrator::ToolOrchestrator; use crate::tools::runtimes::unified_exec::UnifiedExecRequest as UnifiedExecToolRequest; use crate::tools::runtimes::unified_exec::UnifiedExecRuntime; use crate::tools::sandboxing::ToolCtx; -use crate::truncate::APPROX_BYTES_PER_TOKEN; use crate::truncate::TruncationPolicy; +use crate::truncate::approx_token_count; use crate::truncate::truncate_text; use super::ExecCommandRequest; @@ -87,7 +87,7 @@ impl UnifiedExecSessionManager { // Only include a session_id in the response if the process is still alive. let session_id = if has_exited { None } else { Some(stored_id) }; - let original_token_count = text.len() / APPROX_BYTES_PER_TOKEN; + let original_token_count = approx_token_count(&text); let response = UnifiedExecResponse { event_call_id: context.call_id.clone(), @@ -180,7 +180,7 @@ impl UnifiedExecSessionManager { let text = String::from_utf8_lossy(&collected).to_string(); let output = truncate_text(&text, TruncationPolicy::Tokens(max_tokens)); - let original_token_count = text.len() / APPROX_BYTES_PER_TOKEN; + let original_token_count = approx_token_count(&text); let chunk_id = generate_chunk_id(); let status = self.refresh_session_state(session_id).await; From 3027a599c144d21d86e64ce80d4ca82d19276c6b Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Tue, 18 Nov 2025 10:23:17 -0800 Subject: [PATCH 62/68] tests --- codex-rs/core/src/compact.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/codex-rs/core/src/compact.rs b/codex-rs/core/src/compact.rs index a274b5e8e0..33d38091f6 100644 --- a/codex-rs/core/src/compact.rs +++ b/codex-rs/core/src/compact.rs @@ -441,8 +441,7 @@ mod tests { }; assert!( - truncated_text.contains("tokens truncated") - || truncated_text.contains("bytes truncated"), + truncated_text.contains("tokens truncated"), "expected truncation marker in truncated user message" ); assert!( From e5c77dd10d46c41cbe4ae33ed110dd527a5cb867 Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Tue, 18 Nov 2025 10:27:49 -0800 Subject: [PATCH 63/68] test --- codex-rs/core/src/context_manager/history.rs | 4 +-- codex-rs/core/src/truncate.rs | 38 ++++++++++++-------- 2 files changed, 26 insertions(+), 16 deletions(-) diff --git a/codex-rs/core/src/context_manager/history.rs b/codex-rs/core/src/context_manager/history.rs index bc9433756b..af60ada9fe 100644 --- a/codex-rs/core/src/context_manager/history.rs +++ b/codex-rs/core/src/context_manager/history.rs @@ -1,7 +1,7 @@ use crate::codex::TurnContext; use crate::context_manager::normalize; use crate::truncate::TruncationPolicy; -use crate::truncate::truncate_function_output_items_to_token_limit; +use crate::truncate::truncate_function_output_items_with_policy; use crate::truncate::truncate_text; use codex_protocol::models::FunctionCallOutputPayload; use codex_protocol::models::ResponseItem; @@ -151,7 +151,7 @@ impl ContextManager { let truncated_items = output .content_items .as_ref() - .map(|items| truncate_function_output_items_to_token_limit(items, policy)); + .map(|items| truncate_function_output_items_with_policy(items, policy)); ResponseItem::FunctionCallOutput { call_id: call_id.clone(), output: FunctionCallOutputPayload { diff --git a/codex-rs/core/src/truncate.rs b/codex-rs/core/src/truncate.rs index c1655a49c7..0fd094c464 100644 --- a/codex-rs/core/src/truncate.rs +++ b/codex-rs/core/src/truncate.rs @@ -97,38 +97,48 @@ pub(crate) fn truncate_text(content: &str, policy: TruncationPolicy) -> String { } } } -/// Globally truncate function output items to fit within -/// `max_tokens` tokens by preserving as many -/// text/image items as possible and appending a summary for any omitted text -/// items. -pub(crate) fn truncate_function_output_items_to_token_limit( +/// Globally truncate function output items to fit within the given +/// truncation policy's budget, preserving as many text/image items as +/// possible and appending a summary for any omitted text items. +pub(crate) fn truncate_function_output_items_with_policy( items: &[FunctionCallOutputContentItem], policy: TruncationPolicy, ) -> Vec { let mut out: Vec = Vec::with_capacity(items.len()); - let mut remaining_tokens = policy.token_budget(); + let mut remaining_budget = match policy { + TruncationPolicy::Bytes(_) => policy.byte_budget(), + TruncationPolicy::Tokens(_) => policy.token_budget(), + }; let mut omitted_text_items = 0usize; for it in items { match it { FunctionCallOutputContentItem::InputText { text } => { - if remaining_tokens == 0 { + if remaining_budget == 0 { omitted_text_items += 1; continue; } - let token_len = approx_token_count(text); - if token_len <= remaining_tokens { + let cost = match policy { + TruncationPolicy::Bytes(_) => text.len(), + TruncationPolicy::Tokens(_) => approx_token_count(text), + }; + + if cost <= remaining_budget { out.push(FunctionCallOutputContentItem::InputText { text: text.clone() }); - remaining_tokens = remaining_tokens.saturating_sub(token_len); + remaining_budget = remaining_budget.saturating_sub(cost); } else { - let snippet = truncate_text(text, TruncationPolicy::Tokens(remaining_tokens)); + let snippet_policy = match policy { + TruncationPolicy::Bytes(_) => TruncationPolicy::Bytes(remaining_budget), + TruncationPolicy::Tokens(_) => TruncationPolicy::Tokens(remaining_budget), + }; + let snippet = truncate_text(text, snippet_policy); if snippet.is_empty() { omitted_text_items += 1; } else { out.push(FunctionCallOutputContentItem::InputText { text: snippet }); } - remaining_tokens = 0; + remaining_budget = 0; } } FunctionCallOutputContentItem::InputImage { image_url } => { @@ -412,7 +422,7 @@ mod tests { use super::TruncationPolicy; use super::TruncationSource; use super::approx_token_count; - use super::truncate_function_output_items_to_token_limit; + use super::truncate_function_output_items_with_policy; use super::truncate_with_line_bytes_budget; use super::truncate_with_token_budget; use codex_protocol::models::FunctionCallOutputContentItem; @@ -618,7 +628,7 @@ mod tests { ]; let output = - truncate_function_output_items_to_token_limit(&items, TruncationPolicy::Tokens(limit)); + truncate_function_output_items_with_policy(&items, TruncationPolicy::Tokens(limit)); // Expect: t1 (full), t2 (full), image, t3 (truncated), summary mentioning 2 omitted. assert_eq!(output.len(), 5); From f7a5f6901c9273fc09c79ef8855c054a8ee1d797 Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Tue, 18 Nov 2025 10:30:31 -0800 Subject: [PATCH 64/68] name --- codex-rs/core/src/config/mod.rs | 14 +++++++------- codex-rs/core/src/truncate.rs | 2 +- codex-rs/core/tests/suite/truncation.rs | 6 +++--- docs/config.md | 2 +- docs/example-config.md | 2 +- 5 files changed, 13 insertions(+), 13 deletions(-) diff --git a/codex-rs/core/src/config/mod.rs b/codex-rs/core/src/config/mod.rs index 2474a5aeb2..0c00da9a48 100644 --- a/codex-rs/core/src/config/mod.rs +++ b/codex-rs/core/src/config/mod.rs @@ -196,7 +196,7 @@ pub struct Config { pub project_doc_fallback_filenames: Vec, /// Token budget applied when storing tool/function outputs in the context manager. - pub calls_output_max_tokens: Option, + pub tool_output_token_limit: Option, /// Directory containing all Codex state (defaults to `~/.codex` but can be /// overridden by the `CODEX_HOME` environment variable). @@ -640,7 +640,7 @@ pub struct ConfigToml { pub project_doc_fallback_filenames: Option>, /// Token budget applied when storing tool/function outputs in the context manager. - pub calls_output_max_tokens: Option, + pub tool_output_token_limit: Option, /// Profile to use from the `profiles` map. pub profile: Option, @@ -1215,7 +1215,7 @@ impl Config { } }) .collect(), - calls_output_max_tokens: cfg.calls_output_max_tokens, + tool_output_token_limit: cfg.tool_output_token_limit, codex_home, history, file_opener: cfg.file_opener.unwrap_or(UriBasedFileOpener::VsCode), @@ -2968,7 +2968,7 @@ model_verbosity = "high" model_providers: fixture.model_provider_map.clone(), project_doc_max_bytes: PROJECT_DOC_MAX_BYTES, project_doc_fallback_filenames: Vec::new(), - calls_output_max_tokens: None, + tool_output_token_limit: None, codex_home: fixture.codex_home(), history: History::default(), file_opener: UriBasedFileOpener::VsCode, @@ -3040,7 +3040,7 @@ model_verbosity = "high" model_providers: fixture.model_provider_map.clone(), project_doc_max_bytes: PROJECT_DOC_MAX_BYTES, project_doc_fallback_filenames: Vec::new(), - calls_output_max_tokens: None, + tool_output_token_limit: None, codex_home: fixture.codex_home(), history: History::default(), file_opener: UriBasedFileOpener::VsCode, @@ -3127,7 +3127,7 @@ model_verbosity = "high" model_providers: fixture.model_provider_map.clone(), project_doc_max_bytes: PROJECT_DOC_MAX_BYTES, project_doc_fallback_filenames: Vec::new(), - calls_output_max_tokens: None, + tool_output_token_limit: None, codex_home: fixture.codex_home(), history: History::default(), file_opener: UriBasedFileOpener::VsCode, @@ -3200,7 +3200,7 @@ model_verbosity = "high" model_providers: fixture.model_provider_map.clone(), project_doc_max_bytes: PROJECT_DOC_MAX_BYTES, project_doc_fallback_filenames: Vec::new(), - calls_output_max_tokens: None, + tool_output_token_limit: None, codex_home: fixture.codex_home(), history: History::default(), file_opener: UriBasedFileOpener::VsCode, diff --git a/codex-rs/core/src/truncate.rs b/codex-rs/core/src/truncate.rs index 0fd094c464..acaa18414d 100644 --- a/codex-rs/core/src/truncate.rs +++ b/codex-rs/core/src/truncate.rs @@ -18,7 +18,7 @@ pub enum TruncationPolicy { impl TruncationPolicy { pub fn new(config: &Config) -> Self { - let config_token_limit = config.calls_output_max_tokens; + let config_token_limit = config.tool_output_token_limit; match config.model_family.truncation_policy { TruncationPolicy::Bytes(family_bytes) => { diff --git a/codex-rs/core/tests/suite/truncation.rs b/codex-rs/core/tests/suite/truncation.rs index b193c05f4b..3cbbc6bd57 100644 --- a/codex-rs/core/tests/suite/truncation.rs +++ b/codex-rs/core/tests/suite/truncation.rs @@ -464,7 +464,7 @@ async fn token_policy_marker_reports_tokens() -> Result<()> { config.model = "gpt-5.1-codex".to_string(); // token policy config.model_family = find_family_for_model("gpt-5.1-codex").expect("model family for gpt-5.1-codex"); - config.calls_output_max_tokens = Some(50); // small budget to force truncation + config.tool_output_token_limit = Some(50); // small budget to force truncation }); let fixture = builder.build(&server).await?; @@ -515,7 +515,7 @@ async fn byte_policy_marker_reports_bytes() -> Result<()> { let mut builder = test_codex().with_config(|config| { config.model = "gpt-5.1".to_string(); // byte policy config.model_family = find_family_for_model("gpt-5.1").expect("model family for gpt-5.1"); - config.calls_output_max_tokens = Some(50); // ~200 byte cap + config.tool_output_token_limit = Some(50); // ~200 byte cap }); let fixture = builder.build(&server).await?; @@ -567,7 +567,7 @@ async fn large_budget_avoids_truncation() -> Result<()> { config.model = "gpt-5.1-codex".to_string(); config.model_family = find_family_for_model("gpt-5.1-codex").expect("model family for gpt-5.1-codex"); - config.calls_output_max_tokens = Some(50_000); // ample budget + config.tool_output_token_limit = Some(50_000); // ample budget }); let fixture = builder.build(&server).await?; diff --git a/docs/config.md b/docs/config.md index 750d0167b6..ddfe2ff7c4 100644 --- a/docs/config.md +++ b/docs/config.md @@ -925,7 +925,7 @@ Valid values: | `model_provider` | string | Provider id from `model_providers` (default: `openai`). | | `model_context_window` | number | Context window tokens. | | `model_max_output_tokens` | number | Max output tokens. | -| `calls_output_max_tokens` | number | Token budget for stored function/tool outputs in history (default: 2,560 tokens). | +| `tool_output_token_limit` | number | Token budget for stored function/tool outputs in history (default: 2,560 tokens). | | `approval_policy` | `untrusted` \| `on-failure` \| `on-request` \| `never` | When to prompt for approval. | | `sandbox_mode` | `read-only` \| `workspace-write` \| `danger-full-access` | OS sandbox policy. | | `sandbox_workspace_write.writable_roots` | array | Extra writable roots in workspace‑write. | diff --git a/docs/example-config.md b/docs/example-config.md index 33f4e064cc..b2da427314 100644 --- a/docs/example-config.md +++ b/docs/example-config.md @@ -33,7 +33,7 @@ model_provider = "openai" # model_context_window = 128000 # tokens; default: auto for model # model_max_output_tokens = 8192 # tokens; default: auto for model # model_auto_compact_token_limit = 0 # disable/override auto; default: model family specific -# calls_output_max_tokens = 10000 # tokens stored per tool output; default: 10000 for gpt-5.1-codex +# tool_output_token_limit = 10000 # tokens stored per tool output; default: 10000 for gpt-5.1-codex ################################################################################ # Reasoning & Verbosity (Responses API capable models) From c380dae7555cac9bd995463a22bcac83e70f98a7 Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Tue, 18 Nov 2025 10:55:16 -0800 Subject: [PATCH 65/68] name --- .../core/src/context_manager/history_tests.rs | 69 ++++++------------- 1 file changed, 22 insertions(+), 47 deletions(-) diff --git a/codex-rs/core/src/context_manager/history_tests.rs b/codex-rs/core/src/context_manager/history_tests.rs index 507910ac72..e75dbf2ff9 100644 --- a/codex-rs/core/src/context_manager/history_tests.rs +++ b/codex-rs/core/src/context_manager/history_tests.rs @@ -1,7 +1,4 @@ use super::*; -use crate::config::OPENAI_DEFAULT_MODEL; -use crate::model_family::derive_default_model_family; -use crate::model_family::find_family_for_model; use crate::truncate; use crate::truncate::TruncationPolicy; use codex_git::GhostCommit; @@ -15,15 +12,8 @@ use codex_protocol::models::ReasoningItemReasoningSummary; use pretty_assertions::assert_eq; use regex_lite::Regex; -// TODO(aibrahim): to be removed const EXEC_FORMAT_MAX_LINES: usize = 256; - -fn exec_format_max_bytes() -> usize { - find_family_for_model(OPENAI_DEFAULT_MODEL) - .unwrap_or_else(|| derive_default_model_family(OPENAI_DEFAULT_MODEL)) - .truncation_policy - .byte_budget() -} +const EXEC_FORMAT_MAX_BYTES: usize = 10_000; fn assistant_msg(text: &str) -> ResponseItem { ResponseItem::Message { @@ -36,13 +26,10 @@ fn assistant_msg(text: &str) -> ResponseItem { } fn create_history_with_items(items: Vec) -> ContextManager { - let model = OPENAI_DEFAULT_MODEL; - let max_tokens = find_family_for_model(model) - .unwrap_or_else(|| derive_default_model_family(model)) - .truncation_policy - .token_budget(); let mut h = ContextManager::new(); - h.record_items(items.iter(), TruncationPolicy::Tokens(max_tokens)); + // Use a generous but fixed token budget; tests only rely on truncation + // behavior, not on a specific model's token limit. + h.record_items(items.iter(), TruncationPolicy::Tokens(10_000)); h } @@ -72,11 +59,7 @@ fn reasoning_msg(text: &str) -> ResponseItem { #[test] fn filters_non_api_messages() { let mut h = ContextManager::default(); - let max_tokens = find_family_for_model(OPENAI_DEFAULT_MODEL) - .unwrap_or_else(|| derive_default_model_family(OPENAI_DEFAULT_MODEL)) - .truncation_policy - .token_budget(); - let policy = TruncationPolicy::Tokens(max_tokens); + let policy = TruncationPolicy::Tokens(10_000); // System message is not API messages; Other is ignored. let system = ResponseItem::Message { id: None, @@ -258,13 +241,10 @@ fn normalization_retains_local_shell_outputs() { #[test] fn record_items_truncates_function_call_output_content() { - let model = "gpt-5.1-codex"; - let max_tokens = find_family_for_model(model) - .unwrap_or_else(|| derive_default_model_family(model)) - .truncation_policy - .token_budget(); let mut history = ContextManager::new(); - let policy = TruncationPolicy::Tokens(max_tokens); + // Any reasonably small token budget works; the test only cares that + // truncation happens and the marker is present. + let policy = TruncationPolicy::Tokens(1_000); let long_line = "a very long line to trigger truncation\n"; let long_output = long_line.repeat(2_500); let item = ResponseItem::FunctionCallOutput { @@ -299,13 +279,8 @@ fn record_items_truncates_function_call_output_content() { #[test] fn record_items_truncates_custom_tool_call_output_content() { - let model = OPENAI_DEFAULT_MODEL; - let max_tokens = find_family_for_model(model) - .unwrap_or_else(|| derive_default_model_family(model)) - .truncation_policy - .token_budget(); let mut history = ContextManager::new(); - let policy = TruncationPolicy::Tokens(max_tokens); + let policy = TruncationPolicy::Tokens(1_000); let line = "custom output that is very long\n"; let long_output = line.repeat(2_500); let item = ResponseItem::CustomToolCallOutput { @@ -368,7 +343,7 @@ fn assert_truncated_message_matches(message: &str, line: &str, total_lines: usiz .expect("missing body capture") .as_str(); assert!( - body.len() <= exec_format_max_bytes(), + body.len() <= EXEC_FORMAT_MAX_BYTES, "body exceeds byte limit: {} bytes", body.len() ); @@ -384,7 +359,7 @@ fn truncated_message_pattern(line: &str, total_lines: usize) -> String { if omitted == 0 { return format!( r"(?s)^Total output lines: {total_lines}\n\n(?P{escaped_line}.*\n\[\.{{3}} removed \d+ bytes to fit {max_bytes} byte limit \.{{3}}]\n\n.*)$", - max_bytes = exec_format_max_bytes(), + max_bytes = EXEC_FORMAT_MAX_BYTES, ); } format!( @@ -397,8 +372,7 @@ fn format_exec_output_truncates_large_error() { let line = "very long execution error line that should trigger truncation\n"; let large_error = line.repeat(2_500); // way beyond both byte and line limits - let truncated = - truncate::truncate_with_line_bytes_budget(&large_error, exec_format_max_bytes()); + let truncated = truncate::truncate_with_line_bytes_budget(&large_error, EXEC_FORMAT_MAX_BYTES); let total_lines = large_error.lines().count(); assert_truncated_message_matches(&truncated, line, total_lines); @@ -407,14 +381,15 @@ fn format_exec_output_truncates_large_error() { #[test] fn format_exec_output_marks_byte_truncation_without_omitted_lines() { - let max_bytes = exec_format_max_bytes(); - let long_line = "a".repeat(max_bytes + 50); - let truncated = truncate::truncate_with_line_bytes_budget(&long_line, max_bytes); + let long_line = "a".repeat(EXEC_FORMAT_MAX_BYTES + 50); + let truncated = truncate::truncate_with_line_bytes_budget(&long_line, EXEC_FORMAT_MAX_BYTES); assert_ne!(truncated, long_line); - let removed_bytes = long_line.len().saturating_sub(max_bytes); - let marker_line = - format!("[... removed {removed_bytes} bytes to fit {max_bytes} byte limit ...]"); + let removed_bytes = long_line.len().saturating_sub(EXEC_FORMAT_MAX_BYTES); + let marker_line = format!( + "[... removed {removed_bytes} bytes to fit {max_bytes} byte limit ...]", + max_bytes = EXEC_FORMAT_MAX_BYTES + ); assert!( truncated.contains(&marker_line), "missing byte truncation marker: {truncated}" @@ -430,7 +405,7 @@ fn format_exec_output_returns_original_when_within_limits() { let content = "example output\n".repeat(10); assert_eq!( - truncate::truncate_with_line_bytes_budget(&content, exec_format_max_bytes(),), + truncate::truncate_with_line_bytes_budget(&content, EXEC_FORMAT_MAX_BYTES), content ); } @@ -442,7 +417,7 @@ fn format_exec_output_reports_omitted_lines_and_keeps_head_and_tail() { .map(|idx| format!("line-{idx}\n")) .collect(); - let truncated = truncate::truncate_with_line_bytes_budget(&content, exec_format_max_bytes()); + let truncated = truncate::truncate_with_line_bytes_budget(&content, EXEC_FORMAT_MAX_BYTES); let omitted = total_lines - EXEC_FORMAT_MAX_LINES; let expected_marker = format!("[... omitted {omitted} of {total_lines} lines ...]"); @@ -470,7 +445,7 @@ fn format_exec_output_prefers_line_marker_when_both_limits_exceeded() { .map(|idx| format!("line-{idx}-{long_line}\n")) .collect(); - let truncated = truncate::truncate_with_line_bytes_budget(&content, exec_format_max_bytes()); + let truncated = truncate::truncate_with_line_bytes_budget(&content, EXEC_FORMAT_MAX_BYTES); assert!( truncated.contains("[... omitted 42 of 298 lines ...]"), From 59440580657cdae62a12f9b679cacaaf0bf43c5f Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Tue, 18 Nov 2025 11:01:19 -0800 Subject: [PATCH 66/68] fix --- codex-rs/core/src/truncate.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codex-rs/core/src/truncate.rs b/codex-rs/core/src/truncate.rs index acaa18414d..bdec0b0d1e 100644 --- a/codex-rs/core/src/truncate.rs +++ b/codex-rs/core/src/truncate.rs @@ -25,7 +25,7 @@ impl TruncationPolicy { if let Some(token_limit) = config_token_limit { Self::Bytes(approx_bytes_for_tokens(token_limit)) } else { - Self::Bytes(approx_bytes_for_tokens(family_bytes)) + Self::Bytes(family_bytes) } } TruncationPolicy::Tokens(family_tokens) => { From e6af809a9d18434cfbe027d7e9809492bb6e6c70 Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Tue, 18 Nov 2025 11:04:54 -0800 Subject: [PATCH 67/68] fix --- codex-rs/core/src/context_manager/history_tests.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/codex-rs/core/src/context_manager/history_tests.rs b/codex-rs/core/src/context_manager/history_tests.rs index e75dbf2ff9..fecd0a7277 100644 --- a/codex-rs/core/src/context_manager/history_tests.rs +++ b/codex-rs/core/src/context_manager/history_tests.rs @@ -358,8 +358,7 @@ fn truncated_message_pattern(line: &str, total_lines: usize) -> String { let escaped_line = regex_lite::escape(line); if omitted == 0 { return format!( - r"(?s)^Total output lines: {total_lines}\n\n(?P{escaped_line}.*\n\[\.{{3}} removed \d+ bytes to fit {max_bytes} byte limit \.{{3}}]\n\n.*)$", - max_bytes = EXEC_FORMAT_MAX_BYTES, + r"(?s)^Total output lines: {total_lines}\n\n(?P{escaped_line}.*\n\[\.{{3}} removed \d+ bytes to fit {EXEC_FORMAT_MAX_BYTES} byte limit \.{{3}}]\n\n.*)$", ); } format!( @@ -387,8 +386,7 @@ fn format_exec_output_marks_byte_truncation_without_omitted_lines() { assert_ne!(truncated, long_line); let removed_bytes = long_line.len().saturating_sub(EXEC_FORMAT_MAX_BYTES); let marker_line = format!( - "[... removed {removed_bytes} bytes to fit {max_bytes} byte limit ...]", - max_bytes = EXEC_FORMAT_MAX_BYTES + "[... removed {removed_bytes} bytes to fit {EXEC_FORMAT_MAX_BYTES} byte limit ...]" ); assert!( truncated.contains(&marker_line), From 7c3afa4a0c29b26ab789f0fab8743795a8772d45 Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Tue, 18 Nov 2025 11:14:12 -0800 Subject: [PATCH 68/68] fix --- codex-rs/core/src/codex.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index 639400d15a..e308601c55 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -403,7 +403,7 @@ impl Session { ); let client = ModelClient::new( - Arc::new(per_turn_config), + Arc::new(per_turn_config.clone()), auth_manager, otel_event_manager, provider, @@ -433,7 +433,7 @@ impl Session { final_output_json_schema: None, codex_linux_sandbox_exe: config.codex_linux_sandbox_exe.clone(), tool_call_gate: Arc::new(ReadinessFlag::new()), - truncation_policy: TruncationPolicy::new(&config), + truncation_policy: TruncationPolicy::new(&per_turn_config), } } @@ -1766,7 +1766,7 @@ async fn spawn_review_thread( final_output_json_schema: None, codex_linux_sandbox_exe: parent_turn_context.codex_linux_sandbox_exe.clone(), tool_call_gate: Arc::new(ReadinessFlag::new()), - truncation_policy: TruncationPolicy::new(&config), + truncation_policy: TruncationPolicy::new(&per_turn_config), }; // Seed the child task with the review prompt as the initial user message.