diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index 64d06d0571..e308601c55 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -13,6 +13,7 @@ use crate::parse_command::parse_command; use crate::parse_turn_item; use crate::response_processing::process_items; use crate::terminal; +use crate::truncate::TruncationPolicy; use crate::user_notification::UserNotifier; use crate::util::error_or_panic; use async_channel::Receiver; @@ -275,6 +276,7 @@ pub(crate) struct TurnContext { pub(crate) final_output_json_schema: Option, pub(crate) codex_linux_sandbox_exe: Option, pub(crate) tool_call_gate: Arc, + pub(crate) truncation_policy: TruncationPolicy, } impl TurnContext { @@ -401,7 +403,7 @@ impl Session { ); let client = ModelClient::new( - Arc::new(per_turn_config), + Arc::new(per_turn_config.clone()), auth_manager, otel_event_manager, provider, @@ -431,6 +433,7 @@ impl Session { final_output_json_schema: None, codex_linux_sandbox_exe: config.codex_linux_sandbox_exe.clone(), tool_call_gate: Arc::new(ReadinessFlag::new()), + truncation_policy: TruncationPolicy::new(&per_turn_config), } } @@ -678,7 +681,8 @@ impl Session { let reconstructed_history = self.reconstruct_history_from_rollout(&turn_context, &rollout_items); if !reconstructed_history.is_empty() { - self.record_into_history(&reconstructed_history).await; + self.record_into_history(&reconstructed_history, &turn_context) + .await; } // If persisting, persist all rollout items as-is (recorder filters) @@ -935,7 +939,7 @@ impl Session { turn_context: &TurnContext, items: &[ResponseItem], ) { - self.record_into_history(items).await; + self.record_into_history(items, turn_context).await; self.persist_rollout_response_items(items).await; self.send_raw_response_items(turn_context, items).await; } @@ -949,7 +953,10 @@ impl Session { for item in rollout_items { match item { RolloutItem::ResponseItem(response_item) => { - history.record_items(std::iter::once(response_item)); + history.record_items( + std::iter::once(response_item), + turn_context.truncation_policy, + ); } RolloutItem::Compacted(compacted) => { let snapshot = history.get_history(); @@ -973,9 +980,13 @@ impl Session { } /// Append ResponseItems to the in-memory conversation history only. - pub(crate) async fn record_into_history(&self, items: &[ResponseItem]) { + pub(crate) async fn record_into_history( + &self, + items: &[ResponseItem], + turn_context: &TurnContext, + ) { let mut state = self.state.lock().await; - state.record_items(items.iter()); + state.record_items(items.iter(), turn_context.truncation_policy); } pub(crate) async fn replace_history(&self, items: Vec) { @@ -1755,6 +1766,7 @@ async fn spawn_review_thread( final_output_json_schema: None, codex_linux_sandbox_exe: parent_turn_context.codex_linux_sandbox_exe.clone(), tool_call_gate: Arc::new(ReadinessFlag::new()), + truncation_policy: TruncationPolicy::new(&per_turn_config), }; // Seed the child task with the review prompt as the initial user message. @@ -2886,7 +2898,7 @@ mod tests { for item in &initial_context { rollout_items.push(RolloutItem::ResponseItem(item.clone())); } - live_history.record_items(initial_context.iter()); + live_history.record_items(initial_context.iter(), turn_context.truncation_policy); let user1 = ResponseItem::Message { id: None, @@ -2895,7 +2907,7 @@ mod tests { text: "first user".to_string(), }], }; - live_history.record_items(std::iter::once(&user1)); + live_history.record_items(std::iter::once(&user1), turn_context.truncation_policy); rollout_items.push(RolloutItem::ResponseItem(user1.clone())); let assistant1 = ResponseItem::Message { @@ -2905,7 +2917,7 @@ mod tests { text: "assistant reply one".to_string(), }], }; - live_history.record_items(std::iter::once(&assistant1)); + live_history.record_items(std::iter::once(&assistant1), turn_context.truncation_policy); rollout_items.push(RolloutItem::ResponseItem(assistant1.clone())); let summary1 = "summary one"; @@ -2929,7 +2941,7 @@ mod tests { text: "second user".to_string(), }], }; - live_history.record_items(std::iter::once(&user2)); + live_history.record_items(std::iter::once(&user2), turn_context.truncation_policy); rollout_items.push(RolloutItem::ResponseItem(user2.clone())); let assistant2 = ResponseItem::Message { @@ -2939,7 +2951,7 @@ mod tests { text: "assistant reply two".to_string(), }], }; - live_history.record_items(std::iter::once(&assistant2)); + live_history.record_items(std::iter::once(&assistant2), turn_context.truncation_policy); rollout_items.push(RolloutItem::ResponseItem(assistant2.clone())); let summary2 = "summary two"; @@ -2963,7 +2975,7 @@ mod tests { text: "third user".to_string(), }], }; - live_history.record_items(std::iter::once(&user3)); + live_history.record_items(std::iter::once(&user3), turn_context.truncation_policy); rollout_items.push(RolloutItem::ResponseItem(user3.clone())); let assistant3 = ResponseItem::Message { @@ -2973,7 +2985,7 @@ mod tests { text: "assistant reply three".to_string(), }], }; - live_history.record_items(std::iter::once(&assistant3)); + live_history.record_items(std::iter::once(&assistant3), turn_context.truncation_policy); rollout_items.push(RolloutItem::ResponseItem(assistant3.clone())); (rollout_items, live_history.get_history()) diff --git a/codex-rs/core/src/compact.rs b/codex-rs/core/src/compact.rs index 0495c161d5..33d38091f6 100644 --- a/codex-rs/core/src/compact.rs +++ b/codex-rs/core/src/compact.rs @@ -14,7 +14,9 @@ use crate::protocol::EventMsg; use crate::protocol::TaskStartedEvent; use crate::protocol::TurnContextItem; use crate::protocol::WarningEvent; -use crate::truncate::truncate_middle; +use crate::truncate::TruncationPolicy; +use crate::truncate::approx_token_count; +use crate::truncate::truncate_text; use crate::util::backoff; use codex_protocol::items::TurnItem; use codex_protocol::models::ContentItem; @@ -59,7 +61,10 @@ async fn run_compact_task_inner( let initial_input_for_turn: ResponseInputItem = ResponseInputItem::from(input); let mut history = sess.clone_history().await; - history.record_items(&[initial_input_for_turn.into()]); + history.record_items( + &[initial_input_for_turn.into()], + turn_context.truncation_policy, + ); let mut truncated_count = 0usize; @@ -230,7 +235,7 @@ pub(crate) fn build_compacted_history( initial_context, user_messages, summary_text, - COMPACT_USER_MESSAGE_MAX_TOKENS * 4, + COMPACT_USER_MESSAGE_MAX_TOKENS, ) } @@ -238,20 +243,21 @@ fn build_compacted_history_with_limit( mut history: Vec, user_messages: &[String], summary_text: &str, - max_bytes: usize, + max_tokens: usize, ) -> Vec { let mut selected_messages: Vec = Vec::new(); - if max_bytes > 0 { - let mut remaining = max_bytes; + if max_tokens > 0 { + let mut remaining = max_tokens; for message in user_messages.iter().rev() { if remaining == 0 { break; } - if message.len() <= remaining { + let tokens = approx_token_count(message); + if tokens <= remaining { selected_messages.push(message.clone()); - remaining = remaining.saturating_sub(message.len()); + remaining = remaining.saturating_sub(tokens); } else { - let (truncated, _) = truncate_middle(message, remaining); + let truncated = truncate_text(message, TruncationPolicy::Tokens(remaining)); selected_messages.push(truncated); break; } @@ -300,7 +306,8 @@ async fn drain_to_completed( }; match event { Ok(ResponseEvent::OutputItemDone(item)) => { - sess.record_into_history(std::slice::from_ref(&item)).await; + sess.record_into_history(std::slice::from_ref(&item), turn_context) + .await; } Ok(ResponseEvent::RateLimits(snapshot)) => { sess.update_rate_limits(turn_context, snapshot).await; @@ -318,6 +325,7 @@ async fn drain_to_completed( #[cfg(test)] mod tests { + use super::*; use pretty_assertions::assert_eq; @@ -409,16 +417,16 @@ mod tests { } #[test] - fn build_compacted_history_truncates_overlong_user_messages() { + fn build_token_limited_compacted_history_truncates_overlong_user_messages() { // Use a small truncation limit so the test remains fast while still validating // that oversized user content is truncated. - let max_bytes = 128; - let big = "X".repeat(max_bytes + 50); + let max_tokens = 16; + let big = "word ".repeat(200); let history = super::build_compacted_history_with_limit( Vec::new(), std::slice::from_ref(&big), "SUMMARY", - max_bytes, + max_tokens, ); assert_eq!(history.len(), 2); @@ -451,7 +459,7 @@ mod tests { } #[test] - fn build_compacted_history_appends_summary_message() { + fn build_token_limited_compacted_history_appends_summary_message() { let initial_context: Vec = Vec::new(); let user_messages = vec!["first user message".to_string()]; let summary_text = "summary text"; diff --git a/codex-rs/core/src/compact_remote.rs b/codex-rs/core/src/compact_remote.rs index 2c7d57eff2..1726aad6f9 100644 --- a/codex-rs/core/src/compact_remote.rs +++ b/codex-rs/core/src/compact_remote.rs @@ -50,7 +50,10 @@ async fn run_remote_compact_task_inner( let mut history = sess.clone_history().await; if !input.is_empty() { let initial_input_for_turn: ResponseInputItem = ResponseInputItem::from(input); - history.record_items(&[initial_input_for_turn.into()]); + history.record_items( + &[initial_input_for_turn.into()], + turn_context.truncation_policy, + ); } let prompt = Prompt { diff --git a/codex-rs/core/src/config/mod.rs b/codex-rs/core/src/config/mod.rs index 5b57d4dc01..0c00da9a48 100644 --- a/codex-rs/core/src/config/mod.rs +++ b/codex-rs/core/src/config/mod.rs @@ -195,6 +195,9 @@ pub struct Config { /// Additional filenames to try when looking for project-level docs. pub project_doc_fallback_filenames: Vec, + /// Token budget applied when storing tool/function outputs in the context manager. + pub tool_output_token_limit: Option, + /// Directory containing all Codex state (defaults to `~/.codex` but can be /// overridden by the `CODEX_HOME` environment variable). pub codex_home: PathBuf, @@ -636,6 +639,9 @@ pub struct ConfigToml { /// Ordered list of fallback filenames to look for when AGENTS.md is missing. pub project_doc_fallback_filenames: Option>, + /// Token budget applied when storing tool/function outputs in the context manager. + pub tool_output_token_limit: Option, + /// Profile to use from the `profiles` map. pub profile: Option, @@ -1209,6 +1215,7 @@ impl Config { } }) .collect(), + tool_output_token_limit: cfg.tool_output_token_limit, codex_home, history, file_opener: cfg.file_opener.unwrap_or(UriBasedFileOpener::VsCode), @@ -2961,6 +2968,7 @@ model_verbosity = "high" model_providers: fixture.model_provider_map.clone(), project_doc_max_bytes: PROJECT_DOC_MAX_BYTES, project_doc_fallback_filenames: Vec::new(), + tool_output_token_limit: None, codex_home: fixture.codex_home(), history: History::default(), file_opener: UriBasedFileOpener::VsCode, @@ -3032,6 +3040,7 @@ model_verbosity = "high" model_providers: fixture.model_provider_map.clone(), project_doc_max_bytes: PROJECT_DOC_MAX_BYTES, project_doc_fallback_filenames: Vec::new(), + tool_output_token_limit: None, codex_home: fixture.codex_home(), history: History::default(), file_opener: UriBasedFileOpener::VsCode, @@ -3118,6 +3127,7 @@ model_verbosity = "high" model_providers: fixture.model_provider_map.clone(), project_doc_max_bytes: PROJECT_DOC_MAX_BYTES, project_doc_fallback_filenames: Vec::new(), + tool_output_token_limit: None, codex_home: fixture.codex_home(), history: History::default(), file_opener: UriBasedFileOpener::VsCode, @@ -3190,6 +3200,7 @@ model_verbosity = "high" model_providers: fixture.model_provider_map.clone(), project_doc_max_bytes: PROJECT_DOC_MAX_BYTES, project_doc_fallback_filenames: Vec::new(), + tool_output_token_limit: None, codex_home: fixture.codex_home(), history: History::default(), file_opener: UriBasedFileOpener::VsCode, diff --git a/codex-rs/core/src/context_manager/history.rs b/codex-rs/core/src/context_manager/history.rs index 50e3a8bc94..af60ada9fe 100644 --- a/codex-rs/core/src/context_manager/history.rs +++ b/codex-rs/core/src/context_manager/history.rs @@ -1,8 +1,8 @@ use crate::codex::TurnContext; use crate::context_manager::normalize; -use crate::truncate; -use crate::truncate::format_output_for_model_body; -use crate::truncate::globally_truncate_function_output_items; +use crate::truncate::TruncationPolicy; +use crate::truncate::truncate_function_output_items_with_policy; +use crate::truncate::truncate_text; use codex_protocol::models::FunctionCallOutputPayload; use codex_protocol::models::ResponseItem; use codex_protocol::protocol::TokenUsage; @@ -10,12 +10,6 @@ use codex_protocol::protocol::TokenUsageInfo; use codex_utils_tokenizer::Tokenizer; use std::ops::Deref; -const CONTEXT_WINDOW_HARD_LIMIT_FACTOR: f64 = 1.1; -const CONTEXT_WINDOW_HARD_LIMIT_BYTES: usize = - (truncate::MODEL_FORMAT_MAX_BYTES as f64 * CONTEXT_WINDOW_HARD_LIMIT_FACTOR) as usize; -const CONTEXT_WINDOW_HARD_LIMIT_LINES: usize = - (truncate::MODEL_FORMAT_MAX_LINES as f64 * CONTEXT_WINDOW_HARD_LIMIT_FACTOR) as usize; - /// Transcript of conversation history #[derive(Debug, Clone, Default)] pub(crate) struct ContextManager { @@ -50,7 +44,7 @@ impl ContextManager { } /// `items` is ordered from oldest to newest. - pub(crate) fn record_items(&mut self, items: I) + pub(crate) fn record_items(&mut self, items: I, policy: TruncationPolicy) where I: IntoIterator, I::Item: std::ops::Deref, @@ -62,7 +56,7 @@ impl ContextManager { continue; } - let processed = Self::process_item(&item); + let processed = self.process_item(item_ref, policy); self.items.push(processed); } } @@ -150,18 +144,14 @@ impl ContextManager { items.retain(|item| !matches!(item, ResponseItem::GhostSnapshot { .. })); } - fn process_item(item: &ResponseItem) -> ResponseItem { + fn process_item(&self, item: &ResponseItem, policy: TruncationPolicy) -> ResponseItem { match item { ResponseItem::FunctionCallOutput { call_id, output } => { - let truncated = format_output_for_model_body( - output.content.as_str(), - CONTEXT_WINDOW_HARD_LIMIT_BYTES, - CONTEXT_WINDOW_HARD_LIMIT_LINES, - ); + let truncated = truncate_text(output.content.as_str(), policy); let truncated_items = output .content_items .as_ref() - .map(|items| globally_truncate_function_output_items(items)); + .map(|items| truncate_function_output_items_with_policy(items, policy)); ResponseItem::FunctionCallOutput { call_id: call_id.clone(), output: FunctionCallOutputPayload { @@ -172,11 +162,7 @@ impl ContextManager { } } ResponseItem::CustomToolCallOutput { call_id, output } => { - let truncated = format_output_for_model_body( - output, - CONTEXT_WINDOW_HARD_LIMIT_BYTES, - CONTEXT_WINDOW_HARD_LIMIT_LINES, - ); + let truncated = truncate_text(output, policy); ResponseItem::CustomToolCallOutput { call_id: call_id.clone(), output: truncated, diff --git a/codex-rs/core/src/context_manager/history_tests.rs b/codex-rs/core/src/context_manager/history_tests.rs index c81749c2c1..fecd0a7277 100644 --- a/codex-rs/core/src/context_manager/history_tests.rs +++ b/codex-rs/core/src/context_manager/history_tests.rs @@ -1,9 +1,8 @@ use super::*; -use crate::context_manager::MODEL_FORMAT_MAX_LINES; use crate::truncate; +use crate::truncate::TruncationPolicy; use codex_git::GhostCommit; use codex_protocol::models::ContentItem; -use codex_protocol::models::FunctionCallOutputContentItem; use codex_protocol::models::FunctionCallOutputPayload; use codex_protocol::models::LocalShellAction; use codex_protocol::models::LocalShellExecAction; @@ -13,6 +12,9 @@ use codex_protocol::models::ReasoningItemReasoningSummary; use pretty_assertions::assert_eq; use regex_lite::Regex; +const EXEC_FORMAT_MAX_LINES: usize = 256; +const EXEC_FORMAT_MAX_BYTES: usize = 10_000; + fn assistant_msg(text: &str) -> ResponseItem { ResponseItem::Message { id: None, @@ -25,7 +27,9 @@ fn assistant_msg(text: &str) -> ResponseItem { fn create_history_with_items(items: Vec) -> ContextManager { let mut h = ContextManager::new(); - h.record_items(items.iter()); + // Use a generous but fixed token budget; tests only rely on truncation + // behavior, not on a specific model's token limit. + h.record_items(items.iter(), TruncationPolicy::Tokens(10_000)); h } @@ -55,6 +59,7 @@ fn reasoning_msg(text: &str) -> ResponseItem { #[test] fn filters_non_api_messages() { let mut h = ContextManager::default(); + let policy = TruncationPolicy::Tokens(10_000); // System message is not API messages; Other is ignored. let system = ResponseItem::Message { id: None, @@ -64,12 +69,12 @@ fn filters_non_api_messages() { }], }; let reasoning = reasoning_msg("thinking..."); - h.record_items([&system, &reasoning, &ResponseItem::Other]); + h.record_items([&system, &reasoning, &ResponseItem::Other], policy); // User and assistant should be retained. let u = user_msg("hi"); let a = assistant_msg("hello"); - h.record_items([&u, &a]); + h.record_items([&u, &a], policy); let items = h.contents(); assert_eq!( @@ -237,6 +242,9 @@ fn normalization_retains_local_shell_outputs() { #[test] fn record_items_truncates_function_call_output_content() { let mut history = ContextManager::new(); + // Any reasonably small token budget works; the test only cares that + // truncation happens and the marker is present. + let policy = TruncationPolicy::Tokens(1_000); let long_line = "a very long line to trigger truncation\n"; let long_output = long_line.repeat(2_500); let item = ResponseItem::FunctionCallOutput { @@ -248,15 +256,20 @@ fn record_items_truncates_function_call_output_content() { }, }; - history.record_items([&item]); + history.record_items([&item], policy); assert_eq!(history.items.len(), 1); match &history.items[0] { ResponseItem::FunctionCallOutput { output, .. } => { assert_ne!(output.content, long_output); assert!( - output.content.starts_with("Total output lines:"), - "expected truncated summary, got {}", + output.content.contains("tokens truncated"), + "expected token-based truncation marker, got {}", + output.content + ); + assert!( + output.content.contains("tokens truncated"), + "expected truncation marker, got {}", output.content ); } @@ -267,6 +280,7 @@ fn record_items_truncates_function_call_output_content() { #[test] fn record_items_truncates_custom_tool_call_output_content() { let mut history = ContextManager::new(); + let policy = TruncationPolicy::Tokens(1_000); let line = "custom output that is very long\n"; let long_output = line.repeat(2_500); let item = ResponseItem::CustomToolCallOutput { @@ -274,21 +288,48 @@ fn record_items_truncates_custom_tool_call_output_content() { output: long_output.clone(), }; - history.record_items([&item]); + history.record_items([&item], policy); assert_eq!(history.items.len(), 1); match &history.items[0] { ResponseItem::CustomToolCallOutput { output, .. } => { assert_ne!(output, &long_output); assert!( - output.starts_with("Total output lines:"), - "expected truncated summary, got {output}" + output.contains("tokens truncated"), + "expected token-based truncation marker, got {output}" + ); + assert!( + output.contains("tokens truncated") || output.contains("bytes truncated"), + "expected truncation marker, got {output}" ); } other => panic!("unexpected history item: {other:?}"), } } +#[test] +fn record_items_respects_custom_token_limit() { + let mut history = ContextManager::new(); + let policy = TruncationPolicy::Tokens(10); + let long_output = "tokenized content repeated many times ".repeat(200); + let item = ResponseItem::FunctionCallOutput { + call_id: "call-custom-limit".to_string(), + output: FunctionCallOutputPayload { + content: long_output, + success: Some(true), + ..Default::default() + }, + }; + + history.record_items([&item], policy); + + let stored = match &history.items[0] { + ResponseItem::FunctionCallOutput { output, .. } => output, + other => panic!("unexpected history item: {other:?}"), + }; + assert!(stored.content.contains("tokens truncated")); +} + fn assert_truncated_message_matches(message: &str, line: &str, total_lines: usize) { let pattern = truncated_message_pattern(line, total_lines); let regex = Regex::new(&pattern).unwrap_or_else(|err| { @@ -302,23 +343,22 @@ fn assert_truncated_message_matches(message: &str, line: &str, total_lines: usiz .expect("missing body capture") .as_str(); assert!( - body.len() <= truncate::MODEL_FORMAT_MAX_BYTES, + body.len() <= EXEC_FORMAT_MAX_BYTES, "body exceeds byte limit: {} bytes", body.len() ); } fn truncated_message_pattern(line: &str, total_lines: usize) -> String { - let head_lines = MODEL_FORMAT_MAX_LINES / 2; - let tail_lines = MODEL_FORMAT_MAX_LINES - head_lines; + let head_lines = EXEC_FORMAT_MAX_LINES / 2; + let tail_lines = EXEC_FORMAT_MAX_LINES - head_lines; let head_take = head_lines.min(total_lines); let tail_take = tail_lines.min(total_lines.saturating_sub(head_take)); let omitted = total_lines.saturating_sub(head_take + tail_take); let escaped_line = regex_lite::escape(line); if omitted == 0 { return format!( - r"(?s)^Total output lines: {total_lines}\n\n(?P{escaped_line}.*\n\[\.{{3}} output truncated to fit {max_bytes} bytes \.{{3}}]\n\n.*)$", - max_bytes = truncate::MODEL_FORMAT_MAX_BYTES, + r"(?s)^Total output lines: {total_lines}\n\n(?P{escaped_line}.*\n\[\.{{3}} removed \d+ bytes to fit {EXEC_FORMAT_MAX_BYTES} byte limit \.{{3}}]\n\n.*)$", ); } format!( @@ -331,11 +371,7 @@ fn format_exec_output_truncates_large_error() { let line = "very long execution error line that should trigger truncation\n"; let large_error = line.repeat(2_500); // way beyond both byte and line limits - let truncated = truncate::format_output_for_model_body( - &large_error, - truncate::MODEL_FORMAT_MAX_BYTES, - truncate::MODEL_FORMAT_MAX_LINES, - ); + let truncated = truncate::truncate_with_line_bytes_budget(&large_error, EXEC_FORMAT_MAX_BYTES); let total_lines = large_error.lines().count(); assert_truncated_message_matches(&truncated, line, total_lines); @@ -344,17 +380,13 @@ fn format_exec_output_truncates_large_error() { #[test] fn format_exec_output_marks_byte_truncation_without_omitted_lines() { - let long_line = "a".repeat(truncate::MODEL_FORMAT_MAX_BYTES + 50); - let truncated = truncate::format_output_for_model_body( - &long_line, - truncate::MODEL_FORMAT_MAX_BYTES, - truncate::MODEL_FORMAT_MAX_LINES, - ); + let long_line = "a".repeat(EXEC_FORMAT_MAX_BYTES + 50); + let truncated = truncate::truncate_with_line_bytes_budget(&long_line, EXEC_FORMAT_MAX_BYTES); assert_ne!(truncated, long_line); + let removed_bytes = long_line.len().saturating_sub(EXEC_FORMAT_MAX_BYTES); let marker_line = format!( - "[... output truncated to fit {} bytes ...]", - truncate::MODEL_FORMAT_MAX_BYTES + "[... removed {removed_bytes} bytes to fit {EXEC_FORMAT_MAX_BYTES} byte limit ...]" ); assert!( truncated.contains(&marker_line), @@ -371,28 +403,20 @@ fn format_exec_output_returns_original_when_within_limits() { let content = "example output\n".repeat(10); assert_eq!( - truncate::format_output_for_model_body( - &content, - truncate::MODEL_FORMAT_MAX_BYTES, - truncate::MODEL_FORMAT_MAX_LINES - ), + truncate::truncate_with_line_bytes_budget(&content, EXEC_FORMAT_MAX_BYTES), content ); } #[test] fn format_exec_output_reports_omitted_lines_and_keeps_head_and_tail() { - let total_lines = truncate::MODEL_FORMAT_MAX_LINES + 100; + let total_lines = EXEC_FORMAT_MAX_LINES + 100; let content: String = (0..total_lines) .map(|idx| format!("line-{idx}\n")) .collect(); - let truncated = truncate::format_output_for_model_body( - &content, - truncate::MODEL_FORMAT_MAX_BYTES, - truncate::MODEL_FORMAT_MAX_LINES, - ); - let omitted = total_lines - truncate::MODEL_FORMAT_MAX_LINES; + let truncated = truncate::truncate_with_line_bytes_budget(&content, EXEC_FORMAT_MAX_BYTES); + let omitted = total_lines - EXEC_FORMAT_MAX_LINES; let expected_marker = format!("[... omitted {omitted} of {total_lines} lines ...]"); assert!( @@ -413,103 +437,24 @@ fn format_exec_output_reports_omitted_lines_and_keeps_head_and_tail() { #[test] fn format_exec_output_prefers_line_marker_when_both_limits_exceeded() { - let total_lines = truncate::MODEL_FORMAT_MAX_LINES + 42; + let total_lines = EXEC_FORMAT_MAX_LINES + 42; let long_line = "x".repeat(256); let content: String = (0..total_lines) .map(|idx| format!("line-{idx}-{long_line}\n")) .collect(); - let truncated = truncate::format_output_for_model_body( - &content, - truncate::MODEL_FORMAT_MAX_BYTES, - truncate::MODEL_FORMAT_MAX_LINES, - ); + let truncated = truncate::truncate_with_line_bytes_budget(&content, EXEC_FORMAT_MAX_BYTES); assert!( truncated.contains("[... omitted 42 of 298 lines ...]"), "expected omitted marker when line count exceeds limit: {truncated}" ); assert!( - !truncated.contains("output truncated to fit"), + !truncated.contains("byte limit"), "line omission marker should take precedence over byte marker: {truncated}" ); } -#[test] -fn truncates_across_multiple_under_limit_texts_and_reports_omitted() { - // Arrange: several text items, none exceeding per-item limit, but total exceeds budget. - let budget = truncate::MODEL_FORMAT_MAX_BYTES; - let t1_len = (budget / 2).saturating_sub(10); - let t2_len = (budget / 2).saturating_sub(10); - let remaining_after_t1_t2 = budget.saturating_sub(t1_len + t2_len); - let t3_len = 50; // gets truncated to remaining_after_t1_t2 - let t4_len = 5; // omitted - let t5_len = 7; // omitted - - let t1 = "a".repeat(t1_len); - let t2 = "b".repeat(t2_len); - let t3 = "c".repeat(t3_len); - let t4 = "d".repeat(t4_len); - let t5 = "e".repeat(t5_len); - - let item = ResponseItem::FunctionCallOutput { - call_id: "call-omit".to_string(), - output: FunctionCallOutputPayload { - content: "irrelevant".to_string(), - content_items: Some(vec![ - FunctionCallOutputContentItem::InputText { text: t1 }, - FunctionCallOutputContentItem::InputText { text: t2 }, - FunctionCallOutputContentItem::InputImage { - image_url: "img:mid".to_string(), - }, - FunctionCallOutputContentItem::InputText { text: t3 }, - FunctionCallOutputContentItem::InputText { text: t4 }, - FunctionCallOutputContentItem::InputText { text: t5 }, - ]), - success: Some(true), - }, - }; - - let mut history = ContextManager::new(); - history.record_items([&item]); - assert_eq!(history.items.len(), 1); - let json = serde_json::to_value(&history.items[0]).expect("serialize to json"); - - let output = json - .get("output") - .expect("output field") - .as_array() - .expect("array output"); - - // Expect: t1 (full), t2 (full), image, t3 (truncated), summary mentioning 2 omitted. - assert_eq!(output.len(), 5); - - let first = output[0].as_object().expect("first obj"); - assert_eq!(first.get("type").unwrap(), "input_text"); - let first_text = first.get("text").unwrap().as_str().unwrap(); - assert_eq!(first_text.len(), t1_len); - - let second = output[1].as_object().expect("second obj"); - assert_eq!(second.get("type").unwrap(), "input_text"); - let second_text = second.get("text").unwrap().as_str().unwrap(); - assert_eq!(second_text.len(), t2_len); - - assert_eq!( - output[2], - serde_json::json!({"type": "input_image", "image_url": "img:mid"}) - ); - - let fourth = output[3].as_object().expect("fourth obj"); - assert_eq!(fourth.get("type").unwrap(), "input_text"); - let fourth_text = fourth.get("text").unwrap().as_str().unwrap(); - assert_eq!(fourth_text.len(), remaining_after_t1_t2); - - let summary = output[4].as_object().expect("summary obj"); - assert_eq!(summary.get("type").unwrap(), "input_text"); - let summary_text = summary.get("text").unwrap().as_str().unwrap(); - assert!(summary_text.contains("omitted 2 text items")); -} - //TODO(aibrahim): run CI in release mode. #[cfg(not(debug_assertions))] #[test] diff --git a/codex-rs/core/src/context_manager/mod.rs b/codex-rs/core/src/context_manager/mod.rs index ab0d2e8168..d347a7714d 100644 --- a/codex-rs/core/src/context_manager/mod.rs +++ b/codex-rs/core/src/context_manager/mod.rs @@ -1,7 +1,5 @@ mod history; mod normalize; -pub(crate) use crate::truncate::MODEL_FORMAT_MAX_BYTES; -pub(crate) use crate::truncate::MODEL_FORMAT_MAX_LINES; -pub(crate) use crate::truncate::format_output_for_model_body; +pub(crate) use crate::truncate::truncate_with_line_bytes_budget; pub(crate) use history::ContextManager; diff --git a/codex-rs/core/src/error.rs b/codex-rs/core/src/error.rs index 64ba8df848..9a42ec3d1b 100644 --- a/codex-rs/core/src/error.rs +++ b/codex-rs/core/src/error.rs @@ -2,7 +2,8 @@ use crate::codex::ProcessedResponseItem; use crate::exec::ExecToolCallOutput; use crate::token_data::KnownPlan; use crate::token_data::PlanType; -use crate::truncate::truncate_middle; +use crate::truncate::TruncationPolicy; +use crate::truncate::truncate_text; use chrono::DateTime; use chrono::Datelike; use chrono::Local; @@ -461,7 +462,10 @@ pub fn get_error_message_ui(e: &CodexErr) -> String { _ => e.to_string(), }; - truncate_middle(&message, ERROR_MESSAGE_UI_MAX_BYTES).0 + truncate_text( + &message, + TruncationPolicy::Bytes(ERROR_MESSAGE_UI_MAX_BYTES), + ) } #[cfg(test)] diff --git a/codex-rs/core/src/model_family.rs b/codex-rs/core/src/model_family.rs index 150420fecf..0758d20310 100644 --- a/codex-rs/core/src/model_family.rs +++ b/codex-rs/core/src/model_family.rs @@ -4,6 +4,7 @@ use codex_protocol::config_types::Verbosity; use crate::config::types::ReasoningSummaryFormat; use crate::tools::handlers::apply_patch::ApplyPatchToolType; use crate::tools::spec::ConfigShellToolType; +use crate::truncate::TruncationPolicy; /// The `instructions` field in the payload sent to a model should always start /// with this content. @@ -66,6 +67,8 @@ pub struct ModelFamily { /// Preferred shell tool type for this model family when features do not override it. pub shell_type: ConfigShellToolType, + + pub truncation_policy: TruncationPolicy, } macro_rules! model_family { @@ -89,6 +92,7 @@ macro_rules! model_family { shell_type: ConfigShellToolType::Default, default_verbosity: None, default_reasoning_effort: None, + truncation_policy: TruncationPolicy::Bytes(10_000), }; // apply overrides @@ -146,6 +150,7 @@ pub fn find_family_for_model(slug: &str) -> Option { ], supports_parallel_tool_calls: true, support_verbosity: true, + truncation_policy: TruncationPolicy::Tokens(10_000), ) // Internal models. @@ -164,6 +169,7 @@ pub fn find_family_for_model(slug: &str) -> Option { shell_type: if cfg!(windows) { ConfigShellToolType::ShellCommand } else { ConfigShellToolType::Default }, supports_parallel_tool_calls: true, support_verbosity: true, + truncation_policy: TruncationPolicy::Tokens(10_000), ) // Production models. @@ -180,6 +186,7 @@ pub fn find_family_for_model(slug: &str) -> Option { shell_type: if cfg!(windows) { ConfigShellToolType::ShellCommand } else { ConfigShellToolType::Default }, supports_parallel_tool_calls: true, support_verbosity: false, + truncation_policy: TruncationPolicy::Tokens(10_000), ) } else if slug.starts_with("gpt-5.1") { model_family!( @@ -190,6 +197,7 @@ pub fn find_family_for_model(slug: &str) -> Option { default_verbosity: Some(Verbosity::Low), base_instructions: GPT_5_1_INSTRUCTIONS.to_string(), default_reasoning_effort: Some(ReasoningEffort::Medium), + truncation_policy: TruncationPolicy::Bytes(10_000), supports_parallel_tool_calls: true, ) } else if slug.starts_with("gpt-5") { @@ -198,6 +206,7 @@ pub fn find_family_for_model(slug: &str) -> Option { supports_reasoning_summaries: true, needs_special_apply_patch_instructions: true, support_verbosity: true, + truncation_policy: TruncationPolicy::Bytes(10_000), ) } else { None @@ -220,5 +229,6 @@ pub fn derive_default_model_family(model: &str) -> ModelFamily { shell_type: ConfigShellToolType::Default, default_verbosity: None, default_reasoning_effort: None, + truncation_policy: TruncationPolicy::Bytes(10_000), } } diff --git a/codex-rs/core/src/state/session.rs b/codex-rs/core/src/state/session.rs index 5b630d5ce9..2dfa5199f1 100644 --- a/codex-rs/core/src/state/session.rs +++ b/codex-rs/core/src/state/session.rs @@ -7,6 +7,7 @@ use crate::context_manager::ContextManager; use crate::protocol::RateLimitSnapshot; use crate::protocol::TokenUsage; use crate::protocol::TokenUsageInfo; +use crate::truncate::TruncationPolicy; /// Persistent, session-scoped state previously stored directly on `Session`. pub(crate) struct SessionState { @@ -18,20 +19,21 @@ pub(crate) struct SessionState { impl SessionState { /// Create a new session state mirroring previous `State::default()` semantics. pub(crate) fn new(session_configuration: SessionConfiguration) -> Self { + let history = ContextManager::new(); Self { session_configuration, - history: ContextManager::new(), + history, latest_rate_limits: None, } } // History helpers - pub(crate) fn record_items(&mut self, items: I) + pub(crate) fn record_items(&mut self, items: I, policy: TruncationPolicy) where I: IntoIterator, I::Item: std::ops::Deref, { - self.history.record_items(items) + self.history.record_items(items, policy); } pub(crate) fn clone_history(&self) -> ContextManager { diff --git a/codex-rs/core/src/tools/mod.rs b/codex-rs/core/src/tools/mod.rs index c94a7c28d9..99d5f16506 100644 --- a/codex-rs/core/src/tools/mod.rs +++ b/codex-rs/core/src/tools/mod.rs @@ -9,9 +9,7 @@ pub mod runtimes; pub mod sandboxing; pub mod spec; -use crate::context_manager::MODEL_FORMAT_MAX_BYTES; -use crate::context_manager::MODEL_FORMAT_MAX_LINES; -use crate::context_manager::format_output_for_model_body; +use crate::context_manager::truncate_with_line_bytes_budget; use crate::exec::ExecToolCallOutput; pub use router::ToolRouter; use serde::Serialize; @@ -22,6 +20,9 @@ pub(crate) const TELEMETRY_PREVIEW_MAX_LINES: usize = 64; // lines pub(crate) const TELEMETRY_PREVIEW_TRUNCATION_NOTICE: &str = "[... telemetry preview truncated ...]"; +// TODO(aibrahim): migrate shell tool to use truncate text and respect config value +const SHELL_OUTPUT_MAX_BYTES: usize = 10_000; + /// Format the combined exec output for sending back to the model. /// Includes exit code and duration metadata; truncates large bodies safely. pub fn format_exec_output_for_model(exec_output: &ExecToolCallOutput) -> String { @@ -77,5 +78,5 @@ pub fn format_exec_output_str(exec_output: &ExecToolCallOutput) -> String { }; // Truncate for model consumption before serialization. - format_output_for_model_body(&body, MODEL_FORMAT_MAX_BYTES, MODEL_FORMAT_MAX_LINES) + truncate_with_line_bytes_budget(&body, SHELL_OUTPUT_MAX_BYTES) } diff --git a/codex-rs/core/src/truncate.rs b/codex-rs/core/src/truncate.rs index 42d6a967de..bdec0b0d1e 100644 --- a/codex-rs/core/src/truncate.rs +++ b/codex-rs/core/src/truncate.rs @@ -5,45 +5,142 @@ use codex_protocol::models::FunctionCallOutputContentItem; use codex_utils_string::take_bytes_at_char_boundary; use codex_utils_string::take_last_bytes_at_char_boundary; -use codex_utils_tokenizer::Tokenizer; -/// Model-formatting limits: clients get full streams; only content sent to the model is truncated. -pub const MODEL_FORMAT_MAX_BYTES: usize = 10 * 1024; // 10 KiB -pub const MODEL_FORMAT_MAX_LINES: usize = 256; // lines +use crate::config::Config; -/// Globally truncate function output items to fit within `MODEL_FORMAT_MAX_BYTES` -/// by preserving as many text/image items as possible and appending a summary -/// for any omitted text items. -pub(crate) fn globally_truncate_function_output_items( +const APPROX_BYTES_PER_TOKEN: usize = 4; + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum TruncationPolicy { + Bytes(usize), + Tokens(usize), +} + +impl TruncationPolicy { + pub fn new(config: &Config) -> Self { + let config_token_limit = config.tool_output_token_limit; + + match config.model_family.truncation_policy { + TruncationPolicy::Bytes(family_bytes) => { + if let Some(token_limit) = config_token_limit { + Self::Bytes(approx_bytes_for_tokens(token_limit)) + } else { + Self::Bytes(family_bytes) + } + } + TruncationPolicy::Tokens(family_tokens) => { + if let Some(token_limit) = config_token_limit { + Self::Tokens(token_limit) + } else { + Self::Tokens(family_tokens) + } + } + } + } + + /// Returns a token budget derived from this policy. + /// + /// - For `Tokens`, this is the explicit token limit. + /// - For `Bytes`, this is an approximate token budget using the global + /// bytes-per-token heuristic. + pub fn token_budget(&self) -> usize { + match self { + TruncationPolicy::Bytes(bytes) => { + usize::try_from(approx_tokens_from_byte_count(*bytes)).unwrap_or(usize::MAX) + } + TruncationPolicy::Tokens(tokens) => *tokens, + } + } + + /// Returns a byte budget derived from this policy. + /// + /// - For `Bytes`, this is the explicit byte limit. + /// - For `Tokens`, this is an approximate byte budget using the global + /// bytes-per-token heuristic. + pub fn byte_budget(&self) -> usize { + match self { + TruncationPolicy::Bytes(bytes) => *bytes, + TruncationPolicy::Tokens(tokens) => approx_bytes_for_tokens(*tokens), + } + } +} + +/// Format a block of exec/tool output for model consumption, truncating by +/// lines and bytes while preserving head and tail segments. +pub(crate) fn truncate_with_line_bytes_budget(content: &str, bytes_budget: usize) -> String { + // TODO(aibrahim): to be removed + let lines_budget = 256; + // Head+tail truncation for the model: show the beginning and end with an elision. + // Clients still receive full streams; only this formatted summary is capped. + let total_lines = content.lines().count(); + if content.len() <= bytes_budget && total_lines <= lines_budget { + return content.to_string(); + } + let output = truncate_formatted_exec_output(content, total_lines, bytes_budget, lines_budget); + format!("Total output lines: {total_lines}\n\n{output}") +} + +pub(crate) fn truncate_text(content: &str, policy: TruncationPolicy) -> String { + match policy { + TruncationPolicy::Bytes(bytes) => truncate_with_byte_estimate( + content, + bytes, + TruncationSource::Policy(TruncationPolicy::Bytes(bytes)), + ), + TruncationPolicy::Tokens(tokens) => { + let (truncated, _) = truncate_with_token_budget( + content, + tokens, + TruncationSource::Policy(TruncationPolicy::Tokens(tokens)), + ); + truncated + } + } +} +/// Globally truncate function output items to fit within the given +/// truncation policy's budget, preserving as many text/image items as +/// possible and appending a summary for any omitted text items. +pub(crate) fn truncate_function_output_items_with_policy( items: &[FunctionCallOutputContentItem], + policy: TruncationPolicy, ) -> Vec { let mut out: Vec = Vec::with_capacity(items.len()); - let mut remaining = MODEL_FORMAT_MAX_BYTES; + let mut remaining_budget = match policy { + TruncationPolicy::Bytes(_) => policy.byte_budget(), + TruncationPolicy::Tokens(_) => policy.token_budget(), + }; let mut omitted_text_items = 0usize; for it in items { match it { FunctionCallOutputContentItem::InputText { text } => { - if remaining == 0 { + if remaining_budget == 0 { omitted_text_items += 1; continue; } - let len = text.len(); - if len <= remaining { + let cost = match policy { + TruncationPolicy::Bytes(_) => text.len(), + TruncationPolicy::Tokens(_) => approx_token_count(text), + }; + + if cost <= remaining_budget { out.push(FunctionCallOutputContentItem::InputText { text: text.clone() }); - remaining -= len; + remaining_budget = remaining_budget.saturating_sub(cost); } else { - let slice = take_bytes_at_char_boundary(text, remaining); - if !slice.is_empty() { - out.push(FunctionCallOutputContentItem::InputText { - text: slice.to_string(), - }); + let snippet_policy = match policy { + TruncationPolicy::Bytes(_) => TruncationPolicy::Bytes(remaining_budget), + TruncationPolicy::Tokens(_) => TruncationPolicy::Tokens(remaining_budget), + }; + let snippet = truncate_text(text, snippet_policy); + if snippet.is_empty() { + omitted_text_items += 1; + } else { + out.push(FunctionCallOutputContentItem::InputText { text: snippet }); } - remaining = 0; + remaining_budget = 0; } } - // todo(aibrahim): handle input images; resize FunctionCallOutputContentItem::InputImage { image_url } => { out.push(FunctionCallOutputContentItem::InputImage { image_url: image_url.clone(), @@ -61,21 +158,81 @@ pub(crate) fn globally_truncate_function_output_items( out } -/// Format a block of exec/tool output for model consumption, truncating by -/// lines and bytes while preserving head and tail segments. -pub(crate) fn format_output_for_model_body( - content: &str, - limit_bytes: usize, - limit_lines: usize, -) -> String { - // Head+tail truncation for the model: show the beginning and end with an elision. - // Clients still receive full streams; only this formatted summary is capped. - let total_lines = content.lines().count(); - if content.len() <= limit_bytes && total_lines <= limit_lines { - return content.to_string(); +/// Truncate the middle of a UTF-8 string to at most `max_tokens` tokens, +/// preserving the beginning and the end. Returns the possibly truncated string +/// and `Some(original_token_count)` if truncation occurred; otherwise returns +/// the original string and `None`. +fn truncate_with_token_budget( + s: &str, + max_tokens: usize, + source: TruncationSource, +) -> (String, Option) { + if s.is_empty() { + return (String::new(), None); } - let output = truncate_formatted_exec_output(content, total_lines, limit_bytes, limit_lines); - format!("Total output lines: {total_lines}\n\n{output}") + + let byte_len = s.len(); + if max_tokens > 0 { + let small_threshold = approx_bytes_for_tokens(max_tokens / 4); + if small_threshold > 0 && byte_len <= small_threshold { + return (s.to_string(), None); + } + } + + let truncated = truncate_with_byte_estimate(s, approx_bytes_for_tokens(max_tokens), source); + let approx_total_usize = approx_token_count(s); + let approx_total = u64::try_from(approx_total_usize).unwrap_or(u64::MAX); + if truncated == s { + (truncated, None) + } else { + (truncated, Some(approx_total)) + } +} + +/// Truncate a string using a byte budget derived from the token budget, without +/// performing any real tokenization. This keeps the logic purely byte-based and +/// uses a bytes placeholder in the truncated output. +fn truncate_with_byte_estimate(s: &str, max_bytes: usize, source: TruncationSource) -> String { + if s.is_empty() { + return String::new(); + } + + if max_bytes == 0 { + // No budget to show content; just report that everything was truncated. + let marker = format_truncation_marker(source, removed_units_for_source(source, s.len())); + return marker; + } + + if s.len() <= max_bytes { + return s.to_string(); + } + + let total_bytes = s.len(); + let removed_bytes = total_bytes.saturating_sub(max_bytes); + let marker = format_truncation_marker(source, removed_units_for_source(source, removed_bytes)); + let marker_len = marker.len(); + + if marker_len >= max_bytes { + let truncated_marker = truncate_on_boundary(&marker, max_bytes); + return truncated_marker.to_string(); + } + + let keep_budget = max_bytes - marker_len; + let (left_budget, right_budget) = split_budget(keep_budget); + let prefix_end = pick_prefix_end(s, left_budget); + let mut suffix_start = pick_suffix_start(s, right_budget); + if suffix_start < prefix_end { + suffix_start = prefix_end; + } + + let mut out = assemble_truncated_output(&s[..prefix_end], &s[suffix_start..], &marker); + + if out.len() > max_bytes { + let boundary = truncate_on_boundary(&out, max_bytes); + out.truncate(boundary.len()); + } + + out } fn truncate_formatted_exec_output( @@ -114,13 +271,17 @@ fn truncate_formatted_exec_output( let truncated_by_bytes = content.len() > limit_bytes; // this is a bit wrong. We are counting metadata lines and not just shell output lines. let marker = if omitted > 0 { - Some(format!( - "\n[... omitted {omitted} of {total_lines} lines ...]\n\n" - )) + let marker_text = format_truncation_marker( + TruncationSource::LineOmission { total_lines }, + u64::try_from(omitted).unwrap_or(u64::MAX), + ); + Some(format!("\n{marker_text}\n\n")) } else if truncated_by_bytes { - Some(format!( - "\n[... output truncated to fit {limit_bytes} bytes ...]\n\n" - )) + let removed_bytes = + u64::try_from(content.len().saturating_sub(limit_bytes)).unwrap_or(u64::MAX); + let marker_text = + format_truncation_marker(TruncationSource::ByteLimit { limit_bytes }, removed_bytes); + Some(format!("\n{marker_text}\n\n")) } else { None }; @@ -147,192 +308,136 @@ fn truncate_formatted_exec_output( result } -fn error_on_double_truncation(content: &str) { - if content.contains("Total output lines:") && content.contains("omitted") { - tracing::error!( - "FunctionCallOutput content was already truncated before ContextManager::record_items; this would cause double truncation {content}" - ); - } +#[derive(Clone, Copy)] +pub enum TruncationSource { + Policy(TruncationPolicy), + LineOmission { total_lines: usize }, + ByteLimit { limit_bytes: usize }, } -/// Truncate an output string to a maximum number of “tokens”, where tokens are -/// approximated as individual `char`s. Preserves a prefix and suffix with an -/// elision marker describing how many tokens were omitted. -pub(crate) fn truncate_output_to_tokens( - output: &str, - max_tokens: usize, -) -> (String, Option) { - if max_tokens == 0 { - let total_tokens = output.chars().count(); - let message = format!("…{total_tokens} tokens truncated…"); - return (message, Some(total_tokens)); +fn format_truncation_marker(source: TruncationSource, removed_count: u64) -> String { + match source { + TruncationSource::Policy(TruncationPolicy::Tokens(_)) => { + format!("[…{removed_count} tokens truncated…]") + } + TruncationSource::Policy(TruncationPolicy::Bytes(_)) => { + format!("[…{removed_count} bytes truncated…]") + } + TruncationSource::LineOmission { total_lines } => { + format!("[... omitted {removed_count} of {total_lines} lines ...]") + } + TruncationSource::ByteLimit { limit_bytes } => { + format!("[... removed {removed_count} bytes to fit {limit_bytes} byte limit ...]") + } } +} - let tokens: Vec = output.chars().collect(); - let total_tokens = tokens.len(); - if total_tokens <= max_tokens { - return (output.to_string(), None); - } +fn split_budget(budget: usize) -> (usize, usize) { + let left = budget / 2; + (left, budget - left) +} - let half = max_tokens / 2; - if half == 0 { - let truncated = total_tokens.saturating_sub(max_tokens); - let message = format!("…{truncated} tokens truncated…"); - return (message, Some(total_tokens)); +fn removed_units_for_source(source: TruncationSource, removed_bytes: usize) -> u64 { + match source { + TruncationSource::Policy(TruncationPolicy::Tokens(_)) => { + approx_tokens_from_byte_count(removed_bytes) + } + _ => u64::try_from(removed_bytes).unwrap_or(u64::MAX), } +} - let truncated = total_tokens.saturating_sub(half * 2); - let mut truncated_output = String::new(); - truncated_output.extend(&tokens[..half]); - truncated_output.push_str(&format!("…{truncated} tokens truncated…")); - truncated_output.extend(&tokens[total_tokens - half..]); - (truncated_output, Some(total_tokens)) +fn assemble_truncated_output(prefix: &str, suffix: &str, marker: &str) -> String { + let mut out = String::with_capacity(prefix.len() + marker.len() + suffix.len() + 1); + out.push_str(prefix); + out.push_str(marker); + out.push('\n'); + out.push_str(suffix); + out } -/// Truncate the middle of a UTF-8 string to at most `max_bytes` bytes, -/// preserving the beginning and the end. Returns the possibly truncated -/// string and `Some(original_token_count)` (counted with the local tokenizer; -/// falls back to a 4-bytes-per-token estimate if the tokenizer cannot load) -/// if truncation occurred; otherwise returns the original string and `None`. -pub(crate) fn truncate_middle(s: &str, max_bytes: usize) -> (String, Option) { - if s.len() <= max_bytes { - return (s.to_string(), None); - } +pub(crate) fn approx_token_count(text: &str) -> usize { + let len = text.len(); + len.saturating_add(APPROX_BYTES_PER_TOKEN.saturating_sub(1)) / APPROX_BYTES_PER_TOKEN +} - // Build a tokenizer for counting (default to o200k_base; fall back to cl100k_base). - // If both fail, fall back to a 4-bytes-per-token estimate. - let tok = Tokenizer::try_default().ok(); - let token_count = |text: &str| -> u64 { - if let Some(ref t) = tok { - t.count(text) as u64 - } else { - (text.len() as u64).div_ceil(4) - } - }; +fn approx_bytes_for_tokens(tokens: usize) -> usize { + tokens.saturating_mul(APPROX_BYTES_PER_TOKEN) +} - let total_tokens = token_count(s); - if max_bytes == 0 { - return ( - format!("…{total_tokens} tokens truncated…"), - Some(total_tokens), - ); - } +fn approx_tokens_from_byte_count(bytes: usize) -> u64 { + let bytes_u64 = bytes as u64; + bytes_u64.saturating_add((APPROX_BYTES_PER_TOKEN as u64).saturating_sub(1)) + / (APPROX_BYTES_PER_TOKEN as u64) +} - fn truncate_on_boundary(input: &str, max_len: usize) -> &str { - if input.len() <= max_len { - return input; - } - let mut end = max_len; - while end > 0 && !input.is_char_boundary(end) { - end -= 1; - } - &input[..end] +fn truncate_on_boundary(input: &str, max_len: usize) -> &str { + if input.len() <= max_len { + return input; } - - fn pick_prefix_end(s: &str, left_budget: usize) -> usize { - if let Some(head) = s.get(..left_budget) - && let Some(i) = head.rfind('\n') - { - return i + 1; - } - truncate_on_boundary(s, left_budget).len() + let mut end = max_len; + while end > 0 && !input.is_char_boundary(end) { + end -= 1; } + &input[..end] +} - fn pick_suffix_start(s: &str, right_budget: usize) -> usize { - let start_tail = s.len().saturating_sub(right_budget); - if let Some(tail) = s.get(start_tail..) - && let Some(i) = tail.find('\n') - { - return start_tail + i + 1; - } - - let mut idx = start_tail.min(s.len()); - while idx < s.len() && !s.is_char_boundary(idx) { - idx += 1; - } - idx - } - - // Iterate to stabilize marker length → keep budget → boundaries. - let mut guess_tokens: u64 = 1; - for _ in 0..4 { - let marker = format!("…{guess_tokens} tokens truncated…"); - let marker_len = marker.len(); - let keep_budget = max_bytes.saturating_sub(marker_len); - if keep_budget == 0 { - return ( - format!("…{total_tokens} tokens truncated…"), - Some(total_tokens), - ); - } - - let left_budget = keep_budget / 2; - let right_budget = keep_budget - left_budget; - let prefix_end = pick_prefix_end(s, left_budget); - let mut suffix_start = pick_suffix_start(s, right_budget); - if suffix_start < prefix_end { - suffix_start = prefix_end; - } - - // Tokens actually removed (middle slice) using the real tokenizer. - let removed_tokens = token_count(&s[prefix_end..suffix_start]); - - // If the number of digits in the token count does not change the marker length, - // we can finalize output. - let final_marker = format!("…{removed_tokens} tokens truncated…"); - if final_marker.len() == marker_len { - let kept_content_bytes = prefix_end + (s.len() - suffix_start); - let mut out = String::with_capacity(final_marker.len() + kept_content_bytes + 1); - out.push_str(&s[..prefix_end]); - out.push_str(&final_marker); - out.push('\n'); - out.push_str(&s[suffix_start..]); - return (out, Some(total_tokens)); - } - - guess_tokens = removed_tokens; +fn pick_prefix_end(s: &str, left_budget: usize) -> usize { + if let Some(head) = s.get(..left_budget) + && let Some(i) = head.rfind('\n') + { + return i + 1; } + truncate_on_boundary(s, left_budget).len() +} - // Fallback build after iterations: compute with the last guess. - let marker = format!("…{guess_tokens} tokens truncated…"); - let marker_len = marker.len(); - let keep_budget = max_bytes.saturating_sub(marker_len); - if keep_budget == 0 { - return ( - format!("…{total_tokens} tokens truncated…"), - Some(total_tokens), - ); +fn pick_suffix_start(s: &str, right_budget: usize) -> usize { + let start_tail = s.len().saturating_sub(right_budget); + if let Some(tail) = s.get(start_tail..) + && let Some(i) = tail.find('\n') + { + return start_tail + i + 1; } - let left_budget = keep_budget / 2; - let right_budget = keep_budget - left_budget; - let prefix_end = pick_prefix_end(s, left_budget); - let mut suffix_start = pick_suffix_start(s, right_budget); - if suffix_start < prefix_end { - suffix_start = prefix_end; + let mut idx = start_tail.min(s.len()); + while idx < s.len() && !s.is_char_boundary(idx) { + idx += 1; } + idx +} - let mut out = String::with_capacity(marker_len + prefix_end + (s.len() - suffix_start) + 1); - out.push_str(&s[..prefix_end]); - out.push_str(&marker); - out.push('\n'); - out.push_str(&s[suffix_start..]); - (out, Some(total_tokens)) +fn error_on_double_truncation(content: &str) { + if content.contains("Total output lines:") && content.contains("omitted") { + tracing::error!( + "FunctionCallOutput content was already truncated before ContextManager::record_items; this would cause double truncation {content}" + ); + } } #[cfg(test)] mod tests { - use super::MODEL_FORMAT_MAX_BYTES; - use super::MODEL_FORMAT_MAX_LINES; - use super::format_output_for_model_body; - use super::globally_truncate_function_output_items; - use super::truncate_middle; - use super::truncate_output_to_tokens; + use crate::config::OPENAI_DEFAULT_MODEL; + use crate::model_family::derive_default_model_family; + use crate::model_family::find_family_for_model; + + use super::TruncationPolicy; + use super::TruncationSource; + use super::approx_token_count; + use super::truncate_function_output_items_with_policy; + use super::truncate_with_line_bytes_budget; + use super::truncate_with_token_budget; use codex_protocol::models::FunctionCallOutputContentItem; - use codex_utils_tokenizer::Tokenizer; use pretty_assertions::assert_eq; use regex_lite::Regex; + const MODEL_FORMAT_MAX_LINES: usize = 256; + + fn model_format_max_bytes() -> usize { + find_family_for_model(OPENAI_DEFAULT_MODEL) + .unwrap_or_else(|| derive_default_model_family(OPENAI_DEFAULT_MODEL)) + .truncation_policy + .byte_budget() + } + fn truncated_message_pattern(line: &str, total_lines: usize) -> String { let head_lines = MODEL_FORMAT_MAX_LINES / 2; let tail_lines = MODEL_FORMAT_MAX_LINES - head_lines; @@ -342,7 +447,8 @@ mod tests { let escaped_line = regex_lite::escape(line); if omitted == 0 { return format!( - r"(?s)^Total output lines: {total_lines}\n\n(?P{escaped_line}.*\n\[\.{{3}} output truncated to fit {MODEL_FORMAT_MAX_BYTES} bytes \.{{3}}]\n\n.*)$", + r"(?s)^Total output lines: {total_lines}\n\n(?P{escaped_line}.*\n\[\.{{3}} removed \d+ bytes to fit {max_bytes} byte limit \.{{3}}]\n\n.*)$", + max_bytes = model_format_max_bytes(), ); } format!( @@ -351,88 +457,46 @@ mod tests { } #[test] - fn truncate_middle_no_newlines_fallback() { - let tok = Tokenizer::try_default().expect("load tokenizer"); - let s = "abcdefghijklmnopqrstuvwxyz0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ*"; - let max_bytes = 32; - let (out, original) = truncate_middle(s, max_bytes); - assert!(out.starts_with("abc")); - assert!(out.contains("tokens truncated")); - assert!(out.ends_with("XYZ*")); - assert_eq!(original, Some(tok.count(s) as u64)); + fn truncate_middle_returns_original_when_under_limit() { + let s = "short output"; + let limit = 100; + let source = TruncationSource::Policy(TruncationPolicy::Tokens(limit)); + let (out, original) = truncate_with_token_budget(s, limit, source); + assert_eq!(out, s); + assert_eq!(original, None); } #[test] - fn truncate_middle_prefers_newline_boundaries() { - let tok = Tokenizer::try_default().expect("load tokenizer"); - let mut s = String::new(); - for i in 1..=20 { - s.push_str(&format!("{i:03}\n")); - } - assert_eq!(s.len(), 80); - - let max_bytes = 64; - let (out, tokens) = truncate_middle(&s, max_bytes); - assert!(out.starts_with("001\n002\n003\n004\n")); - assert!(out.contains("tokens truncated")); - assert!(out.ends_with("017\n018\n019\n020\n")); - assert_eq!(tokens, Some(tok.count(&s) as u64)); + fn truncate_middle_reports_truncation_at_zero_limit() { + let s = "abcdef"; + let source = TruncationSource::Policy(TruncationPolicy::Tokens(0)); + let (out, original) = truncate_with_token_budget(s, 0, source); + assert_eq!(out, "[…2 tokens truncated…]"); + assert_eq!(original, Some(approx_token_count(s) as u64)); } #[test] - fn truncate_middle_handles_utf8_content() { - let tok = Tokenizer::try_default().expect("load tokenizer"); - let s = "😀😀😀😀😀😀😀😀😀😀\nsecond line with ascii text\n"; - let max_bytes = 32; - let (out, tokens) = truncate_middle(s, max_bytes); - + fn truncate_middle_enforces_token_budget() { + let s = "alpha beta gamma delta epsilon zeta eta theta iota kappa"; + let max_tokens = 12; + let source = TruncationSource::Policy(TruncationPolicy::Tokens(max_tokens)); + let (out, original) = truncate_with_token_budget(s, max_tokens, source); assert!(out.contains("tokens truncated")); - assert!(!out.contains('\u{fffd}')); - assert_eq!(tokens, Some(tok.count(s) as u64)); + assert_eq!(original, Some(approx_token_count(s) as u64)); + assert!(out.len() < s.len(), "truncated output should be shorter"); } #[test] - fn truncate_middle_prefers_newline_boundaries_2() { - let tok = Tokenizer::try_default().expect("load tokenizer"); - // Build a multi-line string of 20 numbered lines (each "NNN\n"). - let mut s = String::new(); - for i in 1..=20 { - s.push_str(&format!("{i:03}\n")); - } - assert_eq!(s.len(), 80); + fn truncate_middle_handles_utf8_content() { + let s = "😀😀😀😀😀😀😀😀😀😀\nsecond line with text\n"; + let max_tokens = 8; + let source = TruncationSource::Policy(TruncationPolicy::Tokens(max_tokens)); + let (out, tokens) = truncate_with_token_budget(s, max_tokens, source); - let max_bytes = 64; - let (out, total) = truncate_middle(&s, max_bytes); - assert!(out.starts_with("001\n002\n003\n004\n")); assert!(out.contains("tokens truncated")); - assert!(out.ends_with("017\n018\n019\n020\n")); - assert_eq!(total, Some(tok.count(&s) as u64)); - } - - #[test] - fn truncate_output_to_tokens_returns_original_when_under_limit() { - let s = "short output"; - let (truncated, original) = truncate_output_to_tokens(s, 100); - assert_eq!(truncated, s); - assert_eq!(original, None); - } - - #[test] - fn truncate_output_to_tokens_reports_truncation_at_zero_limit() { - let s = "abcdef"; - let (truncated, original) = truncate_output_to_tokens(s, 0); - assert!(truncated.contains("tokens truncated")); - assert_eq!(original, Some(s.chars().count())); - } - - #[test] - fn truncate_output_to_tokens_preserves_prefix_and_suffix() { - let s = "abcdefghijklmnopqrstuvwxyz"; - let max_tokens = 10; - let (truncated, original) = truncate_output_to_tokens(s, max_tokens); - assert!(truncated.starts_with("abcde")); - assert!(truncated.ends_with("vwxyz")); - assert_eq!(original, Some(s.chars().count())); + assert!(!out.contains('\u{fffd}')); + assert_eq!(tokens, Some(approx_token_count(s) as u64)); + assert!(out.len() < s.len(), "UTF-8 content should be shortened"); } #[test] @@ -440,11 +504,7 @@ mod tests { let line = "very long execution error line that should trigger truncation\n"; let large_error = line.repeat(2_500); // way beyond both byte and line limits - let truncated = format_output_for_model_body( - &large_error, - MODEL_FORMAT_MAX_BYTES, - MODEL_FORMAT_MAX_LINES, - ); + let truncated = truncate_with_line_bytes_budget(&large_error, model_format_max_bytes()); let total_lines = large_error.lines().count(); let pattern = truncated_message_pattern(line, total_lines); @@ -459,7 +519,7 @@ mod tests { .expect("missing body capture") .as_str(); assert!( - body.len() <= MODEL_FORMAT_MAX_BYTES, + body.len() <= model_format_max_bytes(), "body exceeds byte limit: {} bytes", body.len() ); @@ -468,16 +528,14 @@ mod tests { #[test] fn format_exec_output_marks_byte_truncation_without_omitted_lines() { - let long_line = "a".repeat(MODEL_FORMAT_MAX_BYTES + 50); - let truncated = format_output_for_model_body( - &long_line, - MODEL_FORMAT_MAX_BYTES, - MODEL_FORMAT_MAX_LINES, - ); + let max_bytes = model_format_max_bytes(); + let long_line = "a".repeat(max_bytes + 50); + let truncated = truncate_with_line_bytes_budget(&long_line, max_bytes); assert_ne!(truncated, long_line); + let removed_bytes = long_line.len().saturating_sub(max_bytes); let marker_line = - format!("[... output truncated to fit {MODEL_FORMAT_MAX_BYTES} bytes ...]"); + format!("[... removed {removed_bytes} bytes to fit {max_bytes} byte limit ...]"); assert!( truncated.contains(&marker_line), "missing byte truncation marker: {truncated}" @@ -493,7 +551,7 @@ mod tests { let content = "example output\n".repeat(10); assert_eq!( - format_output_for_model_body(&content, MODEL_FORMAT_MAX_BYTES, MODEL_FORMAT_MAX_LINES), + truncate_with_line_bytes_budget(&content, model_format_max_bytes()), content ); } @@ -505,8 +563,7 @@ mod tests { .map(|idx| format!("line-{idx}\n")) .collect(); - let truncated = - format_output_for_model_body(&content, MODEL_FORMAT_MAX_BYTES, MODEL_FORMAT_MAX_LINES); + let truncated = truncate_with_line_bytes_budget(&content, model_format_max_bytes()); let omitted = total_lines - MODEL_FORMAT_MAX_LINES; let expected_marker = format!("[... omitted {omitted} of {total_lines} lines ...]"); @@ -535,39 +592,33 @@ mod tests { .map(|idx| format!("line-{idx}-{long_line}\n")) .collect(); - let truncated = - format_output_for_model_body(&content, MODEL_FORMAT_MAX_BYTES, MODEL_FORMAT_MAX_LINES); + let truncated = truncate_with_line_bytes_budget(&content, model_format_max_bytes()); assert!( truncated.contains("[... omitted 42 of 298 lines ...]"), "expected omitted marker when line count exceeds limit: {truncated}" ); assert!( - !truncated.contains("output truncated to fit"), + !truncated.contains("byte limit"), "line omission marker should take precedence over byte marker: {truncated}" ); } #[test] fn truncates_across_multiple_under_limit_texts_and_reports_omitted() { - // Arrange: several text items, none exceeding per-item limit, but total exceeds budget. - let budget = MODEL_FORMAT_MAX_BYTES; - let t1_len = (budget / 2).saturating_sub(10); - let t2_len = (budget / 2).saturating_sub(10); - let remaining_after_t1_t2 = budget.saturating_sub(t1_len + t2_len); - let t3_len = 50; // gets truncated to remaining_after_t1_t2 - let t4_len = 5; // omitted - let t5_len = 7; // omitted - - let t1 = "a".repeat(t1_len); - let t2 = "b".repeat(t2_len); - let t3 = "c".repeat(t3_len); - let t4 = "d".repeat(t4_len); - let t5 = "e".repeat(t5_len); + let chunk = "alpha beta gamma delta epsilon zeta eta theta iota kappa lambda mu nu xi omicron pi rho sigma tau upsilon phi chi psi omega.\n"; + let chunk_tokens = approx_token_count(chunk); + assert!(chunk_tokens > 0, "chunk must consume tokens"); + let limit = chunk_tokens * 3; + let t1 = chunk.to_string(); + let t2 = chunk.to_string(); + let t3 = chunk.repeat(10); + let t4 = chunk.to_string(); + let t5 = chunk.to_string(); let items = vec![ - FunctionCallOutputContentItem::InputText { text: t1 }, - FunctionCallOutputContentItem::InputText { text: t2 }, + FunctionCallOutputContentItem::InputText { text: t1.clone() }, + FunctionCallOutputContentItem::InputText { text: t2.clone() }, FunctionCallOutputContentItem::InputImage { image_url: "img:mid".to_string(), }, @@ -576,7 +627,8 @@ mod tests { FunctionCallOutputContentItem::InputText { text: t5 }, ]; - let output = globally_truncate_function_output_items(&items); + let output = + truncate_function_output_items_with_policy(&items, TruncationPolicy::Tokens(limit)); // Expect: t1 (full), t2 (full), image, t3 (truncated), summary mentioning 2 omitted. assert_eq!(output.len(), 5); @@ -585,13 +637,13 @@ mod tests { FunctionCallOutputContentItem::InputText { text } => text, other => panic!("unexpected first item: {other:?}"), }; - assert_eq!(first_text.len(), t1_len); + assert_eq!(first_text, &t1); let second_text = match &output[1] { FunctionCallOutputContentItem::InputText { text } => text, other => panic!("unexpected second item: {other:?}"), }; - assert_eq!(second_text.len(), t2_len); + assert_eq!(second_text, &t2); assert_eq!( output[2], @@ -604,7 +656,10 @@ mod tests { FunctionCallOutputContentItem::InputText { text } => text, other => panic!("unexpected fourth item: {other:?}"), }; - assert_eq!(fourth_text.len(), remaining_after_t1_t2); + assert!( + fourth_text.contains("tokens truncated"), + "expected marker in truncated snippet: {fourth_text}" + ); let summary_text = match &output[4] { FunctionCallOutputContentItem::InputText { text } => text, diff --git a/codex-rs/core/src/unified_exec/mod.rs b/codex-rs/core/src/unified_exec/mod.rs index f77b744497..390401d789 100644 --- a/codex-rs/core/src/unified_exec/mod.rs +++ b/codex-rs/core/src/unified_exec/mod.rs @@ -45,6 +45,7 @@ pub(crate) const MIN_YIELD_TIME_MS: u64 = 250; pub(crate) const MAX_YIELD_TIME_MS: u64 = 30_000; pub(crate) const DEFAULT_MAX_OUTPUT_TOKENS: usize = 10_000; pub(crate) const UNIFIED_EXEC_OUTPUT_MAX_BYTES: usize = 1024 * 1024; // 1 MiB +pub(crate) const UNIFIED_EXEC_OUTPUT_MAX_TOKENS: usize = UNIFIED_EXEC_OUTPUT_MAX_BYTES / 4; pub(crate) struct UnifiedExecContext { pub session: Arc, diff --git a/codex-rs/core/src/unified_exec/session.rs b/codex-rs/core/src/unified_exec/session.rs index bdb935f171..82d6e41370 100644 --- a/codex-rs/core/src/unified_exec/session.rs +++ b/codex-rs/core/src/unified_exec/session.rs @@ -14,11 +14,13 @@ use crate::exec::ExecToolCallOutput; use crate::exec::SandboxType; use crate::exec::StreamOutput; use crate::exec::is_likely_sandbox_denied; -use crate::truncate::truncate_middle; +use crate::truncate::TruncationPolicy; +use crate::truncate::truncate_text; use codex_utils_pty::ExecCommandSession; use codex_utils_pty::SpawnedPty; use super::UNIFIED_EXEC_OUTPUT_MAX_BYTES; +use super::UNIFIED_EXEC_OUTPUT_MAX_TOKENS; use super::UnifiedExecError; #[derive(Debug, Default)] @@ -165,7 +167,10 @@ impl UnifiedExecSession { }; if is_likely_sandbox_denied(self.sandbox_type(), &exec_output) { - let (snippet, _) = truncate_middle(&aggregated_text, UNIFIED_EXEC_OUTPUT_MAX_BYTES); + let snippet = truncate_text( + &aggregated_text, + TruncationPolicy::Tokens(UNIFIED_EXEC_OUTPUT_MAX_TOKENS), + ); let message = if snippet.is_empty() { format!("exit code {exit_code}") } else { diff --git a/codex-rs/core/src/unified_exec/session_manager.rs b/codex-rs/core/src/unified_exec/session_manager.rs index fee46df8b8..57c60f2b84 100644 --- a/codex-rs/core/src/unified_exec/session_manager.rs +++ b/codex-rs/core/src/unified_exec/session_manager.rs @@ -23,6 +23,9 @@ use crate::tools::orchestrator::ToolOrchestrator; use crate::tools::runtimes::unified_exec::UnifiedExecRequest as UnifiedExecToolRequest; use crate::tools::runtimes::unified_exec::UnifiedExecRuntime; use crate::tools::sandboxing::ToolCtx; +use crate::truncate::TruncationPolicy; +use crate::truncate::approx_token_count; +use crate::truncate::truncate_text; use super::ExecCommandRequest; use super::SessionEntry; @@ -36,7 +39,6 @@ use super::generate_chunk_id; use super::resolve_max_tokens; use super::session::OutputBuffer; use super::session::UnifiedExecSession; -use crate::truncate::truncate_output_to_tokens; impl UnifiedExecSessionManager { pub(crate) async fn exec_command( @@ -70,7 +72,7 @@ impl UnifiedExecSessionManager { let wall_time = Instant::now().saturating_duration_since(start); let text = String::from_utf8_lossy(&collected).to_string(); - let (output, original_token_count) = truncate_output_to_tokens(&text, max_tokens); + let output = truncate_text(&text, TruncationPolicy::Tokens(max_tokens)); let chunk_id = generate_chunk_id(); let has_exited = session.has_exited(); let stored_id = self @@ -85,6 +87,8 @@ impl UnifiedExecSessionManager { // Only include a session_id in the response if the process is still alive. let session_id = if has_exited { None } else { Some(stored_id) }; + let original_token_count = approx_token_count(&text); + let response = UnifiedExecResponse { event_call_id: context.call_id.clone(), chunk_id, @@ -92,7 +96,7 @@ impl UnifiedExecSessionManager { output, session_id, exit_code: exit_code.flatten(), - original_token_count, + original_token_count: Some(original_token_count), session_command: Some(request.command.clone()), }; @@ -175,7 +179,8 @@ impl UnifiedExecSessionManager { let wall_time = Instant::now().saturating_duration_since(start); let text = String::from_utf8_lossy(&collected).to_string(); - let (output, original_token_count) = truncate_output_to_tokens(&text, max_tokens); + let output = truncate_text(&text, TruncationPolicy::Tokens(max_tokens)); + let original_token_count = approx_token_count(&text); let chunk_id = generate_chunk_id(); let status = self.refresh_session_state(session_id).await; @@ -199,7 +204,7 @@ impl UnifiedExecSessionManager { output, session_id, exit_code, - original_token_count, + original_token_count: Some(original_token_count), session_command: Some(session_command.clone()), }; diff --git a/codex-rs/core/tests/suite/truncation.rs b/codex-rs/core/tests/suite/truncation.rs index 200d27e887..3cbbc6bd57 100644 --- a/codex-rs/core/tests/suite/truncation.rs +++ b/codex-rs/core/tests/suite/truncation.rs @@ -27,7 +27,6 @@ use core_test_support::skip_if_no_network; use core_test_support::test_codex::test_codex; use core_test_support::wait_for_event; use escargot::CargoBuild; -use regex_lite::Regex; use serde_json::Value; use serde_json::json; use std::collections::HashMap; @@ -48,7 +47,7 @@ async fn truncate_function_error_trims_respond_to_model() -> Result<()> { let test = builder.build(&server).await?; // Construct a very long, non-existent path to force a RespondToModel error with a large message - let long_path = "a".repeat(20_000); + let long_path = "long path text should trigger truncation".repeat(8_000); let call_id = "grep-huge-error"; let args = json!({ "pattern": "alpha", @@ -80,12 +79,16 @@ async fn truncate_function_error_trims_respond_to_model() -> Result<()> { tracing::debug!(output = %output, "truncated function error output"); - // Expect plaintext with byte-truncation marker and no omitted-lines marker + // Expect plaintext with token-based truncation marker and no omitted-lines marker assert!( serde_json::from_str::(&output).is_err(), "expected error output to be plain text", ); - let truncated_pattern = r#"(?s)^Total output lines: 1\s+.*\[\.\.\. output truncated to fit 11264 bytes \.\.\.\]\s*$"#; + assert!( + !output.contains("Total output lines:"), + "error output should not include line-based truncation header: {output}", + ); + let truncated_pattern = r"(?s)^unable to access `.*tokens truncated.*$"; assert_regex_match(truncated_pattern, &output); assert!( !output.contains("omitted"), @@ -269,7 +272,7 @@ async fn mcp_tool_call_output_exceeds_limit_truncated_for_model() -> Result<()> let tool_name = format!("mcp__{server_name}__echo"); // Build a very large message to exceed 10KiB once serialized. - let large_msg = "long-message-with-newlines-".repeat(600); + let large_msg = "long-message-with-newlines-".repeat(6000); let args_json = serde_json::json!({ "message": large_msg }); mount_sse_once( @@ -334,22 +337,19 @@ async fn mcp_tool_call_output_exceeds_limit_truncated_for_model() -> Result<()> .function_call_output_text(call_id) .context("function_call_output present for rmcp call")?; - // Expect plain text with byte-based truncation marker. + // Expect plain text with token-based truncation marker; the original JSON body + // is truncated in the middle of the echo string. assert!( serde_json::from_str::(&output).is_err(), "expected truncated MCP output to be plain text" ); assert!( - output.starts_with("Total output lines: 1\n\n{"), - "expected total line header and JSON head, got: {output}" + !output.contains("Total output lines:"), + "MCP output should not include line-based truncation header: {output}" ); - let byte_marker = Regex::new(r"\[\.\.\. output truncated to fit 11264 bytes \.\.\.\]") - .expect("compile regex"); - assert!( - byte_marker.is_match(&output), - "expected byte truncation marker, got: {output}" - ); + let truncated_pattern = r#"(?s)^\{"echo":\s*"ECHOING: long-message-with-newlines-.*tokens truncated.*long-message-with-newlines-.*$"#; + assert_regex_match(truncated_pattern, &output); Ok(()) } @@ -453,3 +453,164 @@ async fn mcp_image_output_preserves_image_and_no_text_summary() -> Result<()> { Ok(()) } + +// Token-based policy should report token counts even when truncation is byte-estimated. +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn token_policy_marker_reports_tokens() -> Result<()> { + skip_if_no_network!(Ok(())); + + let server = start_mock_server().await; + let mut builder = test_codex().with_config(|config| { + config.model = "gpt-5.1-codex".to_string(); // token policy + config.model_family = + find_family_for_model("gpt-5.1-codex").expect("model family for gpt-5.1-codex"); + config.tool_output_token_limit = Some(50); // small budget to force truncation + }); + let fixture = builder.build(&server).await?; + + let call_id = "shell-token-marker"; + let args = json!({ + "command": ["/bin/sh", "-c", "seq 1 150"], + "timeout_ms": 5_000, + }); + + mount_sse_once( + &server, + sse(vec![ + ev_response_created("resp-1"), + ev_function_call(call_id, "shell", &serde_json::to_string(&args)?), + ev_completed("resp-1"), + ]), + ) + .await; + let done_mock = mount_sse_once( + &server, + sse(vec![ + ev_assistant_message("msg-1", "done"), + ev_completed("resp-2"), + ]), + ) + .await; + + fixture + .submit_turn_with_policy("run the shell tool", SandboxPolicy::DangerFullAccess) + .await?; + + let output = done_mock + .single_request() + .function_call_output_text(call_id) + .context("shell output present")?; + + assert_regex_match(r"\[\u{2026}127 tokens truncated\u{2026}]", &output); + + Ok(()) +} + +// Byte-based policy should report bytes removed. +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn byte_policy_marker_reports_bytes() -> Result<()> { + skip_if_no_network!(Ok(())); + + let server = start_mock_server().await; + let mut builder = test_codex().with_config(|config| { + config.model = "gpt-5.1".to_string(); // byte policy + config.model_family = find_family_for_model("gpt-5.1").expect("model family for gpt-5.1"); + config.tool_output_token_limit = Some(50); // ~200 byte cap + }); + let fixture = builder.build(&server).await?; + + let call_id = "shell-byte-marker"; + let args = json!({ + "command": ["/bin/sh", "-c", "seq 1 150"], + "timeout_ms": 5_000, + }); + + mount_sse_once( + &server, + sse(vec![ + ev_response_created("resp-1"), + ev_function_call(call_id, "shell", &serde_json::to_string(&args)?), + ev_completed("resp-1"), + ]), + ) + .await; + let done_mock = mount_sse_once( + &server, + sse(vec![ + ev_assistant_message("msg-1", "done"), + ev_completed("resp-2"), + ]), + ) + .await; + + fixture + .submit_turn_with_policy("run the shell tool", SandboxPolicy::DangerFullAccess) + .await?; + + let output = done_mock + .single_request() + .function_call_output_text(call_id) + .context("shell output present")?; + + assert_regex_match(r"\[\u{2026}505 bytes truncated\u{2026}]", &output); + + Ok(()) +} + +// Overriding config with a large token budget should avoid truncation. +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn large_budget_avoids_truncation() -> Result<()> { + skip_if_no_network!(Ok(())); + + let server = start_mock_server().await; + let mut builder = test_codex().with_config(|config| { + config.model = "gpt-5.1-codex".to_string(); + config.model_family = + find_family_for_model("gpt-5.1-codex").expect("model family for gpt-5.1-codex"); + config.tool_output_token_limit = Some(50_000); // ample budget + }); + let fixture = builder.build(&server).await?; + + let call_id = "shell-no-trunc"; + let args = json!({ + "command": ["/bin/sh", "-c", "seq 1 1000"], + "timeout_ms": 5_000, + }); + + mount_sse_once( + &server, + sse(vec![ + ev_response_created("resp-1"), + ev_function_call(call_id, "shell", &serde_json::to_string(&args)?), + ev_completed("resp-1"), + ]), + ) + .await; + let done_mock = mount_sse_once( + &server, + sse(vec![ + ev_assistant_message("msg-1", "done"), + ev_completed("resp-2"), + ]), + ) + .await; + + fixture + .submit_turn_with_policy( + "run big output without truncation", + SandboxPolicy::DangerFullAccess, + ) + .await?; + + let output = done_mock + .single_request() + .function_call_output_text(call_id) + .context("shell output present")?; + + assert!( + !output.contains("truncated"), + "output should remain untruncated with ample budget" + ); + + Ok(()) +} diff --git a/codex-rs/core/tests/suite/unified_exec.rs b/codex-rs/core/tests/suite/unified_exec.rs index 970be5277e..23f2c62b9c 100644 --- a/codex-rs/core/tests/suite/unified_exec.rs +++ b/codex-rs/core/tests/suite/unified_exec.rs @@ -26,9 +26,11 @@ use core_test_support::test_codex::TestCodex; use core_test_support::test_codex::test_codex; use core_test_support::wait_for_event; use core_test_support::wait_for_event_match; +use core_test_support::wait_for_event_with_timeout; use regex_lite::Regex; use serde_json::Value; use serde_json::json; +use tokio::time::Duration; fn extract_output_text(item: &Value) -> Option<&str> { item.get("output").and_then(|value| match value { @@ -814,7 +816,7 @@ async fn exec_command_reports_chunk_and_exit_metadata() -> Result<()> { let call_id = "uexec-metadata"; let args = serde_json::json!({ - "cmd": "printf 'abcdefghijklmnopqrstuvwxyz'", + "cmd": "printf 'token one token two token three token four token five token six token seven'", "yield_time_ms": 500, "max_output_tokens": 6, }); @@ -1295,7 +1297,7 @@ async fn unified_exec_streams_after_lagged_output() -> Result<()> { import sys import time -chunk = b'x' * (1 << 20) +chunk = b'long content here to trigger truncation' * (1 << 10) for _ in range(4): sys.stdout.buffer.write(chunk) sys.stdout.flush() @@ -1365,8 +1367,13 @@ PY summary: ReasoningSummary::Auto, }) .await?; - - wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await; + // This is a worst case scenario for the truncate logic. + wait_for_event_with_timeout( + &codex, + |event| matches!(event, EventMsg::TaskComplete(_)), + Duration::from_secs(10), + ) + .await; let requests = server.received_requests().await.expect("recorded requests"); assert!(!requests.is_empty(), "expected at least one POST request"); @@ -1523,14 +1530,15 @@ async fn unified_exec_formats_large_output_summary() -> Result<()> { } = builder.build(&server).await?; let script = r#"python3 - <<'PY' -for i in range(300): - print(f"line-{i}") +for i in range(10000): + print("token token ") PY "#; let call_id = "uexec-large-output"; let args = serde_json::json!({ "cmd": script, + "max_output_tokens": 100, "yield_time_ms": 500, }); @@ -1577,15 +1585,14 @@ PY let outputs = collect_tool_outputs(&bodies)?; let large_output = outputs.get(call_id).expect("missing large output summary"); - assert_regex_match( - concat!( - r"(?s)", - r"line-0.*?", - r"\[\.{3} omitted \d+ of \d+ lines \.{3}\].*?", - r"line-299", - ), - &large_output.output, - ); + let output_text = large_output.output.replace("\r\n", "\n"); + let truncated_pattern = r#"(?s)^(token token \n){5,}.*\[\u{2026}\d+ tokens truncated\u{2026}]\n(token token \n){5,}$"#; + assert_regex_match(truncated_pattern, &output_text); + + let original_tokens = large_output + .original_token_count + .expect("missing original_token_count for large output summary"); + assert!(original_tokens > 0); Ok(()) } diff --git a/codex-rs/core/tests/suite/user_shell_cmd.rs b/codex-rs/core/tests/suite/user_shell_cmd.rs index 0d42c45c1c..0e9585ba4b 100644 --- a/codex-rs/core/tests/suite/user_shell_cmd.rs +++ b/codex-rs/core/tests/suite/user_shell_cmd.rs @@ -272,7 +272,7 @@ async fn user_shell_command_is_truncated_only_once() -> anyhow::Result<()> { let mut builder = test_codex().with_config(|config| { config.model = "gpt-5.1-codex".to_string(); config.model_family = - find_family_for_model("gpt-5.1-codex").expect("gpt-5.1-codex is a model family"); + find_family_for_model("gpt-5-codex").expect("gpt-5-codex is a model family"); }); let fixture = builder.build(&server).await?; diff --git a/codex-rs/utils/tokenizer/src/lib.rs b/codex-rs/utils/tokenizer/src/lib.rs index 23ab261fa6..1e151393a9 100644 --- a/codex-rs/utils/tokenizer/src/lib.rs +++ b/codex-rs/utils/tokenizer/src/lib.rs @@ -122,6 +122,11 @@ impl Tokenizer { } } +impl fmt::Debug for Tokenizer { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "Tokenizer {{ inner: }}") + } +} #[cfg(test)] mod tests { use super::*; diff --git a/docs/config.md b/docs/config.md index 51867773a9..ddfe2ff7c4 100644 --- a/docs/config.md +++ b/docs/config.md @@ -925,6 +925,7 @@ Valid values: | `model_provider` | string | Provider id from `model_providers` (default: `openai`). | | `model_context_window` | number | Context window tokens. | | `model_max_output_tokens` | number | Max output tokens. | +| `tool_output_token_limit` | number | Token budget for stored function/tool outputs in history (default: 2,560 tokens). | | `approval_policy` | `untrusted` \| `on-failure` \| `on-request` \| `never` | When to prompt for approval. | | `sandbox_mode` | `read-only` \| `workspace-write` \| `danger-full-access` | OS sandbox policy. | | `sandbox_workspace_write.writable_roots` | array | Extra writable roots in workspace‑write. | diff --git a/docs/example-config.md b/docs/example-config.md index 21b360ccef..b2da427314 100644 --- a/docs/example-config.md +++ b/docs/example-config.md @@ -33,6 +33,7 @@ model_provider = "openai" # model_context_window = 128000 # tokens; default: auto for model # model_max_output_tokens = 8192 # tokens; default: auto for model # model_auto_compact_token_limit = 0 # disable/override auto; default: model family specific +# tool_output_token_limit = 10000 # tokens stored per tool output; default: 10000 for gpt-5.1-codex ################################################################################ # Reasoning & Verbosity (Responses API capable models)