diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs
index 64d06d0571..e308601c55 100644
--- a/codex-rs/core/src/codex.rs
+++ b/codex-rs/core/src/codex.rs
@@ -13,6 +13,7 @@ use crate::parse_command::parse_command;
 use crate::parse_turn_item;
 use crate::response_processing::process_items;
 use crate::terminal;
+use crate::truncate::TruncationPolicy;
 use crate::user_notification::UserNotifier;
 use crate::util::error_or_panic;
 use async_channel::Receiver;
@@ -275,6 +276,7 @@ pub(crate) struct TurnContext {
     pub(crate) final_output_json_schema: Option<Value>,
     pub(crate) codex_linux_sandbox_exe: Option<PathBuf>,
     pub(crate) tool_call_gate: Arc<ReadinessFlag>,
+    pub(crate) truncation_policy: TruncationPolicy,
 }
 
 impl TurnContext {
@@ -401,7 +403,10 @@ impl Session {
         );
 
+        // Derive the truncation policy while `per_turn_config` is still owned here,
+        // so the config can be moved into the client instead of being cloned.
+        let truncation_policy = TruncationPolicy::new(&per_turn_config);
         let client = ModelClient::new(
             Arc::new(per_turn_config),
             auth_manager,
             otel_event_manager,
             provider,
@@ -431,6 +436,7 @@ impl Session {
             final_output_json_schema: None,
             codex_linux_sandbox_exe: config.codex_linux_sandbox_exe.clone(),
             tool_call_gate: Arc::new(ReadinessFlag::new()),
+            truncation_policy,
         }
     }
 
@@ -678,7 +681,8 @@ impl Session {
                 let reconstructed_history =
                     self.reconstruct_history_from_rollout(&turn_context, &rollout_items);
                 if !reconstructed_history.is_empty() {
-                    self.record_into_history(&reconstructed_history).await;
+                    self.record_into_history(&reconstructed_history, &turn_context)
+                        .await;
                 }
 
                 // If persisting, persist all rollout items as-is (recorder filters)
@@ -935,7 +939,7 @@ impl Session {
         turn_context: &TurnContext,
         items: &[ResponseItem],
     ) {
-        self.record_into_history(items).await;
+        self.record_into_history(items, turn_context).await;
         self.persist_rollout_response_items(items).await;
         self.send_raw_response_items(turn_context, items).await;
     }
@@ -949,7 +953,10 @@ impl Session {
         for item in rollout_items {
             match item {
                 RolloutItem::ResponseItem(response_item) => {
-                    history.record_items(std::iter::once(response_item));
+                    history.record_items(
+                        std::iter::once(response_item),
+                        turn_context.truncation_policy,
+                    );
                 }
                 RolloutItem::Compacted(compacted) => {
                     let snapshot = history.get_history();
@@ -973,9 +980,13 @@ impl Session {
     }
 
     /// Append ResponseItems to the in-memory conversation history only.
-    pub(crate) async fn record_into_history(&self, items: &[ResponseItem]) {
+    pub(crate) async fn record_into_history(
+        &self,
+        items: &[ResponseItem],
+        turn_context: &TurnContext,
+    ) {
         let mut state = self.state.lock().await;
-        state.record_items(items.iter());
+        state.record_items(items.iter(), turn_context.truncation_policy);
     }
 
     pub(crate) async fn replace_history(&self, items: Vec<ResponseItem>) {
@@ -1755,6 +1766,7 @@ async fn spawn_review_thread(
         final_output_json_schema: None,
         codex_linux_sandbox_exe: parent_turn_context.codex_linux_sandbox_exe.clone(),
         tool_call_gate: Arc::new(ReadinessFlag::new()),
+        truncation_policy: TruncationPolicy::new(&per_turn_config),
     };
 
     // Seed the child task with the review prompt as the initial user message.
@@ -2886,7 +2898,7 @@ mod tests {
         for item in &initial_context {
             rollout_items.push(RolloutItem::ResponseItem(item.clone()));
         }
-        live_history.record_items(initial_context.iter());
+        live_history.record_items(initial_context.iter(), turn_context.truncation_policy);
 
         let user1 = ResponseItem::Message {
             id: None,
@@ -2895,7 +2907,7 @@ mod tests {
                 text: "first user".to_string(),
             }],
         };
-        live_history.record_items(std::iter::once(&user1));
+        live_history.record_items(std::iter::once(&user1), turn_context.truncation_policy);
         rollout_items.push(RolloutItem::ResponseItem(user1.clone()));
 
         let assistant1 = ResponseItem::Message {
@@ -2905,7 +2917,7 @@ mod tests {
                 text: "assistant reply one".to_string(),
             }],
         };
-        live_history.record_items(std::iter::once(&assistant1));
+        live_history.record_items(std::iter::once(&assistant1), turn_context.truncation_policy);
         rollout_items.push(RolloutItem::ResponseItem(assistant1.clone()));
 
         let summary1 = "summary one";
@@ -2929,7 +2941,7 @@ mod tests {
                 text: "second user".to_string(),
             }],
         };
-        live_history.record_items(std::iter::once(&user2));
+        live_history.record_items(std::iter::once(&user2), turn_context.truncation_policy);
         rollout_items.push(RolloutItem::ResponseItem(user2.clone()));
 
         let assistant2 = ResponseItem::Message {
@@ -2939,7 +2951,7 @@ mod tests {
                 text: "assistant reply two".to_string(),
             }],
         };
-        live_history.record_items(std::iter::once(&assistant2));
+        live_history.record_items(std::iter::once(&assistant2), turn_context.truncation_policy);
         rollout_items.push(RolloutItem::ResponseItem(assistant2.clone()));
 
         let summary2 = "summary two";
@@ -2963,7 +2975,7 @@ mod tests {
                 text: "third user".to_string(),
             }],
         };
-        live_history.record_items(std::iter::once(&user3));
+        live_history.record_items(std::iter::once(&user3), turn_context.truncation_policy);
         rollout_items.push(RolloutItem::ResponseItem(user3.clone()));
 
         let assistant3 = ResponseItem::Message {
@@ -2973,7 +2985,7 @@ mod tests {
                 text: "assistant reply three".to_string(),
             }],
         };
-        live_history.record_items(std::iter::once(&assistant3));
+        live_history.record_items(std::iter::once(&assistant3), turn_context.truncation_policy);
         rollout_items.push(RolloutItem::ResponseItem(assistant3.clone()));
 
         (rollout_items, live_history.get_history())
diff --git a/codex-rs/core/src/compact.rs b/codex-rs/core/src/compact.rs
index 0495c161d5..33d38091f6 100644
--- a/codex-rs/core/src/compact.rs
+++ b/codex-rs/core/src/compact.rs
@@ -14,7 +14,9 @@ use crate::protocol::EventMsg;
 use crate::protocol::TaskStartedEvent;
 use crate::protocol::TurnContextItem;
 use crate::protocol::WarningEvent;
-use crate::truncate::truncate_middle;
+use crate::truncate::TruncationPolicy;
+use crate::truncate::approx_token_count;
+use crate::truncate::truncate_text;
 use crate::util::backoff;
 use codex_protocol::items::TurnItem;
 use codex_protocol::models::ContentItem;
@@ -59,7 +61,10 @@ async fn run_compact_task_inner(
     let initial_input_for_turn: ResponseInputItem = ResponseInputItem::from(input);
 
     let mut history = sess.clone_history().await;
-    history.record_items(&[initial_input_for_turn.into()]);
+    history.record_items(
+        &[initial_input_for_turn.into()],
+        turn_context.truncation_policy,
+    );
 
     let mut truncated_count = 0usize;
 
@@ -230,7 +235,7 @@ pub(crate) fn build_compacted_history(
         initial_context,
         user_messages,
         summary_text,
-        COMPACT_USER_MESSAGE_MAX_TOKENS * 4,
+        COMPACT_USER_MESSAGE_MAX_TOKENS,
     )
 }
 
@@ -238,20 +243,21 @@ fn build_compacted_history_with_limit(
     mut history: Vec<ResponseItem>,
     user_messages: &[String],
     summary_text: &str,
-    max_bytes: usize,
+    max_tokens: usize,
 ) -> Vec<ResponseItem> {
     let mut selected_messages: Vec<String> = Vec::new();
-    if max_bytes > 0 {
-        let mut remaining = max_bytes;
+    if max_tokens > 0 {
+        let mut remaining = max_tokens;
         for message in user_messages.iter().rev() {
             if remaining == 0 {
                 break;
             }
-            if message.len() <= remaining {
+            let tokens = approx_token_count(message);
+            if tokens <= remaining {
                 selected_messages.push(message.clone());
-                remaining = remaining.saturating_sub(message.len());
+                remaining = remaining.saturating_sub(tokens);
             } else {
-                let (truncated, _) = truncate_middle(message, remaining);
+                let truncated = truncate_text(message, TruncationPolicy::Tokens(remaining));
                 selected_messages.push(truncated);
                 break;
             }
@@ -300,7 +306,8 @@ async fn drain_to_completed(
         };
         match event {
             Ok(ResponseEvent::OutputItemDone(item)) => {
-                sess.record_into_history(std::slice::from_ref(&item)).await;
+                sess.record_into_history(std::slice::from_ref(&item), turn_context)
+                    .await;
             }
             Ok(ResponseEvent::RateLimits(snapshot)) => {
                 sess.update_rate_limits(turn_context, snapshot).await;
@@ -318,6 +325,7 @@ async fn drain_to_completed(
 
 #[cfg(test)]
 mod tests {
+
     use super::*;
     use pretty_assertions::assert_eq;
 
@@ -409,16 +417,16 @@ mod tests {
     }
 
     #[test]
-    fn build_compacted_history_truncates_overlong_user_messages() {
+    fn build_token_limited_compacted_history_truncates_overlong_user_messages() {
         // Use a small truncation limit so the test remains fast while still validating
         // that oversized user content is truncated.
-        let max_bytes = 128;
-        let big = "X".repeat(max_bytes + 50);
+        let max_tokens = 16;
+        let big = "word ".repeat(200);
         let history = super::build_compacted_history_with_limit(
             Vec::new(),
             std::slice::from_ref(&big),
             "SUMMARY",
-            max_bytes,
+            max_tokens,
         );
         assert_eq!(history.len(), 2);
 
@@ -451,7 +459,7 @@ mod tests {
     }
 
     #[test]
-    fn build_compacted_history_appends_summary_message() {
+    fn build_token_limited_compacted_history_appends_summary_message() {
         let initial_context: Vec<ResponseItem> = Vec::new();
         let user_messages = vec!["first user message".to_string()];
         let summary_text = "summary text";
diff --git a/codex-rs/core/src/compact_remote.rs b/codex-rs/core/src/compact_remote.rs
index 2c7d57eff2..1726aad6f9 100644
--- a/codex-rs/core/src/compact_remote.rs
+++ b/codex-rs/core/src/compact_remote.rs
@@ -50,7 +50,10 @@ async fn run_remote_compact_task_inner(
     let mut history = sess.clone_history().await;
     if !input.is_empty() {
         let initial_input_for_turn: ResponseInputItem = ResponseInputItem::from(input);
-        history.record_items(&[initial_input_for_turn.into()]);
+        history.record_items(
+            &[initial_input_for_turn.into()],
+            turn_context.truncation_policy,
+        );
     }
 
     let prompt = Prompt {
diff --git a/codex-rs/core/src/config/mod.rs b/codex-rs/core/src/config/mod.rs
index 5b57d4dc01..0c00da9a48 100644
--- a/codex-rs/core/src/config/mod.rs
+++ b/codex-rs/core/src/config/mod.rs
@@ -195,6 +195,9 @@ pub struct Config {
     /// Additional filenames to try when looking for project-level docs.
     pub project_doc_fallback_filenames: Vec<String>,
 
+    /// Token budget applied when storing tool/function outputs in the context manager.
+    pub tool_output_token_limit: Option<usize>,
+
     /// Directory containing all Codex state (defaults to `~/.codex` but can be
     /// overridden by the `CODEX_HOME` environment variable).
     pub codex_home: PathBuf,
@@ -636,6 +639,9 @@ pub struct ConfigToml {
     /// Ordered list of fallback filenames to look for when AGENTS.md is missing.
     pub project_doc_fallback_filenames: Option<Vec<String>>,
 
+    /// Token budget applied when storing tool/function outputs in the context manager.
+    pub tool_output_token_limit: Option<usize>,
+
     /// Profile to use from the `profiles` map.
     pub profile: Option<String>,
 
@@ -1209,6 +1215,7 @@ impl Config {
                     }
                 })
                 .collect(),
+            tool_output_token_limit: cfg.tool_output_token_limit,
             codex_home,
             history,
             file_opener: cfg.file_opener.unwrap_or(UriBasedFileOpener::VsCode),
@@ -2961,6 +2968,7 @@ model_verbosity = "high"
                 model_providers: fixture.model_provider_map.clone(),
                 project_doc_max_bytes: PROJECT_DOC_MAX_BYTES,
                 project_doc_fallback_filenames: Vec::new(),
+                tool_output_token_limit: None,
                 codex_home: fixture.codex_home(),
                 history: History::default(),
                 file_opener: UriBasedFileOpener::VsCode,
@@ -3032,6 +3040,7 @@ model_verbosity = "high"
             model_providers: fixture.model_provider_map.clone(),
             project_doc_max_bytes: PROJECT_DOC_MAX_BYTES,
             project_doc_fallback_filenames: Vec::new(),
+            tool_output_token_limit: None,
             codex_home: fixture.codex_home(),
             history: History::default(),
             file_opener: UriBasedFileOpener::VsCode,
@@ -3118,6 +3127,7 @@ model_verbosity = "high"
             model_providers: fixture.model_provider_map.clone(),
             project_doc_max_bytes: PROJECT_DOC_MAX_BYTES,
             project_doc_fallback_filenames: Vec::new(),
+            tool_output_token_limit: None,
             codex_home: fixture.codex_home(),
             history: History::default(),
             file_opener: UriBasedFileOpener::VsCode,
@@ -3190,6 +3200,7 @@ model_verbosity = "high"
             model_providers: fixture.model_provider_map.clone(),
             project_doc_max_bytes: PROJECT_DOC_MAX_BYTES,
             project_doc_fallback_filenames: Vec::new(),
+            tool_output_token_limit: None,
             codex_home: fixture.codex_home(),
             history: History::default(),
             file_opener: UriBasedFileOpener::VsCode,
diff --git a/codex-rs/core/src/context_manager/history.rs b/codex-rs/core/src/context_manager/history.rs
index 50e3a8bc94..af60ada9fe 100644
--- a/codex-rs/core/src/context_manager/history.rs
+++ b/codex-rs/core/src/context_manager/history.rs
@@ -1,8 +1,8 @@
 use crate::codex::TurnContext;
 use crate::context_manager::normalize;
-use crate::truncate;
-use crate::truncate::format_output_for_model_body;
-use crate::truncate::globally_truncate_function_output_items;
+use crate::truncate::TruncationPolicy;
+use crate::truncate::truncate_function_output_items_with_policy;
+use crate::truncate::truncate_text;
 use codex_protocol::models::FunctionCallOutputPayload;
 use codex_protocol::models::ResponseItem;
 use codex_protocol::protocol::TokenUsage;
@@ -10,12 +10,6 @@ use codex_protocol::protocol::TokenUsageInfo;
 use codex_utils_tokenizer::Tokenizer;
 use std::ops::Deref;
 
-const CONTEXT_WINDOW_HARD_LIMIT_FACTOR: f64 = 1.1;
-const CONTEXT_WINDOW_HARD_LIMIT_BYTES: usize =
-    (truncate::MODEL_FORMAT_MAX_BYTES as f64 * CONTEXT_WINDOW_HARD_LIMIT_FACTOR) as usize;
-const CONTEXT_WINDOW_HARD_LIMIT_LINES: usize =
-    (truncate::MODEL_FORMAT_MAX_LINES as f64 * CONTEXT_WINDOW_HARD_LIMIT_FACTOR) as usize;
-
 /// Transcript of conversation history
 #[derive(Debug, Clone, Default)]
 pub(crate) struct ContextManager {
@@ -50,7 +44,7 @@ impl ContextManager {
     }
 
     /// `items` is ordered from oldest to newest.
-    pub(crate) fn record_items<I>(&mut self, items: I)
+    pub(crate) fn record_items<I>(&mut self, items: I, policy: TruncationPolicy)
     where
         I: IntoIterator,
         I::Item: std::ops::Deref<Target = ResponseItem>,
@@ -62,7 +56,7 @@ impl ContextManager {
                 continue;
             }
 
-            let processed = Self::process_item(&item);
+            let processed = self.process_item(item_ref, policy);
             self.items.push(processed);
         }
     }
@@ -150,18 +144,14 @@ impl ContextManager {
         items.retain(|item| !matches!(item, ResponseItem::GhostSnapshot { .. }));
     }
 
-    fn process_item(item: &ResponseItem) -> ResponseItem {
+    fn process_item(&self, item: &ResponseItem, policy: TruncationPolicy) -> ResponseItem {
         match item {
             ResponseItem::FunctionCallOutput { call_id, output } => {
-                let truncated = format_output_for_model_body(
-                    output.content.as_str(),
-                    CONTEXT_WINDOW_HARD_LIMIT_BYTES,
-                    CONTEXT_WINDOW_HARD_LIMIT_LINES,
-                );
+                let truncated = truncate_text(output.content.as_str(), policy);
                 let truncated_items = output
                     .content_items
                     .as_ref()
-                    .map(|items| globally_truncate_function_output_items(items));
+                    .map(|items| truncate_function_output_items_with_policy(items, policy));
                 ResponseItem::FunctionCallOutput {
                     call_id: call_id.clone(),
                     output: FunctionCallOutputPayload {
@@ -172,11 +162,7 @@ impl ContextManager {
                 }
             }
             ResponseItem::CustomToolCallOutput { call_id, output } => {
-                let truncated = format_output_for_model_body(
-                    output,
-                    CONTEXT_WINDOW_HARD_LIMIT_BYTES,
-                    CONTEXT_WINDOW_HARD_LIMIT_LINES,
-                );
+                let truncated = truncate_text(output, policy);
                 ResponseItem::CustomToolCallOutput {
                     call_id: call_id.clone(),
                     output: truncated,
diff --git a/codex-rs/core/src/context_manager/history_tests.rs b/codex-rs/core/src/context_manager/history_tests.rs
index c81749c2c1..fecd0a7277 100644
--- a/codex-rs/core/src/context_manager/history_tests.rs
+++ b/codex-rs/core/src/context_manager/history_tests.rs
@@ -1,9 +1,8 @@
 use super::*;
-use crate::context_manager::MODEL_FORMAT_MAX_LINES;
 use crate::truncate;
+use crate::truncate::TruncationPolicy;
 use codex_git::GhostCommit;
 use codex_protocol::models::ContentItem;
-use codex_protocol::models::FunctionCallOutputContentItem;
 use codex_protocol::models::FunctionCallOutputPayload;
 use codex_protocol::models::LocalShellAction;
 use codex_protocol::models::LocalShellExecAction;
@@ -13,6 +12,9 @@ use codex_protocol::models::ReasoningItemReasoningSummary;
 use pretty_assertions::assert_eq;
 use regex_lite::Regex;
 
+const EXEC_FORMAT_MAX_LINES: usize = 256;
+const EXEC_FORMAT_MAX_BYTES: usize = 10_000;
+
 fn assistant_msg(text: &str) -> ResponseItem {
     ResponseItem::Message {
         id: None,
@@ -25,7 +27,9 @@ fn assistant_msg(text: &str) -> ResponseItem {
 
 fn create_history_with_items(items: Vec<ResponseItem>) -> ContextManager {
     let mut h = ContextManager::new();
-    h.record_items(items.iter());
+    // Use a generous but fixed token budget; tests only rely on truncation
+    // behavior, not on a specific model's token limit.
+    h.record_items(items.iter(), TruncationPolicy::Tokens(10_000));
     h
 }
 
@@ -55,6 +59,7 @@ fn reasoning_msg(text: &str) -> ResponseItem {
 #[test]
 fn filters_non_api_messages() {
     let mut h = ContextManager::default();
+    let policy = TruncationPolicy::Tokens(10_000);
     // System message is not API messages; Other is ignored.
     let system = ResponseItem::Message {
         id: None,
@@ -64,12 +69,12 @@ fn filters_non_api_messages() {
         }],
     };
     let reasoning = reasoning_msg("thinking...");
-    h.record_items([&system, &reasoning, &ResponseItem::Other]);
+    h.record_items([&system, &reasoning, &ResponseItem::Other], policy);
 
     // User and assistant should be retained.
     let u = user_msg("hi");
     let a = assistant_msg("hello");
-    h.record_items([&u, &a]);
+    h.record_items([&u, &a], policy);
 
     let items = h.contents();
     assert_eq!(
@@ -237,6 +242,9 @@ fn normalization_retains_local_shell_outputs() {
 #[test]
 fn record_items_truncates_function_call_output_content() {
     let mut history = ContextManager::new();
+    // Any reasonably small token budget works; the test only cares that
+    // truncation happens and the marker is present.
+    let policy = TruncationPolicy::Tokens(1_000);
     let long_line = "a very long line to trigger truncation\n";
     let long_output = long_line.repeat(2_500);
     let item = ResponseItem::FunctionCallOutput {
@@ -248,15 +256,16 @@ fn record_items_truncates_function_call_output_content() {
         },
     };
 
-    history.record_items([&item]);
+    history.record_items([&item], policy);
 
     assert_eq!(history.items.len(), 1);
     match &history.items[0] {
         ResponseItem::FunctionCallOutput { output, .. } => {
             assert_ne!(output.content, long_output);
+            // The policy is token-based, so the test expects the token marker; one check suffices.
             assert!(
-                output.content.starts_with("Total output lines:"),
-                "expected truncated summary, got {}",
+                output.content.contains("tokens truncated"),
+                "expected token-based truncation marker, got {}",
                 output.content
             );
         }
@@ -267,6 +280,7 @@ fn record_items_truncates_function_call_output_content() {
 #[test]
 fn record_items_truncates_custom_tool_call_output_content() {
     let mut history = ContextManager::new();
+    let policy = TruncationPolicy::Tokens(1_000);
     let line = "custom output that is very long\n";
     let long_output = line.repeat(2_500);
     let item = ResponseItem::CustomToolCallOutput {
@@ -274,21 +288,45 @@ fn record_items_truncates_custom_tool_call_output_content() {
         output: long_output.clone(),
     };
 
-    history.record_items([&item]);
+    history.record_items([&item], policy);
 
     assert_eq!(history.items.len(), 1);
     match &history.items[0] {
         ResponseItem::CustomToolCallOutput { output, .. } => {
             assert_ne!(output, &long_output);
+            // The policy is token-based, so the test expects the token marker; one check suffices.
             assert!(
-                output.starts_with("Total output lines:"),
-                "expected truncated summary, got {output}"
+                output.contains("tokens truncated"),
+                "expected token-based truncation marker, got {output}"
             );
         }
         other => panic!("unexpected history item: {other:?}"),
     }
 }
 
+/// A ten-token budget must leave the token truncation marker on an oversized function output.
+#[test]
+fn record_items_respects_custom_token_limit() {
+    let tight_budget = TruncationPolicy::Tokens(10);
+    let payload = FunctionCallOutputPayload {
+        content: "tokenized content repeated many times ".repeat(200),
+        success: Some(true),
+        ..Default::default()
+    };
+    let oversized = ResponseItem::FunctionCallOutput {
+        call_id: String::from("call-custom-limit"),
+        output: payload,
+    };
+    let mut manager = ContextManager::new();
+    manager.record_items([&oversized], tight_budget);
+
+    let kept = match &manager.items[0] {
+        ResponseItem::FunctionCallOutput { output, .. } => output,
+        other => panic!("unexpected history item: {other:?}"),
+    };
+    assert!(kept.content.contains("tokens truncated"));
+}
+
 fn assert_truncated_message_matches(message: &str, line: &str, total_lines: usize) {
     let pattern = truncated_message_pattern(line, total_lines);
     let regex = Regex::new(&pattern).unwrap_or_else(|err| {
@@ -302,23 +343,22 @@ fn assert_truncated_message_matches(message: &str, line: &str, total_lines: usiz
         .expect("missing body capture")
         .as_str();
     assert!(
-        body.len() <= truncate::MODEL_FORMAT_MAX_BYTES,
+        body.len() <= EXEC_FORMAT_MAX_BYTES,
         "body exceeds byte limit: {} bytes",
         body.len()
     );
 }
 
 fn truncated_message_pattern(line: &str, total_lines: usize) -> String {
-    let head_lines = MODEL_FORMAT_MAX_LINES / 2;
-    let tail_lines = MODEL_FORMAT_MAX_LINES - head_lines;
+    let head_lines = EXEC_FORMAT_MAX_LINES / 2;
+    let tail_lines = EXEC_FORMAT_MAX_LINES - head_lines;
     let head_take = head_lines.min(total_lines);
     let tail_take = tail_lines.min(total_lines.saturating_sub(head_take));
     let omitted = total_lines.saturating_sub(head_take + tail_take);
     let escaped_line = regex_lite::escape(line);
     if omitted == 0 {
         return format!(
-            r"(?s)^Total output lines: {total_lines}\n\n(?P<body>{escaped_line}.*\n\[\.{{3}} output truncated to fit {max_bytes} bytes \.{{3}}]\n\n.*)$",
-            max_bytes = truncate::MODEL_FORMAT_MAX_BYTES,
+            r"(?s)^Total output lines: {total_lines}\n\n(?P<body>{escaped_line}.*\n\[\.{{3}} removed \d+ bytes to fit {EXEC_FORMAT_MAX_BYTES} byte limit \.{{3}}]\n\n.*)$",
         );
     }
     format!(
@@ -331,11 +371,7 @@ fn format_exec_output_truncates_large_error() {
     let line = "very long execution error line that should trigger truncation\n";
     let large_error = line.repeat(2_500); // way beyond both byte and line limits
 
-    let truncated = truncate::format_output_for_model_body(
-        &large_error,
-        truncate::MODEL_FORMAT_MAX_BYTES,
-        truncate::MODEL_FORMAT_MAX_LINES,
-    );
+    let truncated = truncate::truncate_with_line_bytes_budget(&large_error, EXEC_FORMAT_MAX_BYTES);
 
     let total_lines = large_error.lines().count();
     assert_truncated_message_matches(&truncated, line, total_lines);
@@ -344,17 +380,13 @@ fn format_exec_output_truncates_large_error() {
 
 #[test]
 fn format_exec_output_marks_byte_truncation_without_omitted_lines() {
-    let long_line = "a".repeat(truncate::MODEL_FORMAT_MAX_BYTES + 50);
-    let truncated = truncate::format_output_for_model_body(
-        &long_line,
-        truncate::MODEL_FORMAT_MAX_BYTES,
-        truncate::MODEL_FORMAT_MAX_LINES,
-    );
+    let long_line = "a".repeat(EXEC_FORMAT_MAX_BYTES + 50);
+    let truncated = truncate::truncate_with_line_bytes_budget(&long_line, EXEC_FORMAT_MAX_BYTES);
 
     assert_ne!(truncated, long_line);
+    let removed_bytes = long_line.len().saturating_sub(EXEC_FORMAT_MAX_BYTES);
     let marker_line = format!(
-        "[... output truncated to fit {} bytes ...]",
-        truncate::MODEL_FORMAT_MAX_BYTES
+        "[... removed {removed_bytes} bytes to fit {EXEC_FORMAT_MAX_BYTES} byte limit ...]"
     );
     assert!(
         truncated.contains(&marker_line),
@@ -371,28 +403,20 @@ fn format_exec_output_returns_original_when_within_limits() {
     let content = "example output\n".repeat(10);
 
     assert_eq!(
-        truncate::format_output_for_model_body(
-            &content,
-            truncate::MODEL_FORMAT_MAX_BYTES,
-            truncate::MODEL_FORMAT_MAX_LINES
-        ),
+        truncate::truncate_with_line_bytes_budget(&content, EXEC_FORMAT_MAX_BYTES),
         content
     );
 }
 
 #[test]
 fn format_exec_output_reports_omitted_lines_and_keeps_head_and_tail() {
-    let total_lines = truncate::MODEL_FORMAT_MAX_LINES + 100;
+    let total_lines = EXEC_FORMAT_MAX_LINES + 100;
     let content: String = (0..total_lines)
         .map(|idx| format!("line-{idx}\n"))
         .collect();
 
-    let truncated = truncate::format_output_for_model_body(
-        &content,
-        truncate::MODEL_FORMAT_MAX_BYTES,
-        truncate::MODEL_FORMAT_MAX_LINES,
-    );
-    let omitted = total_lines - truncate::MODEL_FORMAT_MAX_LINES;
+    let truncated = truncate::truncate_with_line_bytes_budget(&content, EXEC_FORMAT_MAX_BYTES);
+    let omitted = total_lines - EXEC_FORMAT_MAX_LINES;
     let expected_marker = format!("[... omitted {omitted} of {total_lines} lines ...]");
 
     assert!(
@@ -413,103 +437,24 @@ fn format_exec_output_reports_omitted_lines_and_keeps_head_and_tail() {
 
 #[test]
 fn format_exec_output_prefers_line_marker_when_both_limits_exceeded() {
-    let total_lines = truncate::MODEL_FORMAT_MAX_LINES + 42;
+    let total_lines = EXEC_FORMAT_MAX_LINES + 42;
     let long_line = "x".repeat(256);
     let content: String = (0..total_lines)
         .map(|idx| format!("line-{idx}-{long_line}\n"))
         .collect();
 
-    let truncated = truncate::format_output_for_model_body(
-        &content,
-        truncate::MODEL_FORMAT_MAX_BYTES,
-        truncate::MODEL_FORMAT_MAX_LINES,
-    );
+    let truncated = truncate::truncate_with_line_bytes_budget(&content, EXEC_FORMAT_MAX_BYTES);
 
     assert!(
         truncated.contains("[... omitted 42 of 298 lines ...]"),
         "expected omitted marker when line count exceeds limit: {truncated}"
     );
     assert!(
-        !truncated.contains("output truncated to fit"),
+        !truncated.contains("byte limit"),
         "line omission marker should take precedence over byte marker: {truncated}"
     );
 }
 
-#[test]
-fn truncates_across_multiple_under_limit_texts_and_reports_omitted() {
-    // Arrange: several text items, none exceeding per-item limit, but total exceeds budget.
-    let budget = truncate::MODEL_FORMAT_MAX_BYTES;
-    let t1_len = (budget / 2).saturating_sub(10);
-    let t2_len = (budget / 2).saturating_sub(10);
-    let remaining_after_t1_t2 = budget.saturating_sub(t1_len + t2_len);
-    let t3_len = 50; // gets truncated to remaining_after_t1_t2
-    let t4_len = 5; // omitted
-    let t5_len = 7; // omitted
-
-    let t1 = "a".repeat(t1_len);
-    let t2 = "b".repeat(t2_len);
-    let t3 = "c".repeat(t3_len);
-    let t4 = "d".repeat(t4_len);
-    let t5 = "e".repeat(t5_len);
-
-    let item = ResponseItem::FunctionCallOutput {
-        call_id: "call-omit".to_string(),
-        output: FunctionCallOutputPayload {
-            content: "irrelevant".to_string(),
-            content_items: Some(vec![
-                FunctionCallOutputContentItem::InputText { text: t1 },
-                FunctionCallOutputContentItem::InputText { text: t2 },
-                FunctionCallOutputContentItem::InputImage {
-                    image_url: "img:mid".to_string(),
-                },
-                FunctionCallOutputContentItem::InputText { text: t3 },
-                FunctionCallOutputContentItem::InputText { text: t4 },
-                FunctionCallOutputContentItem::InputText { text: t5 },
-            ]),
-            success: Some(true),
-        },
-    };
-
-    let mut history = ContextManager::new();
-    history.record_items([&item]);
-    assert_eq!(history.items.len(), 1);
-    let json = serde_json::to_value(&history.items[0]).expect("serialize to json");
-
-    let output = json
-        .get("output")
-        .expect("output field")
-        .as_array()
-        .expect("array output");
-
-    // Expect: t1 (full), t2 (full), image, t3 (truncated), summary mentioning 2 omitted.
-    assert_eq!(output.len(), 5);
-
-    let first = output[0].as_object().expect("first obj");
-    assert_eq!(first.get("type").unwrap(), "input_text");
-    let first_text = first.get("text").unwrap().as_str().unwrap();
-    assert_eq!(first_text.len(), t1_len);
-
-    let second = output[1].as_object().expect("second obj");
-    assert_eq!(second.get("type").unwrap(), "input_text");
-    let second_text = second.get("text").unwrap().as_str().unwrap();
-    assert_eq!(second_text.len(), t2_len);
-
-    assert_eq!(
-        output[2],
-        serde_json::json!({"type": "input_image", "image_url": "img:mid"})
-    );
-
-    let fourth = output[3].as_object().expect("fourth obj");
-    assert_eq!(fourth.get("type").unwrap(), "input_text");
-    let fourth_text = fourth.get("text").unwrap().as_str().unwrap();
-    assert_eq!(fourth_text.len(), remaining_after_t1_t2);
-
-    let summary = output[4].as_object().expect("summary obj");
-    assert_eq!(summary.get("type").unwrap(), "input_text");
-    let summary_text = summary.get("text").unwrap().as_str().unwrap();
-    assert!(summary_text.contains("omitted 2 text items"));
-}
-
 //TODO(aibrahim): run CI in release mode.
 #[cfg(not(debug_assertions))]
 #[test]
diff --git a/codex-rs/core/src/context_manager/mod.rs b/codex-rs/core/src/context_manager/mod.rs
index ab0d2e8168..d347a7714d 100644
--- a/codex-rs/core/src/context_manager/mod.rs
+++ b/codex-rs/core/src/context_manager/mod.rs
@@ -1,7 +1,5 @@
 mod history;
 mod normalize;
 
-pub(crate) use crate::truncate::MODEL_FORMAT_MAX_BYTES;
-pub(crate) use crate::truncate::MODEL_FORMAT_MAX_LINES;
-pub(crate) use crate::truncate::format_output_for_model_body;
+pub(crate) use crate::truncate::truncate_with_line_bytes_budget;
 pub(crate) use history::ContextManager;
diff --git a/codex-rs/core/src/error.rs b/codex-rs/core/src/error.rs
index 64ba8df848..9a42ec3d1b 100644
--- a/codex-rs/core/src/error.rs
+++ b/codex-rs/core/src/error.rs
@@ -2,7 +2,8 @@ use crate::codex::ProcessedResponseItem;
 use crate::exec::ExecToolCallOutput;
 use crate::token_data::KnownPlan;
 use crate::token_data::PlanType;
-use crate::truncate::truncate_middle;
+use crate::truncate::TruncationPolicy;
+use crate::truncate::truncate_text;
 use chrono::DateTime;
 use chrono::Datelike;
 use chrono::Local;
@@ -461,7 +462,10 @@ pub fn get_error_message_ui(e: &CodexErr) -> String {
         _ => e.to_string(),
     };
 
-    truncate_middle(&message, ERROR_MESSAGE_UI_MAX_BYTES).0
+    truncate_text(
+        &message,
+        TruncationPolicy::Bytes(ERROR_MESSAGE_UI_MAX_BYTES),
+    )
 }
 
 #[cfg(test)]
diff --git a/codex-rs/core/src/model_family.rs b/codex-rs/core/src/model_family.rs
index 150420fecf..0758d20310 100644
--- a/codex-rs/core/src/model_family.rs
+++ b/codex-rs/core/src/model_family.rs
@@ -4,6 +4,7 @@ use codex_protocol::config_types::Verbosity;
 use crate::config::types::ReasoningSummaryFormat;
 use crate::tools::handlers::apply_patch::ApplyPatchToolType;
 use crate::tools::spec::ConfigShellToolType;
+use crate::truncate::TruncationPolicy;
 
 /// The `instructions` field in the payload sent to a model should always start
 /// with this content.
@@ -66,6 +67,8 @@ pub struct ModelFamily {
 
     /// Preferred shell tool type for this model family when features do not override it.
     pub shell_type: ConfigShellToolType,
+
+    pub truncation_policy: TruncationPolicy,
 }
 
 macro_rules! model_family {
@@ -89,6 +92,7 @@ macro_rules! model_family {
             shell_type: ConfigShellToolType::Default,
             default_verbosity: None,
             default_reasoning_effort: None,
+            truncation_policy: TruncationPolicy::Bytes(10_000),
         };
 
         // apply overrides
@@ -146,6 +150,7 @@ pub fn find_family_for_model(slug: &str) -> Option<ModelFamily> {
             ],
             supports_parallel_tool_calls: true,
             support_verbosity: true,
+            truncation_policy: TruncationPolicy::Tokens(10_000),
         )
 
     // Internal models.
@@ -164,6 +169,7 @@ pub fn find_family_for_model(slug: &str) -> Option<ModelFamily> {
             shell_type: if cfg!(windows) { ConfigShellToolType::ShellCommand } else { ConfigShellToolType::Default },
             supports_parallel_tool_calls: true,
             support_verbosity: true,
+            truncation_policy: TruncationPolicy::Tokens(10_000),
         )
 
     // Production models.
@@ -180,6 +186,7 @@ pub fn find_family_for_model(slug: &str) -> Option<ModelFamily> {
             shell_type: if cfg!(windows) { ConfigShellToolType::ShellCommand } else { ConfigShellToolType::Default },
             supports_parallel_tool_calls: true,
             support_verbosity: false,
+            truncation_policy: TruncationPolicy::Tokens(10_000),
         )
     } else if slug.starts_with("gpt-5.1") {
         model_family!(
@@ -190,6 +197,7 @@ pub fn find_family_for_model(slug: &str) -> Option<ModelFamily> {
             default_verbosity: Some(Verbosity::Low),
             base_instructions: GPT_5_1_INSTRUCTIONS.to_string(),
             default_reasoning_effort: Some(ReasoningEffort::Medium),
+            truncation_policy: TruncationPolicy::Bytes(10_000),
             supports_parallel_tool_calls: true,
         )
     } else if slug.starts_with("gpt-5") {
@@ -198,6 +206,7 @@ pub fn find_family_for_model(slug: &str) -> Option<ModelFamily> {
             supports_reasoning_summaries: true,
             needs_special_apply_patch_instructions: true,
             support_verbosity: true,
+            truncation_policy: TruncationPolicy::Bytes(10_000),
         )
     } else {
         None
@@ -220,5 +229,6 @@ pub fn derive_default_model_family(model: &str) -> ModelFamily {
         shell_type: ConfigShellToolType::Default,
         default_verbosity: None,
         default_reasoning_effort: None,
+        truncation_policy: TruncationPolicy::Bytes(10_000),
     }
 }
diff --git a/codex-rs/core/src/state/session.rs b/codex-rs/core/src/state/session.rs
index 5b630d5ce9..2dfa5199f1 100644
--- a/codex-rs/core/src/state/session.rs
+++ b/codex-rs/core/src/state/session.rs
@@ -7,6 +7,7 @@ use crate::context_manager::ContextManager;
 use crate::protocol::RateLimitSnapshot;
 use crate::protocol::TokenUsage;
 use crate::protocol::TokenUsageInfo;
+use crate::truncate::TruncationPolicy;
 
 /// Persistent, session-scoped state previously stored directly on `Session`.
 pub(crate) struct SessionState {
@@ -18,20 +19,21 @@ pub(crate) struct SessionState {
 impl SessionState {
     /// Create a new session state mirroring previous `State::default()` semantics.
     pub(crate) fn new(session_configuration: SessionConfiguration) -> Self {
+        let history = ContextManager::new();
         Self {
             session_configuration,
-            history: ContextManager::new(),
+            history,
             latest_rate_limits: None,
         }
     }
 
     // History helpers
-    pub(crate) fn record_items<I>(&mut self, items: I)
+    pub(crate) fn record_items<I>(&mut self, items: I, policy: TruncationPolicy)
     where
         I: IntoIterator,
         I::Item: std::ops::Deref<Target = ResponseItem>,
     {
-        self.history.record_items(items)
+        self.history.record_items(items, policy);
     }
 
     pub(crate) fn clone_history(&self) -> ContextManager {
diff --git a/codex-rs/core/src/tools/mod.rs b/codex-rs/core/src/tools/mod.rs
index c94a7c28d9..99d5f16506 100644
--- a/codex-rs/core/src/tools/mod.rs
+++ b/codex-rs/core/src/tools/mod.rs
@@ -9,9 +9,7 @@ pub mod runtimes;
 pub mod sandboxing;
 pub mod spec;
 
-use crate::context_manager::MODEL_FORMAT_MAX_BYTES;
-use crate::context_manager::MODEL_FORMAT_MAX_LINES;
-use crate::context_manager::format_output_for_model_body;
+use crate::context_manager::truncate_with_line_bytes_budget;
 use crate::exec::ExecToolCallOutput;
 pub use router::ToolRouter;
 use serde::Serialize;
@@ -22,6 +20,9 @@ pub(crate) const TELEMETRY_PREVIEW_MAX_LINES: usize = 64; // lines
 pub(crate) const TELEMETRY_PREVIEW_TRUNCATION_NOTICE: &str =
     "[... telemetry preview truncated ...]";
 
+// TODO(aibrahim): migrate the shell tool to `truncate_text` and respect the configured limit.
+const SHELL_OUTPUT_MAX_BYTES: usize = 10_000;
+
 /// Format the combined exec output for sending back to the model.
 /// Includes exit code and duration metadata; truncates large bodies safely.
 pub fn format_exec_output_for_model(exec_output: &ExecToolCallOutput) -> String {
@@ -77,5 +78,5 @@ pub fn format_exec_output_str(exec_output: &ExecToolCallOutput) -> String {
     };
 
     // Truncate for model consumption before serialization.
-    format_output_for_model_body(&body, MODEL_FORMAT_MAX_BYTES, MODEL_FORMAT_MAX_LINES)
+    truncate_with_line_bytes_budget(&body, SHELL_OUTPUT_MAX_BYTES)
 }
diff --git a/codex-rs/core/src/truncate.rs b/codex-rs/core/src/truncate.rs
index 42d6a967de..bdec0b0d1e 100644
--- a/codex-rs/core/src/truncate.rs
+++ b/codex-rs/core/src/truncate.rs
@@ -5,45 +5,142 @@
 use codex_protocol::models::FunctionCallOutputContentItem;
 use codex_utils_string::take_bytes_at_char_boundary;
 use codex_utils_string::take_last_bytes_at_char_boundary;
-use codex_utils_tokenizer::Tokenizer;
 
-/// Model-formatting limits: clients get full streams; only content sent to the model is truncated.
-pub const MODEL_FORMAT_MAX_BYTES: usize = 10 * 1024; // 10 KiB
-pub const MODEL_FORMAT_MAX_LINES: usize = 256; // lines
+use crate::config::Config;
 
-/// Globally truncate function output items to fit within `MODEL_FORMAT_MAX_BYTES`
-/// by preserving as many text/image items as possible and appending a summary
-/// for any omitted text items.
-pub(crate) fn globally_truncate_function_output_items(
+const APPROX_BYTES_PER_TOKEN: usize = 4;
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+pub enum TruncationPolicy {
+    Bytes(usize),
+    Tokens(usize),
+}
+
+impl TruncationPolicy {
+    pub fn new(config: &Config) -> Self {
+        let config_token_limit = config.tool_output_token_limit;
+
+        match config.model_family.truncation_policy {
+            TruncationPolicy::Bytes(family_bytes) => {
+                if let Some(token_limit) = config_token_limit {
+                    Self::Bytes(approx_bytes_for_tokens(token_limit))
+                } else {
+                    Self::Bytes(family_bytes)
+                }
+            }
+            TruncationPolicy::Tokens(family_tokens) => {
+                if let Some(token_limit) = config_token_limit {
+                    Self::Tokens(token_limit)
+                } else {
+                    Self::Tokens(family_tokens)
+                }
+            }
+        }
+    }
+
+    /// Returns a token budget derived from this policy.
+    ///
+    /// - For `Tokens`, this is the explicit token limit.
+    /// - For `Bytes`, this is an approximate token budget using the global
+    ///   bytes-per-token heuristic.
+    pub fn token_budget(&self) -> usize {
+        match self {
+            TruncationPolicy::Bytes(bytes) => {
+                usize::try_from(approx_tokens_from_byte_count(*bytes)).unwrap_or(usize::MAX)
+            }
+            TruncationPolicy::Tokens(tokens) => *tokens,
+        }
+    }
+
+    /// Returns a byte budget derived from this policy.
+    ///
+    /// - For `Bytes`, this is the explicit byte limit.
+    /// - For `Tokens`, this is an approximate byte budget using the global
+    ///   bytes-per-token heuristic.
+    pub fn byte_budget(&self) -> usize {
+        match self {
+            TruncationPolicy::Bytes(bytes) => *bytes,
+            TruncationPolicy::Tokens(tokens) => approx_bytes_for_tokens(*tokens),
+        }
+    }
+}
+
+/// Format a block of exec/tool output for model consumption, truncating by
+/// lines and bytes while preserving head and tail segments.
+pub(crate) fn truncate_with_line_bytes_budget(content: &str, bytes_budget: usize) -> String {
+    // TODO(aibrahim): to be removed
+    let lines_budget = 256;
+    // Head+tail truncation for the model: show the beginning and end with an elision.
+    // Clients still receive full streams; only this formatted summary is capped.
+    let total_lines = content.lines().count();
+    if content.len() <= bytes_budget && total_lines <= lines_budget {
+        return content.to_string();
+    }
+    let output = truncate_formatted_exec_output(content, total_lines, bytes_budget, lines_budget);
+    format!("Total output lines: {total_lines}\n\n{output}")
+}
+
+pub(crate) fn truncate_text(content: &str, policy: TruncationPolicy) -> String {
+    match policy {
+        TruncationPolicy::Bytes(bytes) => truncate_with_byte_estimate(
+            content,
+            bytes,
+            TruncationSource::Policy(TruncationPolicy::Bytes(bytes)),
+        ),
+        TruncationPolicy::Tokens(tokens) => {
+            let (truncated, _) = truncate_with_token_budget(
+                content,
+                tokens,
+                TruncationSource::Policy(TruncationPolicy::Tokens(tokens)),
+            );
+            truncated
+        }
+    }
+}
+/// Globally truncate function output items to fit within the given
+/// truncation policy's budget, preserving as many text/image items as
+/// possible and appending a summary for any omitted text items.
+pub(crate) fn truncate_function_output_items_with_policy(
     items: &[FunctionCallOutputContentItem],
+    policy: TruncationPolicy,
 ) -> Vec<FunctionCallOutputContentItem> {
     let mut out: Vec<FunctionCallOutputContentItem> = Vec::with_capacity(items.len());
-    let mut remaining = MODEL_FORMAT_MAX_BYTES;
+    let mut remaining_budget = match policy {
+        TruncationPolicy::Bytes(_) => policy.byte_budget(),
+        TruncationPolicy::Tokens(_) => policy.token_budget(),
+    };
     let mut omitted_text_items = 0usize;
 
     for it in items {
         match it {
             FunctionCallOutputContentItem::InputText { text } => {
-                if remaining == 0 {
+                if remaining_budget == 0 {
                     omitted_text_items += 1;
                     continue;
                 }
 
-                let len = text.len();
-                if len <= remaining {
+                let cost = match policy {
+                    TruncationPolicy::Bytes(_) => text.len(),
+                    TruncationPolicy::Tokens(_) => approx_token_count(text),
+                };
+
+                if cost <= remaining_budget {
                     out.push(FunctionCallOutputContentItem::InputText { text: text.clone() });
-                    remaining -= len;
+                    remaining_budget = remaining_budget.saturating_sub(cost);
                 } else {
-                    let slice = take_bytes_at_char_boundary(text, remaining);
-                    if !slice.is_empty() {
-                        out.push(FunctionCallOutputContentItem::InputText {
-                            text: slice.to_string(),
-                        });
+                    let snippet_policy = match policy {
+                        TruncationPolicy::Bytes(_) => TruncationPolicy::Bytes(remaining_budget),
+                        TruncationPolicy::Tokens(_) => TruncationPolicy::Tokens(remaining_budget),
+                    };
+                    let snippet = truncate_text(text, snippet_policy);
+                    if snippet.is_empty() {
+                        omitted_text_items += 1;
+                    } else {
+                        out.push(FunctionCallOutputContentItem::InputText { text: snippet });
                     }
-                    remaining = 0;
+                    remaining_budget = 0;
                 }
             }
-            // todo(aibrahim): handle input images; resize
             FunctionCallOutputContentItem::InputImage { image_url } => {
                 out.push(FunctionCallOutputContentItem::InputImage {
                     image_url: image_url.clone(),
@@ -61,21 +158,81 @@ pub(crate) fn globally_truncate_function_output_items(
     out
 }
 
-/// Format a block of exec/tool output for model consumption, truncating by
-/// lines and bytes while preserving head and tail segments.
-pub(crate) fn format_output_for_model_body(
-    content: &str,
-    limit_bytes: usize,
-    limit_lines: usize,
-) -> String {
-    // Head+tail truncation for the model: show the beginning and end with an elision.
-    // Clients still receive full streams; only this formatted summary is capped.
-    let total_lines = content.lines().count();
-    if content.len() <= limit_bytes && total_lines <= limit_lines {
-        return content.to_string();
+/// Truncate the middle of a UTF-8 string to roughly `max_tokens` tokens
+/// (estimated at `APPROX_BYTES_PER_TOKEN` bytes each), preserving the beginning
+/// and the end. Returns the truncated string and `Some(approx_original_tokens)`
+/// if truncation occurred; otherwise returns the original string and `None`.
+fn truncate_with_token_budget(
+    s: &str,
+    max_tokens: usize,
+    source: TruncationSource,
+) -> (String, Option<u64>) {
+    if s.is_empty() {
+        return (String::new(), None);
     }
-    let output = truncate_formatted_exec_output(content, total_lines, limit_bytes, limit_lines);
-    format!("Total output lines: {total_lines}\n\n{output}")
+
+    let byte_len = s.len();
+    if max_tokens > 0 {
+        let small_threshold = approx_bytes_for_tokens(max_tokens / 4);
+        if small_threshold > 0 && byte_len <= small_threshold {
+            return (s.to_string(), None);
+        }
+    }
+
+    let truncated = truncate_with_byte_estimate(s, approx_bytes_for_tokens(max_tokens), source);
+    let approx_total_usize = approx_token_count(s);
+    let approx_total = u64::try_from(approx_total_usize).unwrap_or(u64::MAX);
+    if truncated == s {
+        (truncated, None)
+    } else {
+        (truncated, Some(approx_total))
+    }
+}
+
+/// Truncate the middle of a string to fit a `max_bytes` budget without
+/// performing any real tokenization. The logic is purely byte-based; `source`
+/// decides whether the marker reports removed bytes or approximate tokens.
+fn truncate_with_byte_estimate(s: &str, max_bytes: usize, source: TruncationSource) -> String {
+    if s.is_empty() {
+        return String::new();
+    }
+
+    if max_bytes == 0 {
+        // No budget to show content; just report that everything was truncated.
+        let marker = format_truncation_marker(source, removed_units_for_source(source, s.len()));
+        return marker;
+    }
+
+    if s.len() <= max_bytes {
+        return s.to_string();
+    }
+
+    let total_bytes = s.len();
+    let removed_bytes = total_bytes.saturating_sub(max_bytes);
+    let marker = format_truncation_marker(source, removed_units_for_source(source, removed_bytes));
+    let marker_len = marker.len();
+
+    if marker_len >= max_bytes {
+        let truncated_marker = truncate_on_boundary(&marker, max_bytes);
+        return truncated_marker.to_string();
+    }
+
+    let keep_budget = max_bytes - marker_len;
+    let (left_budget, right_budget) = split_budget(keep_budget);
+    let prefix_end = pick_prefix_end(s, left_budget);
+    let mut suffix_start = pick_suffix_start(s, right_budget);
+    if suffix_start < prefix_end {
+        suffix_start = prefix_end;
+    }
+
+    let mut out = assemble_truncated_output(&s[..prefix_end], &s[suffix_start..], &marker);
+
+    if out.len() > max_bytes {
+        let boundary = truncate_on_boundary(&out, max_bytes);
+        out.truncate(boundary.len());
+    }
+
+    out
 }
 
 fn truncate_formatted_exec_output(
@@ -114,13 +271,17 @@ fn truncate_formatted_exec_output(
     let truncated_by_bytes = content.len() > limit_bytes;
     // this is a bit wrong. We are counting metadata lines and not just shell output lines.
     let marker = if omitted > 0 {
-        Some(format!(
-            "\n[... omitted {omitted} of {total_lines} lines ...]\n\n"
-        ))
+        let marker_text = format_truncation_marker(
+            TruncationSource::LineOmission { total_lines },
+            u64::try_from(omitted).unwrap_or(u64::MAX),
+        );
+        Some(format!("\n{marker_text}\n\n"))
     } else if truncated_by_bytes {
-        Some(format!(
-            "\n[... output truncated to fit {limit_bytes} bytes ...]\n\n"
-        ))
+        let removed_bytes =
+            u64::try_from(content.len().saturating_sub(limit_bytes)).unwrap_or(u64::MAX);
+        let marker_text =
+            format_truncation_marker(TruncationSource::ByteLimit { limit_bytes }, removed_bytes);
+        Some(format!("\n{marker_text}\n\n"))
     } else {
         None
     };
@@ -147,192 +308,136 @@ fn truncate_formatted_exec_output(
     result
 }
 
-fn error_on_double_truncation(content: &str) {
-    if content.contains("Total output lines:") && content.contains("omitted") {
-        tracing::error!(
-            "FunctionCallOutput content was already truncated before ContextManager::record_items; this would cause double truncation {content}"
-        );
-    }
+#[derive(Clone, Copy)]
+pub enum TruncationSource {
+    Policy(TruncationPolicy),
+    LineOmission { total_lines: usize },
+    ByteLimit { limit_bytes: usize },
 }
 
-/// Truncate an output string to a maximum number of “tokens”, where tokens are
-/// approximated as individual `char`s. Preserves a prefix and suffix with an
-/// elision marker describing how many tokens were omitted.
-pub(crate) fn truncate_output_to_tokens(
-    output: &str,
-    max_tokens: usize,
-) -> (String, Option<usize>) {
-    if max_tokens == 0 {
-        let total_tokens = output.chars().count();
-        let message = format!("…{total_tokens} tokens truncated…");
-        return (message, Some(total_tokens));
+fn format_truncation_marker(source: TruncationSource, removed_count: u64) -> String {
+    match source {
+        TruncationSource::Policy(TruncationPolicy::Tokens(_)) => {
+            format!("[…{removed_count} tokens truncated…]")
+        }
+        TruncationSource::Policy(TruncationPolicy::Bytes(_)) => {
+            format!("[…{removed_count} bytes truncated…]")
+        }
+        TruncationSource::LineOmission { total_lines } => {
+            format!("[... omitted {removed_count} of {total_lines} lines ...]")
+        }
+        TruncationSource::ByteLimit { limit_bytes } => {
+            format!("[... removed {removed_count} bytes to fit {limit_bytes} byte limit ...]")
+        }
     }
+}
 
-    let tokens: Vec<char> = output.chars().collect();
-    let total_tokens = tokens.len();
-    if total_tokens <= max_tokens {
-        return (output.to_string(), None);
-    }
+fn split_budget(budget: usize) -> (usize, usize) {
+    let left = budget / 2;
+    (left, budget - left)
+}
 
-    let half = max_tokens / 2;
-    if half == 0 {
-        let truncated = total_tokens.saturating_sub(max_tokens);
-        let message = format!("…{truncated} tokens truncated…");
-        return (message, Some(total_tokens));
+fn removed_units_for_source(source: TruncationSource, removed_bytes: usize) -> u64 {
+    match source {
+        TruncationSource::Policy(TruncationPolicy::Tokens(_)) => {
+            approx_tokens_from_byte_count(removed_bytes)
+        }
+        _ => u64::try_from(removed_bytes).unwrap_or(u64::MAX),
     }
+}
 
-    let truncated = total_tokens.saturating_sub(half * 2);
-    let mut truncated_output = String::new();
-    truncated_output.extend(&tokens[..half]);
-    truncated_output.push_str(&format!("…{truncated} tokens truncated…"));
-    truncated_output.extend(&tokens[total_tokens - half..]);
-    (truncated_output, Some(total_tokens))
+fn assemble_truncated_output(prefix: &str, suffix: &str, marker: &str) -> String {
+    let mut out = String::with_capacity(prefix.len() + marker.len() + suffix.len() + 1);
+    out.push_str(prefix);
+    out.push_str(marker);
+    out.push('\n');
+    out.push_str(suffix);
+    out
 }
 
-/// Truncate the middle of a UTF-8 string to at most `max_bytes` bytes,
-/// preserving the beginning and the end. Returns the possibly truncated
-/// string and `Some(original_token_count)` (counted with the local tokenizer;
-/// falls back to a 4-bytes-per-token estimate if the tokenizer cannot load)
-/// if truncation occurred; otherwise returns the original string and `None`.
-pub(crate) fn truncate_middle(s: &str, max_bytes: usize) -> (String, Option<u64>) {
-    if s.len() <= max_bytes {
-        return (s.to_string(), None);
-    }
+pub(crate) fn approx_token_count(text: &str) -> usize {
+    let len = text.len();
+    len.saturating_add(APPROX_BYTES_PER_TOKEN.saturating_sub(1)) / APPROX_BYTES_PER_TOKEN
+}
 
-    // Build a tokenizer for counting (default to o200k_base; fall back to cl100k_base).
-    // If both fail, fall back to a 4-bytes-per-token estimate.
-    let tok = Tokenizer::try_default().ok();
-    let token_count = |text: &str| -> u64 {
-        if let Some(ref t) = tok {
-            t.count(text) as u64
-        } else {
-            (text.len() as u64).div_ceil(4)
-        }
-    };
+fn approx_bytes_for_tokens(tokens: usize) -> usize {
+    tokens.saturating_mul(APPROX_BYTES_PER_TOKEN)
+}
 
-    let total_tokens = token_count(s);
-    if max_bytes == 0 {
-        return (
-            format!("…{total_tokens} tokens truncated…"),
-            Some(total_tokens),
-        );
-    }
+fn approx_tokens_from_byte_count(bytes: usize) -> u64 {
+    let bytes_u64 = bytes as u64;
+    bytes_u64.saturating_add((APPROX_BYTES_PER_TOKEN as u64).saturating_sub(1))
+        / (APPROX_BYTES_PER_TOKEN as u64)
+}
 
-    fn truncate_on_boundary(input: &str, max_len: usize) -> &str {
-        if input.len() <= max_len {
-            return input;
-        }
-        let mut end = max_len;
-        while end > 0 && !input.is_char_boundary(end) {
-            end -= 1;
-        }
-        &input[..end]
+fn truncate_on_boundary(input: &str, max_len: usize) -> &str {
+    if input.len() <= max_len {
+        return input;
     }
-
-    fn pick_prefix_end(s: &str, left_budget: usize) -> usize {
-        if let Some(head) = s.get(..left_budget)
-            && let Some(i) = head.rfind('\n')
-        {
-            return i + 1;
-        }
-        truncate_on_boundary(s, left_budget).len()
+    let mut end = max_len;
+    while end > 0 && !input.is_char_boundary(end) {
+        end -= 1;
     }
+    &input[..end]
+}
 
-    fn pick_suffix_start(s: &str, right_budget: usize) -> usize {
-        let start_tail = s.len().saturating_sub(right_budget);
-        if let Some(tail) = s.get(start_tail..)
-            && let Some(i) = tail.find('\n')
-        {
-            return start_tail + i + 1;
-        }
-
-        let mut idx = start_tail.min(s.len());
-        while idx < s.len() && !s.is_char_boundary(idx) {
-            idx += 1;
-        }
-        idx
-    }
-
-    // Iterate to stabilize marker length → keep budget → boundaries.
-    let mut guess_tokens: u64 = 1;
-    for _ in 0..4 {
-        let marker = format!("…{guess_tokens} tokens truncated…");
-        let marker_len = marker.len();
-        let keep_budget = max_bytes.saturating_sub(marker_len);
-        if keep_budget == 0 {
-            return (
-                format!("…{total_tokens} tokens truncated…"),
-                Some(total_tokens),
-            );
-        }
-
-        let left_budget = keep_budget / 2;
-        let right_budget = keep_budget - left_budget;
-        let prefix_end = pick_prefix_end(s, left_budget);
-        let mut suffix_start = pick_suffix_start(s, right_budget);
-        if suffix_start < prefix_end {
-            suffix_start = prefix_end;
-        }
-
-        // Tokens actually removed (middle slice) using the real tokenizer.
-        let removed_tokens = token_count(&s[prefix_end..suffix_start]);
-
-        // If the number of digits in the token count does not change the marker length,
-        // we can finalize output.
-        let final_marker = format!("…{removed_tokens} tokens truncated…");
-        if final_marker.len() == marker_len {
-            let kept_content_bytes = prefix_end + (s.len() - suffix_start);
-            let mut out = String::with_capacity(final_marker.len() + kept_content_bytes + 1);
-            out.push_str(&s[..prefix_end]);
-            out.push_str(&final_marker);
-            out.push('\n');
-            out.push_str(&s[suffix_start..]);
-            return (out, Some(total_tokens));
-        }
-
-        guess_tokens = removed_tokens;
+fn pick_prefix_end(s: &str, left_budget: usize) -> usize {
+    if let Some(head) = s.get(..left_budget)
+        && let Some(i) = head.rfind('\n')
+    {
+        return i + 1;
     }
+    truncate_on_boundary(s, left_budget).len()
+}
 
-    // Fallback build after iterations: compute with the last guess.
-    let marker = format!("…{guess_tokens} tokens truncated…");
-    let marker_len = marker.len();
-    let keep_budget = max_bytes.saturating_sub(marker_len);
-    if keep_budget == 0 {
-        return (
-            format!("…{total_tokens} tokens truncated…"),
-            Some(total_tokens),
-        );
+fn pick_suffix_start(s: &str, right_budget: usize) -> usize {
+    let start_tail = s.len().saturating_sub(right_budget);
+    if let Some(tail) = s.get(start_tail..)
+        && let Some(i) = tail.find('\n')
+    {
+        return start_tail + i + 1;
     }
 
-    let left_budget = keep_budget / 2;
-    let right_budget = keep_budget - left_budget;
-    let prefix_end = pick_prefix_end(s, left_budget);
-    let mut suffix_start = pick_suffix_start(s, right_budget);
-    if suffix_start < prefix_end {
-        suffix_start = prefix_end;
+    let mut idx = start_tail.min(s.len());
+    while idx < s.len() && !s.is_char_boundary(idx) {
+        idx += 1;
     }
+    idx
+}
 
-    let mut out = String::with_capacity(marker_len + prefix_end + (s.len() - suffix_start) + 1);
-    out.push_str(&s[..prefix_end]);
-    out.push_str(&marker);
-    out.push('\n');
-    out.push_str(&s[suffix_start..]);
-    (out, Some(total_tokens))
+fn error_on_double_truncation(content: &str) {
+    if content.contains("Total output lines:") && content.contains("omitted") {
+        tracing::error!(
+            "FunctionCallOutput content was already truncated before ContextManager::record_items; this would cause double truncation {content}"
+        );
+    }
 }
 
 #[cfg(test)]
 mod tests {
-    use super::MODEL_FORMAT_MAX_BYTES;
-    use super::MODEL_FORMAT_MAX_LINES;
-    use super::format_output_for_model_body;
-    use super::globally_truncate_function_output_items;
-    use super::truncate_middle;
-    use super::truncate_output_to_tokens;
+    use crate::config::OPENAI_DEFAULT_MODEL;
+    use crate::model_family::derive_default_model_family;
+    use crate::model_family::find_family_for_model;
+
+    use super::TruncationPolicy;
+    use super::TruncationSource;
+    use super::approx_token_count;
+    use super::truncate_function_output_items_with_policy;
+    use super::truncate_with_line_bytes_budget;
+    use super::truncate_with_token_budget;
     use codex_protocol::models::FunctionCallOutputContentItem;
-    use codex_utils_tokenizer::Tokenizer;
     use pretty_assertions::assert_eq;
     use regex_lite::Regex;
 
+    const MODEL_FORMAT_MAX_LINES: usize = 256;
+
+    fn model_format_max_bytes() -> usize {
+        find_family_for_model(OPENAI_DEFAULT_MODEL)
+            .unwrap_or_else(|| derive_default_model_family(OPENAI_DEFAULT_MODEL))
+            .truncation_policy
+            .byte_budget()
+    }
+
     fn truncated_message_pattern(line: &str, total_lines: usize) -> String {
         let head_lines = MODEL_FORMAT_MAX_LINES / 2;
         let tail_lines = MODEL_FORMAT_MAX_LINES - head_lines;
@@ -342,7 +447,8 @@ mod tests {
         let escaped_line = regex_lite::escape(line);
         if omitted == 0 {
             return format!(
-                r"(?s)^Total output lines: {total_lines}\n\n(?P<body>{escaped_line}.*\n\[\.{{3}} output truncated to fit {MODEL_FORMAT_MAX_BYTES} bytes \.{{3}}]\n\n.*)$",
+                r"(?s)^Total output lines: {total_lines}\n\n(?P<body>{escaped_line}.*\n\[\.{{3}} removed \d+ bytes to fit {max_bytes} byte limit \.{{3}}]\n\n.*)$",
+                max_bytes = model_format_max_bytes(),
             );
         }
         format!(
@@ -351,88 +457,46 @@ mod tests {
     }
 
     #[test]
-    fn truncate_middle_no_newlines_fallback() {
-        let tok = Tokenizer::try_default().expect("load tokenizer");
-        let s = "abcdefghijklmnopqrstuvwxyz0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ*";
-        let max_bytes = 32;
-        let (out, original) = truncate_middle(s, max_bytes);
-        assert!(out.starts_with("abc"));
-        assert!(out.contains("tokens truncated"));
-        assert!(out.ends_with("XYZ*"));
-        assert_eq!(original, Some(tok.count(s) as u64));
+    fn truncate_middle_returns_original_when_under_limit() {
+        let s = "short output";
+        let limit = 100;
+        let source = TruncationSource::Policy(TruncationPolicy::Tokens(limit));
+        let (out, original) = truncate_with_token_budget(s, limit, source);
+        assert_eq!(out, s);
+        assert_eq!(original, None);
     }
 
     #[test]
-    fn truncate_middle_prefers_newline_boundaries() {
-        let tok = Tokenizer::try_default().expect("load tokenizer");
-        let mut s = String::new();
-        for i in 1..=20 {
-            s.push_str(&format!("{i:03}\n"));
-        }
-        assert_eq!(s.len(), 80);
-
-        let max_bytes = 64;
-        let (out, tokens) = truncate_middle(&s, max_bytes);
-        assert!(out.starts_with("001\n002\n003\n004\n"));
-        assert!(out.contains("tokens truncated"));
-        assert!(out.ends_with("017\n018\n019\n020\n"));
-        assert_eq!(tokens, Some(tok.count(&s) as u64));
+    fn truncate_middle_reports_truncation_at_zero_limit() {
+        let s = "abcdef";
+        let source = TruncationSource::Policy(TruncationPolicy::Tokens(0));
+        let (out, original) = truncate_with_token_budget(s, 0, source);
+        assert_eq!(out, "[…2 tokens truncated…]");
+        assert_eq!(original, Some(approx_token_count(s) as u64));
     }
 
     #[test]
-    fn truncate_middle_handles_utf8_content() {
-        let tok = Tokenizer::try_default().expect("load tokenizer");
-        let s = "😀😀😀😀😀😀😀😀😀😀\nsecond line with ascii text\n";
-        let max_bytes = 32;
-        let (out, tokens) = truncate_middle(s, max_bytes);
-
+    fn truncate_middle_enforces_token_budget() {
+        let s = "alpha beta gamma delta epsilon zeta eta theta iota kappa";
+        let max_tokens = 12;
+        let source = TruncationSource::Policy(TruncationPolicy::Tokens(max_tokens));
+        let (out, original) = truncate_with_token_budget(s, max_tokens, source);
         assert!(out.contains("tokens truncated"));
-        assert!(!out.contains('\u{fffd}'));
-        assert_eq!(tokens, Some(tok.count(s) as u64));
+        assert_eq!(original, Some(approx_token_count(s) as u64));
+        assert!(out.len() < s.len(), "truncated output should be shorter");
     }
 
     #[test]
-    fn truncate_middle_prefers_newline_boundaries_2() {
-        let tok = Tokenizer::try_default().expect("load tokenizer");
-        // Build a multi-line string of 20 numbered lines (each "NNN\n").
-        let mut s = String::new();
-        for i in 1..=20 {
-            s.push_str(&format!("{i:03}\n"));
-        }
-        assert_eq!(s.len(), 80);
+    fn truncate_middle_handles_utf8_content() {
+        let s = "😀😀😀😀😀😀😀😀😀😀\nsecond line with text\n";
+        let max_tokens = 8;
+        let source = TruncationSource::Policy(TruncationPolicy::Tokens(max_tokens));
+        let (out, tokens) = truncate_with_token_budget(s, max_tokens, source);
 
-        let max_bytes = 64;
-        let (out, total) = truncate_middle(&s, max_bytes);
-        assert!(out.starts_with("001\n002\n003\n004\n"));
         assert!(out.contains("tokens truncated"));
-        assert!(out.ends_with("017\n018\n019\n020\n"));
-        assert_eq!(total, Some(tok.count(&s) as u64));
-    }
-
-    #[test]
-    fn truncate_output_to_tokens_returns_original_when_under_limit() {
-        let s = "short output";
-        let (truncated, original) = truncate_output_to_tokens(s, 100);
-        assert_eq!(truncated, s);
-        assert_eq!(original, None);
-    }
-
-    #[test]
-    fn truncate_output_to_tokens_reports_truncation_at_zero_limit() {
-        let s = "abcdef";
-        let (truncated, original) = truncate_output_to_tokens(s, 0);
-        assert!(truncated.contains("tokens truncated"));
-        assert_eq!(original, Some(s.chars().count()));
-    }
-
-    #[test]
-    fn truncate_output_to_tokens_preserves_prefix_and_suffix() {
-        let s = "abcdefghijklmnopqrstuvwxyz";
-        let max_tokens = 10;
-        let (truncated, original) = truncate_output_to_tokens(s, max_tokens);
-        assert!(truncated.starts_with("abcde"));
-        assert!(truncated.ends_with("vwxyz"));
-        assert_eq!(original, Some(s.chars().count()));
+        assert!(!out.contains('\u{fffd}'));
+        assert_eq!(tokens, Some(approx_token_count(s) as u64));
+        assert!(out.len() < s.len(), "UTF-8 content should be shortened");
     }
 
     #[test]
@@ -440,11 +504,7 @@ mod tests {
         let line = "very long execution error line that should trigger truncation\n";
         let large_error = line.repeat(2_500); // way beyond both byte and line limits
 
-        let truncated = format_output_for_model_body(
-            &large_error,
-            MODEL_FORMAT_MAX_BYTES,
-            MODEL_FORMAT_MAX_LINES,
-        );
+        let truncated = truncate_with_line_bytes_budget(&large_error, model_format_max_bytes());
 
         let total_lines = large_error.lines().count();
         let pattern = truncated_message_pattern(line, total_lines);
@@ -459,7 +519,7 @@ mod tests {
             .expect("missing body capture")
             .as_str();
         assert!(
-            body.len() <= MODEL_FORMAT_MAX_BYTES,
+            body.len() <= model_format_max_bytes(),
             "body exceeds byte limit: {} bytes",
             body.len()
         );
@@ -468,16 +528,14 @@ mod tests {
 
     #[test]
     fn format_exec_output_marks_byte_truncation_without_omitted_lines() {
-        let long_line = "a".repeat(MODEL_FORMAT_MAX_BYTES + 50);
-        let truncated = format_output_for_model_body(
-            &long_line,
-            MODEL_FORMAT_MAX_BYTES,
-            MODEL_FORMAT_MAX_LINES,
-        );
+        let max_bytes = model_format_max_bytes();
+        let long_line = "a".repeat(max_bytes + 50);
+        let truncated = truncate_with_line_bytes_budget(&long_line, max_bytes);
 
         assert_ne!(truncated, long_line);
+        let removed_bytes = long_line.len().saturating_sub(max_bytes);
         let marker_line =
-            format!("[... output truncated to fit {MODEL_FORMAT_MAX_BYTES} bytes ...]");
+            format!("[... removed {removed_bytes} bytes to fit {max_bytes} byte limit ...]");
         assert!(
             truncated.contains(&marker_line),
             "missing byte truncation marker: {truncated}"
@@ -493,7 +551,7 @@ mod tests {
         let content = "example output\n".repeat(10);
 
         assert_eq!(
-            format_output_for_model_body(&content, MODEL_FORMAT_MAX_BYTES, MODEL_FORMAT_MAX_LINES),
+            truncate_with_line_bytes_budget(&content, model_format_max_bytes()),
             content
         );
     }
@@ -505,8 +563,7 @@ mod tests {
             .map(|idx| format!("line-{idx}\n"))
             .collect();
 
-        let truncated =
-            format_output_for_model_body(&content, MODEL_FORMAT_MAX_BYTES, MODEL_FORMAT_MAX_LINES);
+        let truncated = truncate_with_line_bytes_budget(&content, model_format_max_bytes());
 
         let omitted = total_lines - MODEL_FORMAT_MAX_LINES;
         let expected_marker = format!("[... omitted {omitted} of {total_lines} lines ...]");
@@ -535,39 +592,33 @@ mod tests {
             .map(|idx| format!("line-{idx}-{long_line}\n"))
             .collect();
 
-        let truncated =
-            format_output_for_model_body(&content, MODEL_FORMAT_MAX_BYTES, MODEL_FORMAT_MAX_LINES);
+        let truncated = truncate_with_line_bytes_budget(&content, model_format_max_bytes());
 
         assert!(
             truncated.contains("[... omitted 42 of 298 lines ...]"),
             "expected omitted marker when line count exceeds limit: {truncated}"
         );
         assert!(
-            !truncated.contains("output truncated to fit"),
+            !truncated.contains("byte limit"),
             "line omission marker should take precedence over byte marker: {truncated}"
         );
     }
 
     #[test]
     fn truncates_across_multiple_under_limit_texts_and_reports_omitted() {
-        // Arrange: several text items, none exceeding per-item limit, but total exceeds budget.
-        let budget = MODEL_FORMAT_MAX_BYTES;
-        let t1_len = (budget / 2).saturating_sub(10);
-        let t2_len = (budget / 2).saturating_sub(10);
-        let remaining_after_t1_t2 = budget.saturating_sub(t1_len + t2_len);
-        let t3_len = 50; // gets truncated to remaining_after_t1_t2
-        let t4_len = 5; // omitted
-        let t5_len = 7; // omitted
-
-        let t1 = "a".repeat(t1_len);
-        let t2 = "b".repeat(t2_len);
-        let t3 = "c".repeat(t3_len);
-        let t4 = "d".repeat(t4_len);
-        let t5 = "e".repeat(t5_len);
+        let chunk = "alpha beta gamma delta epsilon zeta eta theta iota kappa lambda mu nu xi omicron pi rho sigma tau upsilon phi chi psi omega.\n";
+        let chunk_tokens = approx_token_count(chunk);
+        assert!(chunk_tokens > 0, "chunk must consume tokens");
+        let limit = chunk_tokens * 3;
+        let t1 = chunk.to_string();
+        let t2 = chunk.to_string();
+        let t3 = chunk.repeat(10);
+        let t4 = chunk.to_string();
+        let t5 = chunk.to_string();
 
         let items = vec![
-            FunctionCallOutputContentItem::InputText { text: t1 },
-            FunctionCallOutputContentItem::InputText { text: t2 },
+            FunctionCallOutputContentItem::InputText { text: t1.clone() },
+            FunctionCallOutputContentItem::InputText { text: t2.clone() },
             FunctionCallOutputContentItem::InputImage {
                 image_url: "img:mid".to_string(),
             },
@@ -576,7 +627,8 @@ mod tests {
             FunctionCallOutputContentItem::InputText { text: t5 },
         ];
 
-        let output = globally_truncate_function_output_items(&items);
+        let output =
+            truncate_function_output_items_with_policy(&items, TruncationPolicy::Tokens(limit));
 
         // Expect: t1 (full), t2 (full), image, t3 (truncated), summary mentioning 2 omitted.
         assert_eq!(output.len(), 5);
@@ -585,13 +637,13 @@ mod tests {
             FunctionCallOutputContentItem::InputText { text } => text,
             other => panic!("unexpected first item: {other:?}"),
         };
-        assert_eq!(first_text.len(), t1_len);
+        assert_eq!(first_text, &t1);
 
         let second_text = match &output[1] {
             FunctionCallOutputContentItem::InputText { text } => text,
             other => panic!("unexpected second item: {other:?}"),
         };
-        assert_eq!(second_text.len(), t2_len);
+        assert_eq!(second_text, &t2);
 
         assert_eq!(
             output[2],
@@ -604,7 +656,10 @@ mod tests {
             FunctionCallOutputContentItem::InputText { text } => text,
             other => panic!("unexpected fourth item: {other:?}"),
         };
-        assert_eq!(fourth_text.len(), remaining_after_t1_t2);
+        assert!(
+            fourth_text.contains("tokens truncated"),
+            "expected marker in truncated snippet: {fourth_text}"
+        );
 
         let summary_text = match &output[4] {
             FunctionCallOutputContentItem::InputText { text } => text,
diff --git a/codex-rs/core/src/unified_exec/mod.rs b/codex-rs/core/src/unified_exec/mod.rs
index f77b744497..390401d789 100644
--- a/codex-rs/core/src/unified_exec/mod.rs
+++ b/codex-rs/core/src/unified_exec/mod.rs
@@ -45,6 +45,7 @@ pub(crate) const MIN_YIELD_TIME_MS: u64 = 250;
 pub(crate) const MAX_YIELD_TIME_MS: u64 = 30_000;
 pub(crate) const DEFAULT_MAX_OUTPUT_TOKENS: usize = 10_000;
 pub(crate) const UNIFIED_EXEC_OUTPUT_MAX_BYTES: usize = 1024 * 1024; // 1 MiB
+pub(crate) const UNIFIED_EXEC_OUTPUT_MAX_TOKENS: usize = UNIFIED_EXEC_OUTPUT_MAX_BYTES / 4; // assumes ~4 bytes per token
 
 pub(crate) struct UnifiedExecContext {
     pub session: Arc<Session>,
diff --git a/codex-rs/core/src/unified_exec/session.rs b/codex-rs/core/src/unified_exec/session.rs
index bdb935f171..82d6e41370 100644
--- a/codex-rs/core/src/unified_exec/session.rs
+++ b/codex-rs/core/src/unified_exec/session.rs
@@ -14,11 +14,13 @@ use crate::exec::ExecToolCallOutput;
 use crate::exec::SandboxType;
 use crate::exec::StreamOutput;
 use crate::exec::is_likely_sandbox_denied;
-use crate::truncate::truncate_middle;
+use crate::truncate::TruncationPolicy;
+use crate::truncate::truncate_text;
 use codex_utils_pty::ExecCommandSession;
 use codex_utils_pty::SpawnedPty;
 
 use super::UNIFIED_EXEC_OUTPUT_MAX_BYTES;
+use super::UNIFIED_EXEC_OUTPUT_MAX_TOKENS;
 use super::UnifiedExecError;
 
 #[derive(Debug, Default)]
@@ -165,7 +167,10 @@ impl UnifiedExecSession {
         };
 
         if is_likely_sandbox_denied(self.sandbox_type(), &exec_output) {
-            let (snippet, _) = truncate_middle(&aggregated_text, UNIFIED_EXEC_OUTPUT_MAX_BYTES);
+            let snippet = truncate_text(
+                &aggregated_text,
+                TruncationPolicy::Tokens(UNIFIED_EXEC_OUTPUT_MAX_TOKENS),
+            );
             let message = if snippet.is_empty() {
                 format!("exit code {exit_code}")
             } else {
diff --git a/codex-rs/core/src/unified_exec/session_manager.rs b/codex-rs/core/src/unified_exec/session_manager.rs
index fee46df8b8..57c60f2b84 100644
--- a/codex-rs/core/src/unified_exec/session_manager.rs
+++ b/codex-rs/core/src/unified_exec/session_manager.rs
@@ -23,6 +23,9 @@ use crate::tools::orchestrator::ToolOrchestrator;
 use crate::tools::runtimes::unified_exec::UnifiedExecRequest as UnifiedExecToolRequest;
 use crate::tools::runtimes::unified_exec::UnifiedExecRuntime;
 use crate::tools::sandboxing::ToolCtx;
+use crate::truncate::TruncationPolicy;
+use crate::truncate::approx_token_count;
+use crate::truncate::truncate_text;
 
 use super::ExecCommandRequest;
 use super::SessionEntry;
@@ -36,7 +39,6 @@ use super::generate_chunk_id;
 use super::resolve_max_tokens;
 use super::session::OutputBuffer;
 use super::session::UnifiedExecSession;
-use crate::truncate::truncate_output_to_tokens;
 
 impl UnifiedExecSessionManager {
     pub(crate) async fn exec_command(
@@ -70,7 +72,7 @@ impl UnifiedExecSessionManager {
         let wall_time = Instant::now().saturating_duration_since(start);
 
         let text = String::from_utf8_lossy(&collected).to_string();
-        let (output, original_token_count) = truncate_output_to_tokens(&text, max_tokens);
+        let output = truncate_text(&text, TruncationPolicy::Tokens(max_tokens));
         let chunk_id = generate_chunk_id();
         let has_exited = session.has_exited();
         let stored_id = self
@@ -85,6 +87,8 @@ impl UnifiedExecSessionManager {
         // Only include a session_id in the response if the process is still alive.
         let session_id = if has_exited { None } else { Some(stored_id) };
 
+        let original_token_count = approx_token_count(&text);
+
         let response = UnifiedExecResponse {
             event_call_id: context.call_id.clone(),
             chunk_id,
@@ -92,7 +96,7 @@ impl UnifiedExecSessionManager {
             output,
             session_id,
             exit_code: exit_code.flatten(),
-            original_token_count,
+            original_token_count: Some(original_token_count),
             session_command: Some(request.command.clone()),
         };
 
@@ -175,7 +179,8 @@ impl UnifiedExecSessionManager {
         let wall_time = Instant::now().saturating_duration_since(start);
 
         let text = String::from_utf8_lossy(&collected).to_string();
-        let (output, original_token_count) = truncate_output_to_tokens(&text, max_tokens);
+        let output = truncate_text(&text, TruncationPolicy::Tokens(max_tokens));
+        let original_token_count = approx_token_count(&text);
         let chunk_id = generate_chunk_id();
 
         let status = self.refresh_session_state(session_id).await;
@@ -199,7 +204,7 @@ impl UnifiedExecSessionManager {
             output,
             session_id,
             exit_code,
-            original_token_count,
+            original_token_count: Some(original_token_count),
             session_command: Some(session_command.clone()),
         };
 
diff --git a/codex-rs/core/tests/suite/truncation.rs b/codex-rs/core/tests/suite/truncation.rs
index 200d27e887..3cbbc6bd57 100644
--- a/codex-rs/core/tests/suite/truncation.rs
+++ b/codex-rs/core/tests/suite/truncation.rs
@@ -27,7 +27,6 @@ use core_test_support::skip_if_no_network;
 use core_test_support::test_codex::test_codex;
 use core_test_support::wait_for_event;
 use escargot::CargoBuild;
-use regex_lite::Regex;
 use serde_json::Value;
 use serde_json::json;
 use std::collections::HashMap;
@@ -48,7 +47,7 @@ async fn truncate_function_error_trims_respond_to_model() -> Result<()> {
     let test = builder.build(&server).await?;
 
     // Construct a very long, non-existent path to force a RespondToModel error with a large message
-    let long_path = "a".repeat(20_000);
+    let long_path = "long path text should trigger truncation".repeat(8_000);
     let call_id = "grep-huge-error";
     let args = json!({
         "pattern": "alpha",
@@ -80,12 +79,16 @@ async fn truncate_function_error_trims_respond_to_model() -> Result<()> {
 
     tracing::debug!(output = %output, "truncated function error output");
 
-    // Expect plaintext with byte-truncation marker and no omitted-lines marker
+    // Expect plaintext with token-based truncation marker and no omitted-lines marker
     assert!(
         serde_json::from_str::<serde_json::Value>(&output).is_err(),
         "expected error output to be plain text",
     );
-    let truncated_pattern = r#"(?s)^Total output lines: 1\s+.*\[\.\.\. output truncated to fit 11264 bytes \.\.\.\]\s*$"#;
+    assert!(
+        !output.contains("Total output lines:"),
+        "error output should not include line-based truncation header: {output}",
+    );
+    let truncated_pattern = r"(?s)^unable to access `.*tokens truncated.*$";
     assert_regex_match(truncated_pattern, &output);
     assert!(
         !output.contains("omitted"),
@@ -269,7 +272,7 @@ async fn mcp_tool_call_output_exceeds_limit_truncated_for_model() -> Result<()>
     let tool_name = format!("mcp__{server_name}__echo");
 
     // Build a very large message to exceed 10KiB once serialized.
-    let large_msg = "long-message-with-newlines-".repeat(600);
+    let large_msg = "long-message-with-newlines-".repeat(6000);
     let args_json = serde_json::json!({ "message": large_msg });
 
     mount_sse_once(
@@ -334,22 +337,19 @@ async fn mcp_tool_call_output_exceeds_limit_truncated_for_model() -> Result<()>
         .function_call_output_text(call_id)
         .context("function_call_output present for rmcp call")?;
 
-    // Expect plain text with byte-based truncation marker.
+    // Expect plain text with token-based truncation marker; the original JSON body
+    // is truncated in the middle of the echo string.
     assert!(
         serde_json::from_str::<Value>(&output).is_err(),
         "expected truncated MCP output to be plain text"
     );
     assert!(
-        output.starts_with("Total output lines: 1\n\n{"),
-        "expected total line header and JSON head, got: {output}"
+        !output.contains("Total output lines:"),
+        "MCP output should not include line-based truncation header: {output}"
     );
 
-    let byte_marker = Regex::new(r"\[\.\.\. output truncated to fit 11264 bytes \.\.\.\]")
-        .expect("compile regex");
-    assert!(
-        byte_marker.is_match(&output),
-        "expected byte truncation marker, got: {output}"
-    );
+    let truncated_pattern = r#"(?s)^\{"echo":\s*"ECHOING: long-message-with-newlines-.*tokens truncated.*long-message-with-newlines-.*$"#;
+    assert_regex_match(truncated_pattern, &output);
 
     Ok(())
 }
@@ -453,3 +453,164 @@ async fn mcp_image_output_preserves_image_and_no_text_summary() -> Result<()> {
 
     Ok(())
 }
+
+// Token-based policy: the truncation marker reports tokens even when truncation is byte-estimated.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn token_policy_marker_reports_tokens() -> Result<()> {
+    skip_if_no_network!(Ok(()));
+
+    let server = start_mock_server().await;
+    let mut builder = test_codex().with_config(|config| {
+        config.model = "gpt-5.1-codex".to_string(); // token policy
+        config.model_family =
+            find_family_for_model("gpt-5.1-codex").expect("model family for gpt-5.1-codex");
+        config.tool_output_token_limit = Some(50); // small budget to force truncation
+    });
+    let fixture = builder.build(&server).await?;
+
+    let call_id = "shell-token-marker";
+    let args = json!({
+        "command": ["/bin/sh", "-c", "seq 1 150"], // prints 1 through 150, one number per line
+        "timeout_ms": 5_000,
+    });
+
+    mount_sse_once(
+        &server,
+        sse(vec![
+            ev_response_created("resp-1"),
+            ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
+            ev_completed("resp-1"),
+        ]),
+    )
+    .await; // mocked response that requests the shell tool call
+    let done_mock = mount_sse_once(
+        &server,
+        sse(vec![
+            ev_assistant_message("msg-1", "done"),
+            ev_completed("resp-2"),
+        ]),
+    )
+    .await; // mocked follow-up response; the request it receives is inspected below
+
+    fixture
+        .submit_turn_with_policy("run the shell tool", SandboxPolicy::DangerFullAccess)
+        .await?;
+
+    let output = done_mock
+        .single_request()
+        .function_call_output_text(call_id)
+        .context("shell output present")?;
+
+    assert_regex_match(r"\[\u{2026}127 tokens truncated\u{2026}]", &output); // marker is in tokens
+
+    Ok(())
+}
+
+// Byte-based policy (gpt-5.1 here): the truncation marker should report the bytes removed.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn byte_policy_marker_reports_bytes() -> Result<()> {
+    skip_if_no_network!(Ok(()));
+
+    let server = start_mock_server().await;
+    let mut builder = test_codex().with_config(|config| {
+        config.model = "gpt-5.1".to_string(); // byte policy
+        config.model_family = find_family_for_model("gpt-5.1").expect("model family for gpt-5.1");
+        config.tool_output_token_limit = Some(50); // ~200 byte cap
+    });
+    let fixture = builder.build(&server).await?;
+
+    let call_id = "shell-byte-marker";
+    let args = json!({
+        "command": ["/bin/sh", "-c", "seq 1 150"], // prints 1 through 150, one number per line
+        "timeout_ms": 5_000,
+    });
+
+    mount_sse_once(
+        &server,
+        sse(vec![
+            ev_response_created("resp-1"),
+            ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
+            ev_completed("resp-1"),
+        ]),
+    )
+    .await; // mocked response that requests the shell tool call
+    let done_mock = mount_sse_once(
+        &server,
+        sse(vec![
+            ev_assistant_message("msg-1", "done"),
+            ev_completed("resp-2"),
+        ]),
+    )
+    .await; // mocked follow-up response; the request it receives is inspected below
+
+    fixture
+        .submit_turn_with_policy("run the shell tool", SandboxPolicy::DangerFullAccess)
+        .await?;
+
+    let output = done_mock
+        .single_request()
+        .function_call_output_text(call_id)
+        .context("shell output present")?;
+
+    assert_regex_match(r"\[\u{2026}505 bytes truncated\u{2026}]", &output); // marker is in bytes
+
+    Ok(())
+}
+
+// A generous `tool_output_token_limit` override should leave the shell output untruncated.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn large_budget_avoids_truncation() -> Result<()> {
+    skip_if_no_network!(Ok(()));
+
+    let server = start_mock_server().await;
+    let mut builder = test_codex().with_config(|config| {
+        config.model = "gpt-5.1-codex".to_string();
+        config.model_family =
+            find_family_for_model("gpt-5.1-codex").expect("model family for gpt-5.1-codex");
+        config.tool_output_token_limit = Some(50_000); // ample budget
+    });
+    let fixture = builder.build(&server).await?;
+
+    let call_id = "shell-no-trunc";
+    let args = json!({
+        "command": ["/bin/sh", "-c", "seq 1 1000"], // prints 1 through 1000, one number per line
+        "timeout_ms": 5_000,
+    });
+
+    mount_sse_once(
+        &server,
+        sse(vec![
+            ev_response_created("resp-1"),
+            ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
+            ev_completed("resp-1"),
+        ]),
+    )
+    .await; // mocked response that requests the shell tool call
+    let done_mock = mount_sse_once(
+        &server,
+        sse(vec![
+            ev_assistant_message("msg-1", "done"),
+            ev_completed("resp-2"),
+        ]),
+    )
+    .await; // mocked follow-up response; the request it receives is inspected below
+
+    fixture
+        .submit_turn_with_policy(
+            "run big output without truncation",
+            SandboxPolicy::DangerFullAccess,
+        )
+        .await?;
+
+    let output = done_mock
+        .single_request()
+        .function_call_output_text(call_id)
+        .context("shell output present")?;
+
+    assert!(
+        !output.contains("truncated"), // no truncation marker of either kind may appear
+        "output should remain untruncated with ample budget"
+    );
+
+    Ok(())
+}
diff --git a/codex-rs/core/tests/suite/unified_exec.rs b/codex-rs/core/tests/suite/unified_exec.rs
index 970be5277e..23f2c62b9c 100644
--- a/codex-rs/core/tests/suite/unified_exec.rs
+++ b/codex-rs/core/tests/suite/unified_exec.rs
@@ -26,9 +26,11 @@ use core_test_support::test_codex::TestCodex;
 use core_test_support::test_codex::test_codex;
 use core_test_support::wait_for_event;
 use core_test_support::wait_for_event_match;
+use core_test_support::wait_for_event_with_timeout;
 use regex_lite::Regex;
 use serde_json::Value;
 use serde_json::json;
+use tokio::time::Duration;
 
 fn extract_output_text(item: &Value) -> Option<&str> {
     item.get("output").and_then(|value| match value {
@@ -814,7 +816,7 @@ async fn exec_command_reports_chunk_and_exit_metadata() -> Result<()> {
 
     let call_id = "uexec-metadata";
     let args = serde_json::json!({
-        "cmd": "printf 'abcdefghijklmnopqrstuvwxyz'",
+        "cmd": "printf 'token one token two token three token four token five token six token seven'",
         "yield_time_ms": 500,
         "max_output_tokens": 6,
     });
@@ -1295,7 +1297,7 @@ async fn unified_exec_streams_after_lagged_output() -> Result<()> {
 import sys
 import time
 
-chunk = b'x' * (1 << 20)
+chunk = b'long content here to trigger truncation' * (1 << 10)
 for _ in range(4):
     sys.stdout.buffer.write(chunk)
     sys.stdout.flush()
@@ -1365,8 +1367,13 @@ PY
             summary: ReasoningSummary::Auto,
         })
         .await?;
-
-    wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await;
+    // Worst-case input for the truncation logic, so wait with an explicit 10-second timeout.
+    wait_for_event_with_timeout(
+        &codex,
+        |event| matches!(event, EventMsg::TaskComplete(_)),
+        Duration::from_secs(10),
+    )
+    .await;
 
     let requests = server.received_requests().await.expect("recorded requests");
     assert!(!requests.is_empty(), "expected at least one POST request");
@@ -1523,14 +1530,15 @@ async fn unified_exec_formats_large_output_summary() -> Result<()> {
     } = builder.build(&server).await?;
 
     let script = r#"python3 - <<'PY'
-for i in range(300):
-    print(f"line-{i}")
+for i in range(10000):
+    print("token token ")
 PY
 "#;
 
     let call_id = "uexec-large-output";
     let args = serde_json::json!({
         "cmd": script,
+        "max_output_tokens": 100,
         "yield_time_ms": 500,
     });
 
@@ -1577,15 +1585,14 @@ PY
     let outputs = collect_tool_outputs(&bodies)?;
     let large_output = outputs.get(call_id).expect("missing large output summary");
 
-    assert_regex_match(
-        concat!(
-            r"(?s)",
-            r"line-0.*?",
-            r"\[\.{3} omitted \d+ of \d+ lines \.{3}\].*?",
-            r"line-299",
-        ),
-        &large_output.output,
-    );
+    let output_text = large_output.output.replace("\r\n", "\n");
+    let truncated_pattern = r#"(?s)^(token token \n){5,}.*\[\u{2026}\d+ tokens truncated\u{2026}]\n(token token \n){5,}$"#;
+    assert_regex_match(truncated_pattern, &output_text);
+
+    let original_tokens = large_output
+        .original_token_count
+        .expect("missing original_token_count for large output summary");
+    assert!(original_tokens > 0);
 
     Ok(())
 }
diff --git a/codex-rs/core/tests/suite/user_shell_cmd.rs b/codex-rs/core/tests/suite/user_shell_cmd.rs
index 0d42c45c1c..0e9585ba4b 100644
--- a/codex-rs/core/tests/suite/user_shell_cmd.rs
+++ b/codex-rs/core/tests/suite/user_shell_cmd.rs
@@ -272,7 +272,7 @@ async fn user_shell_command_is_truncated_only_once() -> anyhow::Result<()> {
     let mut builder = test_codex().with_config(|config| {
         config.model = "gpt-5.1-codex".to_string();
         config.model_family =
-            find_family_for_model("gpt-5.1-codex").expect("gpt-5.1-codex is a model family");
+            find_family_for_model("gpt-5-codex").expect("gpt-5-codex is a model family");
     });
     let fixture = builder.build(&server).await?;
 
diff --git a/codex-rs/utils/tokenizer/src/lib.rs b/codex-rs/utils/tokenizer/src/lib.rs
index 23ab261fa6..1e151393a9 100644
--- a/codex-rs/utils/tokenizer/src/lib.rs
+++ b/codex-rs/utils/tokenizer/src/lib.rs
@@ -122,6 +122,11 @@ impl Tokenizer {
     }
 }
 
+impl fmt::Debug for Tokenizer {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.write_str("Tokenizer { inner: <CoreBPE> }") // fixed placeholder; no field of `self` is read
+    }
+}
 #[cfg(test)]
 mod tests {
     use super::*;
diff --git a/docs/config.md b/docs/config.md
index 51867773a9..ddfe2ff7c4 100644
--- a/docs/config.md
+++ b/docs/config.md
@@ -925,6 +925,7 @@ Valid values:
 | `model_provider`                                 | string                                                            | Provider id from `model_providers` (default: `openai`).                                                                    |
 | `model_context_window`                           | number                                                            | Context window tokens.                                                                                                     |
 | `model_max_output_tokens`                        | number                                                            | Max output tokens.                                                                                                         |
+| `tool_output_token_limit`                        | number                                                            | Token budget for stored function/tool outputs in history (default: model-specific, e.g. 2,560 tokens).                     |
 | `approval_policy`                                | `untrusted` \| `on-failure` \| `on-request` \| `never`            | When to prompt for approval.                                                                                               |
 | `sandbox_mode`                                   | `read-only` \| `workspace-write` \| `danger-full-access`          | OS sandbox policy.                                                                                                         |
 | `sandbox_workspace_write.writable_roots`         | array<string>                                                     | Extra writable roots in workspace‑write.                                                                                   |
diff --git a/docs/example-config.md b/docs/example-config.md
index 21b360ccef..b2da427314 100644
--- a/docs/example-config.md
+++ b/docs/example-config.md
@@ -33,6 +33,7 @@ model_provider = "openai"
 # model_context_window = 128000       # tokens; default: auto for model
 # model_max_output_tokens = 8192      # tokens; default: auto for model
 # model_auto_compact_token_limit = 0  # disable/override auto; default: model family specific
+# tool_output_token_limit = 10000     # tokens stored per tool output; default: model-specific (10000 for gpt-5.1-codex)
 
 ################################################################################
 # Reasoning & Verbosity (Responses API capable models)