Skip to content
Merged
19 changes: 14 additions & 5 deletions codex-rs/core/src/tools/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,7 @@ pub struct ExecCommandToolOutput {
pub wall_time: Duration,
/// Raw bytes returned for this unified exec call before any truncation.
pub raw_output: Vec<u8>,
pub truncation_policy: TruncationPolicy,
pub max_output_tokens: Option<usize>,
pub process_id: Option<i32>,
pub exit_code: Option<i32>,
Expand Down Expand Up @@ -357,7 +358,9 @@ impl ToolOutput for ExecCommandToolOutput {
return None;
}

Some(JsonValue::String(self.truncated_output()))
Some(JsonValue::String(
self.truncated_output(self.model_output_max_tokens()),
))
}

fn code_mode_result(&self, _payload: &ToolPayload) -> JsonValue {
Expand All @@ -381,7 +384,10 @@ impl ToolOutput for ExecCommandToolOutput {
exit_code: self.exit_code,
session_id: self.process_id,
original_token_count: self.original_token_count,
output: self.truncated_output(),
output: match self.max_output_tokens {
Some(max_tokens) => self.truncated_output(max_tokens),
None => String::from_utf8_lossy(&self.raw_output).to_string(),
Comment on lines +387 to +389
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Preserve truncation notice for oversized raw exec output

When a code-mode exec omits max_output_tokens, this now returns raw_output verbatim, but the bytes are still sourced from the unified-exec HeadTailBuffer, which caps retained output at 1 MiB and drops the middle for a fast/short-lived command that emits more than that before the collector drains it. In that scenario code mode receives a head+tail concatenation with no …truncated… marker, so scripts can parse or summarize corrupted data as if it were complete; the previous formatted path at least surfaced that the result was truncated.

Useful? React with 👍 / 👎.

},
};

serde_json::to_value(result).unwrap_or_else(|err| {
Expand All @@ -391,9 +397,12 @@ impl ToolOutput for ExecCommandToolOutput {
}

impl ExecCommandToolOutput {
pub(crate) fn truncated_output(&self) -> String {
fn model_output_max_tokens(&self) -> usize {
resolve_max_tokens(self.max_output_tokens).min(self.truncation_policy.token_budget())
}

pub(crate) fn truncated_output(&self, max_tokens: usize) -> String {
let text = String::from_utf8_lossy(&self.raw_output).to_string();
let max_tokens = resolve_max_tokens(self.max_output_tokens);
formatted_truncate_text(&text, TruncationPolicy::Tokens(max_tokens))
}

Expand All @@ -420,7 +429,7 @@ impl ExecCommandToolOutput {
}

sections.push("Output:".to_string());
sections.push(self.truncated_output());
sections.push(self.truncated_output(self.model_output_max_tokens()));

sections.join("\n")
}
Expand Down
1 change: 1 addition & 0 deletions codex-rs/core/src/tools/context_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,7 @@ fn exec_command_tool_output_formats_truncated_response() {
chunk_id: "abc123".to_string(),
wall_time: std::time::Duration::from_millis(1250),
raw_output: b"token one token two token three token four token five".to_vec(),
truncation_policy: TruncationPolicy::Tokens(10_000),
max_output_tokens: Some(4),
process_id: None,
exit_code: Some(0),
Expand Down
9 changes: 0 additions & 9 deletions codex-rs/core/src/tools/handlers/unified_exec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,8 @@ use crate::tools::context::ToolOutput;
use crate::tools::context::ToolPayload;
use crate::tools::hook_names::HookToolName;
use crate::tools::registry::PostToolUsePayload;
use crate::unified_exec::resolve_max_tokens;
use codex_protocol::models::AdditionalPermissionProfile;
use codex_tools::UnifiedExecShellMode;
use codex_utils_output_truncation::TruncationPolicy;
use serde::Deserialize;
use std::path::PathBuf;
use std::sync::Arc;
Expand Down Expand Up @@ -72,13 +70,6 @@ fn default_tty() -> bool {
false
}

fn effective_max_output_tokens(
max_output_tokens: Option<usize>,
truncation_policy: TruncationPolicy,
) -> usize {
resolve_max_tokens(max_output_tokens).min(truncation_policy.token_budget())
}

#[derive(Debug)]
pub(crate) struct ResolvedCommand {
pub(crate) command: Vec<String>,
Expand Down
11 changes: 5 additions & 6 deletions codex-rs/core/src/tools/handlers/unified_exec/exec_command.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ use super::super::shell_spec::CommandToolOptions;
use super::super::shell_spec::create_exec_command_tool_with_environment_id;
use super::ExecCommandArgs;
use super::ExecCommandEnvironmentArgs;
use super::effective_max_output_tokens;
use super::get_command;
use super::post_unified_exec_tool_use_payload;

Expand Down Expand Up @@ -162,8 +161,6 @@ impl ToolExecutor<ToolInvocation> for ExecCommandHandler {
prefix_rule,
..
} = args;
let max_output_tokens =
effective_max_output_tokens(max_output_tokens, turn.truncation_policy);

let exec_permission_approvals_enabled =
session.features().enabled(Feature::ExecPermissionApprovals);
Expand Down Expand Up @@ -241,7 +238,8 @@ impl ToolExecutor<ToolInvocation> for ExecCommandHandler {
chunk_id: String::new(),
wall_time: std::time::Duration::ZERO,
raw_output: output.into_text().into_bytes(),
max_output_tokens: Some(max_output_tokens),
truncation_policy: turn.truncation_policy,
max_output_tokens,
process_id: None,
exit_code: None,
original_token_count: None,
Expand All @@ -258,7 +256,7 @@ impl ToolExecutor<ToolInvocation> for ExecCommandHandler {
hook_command: hook_command.clone(),
process_id,
yield_time_ms,
max_output_tokens: Some(max_output_tokens),
max_output_tokens,
cwd,
sandbox_cwd: turn_environment.cwd.clone(),
environment,
Expand All @@ -284,7 +282,8 @@ impl ToolExecutor<ToolInvocation> for ExecCommandHandler {
chunk_id: generate_chunk_id(),
wall_time: output.duration,
raw_output: output_text.into_bytes(),
max_output_tokens: Some(max_output_tokens),
truncation_policy: turn.truncation_policy,
max_output_tokens,
// Sandbox denial is terminal, so there is no live
// process for write_stdin to resume.
process_id: None,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ use codex_tools::ToolSpec;
use serde::Deserialize;

use super::super::shell_spec::create_write_stdin_tool;
use super::effective_max_output_tokens;
use super::post_unified_exec_tool_use_payload;

#[derive(Debug, Deserialize)]
Expand Down Expand Up @@ -62,16 +61,15 @@ impl ToolExecutor<ToolInvocation> for WriteStdinHandler {
};

let args: WriteStdinArgs = parse_arguments(&arguments)?;
let max_output_tokens =
effective_max_output_tokens(args.max_output_tokens, turn.truncation_policy);
let response = session
.services
.unified_exec_manager
.write_stdin(WriteStdinRequest {
process_id: args.session_id,
input: &args.chars,
yield_time_ms: args.yield_time_ms,
max_output_tokens: Some(max_output_tokens),
max_output_tokens: args.max_output_tokens,
truncation_policy: turn.truncation_policy,
})
.await
.map_err(|err| {
Expand Down
9 changes: 9 additions & 0 deletions codex-rs/core/src/tools/handlers/unified_exec_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use crate::shell::default_user_shell;
use codex_tools::UnifiedExecShellMode;
use codex_tools::ZshForkConfig;
use codex_utils_absolute_path::AbsolutePathBuf;
use codex_utils_output_truncation::TruncationPolicy;
use pretty_assertions::assert_eq;
use std::sync::Arc;

Expand All @@ -17,6 +18,8 @@ use crate::tools::registry::CoreToolRuntime;
use crate::turn_diff_tracker::TurnDiffTracker;
use tokio::sync::Mutex;

const TEST_TRUNCATION_POLICY: TruncationPolicy = TruncationPolicy::Tokens(10_000);

async fn invocation_for_payload(
tool_name: &str,
call_id: &str,
Expand Down Expand Up @@ -258,6 +261,7 @@ async fn exec_command_post_tool_use_payload_uses_output_for_noninteractive_one_s
chunk_id: "chunk-1".to_string(),
wall_time: std::time::Duration::from_millis(498),
raw_output: b"three".to_vec(),
truncation_policy: TEST_TRUNCATION_POLICY,
max_output_tokens: None,
process_id: None,
exit_code: Some(0),
Expand Down Expand Up @@ -287,6 +291,7 @@ async fn exec_command_post_tool_use_payload_uses_output_for_interactive_completi
chunk_id: "chunk-1".to_string(),
wall_time: std::time::Duration::from_millis(498),
raw_output: b"three".to_vec(),
truncation_policy: TEST_TRUNCATION_POLICY,
max_output_tokens: None,
process_id: None,
exit_code: Some(0),
Expand Down Expand Up @@ -317,6 +322,7 @@ async fn exec_command_post_tool_use_payload_skips_running_sessions() {
chunk_id: "chunk-1".to_string(),
wall_time: std::time::Duration::from_millis(498),
raw_output: b"three".to_vec(),
truncation_policy: TEST_TRUNCATION_POLICY,
max_output_tokens: None,
process_id: Some(45),
exit_code: None,
Expand All @@ -342,6 +348,7 @@ async fn write_stdin_post_tool_use_payload_uses_original_exec_call_id_and_comman
chunk_id: "chunk-2".to_string(),
wall_time: std::time::Duration::from_millis(498),
raw_output: b"finished\n".to_vec(),
truncation_policy: TEST_TRUNCATION_POLICY,
max_output_tokens: None,
process_id: None,
exit_code: Some(0),
Expand Down Expand Up @@ -372,6 +379,7 @@ async fn write_stdin_post_tool_use_payload_keeps_parallel_session_metadata_separ
chunk_id: "chunk-a".to_string(),
wall_time: std::time::Duration::from_millis(498),
raw_output: b"alpha\n".to_vec(),
truncation_policy: TEST_TRUNCATION_POLICY,
max_output_tokens: None,
process_id: None,
exit_code: Some(0),
Expand All @@ -383,6 +391,7 @@ async fn write_stdin_post_tool_use_payload_keeps_parallel_session_metadata_separ
chunk_id: "chunk-b".to_string(),
wall_time: std::time::Duration::from_millis(498),
raw_output: b"beta\n".to_vec(),
truncation_policy: TEST_TRUNCATION_POLICY,
max_output_tokens: None,
process_id: None,
exit_code: Some(0),
Expand Down
2 changes: 2 additions & 0 deletions codex-rs/core/src/unified_exec/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ use codex_exec_server::Environment;
use codex_network_proxy::NetworkProxy;
use codex_protocol::models::AdditionalPermissionProfile;
use codex_utils_absolute_path::AbsolutePathBuf;
use codex_utils_output_truncation::TruncationPolicy;
use rand::Rng;
use rand::rng;
use tokio::sync::Mutex;
Expand Down Expand Up @@ -111,6 +112,7 @@ pub(crate) struct WriteStdinRequest<'a> {
pub input: &'a str,
pub yield_time_ms: u64,
pub max_output_tokens: Option<usize>,
pub truncation_policy: TruncationPolicy,
}

#[derive(Default)]
Expand Down
39 changes: 31 additions & 8 deletions codex-rs/core/src/unified_exec/mod_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ use crate::tools::context::ExecCommandToolOutput;
use crate::unified_exec::WriteStdinRequest;
use crate::unified_exec::process::OutputHandles;
use codex_sandboxing::SandboxType;
use codex_utils_output_truncation::TruncationPolicy;
use codex_utils_output_truncation::approx_token_count;
use core_test_support::get_remote_test_env;
use core_test_support::skip_if_sandbox;
Expand Down Expand Up @@ -162,6 +163,7 @@ async fn exec_command_with_tty(
chunk_id: generate_chunk_id(),
wall_time,
raw_output: collected,
truncation_policy: turn.truncation_policy,
max_output_tokens: None,
process_id: response_process_id,
exit_code,
Expand Down Expand Up @@ -195,6 +197,7 @@ async fn write_stdin(
input,
yield_time_ms,
max_output_tokens: None,
truncation_policy: TruncationPolicy::Tokens(10_000),
})
.await
}
Expand Down Expand Up @@ -260,7 +263,9 @@ async fn unified_exec_persists_across_requests() -> anyhow::Result<()> {
)
.await?;
assert!(
out_2.truncated_output().contains("codex"),
out_2
.truncated_output(DEFAULT_MAX_OUTPUT_TOKENS)
.contains("codex"),
"expected environment variable output"
);

Expand Down Expand Up @@ -301,7 +306,9 @@ async fn multi_unified_exec_sessions() -> anyhow::Result<()> {
"short command should not report a process id if it exits quickly"
);
assert!(
!out_2.truncated_output().contains("codex"),
!out_2
.truncated_output(DEFAULT_MAX_OUTPUT_TOKENS)
.contains("codex"),
"short command should run in a fresh shell"
);

Expand All @@ -313,7 +320,9 @@ async fn multi_unified_exec_sessions() -> anyhow::Result<()> {
)
.await?;
assert!(
out_3.truncated_output().contains("codex"),
out_3
.truncated_output(DEFAULT_MAX_OUTPUT_TOKENS)
.contains("codex"),
"session should preserve state"
);

Expand Down Expand Up @@ -350,7 +359,9 @@ async fn unified_exec_timeouts() -> anyhow::Result<()> {
)
.await?;
assert!(
!out_2.truncated_output().contains(TEST_VAR_VALUE),
!out_2
.truncated_output(DEFAULT_MAX_OUTPUT_TOKENS)
.contains(TEST_VAR_VALUE),
"timeout too short should yield incomplete output"
);

Expand All @@ -359,7 +370,9 @@ async fn unified_exec_timeouts() -> anyhow::Result<()> {
let out_3 = write_stdin(&session, process_id, "", /*yield_time_ms*/ 100).await?;

assert!(
out_3.truncated_output().contains(TEST_VAR_VALUE),
out_3
.truncated_output(DEFAULT_MAX_OUTPUT_TOKENS)
.contains(TEST_VAR_VALUE),
"subsequent poll should retrieve output"
);

Expand Down Expand Up @@ -394,7 +407,9 @@ async fn unified_exec_pause_blocks_yield_timeout() -> anyhow::Result<()> {
"pause should block the unified exec yield timeout"
);
assert!(
response.truncated_output().contains("unified-exec-done"),
response
.truncated_output(DEFAULT_MAX_OUTPUT_TOKENS)
.contains("unified-exec-done"),
"exec_command should wait for output after the pause lifts"
);
assert!(
Expand All @@ -420,7 +435,11 @@ async fn requests_with_large_timeout_are_capped() -> anyhow::Result<()> {
.await?;

assert!(result.process_id.is_some());
assert!(result.truncated_output().contains("codex"));
assert!(
result
.truncated_output(DEFAULT_MAX_OUTPUT_TOKENS)
.contains("codex")
);

Ok(())
}
Expand All @@ -442,7 +461,11 @@ async fn completed_commands_do_not_persist_sessions() -> anyhow::Result<()> {
result.process_id.is_some(),
"completed command should report a process id"
);
assert!(result.truncated_output().contains("codex"));
assert!(
result
.truncated_output(DEFAULT_MAX_OUTPUT_TOKENS)
.contains("codex")
);

assert!(
session
Expand Down
2 changes: 2 additions & 0 deletions codex-rs/core/src/unified_exec/process_manager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -581,6 +581,7 @@ impl UnifiedExecProcessManager {
chunk_id,
wall_time,
raw_output: collected,
truncation_policy: context.turn.truncation_policy,
max_output_tokens: request.max_output_tokens,
process_id: response_process_id,
exit_code,
Expand Down Expand Up @@ -725,6 +726,7 @@ impl UnifiedExecProcessManager {
chunk_id,
wall_time,
raw_output: collected,
truncation_policy: request.truncation_policy,
max_output_tokens: request.max_output_tokens,
process_id,
exit_code,
Expand Down
Loading
Loading