diff --git a/codex-rs/core/src/tools/mod.rs b/codex-rs/core/src/tools/mod.rs index 5a120d0907..d0c922d100 100644 --- a/codex-rs/core/src/tools/mod.rs +++ b/codex-rs/core/src/tools/mod.rs @@ -145,6 +145,7 @@ pub(crate) async fn handle_container_exec_with_params( ) .await; + // always make sure to truncate the output if its length isn't controlled. match output_result { Ok(output) => { let ExecToolCallOutput { exit_code, .. } = &output; @@ -155,13 +156,16 @@ pub(crate) async fn handle_container_exec_with_params( Err(FunctionCallError::RespondToModel(content)) } } - Err(ExecError::Function(err)) => Err(err), + Err(ExecError::Function(err)) => Err(truncate_function_error(err)), Err(ExecError::Codex(CodexErr::Sandbox(SandboxErr::Timeout { output }))) => Err( FunctionCallError::RespondToModel(format_exec_output_apply_patch(&output)), ), - Err(ExecError::Codex(err)) => Err(FunctionCallError::RespondToModel(format!( - "execution error: {err:?}" - ))), + Err(ExecError::Codex(err)) => { + let message = format!("execution error: {err:?}"); + Err(FunctionCallError::RespondToModel( + truncate_formatted_exec_output(&message), + )) + } } } @@ -206,26 +210,28 @@ pub fn format_exec_output_str(exec_output: &ExecToolCallOutput) -> String { aggregated_output, .. } = exec_output; - // Head+tail truncation for the model: show the beginning and end with an elision. - // Clients still receive full streams; only this formatted summary is capped. - - let mut s = &aggregated_output.text; - let prefixed_str: String; + let content = aggregated_output.text.as_str(); if exec_output.timed_out { - prefixed_str = format!( - "command timed out after {} milliseconds\n", + let prefixed = format!( + "command timed out after {} milliseconds\n{content}", exec_output.duration.as_millis() - ) + s; - s = &prefixed_str; + ); + return truncate_formatted_exec_output(&prefixed); } - let total_lines = s.lines().count(); - if s.len() <= MODEL_FORMAT_MAX_BYTES && total_lines <= MODEL_FORMAT_MAX_LINES { - return s.to_string(); + truncate_formatted_exec_output(content) +} + +fn truncate_formatted_exec_output(content: &str) -> String { + // Head+tail truncation for the model: show the beginning and end with an elision. + // Clients still receive full streams; only this formatted summary is capped. + let total_lines = content.lines().count(); + if content.len() <= MODEL_FORMAT_MAX_BYTES && total_lines <= MODEL_FORMAT_MAX_LINES { + return content.to_string(); } - let segments: Vec<&str> = s.split_inclusive('\n').collect(); + let segments: Vec<&str> = content.split_inclusive('\n').collect(); let head_take = MODEL_FORMAT_HEAD_LINES.min(segments.len()); let tail_take = MODEL_FORMAT_TAIL_LINES.min(segments.len().saturating_sub(head_take)); let omitted = segments.len().saturating_sub(head_take + tail_take); @@ -236,9 +242,9 @@ pub fn format_exec_output_str(exec_output: &ExecToolCallOutput) -> String { .map(|segment| segment.len()) .sum(); let tail_slice_start: usize = if tail_take == 0 { - s.len() + content.len() } else { - s.len() + content.len() - segments .iter() .rev() @@ -260,9 +266,9 @@ pub fn format_exec_output_str(exec_output: &ExecToolCallOutput) -> String { head_budget = MODEL_FORMAT_MAX_BYTES.saturating_sub(marker.len()); } - let head_slice = &s[..head_slice_end]; + let head_slice = &content[..head_slice_end]; let head_part = take_bytes_at_char_boundary(head_slice, head_budget); - let mut result = String::with_capacity(MODEL_FORMAT_MAX_BYTES.min(s.len())); + let mut result = String::with_capacity(MODEL_FORMAT_MAX_BYTES.min(content.len())); result.push_str(head_part); result.push_str(&marker); @@ -272,9 +278,71 @@ pub fn format_exec_output_str(exec_output: &ExecToolCallOutput) -> String { return result; } - let tail_slice = &s[tail_slice_start..]; + let tail_slice = &content[tail_slice_start..]; let tail_part = take_last_bytes_at_char_boundary(tail_slice, remaining); result.push_str(tail_part); result } + +fn truncate_function_error(err: FunctionCallError) -> FunctionCallError { + match err { + FunctionCallError::RespondToModel(msg) => { + FunctionCallError::RespondToModel(truncate_formatted_exec_output(&msg)) + } + FunctionCallError::Fatal(msg) => { + FunctionCallError::Fatal(truncate_formatted_exec_output(&msg)) + } + other => other, + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn truncate_formatted_exec_output_truncates_large_error() { + let line = "very long execution error line that should trigger truncation\n"; + let large_error = line.repeat(2_500); // way beyond both byte and line limits + + let truncated = truncate_formatted_exec_output(&large_error); + + assert!(truncated.len() <= MODEL_FORMAT_MAX_BYTES); + assert!(truncated.starts_with(line)); + assert!(truncated.contains("[... omitted")); + assert_ne!(truncated, large_error); + } + + #[test] + fn truncate_function_error_trims_respond_to_model() { + let line = "respond-to-model error that should be truncated\n"; + let huge = line.repeat(3_000); + + let err = truncate_function_error(FunctionCallError::RespondToModel(huge)); + match err { + FunctionCallError::RespondToModel(message) => { + assert!(message.len() <= MODEL_FORMAT_MAX_BYTES); + assert!(message.contains("[... omitted")); + assert!(message.starts_with(line)); + } + other => panic!("unexpected error variant: {other:?}"), + } + } + + #[test] + fn truncate_function_error_trims_fatal() { + let line = "fatal error output that should be truncated\n"; + let huge = line.repeat(3_000); + + let err = truncate_function_error(FunctionCallError::Fatal(huge)); + match err { + FunctionCallError::Fatal(message) => { + assert!(message.len() <= MODEL_FORMAT_MAX_BYTES); + assert!(message.contains("[... omitted")); + assert!(message.starts_with(line)); + } + other => panic!("unexpected error variant: {other:?}"), + } + } +} diff --git a/codex-rs/core/tests/suite/tools.rs b/codex-rs/core/tests/suite/tools.rs index 4d6ccdac0c..ba4a88339b 100644 --- a/codex-rs/core/tests/suite/tools.rs +++ b/codex-rs/core/tests/suite/tools.rs @@ -448,3 +448,150 @@ async fn shell_timeout_includes_timeout_prefix_and_metadata() -> Result<()> { Ok(()) } + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn shell_sandbox_denied_truncates_error_output() -> Result<()> { + skip_if_no_network!(Ok(())); + + let server = start_mock_server().await; + let mut builder = test_codex(); + let test = builder.build(&server).await?; + + let call_id = "shell-denied"; + let long_line = "this is a long stderr line that should trigger truncation 0123456789abcdefghijklmnopqrstuvwxyz"; + let script = format!( + "for i in $(seq 1 500); do >&2 echo '{long_line}'; done; cat <<'EOF' > denied.txt\ncontent\nEOF", + ); + let args = json!({ + "command": ["/bin/sh", "-c", script], + "timeout_ms": 1_000, + }); + + let responses = vec![ + sse(vec![ + json!({"type": "response.created", "response": {"id": "resp-1"}}), + ev_function_call(call_id, "shell", &serde_json::to_string(&args)?), + ev_completed("resp-1"), + ]), + sse(vec![ + ev_assistant_message("msg-1", "done"), + ev_completed("resp-2"), + ]), + ]; + mount_sse_sequence(&server, responses).await; + + submit_turn( + &test, + "attempt to write in read-only sandbox", + AskForApproval::Never, + SandboxPolicy::ReadOnly, + ) + .await?; + + let requests = server.received_requests().await.expect("recorded requests"); + let bodies = request_bodies(&requests)?; + let function_outputs = collect_output_items(&bodies, "function_call_output"); + let denied_item = function_outputs + .iter() + .find(|item| item.get("call_id").and_then(Value::as_str) == Some(call_id)) + .expect("denied output present"); + + let output = denied_item + .get("output") + .and_then(Value::as_str) + .expect("denied output string"); + + assert!( + output.starts_with("failed in sandbox: "), + "expected sandbox error prefix, got {output:?}" + ); + assert!( + output.contains("[... omitted"), + "expected truncated marker, got {output:?}" + ); + assert!( + output.contains(long_line), + "expected truncated stderr sample, got {output:?}" + ); + // Linux distributions may surface sandbox write failures as different errno messages + // depending on the underlying mechanism (e.g., EPERM, EACCES, or EROFS). Accept a + // small set of common variants to keep this cross-platform. + let denial_markers = [ + "Operation not permitted", // EPERM + "Permission denied", // EACCES + "Read-only file system", // EROFS + ]; + assert!( + denial_markers.iter().any(|m| output.contains(m)), + "expected sandbox denial message, got {output:?}" + ); + + Ok(()) +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn shell_spawn_failure_truncates_exec_error() -> Result<()> { + skip_if_no_network!(Ok(())); + + let server = start_mock_server().await; + let mut builder = test_codex().with_config(|cfg| { + cfg.sandbox_policy = SandboxPolicy::DangerFullAccess; + }); + let test = builder.build(&server).await?; + + let call_id = "shell-spawn-failure"; + let bogus_component = "missing-bin-".repeat(700); + let bogus_exe = test + .cwd + .path() + .join(bogus_component) + .to_string_lossy() + .into_owned(); + + let args = json!({ + "command": [bogus_exe], + "timeout_ms": 1_000, + }); + + let responses = vec![ + sse(vec![ + json!({"type": "response.created", "response": {"id": "resp-1"}}), + ev_function_call(call_id, "shell", &serde_json::to_string(&args)?), + ev_completed("resp-1"), + ]), + sse(vec![ + ev_assistant_message("msg-1", "done"), + ev_completed("resp-2"), + ]), + ]; + mount_sse_sequence(&server, responses).await; + + submit_turn( + &test, + "spawn a missing binary", + AskForApproval::Never, + SandboxPolicy::DangerFullAccess, + ) + .await?; + + let requests = server.received_requests().await.expect("recorded requests"); + let bodies = request_bodies(&requests)?; + let function_outputs = collect_output_items(&bodies, "function_call_output"); + let failure_item = function_outputs + .iter() + .find(|item| item.get("call_id").and_then(Value::as_str) == Some(call_id)) + .expect("spawn failure output present"); + + let output = failure_item + .get("output") + .and_then(Value::as_str) + .expect("spawn failure output string"); + + assert!( + output.starts_with("execution error:"), + "expected execution error prefix, got {output:?}" + ); + assert!(output.len() <= 10 * 1024); + + Ok(()) +}