Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
112 changes: 90 additions & 22 deletions codex-rs/core/src/tools/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ pub(crate) async fn handle_container_exec_with_params(
)
.await;

// always make sure to truncate the output if its length isn't controlled.
match output_result {
Ok(output) => {
let ExecToolCallOutput { exit_code, .. } = &output;
Expand All @@ -155,13 +156,16 @@ pub(crate) async fn handle_container_exec_with_params(
Err(FunctionCallError::RespondToModel(content))
}
}
Err(ExecError::Function(err)) => Err(err),
Err(ExecError::Function(err)) => Err(truncate_function_error(err)),
Err(ExecError::Codex(CodexErr::Sandbox(SandboxErr::Timeout { output }))) => Err(
FunctionCallError::RespondToModel(format_exec_output_apply_patch(&output)),
),
Err(ExecError::Codex(err)) => Err(FunctionCallError::RespondToModel(format!(
"execution error: {err:?}"
))),
Err(ExecError::Codex(err)) => {
let message = format!("execution error: {err:?}");
Err(FunctionCallError::RespondToModel(
truncate_formatted_exec_output(&message),
))
}
}
}

Expand Down Expand Up @@ -206,26 +210,28 @@ pub fn format_exec_output_str(exec_output: &ExecToolCallOutput) -> String {
aggregated_output, ..
} = exec_output;

// Head+tail truncation for the model: show the beginning and end with an elision.
// Clients still receive full streams; only this formatted summary is capped.

let mut s = &aggregated_output.text;
let prefixed_str: String;
let content = aggregated_output.text.as_str();

if exec_output.timed_out {
prefixed_str = format!(
"command timed out after {} milliseconds\n",
let prefixed = format!(
"command timed out after {} milliseconds\n{content}",
exec_output.duration.as_millis()
) + s;
s = &prefixed_str;
);
return truncate_formatted_exec_output(&prefixed);
}

let total_lines = s.lines().count();
if s.len() <= MODEL_FORMAT_MAX_BYTES && total_lines <= MODEL_FORMAT_MAX_LINES {
return s.to_string();
truncate_formatted_exec_output(content)
}

fn truncate_formatted_exec_output(content: &str) -> String {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK for now but we will have to merge later all the truncation code we have everywhere

// Head+tail truncation for the model: show the beginning and end with an elision.
// Clients still receive full streams; only this formatted summary is capped.
let total_lines = content.lines().count();
if content.len() <= MODEL_FORMAT_MAX_BYTES && total_lines <= MODEL_FORMAT_MAX_LINES {
return content.to_string();
}

let segments: Vec<&str> = s.split_inclusive('\n').collect();
let segments: Vec<&str> = content.split_inclusive('\n').collect();
let head_take = MODEL_FORMAT_HEAD_LINES.min(segments.len());
let tail_take = MODEL_FORMAT_TAIL_LINES.min(segments.len().saturating_sub(head_take));
let omitted = segments.len().saturating_sub(head_take + tail_take);
Expand All @@ -236,9 +242,9 @@ pub fn format_exec_output_str(exec_output: &ExecToolCallOutput) -> String {
.map(|segment| segment.len())
.sum();
let tail_slice_start: usize = if tail_take == 0 {
s.len()
content.len()
} else {
s.len()
content.len()
- segments
.iter()
.rev()
Expand All @@ -260,9 +266,9 @@ pub fn format_exec_output_str(exec_output: &ExecToolCallOutput) -> String {
head_budget = MODEL_FORMAT_MAX_BYTES.saturating_sub(marker.len());
}

let head_slice = &s[..head_slice_end];
let head_slice = &content[..head_slice_end];
let head_part = take_bytes_at_char_boundary(head_slice, head_budget);
let mut result = String::with_capacity(MODEL_FORMAT_MAX_BYTES.min(s.len()));
let mut result = String::with_capacity(MODEL_FORMAT_MAX_BYTES.min(content.len()));

result.push_str(head_part);
result.push_str(&marker);
Expand All @@ -272,9 +278,71 @@ pub fn format_exec_output_str(exec_output: &ExecToolCallOutput) -> String {
return result;
}

let tail_slice = &s[tail_slice_start..];
let tail_slice = &content[tail_slice_start..];
let tail_part = take_last_bytes_at_char_boundary(tail_slice, remaining);
result.push_str(tail_part);

result
}

fn truncate_function_error(err: FunctionCallError) -> FunctionCallError {
match err {
FunctionCallError::RespondToModel(msg) => {
FunctionCallError::RespondToModel(truncate_formatted_exec_output(&msg))
}
FunctionCallError::Fatal(msg) => {
FunctionCallError::Fatal(truncate_formatted_exec_output(&msg))
}
other => other,
}
}

#[cfg(test)]
mod tests {
use super::*;

#[test]
fn truncate_formatted_exec_output_truncates_large_error() {
let line = "very long execution error line that should trigger truncation\n";
let large_error = line.repeat(2_500); // way beyond both byte and line limits

let truncated = truncate_formatted_exec_output(&large_error);

assert!(truncated.len() <= MODEL_FORMAT_MAX_BYTES);
assert!(truncated.starts_with(line));
assert!(truncated.contains("[... omitted"));
assert_ne!(truncated, large_error);
}

#[test]
fn truncate_function_error_trims_respond_to_model() {
let line = "respond-to-model error that should be truncated\n";
let huge = line.repeat(3_000);

let err = truncate_function_error(FunctionCallError::RespondToModel(huge));
match err {
FunctionCallError::RespondToModel(message) => {
assert!(message.len() <= MODEL_FORMAT_MAX_BYTES);
assert!(message.contains("[... omitted"));
assert!(message.starts_with(line));
}
other => panic!("unexpected error variant: {other:?}"),
}
}

#[test]
fn truncate_function_error_trims_fatal() {
let line = "fatal error output that should be truncated\n";
let huge = line.repeat(3_000);

let err = truncate_function_error(FunctionCallError::Fatal(huge));
match err {
FunctionCallError::Fatal(message) => {
assert!(message.len() <= MODEL_FORMAT_MAX_BYTES);
assert!(message.contains("[... omitted"));
assert!(message.starts_with(line));
}
other => panic!("unexpected error variant: {other:?}"),
}
}
}
147 changes: 147 additions & 0 deletions codex-rs/core/tests/suite/tools.rs
Original file line number Diff line number Diff line change
Expand Up @@ -448,3 +448,150 @@ async fn shell_timeout_includes_timeout_prefix_and_metadata() -> Result<()> {

Ok(())
}

#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn shell_sandbox_denied_truncates_error_output() -> Result<()> {
skip_if_no_network!(Ok(()));

let server = start_mock_server().await;
let mut builder = test_codex();
let test = builder.build(&server).await?;

let call_id = "shell-denied";
let long_line = "this is a long stderr line that should trigger truncation 0123456789abcdefghijklmnopqrstuvwxyz";
let script = format!(
"for i in $(seq 1 500); do >&2 echo '{long_line}'; done; cat <<'EOF' > denied.txt\ncontent\nEOF",
);
let args = json!({
"command": ["/bin/sh", "-c", script],
"timeout_ms": 1_000,
});

let responses = vec![
sse(vec![
json!({"type": "response.created", "response": {"id": "resp-1"}}),
ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
ev_completed("resp-1"),
]),
sse(vec![
ev_assistant_message("msg-1", "done"),
ev_completed("resp-2"),
]),
];
mount_sse_sequence(&server, responses).await;

submit_turn(
&test,
"attempt to write in read-only sandbox",
AskForApproval::Never,
SandboxPolicy::ReadOnly,
)
.await?;

let requests = server.received_requests().await.expect("recorded requests");
let bodies = request_bodies(&requests)?;
let function_outputs = collect_output_items(&bodies, "function_call_output");
let denied_item = function_outputs
.iter()
.find(|item| item.get("call_id").and_then(Value::as_str) == Some(call_id))
.expect("denied output present");

let output = denied_item
.get("output")
.and_then(Value::as_str)
.expect("denied output string");

assert!(
output.starts_with("failed in sandbox: "),
"expected sandbox error prefix, got {output:?}"
);
assert!(
output.contains("[... omitted"),
"expected truncated marker, got {output:?}"
);
assert!(
output.contains(long_line),
"expected truncated stderr sample, got {output:?}"
);
// Linux distributions may surface sandbox write failures as different errno messages
// depending on the underlying mechanism (e.g., EPERM, EACCES, or EROFS). Accept a
// small set of common variants to keep this cross-platform.
let denial_markers = [
"Operation not permitted", // EPERM
"Permission denied", // EACCES
"Read-only file system", // EROFS
];
assert!(
denial_markers.iter().any(|m| output.contains(m)),
"expected sandbox denial message, got {output:?}"
);

Ok(())
}

#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn shell_spawn_failure_truncates_exec_error() -> Result<()> {
skip_if_no_network!(Ok(()));

let server = start_mock_server().await;
let mut builder = test_codex().with_config(|cfg| {
cfg.sandbox_policy = SandboxPolicy::DangerFullAccess;
});
let test = builder.build(&server).await?;

let call_id = "shell-spawn-failure";
let bogus_component = "missing-bin-".repeat(700);
let bogus_exe = test
.cwd
.path()
.join(bogus_component)
.to_string_lossy()
.into_owned();

let args = json!({
"command": [bogus_exe],
"timeout_ms": 1_000,
});

let responses = vec![
sse(vec![
json!({"type": "response.created", "response": {"id": "resp-1"}}),
ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
ev_completed("resp-1"),
]),
sse(vec![
ev_assistant_message("msg-1", "done"),
ev_completed("resp-2"),
]),
];
mount_sse_sequence(&server, responses).await;

submit_turn(
&test,
"spawn a missing binary",
AskForApproval::Never,
SandboxPolicy::DangerFullAccess,
)
.await?;

let requests = server.received_requests().await.expect("recorded requests");
let bodies = request_bodies(&requests)?;
let function_outputs = collect_output_items(&bodies, "function_call_output");
let failure_item = function_outputs
.iter()
.find(|item| item.get("call_id").and_then(Value::as_str) == Some(call_id))
.expect("spawn failure output present");

let output = failure_item
.get("output")
.and_then(Value::as_str)
.expect("spawn failure output string");

assert!(
output.starts_with("execution error:"),
"expected execution error prefix, got {output:?}"
);
assert!(output.len() <= 10 * 1024);

Ok(())
}
Loading