From ea9a916ed4f024398d872c4db9781c579738f4a3 Mon Sep 17 00:00:00 2001 From: Dylan Hurd Date: Sun, 16 Nov 2025 23:29:31 -0800 Subject: [PATCH 1/4] chore(core) Add shell_serialization coverage --- codex-rs/core/tests/common/test_codex.rs | 41 ++ .../core/tests/suite/shell_serialization.rs | 641 +++++++++++++----- 2 files changed, 530 insertions(+), 152 deletions(-) diff --git a/codex-rs/core/tests/common/test_codex.rs b/codex-rs/core/tests/common/test_codex.rs index e88c410ecefd..bad94883c218 100644 --- a/codex-rs/core/tests/common/test_codex.rs +++ b/codex-rs/core/tests/common/test_codex.rs @@ -38,6 +38,15 @@ pub enum ApplyPatchModelOutput { ShellViaHeredoc, } +/// A collection of different ways the model can output an apply_patch call +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub enum ShellModelOutput { + Shell, + ShellCommand, + LocalShell, + // UnifiedExec has its own set of tests +} + pub struct TestCodexBuilder { config_mutators: Vec>, } @@ -289,6 +298,15 @@ impl TestCodexHarness { .to_string() } + pub async fn local_shell_call_output(&self, call_id: &str) -> String { + let bodies = self.request_bodies().await; + local_shell_call_output(&bodies, call_id) + .get("output") + .and_then(Value::as_str) + .expect("output string") + .to_string() + } + pub async fn apply_patch_output( &self, call_id: &str, @@ -301,6 +319,14 @@ impl TestCodexHarness { | ApplyPatchModelOutput::ShellViaHeredoc => self.function_call_stdout(call_id).await, } } + + pub async fn shell_output(&self, call_id: &str, output_type: ShellModelOutput) -> String { + match output_type { + ShellModelOutput::LocalShell => self.local_shell_call_output(call_id).await, + ShellModelOutput::Shell + | ShellModelOutput::ShellCommand => self.function_call_stdout(call_id).await, + } + } } fn custom_tool_call_output<'a>(bodies: &'a [Value], call_id: &str) -> &'a Value { @@ -333,6 +359,21 @@ fn function_call_output<'a>(bodies: &'a [Value], call_id: &str) -> &'a Value { panic!("function_call_output {call_id} not found"); } +fn local_shell_call_output<'a>(bodies: &'a [Value], call_id: &str) -> &'a Value { + for body in bodies { + if let Some(items) = body.get("input").and_then(Value::as_array) { + for item in items { + if item.get("type").and_then(Value::as_str) == Some("local_shell_call_output") + && item.get("call_id").and_then(Value::as_str) == Some(call_id) + { + return item; + } + } + } + } + panic!("local_shell_call_output {call_id} not found"); +} + pub fn test_codex() -> TestCodexBuilder { TestCodexBuilder { config_mutators: vec![], diff --git a/codex-rs/core/tests/suite/shell_serialization.rs b/codex-rs/core/tests/suite/shell_serialization.rs index 58d32c71d689..2f1abcd8ad76 100644 --- a/codex-rs/core/tests/suite/shell_serialization.rs +++ b/codex-rs/core/tests/suite/shell_serialization.rs @@ -5,10 +5,9 @@ use codex_core::features::Feature; use codex_core::model_family::find_family_for_model; use codex_core::protocol::SandboxPolicy; use core_test_support::assert_regex_match; -use core_test_support::responses::ev_apply_patch_function_call; +use core_test_support::responses::ev_apply_patch_call; use core_test_support::responses::ev_assistant_message; use core_test_support::responses::ev_completed; -use core_test_support::responses::ev_custom_tool_call; use core_test_support::responses::ev_function_call; use core_test_support::responses::ev_local_shell_call; use core_test_support::responses::ev_response_created; @@ -16,12 +15,15 @@ use core_test_support::responses::mount_sse_sequence; use core_test_support::responses::sse; use core_test_support::responses::start_mock_server; use core_test_support::skip_if_no_network; +use core_test_support::test_codex::ApplyPatchModelOutput; +use core_test_support::test_codex::ShellModelOutput; use core_test_support::test_codex::test_codex; use pretty_assertions::assert_eq; use regex_lite::Regex; use serde_json::Value; use serde_json::json; use std::fs; +use test_case::test_case; const FIXTURE_JSON: &str = r#"{ "description": "This is an example JSON file.", @@ -36,33 +38,74 @@ const FIXTURE_JSON: &str = r#"{ "#; #[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn shell_output_stays_json_without_freeform_apply_patch() -> Result<()> { +#[test_case(ShellModelOutput::Shell)] +#[test_case(ShellModelOutput::ShellCommand)] +#[test_case(ShellModelOutput::LocalShell)] +async fn shell_output_stays_json_without_freeform_apply_patch( + output_type: ShellModelOutput, +) -> Result<()> { skip_if_no_network!(Ok(())); let server = start_mock_server().await; - let mut builder = test_codex().with_config(|config| { + let mut builder = test_codex().with_config(move |config| { config.features.disable(Feature::ApplyPatchFreeform); config.model = "gpt-5".to_string(); config.model_family = find_family_for_model("gpt-5").expect("gpt-5 is a model family"); + if matches!(output_type, ShellModelOutput::ShellCommand) { + config.features.enable(Feature::ShellCommandTool); + } + let _ = output_type; }); let test = builder.build(&server).await?; let call_id = "shell-json"; - let args = json!({ - "command": ["/bin/echo", "shell json"], - "timeout_ms": 1_000, - }); - let responses = vec![ - sse(vec![ - ev_response_created("resp-1"), - ev_function_call(call_id, "shell", &serde_json::to_string(&args)?), - ev_completed("resp-1"), - ]), - sse(vec![ - ev_assistant_message("msg-1", "done"), - ev_completed("resp-2"), - ]), - ]; + let responses = match output_type { + ShellModelOutput::Shell | ShellModelOutput::ShellCommand => { + let command = if matches!(output_type, ShellModelOutput::ShellCommand) { + json!({ + "command": "echo shell json", + "timeout_ms": 1_000, + }) + } else { + json!({ + "command": ["/bin/echo", "shell json"], + "timeout_ms": 1_000, + }) + }; + vec![ + sse(vec![ + ev_response_created("resp-1"), + ev_function_call( + call_id, + if matches!(output_type, ShellModelOutput::ShellCommand) { + "shell_command" + } else { + "shell" + }, + &serde_json::to_string(&command)?, + ), + ev_completed("resp-1"), + ]), + sse(vec![ + ev_assistant_message("msg-1", "done"), + ev_completed("resp-2"), + ]), + ] + } + ShellModelOutput::LocalShell => { + vec![ + sse(vec![ + ev_response_created("resp-1"), + ev_local_shell_call(call_id, "completed", vec!["/bin/echo", "shell json"]), + ev_completed("resp-1"), + ]), + sse(vec![ + ev_assistant_message("msg-1", "done"), + ev_completed("resp-2"), + ]), + ] + } + }; let mock = mount_sse_sequence(&server, responses).await; test.submit_turn_with_policy( @@ -80,7 +123,6 @@ async fn shell_output_stays_json_without_freeform_apply_patch() -> Result<()> { let mut parsed: Value = serde_json::from_str(output)?; if let Some(metadata) = parsed.get_mut("metadata").and_then(Value::as_object_mut) { - // duration_seconds is non-deterministic; remove it for deep equality let _ = metadata.remove("duration_seconds"); } @@ -102,31 +144,72 @@ async fn shell_output_stays_json_without_freeform_apply_patch() -> Result<()> { } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn shell_output_is_structured_with_freeform_apply_patch() -> Result<()> { +#[test_case(ShellModelOutput::Shell)] +#[test_case(ShellModelOutput::ShellCommand)] +#[test_case(ShellModelOutput::LocalShell)] +async fn shell_output_is_structured_with_freeform_apply_patch( + output_type: ShellModelOutput, +) -> Result<()> { skip_if_no_network!(Ok(())); let server = start_mock_server().await; - let mut builder = test_codex().with_config(|config| { + let mut builder = test_codex().with_config(move |config| { config.features.enable(Feature::ApplyPatchFreeform); + if matches!(output_type, ShellModelOutput::ShellCommand) { + config.features.enable(Feature::ShellCommandTool); + } + let _ = output_type; }); let test = builder.build(&server).await?; let call_id = "shell-structured"; - let args = json!({ - "command": ["/bin/echo", "freeform shell"], - "timeout_ms": 1_000, - }); - let responses = vec![ - sse(vec![ - ev_response_created("resp-1"), - ev_function_call(call_id, "shell", &serde_json::to_string(&args)?), - ev_completed("resp-1"), - ]), - sse(vec![ - ev_assistant_message("msg-1", "done"), - ev_completed("resp-2"), - ]), - ]; + let responses = match output_type { + ShellModelOutput::Shell | ShellModelOutput::ShellCommand => { + let command = if matches!(output_type, ShellModelOutput::ShellCommand) { + json!({ + "command": "echo freeform shell", + "timeout_ms": 1_000, + }) + } else { + json!({ + "command": ["/bin/echo", "freeform shell"], + "timeout_ms": 1_000, + }) + }; + vec![ + sse(vec![ + ev_response_created("resp-1"), + ev_function_call( + call_id, + if matches!(output_type, ShellModelOutput::ShellCommand) { + "shell_command" + } else { + "shell" + }, + &serde_json::to_string(&command)?, + ), + ev_completed("resp-1"), + ]), + sse(vec![ + ev_assistant_message("msg-1", "done"), + ev_completed("resp-2"), + ]), + ] + } + ShellModelOutput::LocalShell => { + vec![ + sse(vec![ + ev_response_created("resp-1"), + ev_local_shell_call(call_id, "completed", vec!["/bin/echo", "freeform shell"]), + ev_completed("resp-1"), + ]), + sse(vec![ + ev_assistant_message("msg-1", "done"), + ev_completed("resp-2"), + ]), + ] + } + }; let mock = mount_sse_sequence(&server, responses).await; test.submit_turn_with_policy( @@ -159,14 +242,23 @@ freeform shell } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn shell_output_preserves_fixture_json_without_serialization() -> Result<()> { +#[test_case(ShellModelOutput::Shell)] +#[test_case(ShellModelOutput::ShellCommand)] +#[test_case(ShellModelOutput::LocalShell)] +async fn shell_output_preserves_fixture_json_without_serialization( + output_type: ShellModelOutput, +) -> Result<()> { skip_if_no_network!(Ok(())); let server = start_mock_server().await; - let mut builder = test_codex().with_config(|config| { + let mut builder = test_codex().with_config(move |config| { config.features.disable(Feature::ApplyPatchFreeform); config.model = "gpt-5".to_string(); config.model_family = find_family_for_model("gpt-5").expect("gpt-5 is a model family"); + if matches!(output_type, ShellModelOutput::ShellCommand) { + config.features.enable(Feature::ShellCommandTool); + } + let _ = output_type; }); let test = builder.build(&server).await?; @@ -175,21 +267,57 @@ async fn shell_output_preserves_fixture_json_without_serialization() -> Result<( let fixture_path_str = fixture_path.to_string_lossy().to_string(); let call_id = "shell-json-fixture"; - let args = json!({ - "command": ["/usr/bin/sed", "-n", "p", fixture_path_str], - "timeout_ms": 1_000, - }); - let responses = vec![ - sse(vec![ - ev_response_created("resp-1"), - ev_function_call(call_id, "shell", &serde_json::to_string(&args)?), - ev_completed("resp-1"), - ]), - sse(vec![ - ev_assistant_message("msg-1", "done"), - ev_completed("resp-2"), - ]), - ]; + let responses = match output_type { + ShellModelOutput::Shell | ShellModelOutput::ShellCommand => { + let command = if matches!(output_type, ShellModelOutput::ShellCommand) { + json!({ + "command": format!("/usr/bin/sed -n p {fixture_path_str}"), + "timeout_ms": 1_000, + }) + } else { + json!({ + "command": ["/usr/bin/sed", "-n", "p", fixture_path_str], + "timeout_ms": 1_000, + }) + }; + vec![ + sse(vec![ + ev_response_created("resp-1"), + ev_function_call( + call_id, + if matches!(output_type, ShellModelOutput::ShellCommand) { + "shell_command" + } else { + "shell" + }, + &serde_json::to_string(&command)?, + ), + ev_completed("resp-1"), + ]), + sse(vec![ + ev_assistant_message("msg-1", "done"), + ev_completed("resp-2"), + ]), + ] + } + ShellModelOutput::LocalShell => { + vec![ + sse(vec![ + ev_response_created("resp-1"), + ev_local_shell_call( + call_id, + "completed", + vec!["/usr/bin/sed", "-n", "p", fixture_path_str.as_str()], + ), + ev_completed("resp-1"), + ]), + sse(vec![ + ev_assistant_message("msg-1", "done"), + ev_completed("resp-2"), + ]), + ] + } + }; let mock = mount_sse_sequence(&server, responses).await; test.submit_turn_with_policy( @@ -232,12 +360,21 @@ async fn shell_output_preserves_fixture_json_without_serialization() -> Result<( } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn shell_output_structures_fixture_with_serialization() -> Result<()> { +#[test_case(ShellModelOutput::Shell)] +#[test_case(ShellModelOutput::ShellCommand)] +#[test_case(ShellModelOutput::LocalShell)] +async fn shell_output_structures_fixture_with_serialization( + output_type: ShellModelOutput, +) -> Result<()> { skip_if_no_network!(Ok(())); let server = start_mock_server().await; - let mut builder = test_codex().with_config(|config| { + let mut builder = test_codex().with_config(move |config| { config.features.enable(Feature::ApplyPatchFreeform); + if matches!(output_type, ShellModelOutput::ShellCommand) { + config.features.enable(Feature::ShellCommandTool); + } + let _ = output_type; }); let test = builder.build(&server).await?; @@ -246,21 +383,57 @@ async fn shell_output_structures_fixture_with_serialization() -> Result<()> { let fixture_path_str = fixture_path.to_string_lossy().to_string(); let call_id = "shell-structured-fixture"; - let args = json!({ - "command": ["/usr/bin/sed", "-n", "p", fixture_path_str], - "timeout_ms": 1_000, - }); - let responses = vec![ - sse(vec![ - ev_response_created("resp-1"), - ev_function_call(call_id, "shell", &serde_json::to_string(&args)?), - ev_completed("resp-1"), - ]), - sse(vec![ - ev_assistant_message("msg-1", "done"), - ev_completed("resp-2"), - ]), - ]; + let responses = match output_type { + ShellModelOutput::Shell | ShellModelOutput::ShellCommand => { + let command = if matches!(output_type, ShellModelOutput::ShellCommand) { + json!({ + "command": format!("/usr/bin/sed -n p {fixture_path_str}"), + "timeout_ms": 1_000, + }) + } else { + json!({ + "command": ["/usr/bin/sed", "-n", "p", fixture_path_str], + "timeout_ms": 1_000, + }) + }; + vec![ + sse(vec![ + ev_response_created("resp-1"), + ev_function_call( + call_id, + if matches!(output_type, ShellModelOutput::ShellCommand) { + "shell_command" + } else { + "shell" + }, + &serde_json::to_string(&command)?, + ), + ev_completed("resp-1"), + ]), + sse(vec![ + ev_assistant_message("msg-1", "done"), + ev_completed("resp-2"), + ]), + ] + } + ShellModelOutput::LocalShell => { + vec![ + sse(vec![ + ev_response_created("resp-1"), + ev_local_shell_call( + call_id, + "completed", + vec!["/usr/bin/sed", "-n", "p", fixture_path_str.as_str()], + ), + ev_completed("resp-1"), + ]), + sse(vec![ + ev_assistant_message("msg-1", "done"), + ev_completed("resp-2"), + ]), + ] + } + }; let mock = mount_sse_sequence(&server, responses).await; test.submit_turn_with_policy( @@ -298,40 +471,72 @@ async fn shell_output_structures_fixture_with_serialization() -> Result<()> { } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn shell_output_for_freeform_tool_records_duration() -> Result<()> { +#[test_case(ShellModelOutput::Shell)] +#[test_case(ShellModelOutput::ShellCommand)] +#[test_case(ShellModelOutput::LocalShell)] +async fn shell_output_for_freeform_tool_records_duration( + output_type: ShellModelOutput, +) -> Result<()> { skip_if_no_network!(Ok(())); let server = start_mock_server().await; - let mut builder = test_codex().with_config(|config| { + let mut builder = test_codex().with_config(move |config| { config.include_apply_patch_tool = true; + if matches!(output_type, ShellModelOutput::ShellCommand) { + config.features.enable(Feature::ShellCommandTool); + } + let _ = output_type; }); let test = builder.build(&server).await?; - #[cfg(target_os = "linux")] - let sleep_cmd = vec!["/bin/bash", "-c", "sleep 1"]; - - #[cfg(target_os = "macos")] - let sleep_cmd = vec!["/bin/bash", "-c", "sleep 1"]; - - #[cfg(windows)] - let sleep_cmd = "timeout 1"; - let call_id = "shell-structured"; - let args = json!({ - "command": sleep_cmd, - "timeout_ms": 2_000, - }); - let responses = vec![ - sse(vec![ - json!({"type": "response.created", "response": {"id": "resp-1"}}), - ev_function_call(call_id, "shell", &serde_json::to_string(&args)?), - ev_completed("resp-1"), - ]), - sse(vec![ - ev_assistant_message("msg-1", "done"), - ev_completed("resp-2"), - ]), - ]; + let responses = match output_type { + ShellModelOutput::Shell | ShellModelOutput::ShellCommand => { + let command = if matches!(output_type, ShellModelOutput::ShellCommand) { + json!({ + "command": "sleep 1", + "timeout_ms": 2_000, + }) + } else { + json!({ + "command": ["/bin/bash", "-c", "sleep 1"], + "timeout_ms": 2_000, + }) + }; + vec![ + sse(vec![ + json!({"type": "response.created", "response": {"id": "resp-1"}}), + ev_function_call( + call_id, + if matches!(output_type, ShellModelOutput::ShellCommand) { + "shell_command" + } else { + "shell" + }, + &serde_json::to_string(&command)?, + ), + ev_completed("resp-1"), + ]), + sse(vec![ + ev_assistant_message("msg-1", "done"), + ev_completed("resp-2"), + ]), + ] + } + ShellModelOutput::LocalShell => { + vec![ + sse(vec![ + json!({"type": "response.created", "response": {"id": "resp-1"}}), + ev_local_shell_call(call_id, "completed", vec!["/bin/bash", "-c", "sleep 1"]), + ev_completed("resp-1"), + ]), + sse(vec![ + ev_assistant_message("msg-1", "done"), + ev_completed("resp-2"), + ]), + ] + } + }; let mock = mount_sse_sequence(&server, responses).await; test.submit_turn_with_policy( @@ -371,33 +576,72 @@ $"#; } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn shell_output_reserializes_truncated_content() -> Result<()> { +#[test_case(ShellModelOutput::Shell)] +#[test_case(ShellModelOutput::ShellCommand)] +#[test_case(ShellModelOutput::LocalShell)] +async fn shell_output_reserializes_truncated_content(output_type: ShellModelOutput) -> Result<()> { skip_if_no_network!(Ok(())); let server = start_mock_server().await; - let mut builder = test_codex().with_config(|config| { + let mut builder = test_codex().with_config(move |config| { config.model = "gpt-5.1-codex".to_string(); config.model_family = - find_family_for_model("gpt-5.1-codex").expect("gpt-5.1 is a model family"); + find_family_for_model("gpt-5.1-codex").expect("gpt-5.1-codex is a model family"); + if matches!(output_type, ShellModelOutput::ShellCommand) { + config.features.enable(Feature::ShellCommandTool); + } + let _ = output_type; }); let test = builder.build(&server).await?; let call_id = "shell-truncated"; - let args = json!({ - "command": ["/bin/sh", "-c", "seq 1 400"], - "timeout_ms": 5_000, - }); - let responses = vec![ - sse(vec![ - ev_response_created("resp-1"), - ev_function_call(call_id, "shell", &serde_json::to_string(&args)?), - ev_completed("resp-1"), - ]), - sse(vec![ - ev_assistant_message("msg-1", "done"), - ev_completed("resp-2"), - ]), - ]; + let responses = match output_type { + ShellModelOutput::Shell | ShellModelOutput::ShellCommand => { + let command = if matches!(output_type, ShellModelOutput::ShellCommand) { + json!({ + "command": "seq 1 400", + "timeout_ms": 5_000, + }) + } else { + json!({ + "command": ["/bin/sh", "-c", "seq 1 400"], + "timeout_ms": 5_000, + }) + }; + vec![ + sse(vec![ + ev_response_created("resp-1"), + ev_function_call( + call_id, + if matches!(output_type, ShellModelOutput::ShellCommand) { + "shell_command" + } else { + "shell" + }, + &serde_json::to_string(&command)?, + ), + ev_completed("resp-1"), + ]), + sse(vec![ + ev_assistant_message("msg-1", "done"), + ev_completed("resp-2"), + ]), + ] + } + ShellModelOutput::LocalShell => { + vec![ + sse(vec![ + ev_response_created("resp-1"), + ev_local_shell_call(call_id, "completed", vec!["/bin/sh", "-c", "seq 1 400"]), + ev_completed("resp-1"), + ]), + sse(vec![ + ev_assistant_message("msg-1", "done"), + ev_completed("resp-2"), + ]), + ] + } + }; let mock = mount_sse_sequence(&server, responses).await; test.submit_turn_with_policy( @@ -445,11 +689,17 @@ $"#; } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn apply_patch_custom_tool_output_is_structured() -> Result<()> { +#[test_case(ApplyPatchModelOutput::Freeform)] +#[test_case(ApplyPatchModelOutput::Function)] +#[test_case(ApplyPatchModelOutput::Shell)] +#[test_case(ApplyPatchModelOutput::ShellViaHeredoc)] +async fn apply_patch_custom_tool_output_is_structured( + output_type: ApplyPatchModelOutput, +) -> Result<()> { skip_if_no_network!(Ok(())); let server = start_mock_server().await; - let mut builder = test_codex().with_config(|config| { + let mut builder = test_codex().with_config(move |config| { config.include_apply_patch_tool = true; }); let test = builder.build(&server).await?; @@ -466,7 +716,7 @@ async fn apply_patch_custom_tool_output_is_structured() -> Result<()> { let responses = vec![ sse(vec![ json!({"type": "response.created", "response": {"id": "resp-1"}}), - ev_custom_tool_call(call_id, "apply_patch", &patch), + ev_apply_patch_call(call_id, &patch, output_type), ev_completed("resp-1"), ]), sse(vec![ @@ -485,7 +735,12 @@ async fn apply_patch_custom_tool_output_is_structured() -> Result<()> { let req = mock .last_request() .expect("apply_patch output request recorded"); - let output_item = req.custom_tool_call_output(call_id); + let output_item = match output_type { + ApplyPatchModelOutput::Freeform => req.custom_tool_call_output(call_id), + ApplyPatchModelOutput::Function + | ApplyPatchModelOutput::Shell + | ApplyPatchModelOutput::ShellViaHeredoc => req.function_call_output(call_id), + }; let output = output_item .get("output") .and_then(Value::as_str) @@ -505,11 +760,17 @@ A {file_name} } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn apply_patch_custom_tool_call_creates_file() -> Result<()> { +#[test_case(ApplyPatchModelOutput::Freeform)] +#[test_case(ApplyPatchModelOutput::Function)] +#[test_case(ApplyPatchModelOutput::Shell)] +#[test_case(ApplyPatchModelOutput::ShellViaHeredoc)] +async fn apply_patch_custom_tool_call_creates_file( + output_type: ApplyPatchModelOutput, +) -> Result<()> { skip_if_no_network!(Ok(())); let server = start_mock_server().await; - let mut builder = test_codex().with_config(|config| { + let mut builder = test_codex().with_config(move |config| { config.include_apply_patch_tool = true; }); let test = builder.build(&server).await?; @@ -522,7 +783,7 @@ async fn apply_patch_custom_tool_call_creates_file() -> Result<()> { let responses = vec![ sse(vec![ json!({"type": "response.created", "response": {"id": "resp-1"}}), - ev_custom_tool_call(call_id, "apply_patch", &patch), + ev_apply_patch_call(call_id, &patch, output_type), ev_completed("resp-1"), ]), sse(vec![ @@ -541,7 +802,12 @@ async fn apply_patch_custom_tool_call_creates_file() -> Result<()> { let req = mock .last_request() .expect("apply_patch output request recorded"); - let output_item = req.custom_tool_call_output(call_id); + let output_item = match output_type { + ApplyPatchModelOutput::Freeform => req.custom_tool_call_output(call_id), + ApplyPatchModelOutput::Function + | ApplyPatchModelOutput::Shell + | ApplyPatchModelOutput::ShellViaHeredoc => req.function_call_output(call_id), + }; let output = output_item .get("output") .and_then(Value::as_str) @@ -568,11 +834,17 @@ A {file_name} } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn apply_patch_custom_tool_call_updates_existing_file() -> Result<()> { +#[test_case(ApplyPatchModelOutput::Freeform)] +#[test_case(ApplyPatchModelOutput::Function)] +#[test_case(ApplyPatchModelOutput::Shell)] +#[test_case(ApplyPatchModelOutput::ShellViaHeredoc)] +async fn apply_patch_custom_tool_call_updates_existing_file( + output_type: ApplyPatchModelOutput, +) -> Result<()> { skip_if_no_network!(Ok(())); let server = start_mock_server().await; - let mut builder = test_codex().with_config(|config| { + let mut builder = test_codex().with_config(move |config| { config.include_apply_patch_tool = true; }); let test = builder.build(&server).await?; @@ -587,7 +859,7 @@ async fn apply_patch_custom_tool_call_updates_existing_file() -> Result<()> { let responses = vec![ sse(vec![ json!({"type": "response.created", "response": {"id": "resp-1"}}), - ev_custom_tool_call(call_id, "apply_patch", &patch), + ev_apply_patch_call(call_id, &patch, output_type), ev_completed("resp-1"), ]), sse(vec![ @@ -606,7 +878,12 @@ async fn apply_patch_custom_tool_call_updates_existing_file() -> Result<()> { let req = mock .last_request() .expect("apply_patch output request recorded"); - let output_item = req.custom_tool_call_output(call_id); + let output_item = match output_type { + ApplyPatchModelOutput::Freeform => req.custom_tool_call_output(call_id), + ApplyPatchModelOutput::Function + | ApplyPatchModelOutput::Shell + | ApplyPatchModelOutput::ShellViaHeredoc => req.function_call_output(call_id), + }; let output = output_item .get("output") .and_then(Value::as_str) @@ -629,11 +906,17 @@ M {file_name} } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn apply_patch_custom_tool_call_reports_failure_output() -> Result<()> { +#[test_case(ApplyPatchModelOutput::Freeform)] +#[test_case(ApplyPatchModelOutput::Function)] +#[test_case(ApplyPatchModelOutput::Shell)] +#[test_case(ApplyPatchModelOutput::ShellViaHeredoc)] +async fn apply_patch_custom_tool_call_reports_failure_output( + output_type: ApplyPatchModelOutput, +) -> Result<()> { skip_if_no_network!(Ok(())); let server = start_mock_server().await; - let mut builder = test_codex().with_config(|config| { + let mut builder = test_codex().with_config(move |config| { config.include_apply_patch_tool = true; }); let test = builder.build(&server).await?; @@ -646,7 +929,7 @@ async fn apply_patch_custom_tool_call_reports_failure_output() -> Result<()> { let responses = vec![ sse(vec![ json!({"type": "response.created", "response": {"id": "resp-1"}}), - ev_custom_tool_call(call_id, "apply_patch", &patch), + ev_apply_patch_call(call_id, &patch, output_type), ev_completed("resp-1"), ]), sse(vec![ @@ -665,7 +948,12 @@ async fn apply_patch_custom_tool_call_reports_failure_output() -> Result<()> { let req = mock .last_request() .expect("apply_patch output request recorded"); - let output_item = req.custom_tool_call_output(call_id); + let output_item = match output_type { + ApplyPatchModelOutput::Freeform => req.custom_tool_call_output(call_id), + ApplyPatchModelOutput::Function + | ApplyPatchModelOutput::Shell + | ApplyPatchModelOutput::ShellViaHeredoc => req.function_call_output(call_id), + }; let output = output_item .get("output") .and_then(Value::as_str) @@ -681,11 +969,17 @@ async fn apply_patch_custom_tool_call_reports_failure_output() -> Result<()> { } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn apply_patch_function_call_output_is_structured() -> Result<()> { +#[test_case(ApplyPatchModelOutput::Freeform)] +#[test_case(ApplyPatchModelOutput::Function)] +#[test_case(ApplyPatchModelOutput::Shell)] +#[test_case(ApplyPatchModelOutput::ShellViaHeredoc)] +async fn apply_patch_function_call_output_is_structured( + output_type: ApplyPatchModelOutput, +) -> Result<()> { skip_if_no_network!(Ok(())); let server = start_mock_server().await; - let mut builder = test_codex().with_config(|config| { + let mut builder = test_codex().with_config(move |config| { config.include_apply_patch_tool = true; }); let test = builder.build(&server).await?; @@ -697,7 +991,7 @@ async fn apply_patch_function_call_output_is_structured() -> Result<()> { let responses = vec![ sse(vec![ json!({"type": "response.created", "response": {"id": "resp-1"}}), - ev_apply_patch_function_call(call_id, &patch), + ev_apply_patch_call(call_id, &patch, output_type), ev_completed("resp-1"), ]), sse(vec![ @@ -716,7 +1010,12 @@ async fn apply_patch_function_call_output_is_structured() -> Result<()> { let req = mock .last_request() .expect("apply_patch function output request recorded"); - let output_item = req.function_call_output(call_id); + let output_item = match output_type { + ApplyPatchModelOutput::Freeform => req.custom_tool_call_output(call_id), + ApplyPatchModelOutput::Function + | ApplyPatchModelOutput::Shell + | ApplyPatchModelOutput::ShellViaHeredoc => req.function_call_output(call_id), + }; let output = output_item .get("output") .and_then(Value::as_str) @@ -736,7 +1035,10 @@ A {file_name} } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn shell_output_is_structured_for_nonzero_exit() -> Result<()> { +#[test_case(ShellModelOutput::Shell)] +#[test_case(ShellModelOutput::ShellCommand)] +#[test_case(ShellModelOutput::LocalShell)] +async fn shell_output_is_structured_for_nonzero_exit(output_type: ShellModelOutput) -> Result<()> { skip_if_no_network!(Ok(())); let server = start_mock_server().await; @@ -745,25 +1047,60 @@ async fn shell_output_is_structured_for_nonzero_exit() -> Result<()> { config.model_family = find_family_for_model("gpt-5.1-codex").expect("gpt-5.1-codex is a model family"); config.include_apply_patch_tool = true; + if matches!(output_type, ShellModelOutput::ShellCommand) { + config.features.enable(Feature::ShellCommandTool); + } }); let test = builder.build(&server).await?; let call_id = "shell-nonzero-exit"; - let args = json!({ - "command": ["/bin/sh", "-c", "exit 42"], - "timeout_ms": 1_000, - }); - let responses = vec![ - sse(vec![ - json!({"type": "response.created", "response": {"id": "resp-1"}}), - ev_function_call(call_id, "shell", &serde_json::to_string(&args)?), - ev_completed("resp-1"), - ]), - sse(vec![ - ev_assistant_message("msg-1", "shell failure handled"), - ev_completed("resp-2"), - ]), - ]; + let responses = match output_type { + ShellModelOutput::Shell | ShellModelOutput::ShellCommand => { + let command = if matches!(output_type, ShellModelOutput::ShellCommand) { + json!({ + "command": "exit 42", + "timeout_ms": 1_000, + }) + } else { + json!({ + "command": ["/bin/sh", "-c", "exit 42"], + "timeout_ms": 1_000, + }) + }; + vec![ + sse(vec![ + json!({"type": "response.created", "response": {"id": "resp-1"}}), + ev_function_call( + call_id, + if matches!(output_type, ShellModelOutput::ShellCommand) { + "shell_command" + } else { + "shell" + }, + &serde_json::to_string(&command)?, + ), + ev_completed("resp-1"), + ]), + sse(vec![ + ev_assistant_message("msg-1", "shell failure handled"), + ev_completed("resp-2"), + ]), + ] + } + ShellModelOutput::LocalShell => { + vec![ + sse(vec![ + json!({"type": "response.created", "response": {"id": "resp-1"}}), + ev_local_shell_call(call_id, "completed", vec!["/bin/sh", "-c", "exit 42"]), + ev_completed("resp-1"), + ]), + sse(vec![ + ev_assistant_message("msg-1", "shell failure handled"), + ev_completed("resp-2"), + ]), + ] + } + }; let mock = mount_sse_sequence(&server, responses).await; test.submit_turn_with_policy( @@ -793,7 +1130,7 @@ async fn shell_command_output_is_structured() -> Result<()> { skip_if_no_network!(Ok(())); let server = start_mock_server().await; - let mut builder = test_codex().with_config(|config| { + let mut builder = test_codex().with_config(move |config| { config.features.enable(Feature::ShellCommandTool); }); let test = builder.build(&server).await?; From e889d6fb358ae78688934e82b453efd9958e37b6 Mon Sep 17 00:00:00 2001 From: Dylan Hurd Date: Mon, 17 Nov 2025 18:02:03 -0800 Subject: [PATCH 2/4] use responses helper --- .../core/tests/suite/shell_serialization.rs | 414 ++++-------------- 1 file changed, 76 insertions(+), 338 deletions(-) diff --git a/codex-rs/core/tests/suite/shell_serialization.rs b/codex-rs/core/tests/suite/shell_serialization.rs index 2f1abcd8ad76..a2cdfb8d014d 100644 --- a/codex-rs/core/tests/suite/shell_serialization.rs +++ b/codex-rs/core/tests/suite/shell_serialization.rs @@ -1,4 +1,5 @@ #![cfg(not(target_os = "windows"))] +#![allow(clippy::expect_used)] use anyhow::Result; use codex_core::features::Feature; @@ -37,52 +38,25 @@ const FIXTURE_JSON: &str = r#"{ } "#; -#[tokio::test(flavor = "multi_thread", worker_threads = 2)] -#[test_case(ShellModelOutput::Shell)] -#[test_case(ShellModelOutput::ShellCommand)] -#[test_case(ShellModelOutput::LocalShell)] -async fn shell_output_stays_json_without_freeform_apply_patch( +fn shell_responses( + call_id: &str, + command: Vec<&str>, output_type: ShellModelOutput, -) -> Result<()> { - skip_if_no_network!(Ok(())); - - let server = start_mock_server().await; - let mut builder = test_codex().with_config(move |config| { - config.features.disable(Feature::ApplyPatchFreeform); - config.model = "gpt-5".to_string(); - config.model_family = find_family_for_model("gpt-5").expect("gpt-5 is a model family"); - if matches!(output_type, ShellModelOutput::ShellCommand) { - config.features.enable(Feature::ShellCommandTool); - } - let _ = output_type; - }); - let test = builder.build(&server).await?; - - let call_id = "shell-json"; - let responses = match output_type { - ShellModelOutput::Shell | ShellModelOutput::ShellCommand => { - let command = if matches!(output_type, ShellModelOutput::ShellCommand) { - json!({ - "command": "echo shell json", - "timeout_ms": 1_000, - }) - } else { - json!({ - "command": ["/bin/echo", "shell json"], - "timeout_ms": 1_000, - }) - }; - vec![ +) -> Result> { + match output_type { + ShellModelOutput::ShellCommand => { + let command = shlex::try_join(command)?; + let parameters = json!({ + "command": command, + "timeout_ms": 2_000, + }); + Ok(vec![ sse(vec![ ev_response_created("resp-1"), ev_function_call( call_id, - if matches!(output_type, ShellModelOutput::ShellCommand) { - "shell_command" - } else { - "shell" - }, - &serde_json::to_string(&command)?, + "shell_command", + &serde_json::to_string(¶meters)?, ), ev_completed("resp-1"), ]), @@ -90,22 +64,62 @@ async fn shell_output_stays_json_without_freeform_apply_patch( ev_assistant_message("msg-1", "done"), ev_completed("resp-2"), ]), - ] + ]) } - ShellModelOutput::LocalShell => { - vec![ + ShellModelOutput::Shell => { + let parameters = json!({ + "command": command, + "timeout_ms": 2_000, + }); + Ok(vec![ sse(vec![ ev_response_created("resp-1"), - ev_local_shell_call(call_id, "completed", vec!["/bin/echo", "shell json"]), + ev_function_call(call_id, "shell", &serde_json::to_string(¶meters)?), ev_completed("resp-1"), ]), sse(vec![ ev_assistant_message("msg-1", "done"), ev_completed("resp-2"), ]), - ] + ]) } - }; + ShellModelOutput::LocalShell => Ok(vec![ + sse(vec![ + ev_response_created("resp-1"), + ev_local_shell_call(call_id, "completed", command), + ev_completed("resp-1"), + ]), + sse(vec![ + ev_assistant_message("msg-1", "done"), + ev_completed("resp-2"), + ]), + ]), + } +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +#[test_case(ShellModelOutput::Shell)] +#[test_case(ShellModelOutput::ShellCommand)] +#[test_case(ShellModelOutput::LocalShell)] +async fn shell_output_stays_json_without_freeform_apply_patch( + output_type: ShellModelOutput, +) -> Result<()> { + skip_if_no_network!(Ok(())); + + let server = start_mock_server().await; + let mut builder = test_codex().with_config(move |config| { + config.features.disable(Feature::ApplyPatchFreeform); + config.model = "gpt-5".to_string(); + config.model_family = find_family_for_model("gpt-5").expect("gpt-5 is a model family"); + if matches!(output_type, ShellModelOutput::ShellCommand) { + config.features.enable(Feature::ShellCommandTool); + } + let _ = output_type; + }); + let test = builder.build(&server).await?; + + let call_id = "shell-json"; + let responses = shell_responses(call_id, vec!["/bin/echo", "shell json"], output_type)?; let mock = mount_sse_sequence(&server, responses).await; test.submit_turn_with_policy( @@ -163,53 +177,7 @@ async fn shell_output_is_structured_with_freeform_apply_patch( let test = builder.build(&server).await?; let call_id = "shell-structured"; - let responses = match output_type { - ShellModelOutput::Shell | ShellModelOutput::ShellCommand => { - let command = if matches!(output_type, ShellModelOutput::ShellCommand) { - json!({ - "command": "echo freeform shell", - "timeout_ms": 1_000, - }) - } else { - json!({ - "command": ["/bin/echo", "freeform shell"], - "timeout_ms": 1_000, - }) - }; - vec![ - sse(vec![ - ev_response_created("resp-1"), - ev_function_call( - call_id, - if matches!(output_type, ShellModelOutput::ShellCommand) { - "shell_command" - } else { - "shell" - }, - &serde_json::to_string(&command)?, - ), - ev_completed("resp-1"), - ]), - sse(vec![ - ev_assistant_message("msg-1", "done"), - ev_completed("resp-2"), - ]), - ] - } - ShellModelOutput::LocalShell => { - vec![ - sse(vec![ - ev_response_created("resp-1"), - ev_local_shell_call(call_id, "completed", vec!["/bin/echo", "freeform shell"]), - ev_completed("resp-1"), - ]), - sse(vec![ - ev_assistant_message("msg-1", "done"), - ev_completed("resp-2"), - ]), - ] - } - }; + let responses = shell_responses(call_id, vec!["/bin/echo", "freeform shell"], output_type)?; let mock = mount_sse_sequence(&server, responses).await; test.submit_turn_with_policy( @@ -267,57 +235,11 @@ async fn shell_output_preserves_fixture_json_without_serialization( let fixture_path_str = fixture_path.to_string_lossy().to_string(); let call_id = "shell-json-fixture"; - let responses = match output_type { - ShellModelOutput::Shell | ShellModelOutput::ShellCommand => { - let command = if matches!(output_type, ShellModelOutput::ShellCommand) { - json!({ - "command": format!("/usr/bin/sed -n p {fixture_path_str}"), - "timeout_ms": 1_000, - }) - } else { - json!({ - "command": ["/usr/bin/sed", "-n", "p", fixture_path_str], - "timeout_ms": 1_000, - }) - }; - vec![ - sse(vec![ - ev_response_created("resp-1"), - ev_function_call( - call_id, - if matches!(output_type, ShellModelOutput::ShellCommand) { - "shell_command" - } else { - "shell" - }, - &serde_json::to_string(&command)?, - ), - ev_completed("resp-1"), - ]), - sse(vec![ - ev_assistant_message("msg-1", "done"), - ev_completed("resp-2"), - ]), - ] - } - ShellModelOutput::LocalShell => { - vec![ - sse(vec![ - ev_response_created("resp-1"), - ev_local_shell_call( - call_id, - "completed", - vec!["/usr/bin/sed", "-n", "p", fixture_path_str.as_str()], - ), - ev_completed("resp-1"), - ]), - sse(vec![ - ev_assistant_message("msg-1", "done"), - ev_completed("resp-2"), - ]), - ] - } - }; + let responses = shell_responses( + call_id, + vec!["/usr/bin/sed", "-n", "p", fixture_path_str.as_str()], + output_type, + )?; let mock = mount_sse_sequence(&server, responses).await; test.submit_turn_with_policy( @@ -383,57 +305,11 @@ async fn shell_output_structures_fixture_with_serialization( let fixture_path_str = fixture_path.to_string_lossy().to_string(); let call_id = "shell-structured-fixture"; - let responses = match output_type { - ShellModelOutput::Shell | ShellModelOutput::ShellCommand => { - let command = if matches!(output_type, ShellModelOutput::ShellCommand) { - json!({ - "command": format!("/usr/bin/sed -n p {fixture_path_str}"), - "timeout_ms": 1_000, - }) - } else { - json!({ - "command": ["/usr/bin/sed", "-n", "p", fixture_path_str], - "timeout_ms": 1_000, - }) - }; - vec![ - sse(vec![ - ev_response_created("resp-1"), - ev_function_call( - call_id, - if matches!(output_type, ShellModelOutput::ShellCommand) { - "shell_command" - } else { - "shell" - }, - &serde_json::to_string(&command)?, - ), - ev_completed("resp-1"), - ]), - sse(vec![ - ev_assistant_message("msg-1", "done"), - ev_completed("resp-2"), - ]), - ] - } - ShellModelOutput::LocalShell => { - vec![ - sse(vec![ - ev_response_created("resp-1"), - ev_local_shell_call( - call_id, - "completed", - vec!["/usr/bin/sed", "-n", "p", fixture_path_str.as_str()], - ), - ev_completed("resp-1"), - ]), - sse(vec![ - ev_assistant_message("msg-1", "done"), - ev_completed("resp-2"), - ]), - ] - } - }; + let responses = shell_responses( + call_id, + vec!["/usr/bin/sed", "-n", "p", fixture_path_str.as_str()], + output_type, + )?; let mock = mount_sse_sequence(&server, responses).await; test.submit_turn_with_policy( @@ -490,53 +366,7 @@ async fn shell_output_for_freeform_tool_records_duration( let test = builder.build(&server).await?; let call_id = "shell-structured"; - let responses = match output_type { - ShellModelOutput::Shell | ShellModelOutput::ShellCommand => { - let command = if matches!(output_type, ShellModelOutput::ShellCommand) { - json!({ - "command": "sleep 1", - "timeout_ms": 2_000, - }) - } else { - json!({ - "command": ["/bin/bash", "-c", "sleep 1"], - "timeout_ms": 2_000, - }) - }; - vec![ - sse(vec![ - json!({"type": "response.created", "response": {"id": "resp-1"}}), - ev_function_call( - call_id, - if matches!(output_type, ShellModelOutput::ShellCommand) { - "shell_command" - } else { - "shell" - }, - &serde_json::to_string(&command)?, - ), - ev_completed("resp-1"), - ]), - sse(vec![ - ev_assistant_message("msg-1", "done"), - ev_completed("resp-2"), - ]), - ] - } - ShellModelOutput::LocalShell => { - vec![ - sse(vec![ - json!({"type": "response.created", "response": {"id": "resp-1"}}), - ev_local_shell_call(call_id, "completed", vec!["/bin/bash", "-c", "sleep 1"]), - ev_completed("resp-1"), - ]), - sse(vec![ - ev_assistant_message("msg-1", "done"), - ev_completed("resp-2"), - ]), - ] - } - }; + let responses = shell_responses(call_id, vec!["/bin/bash", "-c", "sleep 1"], output_type)?; let mock = mount_sse_sequence(&server, responses).await; test.submit_turn_with_policy( @@ -595,53 +425,7 @@ async fn shell_output_reserializes_truncated_content(output_type: ShellModelOutp let test = builder.build(&server).await?; let call_id = "shell-truncated"; - let responses = match output_type { - ShellModelOutput::Shell | ShellModelOutput::ShellCommand => { - let command = if matches!(output_type, ShellModelOutput::ShellCommand) { - json!({ - "command": "seq 1 400", - "timeout_ms": 5_000, - }) - } else { - json!({ - "command": ["/bin/sh", "-c", "seq 1 400"], - "timeout_ms": 5_000, - }) - }; - vec![ - sse(vec![ - ev_response_created("resp-1"), - ev_function_call( - call_id, - if matches!(output_type, ShellModelOutput::ShellCommand) { - "shell_command" - } else { - "shell" - }, - &serde_json::to_string(&command)?, - ), - ev_completed("resp-1"), - ]), - sse(vec![ - ev_assistant_message("msg-1", "done"), - ev_completed("resp-2"), - ]), - ] - } - ShellModelOutput::LocalShell => { - vec![ - sse(vec![ - ev_response_created("resp-1"), - ev_local_shell_call(call_id, "completed", vec!["/bin/sh", "-c", "seq 1 400"]), - ev_completed("resp-1"), - ]), - sse(vec![ - ev_assistant_message("msg-1", "done"), - ev_completed("resp-2"), - ]), - ] - } - }; + let responses = shell_responses(call_id, vec!["/bin/sh", "-c", "seq 1 400"], output_type)?; let mock = mount_sse_sequence(&server, responses).await; test.submit_turn_with_policy( @@ -1042,7 +826,7 @@ async fn shell_output_is_structured_for_nonzero_exit(output_type: ShellModelOutp skip_if_no_network!(Ok(())); let server = start_mock_server().await; - let mut builder = test_codex().with_config(|config| { + let mut builder = test_codex().with_config(move |config| { config.model = "gpt-5.1-codex".to_string(); config.model_family = find_family_for_model("gpt-5.1-codex").expect("gpt-5.1-codex is a model family"); @@ -1054,53 +838,7 @@ async fn shell_output_is_structured_for_nonzero_exit(output_type: ShellModelOutp let test = builder.build(&server).await?; let call_id = "shell-nonzero-exit"; - let responses = match output_type { - ShellModelOutput::Shell | ShellModelOutput::ShellCommand => { - let command = if matches!(output_type, ShellModelOutput::ShellCommand) { - json!({ - "command": "exit 42", - "timeout_ms": 1_000, - }) - } else { - json!({ - "command": ["/bin/sh", "-c", "exit 42"], - "timeout_ms": 1_000, - }) - }; - vec![ - sse(vec![ - json!({"type": "response.created", "response": {"id": "resp-1"}}), - ev_function_call( - call_id, - if matches!(output_type, ShellModelOutput::ShellCommand) { - "shell_command" - } else { - "shell" - }, - &serde_json::to_string(&command)?, - ), - ev_completed("resp-1"), - ]), - sse(vec![ - ev_assistant_message("msg-1", "shell failure handled"), - ev_completed("resp-2"), - ]), - ] - } - ShellModelOutput::LocalShell => { - vec![ - sse(vec![ - json!({"type": "response.created", "response": {"id": "resp-1"}}), - ev_local_shell_call(call_id, "completed", vec!["/bin/sh", "-c", "exit 42"]), - ev_completed("resp-1"), - ]), - sse(vec![ - ev_assistant_message("msg-1", "shell failure handled"), - ev_completed("resp-2"), - ]), - ] - } - }; + let responses = shell_responses(call_id, vec!["/bin/sh", "-c", "exit 42"], output_type)?; let mock = mount_sse_sequence(&server, responses).await; test.submit_turn_with_policy( From 2709f3c97d4cb31271abb05acef54d6e169d370e Mon Sep 17 00:00:00 2001 From: Dylan Hurd Date: Mon, 17 Nov 2025 18:24:03 -0800 Subject: [PATCH 3/4] fmt --- codex-rs/core/tests/common/test_codex.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/codex-rs/core/tests/common/test_codex.rs b/codex-rs/core/tests/common/test_codex.rs index bad94883c218..f88a736eeddf 100644 --- a/codex-rs/core/tests/common/test_codex.rs +++ b/codex-rs/core/tests/common/test_codex.rs @@ -323,8 +323,9 @@ impl TestCodexHarness { pub async fn shell_output(&self, call_id: &str, output_type: ShellModelOutput) -> String { match output_type { ShellModelOutput::LocalShell => self.local_shell_call_output(call_id).await, - ShellModelOutput::Shell - | ShellModelOutput::ShellCommand => self.function_call_stdout(call_id).await, + ShellModelOutput::Shell | ShellModelOutput::ShellCommand => { + self.function_call_stdout(call_id).await + } } } } From 947e85d9d140692bef5edf8bb6aa92fa898ec472 Mon Sep 17 00:00:00 2001 From: Dylan Hurd Date: Mon, 17 Nov 2025 18:53:21 -0800 Subject: [PATCH 4/4] simplify --- codex-rs/core/tests/common/test_codex.rs | 33 --- codex-rs/core/tests/suite/apply_patch_cli.rs | 4 +- .../core/tests/suite/shell_serialization.rs | 268 ++++++------------ 3 files changed, 84 insertions(+), 221 deletions(-) diff --git a/codex-rs/core/tests/common/test_codex.rs b/codex-rs/core/tests/common/test_codex.rs index f88a736eeddf..c213cab5d647 100644 --- a/codex-rs/core/tests/common/test_codex.rs +++ b/codex-rs/core/tests/common/test_codex.rs @@ -298,15 +298,6 @@ impl TestCodexHarness { .to_string() } - pub async fn local_shell_call_output(&self, call_id: &str) -> String { - let bodies = self.request_bodies().await; - local_shell_call_output(&bodies, call_id) - .get("output") - .and_then(Value::as_str) - .expect("output string") - .to_string() - } - pub async fn apply_patch_output( &self, call_id: &str, @@ -319,15 +310,6 @@ impl TestCodexHarness { | ApplyPatchModelOutput::ShellViaHeredoc => self.function_call_stdout(call_id).await, } } - - pub async fn shell_output(&self, call_id: &str, output_type: ShellModelOutput) -> String { - match output_type { - ShellModelOutput::LocalShell => self.local_shell_call_output(call_id).await, - ShellModelOutput::Shell | ShellModelOutput::ShellCommand => { - self.function_call_stdout(call_id).await - } - } - } } fn custom_tool_call_output<'a>(bodies: &'a [Value], call_id: &str) -> &'a Value { @@ -360,21 +342,6 @@ fn function_call_output<'a>(bodies: &'a [Value], call_id: &str) -> &'a Value { panic!("function_call_output {call_id} not found"); } -fn local_shell_call_output<'a>(bodies: &'a [Value], call_id: &str) -> &'a Value { - for body in bodies { - if let Some(items) = body.get("input").and_then(Value::as_array) { - for item in items { - if item.get("type").and_then(Value::as_str) == Some("local_shell_call_output") - && item.get("call_id").and_then(Value::as_str) == Some(call_id) - { - return item; - } - } - } - } - panic!("local_shell_call_output {call_id} not found"); -} - pub fn test_codex() -> TestCodexBuilder { TestCodexBuilder { config_mutators: vec![], diff --git a/codex-rs/core/tests/suite/apply_patch_cli.rs b/codex-rs/core/tests/suite/apply_patch_cli.rs index 41b074cc3e28..59ecf421e60e 100644 --- a/codex-rs/core/tests/suite/apply_patch_cli.rs +++ b/codex-rs/core/tests/suite/apply_patch_cli.rs @@ -29,7 +29,7 @@ use core_test_support::wait_for_event; use serde_json::json; use test_case::test_case; -async fn apply_patch_harness() -> Result { +pub async fn apply_patch_harness() -> Result { apply_patch_harness_with(|_| {}).await } @@ -43,7 +43,7 @@ async fn apply_patch_harness_with( .await } -async fn mount_apply_patch( +pub async fn mount_apply_patch( harness: &TestCodexHarness, call_id: &str, patch: &str, diff --git a/codex-rs/core/tests/suite/shell_serialization.rs b/codex-rs/core/tests/suite/shell_serialization.rs index a2cdfb8d014d..3b468ce2b799 100644 --- a/codex-rs/core/tests/suite/shell_serialization.rs +++ b/codex-rs/core/tests/suite/shell_serialization.rs @@ -6,7 +6,6 @@ use codex_core::features::Feature; use codex_core::model_family::find_family_for_model; use codex_core::protocol::SandboxPolicy; use core_test_support::assert_regex_match; -use core_test_support::responses::ev_apply_patch_call; use core_test_support::responses::ev_assistant_message; use core_test_support::responses::ev_completed; use core_test_support::responses::ev_function_call; @@ -26,6 +25,9 @@ use serde_json::json; use std::fs; use test_case::test_case; +use crate::suite::apply_patch_cli::apply_patch_harness; +use crate::suite::apply_patch_cli::mount_apply_patch; + const FIXTURE_JSON: &str = r#"{ "description": "This is an example JSON file.", "foo": "bar", @@ -482,11 +484,7 @@ async fn apply_patch_custom_tool_output_is_structured( ) -> Result<()> { skip_if_no_network!(Ok(())); - let server = start_mock_server().await; - let mut builder = test_codex().with_config(move |config| { - config.include_apply_patch_tool = true; - }); - let test = builder.build(&server).await?; + let harness = apply_patch_harness().await?; let call_id = "apply-patch-structured"; let file_name = "structured.txt"; @@ -497,38 +495,17 @@ async fn apply_patch_custom_tool_output_is_structured( *** End Patch "# ); - let responses = vec![ - sse(vec![ - json!({"type": "response.created", "response": {"id": "resp-1"}}), - ev_apply_patch_call(call_id, &patch, output_type), - ev_completed("resp-1"), - ]), - sse(vec![ - ev_assistant_message("msg-1", "done"), - ev_completed("resp-2"), - ]), - ]; - let mock = mount_sse_sequence(&server, responses).await; + mount_apply_patch(&harness, call_id, &patch, "done", output_type).await; - test.submit_turn_with_policy( - "apply the patch via custom tool", - SandboxPolicy::DangerFullAccess, - ) - .await?; + harness + .test() + .submit_turn_with_policy( + "apply the patch via custom tool", + SandboxPolicy::DangerFullAccess, + ) + .await?; - let req = mock - .last_request() - .expect("apply_patch output request recorded"); - let output_item = match output_type { - ApplyPatchModelOutput::Freeform => req.custom_tool_call_output(call_id), - ApplyPatchModelOutput::Function - | ApplyPatchModelOutput::Shell - | ApplyPatchModelOutput::ShellViaHeredoc => req.function_call_output(call_id), - }; - let output = output_item - .get("output") - .and_then(Value::as_str) - .expect("apply_patch output string"); + let output = harness.apply_patch_output(call_id, output_type).await; let expected_pattern = format!( r"(?s)^Exit code: 0 @@ -538,7 +515,7 @@ Success. Updated the following files: A {file_name} ?$" ); - assert_regex_match(&expected_pattern, output); + assert_regex_match(&expected_pattern, output.as_str()); Ok(()) } @@ -553,49 +530,24 @@ async fn apply_patch_custom_tool_call_creates_file( ) -> Result<()> { skip_if_no_network!(Ok(())); - let server = start_mock_server().await; - let mut builder = test_codex().with_config(move |config| { - config.include_apply_patch_tool = true; - }); - let test = builder.build(&server).await?; + let harness = apply_patch_harness().await?; let call_id = "apply-patch-add-file"; let file_name = "custom_tool_apply_patch.txt"; let patch = format!( "*** Begin Patch\n*** Add File: {file_name}\n+custom tool content\n*** End Patch\n" ); - let responses = vec![ - sse(vec![ - json!({"type": "response.created", "response": {"id": "resp-1"}}), - ev_apply_patch_call(call_id, &patch, output_type), - ev_completed("resp-1"), - ]), - sse(vec![ - ev_assistant_message("msg-1", "apply_patch done"), - ev_completed("resp-2"), - ]), - ]; - let mock = mount_sse_sequence(&server, responses).await; + mount_apply_patch(&harness, call_id, &patch, "apply_patch done", output_type).await; - test.submit_turn_with_policy( - "apply the patch via custom tool to create a file", - SandboxPolicy::DangerFullAccess, - ) - .await?; + harness + .test() + .submit_turn_with_policy( + "apply the patch via custom tool to create a file", + SandboxPolicy::DangerFullAccess, + ) + .await?; - let req = mock - .last_request() - .expect("apply_patch output request recorded"); - let output_item = match output_type { - ApplyPatchModelOutput::Freeform => req.custom_tool_call_output(call_id), - ApplyPatchModelOutput::Function - | ApplyPatchModelOutput::Shell - | ApplyPatchModelOutput::ShellViaHeredoc => req.function_call_output(call_id), - }; - let output = output_item - .get("output") - .and_then(Value::as_str) - .expect("apply_patch output string"); + let output = harness.apply_patch_output(call_id, output_type).await; let expected_pattern = format!( r"(?s)^Exit code: 0 @@ -605,9 +557,9 @@ Success. Updated the following files: A {file_name} ?$" ); - assert_regex_match(&expected_pattern, output); + assert_regex_match(&expected_pattern, output.as_str()); - let new_file_path = test.cwd.path().join(file_name); + let new_file_path = harness.path(file_name); let created_contents = fs::read_to_string(&new_file_path)?; assert_eq!( created_contents, "custom tool content\n", @@ -627,51 +579,33 @@ async fn apply_patch_custom_tool_call_updates_existing_file( ) -> Result<()> { skip_if_no_network!(Ok(())); - let server = start_mock_server().await; - let mut builder = test_codex().with_config(move |config| { - config.include_apply_patch_tool = true; - }); - let test = builder.build(&server).await?; + let harness = apply_patch_harness().await?; let call_id = "apply-patch-update-file"; let file_name = "custom_tool_apply_patch_existing.txt"; - let file_path = test.cwd.path().join(file_name); + let file_path = harness.path(file_name); fs::write(&file_path, "before\n")?; let patch = format!( "*** Begin Patch\n*** Update File: {file_name}\n@@\n-before\n+after\n*** End Patch\n" ); - let responses = vec![ - sse(vec![ - json!({"type": "response.created", "response": {"id": "resp-1"}}), - ev_apply_patch_call(call_id, &patch, output_type), - ev_completed("resp-1"), - ]), - sse(vec![ - ev_assistant_message("msg-1", "apply_patch update done"), - ev_completed("resp-2"), - ]), - ]; - let mock = mount_sse_sequence(&server, responses).await; - - test.submit_turn_with_policy( - "apply the patch via custom tool to update a file", - SandboxPolicy::DangerFullAccess, + mount_apply_patch( + &harness, + call_id, + &patch, + "apply_patch update done", + output_type, ) - .await?; + .await; - let req = mock - .last_request() - .expect("apply_patch output request recorded"); - let output_item = match output_type { - ApplyPatchModelOutput::Freeform => req.custom_tool_call_output(call_id), - ApplyPatchModelOutput::Function - | ApplyPatchModelOutput::Shell - | ApplyPatchModelOutput::ShellViaHeredoc => req.function_call_output(call_id), - }; - let output = output_item - .get("output") - .and_then(Value::as_str) - .expect("apply_patch output string"); + harness + .test() + .submit_turn_with_policy( + "apply the patch via custom tool to update a file", + SandboxPolicy::DangerFullAccess, + ) + .await?; + + let output = harness.apply_patch_output(call_id, output_type).await; let expected_pattern = format!( r"(?s)^Exit code: 0 @@ -681,7 +615,7 @@ Success. Updated the following files: M {file_name} ?$" ); - assert_regex_match(&expected_pattern, output); + assert_regex_match(&expected_pattern, output.as_str()); let updated_contents = fs::read_to_string(file_path)?; assert_eq!(updated_contents, "after\n", "expected updated file content"); @@ -699,55 +633,37 @@ async fn apply_patch_custom_tool_call_reports_failure_output( ) -> Result<()> { skip_if_no_network!(Ok(())); - let server = start_mock_server().await; - let mut builder = test_codex().with_config(move |config| { - config.include_apply_patch_tool = true; - }); - let test = builder.build(&server).await?; + let harness = apply_patch_harness().await?; let call_id = "apply-patch-failure"; let missing_file = "missing_custom_tool_apply_patch.txt"; let patch = format!( "*** Begin Patch\n*** Update File: {missing_file}\n@@\n-before\n+after\n*** End Patch\n" ); - let responses = vec![ - sse(vec![ - json!({"type": "response.created", "response": {"id": "resp-1"}}), - ev_apply_patch_call(call_id, &patch, output_type), - ev_completed("resp-1"), - ]), - sse(vec![ - ev_assistant_message("msg-1", "apply_patch failure done"), - ev_completed("resp-2"), - ]), - ]; - let mock = mount_sse_sequence(&server, responses).await; - - test.submit_turn_with_policy( - "attempt a failing apply_patch via custom tool", - SandboxPolicy::DangerFullAccess, + mount_apply_patch( + &harness, + call_id, + &patch, + "apply_patch failure done", + output_type, ) - .await?; + .await; - let req = mock - .last_request() - .expect("apply_patch output request recorded"); - let output_item = match output_type { - ApplyPatchModelOutput::Freeform => req.custom_tool_call_output(call_id), - ApplyPatchModelOutput::Function - | ApplyPatchModelOutput::Shell - | ApplyPatchModelOutput::ShellViaHeredoc => req.function_call_output(call_id), - }; - let output = output_item - .get("output") - .and_then(Value::as_str) - .expect("apply_patch output string"); + harness + .test() + .submit_turn_with_policy( + "attempt a failing apply_patch via custom tool", + SandboxPolicy::DangerFullAccess, + ) + .await?; + + let output = harness.apply_patch_output(call_id, output_type).await; let expected_output = format!( "apply_patch verification failed: Failed to read file to update {}/{missing_file}: No such file or directory (os error 2)", - test.cwd.path().to_string_lossy() + harness.cwd().to_string_lossy() ); - assert_eq!(output, expected_output); + assert_eq!(output, expected_output.as_str()); Ok(()) } @@ -762,49 +678,29 @@ async fn apply_patch_function_call_output_is_structured( ) -> Result<()> { skip_if_no_network!(Ok(())); - let server = start_mock_server().await; - let mut builder = test_codex().with_config(move |config| { - config.include_apply_patch_tool = true; - }); - let test = builder.build(&server).await?; + let harness = apply_patch_harness().await?; let call_id = "apply-patch-function"; let file_name = "function_apply_patch.txt"; let patch = format!("*** Begin Patch\n*** Add File: {file_name}\n+via function call\n*** End Patch\n"); - let responses = vec![ - sse(vec![ - json!({"type": "response.created", "response": {"id": "resp-1"}}), - ev_apply_patch_call(call_id, &patch, output_type), - ev_completed("resp-1"), - ]), - sse(vec![ - ev_assistant_message("msg-1", "apply_patch function done"), - ev_completed("resp-2"), - ]), - ]; - let mock = mount_sse_sequence(&server, responses).await; - - test.submit_turn_with_policy( - "apply the patch via function-call apply_patch", - SandboxPolicy::DangerFullAccess, + mount_apply_patch( + &harness, + call_id, + &patch, + "apply_patch function done", + output_type, ) - .await?; - - let req = mock - .last_request() - .expect("apply_patch function output request recorded"); - let output_item = match output_type { - ApplyPatchModelOutput::Freeform => req.custom_tool_call_output(call_id), - ApplyPatchModelOutput::Function - | ApplyPatchModelOutput::Shell - | ApplyPatchModelOutput::ShellViaHeredoc => req.function_call_output(call_id), - }; - let output = output_item - .get("output") - .and_then(Value::as_str) - .expect("apply_patch output string"); - + .await; + harness + .test() + .submit_turn_with_policy( + "apply the patch via function-call apply_patch", + SandboxPolicy::DangerFullAccess, + ) + .await?; + + let output = harness.apply_patch_output(call_id, output_type).await; let expected_pattern = format!( r"(?s)^Exit code: 0 Wall time: [0-9]+(?:\.[0-9]+)? seconds @@ -813,7 +709,7 @@ Success. Updated the following files: A {file_name} ?$" ); - assert_regex_match(&expected_pattern, output); + assert_regex_match(&expected_pattern, output.as_str()); Ok(()) }