From 805de1938170103c8fcc2d165a4c4c7fdc9d9f4a Mon Sep 17 00:00:00 2001 From: jimmyfraiture Date: Fri, 26 Sep 2025 13:42:58 +0200 Subject: [PATCH 01/17] V1 --- codex-rs/Cargo.lock | 1 + codex-rs/core/Cargo.toml | 1 + codex-rs/core/src/codex.rs | 241 +++++++++--------- codex-rs/core/src/lib.rs | 1 + .../core/src/sandbox/apply_patch_adapter.rs | 30 +++ codex-rs/core/src/sandbox/backend.rs | 87 +++++++ codex-rs/core/src/sandbox/mod.rs | 50 ++++ codex-rs/core/src/sandbox/planner.rs | 112 ++++++++ 8 files changed, 404 insertions(+), 119 deletions(-) create mode 100644 codex-rs/core/src/sandbox/apply_patch_adapter.rs create mode 100644 codex-rs/core/src/sandbox/backend.rs create mode 100644 codex-rs/core/src/sandbox/mod.rs create mode 100644 codex-rs/core/src/sandbox/planner.rs diff --git a/codex-rs/Cargo.lock b/codex-rs/Cargo.lock index 8b71f139b6..88f93b9776 100644 --- a/codex-rs/Cargo.lock +++ b/codex-rs/Cargo.lock @@ -673,6 +673,7 @@ dependencies = [ "askama", "assert_cmd", "async-channel", + "async-trait", "base64", "bytes", "chrono", diff --git a/codex-rs/core/Cargo.toml b/codex-rs/core/Cargo.toml index d9ded08283..b56de6b4e4 100644 --- a/codex-rs/core/Cargo.toml +++ b/codex-rs/core/Cargo.toml @@ -15,6 +15,7 @@ workspace = true anyhow = { workspace = true } askama = { workspace = true } async-channel = { workspace = true } +async-trait = { workspace = true } base64 = { workspace = true } bytes = { workspace = true } chrono = { workspace = true, features = ["serde"] } diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index 48aa3dbd7c..8bdb497906 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -44,7 +44,6 @@ use tracing::warn; use crate::ModelProviderInfo; use crate::apply_patch; use crate::apply_patch::ApplyPatchExec; -use crate::apply_patch::CODEX_APPLY_PATCH_ARG1; use crate::apply_patch::InternalApplyPatchInvocation; use crate::apply_patch::convert_apply_patch_to_protocol; use crate::client::ModelClient; @@ -63,7 +62,6 @@ use crate::exec::ExecToolCallOutput; use crate::exec::SandboxType; use crate::exec::StdoutStream; use crate::exec::StreamOutput; -use crate::exec::process_exec_tool_call; use crate::exec_command::EXEC_COMMAND_TOOL_NAME; use crate::exec_command::ExecCommandParams; use crate::exec_command::ExecSessionManager; @@ -114,9 +112,15 @@ use crate::protocol::TurnDiffEvent; use crate::protocol::WebSearchBeginEvent; use crate::rollout::RolloutRecorder; use crate::rollout::RolloutRecorderParams; -use crate::safety::SafetyCheck; -use crate::safety::assess_command_safety; -use crate::safety::assess_safety_for_untrusted_command; +use crate::sandbox::BackendRegistry; +use crate::sandbox::ExecPlan; +use crate::sandbox::ExecRequest; +use crate::sandbox::ExecRuntimeContext; +use crate::sandbox::PatchExecRequest; +use crate::sandbox::build_exec_params_for_apply_patch; +use crate::sandbox::plan_apply_patch; +use crate::sandbox::plan_exec; +use crate::sandbox::run_with_plan; use crate::shell; use crate::state::ActiveTurn; use crate::state::SessionServices; @@ -942,15 +946,24 @@ impl Session { self.on_exec_command_begin(turn_diff_tracker, begin_ctx.clone()) .await; - let result = process_exec_tool_call( - exec_args.params, - exec_args.sandbox_type, - exec_args.sandbox_policy, - exec_args.sandbox_cwd, - exec_args.codex_linux_sandbox_exe, - exec_args.stdout_stream, - ) - .await; + let ExecInvokeArgs { + params, + plan, + sandbox_policy, + sandbox_cwd, + codex_linux_sandbox_exe, + stdout_stream, + } = exec_args; + + let registry = BackendRegistry::new(); + let runtime_ctx = ExecRuntimeContext { + sandbox_policy, + sandbox_cwd, + codex_linux_sandbox_exe, + stdout_stream, + }; + + let result = run_with_plan(params, &plan, ®istry, &runtime_ctx).await; let output_stderr; let borrowed: &ExecToolCallOutput = match &result { @@ -2673,7 +2686,7 @@ fn parse_container_exec_arguments( pub struct ExecInvokeArgs<'a> { pub params: ExecParams, - pub sandbox_type: SandboxType, + pub plan: ExecPlan, pub sandbox_policy: &'a SandboxPolicy, pub sandbox_cwd: &'a Path, pub codex_linux_sandbox_exe: &'a Option, @@ -2740,98 +2753,80 @@ async fn handle_container_exec_with_params( MaybeApplyPatchVerified::NotApplyPatch => None, }; - let (params, safety, command_for_display) = match &apply_patch_exec { - Some(ApplyPatchExec { - action: ApplyPatchAction { patch, cwd, .. }, - user_explicitly_approved_this_action, - }) => { - let path_to_codex = std::env::current_exe() - .ok() - .map(|p| p.to_string_lossy().to_string()); - let Some(path_to_codex) = path_to_codex else { + let mut params = params; + let command_for_display; + + let plan = if let Some(apply_patch_exec) = apply_patch_exec.as_ref() { + params = build_exec_params_for_apply_patch(apply_patch_exec, ¶ms)?; + command_for_display = vec![ + "apply_patch".to_string(), + apply_patch_exec.action.patch.clone(), + ]; + + let plan_req = PatchExecRequest { + action: &apply_patch_exec.action, + approval: turn_context.approval_policy, + policy: &turn_context.sandbox_policy, + cwd: &turn_context.cwd, + user_explicitly_approved: apply_patch_exec.user_explicitly_approved_this_action, + }; + + match plan_apply_patch(&plan_req) { + plan @ ExecPlan::Approved { .. } => plan, + ExecPlan::AskUser { .. } => { return Err(FunctionCallError::RespondToModel( - "failed to determine path to codex executable".to_string(), + "patch requires approval but none was recorded".to_string(), )); - }; - - let params = ExecParams { - command: vec![ - path_to_codex, - CODEX_APPLY_PATCH_ARG1.to_string(), - patch.clone(), - ], - cwd: cwd.clone(), - timeout_ms: params.timeout_ms, - env: HashMap::new(), - with_escalated_permissions: params.with_escalated_permissions, - justification: params.justification.clone(), - }; - let safety = if *user_explicitly_approved_this_action { - SafetyCheck::AutoApprove { - sandbox_type: SandboxType::None, - } - } else { - assess_safety_for_untrusted_command( - turn_context.approval_policy, - &turn_context.sandbox_policy, - params.with_escalated_permissions.unwrap_or(false), - ) - }; - ( - params, - safety, - vec!["apply_patch".to_string(), patch.clone()], - ) - } - None => { - let safety = { - let state = sess.state.lock().await; - assess_command_safety( - ¶ms.command, - turn_context.approval_policy, - &turn_context.sandbox_policy, - state.approved_commands_ref(), - params.with_escalated_permissions.unwrap_or(false), - ) - }; - let command_for_display = params.command.clone(); - (params, safety, command_for_display) + } + ExecPlan::Reject { reason } => { + return Err(FunctionCallError::RespondToModel(format!( + "patch rejected: {reason}" + ))); + } } - }; + } else { + command_for_display = params.command.clone(); + + let initial_plan = { + let state = sess.state.lock().await; + plan_exec(&ExecRequest { + params: ¶ms, + approval: turn_context.approval_policy, + policy: &turn_context.sandbox_policy, + approved_session_commands: state.approved_commands_ref(), + }) + }; - let sandbox_type = match safety { - SafetyCheck::AutoApprove { sandbox_type } => sandbox_type, - SafetyCheck::AskUser => { - let decision = sess - .request_command_approval( - sub_id.clone(), - call_id.clone(), - params.command.clone(), - params.cwd.clone(), - params.justification.clone(), - ) - .await; - match decision { - ReviewDecision::Approved => (), - ReviewDecision::ApprovedForSession => { - sess.add_approved_command(params.command.clone()).await; - } - ReviewDecision::Denied | ReviewDecision::Abort => { - return Err(FunctionCallError::RespondToModel( - "exec command rejected by user".to_string(), - )); + match initial_plan { + plan @ ExecPlan::Approved { .. } => plan, + ExecPlan::AskUser { reason } => { + let decision = sess + .request_command_approval( + sub_id.clone(), + call_id.clone(), + params.command.clone(), + params.cwd.clone(), + reason, + ) + .await; + match decision { + ReviewDecision::Approved => ExecPlan::approved(SandboxType::None, false, true), + ReviewDecision::ApprovedForSession => { + sess.add_approved_command(params.command.clone()).await; + ExecPlan::approved(SandboxType::None, false, true) + } + ReviewDecision::Denied | ReviewDecision::Abort => { + return Err(FunctionCallError::RespondToModel( + "exec command rejected by user".to_string(), + )); + } } } - // No sandboxing is applied because the user has given - // explicit approval. Often, we end up in this case because - // the command cannot be run in a sandbox, such as - // installing a new dependency that requires network access. - SandboxType::None - } - SafetyCheck::Reject { reason } => { - return Err(FunctionCallError::RespondToModel(format!( - "exec command rejected: {reason:?}" - ))); + ExecPlan::Reject { reason } => { + return Err(FunctionCallError::RespondToModel(format!( + "exec command rejected: {reason:?}" + ))); + } } }; @@ -2840,25 +2835,26 @@ async fn handle_container_exec_with_params( call_id: call_id.clone(), command_for_display: command_for_display.clone(), cwd: params.cwd.clone(), - apply_patch: apply_patch_exec.map( + apply_patch: apply_patch_exec.as_ref().map( |ApplyPatchExec { action, user_explicitly_approved_this_action, }| ApplyPatchCommandContext { - user_explicitly_approved_this_action, - changes: convert_apply_patch_to_protocol(&action), + user_explicitly_approved_this_action: *user_explicitly_approved_this_action, + changes: convert_apply_patch_to_protocol(action), }, ), }; let params = maybe_translate_shell_command(params, sess, turn_context); + let plan_for_invocation = plan.clone(); let output_result = sess .run_exec_with_events( turn_diff_tracker, exec_command_context.clone(), ExecInvokeArgs { params: params.clone(), - sandbox_type, + plan: plan_for_invocation, sandbox_policy: &turn_context.sandbox_policy, sandbox_cwd: &turn_context.cwd, codex_linux_sandbox_exe: &sess.services.codex_linux_sandbox_exe, @@ -2891,7 +2887,7 @@ async fn handle_container_exec_with_params( params, exec_command_context, error, - sandbox_type, + &plan, sess, turn_context, ) @@ -2908,7 +2904,7 @@ async fn handle_sandbox_error( params: ExecParams, exec_command_context: ExecCommandContext, error: SandboxErr, - sandbox_type: SandboxType, + plan: &ExecPlan, sess: &Session, turn_context: &TurnContext, ) -> Result { @@ -2921,15 +2917,21 @@ async fn handle_sandbox_error( return Err(FunctionCallError::RespondToModel(content)); } - // Early out if either the user never wants to be asked for approval, or - // we're letting the model manage escalation requests. Otherwise, continue - match turn_context.approval_policy { - AskForApproval::Never | AskForApproval::OnRequest => { - return Err(FunctionCallError::RespondToModel(format!( - "failed in sandbox {sandbox_type:?} with execution error: {error:?}" - ))); - } - AskForApproval::UnlessTrusted | AskForApproval::OnFailure => (), + let ExecPlan::Approved { + sandbox: sandbox_type, + on_failure_escalate, + .. + } = plan + else { + return Err(FunctionCallError::RespondToModel( + "execution failed without an approved plan".to_string(), + )); + }; + + if !on_failure_escalate { + return Err(FunctionCallError::RespondToModel(format!( + "failed in sandbox {sandbox_type:?} with execution error: {error:?}" + ))); } // Note that when `error` is `SandboxErr::Denied`, it could be a false @@ -2944,11 +2946,12 @@ async fn handle_sandbox_error( sess.notify_background_event(&sub_id, format!("Execution failed: {error}")) .await; + let command_for_retry = params.command.clone(); let decision = sess .request_command_approval( sub_id.clone(), call_id.clone(), - params.command.clone(), + command_for_retry.clone(), cwd.clone(), Some("command failed; retry without sandbox?".to_string()), ) @@ -2960,7 +2963,7 @@ async fn handle_sandbox_error( // remainder of the session so future // executions skip the sandbox directly. // TODO(ragona): Isn't this a bug? It always saves the command in an | fork? - sess.add_approved_command(params.command.clone()).await; + sess.add_approved_command(command_for_retry.clone()).await; // Inform UI we are retrying without sandbox. sess.notify_background_event(&sub_id, "retrying command without sandbox") .await; @@ -2973,7 +2976,7 @@ async fn handle_sandbox_error( exec_command_context.clone(), ExecInvokeArgs { params, - sandbox_type: SandboxType::None, + plan: ExecPlan::approved(SandboxType::None, false, true), sandbox_policy: &turn_context.sandbox_policy, sandbox_cwd: &turn_context.cwd, codex_linux_sandbox_exe: &sess.services.codex_linux_sandbox_exe, diff --git a/codex-rs/core/src/lib.rs b/codex-rs/core/src/lib.rs index 36287c1af1..8598fa1e71 100644 --- a/codex-rs/core/src/lib.rs +++ b/codex-rs/core/src/lib.rs @@ -61,6 +61,7 @@ pub mod plan_tool; pub mod project_doc; mod rollout; pub(crate) mod safety; +pub mod sandbox; pub mod seatbelt; pub mod shell; pub mod spawn; diff --git a/codex-rs/core/src/sandbox/apply_patch_adapter.rs b/codex-rs/core/src/sandbox/apply_patch_adapter.rs new file mode 100644 index 0000000000..60a123684b --- /dev/null +++ b/codex-rs/core/src/sandbox/apply_patch_adapter.rs @@ -0,0 +1,30 @@ +use std::env; + +use crate::apply_patch::ApplyPatchExec; +use crate::apply_patch::CODEX_APPLY_PATCH_ARG1; +use crate::exec::ExecParams; +use crate::function_tool::FunctionCallError; + +pub(crate) fn build_exec_params_for_apply_patch( + exec: &ApplyPatchExec, + original: &ExecParams, +) -> Result { + let path_to_codex = env::current_exe() + .ok() + .map(|p| p.to_string_lossy().to_string()) + .ok_or_else(|| { + FunctionCallError::RespondToModel( + "failed to determine path to codex executable".to_string(), + ) + })?; + + let patch = exec.action.patch.clone(); + Ok(ExecParams { + command: vec![path_to_codex, CODEX_APPLY_PATCH_ARG1.to_string(), patch], + cwd: exec.action.cwd.clone(), + timeout_ms: original.timeout_ms, + env: original.env.clone(), + with_escalated_permissions: original.with_escalated_permissions, + justification: original.justification.clone(), + }) +} diff --git a/codex-rs/core/src/sandbox/backend.rs b/codex-rs/core/src/sandbox/backend.rs new file mode 100644 index 0000000000..02e5a87e4f --- /dev/null +++ b/codex-rs/core/src/sandbox/backend.rs @@ -0,0 +1,87 @@ +use std::path::Path; +use std::path::PathBuf; + +use async_trait::async_trait; + +use crate::error::Result; +use crate::exec::ExecParams; +use crate::exec::ExecToolCallOutput; +use crate::exec::SandboxType; +use crate::exec::StdoutStream; +use crate::exec::process_exec_tool_call; +use crate::protocol::SandboxPolicy; + +#[async_trait] +pub trait SpawnBackend: Send + Sync { + fn sandbox_type(&self) -> SandboxType; + + async fn spawn( + &self, + params: ExecParams, + sandbox_policy: &SandboxPolicy, + sandbox_cwd: &Path, + codex_linux_sandbox_exe: &Option, + stdout_stream: Option, + ) -> Result { + process_exec_tool_call( + params, + self.sandbox_type(), + sandbox_policy, + sandbox_cwd, + codex_linux_sandbox_exe, + stdout_stream, + ) + .await + } +} + +#[derive(Clone, Copy, Debug, Default)] +pub struct DirectBackend; + +#[async_trait] +impl SpawnBackend for DirectBackend { + fn sandbox_type(&self) -> SandboxType { + SandboxType::None + } +} + +#[derive(Clone, Copy, Debug, Default)] +pub struct SeatbeltBackend; + +#[async_trait] +impl SpawnBackend for SeatbeltBackend { + fn sandbox_type(&self) -> SandboxType { + SandboxType::MacosSeatbelt + } +} + +#[derive(Clone, Copy, Debug, Default)] +pub struct LinuxBackend; + +#[async_trait] +impl SpawnBackend for LinuxBackend { + fn sandbox_type(&self) -> SandboxType { + SandboxType::LinuxSeccomp + } +} + +#[derive(Default)] +pub struct BackendRegistry { + direct: DirectBackend, + seatbelt: SeatbeltBackend, + linux: LinuxBackend, +} + +impl BackendRegistry { + pub fn new() -> Self { + Self::default() + } + + pub fn for_type(&self, sandbox: SandboxType) -> &dyn SpawnBackend { + match sandbox { + SandboxType::None => &self.direct, + SandboxType::MacosSeatbelt => &self.seatbelt, + SandboxType::LinuxSeccomp => &self.linux, + } + } +} diff --git a/codex-rs/core/src/sandbox/mod.rs b/codex-rs/core/src/sandbox/mod.rs new file mode 100644 index 0000000000..abd735992d --- /dev/null +++ b/codex-rs/core/src/sandbox/mod.rs @@ -0,0 +1,50 @@ +mod apply_patch_adapter; +mod backend; +mod planner; + +pub(crate) use apply_patch_adapter::build_exec_params_for_apply_patch; +pub use backend::BackendRegistry; +pub use backend::DirectBackend; +pub use backend::LinuxBackend; +pub use backend::SeatbeltBackend; +pub use backend::SpawnBackend; +pub use planner::ExecPlan; +pub use planner::ExecRequest; +pub use planner::PatchExecRequest; +pub use planner::plan_apply_patch; +pub use planner::plan_exec; + +use crate::error::Result; +use crate::exec::ExecParams; +use crate::exec::ExecToolCallOutput; +use crate::exec::StdoutStream; +use crate::protocol::SandboxPolicy; + +pub struct ExecRuntimeContext<'a> { + pub sandbox_policy: &'a SandboxPolicy, + pub sandbox_cwd: &'a std::path::Path, + pub codex_linux_sandbox_exe: &'a Option, + pub stdout_stream: Option, +} + +pub async fn run_with_plan( + params: ExecParams, + plan: &ExecPlan, + registry: &BackendRegistry, + runtime_ctx: &ExecRuntimeContext<'_>, +) -> Result { + let ExecPlan::Approved { sandbox, .. } = plan else { + unreachable!("run_with_plan called without approved plan"); + }; + + registry + .for_type(*sandbox) + .spawn( + params, + runtime_ctx.sandbox_policy, + runtime_ctx.sandbox_cwd, + runtime_ctx.codex_linux_sandbox_exe, + runtime_ctx.stdout_stream.clone(), + ) + .await +} diff --git a/codex-rs/core/src/sandbox/planner.rs b/codex-rs/core/src/sandbox/planner.rs new file mode 100644 index 0000000000..fcb1412ab9 --- /dev/null +++ b/codex-rs/core/src/sandbox/planner.rs @@ -0,0 +1,112 @@ +use std::collections::HashSet; +use std::path::Path; + +use codex_apply_patch::ApplyPatchAction; + +use crate::exec::ExecParams; +use crate::exec::SandboxType; +use crate::protocol::AskForApproval; +use crate::protocol::SandboxPolicy; +use crate::safety::SafetyCheck; +use crate::safety::assess_command_safety; +use crate::safety::assess_patch_safety; + +#[derive(Clone, Debug)] +pub struct ExecRequest<'a> { + pub params: &'a ExecParams, + pub approval: AskForApproval, + pub policy: &'a SandboxPolicy, + pub approved_session_commands: &'a HashSet>, +} + +#[derive(Clone, Debug)] +pub enum ExecPlan { + Reject { + reason: String, + }, + AskUser { + reason: Option, + }, + Approved { + sandbox: SandboxType, + on_failure_escalate: bool, + approved_by_user: bool, + }, +} + +impl ExecPlan { + pub fn approved( + sandbox: SandboxType, + on_failure_escalate: bool, + approved_by_user: bool, + ) -> Self { + ExecPlan::Approved { + sandbox, + on_failure_escalate, + approved_by_user, + } + } +} + +pub fn plan_exec(req: &ExecRequest<'_>) -> ExecPlan { + let params = req.params; + let with_escalated_permissions = params.with_escalated_permissions.unwrap_or(false); + let safety = assess_command_safety( + ¶ms.command, + req.approval, + req.policy, + req.approved_session_commands, + with_escalated_permissions, + ); + + match safety { + SafetyCheck::AutoApprove { sandbox_type } => ExecPlan::Approved { + sandbox: sandbox_type, + on_failure_escalate: should_escalate_on_failure(req.approval, sandbox_type), + approved_by_user: false, + }, + SafetyCheck::AskUser => ExecPlan::AskUser { + reason: params.justification.clone(), + }, + SafetyCheck::Reject { reason } => ExecPlan::Reject { reason }, + } +} + +#[derive(Clone, Debug)] +pub struct PatchExecRequest<'a> { + pub action: &'a ApplyPatchAction, + pub approval: AskForApproval, + pub policy: &'a SandboxPolicy, + pub cwd: &'a Path, + pub user_explicitly_approved: bool, +} + +pub fn plan_apply_patch(req: &PatchExecRequest<'_>) -> ExecPlan { + if req.user_explicitly_approved { + ExecPlan::Approved { + sandbox: SandboxType::None, + on_failure_escalate: false, + approved_by_user: true, + } + } else { + match assess_patch_safety(req.action, req.approval, req.policy, req.cwd) { + SafetyCheck::AutoApprove { sandbox_type } => ExecPlan::Approved { + sandbox: sandbox_type, + on_failure_escalate: should_escalate_on_failure(req.approval, sandbox_type), + approved_by_user: false, + }, + SafetyCheck::AskUser => ExecPlan::AskUser { reason: None }, + SafetyCheck::Reject { reason } => ExecPlan::Reject { reason }, + } + } +} + +fn should_escalate_on_failure(approval: AskForApproval, sandbox: SandboxType) -> bool { + matches!( + (approval, sandbox), + ( + AskForApproval::UnlessTrusted | AskForApproval::OnFailure, + SandboxType::MacosSeatbelt | SandboxType::LinuxSeccomp + ) + ) +} From a29380cdffc1e74738d1a7fa6912f19766c50d5d Mon Sep 17 00:00:00 2001 From: jimmyfraiture Date: Fri, 26 Sep 2025 14:02:38 +0200 Subject: [PATCH 02/17] Isolate apply patch adapter --- codex-rs/core/src/sandbox/apply_patch_adapter.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/codex-rs/core/src/sandbox/apply_patch_adapter.rs b/codex-rs/core/src/sandbox/apply_patch_adapter.rs index 60a123684b..7e55f43279 100644 --- a/codex-rs/core/src/sandbox/apply_patch_adapter.rs +++ b/codex-rs/core/src/sandbox/apply_patch_adapter.rs @@ -1,3 +1,4 @@ +use std::collections::HashMap; use std::env; use crate::apply_patch::ApplyPatchExec; @@ -23,7 +24,9 @@ pub(crate) fn build_exec_params_for_apply_patch( command: vec![path_to_codex, CODEX_APPLY_PATCH_ARG1.to_string(), patch], cwd: exec.action.cwd.clone(), timeout_ms: original.timeout_ms, - env: original.env.clone(), + // Run apply_patch with a minimal environment for determinism and to + // avoid leaking host environment variables into the patch process. + env: HashMap::new(), with_escalated_permissions: original.with_escalated_permissions, justification: original.justification.clone(), }) From caab5a19ee9d13399c8fb7c40b213ec8872f5b0d Mon Sep 17 00:00:00 2001 From: jimmyfraiture Date: Fri, 26 Sep 2025 14:46:07 +0200 Subject: [PATCH 03/17] Move some stuff around --- codex-rs/core/src/apply_patch.rs | 1 + codex-rs/core/src/codex.rs | 103 ++++++-------------------- codex-rs/core/src/sandbox/mod.rs | 3 +- codex-rs/core/src/sandbox/planner.rs | 105 +++++++++++++++++++++++++++ 4 files changed, 131 insertions(+), 81 deletions(-) diff --git a/codex-rs/core/src/apply_patch.rs b/codex-rs/core/src/apply_patch.rs index 1ebbe5d738..8f372fb2d3 100644 --- a/codex-rs/core/src/apply_patch.rs +++ b/codex-rs/core/src/apply_patch.rs @@ -27,6 +27,7 @@ pub(crate) enum InternalApplyPatchInvocation { DelegateToExec(ApplyPatchExec), } +#[derive(Debug)] pub(crate) struct ApplyPatchExec { pub(crate) action: ApplyPatchAction, pub(crate) user_explicitly_approved_this_action: bool, diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index 8bdb497906..20127318b2 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -114,12 +114,9 @@ use crate::rollout::RolloutRecorder; use crate::rollout::RolloutRecorderParams; use crate::sandbox::BackendRegistry; use crate::sandbox::ExecPlan; -use crate::sandbox::ExecRequest; use crate::sandbox::ExecRuntimeContext; -use crate::sandbox::PatchExecRequest; -use crate::sandbox::build_exec_params_for_apply_patch; -use crate::sandbox::plan_apply_patch; -use crate::sandbox::plan_exec; +use crate::sandbox::PreparedExec; +use crate::sandbox::prepare_exec_invocation; use crate::sandbox::run_with_plan; use crate::shell; use crate::state::ActiveTurn; @@ -2753,83 +2750,29 @@ async fn handle_container_exec_with_params( MaybeApplyPatchVerified::NotApplyPatch => None, }; - let mut params = params; - let command_for_display; - - let plan = if let Some(apply_patch_exec) = apply_patch_exec.as_ref() { - params = build_exec_params_for_apply_patch(apply_patch_exec, ¶ms)?; - command_for_display = vec![ - "apply_patch".to_string(), - apply_patch_exec.action.patch.clone(), - ]; - - let plan_req = PatchExecRequest { - action: &apply_patch_exec.action, - approval: turn_context.approval_policy, - policy: &turn_context.sandbox_policy, - cwd: &turn_context.cwd, - user_explicitly_approved: apply_patch_exec.user_explicitly_approved_this_action, - }; - - match plan_apply_patch(&plan_req) { - plan @ ExecPlan::Approved { .. } => plan, - ExecPlan::AskUser { .. } => { - return Err(FunctionCallError::RespondToModel( - "patch requires approval but none was recorded".to_string(), - )); - } - ExecPlan::Reject { reason } => { - return Err(FunctionCallError::RespondToModel(format!( - "patch rejected: {reason}" - ))); - } - } - } else { - command_for_display = params.command.clone(); - - let initial_plan = { - let state = sess.state.lock().await; - plan_exec(&ExecRequest { - params: ¶ms, - approval: turn_context.approval_policy, - policy: &turn_context.sandbox_policy, - approved_session_commands: state.approved_commands_ref(), - }) - }; - - match initial_plan { - plan @ ExecPlan::Approved { .. } => plan, - ExecPlan::AskUser { reason } => { - let decision = sess - .request_command_approval( - sub_id.clone(), - call_id.clone(), - params.command.clone(), - params.cwd.clone(), - reason, - ) - .await; - match decision { - ReviewDecision::Approved => ExecPlan::approved(SandboxType::None, false, true), - ReviewDecision::ApprovedForSession => { - sess.add_approved_command(params.command.clone()).await; - ExecPlan::approved(SandboxType::None, false, true) - } - ReviewDecision::Denied | ReviewDecision::Abort => { - return Err(FunctionCallError::RespondToModel( - "exec command rejected by user".to_string(), - )); - } - } - } - ExecPlan::Reject { reason } => { - return Err(FunctionCallError::RespondToModel(format!( - "exec command rejected: {reason:?}" - ))); - } - } + let approved_session_commands = { + let state = sess.state.lock().await; + state.approved_commands_ref().clone() }; + let prepared = prepare_exec_invocation( + sess, + turn_context, + &sub_id, + &call_id, + params, + apply_patch_exec, + approved_session_commands, + ) + .await?; + + let PreparedExec { + params, + plan, + command_for_display, + apply_patch_exec, + } = prepared; + let exec_command_context = ExecCommandContext { sub_id: sub_id.clone(), call_id: call_id.clone(), diff --git a/codex-rs/core/src/sandbox/mod.rs b/codex-rs/core/src/sandbox/mod.rs index abd735992d..d1553f0d17 100644 --- a/codex-rs/core/src/sandbox/mod.rs +++ b/codex-rs/core/src/sandbox/mod.rs @@ -2,7 +2,6 @@ mod apply_patch_adapter; mod backend; mod planner; -pub(crate) use apply_patch_adapter::build_exec_params_for_apply_patch; pub use backend::BackendRegistry; pub use backend::DirectBackend; pub use backend::LinuxBackend; @@ -11,8 +10,10 @@ pub use backend::SpawnBackend; pub use planner::ExecPlan; pub use planner::ExecRequest; pub use planner::PatchExecRequest; +pub(crate) use planner::PreparedExec; pub use planner::plan_apply_patch; pub use planner::plan_exec; +pub(crate) use planner::prepare_exec_invocation; use crate::error::Result; use crate::exec::ExecParams; diff --git a/codex-rs/core/src/sandbox/planner.rs b/codex-rs/core/src/sandbox/planner.rs index fcb1412ab9..2799087871 100644 --- a/codex-rs/core/src/sandbox/planner.rs +++ b/codex-rs/core/src/sandbox/planner.rs @@ -3,9 +3,15 @@ use std::path::Path; use codex_apply_patch::ApplyPatchAction; +use super::apply_patch_adapter::build_exec_params_for_apply_patch; +use crate::apply_patch::ApplyPatchExec; +use crate::codex::Session; +use crate::codex::TurnContext; use crate::exec::ExecParams; use crate::exec::SandboxType; +use crate::function_tool::FunctionCallError; use crate::protocol::AskForApproval; +use crate::protocol::ReviewDecision; use crate::protocol::SandboxPolicy; use crate::safety::SafetyCheck; use crate::safety::assess_command_safety; @@ -101,6 +107,105 @@ pub fn plan_apply_patch(req: &PatchExecRequest<'_>) -> ExecPlan { } } +#[derive(Debug)] +pub(crate) struct PreparedExec { + pub(crate) params: ExecParams, + pub(crate) plan: ExecPlan, + pub(crate) command_for_display: Vec, + pub(crate) apply_patch_exec: Option, +} + +pub(crate) async fn prepare_exec_invocation( + sess: &Session, + turn_context: &TurnContext, + sub_id: &str, + call_id: &str, + params: ExecParams, + apply_patch_exec: Option, + approved_session_commands: HashSet>, +) -> Result { + let mut params = params; + + let (plan, command_for_display) = if let Some(exec) = apply_patch_exec.as_ref() { + params = build_exec_params_for_apply_patch(exec, ¶ms)?; + let command_for_display = vec!["apply_patch".to_string(), exec.action.patch.clone()]; + + let plan_req = PatchExecRequest { + action: &exec.action, + approval: turn_context.approval_policy, + policy: &turn_context.sandbox_policy, + cwd: &turn_context.cwd, + user_explicitly_approved: exec.user_explicitly_approved_this_action, + }; + + let plan = match plan_apply_patch(&plan_req) { + plan @ ExecPlan::Approved { .. } => plan, + ExecPlan::AskUser { .. } => { + return Err(FunctionCallError::RespondToModel( + "patch requires approval but none was recorded".to_string(), + )); + } + ExecPlan::Reject { reason } => { + return Err(FunctionCallError::RespondToModel(format!( + "patch rejected: {reason}" + ))); + } + }; + + (plan, command_for_display) + } else { + let command_for_display = params.command.clone(); + + let initial_plan = plan_exec(&ExecRequest { + params: ¶ms, + approval: turn_context.approval_policy, + policy: &turn_context.sandbox_policy, + approved_session_commands: &approved_session_commands, + }); + + let plan = match initial_plan { + plan @ ExecPlan::Approved { .. } => plan, + ExecPlan::AskUser { reason } => { + let decision = sess + .request_command_approval( + sub_id.to_string(), + call_id.to_string(), + params.command.clone(), + params.cwd.clone(), + reason, + ) + .await; + match decision { + ReviewDecision::Approved => ExecPlan::approved(SandboxType::None, false, true), + ReviewDecision::ApprovedForSession => { + sess.add_approved_command(params.command.clone()).await; + ExecPlan::approved(SandboxType::None, false, true) + } + ReviewDecision::Denied | ReviewDecision::Abort => { + return Err(FunctionCallError::RespondToModel( + "exec command rejected by user".to_string(), + )); + } + } + } + ExecPlan::Reject { reason } => { + return Err(FunctionCallError::RespondToModel(format!( + "exec command rejected: {reason:?}" + ))); + } + }; + + (plan, command_for_display) + }; + + Ok(PreparedExec { + params, + plan, + command_for_display, + apply_patch_exec, + }) +} + fn should_escalate_on_failure(approval: AskForApproval, sandbox: SandboxType) -> bool { matches!( (approval, sandbox), From 5b74f10a7bdf5b8e659421d8f60a5c10f6bfa474 Mon Sep 17 00:00:00 2001 From: jimmyfraiture Date: Mon, 29 Sep 2025 19:34:12 +0100 Subject: [PATCH 04/17] Sandboxing iteration 2 --- codex-rs/core/src/codex.rs | 320 +++++------------- codex-rs/core/src/executor/backends.rs | 95 ++++++ codex-rs/core/src/executor/cache.rs | 25 ++ codex-rs/core/src/executor/mod.rs | 11 + codex-rs/core/src/executor/runner.rs | 306 +++++++++++++++++ codex-rs/core/src/executor/sandbox.rs | 193 +++++++++++ codex-rs/core/src/lib.rs | 2 +- .../core/src/sandbox/apply_patch_adapter.rs | 33 -- codex-rs/core/src/sandbox/backend.rs | 87 ----- codex-rs/core/src/sandbox/mod.rs | 51 --- codex-rs/core/src/sandbox/planner.rs | 217 ------------ codex-rs/core/src/state/session.rs | 12 - 12 files changed, 718 insertions(+), 634 deletions(-) create mode 100644 codex-rs/core/src/executor/backends.rs create mode 100644 codex-rs/core/src/executor/cache.rs create mode 100644 codex-rs/core/src/executor/mod.rs create mode 100644 codex-rs/core/src/executor/runner.rs create mode 100644 codex-rs/core/src/executor/sandbox.rs delete mode 100644 codex-rs/core/src/sandbox/apply_patch_adapter.rs delete mode 100644 codex-rs/core/src/sandbox/backend.rs delete mode 100644 codex-rs/core/src/sandbox/mod.rs delete mode 100644 codex-rs/core/src/sandbox/planner.rs diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index 70cc6900b3..84b1c53b9c 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -1,10 +1,8 @@ use std::borrow::Cow; use std::collections::HashMap; -use std::path::Path; use std::path::PathBuf; use std::sync::Arc; use std::sync::atomic::AtomicU64; -use std::time::Duration; use crate::AuthManager; use crate::client_common::REVIEW_PROMPT; @@ -54,11 +52,10 @@ use crate::environment_context::EnvironmentContext; use crate::error::CodexErr; use crate::error::Result as CodexResult; use crate::error::SandboxErr; -use crate::error::get_error_message_ui; use crate::exec::ExecParams; use crate::exec::ExecToolCallOutput; -use crate::exec::SandboxType; use crate::exec::StdoutStream; +#[cfg(test)] use crate::exec::StreamOutput; use crate::exec_command::EXEC_COMMAND_TOOL_NAME; use crate::exec_command::ExecCommandParams; @@ -66,6 +63,12 @@ use crate::exec_command::ExecSessionManager; use crate::exec_command::WRITE_STDIN_TOOL_NAME; use crate::exec_command::WriteStdinParams; use crate::exec_env::create_env; +use crate::executor::ExecError; +use crate::executor::ExecutionMode; +use crate::executor::ExecutionRequest; +use crate::executor::Executor; +use crate::executor::ExecutorConfig; +use crate::executor::normalize_exec_result; use crate::mcp_connection_manager::McpConnectionManager; use crate::mcp_tool_call::handle_mcp_tool_call; use crate::model_family::find_family_for_model; @@ -109,12 +112,6 @@ use crate::protocol::TurnDiffEvent; use crate::protocol::WebSearchBeginEvent; use crate::rollout::RolloutRecorder; use crate::rollout::RolloutRecorderParams; -use crate::sandbox::BackendRegistry; -use crate::sandbox::ExecPlan; -use crate::sandbox::ExecRuntimeContext; -use crate::sandbox::PreparedExec; -use crate::sandbox::prepare_exec_invocation; -use crate::sandbox::run_with_plan; use crate::shell; use crate::state::ActiveTurn; use crate::state::SessionServices; @@ -266,6 +263,7 @@ pub(crate) struct Session { pub(crate) active_turn: Mutex>, services: SessionServices, next_internal_sub_id: AtomicU64, + executor: Executor, } /// The context needed for a single turn of the conversation. @@ -464,6 +462,12 @@ impl Session { show_raw_agent_reasoning: config.show_raw_agent_reasoning, }; + let executor = Executor::new(ExecutorConfig::new( + turn_context.sandbox_policy.clone(), + turn_context.cwd.clone(), + config.codex_linux_sandbox_exe.clone(), + )); + let sess = Arc::new(Session { conversation_id, tx_event: tx_event.clone(), @@ -471,6 +475,7 @@ impl Session { active_turn: Mutex::new(None), services, next_internal_sub_id: AtomicU64::new(0), + executor, }); // Dispatch the SessionConfiguredEvent first and then report any errors. @@ -546,6 +551,11 @@ impl Session { } } + /// Emit an exec approval request event and await the user's decision. + /// + /// The request is keyed by `sub_id`/`call_id` so matching responses are delivered + /// to the correct in-flight turn. If the task is aborted, this returns the + /// default `ReviewDecision` (`Denied`). pub async fn request_command_approval( &self, sub_id: String, @@ -643,11 +653,6 @@ impl Session { } } - pub async fn add_approved_command(&self, cmd: Vec) { - let mut state = self.state.lock().await; - state.add_approved_command(cmd); - } - /// Records input items: always append to conversation history and /// persist these response items to rollout. async fn record_conversation_items(&self, items: &[ResponseItem]) { @@ -901,12 +906,13 @@ impl Session { /// command even on error. /// /// Returns the output of the exec tool call. - async fn run_exec_with_events<'a>( + async fn run_exec_with_events( &self, turn_diff_tracker: &mut TurnDiffTracker, begin_ctx: ExecCommandContext, - exec_args: ExecInvokeArgs<'a>, - ) -> crate::error::Result { + request: ExecutionRequest, + approval_policy: AskForApproval, + ) -> Result { let is_apply_patch = begin_ctx.apply_patch.is_some(); let sub_id = begin_ctx.sub_id.clone(); let call_id = begin_ctx.call_id.clone(); @@ -914,41 +920,14 @@ impl Session { self.on_exec_command_begin(turn_diff_tracker, begin_ctx.clone()) .await; - let ExecInvokeArgs { - params, - plan, - sandbox_policy, - sandbox_cwd, - codex_linux_sandbox_exe, - stdout_stream, - } = exec_args; - - let registry = BackendRegistry::new(); - let runtime_ctx = ExecRuntimeContext { - sandbox_policy, - sandbox_cwd, - codex_linux_sandbox_exe, - stdout_stream, - }; + let result = self + .executor + .run(request, self, approval_policy, &sub_id, &call_id) + .await; - let result = run_with_plan(params, &plan, ®istry, &runtime_ctx).await; + let normalized = normalize_exec_result(&result); + let borrowed = normalized.event_output(); - let output_stderr; - let borrowed: &ExecToolCallOutput = match &result { - Ok(output) => output, - Err(CodexErr::Sandbox(SandboxErr::Timeout { output })) => output, - Err(e) => { - output_stderr = ExecToolCallOutput { - exit_code: -1, - stdout: StreamOutput::new(String::new()), - stderr: StreamOutput::new(get_error_message_ui(e)), - aggregated_output: StreamOutput::new(get_error_message_ui(e)), - duration: Duration::default(), - timed_out: false, - }; - &output_stderr - } - }; self.on_exec_command_end( turn_diff_tracker, &sub_id, @@ -958,13 +937,15 @@ impl Session { ) .await; + drop(normalized); + result } /// Helper that emits a BackgroundEvent with the given message. This keeps /// the call‑sites terse so adding more diagnostics does not clutter the /// core agent logic. - async fn notify_background_event(&self, sub_id: &str, message: impl Into) { + pub(crate) async fn notify_background_event(&self, sub_id: &str, message: impl Into) { let event = Event { id: sub_id.to_string(), msg: EventMsg::BackgroundEvent(BackgroundEventEvent { @@ -2530,15 +2511,6 @@ fn parse_container_exec_arguments( }) } -pub struct ExecInvokeArgs<'a> { - pub params: ExecParams, - pub plan: ExecPlan, - pub sandbox_policy: &'a SandboxPolicy, - pub sandbox_cwd: &'a Path, - pub codex_linux_sandbox_exe: &'a Option, - pub stdout_stream: Option, -} - fn maybe_translate_shell_command( params: ExecParams, sess: &Session, @@ -2599,29 +2571,12 @@ async fn handle_container_exec_with_params( MaybeApplyPatchVerified::NotApplyPatch => None, }; - let approved_session_commands = { - let state = sess.state.lock().await; - state.approved_commands_ref().clone() + let command_for_display = if let Some(exec) = apply_patch_exec.as_ref() { + vec!["apply_patch".to_string(), exec.action.patch.clone()] + } else { + params.command.clone() }; - let prepared = prepare_exec_invocation( - sess, - turn_context, - &sub_id, - &call_id, - params, - apply_patch_exec, - approved_session_commands, - ) - .await?; - - let PreparedExec { - params, - plan, - command_for_display, - apply_patch_exec, - } = prepared; - let exec_command_context = ExecCommandContext { sub_id: sub_id.clone(), call_id: call_id.clone(), @@ -2638,28 +2593,41 @@ async fn handle_container_exec_with_params( ), }; - let params = maybe_translate_shell_command(params, sess, turn_context); - let plan_for_invocation = plan.clone(); + let translated_params = maybe_translate_shell_command(params, sess, turn_context); + let stdout_stream = if exec_command_context.apply_patch.is_some() { + None + } else { + Some(StdoutStream { + sub_id: sub_id.clone(), + call_id: call_id.clone(), + tx_event: sess.tx_event.clone(), + }) + }; + + let mode = match apply_patch_exec { + Some(exec) => ExecutionMode::ApplyPatch(exec), + None => ExecutionMode::Shell, + }; + + let request = ExecutionRequest { + params: translated_params, + approval_command: command_for_display, + mode, + stdout_stream, + }; + + sess.executor.update_environment( + turn_context.sandbox_policy.clone(), + turn_context.cwd.clone(), + sess.services.codex_linux_sandbox_exe.clone(), + ); + let output_result = sess .run_exec_with_events( turn_diff_tracker, - exec_command_context.clone(), - ExecInvokeArgs { - params: params.clone(), - plan: plan_for_invocation, - sandbox_policy: &turn_context.sandbox_policy, - sandbox_cwd: &turn_context.cwd, - codex_linux_sandbox_exe: &sess.services.codex_linux_sandbox_exe, - stdout_stream: if exec_command_context.apply_patch.is_some() { - None - } else { - Some(StdoutStream { - sub_id: sub_id.clone(), - call_id: call_id.clone(), - tx_event: sess.tx_event.clone(), - }) - }, - }, + exec_command_context, + request, + turn_context.approval_policy, ) .await; @@ -2673,142 +2641,16 @@ async fn handle_container_exec_with_params( Err(FunctionCallError::RespondToModel(content)) } } - Err(CodexErr::Sandbox(error)) => { - handle_sandbox_error( - turn_diff_tracker, - params, - exec_command_context, - error, - &plan, - sess, - turn_context, - ) - .await - } - Err(e) => Err(FunctionCallError::RespondToModel(format!( - "execution error: {e:?}" + Err(ExecError::Function(err)) => Err(err), + Err(ExecError::Codex(CodexErr::Sandbox(SandboxErr::Timeout { output }))) => Err( + FunctionCallError::RespondToModel(format_exec_output(&output)), + ), + Err(ExecError::Codex(err)) => Err(FunctionCallError::RespondToModel(format!( + "execution error: {err:?}" ))), } } -async fn handle_sandbox_error( - turn_diff_tracker: &mut TurnDiffTracker, - params: ExecParams, - exec_command_context: ExecCommandContext, - error: SandboxErr, - plan: &ExecPlan, - sess: &Session, - turn_context: &TurnContext, -) -> Result { - let call_id = exec_command_context.call_id.clone(); - let sub_id = exec_command_context.sub_id.clone(); - let cwd = exec_command_context.cwd.clone(); - - if let SandboxErr::Timeout { output } = &error { - let content = format_exec_output(output); - return Err(FunctionCallError::RespondToModel(content)); - } - - let ExecPlan::Approved { - sandbox: sandbox_type, - on_failure_escalate, - .. - } = plan - else { - return Err(FunctionCallError::RespondToModel( - "execution failed without an approved plan".to_string(), - )); - }; - - if !on_failure_escalate { - return Err(FunctionCallError::RespondToModel(format!( - "failed in sandbox {sandbox_type:?} with execution error: {error:?}" - ))); - } - - // Note that when `error` is `SandboxErr::Denied`, it could be a false - // positive. That is, it may have exited with a non-zero exit code, not - // because the sandbox denied it, but because that is its expected behavior, - // i.e., a grep command that did not match anything. Ideally we would - // include additional metadata on the command to indicate whether non-zero - // exit codes merit a retry. - - // For now, we categorically ask the user to retry without sandbox and - // emit the raw error as a background event. - sess.notify_background_event(&sub_id, format!("Execution failed: {error}")) - .await; - - let command_for_retry = params.command.clone(); - let decision = sess - .request_command_approval( - sub_id.clone(), - call_id.clone(), - command_for_retry.clone(), - cwd.clone(), - Some("command failed; retry without sandbox?".to_string()), - ) - .await; - - match decision { - ReviewDecision::Approved | ReviewDecision::ApprovedForSession => { - // Persist this command as pre‑approved for the - // remainder of the session so future - // executions skip the sandbox directly. - // TODO(ragona): Isn't this a bug? It always saves the command in an | fork? - sess.add_approved_command(command_for_retry.clone()).await; - // Inform UI we are retrying without sandbox. - sess.notify_background_event(&sub_id, "retrying command without sandbox") - .await; - - // This is an escalated retry; the policy will not be - // examined and the sandbox has been set to `None`. - let retry_output_result = sess - .run_exec_with_events( - turn_diff_tracker, - exec_command_context.clone(), - ExecInvokeArgs { - params, - plan: ExecPlan::approved(SandboxType::None, false, true), - sandbox_policy: &turn_context.sandbox_policy, - sandbox_cwd: &turn_context.cwd, - codex_linux_sandbox_exe: &sess.services.codex_linux_sandbox_exe, - stdout_stream: if exec_command_context.apply_patch.is_some() { - None - } else { - Some(StdoutStream { - sub_id: sub_id.clone(), - call_id: call_id.clone(), - tx_event: sess.tx_event.clone(), - }) - }, - }, - ) - .await; - - match retry_output_result { - Ok(retry_output) => { - let ExecToolCallOutput { exit_code, .. } = &retry_output; - let content = format_exec_output(&retry_output); - if *exit_code == 0 { - Ok(content) - } else { - Err(FunctionCallError::RespondToModel(content)) - } - } - Err(e) => Err(FunctionCallError::RespondToModel(format!( - "retry failed: {e}" - ))), - } - } - ReviewDecision::Denied | ReviewDecision::Abort => { - // Fall through to original failure handling. - Err(FunctionCallError::RespondToModel( - "exec command rejected by user".to_string(), - )) - } - } -} - fn format_exec_output_str(exec_output: &ExecToolCallOutput) -> String { let ExecToolCallOutput { aggregated_output, .. @@ -3366,6 +3208,11 @@ mod tests { user_shell: shell::Shell::Unknown, show_raw_agent_reasoning: config.show_raw_agent_reasoning, }; + let executor = Executor::new(ExecutorConfig::new( + turn_context.sandbox_policy.clone(), + turn_context.cwd.clone(), + None, + )); let session = Session { conversation_id, tx_event, @@ -3373,6 +3220,7 @@ mod tests { active_turn: Mutex::new(None), services, next_internal_sub_id: AtomicU64::new(0), + executor, }; (session, turn_context) } @@ -3433,6 +3281,11 @@ mod tests { user_shell: shell::Shell::Unknown, show_raw_agent_reasoning: config.show_raw_agent_reasoning, }; + let executor = Executor::new(ExecutorConfig::new( + config.sandbox_policy.clone(), + config.cwd.clone(), + None, + )); let session = Arc::new(Session { conversation_id, tx_event, @@ -3440,6 +3293,7 @@ mod tests { active_turn: Mutex::new(None), services, next_internal_sub_id: AtomicU64::new(0), + executor, }); (session, turn_context, rx_event) } diff --git a/codex-rs/core/src/executor/backends.rs b/codex-rs/core/src/executor/backends.rs new file mode 100644 index 0000000000..aaa7f08030 --- /dev/null +++ b/codex-rs/core/src/executor/backends.rs @@ -0,0 +1,95 @@ +use std::sync::Arc; + +use async_trait::async_trait; + +use crate::apply_patch::ApplyPatchExec; +use crate::exec::ExecParams; +use crate::exec::ExecToolCallOutput; +use crate::executor::sandbox::build_exec_params_for_apply_patch; +use crate::function_tool::FunctionCallError; + +pub(crate) enum ExecutionMode { + Shell, + ApplyPatch(ApplyPatchExec), +} + +#[async_trait] +/// Backend-specific hooks that prepare and post-process execution requests for a +/// given [`ExecutionMode`]. +pub(crate) trait ExecutionBackend: Send + Sync { + fn prepare( + &self, + params: ExecParams, + // Required for downcasting the apply_patch. + mode: &ExecutionMode, + ) -> Result; + + async fn finalize( + &self, + output: ExecToolCallOutput, + _mode: &ExecutionMode, + ) -> Result { + Ok(output) + } +} + +pub(crate) struct BackendStore { + shell: Arc, + apply_patch: Arc, +} + +impl BackendStore { + pub(crate) fn new() -> Self { + Self { + shell: Arc::new(ShellBackend), + apply_patch: Arc::new(ApplyPatchBackend), + } + } + + pub(crate) fn for_mode(&self, mode: &ExecutionMode) -> Arc { + match mode { + ExecutionMode::Shell => self.shell.clone(), + ExecutionMode::ApplyPatch(_) => self.apply_patch.clone(), + } + } +} + +pub(crate) fn default_backends() -> BackendStore { + BackendStore::new() +} + +struct ShellBackend; + +#[async_trait] +impl ExecutionBackend for ShellBackend { + fn prepare( + &self, + params: ExecParams, + mode: &ExecutionMode, + ) -> Result { + match mode { + ExecutionMode::Shell => Ok(params), + _ => Err(FunctionCallError::RespondToModel( + "shell backend invoked with non-shell mode".to_string(), + )), + } + } +} + +struct ApplyPatchBackend; + +#[async_trait] +impl ExecutionBackend for ApplyPatchBackend { + fn prepare( + &self, + params: ExecParams, + mode: &ExecutionMode, + ) -> Result { + match mode { + ExecutionMode::ApplyPatch(exec) => build_exec_params_for_apply_patch(exec, ¶ms), + ExecutionMode::Shell => Err(FunctionCallError::RespondToModel( + "apply_patch backend invoked without patch context".to_string(), + )), + } + } +} diff --git a/codex-rs/core/src/executor/cache.rs b/codex-rs/core/src/executor/cache.rs new file mode 100644 index 0000000000..5629c8893a --- /dev/null +++ b/codex-rs/core/src/executor/cache.rs @@ -0,0 +1,25 @@ +use std::collections::HashSet; +use std::sync::Arc; +use std::sync::Mutex; + +#[derive(Clone, Debug, Default)] +/// Thread-safe store of user approvals so repeated commands can reuse +/// previously granted trust. +pub(crate) struct ApprovalCache { + inner: Arc>>>, +} + +impl ApprovalCache { + pub(crate) fn insert(&self, command: Vec) { + if command.is_empty() { + return; + } + if let Ok(mut guard) = self.inner.lock() { + guard.insert(command); + } + } + + pub(crate) fn snapshot(&self) -> HashSet> { + self.inner.lock().map(|g| g.clone()).unwrap_or_default() + } +} diff --git a/codex-rs/core/src/executor/mod.rs b/codex-rs/core/src/executor/mod.rs new file mode 100644 index 0000000000..c3dce856e5 --- /dev/null +++ b/codex-rs/core/src/executor/mod.rs @@ -0,0 +1,11 @@ +mod backends; +mod cache; +mod runner; +mod sandbox; + +pub(crate) use backends::ExecutionMode; +pub(crate) use runner::ExecError; +pub(crate) use runner::ExecutionRequest; +pub(crate) use runner::Executor; +pub(crate) use runner::ExecutorConfig; +pub(crate) use runner::normalize_exec_result; diff --git a/codex-rs/core/src/executor/runner.rs b/codex-rs/core/src/executor/runner.rs new file mode 100644 index 0000000000..708568a73b --- /dev/null +++ b/codex-rs/core/src/executor/runner.rs @@ -0,0 +1,306 @@ +use std::path::PathBuf; +use std::sync::Arc; +use std::sync::RwLock; +use std::time::Duration; + +use thiserror::Error; + +use super::backends::BackendStore; +use super::backends::ExecutionBackend; +use super::backends::ExecutionMode; +use super::backends::default_backends; +use super::cache::ApprovalCache; +use crate::codex::Session; +use crate::error::CodexErr; +use crate::error::SandboxErr; +use crate::error::get_error_message_ui; +use crate::exec::ExecParams; +use crate::exec::ExecToolCallOutput; +use crate::exec::SandboxType; +use crate::exec::StdoutStream; +use crate::exec::StreamOutput; +use crate::exec::process_exec_tool_call; +use crate::executor::sandbox::select_sandbox; +use crate::function_tool::FunctionCallError; +use crate::protocol::AskForApproval; +use crate::protocol::ReviewDecision; +use crate::protocol::SandboxPolicy; + +#[derive(Clone, Debug)] +pub(crate) struct ExecutorConfig { + pub(crate) sandbox_policy: SandboxPolicy, + pub(crate) sandbox_cwd: PathBuf, + codex_linux_sandbox_exe: Option, +} + +impl ExecutorConfig { + pub(crate) fn new( + sandbox_policy: SandboxPolicy, + sandbox_cwd: PathBuf, + codex_linux_sandbox_exe: Option, + ) -> Self { + Self { + sandbox_policy, + sandbox_cwd, + codex_linux_sandbox_exe, + } + } +} + +#[derive(Debug, Error)] +pub enum ExecError { + #[error(transparent)] + Function(#[from] FunctionCallError), + #[error(transparent)] + Codex(#[from] CodexErr), +} + +impl ExecError { + pub(crate) fn rejection(msg: impl Into) -> Self { + FunctionCallError::RespondToModel(msg.into()).into() + } +} + +/// Coordinates sandbox selection, backend-specific preparation, and command +/// execution for tool calls requested by the model. +pub(crate) struct Executor { + backends: BackendStore, + approval_cache: ApprovalCache, + config: Arc>, +} + +impl Executor { + pub(crate) fn new(config: ExecutorConfig) -> Self { + Self { + backends: default_backends(), + approval_cache: ApprovalCache::default(), + config: Arc::new(RwLock::new(config)), + } + } + + /// Updates the sandbox policy and working directory used for future + /// executions without recreating the executor. + pub(crate) fn update_environment( + &self, + sandbox_policy: SandboxPolicy, + sandbox_cwd: PathBuf, + codex_linux_sandbox_exe: Option, + ) { + if let Ok(mut cfg) = self.config.write() { + cfg.sandbox_policy = sandbox_policy; + cfg.sandbox_cwd = sandbox_cwd; + cfg.codex_linux_sandbox_exe = codex_linux_sandbox_exe; + } + } + + /// Runs a prepared execution request end-to-end: prepares parameters, decides on + /// sandbox placement (prompting the user when necessary), launches the command, + /// and lets the backend post-process the final output. + pub(crate) async fn run( + &self, + mut request: ExecutionRequest, + session: &Session, + approval_policy: AskForApproval, + sub_id: &str, + call_id: &str, + ) -> Result { + // Step 1: Normalise parameters via the selected backend. + let backend = self.backends.for_mode(&request.mode); + request.params = backend + .prepare(request.params, &request.mode) + .map_err(ExecError::from)?; + + // Step 2: Snapshot sandbox configuration so it stays stable for this run. + let config = self + .config + .read() + .map_err(|_| ExecError::rejection("executor config poisoned"))? + .clone(); + + // Step 3: Decide sandbox placement, prompting for approval when needed. + let sandbox_decision = select_sandbox( + &request, + approval_policy, + self.approval_cache.snapshot(), + &config, + session, + sub_id, + call_id, + ) + .await?; + if sandbox_decision.record_session_approval { + self.approval_cache.insert(request.approval_command.clone()); + } + + // Step 4: Launch the command within the chosen sandbox. + let first_attempt = self + .spawn( + request.params.clone(), + sandbox_decision.initial_sandbox, + &config, + request.stdout_stream.clone(), + ) + .await; + + // Step 5: Handle sandbox outcomes, optionally escalating to an unsandboxed retry. + let raw_output = match first_attempt { + Ok(output) => output, + Err(CodexErr::Sandbox(SandboxErr::Timeout { output })) => { + return Err(CodexErr::Sandbox(SandboxErr::Timeout { output }).into()); + } + Err(CodexErr::Sandbox(error @ SandboxErr::Denied { .. })) => { + return if sandbox_decision.escalate_on_failure { + self.retry_without_sandbox( + &*backend, &request, &config, session, sub_id, call_id, error, + ) + .await + } else { + Err(ExecError::rejection(format!( + "failed in sandbox {:?} with execution error: {error:?}", + sandbox_decision.initial_sandbox + ))) + }; + } + Err(err) => return Err(err.into()), + }; + + // Step 6: Allow the backend to post-process the raw output. + backend + .finalize(raw_output, &request.mode) + .await + .map_err(ExecError::from) + } + + /// Fallback path invoked when a sandboxed run is denied so the user can + /// approve rerunning without isolation. + #[allow(clippy::too_many_arguments)] + async fn retry_without_sandbox( + &self, + backend: &dyn ExecutionBackend, + request: &ExecutionRequest, + config: &ExecutorConfig, + session: &Session, + sub_id: &str, + call_id: &str, + sandbox_error: SandboxErr, + ) -> Result { + session + .notify_background_event(sub_id, format!("Execution failed: {sandbox_error}")) + .await; + let decision = session + .request_command_approval( + sub_id.to_string(), + call_id.to_string(), + request.approval_command.clone(), + request.params.cwd.clone(), + Some("command failed; retry without sandbox?".to_string()), + ) + .await; + + match decision { + ReviewDecision::Approved | ReviewDecision::ApprovedForSession => { + if matches!(decision, ReviewDecision::ApprovedForSession) { + self.approval_cache.insert(request.approval_command.clone()); + } + session + .notify_background_event(sub_id, "retrying command without sandbox") + .await; + + let retry_output = self + .spawn( + request.params.clone(), + SandboxType::None, + config, + request.stdout_stream.clone(), + ) + .await?; + + backend + .finalize(retry_output, &request.mode) + .await + .map_err(ExecError::from) + } + ReviewDecision::Denied | ReviewDecision::Abort => { + Err(ExecError::rejection("exec command rejected by user")) + } + } + } + + async fn spawn( + &self, + params: ExecParams, + sandbox: SandboxType, + config: &ExecutorConfig, + stdout_stream: Option, + ) -> Result { + process_exec_tool_call( + params, + sandbox, + &config.sandbox_policy, + &config.sandbox_cwd, + &config.codex_linux_sandbox_exe, + stdout_stream, + ) + .await + } +} + +pub(crate) struct ExecutionRequest { + pub params: ExecParams, + pub approval_command: Vec, + pub mode: ExecutionMode, + pub stdout_stream: Option, +} + +pub(crate) struct NormalizedExecOutput<'a> { + borrowed: Option<&'a ExecToolCallOutput>, + synthetic: Option, +} + +impl<'a> NormalizedExecOutput<'a> { + pub(crate) fn event_output(&'a self) -> &'a ExecToolCallOutput { + match (self.borrowed, self.synthetic.as_ref()) { + (Some(output), _) => output, + (None, Some(output)) => output, + (None, None) => unreachable!("normalized exec output missing data"), + } + } +} + +/// Converts a raw execution result into a uniform view that always exposes an +/// [`ExecToolCallOutput`], synthesizing error output when the command fails +/// before producing a response. +pub(crate) fn normalize_exec_result( + result: &Result, +) -> NormalizedExecOutput<'_> { + match result { + Ok(output) => NormalizedExecOutput { + borrowed: Some(output), + synthetic: None, + }, + Err(ExecError::Codex(CodexErr::Sandbox(SandboxErr::Timeout { output }))) => { + NormalizedExecOutput { + borrowed: Some(output.as_ref()), + synthetic: None, + } + } + Err(err) => { + let message = match err { + ExecError::Function(FunctionCallError::RespondToModel(msg)) => msg.clone(), + ExecError::Codex(e) => get_error_message_ui(e), + }; + let synthetic = ExecToolCallOutput { + exit_code: -1, + stdout: StreamOutput::new(String::new()), + stderr: StreamOutput::new(message.clone()), + aggregated_output: StreamOutput::new(message), + duration: Duration::default(), + timed_out: false, + }; + NormalizedExecOutput { + borrowed: None, + synthetic: Some(synthetic), + } + } + } +} diff --git a/codex-rs/core/src/executor/sandbox.rs b/codex-rs/core/src/executor/sandbox.rs new file mode 100644 index 0000000000..1f87d961e6 --- /dev/null +++ b/codex-rs/core/src/executor/sandbox.rs @@ -0,0 +1,193 @@ +use crate::CODEX_APPLY_PATCH_ARG1; +use crate::apply_patch::ApplyPatchExec; +use crate::codex::Session; +use crate::exec::ExecParams; +use crate::exec::SandboxType; +use crate::executor::ExecError; +use crate::executor::ExecutionMode; +use crate::executor::ExecutionRequest; +use crate::executor::ExecutorConfig; +use crate::function_tool::FunctionCallError; +use crate::safety::SafetyCheck; +use crate::safety::assess_command_safety; +use crate::safety::assess_patch_safety; +use codex_protocol::protocol::AskForApproval; +use codex_protocol::protocol::ReviewDecision; +use std::collections::HashMap; +use std::collections::HashSet; +use std::env; + +/// Sandbox placement options selected for an execution run, including whether +/// to escalate after failures and whether approvals should persist. +pub(crate) struct SandboxDecision { + pub(crate) initial_sandbox: SandboxType, + pub(crate) escalate_on_failure: bool, + pub(crate) record_session_approval: bool, +} + +impl SandboxDecision { + fn auto(sandbox: SandboxType, escalate_on_failure: bool) -> Self { + Self { + initial_sandbox: sandbox, + escalate_on_failure, + record_session_approval: false, + } + } + + fn user_override(record_session_approval: bool) -> Self { + Self { + initial_sandbox: SandboxType::None, + escalate_on_failure: false, + record_session_approval, + } + } +} + +fn should_escalate_on_failure(approval: AskForApproval, sandbox: SandboxType) -> bool { + matches!( + (approval, sandbox), + ( + AskForApproval::UnlessTrusted | AskForApproval::OnFailure, + SandboxType::MacosSeatbelt | SandboxType::LinuxSeccomp + ) + ) +} + +/// Builds the command-line invocation that shells out to `codex apply_patch` +/// using the provided apply-patch request details. +pub(crate) fn build_exec_params_for_apply_patch( + exec: &ApplyPatchExec, + original: &ExecParams, +) -> Result { + let path_to_codex = env::current_exe() + .ok() + .map(|p| p.to_string_lossy().to_string()) + .ok_or_else(|| { + FunctionCallError::RespondToModel( + "failed to determine path to codex executable".to_string(), + ) + })?; + + let patch = exec.action.patch.clone(); + Ok(ExecParams { + command: vec![path_to_codex, CODEX_APPLY_PATCH_ARG1.to_string(), patch], + cwd: exec.action.cwd.clone(), + timeout_ms: original.timeout_ms, + // Run apply_patch with a minimal environment for determinism and to + // avoid leaking host environment variables into the patch process. + env: HashMap::new(), + with_escalated_permissions: original.with_escalated_permissions, + justification: original.justification.clone(), + }) +} + +/// Determines how a command should be sandboxed, prompting the user when +/// policy requires explicit approval. +pub async fn select_sandbox( + request: &ExecutionRequest, + approval_policy: AskForApproval, + approval_cache: HashSet>, + config: &ExecutorConfig, + session: &Session, + sub_id: &str, + call_id: &str, +) -> Result { + match &request.mode { + ExecutionMode::Shell => { + select_shell_sandbox( + request, + approval_policy, + approval_cache, + config, + session, + sub_id, + call_id, + ) + .await + } + ExecutionMode::ApplyPatch(exec) => { + select_apply_patch_sandbox(exec, approval_policy, config) + } + } +} + +async fn select_shell_sandbox( + request: &ExecutionRequest, + approval_policy: AskForApproval, + approved_snapshot: HashSet>, + config: &ExecutorConfig, + session: &Session, + sub_id: &str, + call_id: &str, +) -> Result { + let command_for_safety = if request.approval_command.is_empty() { + request.params.command.clone() + } else { + request.approval_command.clone() + }; + + let safety = assess_command_safety( + &command_for_safety, + approval_policy, + &config.sandbox_policy, + &approved_snapshot, + request.params.with_escalated_permissions.unwrap_or(false), + ); + + match safety { + SafetyCheck::AutoApprove { sandbox_type } => Ok(SandboxDecision::auto( + sandbox_type, + should_escalate_on_failure(approval_policy, sandbox_type), + )), + SafetyCheck::AskUser => { + let decision = session + .request_command_approval( + sub_id.to_string(), + call_id.to_string(), + request.approval_command.clone(), + request.params.cwd.clone(), + request.params.justification.clone(), + ) + .await; + + match decision { + ReviewDecision::Approved => Ok(SandboxDecision::user_override(false)), + ReviewDecision::ApprovedForSession => Ok(SandboxDecision::user_override(true)), + ReviewDecision::Denied | ReviewDecision::Abort => { + Err(ExecError::rejection("exec command rejected by user")) + } + } + } + SafetyCheck::Reject { reason } => Err(ExecError::rejection(format!( + "exec command rejected: {reason}" + ))), + } +} + +fn select_apply_patch_sandbox( + exec: &ApplyPatchExec, + approval_policy: AskForApproval, + config: &ExecutorConfig, +) -> Result { + if exec.user_explicitly_approved_this_action { + return Ok(SandboxDecision::user_override(false)); + } + + match assess_patch_safety( + &exec.action, + approval_policy, + &config.sandbox_policy, + &config.sandbox_cwd, + ) { + SafetyCheck::AutoApprove { sandbox_type } => Ok(SandboxDecision::auto( + sandbox_type, + should_escalate_on_failure(approval_policy, sandbox_type), + )), + SafetyCheck::AskUser => Err(ExecError::rejection( + "patch requires approval but none was recorded", + )), + SafetyCheck::Reject { reason } => { + Err(ExecError::rejection(format!("patch rejected: {reason}"))) + } + } +} diff --git a/codex-rs/core/src/lib.rs b/codex-rs/core/src/lib.rs index 9613e40b12..edc04ab942 100644 --- a/codex-rs/core/src/lib.rs +++ b/codex-rs/core/src/lib.rs @@ -27,6 +27,7 @@ pub mod error; pub mod exec; mod exec_command; pub mod exec_env; +pub mod executor; mod flags; pub mod git_info; pub mod landlock; @@ -60,7 +61,6 @@ pub mod plan_tool; pub mod project_doc; mod rollout; pub(crate) mod safety; -pub mod sandbox; pub mod seatbelt; pub mod shell; pub mod spawn; diff --git a/codex-rs/core/src/sandbox/apply_patch_adapter.rs b/codex-rs/core/src/sandbox/apply_patch_adapter.rs deleted file mode 100644 index 7e55f43279..0000000000 --- a/codex-rs/core/src/sandbox/apply_patch_adapter.rs +++ /dev/null @@ -1,33 +0,0 @@ -use std::collections::HashMap; -use std::env; - -use crate::apply_patch::ApplyPatchExec; -use crate::apply_patch::CODEX_APPLY_PATCH_ARG1; -use crate::exec::ExecParams; -use crate::function_tool::FunctionCallError; - -pub(crate) fn build_exec_params_for_apply_patch( - exec: &ApplyPatchExec, - original: &ExecParams, -) -> Result { - let path_to_codex = env::current_exe() - .ok() - .map(|p| p.to_string_lossy().to_string()) - .ok_or_else(|| { - FunctionCallError::RespondToModel( - "failed to determine path to codex executable".to_string(), - ) - })?; - - let patch = exec.action.patch.clone(); - Ok(ExecParams { - command: vec![path_to_codex, CODEX_APPLY_PATCH_ARG1.to_string(), patch], - cwd: exec.action.cwd.clone(), - timeout_ms: original.timeout_ms, - // Run apply_patch with a minimal environment for determinism and to - // avoid leaking host environment variables into the patch process. - env: HashMap::new(), - with_escalated_permissions: original.with_escalated_permissions, - justification: original.justification.clone(), - }) -} diff --git a/codex-rs/core/src/sandbox/backend.rs b/codex-rs/core/src/sandbox/backend.rs deleted file mode 100644 index 02e5a87e4f..0000000000 --- a/codex-rs/core/src/sandbox/backend.rs +++ /dev/null @@ -1,87 +0,0 @@ -use std::path::Path; -use std::path::PathBuf; - -use async_trait::async_trait; - -use crate::error::Result; -use crate::exec::ExecParams; -use crate::exec::ExecToolCallOutput; -use crate::exec::SandboxType; -use crate::exec::StdoutStream; -use crate::exec::process_exec_tool_call; -use crate::protocol::SandboxPolicy; - -#[async_trait] -pub trait SpawnBackend: Send + Sync { - fn sandbox_type(&self) -> SandboxType; - - async fn spawn( - &self, - params: ExecParams, - sandbox_policy: &SandboxPolicy, - sandbox_cwd: &Path, - codex_linux_sandbox_exe: &Option, - stdout_stream: Option, - ) -> Result { - process_exec_tool_call( - params, - self.sandbox_type(), - sandbox_policy, - sandbox_cwd, - codex_linux_sandbox_exe, - stdout_stream, - ) - .await - } -} - -#[derive(Clone, Copy, Debug, Default)] -pub struct DirectBackend; - -#[async_trait] -impl SpawnBackend for DirectBackend { - fn sandbox_type(&self) -> SandboxType { - SandboxType::None - } -} - -#[derive(Clone, Copy, Debug, Default)] -pub struct SeatbeltBackend; - -#[async_trait] -impl SpawnBackend for SeatbeltBackend { - fn sandbox_type(&self) -> SandboxType { - SandboxType::MacosSeatbelt - } -} - -#[derive(Clone, Copy, Debug, Default)] -pub struct LinuxBackend; - -#[async_trait] -impl SpawnBackend for LinuxBackend { - fn sandbox_type(&self) -> SandboxType { - SandboxType::LinuxSeccomp - } -} - -#[derive(Default)] -pub struct BackendRegistry { - direct: DirectBackend, - seatbelt: SeatbeltBackend, - linux: LinuxBackend, -} - -impl BackendRegistry { - pub fn new() -> Self { - Self::default() - } - - pub fn for_type(&self, sandbox: SandboxType) -> &dyn SpawnBackend { - match sandbox { - SandboxType::None => &self.direct, - SandboxType::MacosSeatbelt => &self.seatbelt, - SandboxType::LinuxSeccomp => &self.linux, - } - } -} diff --git a/codex-rs/core/src/sandbox/mod.rs b/codex-rs/core/src/sandbox/mod.rs deleted file mode 100644 index d1553f0d17..0000000000 --- a/codex-rs/core/src/sandbox/mod.rs +++ /dev/null @@ -1,51 +0,0 @@ -mod apply_patch_adapter; -mod backend; -mod planner; - -pub use backend::BackendRegistry; -pub use backend::DirectBackend; -pub use backend::LinuxBackend; -pub use backend::SeatbeltBackend; -pub use backend::SpawnBackend; -pub use planner::ExecPlan; -pub use planner::ExecRequest; -pub use planner::PatchExecRequest; -pub(crate) use planner::PreparedExec; -pub use planner::plan_apply_patch; -pub use planner::plan_exec; -pub(crate) use planner::prepare_exec_invocation; - -use crate::error::Result; -use crate::exec::ExecParams; -use crate::exec::ExecToolCallOutput; -use crate::exec::StdoutStream; -use crate::protocol::SandboxPolicy; - -pub struct ExecRuntimeContext<'a> { - pub sandbox_policy: &'a SandboxPolicy, - pub sandbox_cwd: &'a std::path::Path, - pub codex_linux_sandbox_exe: &'a Option, - pub stdout_stream: Option, -} - -pub async fn run_with_plan( - params: ExecParams, - plan: &ExecPlan, - registry: &BackendRegistry, - runtime_ctx: &ExecRuntimeContext<'_>, -) -> Result { - let ExecPlan::Approved { sandbox, .. } = plan else { - unreachable!("run_with_plan called without approved plan"); - }; - - registry - .for_type(*sandbox) - .spawn( - params, - runtime_ctx.sandbox_policy, - runtime_ctx.sandbox_cwd, - runtime_ctx.codex_linux_sandbox_exe, - runtime_ctx.stdout_stream.clone(), - ) - .await -} diff --git a/codex-rs/core/src/sandbox/planner.rs b/codex-rs/core/src/sandbox/planner.rs deleted file mode 100644 index 2799087871..0000000000 --- a/codex-rs/core/src/sandbox/planner.rs +++ /dev/null @@ -1,217 +0,0 @@ -use std::collections::HashSet; -use std::path::Path; - -use codex_apply_patch::ApplyPatchAction; - -use super::apply_patch_adapter::build_exec_params_for_apply_patch; -use crate::apply_patch::ApplyPatchExec; -use crate::codex::Session; -use crate::codex::TurnContext; -use crate::exec::ExecParams; -use crate::exec::SandboxType; -use crate::function_tool::FunctionCallError; -use crate::protocol::AskForApproval; -use crate::protocol::ReviewDecision; -use crate::protocol::SandboxPolicy; -use crate::safety::SafetyCheck; -use crate::safety::assess_command_safety; -use crate::safety::assess_patch_safety; - -#[derive(Clone, Debug)] -pub struct ExecRequest<'a> { - pub params: &'a ExecParams, - pub approval: AskForApproval, - pub policy: &'a SandboxPolicy, - pub approved_session_commands: &'a HashSet>, -} - -#[derive(Clone, Debug)] -pub enum ExecPlan { - Reject { - reason: String, - }, - AskUser { - reason: Option, - }, - Approved { - sandbox: SandboxType, - on_failure_escalate: bool, - approved_by_user: bool, - }, -} - -impl ExecPlan { - pub fn approved( - sandbox: SandboxType, - on_failure_escalate: bool, - approved_by_user: bool, - ) -> Self { - ExecPlan::Approved { - sandbox, - on_failure_escalate, - approved_by_user, - } - } -} - -pub fn plan_exec(req: &ExecRequest<'_>) -> ExecPlan { - let params = req.params; - let with_escalated_permissions = params.with_escalated_permissions.unwrap_or(false); - let safety = assess_command_safety( - ¶ms.command, - req.approval, - req.policy, - req.approved_session_commands, - with_escalated_permissions, - ); - - match safety { - SafetyCheck::AutoApprove { sandbox_type } => ExecPlan::Approved { - sandbox: sandbox_type, - on_failure_escalate: should_escalate_on_failure(req.approval, sandbox_type), - approved_by_user: false, - }, - SafetyCheck::AskUser => ExecPlan::AskUser { - reason: params.justification.clone(), - }, - SafetyCheck::Reject { reason } => ExecPlan::Reject { reason }, - } -} - -#[derive(Clone, Debug)] -pub struct PatchExecRequest<'a> { - pub action: &'a ApplyPatchAction, - pub approval: AskForApproval, - pub policy: &'a SandboxPolicy, - pub cwd: &'a Path, - pub user_explicitly_approved: bool, -} - -pub fn plan_apply_patch(req: &PatchExecRequest<'_>) -> ExecPlan { - if req.user_explicitly_approved { - ExecPlan::Approved { - sandbox: SandboxType::None, - on_failure_escalate: false, - approved_by_user: true, - } - } else { - match assess_patch_safety(req.action, req.approval, req.policy, req.cwd) { - SafetyCheck::AutoApprove { sandbox_type } => ExecPlan::Approved { - sandbox: sandbox_type, - on_failure_escalate: should_escalate_on_failure(req.approval, sandbox_type), - approved_by_user: false, - }, - SafetyCheck::AskUser => ExecPlan::AskUser { reason: None }, - SafetyCheck::Reject { reason } => ExecPlan::Reject { reason }, - } - } -} - -#[derive(Debug)] -pub(crate) struct PreparedExec { - pub(crate) params: ExecParams, - pub(crate) plan: ExecPlan, - pub(crate) command_for_display: Vec, - pub(crate) apply_patch_exec: Option, -} - -pub(crate) async fn prepare_exec_invocation( - sess: &Session, - turn_context: &TurnContext, - sub_id: &str, - call_id: &str, - params: ExecParams, - apply_patch_exec: Option, - approved_session_commands: HashSet>, -) -> Result { - let mut params = params; - - let (plan, command_for_display) = if let Some(exec) = apply_patch_exec.as_ref() { - params = build_exec_params_for_apply_patch(exec, ¶ms)?; - let command_for_display = vec!["apply_patch".to_string(), exec.action.patch.clone()]; - - let plan_req = PatchExecRequest { - action: &exec.action, - approval: turn_context.approval_policy, - policy: &turn_context.sandbox_policy, - cwd: &turn_context.cwd, - user_explicitly_approved: exec.user_explicitly_approved_this_action, - }; - - let plan = match plan_apply_patch(&plan_req) { - plan @ ExecPlan::Approved { .. } => plan, - ExecPlan::AskUser { .. } => { - return Err(FunctionCallError::RespondToModel( - "patch requires approval but none was recorded".to_string(), - )); - } - ExecPlan::Reject { reason } => { - return Err(FunctionCallError::RespondToModel(format!( - "patch rejected: {reason}" - ))); - } - }; - - (plan, command_for_display) - } else { - let command_for_display = params.command.clone(); - - let initial_plan = plan_exec(&ExecRequest { - params: ¶ms, - approval: turn_context.approval_policy, - policy: &turn_context.sandbox_policy, - approved_session_commands: &approved_session_commands, - }); - - let plan = match initial_plan { - plan @ ExecPlan::Approved { .. } => plan, - ExecPlan::AskUser { reason } => { - let decision = sess - .request_command_approval( - sub_id.to_string(), - call_id.to_string(), - params.command.clone(), - params.cwd.clone(), - reason, - ) - .await; - match decision { - ReviewDecision::Approved => ExecPlan::approved(SandboxType::None, false, true), - ReviewDecision::ApprovedForSession => { - sess.add_approved_command(params.command.clone()).await; - ExecPlan::approved(SandboxType::None, false, true) - } - ReviewDecision::Denied | ReviewDecision::Abort => { - return Err(FunctionCallError::RespondToModel( - "exec command rejected by user".to_string(), - )); - } - } - } - ExecPlan::Reject { reason } => { - return Err(FunctionCallError::RespondToModel(format!( - "exec command rejected: {reason:?}" - ))); - } - }; - - (plan, command_for_display) - }; - - Ok(PreparedExec { - params, - plan, - command_for_display, - apply_patch_exec, - }) -} - -fn should_escalate_on_failure(approval: AskForApproval, sandbox: SandboxType) -> bool { - matches!( - (approval, sandbox), - ( - AskForApproval::UnlessTrusted | AskForApproval::OnFailure, - SandboxType::MacosSeatbelt | SandboxType::LinuxSeccomp - ) - ) -} diff --git a/codex-rs/core/src/state/session.rs b/codex-rs/core/src/state/session.rs index ee0c5fc976..f170a10c18 100644 --- a/codex-rs/core/src/state/session.rs +++ b/codex-rs/core/src/state/session.rs @@ -1,7 +1,5 @@ //! Session-wide mutable state. -use std::collections::HashSet; - use codex_protocol::models::ResponseItem; use crate::conversation_history::ConversationHistory; @@ -12,7 +10,6 @@ use crate::protocol::TokenUsageInfo; /// Persistent, session-scoped state previously stored directly on `Session`. #[derive(Default)] pub(crate) struct SessionState { - pub(crate) approved_commands: HashSet>, pub(crate) history: ConversationHistory, pub(crate) token_info: Option, pub(crate) latest_rate_limits: Option, @@ -44,15 +41,6 @@ impl SessionState { self.history.replace(items); } - // Approved command helpers - pub(crate) fn add_approved_command(&mut self, cmd: Vec) { - self.approved_commands.insert(cmd); - } - - pub(crate) fn approved_commands_ref(&self) -> &HashSet> { - &self.approved_commands - } - // Token/rate limit helpers pub(crate) fn update_token_info_from_usage( &mut self, From ed45f852093493f0137386bee5f701cb28262a3a Mon Sep 17 00:00:00 2001 From: jimmyfraiture Date: Tue, 30 Sep 2025 11:27:23 +0100 Subject: [PATCH 05/17] RV 1 --- codex-rs/core/src/apply_patch.rs | 25 +++ codex-rs/core/src/codex.rs | 3 +- codex-rs/core/src/executor/backends.rs | 35 ++-- codex-rs/core/src/executor/cache.rs | 26 +++ codex-rs/core/src/executor/runner.rs | 83 +++++++-- codex-rs/core/src/executor/sandbox.rs | 229 ++++++++++++++++++++++--- 6 files changed, 350 insertions(+), 51 deletions(-) diff --git a/codex-rs/core/src/apply_patch.rs b/codex-rs/core/src/apply_patch.rs index 8f372fb2d3..7efae19f15 100644 --- a/codex-rs/core/src/apply_patch.rs +++ b/codex-rs/core/src/apply_patch.rs @@ -109,3 +109,28 @@ pub(crate) fn convert_apply_patch_to_protocol( } result } + +#[cfg(test)] +mod tests { + use super::*; + use pretty_assertions::assert_eq; + + use tempfile::tempdir; + + #[test] + fn convert_apply_patch_maps_add_variant() { + let tmp = tempdir().expect("tmp"); + let p = tmp.path().join("a.txt"); + // Create an action with a single Add change + let action = ApplyPatchAction::new_add_for_test(&p, "hello".to_string()); + + let got = convert_apply_patch_to_protocol(&action); + + assert_eq!( + got.get(&p), + Some(&FileChange::Add { + content: "hello".to_string() + }) + ); + } +} diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index 84b1c53b9c..cde64105f4 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -2616,10 +2616,9 @@ async fn handle_container_exec_with_params( stdout_stream, }; - sess.executor.update_environment( + sess.executor.update_environment( // todo this should not be needed ? Not sure what it means turn_context.sandbox_policy.clone(), turn_context.cwd.clone(), - sess.services.codex_linux_sandbox_exe.clone(), ); let output_result = sess diff --git a/codex-rs/core/src/executor/backends.rs b/codex-rs/core/src/executor/backends.rs index aaa7f08030..09c786a9d4 100644 --- a/codex-rs/core/src/executor/backends.rs +++ b/codex-rs/core/src/executor/backends.rs @@ -1,11 +1,13 @@ +use std::collections::HashMap; +use std::env; use std::sync::Arc; use async_trait::async_trait; use crate::apply_patch::ApplyPatchExec; +use crate::CODEX_APPLY_PATCH_ARG1; use crate::exec::ExecParams; use crate::exec::ExecToolCallOutput; -use crate::executor::sandbox::build_exec_params_for_apply_patch; use crate::function_tool::FunctionCallError; pub(crate) enum ExecutionMode { @@ -23,14 +25,6 @@ pub(crate) trait ExecutionBackend: Send + Sync { // Required for downcasting the apply_patch. mode: &ExecutionMode, ) -> Result; - - async fn finalize( - &self, - output: ExecToolCallOutput, - _mode: &ExecutionMode, - ) -> Result { - Ok(output) - } } pub(crate) struct BackendStore { @@ -86,7 +80,28 @@ impl ExecutionBackend for ApplyPatchBackend { mode: &ExecutionMode, ) -> Result { match mode { - ExecutionMode::ApplyPatch(exec) => build_exec_params_for_apply_patch(exec, ¶ms), + ExecutionMode::ApplyPatch(exec) => { + let path_to_codex = env::current_exe() + .ok() + .map(|p| p.to_string_lossy().to_string()) + .ok_or_else(|| { + FunctionCallError::RespondToModel( + "failed to determine path to codex executable".to_string(), + ) + })?; + + let patch = exec.action.patch.clone(); + Ok(ExecParams { + command: vec![path_to_codex, CODEX_APPLY_PATCH_ARG1.to_string(), patch], + cwd: exec.action.cwd.clone(), + timeout_ms: params.timeout_ms, + // Run apply_patch with a minimal environment for determinism and to + // avoid leaking host environment variables into the patch process. + env: HashMap::new(), + with_escalated_permissions: params.with_escalated_permissions, + justification: params.justification, + }) + }, ExecutionMode::Shell => Err(FunctionCallError::RespondToModel( "apply_patch backend invoked without patch context".to_string(), )), diff --git a/codex-rs/core/src/executor/cache.rs b/codex-rs/core/src/executor/cache.rs index 5629c8893a..737ecb927c 100644 --- a/codex-rs/core/src/executor/cache.rs +++ b/codex-rs/core/src/executor/cache.rs @@ -23,3 +23,29 @@ impl ApprovalCache { self.inner.lock().map(|g| g.clone()).unwrap_or_default() } } + +#[cfg(test)] +mod tests { + use super::*; + use pretty_assertions::assert_eq; + + #[test] + fn insert_ignores_empty_and_dedupes() { + let cache = ApprovalCache::default(); + + // Empty should be ignored + cache.insert(vec![]); + assert!(cache.snapshot().is_empty()); + + // Insert a command and verify snapshot contains it + let cmd = vec!["foo".to_string(), "bar".to_string()]; + cache.insert(cmd.clone()); + let snap1 = cache.snapshot(); + assert!(snap1.contains(&cmd)); + + // Reinserting should not create duplicates + cache.insert(cmd); + let snap2 = cache.snapshot(); + assert_eq!(snap1, snap2); + } +} diff --git a/codex-rs/core/src/executor/runner.rs b/codex-rs/core/src/executor/runner.rs index 708568a73b..a65da13802 100644 --- a/codex-rs/core/src/executor/runner.rs +++ b/codex-rs/core/src/executor/runner.rs @@ -84,12 +84,10 @@ impl Executor { &self, sandbox_policy: SandboxPolicy, sandbox_cwd: PathBuf, - codex_linux_sandbox_exe: Option, ) { if let Ok(mut cfg) = self.config.write() { cfg.sandbox_policy = sandbox_policy; cfg.sandbox_cwd = sandbox_cwd; - cfg.codex_linux_sandbox_exe = codex_linux_sandbox_exe; } } @@ -164,11 +162,7 @@ impl Executor { Err(err) => return Err(err.into()), }; - // Step 6: Allow the backend to post-process the raw output. - backend - .finalize(raw_output, &request.mode) - .await - .map_err(ExecError::from) + Ok(raw_output) } /// Fallback path invoked when a sandboxed run is denied so the user can @@ -215,10 +209,7 @@ impl Executor { ) .await?; - backend - .finalize(retry_output, &request.mode) - .await - .map_err(ExecError::from) + Ok(retry_output) } ReviewDecision::Denied | ReviewDecision::Abort => { Err(ExecError::rejection("exec command rejected by user")) @@ -304,3 +295,73 @@ pub(crate) fn normalize_exec_result( } } } + +#[cfg(test)] +mod tests { + use super::*; + use crate::error::CodexErr; + use crate::error::EnvVarError; + use crate::error::SandboxErr; + use crate::exec::StreamOutput; + use pretty_assertions::assert_eq; + + fn make_output(text: &str) -> ExecToolCallOutput { + ExecToolCallOutput { + exit_code: 1, + stdout: StreamOutput::new(String::new()), + stderr: StreamOutput::new(String::new()), + aggregated_output: StreamOutput::new(text.to_string()), + duration: Duration::from_millis(123), + timed_out: false, + } + } + + #[test] + fn normalize_success_borrows() { + let out = make_output("ok"); + let result: Result = Ok(out); + let normalized = normalize_exec_result(&result); + assert_eq!(normalized.event_output().aggregated_output.text, "ok"); + } + + #[test] + fn normalize_timeout_borrows_embedded_output() { + let out = make_output("timed out payload"); + let err = CodexErr::Sandbox(SandboxErr::Timeout { + output: Box::new(out), + }); + let result: Result = Err(ExecError::Codex(err)); + let normalized = normalize_exec_result(&result); + assert_eq!( + normalized.event_output().aggregated_output.text, + "timed out payload" + ); + } + + #[test] + fn normalize_function_error_synthesizes_payload() { + let err = FunctionCallError::RespondToModel("boom".to_string()); + let result: Result = Err(ExecError::Function(err)); + let normalized = normalize_exec_result(&result); + assert_eq!(normalized.event_output().aggregated_output.text, "boom"); + } + + #[test] + fn normalize_codex_error_synthesizes_user_message() { + // Use a simple EnvVar error which formats to a clear message + let e = CodexErr::EnvVar(EnvVarError { + var: "FOO".to_string(), + instructions: Some("set it".to_string()), + }); + let result: Result = Err(ExecError::Codex(e)); + let normalized = normalize_exec_result(&result); + assert!( + normalized + .event_output() + .aggregated_output + .text + .contains("Missing environment variable: `FOO`"), + "expected synthesized user-friendly message" + ); + } +} diff --git a/codex-rs/core/src/executor/sandbox.rs b/codex-rs/core/src/executor/sandbox.rs index 1f87d961e6..87fdc0f00a 100644 --- a/codex-rs/core/src/executor/sandbox.rs +++ b/codex-rs/core/src/executor/sandbox.rs @@ -53,34 +53,6 @@ fn should_escalate_on_failure(approval: AskForApproval, sandbox: SandboxType) -> ) } -/// Builds the command-line invocation that shells out to `codex apply_patch` -/// using the provided apply-patch request details. -pub(crate) fn build_exec_params_for_apply_patch( - exec: &ApplyPatchExec, - original: &ExecParams, -) -> Result { - let path_to_codex = env::current_exe() - .ok() - .map(|p| p.to_string_lossy().to_string()) - .ok_or_else(|| { - FunctionCallError::RespondToModel( - "failed to determine path to codex executable".to_string(), - ) - })?; - - let patch = exec.action.patch.clone(); - Ok(ExecParams { - command: vec![path_to_codex, CODEX_APPLY_PATCH_ARG1.to_string(), patch], - cwd: exec.action.cwd.clone(), - timeout_ms: original.timeout_ms, - // Run apply_patch with a minimal environment for determinism and to - // avoid leaking host environment variables into the patch process. - env: HashMap::new(), - with_escalated_permissions: original.with_escalated_permissions, - justification: original.justification.clone(), - }) -} - /// Determines how a command should be sandboxed, prompting the user when /// policy requires explicit approval. pub async fn select_sandbox( @@ -191,3 +163,204 @@ fn select_apply_patch_sandbox( } } } + +#[cfg(test)] +mod tests { + use super::*; + use crate::codex::make_session_and_context; + use crate::exec::ExecParams; + use crate::protocol::SandboxPolicy; + use codex_apply_patch::ApplyPatchAction; + use pretty_assertions::assert_eq; + + #[tokio::test] + async fn select_apply_patch_user_override_when_explicit() { + let (session, _ctx) = make_session_and_context(); + let tmp = tempfile::tempdir().expect("tmp"); + let p = tmp.path().join("a.txt"); + let action = ApplyPatchAction::new_add_for_test(&p, "hello".to_string()); + let exec = ApplyPatchExec { + action, + user_explicitly_approved_this_action: true, + }; + let cfg = ExecutorConfig::new(SandboxPolicy::ReadOnly, std::env::temp_dir(), None); + let request = ExecutionRequest { + params: ExecParams { + command: vec!["apply_patch".into()], + cwd: std::env::temp_dir(), + timeout_ms: None, + env: std::collections::HashMap::new(), + with_escalated_permissions: None, + justification: None, + }, + approval_command: vec!["apply_patch".into()], + mode: ExecutionMode::ApplyPatch(exec), + stdout_stream: None, + }; + let decision = select_sandbox( + &request, + AskForApproval::OnRequest, + Default::default(), + &cfg, + &session, + "sub", + "call", + ) + .await + .expect("ok"); + // Explicit user override runs without sandbox + assert_eq!(decision.initial_sandbox, SandboxType::None); + assert_eq!(decision.escalate_on_failure, false); + } + + #[tokio::test] + async fn select_apply_patch_autoapprove_in_danger() { + let (session, _ctx) = make_session_and_context(); + let tmp = tempfile::tempdir().expect("tmp"); + let p = tmp.path().join("a.txt"); + let action = ApplyPatchAction::new_add_for_test(&p, "hello".to_string()); + let exec = ApplyPatchExec { + action, + user_explicitly_approved_this_action: false, + }; + let cfg = ExecutorConfig::new(SandboxPolicy::DangerFullAccess, std::env::temp_dir(), None); + let request = ExecutionRequest { + params: ExecParams { + command: vec!["apply_patch".into()], + cwd: std::env::temp_dir(), + timeout_ms: None, + env: std::collections::HashMap::new(), + with_escalated_permissions: None, + justification: None, + }, + approval_command: vec!["apply_patch".into()], + mode: ExecutionMode::ApplyPatch(exec), + stdout_stream: None, + }; + let decision = select_sandbox( + &request, + AskForApproval::OnRequest, + Default::default(), + &cfg, + &session, + "sub", + "call", + ) + .await + .expect("ok"); + // On platforms with a sandbox, DangerFullAccess still prefers it + let expected = crate::safety::get_platform_sandbox().unwrap_or(SandboxType::None); + assert_eq!(decision.initial_sandbox, expected); + assert_eq!(decision.escalate_on_failure, false); + } + + #[tokio::test] + async fn select_apply_patch_requires_approval_on_unless_trusted() { + let (session, _ctx) = make_session_and_context(); + let tempdir = tempfile::tempdir().expect("tmpdir"); + let p = tempdir.path().join("a.txt"); + let action = ApplyPatchAction::new_add_for_test(&p, "hello".to_string()); + let exec = ApplyPatchExec { + action, + user_explicitly_approved_this_action: false, + }; + let cfg = ExecutorConfig::new(SandboxPolicy::ReadOnly, std::env::temp_dir(), None); + let request = ExecutionRequest { + params: ExecParams { + command: vec!["apply_patch".into()], + cwd: std::env::temp_dir(), + timeout_ms: None, + env: std::collections::HashMap::new(), + with_escalated_permissions: None, + justification: None, + }, + approval_command: vec!["apply_patch".into()], + mode: ExecutionMode::ApplyPatch(exec), + stdout_stream: None, + }; + let result = select_sandbox( + &request, + AskForApproval::UnlessTrusted, + Default::default(), + &cfg, + &session, + "sub", + "call", + ) + .await; + match result { + Ok(_) => panic!("expected error"), + Err(ExecError::Function(FunctionCallError::RespondToModel(msg))) => { + assert!(msg.contains("requires approval")) + } + Err(other) => panic!("unexpected error: {other:?}"), + } + } + + #[tokio::test] + async fn select_shell_autoapprove_in_danger_mode() { + let (session, _ctx) = make_session_and_context(); + let cfg = ExecutorConfig::new(SandboxPolicy::DangerFullAccess, std::env::temp_dir(), None); + let request = ExecutionRequest { + params: ExecParams { + command: vec!["some-unknown".into()], + cwd: std::env::temp_dir(), + timeout_ms: None, + env: std::collections::HashMap::new(), + with_escalated_permissions: None, + justification: None, + }, + approval_command: vec!["some-unknown".into()], + mode: ExecutionMode::Shell, + stdout_stream: None, + }; + let decision = select_sandbox( + &request, + AskForApproval::OnRequest, + Default::default(), + &cfg, + &session, + "sub", + "call", + ) + .await + .expect("ok"); + assert_eq!(decision.initial_sandbox, SandboxType::None); + assert_eq!(decision.escalate_on_failure, false); + } + + #[cfg(any(target_os = "macos", target_os = "linux"))] + #[tokio::test] + async fn select_shell_escalates_on_failure_with_platform_sandbox() { + let (session, _ctx) = make_session_and_context(); + let cfg = ExecutorConfig::new(SandboxPolicy::ReadOnly, std::env::temp_dir(), None); + let request = ExecutionRequest { + params: ExecParams { + // Unknown command => untrusted but not flagged dangerous + command: vec!["some-unknown".into()], + cwd: std::env::temp_dir(), + timeout_ms: None, + env: std::collections::HashMap::new(), + with_escalated_permissions: None, + justification: None, + }, + approval_command: vec!["some-unknown".into()], + mode: ExecutionMode::Shell, + stdout_stream: None, + }; + let decision = select_sandbox( + &request, + AskForApproval::OnFailure, + Default::default(), + &cfg, + &session, + "sub", + "call", + ) + .await + .expect("ok"); + // On macOS/Linux we should have a platform sandbox and escalate on failure + assert_ne!(decision.initial_sandbox, SandboxType::None); + assert_eq!(decision.escalate_on_failure, true); + } +} From 2dd226891a80bb8d821ee19ece65d365b71f5330 Mon Sep 17 00:00:00 2001 From: jimmyfraiture Date: Tue, 30 Sep 2025 11:36:51 +0100 Subject: [PATCH 06/17] RV 2 --- codex-rs/core/src/apply_patch.rs | 2 +- codex-rs/core/src/codex.rs | 2 +- codex-rs/core/src/executor/backends.rs | 33 +++++++------------------- codex-rs/core/src/executor/runner.rs | 21 ++++------------ 4 files changed, 15 insertions(+), 43 deletions(-) diff --git a/codex-rs/core/src/apply_patch.rs b/codex-rs/core/src/apply_patch.rs index 7efae19f15..f1653bf9e9 100644 --- a/codex-rs/core/src/apply_patch.rs +++ b/codex-rs/core/src/apply_patch.rs @@ -114,7 +114,7 @@ pub(crate) fn convert_apply_patch_to_protocol( mod tests { use super::*; use pretty_assertions::assert_eq; - + use tempfile::tempdir; #[test] diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index cde64105f4..2877098444 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -2616,7 +2616,7 @@ async fn handle_container_exec_with_params( stdout_stream, }; - sess.executor.update_environment( // todo this should not be needed ? Not sure what it means + sess.executor.update_environment( turn_context.sandbox_policy.clone(), turn_context.cwd.clone(), ); diff --git a/codex-rs/core/src/executor/backends.rs b/codex-rs/core/src/executor/backends.rs index 09c786a9d4..aa744b8174 100644 --- a/codex-rs/core/src/executor/backends.rs +++ b/codex-rs/core/src/executor/backends.rs @@ -1,13 +1,11 @@ use std::collections::HashMap; use std::env; -use std::sync::Arc; use async_trait::async_trait; -use crate::apply_patch::ApplyPatchExec; use crate::CODEX_APPLY_PATCH_ARG1; +use crate::apply_patch::ApplyPatchExec; use crate::exec::ExecParams; -use crate::exec::ExecToolCallOutput; use crate::function_tool::FunctionCallError; pub(crate) enum ExecutionMode { @@ -27,31 +25,16 @@ pub(crate) trait ExecutionBackend: Send + Sync { ) -> Result; } -pub(crate) struct BackendStore { - shell: Arc, - apply_patch: Arc, -} - -impl BackendStore { - pub(crate) fn new() -> Self { - Self { - shell: Arc::new(ShellBackend), - apply_patch: Arc::new(ApplyPatchBackend), - } - } +static SHELL_BACKEND: ShellBackend = ShellBackend; +static APPLY_PATCH_BACKEND: ApplyPatchBackend = ApplyPatchBackend; - pub(crate) fn for_mode(&self, mode: &ExecutionMode) -> Arc { - match mode { - ExecutionMode::Shell => self.shell.clone(), - ExecutionMode::ApplyPatch(_) => self.apply_patch.clone(), - } +pub(crate) fn backend_for_mode(mode: &ExecutionMode) -> &'static dyn ExecutionBackend { + match mode { + ExecutionMode::Shell => &SHELL_BACKEND, + ExecutionMode::ApplyPatch(_) => &APPLY_PATCH_BACKEND, } } -pub(crate) fn default_backends() -> BackendStore { - BackendStore::new() -} - struct ShellBackend; #[async_trait] @@ -101,7 +84,7 @@ impl ExecutionBackend for ApplyPatchBackend { with_escalated_permissions: params.with_escalated_permissions, justification: params.justification, }) - }, + } ExecutionMode::Shell => Err(FunctionCallError::RespondToModel( "apply_patch backend invoked without patch context".to_string(), )), diff --git a/codex-rs/core/src/executor/runner.rs b/codex-rs/core/src/executor/runner.rs index a65da13802..9161e78659 100644 --- a/codex-rs/core/src/executor/runner.rs +++ b/codex-rs/core/src/executor/runner.rs @@ -5,10 +5,8 @@ use std::time::Duration; use thiserror::Error; -use super::backends::BackendStore; -use super::backends::ExecutionBackend; use super::backends::ExecutionMode; -use super::backends::default_backends; +use super::backends::backend_for_mode; use super::cache::ApprovalCache; use crate::codex::Session; use crate::error::CodexErr; @@ -64,7 +62,6 @@ impl ExecError { /// Coordinates sandbox selection, backend-specific preparation, and command /// execution for tool calls requested by the model. pub(crate) struct Executor { - backends: BackendStore, approval_cache: ApprovalCache, config: Arc>, } @@ -72,7 +69,6 @@ pub(crate) struct Executor { impl Executor { pub(crate) fn new(config: ExecutorConfig) -> Self { Self { - backends: default_backends(), approval_cache: ApprovalCache::default(), config: Arc::new(RwLock::new(config)), } @@ -80,11 +76,7 @@ impl Executor { /// Updates the sandbox policy and working directory used for future /// executions without recreating the executor. - pub(crate) fn update_environment( - &self, - sandbox_policy: SandboxPolicy, - sandbox_cwd: PathBuf, - ) { + pub(crate) fn update_environment(&self, sandbox_policy: SandboxPolicy, sandbox_cwd: PathBuf) { if let Ok(mut cfg) = self.config.write() { cfg.sandbox_policy = sandbox_policy; cfg.sandbox_cwd = sandbox_cwd; @@ -103,7 +95,7 @@ impl Executor { call_id: &str, ) -> Result { // Step 1: Normalise parameters via the selected backend. - let backend = self.backends.for_mode(&request.mode); + let backend = backend_for_mode(&request.mode); request.params = backend .prepare(request.params, &request.mode) .map_err(ExecError::from)?; @@ -148,10 +140,8 @@ impl Executor { } Err(CodexErr::Sandbox(error @ SandboxErr::Denied { .. })) => { return if sandbox_decision.escalate_on_failure { - self.retry_without_sandbox( - &*backend, &request, &config, session, sub_id, call_id, error, - ) - .await + self.retry_without_sandbox(&request, &config, session, sub_id, call_id, error) + .await } else { Err(ExecError::rejection(format!( "failed in sandbox {:?} with execution error: {error:?}", @@ -170,7 +160,6 @@ impl Executor { #[allow(clippy::too_many_arguments)] async fn retry_without_sandbox( &self, - backend: &dyn ExecutionBackend, request: &ExecutionRequest, config: &ExecutorConfig, session: &Session, From 4656160e31bdab4a773aac2e17a77fac92207156 Mon Sep 17 00:00:00 2001 From: jimmyfraiture Date: Tue, 30 Sep 2025 11:40:44 +0100 Subject: [PATCH 07/17] RV 3 --- codex-rs/core/src/codex.rs | 41 +++++++++++++----------------- codex-rs/core/src/state/service.rs | 4 +-- 2 files changed, 19 insertions(+), 26 deletions(-) diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index 2877098444..40551bfe50 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -263,7 +263,6 @@ pub(crate) struct Session { pub(crate) active_turn: Mutex>, services: SessionServices, next_internal_sub_id: AtomicU64, - executor: Executor, } /// The context needed for a single turn of the conversation. @@ -457,17 +456,15 @@ impl Session { unified_exec_manager: UnifiedExecSessionManager::default(), notifier: notify, rollout: Mutex::new(Some(rollout_recorder)), - codex_linux_sandbox_exe: config.codex_linux_sandbox_exe.clone(), user_shell: default_shell, show_raw_agent_reasoning: config.show_raw_agent_reasoning, + executor: Executor::new(ExecutorConfig::new( + turn_context.sandbox_policy.clone(), + turn_context.cwd.clone(), + config.codex_linux_sandbox_exe.clone(), + )), }; - let executor = Executor::new(ExecutorConfig::new( - turn_context.sandbox_policy.clone(), - turn_context.cwd.clone(), - config.codex_linux_sandbox_exe.clone(), - )); - let sess = Arc::new(Session { conversation_id, tx_event: tx_event.clone(), @@ -475,7 +472,6 @@ impl Session { active_turn: Mutex::new(None), services, next_internal_sub_id: AtomicU64::new(0), - executor, }); // Dispatch the SessionConfiguredEvent first and then report any errors. @@ -921,6 +917,7 @@ impl Session { .await; let result = self + .services .executor .run(request, self, approval_policy, &sub_id, &call_id) .await; @@ -2616,7 +2613,7 @@ async fn handle_container_exec_with_params( stdout_stream, }; - sess.executor.update_environment( + sess.services.executor.update_environment( turn_context.sandbox_policy.clone(), turn_context.cwd.clone(), ); @@ -3203,15 +3200,14 @@ mod tests { unified_exec_manager: UnifiedExecSessionManager::default(), notifier: UserNotifier::default(), rollout: Mutex::new(None), - codex_linux_sandbox_exe: None, user_shell: shell::Shell::Unknown, show_raw_agent_reasoning: config.show_raw_agent_reasoning, + executor: Executor::new(ExecutorConfig::new( + turn_context.sandbox_policy.clone(), + turn_context.cwd.clone(), + None, + )), }; - let executor = Executor::new(ExecutorConfig::new( - turn_context.sandbox_policy.clone(), - turn_context.cwd.clone(), - None, - )); let session = Session { conversation_id, tx_event, @@ -3219,7 +3215,6 @@ mod tests { active_turn: Mutex::new(None), services, next_internal_sub_id: AtomicU64::new(0), - executor, }; (session, turn_context) } @@ -3276,15 +3271,14 @@ mod tests { unified_exec_manager: UnifiedExecSessionManager::default(), notifier: UserNotifier::default(), rollout: Mutex::new(None), - codex_linux_sandbox_exe: None, user_shell: shell::Shell::Unknown, show_raw_agent_reasoning: config.show_raw_agent_reasoning, + executor: Executor::new(ExecutorConfig::new( + config.sandbox_policy.clone(), + config.cwd.clone(), + None, + )), }; - let executor = Executor::new(ExecutorConfig::new( - config.sandbox_policy.clone(), - config.cwd.clone(), - None, - )); let session = Arc::new(Session { conversation_id, tx_event, @@ -3292,7 +3286,6 @@ mod tests { active_turn: Mutex::new(None), services, next_internal_sub_id: AtomicU64::new(0), - executor, }); (session, turn_context, rx_event) } diff --git a/codex-rs/core/src/state/service.rs b/codex-rs/core/src/state/service.rs index a67b9dda93..994352eddf 100644 --- a/codex-rs/core/src/state/service.rs +++ b/codex-rs/core/src/state/service.rs @@ -1,9 +1,9 @@ use crate::RolloutRecorder; use crate::exec_command::ExecSessionManager; +use crate::executor::Executor; use crate::mcp_connection_manager::McpConnectionManager; use crate::unified_exec::UnifiedExecSessionManager; use crate::user_notification::UserNotifier; -use std::path::PathBuf; use tokio::sync::Mutex; pub(crate) struct SessionServices { @@ -12,7 +12,7 @@ pub(crate) struct SessionServices { pub(crate) unified_exec_manager: UnifiedExecSessionManager, pub(crate) notifier: UserNotifier, pub(crate) rollout: Mutex>, - pub(crate) codex_linux_sandbox_exe: Option, pub(crate) user_shell: crate::shell::Shell, pub(crate) show_raw_agent_reasoning: bool, + pub(crate) executor: Executor, } From 1d87628d419c9a2fae08b227f612ad5d2b58b9d4 Mon Sep 17 00:00:00 2001 From: jimmyfraiture Date: Tue, 30 Sep 2025 11:55:43 +0100 Subject: [PATCH 08/17] RV 4 --- codex-rs/core/src/codex.rs | 62 ++++++++++---------------- codex-rs/core/src/executor/backends.rs | 8 ++++ codex-rs/core/src/executor/runner.rs | 48 ++++++++++++++++++-- codex-rs/core/src/executor/sandbox.rs | 5 +++ 4 files changed, 80 insertions(+), 43 deletions(-) diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index 40551bfe50..d5d3e22830 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -906,7 +906,11 @@ impl Session { &self, turn_diff_tracker: &mut TurnDiffTracker, begin_ctx: ExecCommandContext, - request: ExecutionRequest, + params: ExecParams, + approval_command: Vec, + mode: ExecutionMode, + stdout_stream: Option, + use_shell_profile: bool, approval_policy: AskForApproval, ) -> Result { let is_apply_patch = begin_ctx.apply_patch.is_some(); @@ -916,6 +920,14 @@ impl Session { self.on_exec_command_begin(turn_diff_tracker, begin_ctx.clone()) .await; + let request = ExecutionRequest { + params, + approval_command, + mode, + stdout_stream, + use_shell_profile, + }; + let result = self .services .executor @@ -1030,7 +1042,7 @@ impl Session { &self.services.notifier } - fn user_shell(&self) -> &shell::Shell { + pub(crate) fn user_shell(&self) -> &shell::Shell { &self.services.user_shell } @@ -2508,24 +2520,6 @@ fn parse_container_exec_arguments( }) } -fn maybe_translate_shell_command( - params: ExecParams, - sess: &Session, - turn_context: &TurnContext, -) -> ExecParams { - let should_translate = matches!(sess.user_shell(), crate::shell::Shell::PowerShell(_)) - || turn_context.shell_environment_policy.use_profile; - - if should_translate - && let Some(command) = sess - .user_shell() - .format_default_shell_invocation(params.command.clone()) - { - return ExecParams { command, ..params }; - } - params -} - async fn handle_container_exec_with_params( params: ExecParams, sess: &Session, @@ -2590,29 +2584,11 @@ async fn handle_container_exec_with_params( ), }; - let translated_params = maybe_translate_shell_command(params, sess, turn_context); - let stdout_stream = if exec_command_context.apply_patch.is_some() { - None - } else { - Some(StdoutStream { - sub_id: sub_id.clone(), - call_id: call_id.clone(), - tx_event: sess.tx_event.clone(), - }) - }; - let mode = match apply_patch_exec { Some(exec) => ExecutionMode::ApplyPatch(exec), None => ExecutionMode::Shell, }; - let request = ExecutionRequest { - params: translated_params, - approval_command: command_for_display, - mode, - stdout_stream, - }; - sess.services.executor.update_environment( turn_context.sandbox_policy.clone(), turn_context.cwd.clone(), @@ -2622,7 +2598,15 @@ async fn handle_container_exec_with_params( .run_exec_with_events( turn_diff_tracker, exec_command_context, - request, + params, + command_for_display, + mode, + Some(StdoutStream { + sub_id: sub_id.clone(), // todo we should not nead the ids + call_id: call_id.clone(), + tx_event: sess.tx_event.clone(), + }), + turn_context.shell_environment_policy.use_profile, turn_context.approval_policy, ) .await; diff --git a/codex-rs/core/src/executor/backends.rs b/codex-rs/core/src/executor/backends.rs index aa744b8174..95cdb3cacb 100644 --- a/codex-rs/core/src/executor/backends.rs +++ b/codex-rs/core/src/executor/backends.rs @@ -23,6 +23,10 @@ pub(crate) trait ExecutionBackend: Send + Sync { // Required for downcasting the apply_patch. mode: &ExecutionMode, ) -> Result; + + fn stream_stdout(&self, _mode: &ExecutionMode) -> bool { + true + } } static SHELL_BACKEND: ShellBackend = ShellBackend; @@ -90,4 +94,8 @@ impl ExecutionBackend for ApplyPatchBackend { )), } } + + fn stream_stdout(&self, _mode: &ExecutionMode) -> bool { + false + } } diff --git a/codex-rs/core/src/executor/runner.rs b/codex-rs/core/src/executor/runner.rs index 9161e78659..086bdcf2f4 100644 --- a/codex-rs/core/src/executor/runner.rs +++ b/codex-rs/core/src/executor/runner.rs @@ -23,6 +23,7 @@ use crate::function_tool::FunctionCallError; use crate::protocol::AskForApproval; use crate::protocol::ReviewDecision; use crate::protocol::SandboxPolicy; +use crate::shell; #[derive(Clone, Debug)] pub(crate) struct ExecutorConfig { @@ -94,8 +95,18 @@ impl Executor { sub_id: &str, call_id: &str, ) -> Result { + if matches!(request.mode, ExecutionMode::Shell) { + request.params = + maybe_translate_shell_command(request.params, session, request.use_shell_profile); + } + // Step 1: Normalise parameters via the selected backend. let backend = backend_for_mode(&request.mode); + let stdout_stream = if backend.stream_stdout(&request.mode) { + request.stdout_stream.clone() + } else { + None + }; request.params = backend .prepare(request.params, &request.mode) .map_err(ExecError::from)?; @@ -128,7 +139,7 @@ impl Executor { request.params.clone(), sandbox_decision.initial_sandbox, &config, - request.stdout_stream.clone(), + stdout_stream.clone(), ) .await; @@ -140,8 +151,16 @@ impl Executor { } Err(CodexErr::Sandbox(error @ SandboxErr::Denied { .. })) => { return if sandbox_decision.escalate_on_failure { - self.retry_without_sandbox(&request, &config, session, sub_id, call_id, error) - .await + self.retry_without_sandbox( + &request, + &config, + session, + sub_id, + call_id, + stdout_stream.clone(), + error, + ) + .await } else { Err(ExecError::rejection(format!( "failed in sandbox {:?} with execution error: {error:?}", @@ -165,6 +184,7 @@ impl Executor { session: &Session, sub_id: &str, call_id: &str, + stdout_stream: Option, sandbox_error: SandboxErr, ) -> Result { session @@ -194,7 +214,7 @@ impl Executor { request.params.clone(), SandboxType::None, config, - request.stdout_stream.clone(), + stdout_stream, ) .await?; @@ -225,11 +245,31 @@ impl Executor { } } +fn maybe_translate_shell_command( + params: ExecParams, + session: &Session, + use_shell_profile: bool, +) -> ExecParams { + let should_translate = + matches!(session.user_shell(), shell::Shell::PowerShell(_)) || use_shell_profile; + + if should_translate + && let Some(command) = session + .user_shell() + .format_default_shell_invocation(params.command.clone()) + { + return ExecParams { command, ..params }; + } + + params +} + pub(crate) struct ExecutionRequest { pub params: ExecParams, pub approval_command: Vec, pub mode: ExecutionMode, pub stdout_stream: Option, + pub use_shell_profile: bool, } pub(crate) struct NormalizedExecOutput<'a> { diff --git a/codex-rs/core/src/executor/sandbox.rs b/codex-rs/core/src/executor/sandbox.rs index 87fdc0f00a..99507fc446 100644 --- a/codex-rs/core/src/executor/sandbox.rs +++ b/codex-rs/core/src/executor/sandbox.rs @@ -196,6 +196,7 @@ mod tests { approval_command: vec!["apply_patch".into()], mode: ExecutionMode::ApplyPatch(exec), stdout_stream: None, + use_shell_profile: false, }; let decision = select_sandbox( &request, @@ -236,6 +237,7 @@ mod tests { approval_command: vec!["apply_patch".into()], mode: ExecutionMode::ApplyPatch(exec), stdout_stream: None, + use_shell_profile: false, }; let decision = select_sandbox( &request, @@ -277,6 +279,7 @@ mod tests { approval_command: vec!["apply_patch".into()], mode: ExecutionMode::ApplyPatch(exec), stdout_stream: None, + use_shell_profile: false, }; let result = select_sandbox( &request, @@ -313,6 +316,7 @@ mod tests { approval_command: vec!["some-unknown".into()], mode: ExecutionMode::Shell, stdout_stream: None, + use_shell_profile: false, }; let decision = select_sandbox( &request, @@ -347,6 +351,7 @@ mod tests { approval_command: vec!["some-unknown".into()], mode: ExecutionMode::Shell, stdout_stream: None, + use_shell_profile: false, }; let decision = select_sandbox( &request, From 8c09db17c3d9aa9f79725cf000ee809028591246 Mon Sep 17 00:00:00 2001 From: jimmyfraiture Date: Tue, 30 Sep 2025 12:04:44 +0100 Subject: [PATCH 09/17] RV 5 --- codex-rs/core/src/codex.rs | 51 +++++++++++-------------- codex-rs/core/src/executor/mod.rs | 55 ++++++++++++++++++++++++++- codex-rs/core/src/executor/runner.rs | 17 +-------- codex-rs/core/src/executor/sandbox.rs | 2 +- 4 files changed, 78 insertions(+), 47 deletions(-) diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index d5d3e22830..8e4e4475b2 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -63,7 +63,6 @@ use crate::exec_command::ExecSessionManager; use crate::exec_command::WRITE_STDIN_TOOL_NAME; use crate::exec_command::WriteStdinParams; use crate::exec_env::create_env; -use crate::executor::ExecError; use crate::executor::ExecutionMode; use crate::executor::ExecutionRequest; use crate::executor::Executor; @@ -905,29 +904,17 @@ impl Session { async fn run_exec_with_events( &self, turn_diff_tracker: &mut TurnDiffTracker, - begin_ctx: ExecCommandContext, - params: ExecParams, - approval_command: Vec, - mode: ExecutionMode, - stdout_stream: Option, - use_shell_profile: bool, + prepared: PreparedExec, approval_policy: AskForApproval, ) -> Result { - let is_apply_patch = begin_ctx.apply_patch.is_some(); - let sub_id = begin_ctx.sub_id.clone(); - let call_id = begin_ctx.call_id.clone(); + let PreparedExec { context, request } = prepared; + let is_apply_patch = context.apply_patch.is_some(); + let sub_id = context.sub_id.clone(); + let call_id = context.call_id.clone(); - self.on_exec_command_begin(turn_diff_tracker, begin_ctx.clone()) + self.on_exec_command_begin(turn_diff_tracker, context.clone()) .await; - let request = ExecutionRequest { - params, - approval_command, - mode, - stdout_stream, - use_shell_profile, - }; - let result = self .services .executor @@ -2594,19 +2581,23 @@ async fn handle_container_exec_with_params( turn_context.cwd.clone(), ); + let prepared_exec = PreparedExec::new( + exec_command_context, + params, + command_for_display, + mode, + Some(StdoutStream { + sub_id: sub_id.clone(), + call_id: call_id.clone(), + tx_event: sess.tx_event.clone(), + }), + turn_context.shell_environment_policy.use_profile, + ); + let output_result = sess .run_exec_with_events( turn_diff_tracker, - exec_command_context, - params, - command_for_display, - mode, - Some(StdoutStream { - sub_id: sub_id.clone(), // todo we should not nead the ids - call_id: call_id.clone(), - tx_event: sess.tx_event.clone(), - }), - turn_context.shell_environment_policy.use_profile, + prepared_exec, turn_context.approval_policy, ) .await; @@ -2889,6 +2880,8 @@ pub(crate) async fn exit_review_mode( .await; } +use crate::executor::errors::ExecError; +use crate::executor::linkers::PreparedExec; #[cfg(test)] pub(crate) use tests::make_session_and_context; diff --git a/codex-rs/core/src/executor/mod.rs b/codex-rs/core/src/executor/mod.rs index c3dce856e5..3150f0176c 100644 --- a/codex-rs/core/src/executor/mod.rs +++ b/codex-rs/core/src/executor/mod.rs @@ -4,8 +4,61 @@ mod runner; mod sandbox; pub(crate) use backends::ExecutionMode; -pub(crate) use runner::ExecError; pub(crate) use runner::ExecutionRequest; pub(crate) use runner::Executor; pub(crate) use runner::ExecutorConfig; pub(crate) use runner::normalize_exec_result; + +pub mod linkers { + use crate::codex::ExecCommandContext; + use crate::exec::ExecParams; + use crate::exec::StdoutStream; + use crate::executor::backends::ExecutionMode; + use crate::executor::runner::ExecutionRequest; + + pub struct PreparedExec { + pub context: ExecCommandContext, + pub request: ExecutionRequest, + } + + impl PreparedExec { + pub fn new( + context: ExecCommandContext, + params: ExecParams, + approval_command: Vec, + mode: ExecutionMode, + stdout_stream: Option, + use_shell_profile: bool, + ) -> Self { + let request = ExecutionRequest { + params, + approval_command, + mode, + stdout_stream, + use_shell_profile, + }; + + Self { context, request } + } + } +} + +pub mod errors { + use crate::error::CodexErr; + use crate::function_tool::FunctionCallError; + use thiserror::Error; + + #[derive(Debug, Error)] + pub enum ExecError { + #[error(transparent)] + Function(#[from] FunctionCallError), + #[error(transparent)] + Codex(#[from] CodexErr), + } + + impl ExecError { + pub(crate) fn rejection(msg: impl Into) -> Self { + FunctionCallError::RespondToModel(msg.into()).into() + } + } +} diff --git a/codex-rs/core/src/executor/runner.rs b/codex-rs/core/src/executor/runner.rs index 086bdcf2f4..86caf313a8 100644 --- a/codex-rs/core/src/executor/runner.rs +++ b/codex-rs/core/src/executor/runner.rs @@ -3,8 +3,6 @@ use std::sync::Arc; use std::sync::RwLock; use std::time::Duration; -use thiserror::Error; - use super::backends::ExecutionMode; use super::backends::backend_for_mode; use super::cache::ApprovalCache; @@ -18,6 +16,7 @@ use crate::exec::SandboxType; use crate::exec::StdoutStream; use crate::exec::StreamOutput; use crate::exec::process_exec_tool_call; +use crate::executor::errors::ExecError; use crate::executor::sandbox::select_sandbox; use crate::function_tool::FunctionCallError; use crate::protocol::AskForApproval; @@ -46,20 +45,6 @@ impl ExecutorConfig { } } -#[derive(Debug, Error)] -pub enum ExecError { - #[error(transparent)] - Function(#[from] FunctionCallError), - #[error(transparent)] - Codex(#[from] CodexErr), -} - -impl ExecError { - pub(crate) fn rejection(msg: impl Into) -> Self { - FunctionCallError::RespondToModel(msg.into()).into() - } -} - /// Coordinates sandbox selection, backend-specific preparation, and command /// execution for tool calls requested by the model. pub(crate) struct Executor { diff --git a/codex-rs/core/src/executor/sandbox.rs b/codex-rs/core/src/executor/sandbox.rs index 99507fc446..d420dfed2f 100644 --- a/codex-rs/core/src/executor/sandbox.rs +++ b/codex-rs/core/src/executor/sandbox.rs @@ -3,7 +3,7 @@ use crate::apply_patch::ApplyPatchExec; use crate::codex::Session; use crate::exec::ExecParams; use crate::exec::SandboxType; -use crate::executor::ExecError; +use crate::executor::errors::ExecError; use crate::executor::ExecutionMode; use crate::executor::ExecutionRequest; use crate::executor::ExecutorConfig; From 43c0abb31e29b97bb21083dabddff43fb25caf23 Mon Sep 17 00:00:00 2001 From: jimmyfraiture Date: Tue, 30 Sep 2025 12:42:36 +0100 Subject: [PATCH 10/17] RV 6 --- codex-rs/core/src/codex.rs | 1 - codex-rs/core/src/executor/mod.rs | 6 +++--- codex-rs/core/src/executor/sandbox.rs | 8 ++------ codex-rs/core/tests/suite/seatbelt.rs | 6 ++++++ 4 files changed, 11 insertions(+), 10 deletions(-) diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index 8e4e4475b2..20b3457562 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -64,7 +64,6 @@ use crate::exec_command::WRITE_STDIN_TOOL_NAME; use crate::exec_command::WriteStdinParams; use crate::exec_env::create_env; use crate::executor::ExecutionMode; -use crate::executor::ExecutionRequest; use crate::executor::Executor; use crate::executor::ExecutorConfig; use crate::executor::normalize_exec_result; diff --git a/codex-rs/core/src/executor/mod.rs b/codex-rs/core/src/executor/mod.rs index 3150f0176c..a5a305c604 100644 --- a/codex-rs/core/src/executor/mod.rs +++ b/codex-rs/core/src/executor/mod.rs @@ -9,7 +9,7 @@ pub(crate) use runner::Executor; pub(crate) use runner::ExecutorConfig; pub(crate) use runner::normalize_exec_result; -pub mod linkers { +pub(crate) mod linkers { use crate::codex::ExecCommandContext; use crate::exec::ExecParams; use crate::exec::StdoutStream; @@ -17,8 +17,8 @@ pub mod linkers { use crate::executor::runner::ExecutionRequest; pub struct PreparedExec { - pub context: ExecCommandContext, - pub request: ExecutionRequest, + pub(crate) context: ExecCommandContext, + pub(crate) request: ExecutionRequest, } impl PreparedExec { diff --git a/codex-rs/core/src/executor/sandbox.rs b/codex-rs/core/src/executor/sandbox.rs index d420dfed2f..a43760a9ff 100644 --- a/codex-rs/core/src/executor/sandbox.rs +++ b/codex-rs/core/src/executor/sandbox.rs @@ -1,21 +1,16 @@ -use crate::CODEX_APPLY_PATCH_ARG1; use crate::apply_patch::ApplyPatchExec; use crate::codex::Session; -use crate::exec::ExecParams; use crate::exec::SandboxType; -use crate::executor::errors::ExecError; use crate::executor::ExecutionMode; use crate::executor::ExecutionRequest; use crate::executor::ExecutorConfig; -use crate::function_tool::FunctionCallError; +use crate::executor::errors::ExecError; use crate::safety::SafetyCheck; use crate::safety::assess_command_safety; use crate::safety::assess_patch_safety; use codex_protocol::protocol::AskForApproval; use codex_protocol::protocol::ReviewDecision; -use std::collections::HashMap; use std::collections::HashSet; -use std::env; /// Sandbox placement options selected for an execution run, including whether /// to escalate after failures and whether approvals should persist. @@ -169,6 +164,7 @@ mod tests { use super::*; use crate::codex::make_session_and_context; use crate::exec::ExecParams; + use crate::function_tool::FunctionCallError; use crate::protocol::SandboxPolicy; use codex_apply_patch::ApplyPatchAction; use pretty_assertions::assert_eq; diff --git a/codex-rs/core/tests/suite/seatbelt.rs b/codex-rs/core/tests/suite/seatbelt.rs index 78f599d42e..a879d3e952 100644 --- a/codex-rs/core/tests/suite/seatbelt.rs +++ b/codex-rs/core/tests/suite/seatbelt.rs @@ -169,6 +169,12 @@ async fn python_getpwuid_works_under_seatbelt() { return; } + // For local dev. + if which::which("python3").is_err() { + eprintln!("python3 not found in PATH, skipping test."); + return; + } + // ReadOnly is sufficient here since we are only exercising user lookup. let policy = SandboxPolicy::ReadOnly; let command_cwd = std::env::current_dir().expect("getcwd"); From 9c194dc0f90b5229497da6bc8caa4a065e7b4168 Mon Sep 17 00:00:00 2001 From: jimmyfraiture Date: Tue, 30 Sep 2025 12:56:30 +0100 Subject: [PATCH 11/17] Fix merge --- codex-rs/core/src/executor/sandbox.rs | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/codex-rs/core/src/executor/sandbox.rs b/codex-rs/core/src/executor/sandbox.rs index a43760a9ff..3d3eaceaee 100644 --- a/codex-rs/core/src/executor/sandbox.rs +++ b/codex-rs/core/src/executor/sandbox.rs @@ -102,10 +102,19 @@ async fn select_shell_sandbox( ); match safety { - SafetyCheck::AutoApprove { sandbox_type } => Ok(SandboxDecision::auto( + SafetyCheck::AutoApprove { sandbox_type, - should_escalate_on_failure(approval_policy, sandbox_type), - )), + user_explicitly_approved, + } => { + let mut decision = SandboxDecision::auto( + sandbox_type, + should_escalate_on_failure(approval_policy, sandbox_type), + ); + if user_explicitly_approved { + decision.record_session_approval = true; + } + Ok(decision) + } SafetyCheck::AskUser => { let decision = session .request_command_approval( @@ -146,7 +155,7 @@ fn select_apply_patch_sandbox( &config.sandbox_policy, &config.sandbox_cwd, ) { - SafetyCheck::AutoApprove { sandbox_type } => Ok(SandboxDecision::auto( + SafetyCheck::AutoApprove { sandbox_type, .. } => Ok(SandboxDecision::auto( sandbox_type, should_escalate_on_failure(approval_policy, sandbox_type), )), From 5c00e1596a3adf0fd0683a2962475813cd38159f Mon Sep 17 00:00:00 2001 From: jimmyfraiture Date: Tue, 30 Sep 2025 13:28:25 +0100 Subject: [PATCH 12/17] Restore otel --- codex-rs/core/src/codex.rs | 9 ++++--- codex-rs/core/src/executor/runner.rs | 34 ++++++++++++++--------- codex-rs/core/src/executor/sandbox.rs | 39 +++++++++++++++++++++++---- codex-rs/core/src/safety.rs | 5 ++-- 4 files changed, 64 insertions(+), 23 deletions(-) diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index 2fb6bbfc17..3fc0da7829 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -1,7 +1,6 @@ use std::borrow::Cow; use std::collections::HashMap; use std::fmt::Debug; -use std::path::Path; use std::path::PathBuf; use std::sync::Arc; use std::sync::atomic::AtomicU64; @@ -125,7 +124,6 @@ use crate::user_instructions::UserInstructions; use crate::user_notification::UserNotification; use crate::util::backoff; use codex_otel::otel_event_manager::OtelEventManager; -use codex_otel::otel_event_manager::ToolDecisionSource; use codex_protocol::config_types::ReasoningEffort as ReasoningEffortConfig; use codex_protocol::config_types::ReasoningSummary as ReasoningSummaryConfig; use codex_protocol::custom_prompts::CustomPrompt; @@ -833,6 +831,7 @@ impl Session { command_for_display, cwd, apply_patch, + .. } = exec_command_context; let msg = match apply_patch { Some(ApplyPatchCommandContext { @@ -946,7 +945,7 @@ impl Session { let result = self .services .executor - .run(request, self, approval_policy, &sub_id, &call_id) + .run(request, self, approval_policy, &context) .await; let normalized = normalize_exec_result(&result); @@ -1079,6 +1078,8 @@ pub(crate) struct ExecCommandContext { pub(crate) command_for_display: Vec, pub(crate) cwd: PathBuf, pub(crate) apply_patch: Option, + pub(crate) tool_name: String, + pub(crate) otel_event_manager: OtelEventManager, } #[derive(Clone, Debug)] @@ -2662,6 +2663,8 @@ async fn handle_container_exec_with_params( changes: convert_apply_patch_to_protocol(action), }, ), + tool_name: tool_name.to_string(), + otel_event_manager, }; let mode = match apply_patch_exec { diff --git a/codex-rs/core/src/executor/runner.rs b/codex-rs/core/src/executor/runner.rs index 86caf313a8..ede9ed49e7 100644 --- a/codex-rs/core/src/executor/runner.rs +++ b/codex-rs/core/src/executor/runner.rs @@ -6,6 +6,7 @@ use std::time::Duration; use super::backends::ExecutionMode; use super::backends::backend_for_mode; use super::cache::ApprovalCache; +use crate::codex::ExecCommandContext; use crate::codex::Session; use crate::error::CodexErr; use crate::error::SandboxErr; @@ -23,6 +24,7 @@ use crate::protocol::AskForApproval; use crate::protocol::ReviewDecision; use crate::protocol::SandboxPolicy; use crate::shell; +use codex_otel::otel_event_manager::ToolDecisionSource; #[derive(Clone, Debug)] pub(crate) struct ExecutorConfig { @@ -77,8 +79,7 @@ impl Executor { mut request: ExecutionRequest, session: &Session, approval_policy: AskForApproval, - sub_id: &str, - call_id: &str, + context: &ExecCommandContext, ) -> Result { if matches!(request.mode, ExecutionMode::Shell) { request.params = @@ -110,8 +111,9 @@ impl Executor { self.approval_cache.snapshot(), &config, session, - sub_id, - call_id, + &context.sub_id, + &context.call_id, + &context.otel_event_manager, ) .await?; if sandbox_decision.record_session_approval { @@ -140,8 +142,7 @@ impl Executor { &request, &config, session, - sub_id, - call_id, + context, stdout_stream.clone(), error, ) @@ -161,37 +162,44 @@ impl Executor { /// Fallback path invoked when a sandboxed run is denied so the user can /// approve rerunning without isolation. - #[allow(clippy::too_many_arguments)] async fn retry_without_sandbox( &self, request: &ExecutionRequest, config: &ExecutorConfig, session: &Session, - sub_id: &str, - call_id: &str, + context: &ExecCommandContext, stdout_stream: Option, sandbox_error: SandboxErr, ) -> Result { session - .notify_background_event(sub_id, format!("Execution failed: {sandbox_error}")) + .notify_background_event( + &context.sub_id, + format!("Execution failed: {sandbox_error}"), + ) .await; let decision = session .request_command_approval( - sub_id.to_string(), - call_id.to_string(), + context.sub_id.to_string(), + context.call_id.to_string(), request.approval_command.clone(), request.params.cwd.clone(), Some("command failed; retry without sandbox?".to_string()), ) .await; + context.otel_event_manager.tool_decision( + &context.tool_name, + &context.call_id, + decision, + ToolDecisionSource::User, + ); match decision { ReviewDecision::Approved | ReviewDecision::ApprovedForSession => { if matches!(decision, ReviewDecision::ApprovedForSession) { self.approval_cache.insert(request.approval_command.clone()); } session - .notify_background_event(sub_id, "retrying command without sandbox") + .notify_background_event(&context.sub_id, "retrying command without sandbox") .await; let retry_output = self diff --git a/codex-rs/core/src/executor/sandbox.rs b/codex-rs/core/src/executor/sandbox.rs index 3d3eaceaee..4c619c3e6e 100644 --- a/codex-rs/core/src/executor/sandbox.rs +++ b/codex-rs/core/src/executor/sandbox.rs @@ -8,6 +8,8 @@ use crate::executor::errors::ExecError; use crate::safety::SafetyCheck; use crate::safety::assess_command_safety; use crate::safety::assess_patch_safety; +use codex_otel::otel_event_manager::OtelEventManager; +use codex_otel::otel_event_manager::ToolDecisionSource; use codex_protocol::protocol::AskForApproval; use codex_protocol::protocol::ReviewDecision; use std::collections::HashSet; @@ -50,6 +52,7 @@ fn should_escalate_on_failure(approval: AskForApproval, sandbox: SandboxType) -> /// Determines how a command should be sandboxed, prompting the user when /// policy requires explicit approval. +#[warn(clippy::too_many_arguments)] pub async fn select_sandbox( request: &ExecutionRequest, approval_policy: AskForApproval, @@ -58,6 +61,7 @@ pub async fn select_sandbox( session: &Session, sub_id: &str, call_id: &str, + otel_event_manager: &OtelEventManager, ) -> Result { match &request.mode { ExecutionMode::Shell => { @@ -69,6 +73,7 @@ pub async fn select_sandbox( session, sub_id, call_id, + otel_event_manager, ) .await } @@ -78,6 +83,7 @@ pub async fn select_sandbox( } } +#[warn(clippy::too_many_arguments)] async fn select_shell_sandbox( request: &ExecutionRequest, approval_policy: AskForApproval, @@ -86,6 +92,7 @@ async fn select_shell_sandbox( session: &Session, sub_id: &str, call_id: &str, + otel_event_manager: &OtelEventManager, ) -> Result { let command_for_safety = if request.approval_command.is_empty() { request.params.command.clone() @@ -113,6 +120,12 @@ async fn select_shell_sandbox( if user_explicitly_approved { decision.record_session_approval = true; } + let (decision_for_event, source) = if user_explicitly_approved { + (ReviewDecision::ApprovedForSession, ToolDecisionSource::User) + } else { + (ReviewDecision::Approved, ToolDecisionSource::Config) + }; + otel_event_manager.tool_decision("local_shell", call_id, decision_for_event, source); Ok(decision) } SafetyCheck::AskUser => { @@ -126,6 +139,12 @@ async fn select_shell_sandbox( ) .await; + otel_event_manager.tool_decision( + "local_shell", + call_id, + decision, + ToolDecisionSource::User, + ); match decision { ReviewDecision::Approved => Ok(SandboxDecision::user_override(false)), ReviewDecision::ApprovedForSession => Ok(SandboxDecision::user_override(true)), @@ -180,7 +199,7 @@ mod tests { #[tokio::test] async fn select_apply_patch_user_override_when_explicit() { - let (session, _ctx) = make_session_and_context(); + let (session, ctx) = make_session_and_context(); let tmp = tempfile::tempdir().expect("tmp"); let p = tmp.path().join("a.txt"); let action = ApplyPatchAction::new_add_for_test(&p, "hello".to_string()); @@ -203,6 +222,7 @@ mod tests { stdout_stream: None, use_shell_profile: false, }; + let otel_event_manager = ctx.client.get_otel_event_manager(); let decision = select_sandbox( &request, AskForApproval::OnRequest, @@ -211,6 +231,7 @@ mod tests { &session, "sub", "call", + &otel_event_manager, ) .await .expect("ok"); @@ -221,7 +242,7 @@ mod tests { #[tokio::test] async fn select_apply_patch_autoapprove_in_danger() { - let (session, _ctx) = make_session_and_context(); + let (session, ctx) = make_session_and_context(); let tmp = tempfile::tempdir().expect("tmp"); let p = tmp.path().join("a.txt"); let action = ApplyPatchAction::new_add_for_test(&p, "hello".to_string()); @@ -244,6 +265,7 @@ mod tests { stdout_stream: None, use_shell_profile: false, }; + let otel_event_manager = ctx.client.get_otel_event_manager(); let decision = select_sandbox( &request, AskForApproval::OnRequest, @@ -252,6 +274,7 @@ mod tests { &session, "sub", "call", + &otel_event_manager, ) .await .expect("ok"); @@ -263,7 +286,7 @@ mod tests { #[tokio::test] async fn select_apply_patch_requires_approval_on_unless_trusted() { - let (session, _ctx) = make_session_and_context(); + let (session, ctx) = make_session_and_context(); let tempdir = tempfile::tempdir().expect("tmpdir"); let p = tempdir.path().join("a.txt"); let action = ApplyPatchAction::new_add_for_test(&p, "hello".to_string()); @@ -286,6 +309,7 @@ mod tests { stdout_stream: None, use_shell_profile: false, }; + let otel_event_manager = ctx.client.get_otel_event_manager(); let result = select_sandbox( &request, AskForApproval::UnlessTrusted, @@ -294,6 +318,7 @@ mod tests { &session, "sub", "call", + &otel_event_manager, ) .await; match result { @@ -307,7 +332,7 @@ mod tests { #[tokio::test] async fn select_shell_autoapprove_in_danger_mode() { - let (session, _ctx) = make_session_and_context(); + let (session, ctx) = make_session_and_context(); let cfg = ExecutorConfig::new(SandboxPolicy::DangerFullAccess, std::env::temp_dir(), None); let request = ExecutionRequest { params: ExecParams { @@ -323,6 +348,7 @@ mod tests { stdout_stream: None, use_shell_profile: false, }; + let otel_event_manager = ctx.client.get_otel_event_manager(); let decision = select_sandbox( &request, AskForApproval::OnRequest, @@ -331,6 +357,7 @@ mod tests { &session, "sub", "call", + &otel_event_manager, ) .await .expect("ok"); @@ -341,7 +368,7 @@ mod tests { #[cfg(any(target_os = "macos", target_os = "linux"))] #[tokio::test] async fn select_shell_escalates_on_failure_with_platform_sandbox() { - let (session, _ctx) = make_session_and_context(); + let (session, ctx) = make_session_and_context(); let cfg = ExecutorConfig::new(SandboxPolicy::ReadOnly, std::env::temp_dir(), None); let request = ExecutionRequest { params: ExecParams { @@ -358,6 +385,7 @@ mod tests { stdout_stream: None, use_shell_profile: false, }; + let otel_event_manager = ctx.client.get_otel_event_manager(); let decision = select_sandbox( &request, AskForApproval::OnFailure, @@ -366,6 +394,7 @@ mod tests { &session, "sub", "call", + &otel_event_manager, ) .await .expect("ok"); diff --git a/codex-rs/core/src/safety.rs b/codex-rs/core/src/safety.rs index b976ae4a4c..0ed0f929ff 100644 --- a/codex-rs/core/src/safety.rs +++ b/codex-rs/core/src/safety.rs @@ -125,9 +125,10 @@ pub fn assess_command_safety( // the session _because_ they know it needs to run outside a sandbox. if is_known_safe_command(command) || approved.contains(command) { + let user_explicitly_approved = approved.contains(command); return SafetyCheck::AutoApprove { sandbox_type: SandboxType::None, - user_explicitly_approved: false, + user_explicitly_approved, }; } @@ -380,7 +381,7 @@ mod tests { safety_check, SafetyCheck::AutoApprove { sandbox_type: SandboxType::None, - user_explicitly_approved: false, + user_explicitly_approved: true, } ); } From 3def127178b7392b08c86b0cf739316fc8c6cfc3 Mon Sep 17 00:00:00 2001 From: jimmyfraiture Date: Tue, 30 Sep 2025 13:31:48 +0100 Subject: [PATCH 13/17] Clippy --- codex-rs/core/src/executor/sandbox.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/codex-rs/core/src/executor/sandbox.rs b/codex-rs/core/src/executor/sandbox.rs index 4c619c3e6e..5c01ff69b4 100644 --- a/codex-rs/core/src/executor/sandbox.rs +++ b/codex-rs/core/src/executor/sandbox.rs @@ -52,7 +52,7 @@ fn should_escalate_on_failure(approval: AskForApproval, sandbox: SandboxType) -> /// Determines how a command should be sandboxed, prompting the user when /// policy requires explicit approval. -#[warn(clippy::too_many_arguments)] +#[allow(clippy::too_many_arguments)] pub async fn select_sandbox( request: &ExecutionRequest, approval_policy: AskForApproval, @@ -83,7 +83,7 @@ pub async fn select_sandbox( } } -#[warn(clippy::too_many_arguments)] +#[allow(clippy::too_many_arguments)] async fn select_shell_sandbox( request: &ExecutionRequest, approval_policy: AskForApproval, From 60e7333575dd298863ffa0fa71da0d52744f96ab Mon Sep 17 00:00:00 2001 From: jimmyfraiture Date: Tue, 30 Sep 2025 17:50:30 +0100 Subject: [PATCH 14/17] Add integration tests --- .../suite/codex_message_processor_flow.rs | 232 ++++++++++++++++++ 1 file changed, 232 insertions(+) diff --git a/codex-rs/app-server/tests/suite/codex_message_processor_flow.rs b/codex-rs/app-server/tests/suite/codex_message_processor_flow.rs index 7527411223..cc00298f28 100644 --- a/codex-rs/app-server/tests/suite/codex_message_processor_flow.rs +++ b/codex-rs/app-server/tests/suite/codex_message_processor_flow.rs @@ -10,6 +10,7 @@ use codex_core::protocol::SandboxPolicy; use codex_core::protocol_config_types::ReasoningEffort; use codex_core::protocol_config_types::ReasoningSummary; use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR; +use codex_protocol::config_types::SandboxMode; use codex_protocol::mcp_protocol::AddConversationListenerParams; use codex_protocol::mcp_protocol::AddConversationSubscriptionResponse; use codex_protocol::mcp_protocol::EXEC_COMMAND_APPROVAL_METHOD; @@ -21,6 +22,9 @@ use codex_protocol::mcp_protocol::SendUserMessageParams; use codex_protocol::mcp_protocol::SendUserMessageResponse; use codex_protocol::mcp_protocol::SendUserTurnParams; use codex_protocol::mcp_protocol::SendUserTurnResponse; +use codex_protocol::protocol::Event; +use codex_protocol::protocol::EventMsg; +use codex_protocol::protocol::InputMessageKind; use mcp_types::JSONRPCNotification; use mcp_types::JSONRPCResponse; use mcp_types::RequestId; @@ -349,6 +353,234 @@ async fn test_send_user_turn_changes_approval_policy_behavior() { } // Helper: minimal config.toml pointing at mock provider. + +#[tokio::test(flavor = "multi_thread", worker_threads = 4)] +async fn test_send_user_turn_updates_sandbox_and_cwd_between_turns() { + if env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() { + println!( + "Skipping test because it cannot execute when network is disabled in a Codex sandbox." + ); + return; + } + + let tmp = TempDir::new().expect("tmp dir"); + let codex_home = tmp.path().join("codex_home"); + std::fs::create_dir(&codex_home).expect("create codex home dir"); + let workspace_root = tmp.path().join("workspace"); + std::fs::create_dir(&workspace_root).expect("create workspace root"); + let first_cwd = workspace_root.join("turn1"); + let second_cwd = workspace_root.join("turn2"); + std::fs::create_dir(&first_cwd).expect("create first cwd"); + std::fs::create_dir(&second_cwd).expect("create second cwd"); + + let responses = vec![ + create_shell_sse_response( + vec![ + "bash".to_string(), + "-lc".to_string(), + "echo first turn".to_string(), + ], + None, + Some(5000), + "call-first", + ) + .expect("create first shell response"), + create_final_assistant_message_sse_response("done first") + .expect("create first final assistant message"), + create_shell_sse_response( + vec![ + "bash".to_string(), + "-lc".to_string(), + "echo second turn".to_string(), + ], + None, + Some(5000), + "call-second", + ) + .expect("create second shell response"), + create_final_assistant_message_sse_response("done second") + .expect("create second final assistant message"), + ]; + let server = create_mock_chat_completions_server(responses).await; + create_config_toml(&codex_home, &server.uri()).expect("write config"); + + let mut mcp = McpProcess::new(&codex_home) + .await + .expect("spawn mcp process"); + timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()) + .await + .expect("init timeout") + .expect("init failed"); + + let new_conv_id = mcp + .send_new_conversation_request(NewConversationParams { + cwd: Some(first_cwd.to_string_lossy().into_owned()), + approval_policy: Some(AskForApproval::Never), + sandbox: Some(SandboxMode::WorkspaceWrite), + ..Default::default() + }) + .await + .expect("send newConversation"); + let new_conv_resp: JSONRPCResponse = timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_response_message(RequestId::Integer(new_conv_id)), + ) + .await + .expect("newConversation timeout") + .expect("newConversation resp"); + let NewConversationResponse { + conversation_id, + model, + .. + } = to_response::(new_conv_resp) + .expect("deserialize newConversation response"); + + let add_listener_id = mcp + .send_add_conversation_listener_request(AddConversationListenerParams { conversation_id }) + .await + .expect("send addConversationListener"); + timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_response_message(RequestId::Integer(add_listener_id)), + ) + .await + .expect("addConversationListener timeout") + .expect("addConversationListener resp"); + + let first_turn_id = mcp + .send_send_user_turn_request(SendUserTurnParams { + conversation_id, + items: vec![codex_protocol::mcp_protocol::InputItem::Text { + text: "first turn".to_string(), + }], + cwd: first_cwd.clone(), + approval_policy: AskForApproval::Never, + sandbox_policy: SandboxPolicy::WorkspaceWrite { + writable_roots: vec![first_cwd.clone()], + network_access: false, + exclude_tmpdir_env_var: false, + exclude_slash_tmp: false, + }, + model: model.clone(), + effort: Some(ReasoningEffort::Medium), + summary: ReasoningSummary::Auto, + }) + .await + .expect("send first sendUserTurn"); + timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_response_message(RequestId::Integer(first_turn_id)), + ) + .await + .expect("sendUserTurn 1 timeout") + .expect("sendUserTurn 1 resp"); + timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_notification_message("codex/event/task_complete"), + ) + .await + .expect("task_complete 1 timeout") + .expect("task_complete 1 notification"); + + let second_turn_id = mcp + .send_send_user_turn_request(SendUserTurnParams { + conversation_id, + items: vec![codex_protocol::mcp_protocol::InputItem::Text { + text: "second turn".to_string(), + }], + cwd: second_cwd.clone(), + approval_policy: AskForApproval::Never, + sandbox_policy: SandboxPolicy::DangerFullAccess, + model: model.clone(), + effort: Some(ReasoningEffort::Medium), + summary: ReasoningSummary::Auto, + }) + .await + .expect("send second sendUserTurn"); + timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_response_message(RequestId::Integer(second_turn_id)), + ) + .await + .expect("sendUserTurn 2 timeout") + .expect("sendUserTurn 2 resp"); + + let mut env_message: Option = None; + let second_cwd_str = second_cwd.to_string_lossy().into_owned(); + for _ in 0..10 { + let notification = timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_notification_message("codex/event/user_message"), + ) + .await + .expect("user_message timeout") + .expect("user_message notification"); + let params = notification + .params + .clone() + .expect("user_message should include params"); + let event: Event = serde_json::from_value(params).expect("deserialize user_message event"); + if let EventMsg::UserMessage(user) = event.msg + && matches!(user.kind, Some(InputMessageKind::EnvironmentContext)) + && user.message.contains(&second_cwd_str) + { + env_message = Some(user.message); + break; + } + } + let env_message = env_message.expect("expected environment context update"); + assert!( + env_message.contains("danger-full-access"), + "env context should reflect new sandbox mode: {env_message}" + ); + assert!( + env_message.contains("enabled"), + "env context should enable network access for danger-full-access policy: {env_message}" + ); + assert!( + env_message.contains(&second_cwd_str), + "env context should include updated cwd: {env_message}" + ); + + let exec_begin_notification = timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_notification_message("codex/event/exec_command_begin"), + ) + .await + .expect("exec_command_begin timeout") + .expect("exec_command_begin notification"); + let params = exec_begin_notification + .params + .clone() + .expect("exec_command_begin params"); + let event: Event = serde_json::from_value(params).expect("deserialize exec begin event"); + let exec_begin = match event.msg { + EventMsg::ExecCommandBegin(exec_begin) => exec_begin, + other => panic!("expected ExecCommandBegin event, got {other:?}"), + }; + assert_eq!( + exec_begin.cwd, second_cwd, + "exec turn should run from updated cwd" + ); + assert_eq!( + exec_begin.command, + vec![ + "bash".to_string(), + "-lc".to_string(), + "echo second turn".to_string() + ], + "exec turn should run expected command" + ); + + timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_notification_message("codex/event/task_complete"), + ) + .await + .expect("task_complete 2 timeout") + .expect("task_complete 2 notification"); +} + fn create_config_toml(codex_home: &Path, server_uri: &str) -> std::io::Result<()> { let config_toml = codex_home.join("config.toml"); std::fs::write( From c5cf9535e3cbc5fe805047985fbdb4f1d85d9d2d Mon Sep 17 00:00:00 2001 From: jimmyfraiture Date: Tue, 30 Sep 2025 18:35:00 +0100 Subject: [PATCH 15/17] Fix test --- .../app-server/tests/common/mcp_process.rs | 40 ++++++++++++++++--- codex-rs/core/src/codex.rs | 13 +++++- 2 files changed, 46 insertions(+), 7 deletions(-) diff --git a/codex-rs/app-server/tests/common/mcp_process.rs b/codex-rs/app-server/tests/common/mcp_process.rs index cbf54cecf0..3d76c7a115 100644 --- a/codex-rs/app-server/tests/common/mcp_process.rs +++ b/codex-rs/app-server/tests/common/mcp_process.rs @@ -1,3 +1,4 @@ +use std::collections::VecDeque; use std::path::Path; use std::process::Stdio; use std::sync::atomic::AtomicI64; @@ -46,6 +47,7 @@ pub struct McpProcess { process: Child, stdin: ChildStdin, stdout: BufReader, + pending_user_messages: VecDeque, } impl McpProcess { @@ -116,6 +118,7 @@ impl McpProcess { process, stdin, stdout, + pending_user_messages: VecDeque::new(), }) } @@ -380,8 +383,9 @@ impl McpProcess { let message = self.read_jsonrpc_message().await?; match message { - JSONRPCMessage::Notification(_) => { - eprintln!("notification: {message:?}"); + JSONRPCMessage::Notification(notification) => { + eprintln!("notification: {notification:?}"); + self.enqueue_user_message(notification); } JSONRPCMessage::Request(jsonrpc_request) => { return Ok(jsonrpc_request); @@ -405,8 +409,9 @@ impl McpProcess { loop { let message = self.read_jsonrpc_message().await?; match message { - JSONRPCMessage::Notification(_) => { - eprintln!("notification: {message:?}"); + JSONRPCMessage::Notification(notification) => { + eprintln!("notification: {notification:?}"); + self.enqueue_user_message(notification); } JSONRPCMessage::Request(_) => { anyhow::bail!("unexpected JSONRPCMessage::Request: {message:?}"); @@ -430,8 +435,9 @@ impl McpProcess { loop { let message = self.read_jsonrpc_message().await?; match message { - JSONRPCMessage::Notification(_) => { - eprintln!("notification: {message:?}"); + JSONRPCMessage::Notification(notification) => { + eprintln!("notification: {notification:?}"); + self.enqueue_user_message(notification); } JSONRPCMessage::Request(_) => { anyhow::bail!("unexpected JSONRPCMessage::Request: {message:?}"); @@ -454,6 +460,10 @@ impl McpProcess { ) -> anyhow::Result { eprintln!("in read_stream_until_notification_message({method})"); + if let Some(notification) = self.take_pending_notification_by_method(method) { + return Ok(notification); + } + loop { let message = self.read_jsonrpc_message().await?; match message { @@ -461,6 +471,7 @@ impl McpProcess { if notification.method == method { return Ok(notification); } + self.enqueue_user_message(notification); } JSONRPCMessage::Request(_) => { anyhow::bail!("unexpected JSONRPCMessage::Request: {message:?}"); @@ -474,4 +485,21 @@ impl McpProcess { } } } + + fn take_pending_notification_by_method(&mut self, method: &str) -> Option { + if let Some(pos) = self + .pending_user_messages + .iter() + .position(|notification| notification.method == method) + { + return self.pending_user_messages.remove(pos); + } + None + } + + fn enqueue_user_message(&mut self, notification: JSONRPCNotification) { + if notification.method == "codex/event/user_message" { + self.pending_user_messages.push_back(notification); + } + } } diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index 3fc0da7829..3d7a0be639 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -1286,8 +1286,19 @@ async fn submission_loop( let previous_env_context = EnvironmentContext::from(turn_context.as_ref()); let new_env_context = EnvironmentContext::from(&fresh_turn_context); if !new_env_context.equals_except_shell(&previous_env_context) { - sess.record_conversation_items(&[ResponseItem::from(new_env_context)]) + let env_response_item = ResponseItem::from(new_env_context); + sess.record_conversation_items(std::slice::from_ref(&env_response_item)) .await; + for msg in map_response_item_to_event_messages( + &env_response_item, + sess.show_raw_agent_reasoning(), + ) { + let event = Event { + id: sub.id.clone(), + msg, + }; + sess.send_event(event).await; + } } // Install the new persistent context for subsequent tasks/turns. From c1fa66618612ec796e65be3f4d4c24df74f4a840 Mon Sep 17 00:00:00 2001 From: jimmyfraiture Date: Wed, 1 Oct 2025 10:13:30 +0100 Subject: [PATCH 16/17] Fix merge --- .../suite/codex_message_processor_flow.rs | 20 +++---------------- codex-rs/core/src/executor/runner.rs | 20 +++++++++---------- 2 files changed, 12 insertions(+), 28 deletions(-) diff --git a/codex-rs/app-server/tests/suite/codex_message_processor_flow.rs b/codex-rs/app-server/tests/suite/codex_message_processor_flow.rs index 9f36521c73..f611e39856 100644 --- a/codex-rs/app-server/tests/suite/codex_message_processor_flow.rs +++ b/codex-rs/app-server/tests/suite/codex_message_processor_flow.rs @@ -5,7 +5,7 @@ use app_test_support::create_final_assistant_message_sse_response; use app_test_support::create_mock_chat_completions_server; use app_test_support::create_shell_sse_response; use app_test_support::to_response; -use codex_app_server_protocol::AddConversationListenerParams; +use codex_app_server_protocol::{AddConversationListenerParams, InputItem}; use codex_app_server_protocol::AddConversationSubscriptionResponse; use codex_app_server_protocol::ExecCommandApprovalParams; use codex_app_server_protocol::JSONRPCNotification; @@ -26,23 +26,9 @@ use codex_core::protocol_config_types::ReasoningEffort; use codex_core::protocol_config_types::ReasoningSummary; use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR; use codex_protocol::config_types::SandboxMode; -use codex_protocol::mcp_protocol::AddConversationListenerParams; -use codex_protocol::mcp_protocol::AddConversationSubscriptionResponse; -use codex_protocol::mcp_protocol::EXEC_COMMAND_APPROVAL_METHOD; -use codex_protocol::mcp_protocol::NewConversationParams; -use codex_protocol::mcp_protocol::NewConversationResponse; -use codex_protocol::mcp_protocol::RemoveConversationListenerParams; -use codex_protocol::mcp_protocol::RemoveConversationSubscriptionResponse; -use codex_protocol::mcp_protocol::SendUserMessageParams; -use codex_protocol::mcp_protocol::SendUserMessageResponse; -use codex_protocol::mcp_protocol::SendUserTurnParams; -use codex_protocol::mcp_protocol::SendUserTurnResponse; use codex_protocol::protocol::Event; use codex_protocol::protocol::EventMsg; use codex_protocol::protocol::InputMessageKind; -use mcp_types::JSONRPCNotification; -use mcp_types::JSONRPCResponse; -use mcp_types::RequestId; use pretty_assertions::assert_eq; use std::env; use tempfile::TempDir; @@ -482,7 +468,7 @@ async fn test_send_user_turn_updates_sandbox_and_cwd_between_turns() { let first_turn_id = mcp .send_send_user_turn_request(SendUserTurnParams { conversation_id, - items: vec![codex_protocol::mcp_protocol::InputItem::Text { + items: vec![InputItem::Text { text: "first turn".to_string(), }], cwd: first_cwd.clone(), @@ -517,7 +503,7 @@ async fn test_send_user_turn_updates_sandbox_and_cwd_between_turns() { let second_turn_id = mcp .send_send_user_turn_request(SendUserTurnParams { conversation_id, - items: vec![codex_protocol::mcp_protocol::InputItem::Text { + items: vec![InputItem::Text { text: "second turn".to_string(), }], cwd: second_cwd.clone(), diff --git a/codex-rs/core/src/executor/runner.rs b/codex-rs/core/src/executor/runner.rs index ede9ed49e7..befa83360d 100644 --- a/codex-rs/core/src/executor/runner.rs +++ b/codex-rs/core/src/executor/runner.rs @@ -131,19 +131,19 @@ impl Executor { .await; // Step 5: Handle sandbox outcomes, optionally escalating to an unsandboxed retry. - let raw_output = match first_attempt { - Ok(output) => output, + match first_attempt { + Ok(output) => Ok(output), Err(CodexErr::Sandbox(SandboxErr::Timeout { output })) => { - return Err(CodexErr::Sandbox(SandboxErr::Timeout { output }).into()); + Err(CodexErr::Sandbox(SandboxErr::Timeout { output }).into()) } - Err(CodexErr::Sandbox(error @ SandboxErr::Denied { .. })) => { - return if sandbox_decision.escalate_on_failure { + Err(CodexErr::Sandbox(error)) => { + if sandbox_decision.escalate_on_failure { self.retry_without_sandbox( &request, &config, session, context, - stdout_stream.clone(), + stdout_stream, error, ) .await @@ -152,12 +152,10 @@ impl Executor { "failed in sandbox {:?} with execution error: {error:?}", sandbox_decision.initial_sandbox ))) - }; + } } - Err(err) => return Err(err.into()), - }; - - Ok(raw_output) + Err(err) => Err(err.into()), + } } /// Fallback path invoked when a sandboxed run is denied so the user can From 16abbe34f82c9f3783eabbfeb4bd05b31ab2052c Mon Sep 17 00:00:00 2001 From: jimmyfraiture Date: Wed, 1 Oct 2025 10:16:58 +0100 Subject: [PATCH 17/17] FMT --- .../app-server/tests/suite/codex_message_processor_flow.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/codex-rs/app-server/tests/suite/codex_message_processor_flow.rs b/codex-rs/app-server/tests/suite/codex_message_processor_flow.rs index f611e39856..4dff2a1575 100644 --- a/codex-rs/app-server/tests/suite/codex_message_processor_flow.rs +++ b/codex-rs/app-server/tests/suite/codex_message_processor_flow.rs @@ -5,9 +5,10 @@ use app_test_support::create_final_assistant_message_sse_response; use app_test_support::create_mock_chat_completions_server; use app_test_support::create_shell_sse_response; use app_test_support::to_response; -use codex_app_server_protocol::{AddConversationListenerParams, InputItem}; +use codex_app_server_protocol::AddConversationListenerParams; use codex_app_server_protocol::AddConversationSubscriptionResponse; use codex_app_server_protocol::ExecCommandApprovalParams; +use codex_app_server_protocol::InputItem; use codex_app_server_protocol::JSONRPCNotification; use codex_app_server_protocol::JSONRPCResponse; use codex_app_server_protocol::NewConversationParams;