From 805de1938170103c8fcc2d165a4c4c7fdc9d9f4a Mon Sep 17 00:00:00 2001
From: jimmyfraiture <jif@openai.com>
Date: Fri, 26 Sep 2025 13:42:58 +0200
Subject: [PATCH 01/17] V1

---
 codex-rs/Cargo.lock                           |   1 +
 codex-rs/core/Cargo.toml                      |   1 +
 codex-rs/core/src/codex.rs                    | 241 +++++++++---------
 codex-rs/core/src/lib.rs                      |   1 +
 .../core/src/sandbox/apply_patch_adapter.rs   |  30 +++
 codex-rs/core/src/sandbox/backend.rs          |  87 +++++++
 codex-rs/core/src/sandbox/mod.rs              |  50 ++++
 codex-rs/core/src/sandbox/planner.rs          | 112 ++++++++
 8 files changed, 404 insertions(+), 119 deletions(-)
 create mode 100644 codex-rs/core/src/sandbox/apply_patch_adapter.rs
 create mode 100644 codex-rs/core/src/sandbox/backend.rs
 create mode 100644 codex-rs/core/src/sandbox/mod.rs
 create mode 100644 codex-rs/core/src/sandbox/planner.rs

diff --git a/codex-rs/Cargo.lock b/codex-rs/Cargo.lock
index 8b71f139b6..88f93b9776 100644
--- a/codex-rs/Cargo.lock
+++ b/codex-rs/Cargo.lock
@@ -673,6 +673,7 @@ dependencies = [
  "askama",
  "assert_cmd",
  "async-channel",
+ "async-trait",
  "base64",
  "bytes",
  "chrono",
diff --git a/codex-rs/core/Cargo.toml b/codex-rs/core/Cargo.toml
index d9ded08283..b56de6b4e4 100644
--- a/codex-rs/core/Cargo.toml
+++ b/codex-rs/core/Cargo.toml
@@ -15,6 +15,7 @@ workspace = true
 anyhow = { workspace = true }
 askama = { workspace = true }
 async-channel = { workspace = true }
+async-trait = { workspace = true }
 base64 = { workspace = true }
 bytes = { workspace = true }
 chrono = { workspace = true, features = ["serde"] }
diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs
index 48aa3dbd7c..8bdb497906 100644
--- a/codex-rs/core/src/codex.rs
+++ b/codex-rs/core/src/codex.rs
@@ -44,7 +44,6 @@ use tracing::warn;
 use crate::ModelProviderInfo;
 use crate::apply_patch;
 use crate::apply_patch::ApplyPatchExec;
-use crate::apply_patch::CODEX_APPLY_PATCH_ARG1;
 use crate::apply_patch::InternalApplyPatchInvocation;
 use crate::apply_patch::convert_apply_patch_to_protocol;
 use crate::client::ModelClient;
@@ -63,7 +62,6 @@ use crate::exec::ExecToolCallOutput;
 use crate::exec::SandboxType;
 use crate::exec::StdoutStream;
 use crate::exec::StreamOutput;
-use crate::exec::process_exec_tool_call;
 use crate::exec_command::EXEC_COMMAND_TOOL_NAME;
 use crate::exec_command::ExecCommandParams;
 use crate::exec_command::ExecSessionManager;
@@ -114,9 +112,15 @@ use crate::protocol::TurnDiffEvent;
 use crate::protocol::WebSearchBeginEvent;
 use crate::rollout::RolloutRecorder;
 use crate::rollout::RolloutRecorderParams;
-use crate::safety::SafetyCheck;
-use crate::safety::assess_command_safety;
-use crate::safety::assess_safety_for_untrusted_command;
+use crate::sandbox::BackendRegistry;
+use crate::sandbox::ExecPlan;
+use crate::sandbox::ExecRequest;
+use crate::sandbox::ExecRuntimeContext;
+use crate::sandbox::PatchExecRequest;
+use crate::sandbox::build_exec_params_for_apply_patch;
+use crate::sandbox::plan_apply_patch;
+use crate::sandbox::plan_exec;
+use crate::sandbox::run_with_plan;
 use crate::shell;
 use crate::state::ActiveTurn;
 use crate::state::SessionServices;
@@ -942,15 +946,24 @@ impl Session {
         self.on_exec_command_begin(turn_diff_tracker, begin_ctx.clone())
             .await;
 
-        let result = process_exec_tool_call(
-            exec_args.params,
-            exec_args.sandbox_type,
-            exec_args.sandbox_policy,
-            exec_args.sandbox_cwd,
-            exec_args.codex_linux_sandbox_exe,
-            exec_args.stdout_stream,
-        )
-        .await;
+        let ExecInvokeArgs {
+            params,
+            plan,
+            sandbox_policy,
+            sandbox_cwd,
+            codex_linux_sandbox_exe,
+            stdout_stream,
+        } = exec_args;
+
+        let registry = BackendRegistry::new();
+        let runtime_ctx = ExecRuntimeContext {
+            sandbox_policy,
+            sandbox_cwd,
+            codex_linux_sandbox_exe,
+            stdout_stream,
+        };
+
+        let result = run_with_plan(params, &plan, &registry, &runtime_ctx).await;
 
         let output_stderr;
         let borrowed: &ExecToolCallOutput = match &result {
@@ -2673,7 +2686,7 @@ fn parse_container_exec_arguments(
 
 pub struct ExecInvokeArgs<'a> {
     pub params: ExecParams,
-    pub sandbox_type: SandboxType,
+    pub plan: ExecPlan,
     pub sandbox_policy: &'a SandboxPolicy,
     pub sandbox_cwd: &'a Path,
     pub codex_linux_sandbox_exe: &'a Option<PathBuf>,
@@ -2740,98 +2753,80 @@ async fn handle_container_exec_with_params(
         MaybeApplyPatchVerified::NotApplyPatch => None,
     };
 
-    let (params, safety, command_for_display) = match &apply_patch_exec {
-        Some(ApplyPatchExec {
-            action: ApplyPatchAction { patch, cwd, .. },
-            user_explicitly_approved_this_action,
-        }) => {
-            let path_to_codex = std::env::current_exe()
-                .ok()
-                .map(|p| p.to_string_lossy().to_string());
-            let Some(path_to_codex) = path_to_codex else {
+    let mut params = params;
+    let command_for_display;
+
+    let plan = if let Some(apply_patch_exec) = apply_patch_exec.as_ref() {
+        params = build_exec_params_for_apply_patch(apply_patch_exec, &params)?;
+        command_for_display = vec![
+            "apply_patch".to_string(),
+            apply_patch_exec.action.patch.clone(),
+        ];
+
+        let plan_req = PatchExecRequest {
+            action: &apply_patch_exec.action,
+            approval: turn_context.approval_policy,
+            policy: &turn_context.sandbox_policy,
+            cwd: &turn_context.cwd,
+            user_explicitly_approved: apply_patch_exec.user_explicitly_approved_this_action,
+        };
+
+        match plan_apply_patch(&plan_req) {
+            plan @ ExecPlan::Approved { .. } => plan,
+            ExecPlan::AskUser { .. } => {
                 return Err(FunctionCallError::RespondToModel(
-                    "failed to determine path to codex executable".to_string(),
+                    "patch requires approval but none was recorded".to_string(),
                 ));
-            };
-
-            let params = ExecParams {
-                command: vec![
-                    path_to_codex,
-                    CODEX_APPLY_PATCH_ARG1.to_string(),
-                    patch.clone(),
-                ],
-                cwd: cwd.clone(),
-                timeout_ms: params.timeout_ms,
-                env: HashMap::new(),
-                with_escalated_permissions: params.with_escalated_permissions,
-                justification: params.justification.clone(),
-            };
-            let safety = if *user_explicitly_approved_this_action {
-                SafetyCheck::AutoApprove {
-                    sandbox_type: SandboxType::None,
-                }
-            } else {
-                assess_safety_for_untrusted_command(
-                    turn_context.approval_policy,
-                    &turn_context.sandbox_policy,
-                    params.with_escalated_permissions.unwrap_or(false),
-                )
-            };
-            (
-                params,
-                safety,
-                vec!["apply_patch".to_string(), patch.clone()],
-            )
-        }
-        None => {
-            let safety = {
-                let state = sess.state.lock().await;
-                assess_command_safety(
-                    &params.command,
-                    turn_context.approval_policy,
-                    &turn_context.sandbox_policy,
-                    state.approved_commands_ref(),
-                    params.with_escalated_permissions.unwrap_or(false),
-                )
-            };
-            let command_for_display = params.command.clone();
-            (params, safety, command_for_display)
+            }
+            ExecPlan::Reject { reason } => {
+                return Err(FunctionCallError::RespondToModel(format!(
+                    "patch rejected: {reason}"
+                )));
+            }
         }
-    };
+    } else {
+        command_for_display = params.command.clone();
+
+        let initial_plan = {
+            let state = sess.state.lock().await;
+            plan_exec(&ExecRequest {
+                params: &params,
+                approval: turn_context.approval_policy,
+                policy: &turn_context.sandbox_policy,
+                approved_session_commands: state.approved_commands_ref(),
+            })
+        };
 
-    let sandbox_type = match safety {
-        SafetyCheck::AutoApprove { sandbox_type } => sandbox_type,
-        SafetyCheck::AskUser => {
-            let decision = sess
-                .request_command_approval(
-                    sub_id.clone(),
-                    call_id.clone(),
-                    params.command.clone(),
-                    params.cwd.clone(),
-                    params.justification.clone(),
-                )
-                .await;
-            match decision {
-                ReviewDecision::Approved => (),
-                ReviewDecision::ApprovedForSession => {
-                    sess.add_approved_command(params.command.clone()).await;
-                }
-                ReviewDecision::Denied | ReviewDecision::Abort => {
-                    return Err(FunctionCallError::RespondToModel(
-                        "exec command rejected by user".to_string(),
-                    ));
+        match initial_plan {
+            plan @ ExecPlan::Approved { .. } => plan,
+            ExecPlan::AskUser { reason } => {
+                let decision = sess
+                    .request_command_approval(
+                        sub_id.clone(),
+                        call_id.clone(),
+                        params.command.clone(),
+                        params.cwd.clone(),
+                        reason,
+                    )
+                    .await;
+                match decision {
+                    ReviewDecision::Approved => ExecPlan::approved(SandboxType::None, false, true),
+                    ReviewDecision::ApprovedForSession => {
+                        sess.add_approved_command(params.command.clone()).await;
+                        ExecPlan::approved(SandboxType::None, false, true)
+                    }
+                    ReviewDecision::Denied | ReviewDecision::Abort => {
+                        return Err(FunctionCallError::RespondToModel(
+                            "exec command rejected by user".to_string(),
+                        ));
+                    }
                 }
             }
-            // No sandboxing is applied because the user has given
-            // explicit approval. Often, we end up in this case because
-            // the command cannot be run in a sandbox, such as
-            // installing a new dependency that requires network access.
-            SandboxType::None
-        }
-        SafetyCheck::Reject { reason } => {
-            return Err(FunctionCallError::RespondToModel(format!(
-                "exec command rejected: {reason:?}"
-            )));
+            ExecPlan::Reject { reason } => {
+                return Err(FunctionCallError::RespondToModel(format!(
+                    "exec command rejected: {reason:?}"
+                )));
+            }
         }
     };
 
@@ -2840,25 +2835,26 @@ async fn handle_container_exec_with_params(
         call_id: call_id.clone(),
         command_for_display: command_for_display.clone(),
         cwd: params.cwd.clone(),
-        apply_patch: apply_patch_exec.map(
+        apply_patch: apply_patch_exec.as_ref().map(
             |ApplyPatchExec {
                  action,
                  user_explicitly_approved_this_action,
              }| ApplyPatchCommandContext {
-                user_explicitly_approved_this_action,
-                changes: convert_apply_patch_to_protocol(&action),
+                user_explicitly_approved_this_action: *user_explicitly_approved_this_action,
+                changes: convert_apply_patch_to_protocol(action),
             },
         ),
     };
 
     let params = maybe_translate_shell_command(params, sess, turn_context);
+    let plan_for_invocation = plan.clone();
     let output_result = sess
         .run_exec_with_events(
             turn_diff_tracker,
             exec_command_context.clone(),
             ExecInvokeArgs {
                 params: params.clone(),
-                sandbox_type,
+                plan: plan_for_invocation,
                 sandbox_policy: &turn_context.sandbox_policy,
                 sandbox_cwd: &turn_context.cwd,
                 codex_linux_sandbox_exe: &sess.services.codex_linux_sandbox_exe,
@@ -2891,7 +2887,7 @@ async fn handle_container_exec_with_params(
                 params,
                 exec_command_context,
                 error,
-                sandbox_type,
+                &plan,
                 sess,
                 turn_context,
             )
@@ -2908,7 +2904,7 @@ async fn handle_sandbox_error(
     params: ExecParams,
     exec_command_context: ExecCommandContext,
     error: SandboxErr,
-    sandbox_type: SandboxType,
+    plan: &ExecPlan,
     sess: &Session,
     turn_context: &TurnContext,
 ) -> Result<String, FunctionCallError> {
@@ -2921,15 +2917,21 @@ async fn handle_sandbox_error(
         return Err(FunctionCallError::RespondToModel(content));
     }
 
-    // Early out if either the user never wants to be asked for approval, or
-    // we're letting the model manage escalation requests. Otherwise, continue
-    match turn_context.approval_policy {
-        AskForApproval::Never | AskForApproval::OnRequest => {
-            return Err(FunctionCallError::RespondToModel(format!(
-                "failed in sandbox {sandbox_type:?} with execution error: {error:?}"
-            )));
-        }
-        AskForApproval::UnlessTrusted | AskForApproval::OnFailure => (),
+    let ExecPlan::Approved {
+        sandbox: sandbox_type,
+        on_failure_escalate,
+        ..
+    } = plan
+    else {
+        return Err(FunctionCallError::RespondToModel(
+            "execution failed without an approved plan".to_string(),
+        ));
+    };
+
+    if !on_failure_escalate {
+        return Err(FunctionCallError::RespondToModel(format!(
+            "failed in sandbox {sandbox_type:?} with execution error: {error:?}"
+        )));
     }
 
     // Note that when `error` is `SandboxErr::Denied`, it could be a false
@@ -2944,11 +2946,12 @@ async fn handle_sandbox_error(
     sess.notify_background_event(&sub_id, format!("Execution failed: {error}"))
         .await;
 
+    let command_for_retry = params.command.clone();
     let decision = sess
         .request_command_approval(
             sub_id.clone(),
             call_id.clone(),
-            params.command.clone(),
+            command_for_retry.clone(),
             cwd.clone(),
             Some("command failed; retry without sandbox?".to_string()),
         )
@@ -2960,7 +2963,7 @@ async fn handle_sandbox_error(
             // remainder of the session so future
             // executions skip the sandbox directly.
             // TODO(ragona): Isn't this a bug? It always saves the command in an | fork?
-            sess.add_approved_command(params.command.clone()).await;
+            sess.add_approved_command(command_for_retry.clone()).await;
             // Inform UI we are retrying without sandbox.
             sess.notify_background_event(&sub_id, "retrying command without sandbox")
                 .await;
@@ -2973,7 +2976,7 @@ async fn handle_sandbox_error(
                     exec_command_context.clone(),
                     ExecInvokeArgs {
                         params,
-                        sandbox_type: SandboxType::None,
+                        plan: ExecPlan::approved(SandboxType::None, false, true),
                         sandbox_policy: &turn_context.sandbox_policy,
                         sandbox_cwd: &turn_context.cwd,
                         codex_linux_sandbox_exe: &sess.services.codex_linux_sandbox_exe,
diff --git a/codex-rs/core/src/lib.rs b/codex-rs/core/src/lib.rs
index 36287c1af1..8598fa1e71 100644
--- a/codex-rs/core/src/lib.rs
+++ b/codex-rs/core/src/lib.rs
@@ -61,6 +61,7 @@ pub mod plan_tool;
 pub mod project_doc;
 mod rollout;
 pub(crate) mod safety;
+pub mod sandbox;
 pub mod seatbelt;
 pub mod shell;
 pub mod spawn;
diff --git a/codex-rs/core/src/sandbox/apply_patch_adapter.rs b/codex-rs/core/src/sandbox/apply_patch_adapter.rs
new file mode 100644
index 0000000000..60a123684b
--- /dev/null
+++ b/codex-rs/core/src/sandbox/apply_patch_adapter.rs
@@ -0,0 +1,30 @@
+use std::env;
+
+use crate::apply_patch::ApplyPatchExec;
+use crate::apply_patch::CODEX_APPLY_PATCH_ARG1;
+use crate::exec::ExecParams;
+use crate::function_tool::FunctionCallError;
+
+pub(crate) fn build_exec_params_for_apply_patch(
+    exec: &ApplyPatchExec,
+    original: &ExecParams,
+) -> Result<ExecParams, FunctionCallError> {
+    let path_to_codex = env::current_exe()
+        .ok()
+        .map(|p| p.to_string_lossy().to_string())
+        .ok_or_else(|| {
+            FunctionCallError::RespondToModel(
+                "failed to determine path to codex executable".to_string(),
+            )
+        })?;
+
+    let patch = exec.action.patch.clone();
+    Ok(ExecParams {
+        command: vec![path_to_codex, CODEX_APPLY_PATCH_ARG1.to_string(), patch],
+        cwd: exec.action.cwd.clone(),
+        timeout_ms: original.timeout_ms,
+        env: original.env.clone(),
+        with_escalated_permissions: original.with_escalated_permissions,
+        justification: original.justification.clone(),
+    })
+}
diff --git a/codex-rs/core/src/sandbox/backend.rs b/codex-rs/core/src/sandbox/backend.rs
new file mode 100644
index 0000000000..02e5a87e4f
--- /dev/null
+++ b/codex-rs/core/src/sandbox/backend.rs
@@ -0,0 +1,87 @@
+use std::path::Path;
+use std::path::PathBuf;
+
+use async_trait::async_trait;
+
+use crate::error::Result;
+use crate::exec::ExecParams;
+use crate::exec::ExecToolCallOutput;
+use crate::exec::SandboxType;
+use crate::exec::StdoutStream;
+use crate::exec::process_exec_tool_call;
+use crate::protocol::SandboxPolicy;
+
+#[async_trait]
+pub trait SpawnBackend: Send + Sync {
+    fn sandbox_type(&self) -> SandboxType;
+
+    async fn spawn(
+        &self,
+        params: ExecParams,
+        sandbox_policy: &SandboxPolicy,
+        sandbox_cwd: &Path,
+        codex_linux_sandbox_exe: &Option<PathBuf>,
+        stdout_stream: Option<StdoutStream>,
+    ) -> Result<ExecToolCallOutput> {
+        process_exec_tool_call(
+            params,
+            self.sandbox_type(),
+            sandbox_policy,
+            sandbox_cwd,
+            codex_linux_sandbox_exe,
+            stdout_stream,
+        )
+        .await
+    }
+}
+
+#[derive(Clone, Copy, Debug, Default)]
+pub struct DirectBackend;
+
+#[async_trait]
+impl SpawnBackend for DirectBackend {
+    fn sandbox_type(&self) -> SandboxType {
+        SandboxType::None
+    }
+}
+
+#[derive(Clone, Copy, Debug, Default)]
+pub struct SeatbeltBackend;
+
+#[async_trait]
+impl SpawnBackend for SeatbeltBackend {
+    fn sandbox_type(&self) -> SandboxType {
+        SandboxType::MacosSeatbelt
+    }
+}
+
+#[derive(Clone, Copy, Debug, Default)]
+pub struct LinuxBackend;
+
+#[async_trait]
+impl SpawnBackend for LinuxBackend {
+    fn sandbox_type(&self) -> SandboxType {
+        SandboxType::LinuxSeccomp
+    }
+}
+
+#[derive(Default)]
+pub struct BackendRegistry {
+    direct: DirectBackend,
+    seatbelt: SeatbeltBackend,
+    linux: LinuxBackend,
+}
+
+impl BackendRegistry {
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    pub fn for_type(&self, sandbox: SandboxType) -> &dyn SpawnBackend {
+        match sandbox {
+            SandboxType::None => &self.direct,
+            SandboxType::MacosSeatbelt => &self.seatbelt,
+            SandboxType::LinuxSeccomp => &self.linux,
+        }
+    }
+}
diff --git a/codex-rs/core/src/sandbox/mod.rs b/codex-rs/core/src/sandbox/mod.rs
new file mode 100644
index 0000000000..abd735992d
--- /dev/null
+++ b/codex-rs/core/src/sandbox/mod.rs
@@ -0,0 +1,50 @@
+mod apply_patch_adapter;
+mod backend;
+mod planner;
+
+pub(crate) use apply_patch_adapter::build_exec_params_for_apply_patch;
+pub use backend::BackendRegistry;
+pub use backend::DirectBackend;
+pub use backend::LinuxBackend;
+pub use backend::SeatbeltBackend;
+pub use backend::SpawnBackend;
+pub use planner::ExecPlan;
+pub use planner::ExecRequest;
+pub use planner::PatchExecRequest;
+pub use planner::plan_apply_patch;
+pub use planner::plan_exec;
+
+use crate::error::Result;
+use crate::exec::ExecParams;
+use crate::exec::ExecToolCallOutput;
+use crate::exec::StdoutStream;
+use crate::protocol::SandboxPolicy;
+
+pub struct ExecRuntimeContext<'a> {
+    pub sandbox_policy: &'a SandboxPolicy,
+    pub sandbox_cwd: &'a std::path::Path,
+    pub codex_linux_sandbox_exe: &'a Option<std::path::PathBuf>,
+    pub stdout_stream: Option<StdoutStream>,
+}
+
+pub async fn run_with_plan(
+    params: ExecParams,
+    plan: &ExecPlan,
+    registry: &BackendRegistry,
+    runtime_ctx: &ExecRuntimeContext<'_>,
+) -> Result<ExecToolCallOutput> {
+    let ExecPlan::Approved { sandbox, .. } = plan else {
+        unreachable!("run_with_plan called without approved plan");
+    };
+
+    registry
+        .for_type(*sandbox)
+        .spawn(
+            params,
+            runtime_ctx.sandbox_policy,
+            runtime_ctx.sandbox_cwd,
+            runtime_ctx.codex_linux_sandbox_exe,
+            runtime_ctx.stdout_stream.clone(),
+        )
+        .await
+}
diff --git a/codex-rs/core/src/sandbox/planner.rs b/codex-rs/core/src/sandbox/planner.rs
new file mode 100644
index 0000000000..fcb1412ab9
--- /dev/null
+++ b/codex-rs/core/src/sandbox/planner.rs
@@ -0,0 +1,112 @@
+use std::collections::HashSet;
+use std::path::Path;
+
+use codex_apply_patch::ApplyPatchAction;
+
+use crate::exec::ExecParams;
+use crate::exec::SandboxType;
+use crate::protocol::AskForApproval;
+use crate::protocol::SandboxPolicy;
+use crate::safety::SafetyCheck;
+use crate::safety::assess_command_safety;
+use crate::safety::assess_patch_safety;
+
+#[derive(Clone, Debug)]
+pub struct ExecRequest<'a> {
+    pub params: &'a ExecParams,
+    pub approval: AskForApproval,
+    pub policy: &'a SandboxPolicy,
+    pub approved_session_commands: &'a HashSet<Vec<String>>,
+}
+
+#[derive(Clone, Debug)]
+pub enum ExecPlan {
+    Reject {
+        reason: String,
+    },
+    AskUser {
+        reason: Option<String>,
+    },
+    Approved {
+        sandbox: SandboxType,
+        on_failure_escalate: bool,
+        approved_by_user: bool,
+    },
+}
+
+impl ExecPlan {
+    pub fn approved(
+        sandbox: SandboxType,
+        on_failure_escalate: bool,
+        approved_by_user: bool,
+    ) -> Self {
+        ExecPlan::Approved {
+            sandbox,
+            on_failure_escalate,
+            approved_by_user,
+        }
+    }
+}
+
+pub fn plan_exec(req: &ExecRequest<'_>) -> ExecPlan {
+    let params = req.params;
+    let with_escalated_permissions = params.with_escalated_permissions.unwrap_or(false);
+    let safety = assess_command_safety(
+        &params.command,
+        req.approval,
+        req.policy,
+        req.approved_session_commands,
+        with_escalated_permissions,
+    );
+
+    match safety {
+        SafetyCheck::AutoApprove { sandbox_type } => ExecPlan::Approved {
+            sandbox: sandbox_type,
+            on_failure_escalate: should_escalate_on_failure(req.approval, sandbox_type),
+            approved_by_user: false,
+        },
+        SafetyCheck::AskUser => ExecPlan::AskUser {
+            reason: params.justification.clone(),
+        },
+        SafetyCheck::Reject { reason } => ExecPlan::Reject { reason },
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct PatchExecRequest<'a> {
+    pub action: &'a ApplyPatchAction,
+    pub approval: AskForApproval,
+    pub policy: &'a SandboxPolicy,
+    pub cwd: &'a Path,
+    pub user_explicitly_approved: bool,
+}
+
+pub fn plan_apply_patch(req: &PatchExecRequest<'_>) -> ExecPlan {
+    if req.user_explicitly_approved {
+        ExecPlan::Approved {
+            sandbox: SandboxType::None,
+            on_failure_escalate: false,
+            approved_by_user: true,
+        }
+    } else {
+        match assess_patch_safety(req.action, req.approval, req.policy, req.cwd) {
+            SafetyCheck::AutoApprove { sandbox_type } => ExecPlan::Approved {
+                sandbox: sandbox_type,
+                on_failure_escalate: should_escalate_on_failure(req.approval, sandbox_type),
+                approved_by_user: false,
+            },
+            SafetyCheck::AskUser => ExecPlan::AskUser { reason: None },
+            SafetyCheck::Reject { reason } => ExecPlan::Reject { reason },
+        }
+    }
+}
+
+fn should_escalate_on_failure(approval: AskForApproval, sandbox: SandboxType) -> bool {
+    matches!(
+        (approval, sandbox),
+        (
+            AskForApproval::UnlessTrusted | AskForApproval::OnFailure,
+            SandboxType::MacosSeatbelt | SandboxType::LinuxSeccomp
+        )
+    )
+}

From a29380cdffc1e74738d1a7fa6912f19766c50d5d Mon Sep 17 00:00:00 2001
From: jimmyfraiture <jif@openai.com>
Date: Fri, 26 Sep 2025 14:02:38 +0200
Subject: [PATCH 02/17] Isolate apply patch adapter

---
 codex-rs/core/src/sandbox/apply_patch_adapter.rs | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/codex-rs/core/src/sandbox/apply_patch_adapter.rs b/codex-rs/core/src/sandbox/apply_patch_adapter.rs
index 60a123684b..7e55f43279 100644
--- a/codex-rs/core/src/sandbox/apply_patch_adapter.rs
+++ b/codex-rs/core/src/sandbox/apply_patch_adapter.rs
@@ -1,3 +1,4 @@
+use std::collections::HashMap;
 use std::env;
 
 use crate::apply_patch::ApplyPatchExec;
@@ -23,7 +24,9 @@ pub(crate) fn build_exec_params_for_apply_patch(
         command: vec![path_to_codex, CODEX_APPLY_PATCH_ARG1.to_string(), patch],
         cwd: exec.action.cwd.clone(),
         timeout_ms: original.timeout_ms,
-        env: original.env.clone(),
+        // Run apply_patch with a minimal environment for determinism and to
+        // avoid leaking host environment variables into the patch process.
+        env: HashMap::new(),
         with_escalated_permissions: original.with_escalated_permissions,
         justification: original.justification.clone(),
     })

From caab5a19ee9d13399c8fb7c40b213ec8872f5b0d Mon Sep 17 00:00:00 2001
From: jimmyfraiture <jif@openai.com>
Date: Fri, 26 Sep 2025 14:46:07 +0200
Subject: [PATCH 03/17] Move some stuff around

---
 codex-rs/core/src/apply_patch.rs     |   1 +
 codex-rs/core/src/codex.rs           | 103 ++++++--------------------
 codex-rs/core/src/sandbox/mod.rs     |   3 +-
 codex-rs/core/src/sandbox/planner.rs | 105 +++++++++++++++++++++++++++
 4 files changed, 131 insertions(+), 81 deletions(-)

diff --git a/codex-rs/core/src/apply_patch.rs b/codex-rs/core/src/apply_patch.rs
index 1ebbe5d738..8f372fb2d3 100644
--- a/codex-rs/core/src/apply_patch.rs
+++ b/codex-rs/core/src/apply_patch.rs
@@ -27,6 +27,7 @@ pub(crate) enum InternalApplyPatchInvocation {
     DelegateToExec(ApplyPatchExec),
 }
 
+#[derive(Debug)]
 pub(crate) struct ApplyPatchExec {
     pub(crate) action: ApplyPatchAction,
     pub(crate) user_explicitly_approved_this_action: bool,
diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs
index 8bdb497906..20127318b2 100644
--- a/codex-rs/core/src/codex.rs
+++ b/codex-rs/core/src/codex.rs
@@ -114,12 +114,9 @@ use crate::rollout::RolloutRecorder;
 use crate::rollout::RolloutRecorderParams;
 use crate::sandbox::BackendRegistry;
 use crate::sandbox::ExecPlan;
-use crate::sandbox::ExecRequest;
 use crate::sandbox::ExecRuntimeContext;
-use crate::sandbox::PatchExecRequest;
-use crate::sandbox::build_exec_params_for_apply_patch;
-use crate::sandbox::plan_apply_patch;
-use crate::sandbox::plan_exec;
+use crate::sandbox::PreparedExec;
+use crate::sandbox::prepare_exec_invocation;
 use crate::sandbox::run_with_plan;
 use crate::shell;
 use crate::state::ActiveTurn;
@@ -2753,83 +2750,29 @@ async fn handle_container_exec_with_params(
         MaybeApplyPatchVerified::NotApplyPatch => None,
     };
 
-    let mut params = params;
-    let command_for_display;
-
-    let plan = if let Some(apply_patch_exec) = apply_patch_exec.as_ref() {
-        params = build_exec_params_for_apply_patch(apply_patch_exec, &params)?;
-        command_for_display = vec![
-            "apply_patch".to_string(),
-            apply_patch_exec.action.patch.clone(),
-        ];
-
-        let plan_req = PatchExecRequest {
-            action: &apply_patch_exec.action,
-            approval: turn_context.approval_policy,
-            policy: &turn_context.sandbox_policy,
-            cwd: &turn_context.cwd,
-            user_explicitly_approved: apply_patch_exec.user_explicitly_approved_this_action,
-        };
-
-        match plan_apply_patch(&plan_req) {
-            plan @ ExecPlan::Approved { .. } => plan,
-            ExecPlan::AskUser { .. } => {
-                return Err(FunctionCallError::RespondToModel(
-                    "patch requires approval but none was recorded".to_string(),
-                ));
-            }
-            ExecPlan::Reject { reason } => {
-                return Err(FunctionCallError::RespondToModel(format!(
-                    "patch rejected: {reason}"
-                )));
-            }
-        }
-    } else {
-        command_for_display = params.command.clone();
-
-        let initial_plan = {
-            let state = sess.state.lock().await;
-            plan_exec(&ExecRequest {
-                params: &params,
-                approval: turn_context.approval_policy,
-                policy: &turn_context.sandbox_policy,
-                approved_session_commands: state.approved_commands_ref(),
-            })
-        };
-
-        match initial_plan {
-            plan @ ExecPlan::Approved { .. } => plan,
-            ExecPlan::AskUser { reason } => {
-                let decision = sess
-                    .request_command_approval(
-                        sub_id.clone(),
-                        call_id.clone(),
-                        params.command.clone(),
-                        params.cwd.clone(),
-                        reason,
-                    )
-                    .await;
-                match decision {
-                    ReviewDecision::Approved => ExecPlan::approved(SandboxType::None, false, true),
-                    ReviewDecision::ApprovedForSession => {
-                        sess.add_approved_command(params.command.clone()).await;
-                        ExecPlan::approved(SandboxType::None, false, true)
-                    }
-                    ReviewDecision::Denied | ReviewDecision::Abort => {
-                        return Err(FunctionCallError::RespondToModel(
-                            "exec command rejected by user".to_string(),
-                        ));
-                    }
-                }
-            }
-            ExecPlan::Reject { reason } => {
-                return Err(FunctionCallError::RespondToModel(format!(
-                    "exec command rejected: {reason:?}"
-                )));
-            }
-        }
+    let approved_session_commands = {
+        let state = sess.state.lock().await;
+        state.approved_commands_ref().clone()
     };
 
+    let prepared = prepare_exec_invocation(
+        sess,
+        turn_context,
+        &sub_id,
+        &call_id,
+        params,
+        apply_patch_exec,
+        approved_session_commands,
+    )
+    .await?;
+
+    let PreparedExec {
+        params,
+        plan,
+        command_for_display,
+        apply_patch_exec,
+    } = prepared;
+
     let exec_command_context = ExecCommandContext {
         sub_id: sub_id.clone(),
         call_id: call_id.clone(),
diff --git a/codex-rs/core/src/sandbox/mod.rs b/codex-rs/core/src/sandbox/mod.rs
index abd735992d..d1553f0d17 100644
--- a/codex-rs/core/src/sandbox/mod.rs
+++ b/codex-rs/core/src/sandbox/mod.rs
@@ -2,7 +2,6 @@ mod apply_patch_adapter;
 mod backend;
 mod planner;
 
-pub(crate) use apply_patch_adapter::build_exec_params_for_apply_patch;
 pub use backend::BackendRegistry;
 pub use backend::DirectBackend;
 pub use backend::LinuxBackend;
@@ -11,8 +10,10 @@ pub use backend::SpawnBackend;
 pub use planner::ExecPlan;
 pub use planner::ExecRequest;
 pub use planner::PatchExecRequest;
+pub(crate) use planner::PreparedExec;
 pub use planner::plan_apply_patch;
 pub use planner::plan_exec;
+pub(crate) use planner::prepare_exec_invocation;
 
 use crate::error::Result;
 use crate::exec::ExecParams;
diff --git a/codex-rs/core/src/sandbox/planner.rs b/codex-rs/core/src/sandbox/planner.rs
index fcb1412ab9..2799087871 100644
--- a/codex-rs/core/src/sandbox/planner.rs
+++ b/codex-rs/core/src/sandbox/planner.rs
@@ -3,9 +3,15 @@ use std::path::Path;
 
 use codex_apply_patch::ApplyPatchAction;
 
+use super::apply_patch_adapter::build_exec_params_for_apply_patch;
+use crate::apply_patch::ApplyPatchExec;
+use crate::codex::Session;
+use crate::codex::TurnContext;
 use crate::exec::ExecParams;
 use crate::exec::SandboxType;
+use crate::function_tool::FunctionCallError;
 use crate::protocol::AskForApproval;
+use crate::protocol::ReviewDecision;
 use crate::protocol::SandboxPolicy;
 use crate::safety::SafetyCheck;
 use crate::safety::assess_command_safety;
@@ -101,6 +107,105 @@ pub fn plan_apply_patch(req: &PatchExecRequest<'_>) -> ExecPlan {
     }
 }
 
+#[derive(Debug)]
+pub(crate) struct PreparedExec {
+    pub(crate) params: ExecParams,
+    pub(crate) plan: ExecPlan,
+    pub(crate) command_for_display: Vec<String>,
+    pub(crate) apply_patch_exec: Option<ApplyPatchExec>,
+}
+
+pub(crate) async fn prepare_exec_invocation(
+    sess: &Session,
+    turn_context: &TurnContext,
+    sub_id: &str,
+    call_id: &str,
+    params: ExecParams,
+    apply_patch_exec: Option<ApplyPatchExec>,
+    approved_session_commands: HashSet<Vec<String>>,
+) -> Result<PreparedExec, FunctionCallError> {
+    let mut params = params;
+
+    let (plan, command_for_display) = if let Some(exec) = apply_patch_exec.as_ref() {
+        params = build_exec_params_for_apply_patch(exec, &params)?;
+        let command_for_display = vec!["apply_patch".to_string(), exec.action.patch.clone()];
+
+        let plan_req = PatchExecRequest {
+            action: &exec.action,
+            approval: turn_context.approval_policy,
+            policy: &turn_context.sandbox_policy,
+            cwd: &turn_context.cwd,
+            user_explicitly_approved: exec.user_explicitly_approved_this_action,
+        };
+
+        let plan = match plan_apply_patch(&plan_req) {
+            plan @ ExecPlan::Approved { .. } => plan,
+            ExecPlan::AskUser { .. } => {
+                return Err(FunctionCallError::RespondToModel(
+                    "patch requires approval but none was recorded".to_string(),
+                ));
+            }
+            ExecPlan::Reject { reason } => {
+                return Err(FunctionCallError::RespondToModel(format!(
+                    "patch rejected: {reason}"
+                )));
+            }
+        };
+
+        (plan, command_for_display)
+    } else {
+        let command_for_display = params.command.clone();
+
+        let initial_plan = plan_exec(&ExecRequest {
+            params: &params,
+            approval: turn_context.approval_policy,
+            policy: &turn_context.sandbox_policy,
+            approved_session_commands: &approved_session_commands,
+        });
+
+        let plan = match initial_plan {
+            plan @ ExecPlan::Approved { .. } => plan,
+            ExecPlan::AskUser { reason } => {
+                let decision = sess
+                    .request_command_approval(
+                        sub_id.to_string(),
+                        call_id.to_string(),
+                        params.command.clone(),
+                        params.cwd.clone(),
+                        reason,
+                    )
+                    .await;
+                match decision {
+                    ReviewDecision::Approved => ExecPlan::approved(SandboxType::None, false, true),
+                    ReviewDecision::ApprovedForSession => {
+                        sess.add_approved_command(params.command.clone()).await;
+                        ExecPlan::approved(SandboxType::None, false, true)
+                    }
+                    ReviewDecision::Denied | ReviewDecision::Abort => {
+                        return Err(FunctionCallError::RespondToModel(
+                            "exec command rejected by user".to_string(),
+                        ));
+                    }
+                }
+            }
+            ExecPlan::Reject { reason } => {
+                return Err(FunctionCallError::RespondToModel(format!(
+                    "exec command rejected: {reason:?}"
+                )));
+            }
+        };
+
+        (plan, command_for_display)
+    };
+
+    Ok(PreparedExec {
+        params,
+        plan,
+        command_for_display,
+        apply_patch_exec,
+    })
+}
+
 fn should_escalate_on_failure(approval: AskForApproval, sandbox: SandboxType) -> bool {
     matches!(
         (approval, sandbox),

From 5b74f10a7bdf5b8e659421d8f60a5c10f6bfa474 Mon Sep 17 00:00:00 2001
From: jimmyfraiture <jif@openai.com>
Date: Mon, 29 Sep 2025 19:34:12 +0100
Subject: [PATCH 04/17] Sandboxing iteration 2

---
 codex-rs/core/src/codex.rs                    | 320 +++++-------------
 codex-rs/core/src/executor/backends.rs        |  95 ++++++
 codex-rs/core/src/executor/cache.rs           |  25 ++
 codex-rs/core/src/executor/mod.rs             |  11 +
 codex-rs/core/src/executor/runner.rs          | 306 +++++++++++++++++
 codex-rs/core/src/executor/sandbox.rs         | 193 +++++++++++
 codex-rs/core/src/lib.rs                      |   2 +-
 .../core/src/sandbox/apply_patch_adapter.rs   |  33 --
 codex-rs/core/src/sandbox/backend.rs          |  87 -----
 codex-rs/core/src/sandbox/mod.rs              |  51 ---
 codex-rs/core/src/sandbox/planner.rs          | 217 ------------
 codex-rs/core/src/state/session.rs            |  12 -
 12 files changed, 718 insertions(+), 634 deletions(-)
 create mode 100644 codex-rs/core/src/executor/backends.rs
 create mode 100644 codex-rs/core/src/executor/cache.rs
 create mode 100644 codex-rs/core/src/executor/mod.rs
 create mode 100644 codex-rs/core/src/executor/runner.rs
 create mode 100644 codex-rs/core/src/executor/sandbox.rs
 delete mode 100644 codex-rs/core/src/sandbox/apply_patch_adapter.rs
 delete mode 100644 codex-rs/core/src/sandbox/backend.rs
 delete mode 100644 codex-rs/core/src/sandbox/mod.rs
 delete mode 100644 codex-rs/core/src/sandbox/planner.rs

diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs
index 70cc6900b3..84b1c53b9c 100644
--- a/codex-rs/core/src/codex.rs
+++ b/codex-rs/core/src/codex.rs
@@ -1,10 +1,8 @@
 use std::borrow::Cow;
 use std::collections::HashMap;
-use std::path::Path;
 use std::path::PathBuf;
 use std::sync::Arc;
 use std::sync::atomic::AtomicU64;
-use std::time::Duration;
 
 use crate::AuthManager;
 use crate::client_common::REVIEW_PROMPT;
@@ -54,11 +52,10 @@ use crate::environment_context::EnvironmentContext;
 use crate::error::CodexErr;
 use crate::error::Result as CodexResult;
 use crate::error::SandboxErr;
-use crate::error::get_error_message_ui;
 use crate::exec::ExecParams;
 use crate::exec::ExecToolCallOutput;
-use crate::exec::SandboxType;
 use crate::exec::StdoutStream;
+#[cfg(test)]
 use crate::exec::StreamOutput;
 use crate::exec_command::EXEC_COMMAND_TOOL_NAME;
 use crate::exec_command::ExecCommandParams;
@@ -66,6 +63,12 @@ use crate::exec_command::ExecSessionManager;
 use crate::exec_command::WRITE_STDIN_TOOL_NAME;
 use crate::exec_command::WriteStdinParams;
 use crate::exec_env::create_env;
+use crate::executor::ExecError;
+use crate::executor::ExecutionMode;
+use crate::executor::ExecutionRequest;
+use crate::executor::Executor;
+use crate::executor::ExecutorConfig;
+use crate::executor::normalize_exec_result;
 use crate::mcp_connection_manager::McpConnectionManager;
 use crate::mcp_tool_call::handle_mcp_tool_call;
 use crate::model_family::find_family_for_model;
@@ -109,12 +112,6 @@ use crate::protocol::TurnDiffEvent;
 use crate::protocol::WebSearchBeginEvent;
 use crate::rollout::RolloutRecorder;
 use crate::rollout::RolloutRecorderParams;
-use crate::sandbox::BackendRegistry;
-use crate::sandbox::ExecPlan;
-use crate::sandbox::ExecRuntimeContext;
-use crate::sandbox::PreparedExec;
-use crate::sandbox::prepare_exec_invocation;
-use crate::sandbox::run_with_plan;
 use crate::shell;
 use crate::state::ActiveTurn;
 use crate::state::SessionServices;
@@ -266,6 +263,7 @@ pub(crate) struct Session {
     pub(crate) active_turn: Mutex<Option<ActiveTurn>>,
     services: SessionServices,
     next_internal_sub_id: AtomicU64,
+    executor: Executor,
 }
 
 /// The context needed for a single turn of the conversation.
@@ -464,6 +462,12 @@ impl Session {
             show_raw_agent_reasoning: config.show_raw_agent_reasoning,
         };
 
+        let executor = Executor::new(ExecutorConfig::new(
+            turn_context.sandbox_policy.clone(),
+            turn_context.cwd.clone(),
+            config.codex_linux_sandbox_exe.clone(),
+        ));
+
         let sess = Arc::new(Session {
             conversation_id,
             tx_event: tx_event.clone(),
@@ -471,6 +475,7 @@ impl Session {
             active_turn: Mutex::new(None),
             services,
             next_internal_sub_id: AtomicU64::new(0),
+            executor,
         });
 
         // Dispatch the SessionConfiguredEvent first and then report any errors.
@@ -546,6 +551,11 @@ impl Session {
         }
     }
 
+    /// Emit an exec approval request event and await the user's decision.
+    ///
+    /// The request is keyed by `sub_id`/`call_id` so matching responses are delivered
+    /// to the correct in-flight turn. If the task is aborted, this returns the
+    /// default `ReviewDecision` (`Denied`).
     pub async fn request_command_approval(
         &self,
         sub_id: String,
@@ -643,11 +653,6 @@ impl Session {
         }
     }
 
-    pub async fn add_approved_command(&self, cmd: Vec<String>) {
-        let mut state = self.state.lock().await;
-        state.add_approved_command(cmd);
-    }
-
     /// Records input items: always append to conversation history and
     /// persist these response items to rollout.
     async fn record_conversation_items(&self, items: &[ResponseItem]) {
@@ -901,12 +906,13 @@ impl Session {
     /// command even on error.
     ///
     /// Returns the output of the exec tool call.
-    async fn run_exec_with_events<'a>(
+    async fn run_exec_with_events(
         &self,
         turn_diff_tracker: &mut TurnDiffTracker,
         begin_ctx: ExecCommandContext,
-        exec_args: ExecInvokeArgs<'a>,
-    ) -> crate::error::Result<ExecToolCallOutput> {
+        request: ExecutionRequest,
+        approval_policy: AskForApproval,
+    ) -> Result<ExecToolCallOutput, ExecError> {
         let is_apply_patch = begin_ctx.apply_patch.is_some();
         let sub_id = begin_ctx.sub_id.clone();
         let call_id = begin_ctx.call_id.clone();
@@ -914,41 +920,14 @@ impl Session {
         self.on_exec_command_begin(turn_diff_tracker, begin_ctx.clone())
             .await;
 
-        let ExecInvokeArgs {
-            params,
-            plan,
-            sandbox_policy,
-            sandbox_cwd,
-            codex_linux_sandbox_exe,
-            stdout_stream,
-        } = exec_args;
-
-        let registry = BackendRegistry::new();
-        let runtime_ctx = ExecRuntimeContext {
-            sandbox_policy,
-            sandbox_cwd,
-            codex_linux_sandbox_exe,
-            stdout_stream,
-        };
+        let result = self
+            .executor
+            .run(request, self, approval_policy, &sub_id, &call_id)
+            .await;
 
-        let result = run_with_plan(params, &plan, &registry, &runtime_ctx).await;
+        let normalized = normalize_exec_result(&result);
+        let borrowed = normalized.event_output();
 
-        let output_stderr;
-        let borrowed: &ExecToolCallOutput = match &result {
-            Ok(output) => output,
-            Err(CodexErr::Sandbox(SandboxErr::Timeout { output })) => output,
-            Err(e) => {
-                output_stderr = ExecToolCallOutput {
-                    exit_code: -1,
-                    stdout: StreamOutput::new(String::new()),
-                    stderr: StreamOutput::new(get_error_message_ui(e)),
-                    aggregated_output: StreamOutput::new(get_error_message_ui(e)),
-                    duration: Duration::default(),
-                    timed_out: false,
-                };
-                &output_stderr
-            }
-        };
         self.on_exec_command_end(
             turn_diff_tracker,
             &sub_id,
@@ -958,13 +937,15 @@ impl Session {
         )
         .await;
 
+        drop(normalized);
+
         result
     }
 
     /// Helper that emits a BackgroundEvent with the given message. This keeps
     /// the call‑sites terse so adding more diagnostics does not clutter the
     /// core agent logic.
-    async fn notify_background_event(&self, sub_id: &str, message: impl Into<String>) {
+    pub(crate) async fn notify_background_event(&self, sub_id: &str, message: impl Into<String>) {
         let event = Event {
             id: sub_id.to_string(),
             msg: EventMsg::BackgroundEvent(BackgroundEventEvent {
@@ -2530,15 +2511,6 @@ fn parse_container_exec_arguments(
         })
 }
 
-pub struct ExecInvokeArgs<'a> {
-    pub params: ExecParams,
-    pub plan: ExecPlan,
-    pub sandbox_policy: &'a SandboxPolicy,
-    pub sandbox_cwd: &'a Path,
-    pub codex_linux_sandbox_exe: &'a Option<PathBuf>,
-    pub stdout_stream: Option<StdoutStream>,
-}
-
 fn maybe_translate_shell_command(
     params: ExecParams,
     sess: &Session,
@@ -2599,29 +2571,12 @@ async fn handle_container_exec_with_params(
         MaybeApplyPatchVerified::NotApplyPatch => None,
     };
 
-    let approved_session_commands = {
-        let state = sess.state.lock().await;
-        state.approved_commands_ref().clone()
+    let command_for_display = if let Some(exec) = apply_patch_exec.as_ref() {
+        vec!["apply_patch".to_string(), exec.action.patch.clone()]
+    } else {
+        params.command.clone()
     };
 
-    let prepared = prepare_exec_invocation(
-        sess,
-        turn_context,
-        &sub_id,
-        &call_id,
-        params,
-        apply_patch_exec,
-        approved_session_commands,
-    )
-    .await?;
-
-    let PreparedExec {
-        params,
-        plan,
-        command_for_display,
-        apply_patch_exec,
-    } = prepared;
-
     let exec_command_context = ExecCommandContext {
         sub_id: sub_id.clone(),
         call_id: call_id.clone(),
@@ -2638,28 +2593,41 @@ async fn handle_container_exec_with_params(
         ),
     };
 
-    let params = maybe_translate_shell_command(params, sess, turn_context);
-    let plan_for_invocation = plan.clone();
+    let translated_params = maybe_translate_shell_command(params, sess, turn_context);
+    let stdout_stream = if exec_command_context.apply_patch.is_some() {
+        None
+    } else {
+        Some(StdoutStream {
+            sub_id: sub_id.clone(),
+            call_id: call_id.clone(),
+            tx_event: sess.tx_event.clone(),
+        })
+    };
+
+    let mode = match apply_patch_exec {
+        Some(exec) => ExecutionMode::ApplyPatch(exec),
+        None => ExecutionMode::Shell,
+    };
+
+    let request = ExecutionRequest {
+        params: translated_params,
+        approval_command: command_for_display,
+        mode,
+        stdout_stream,
+    };
+
+    sess.executor.update_environment(
+        turn_context.sandbox_policy.clone(),
+        turn_context.cwd.clone(),
+        sess.services.codex_linux_sandbox_exe.clone(),
+    );
+
     let output_result = sess
         .run_exec_with_events(
             turn_diff_tracker,
-            exec_command_context.clone(),
-            ExecInvokeArgs {
-                params: params.clone(),
-                plan: plan_for_invocation,
-                sandbox_policy: &turn_context.sandbox_policy,
-                sandbox_cwd: &turn_context.cwd,
-                codex_linux_sandbox_exe: &sess.services.codex_linux_sandbox_exe,
-                stdout_stream: if exec_command_context.apply_patch.is_some() {
-                    None
-                } else {
-                    Some(StdoutStream {
-                        sub_id: sub_id.clone(),
-                        call_id: call_id.clone(),
-                        tx_event: sess.tx_event.clone(),
-                    })
-                },
-            },
+            exec_command_context,
+            request,
+            turn_context.approval_policy,
         )
         .await;
 
@@ -2673,142 +2641,16 @@ async fn handle_container_exec_with_params(
                 Err(FunctionCallError::RespondToModel(content))
             }
         }
-        Err(CodexErr::Sandbox(error)) => {
-            handle_sandbox_error(
-                turn_diff_tracker,
-                params,
-                exec_command_context,
-                error,
-                &plan,
-                sess,
-                turn_context,
-            )
-            .await
-        }
-        Err(e) => Err(FunctionCallError::RespondToModel(format!(
-            "execution error: {e:?}"
+        Err(ExecError::Function(err)) => Err(err),
+        Err(ExecError::Codex(CodexErr::Sandbox(SandboxErr::Timeout { output }))) => Err(
+            FunctionCallError::RespondToModel(format_exec_output(&output)),
+        ),
+        Err(ExecError::Codex(err)) => Err(FunctionCallError::RespondToModel(format!(
+            "execution error: {err:?}"
         ))),
     }
 }
 
-async fn handle_sandbox_error(
-    turn_diff_tracker: &mut TurnDiffTracker,
-    params: ExecParams,
-    exec_command_context: ExecCommandContext,
-    error: SandboxErr,
-    plan: &ExecPlan,
-    sess: &Session,
-    turn_context: &TurnContext,
-) -> Result<String, FunctionCallError> {
-    let call_id = exec_command_context.call_id.clone();
-    let sub_id = exec_command_context.sub_id.clone();
-    let cwd = exec_command_context.cwd.clone();
-
-    if let SandboxErr::Timeout { output } = &error {
-        let content = format_exec_output(output);
-        return Err(FunctionCallError::RespondToModel(content));
-    }
-
-    let ExecPlan::Approved {
-        sandbox: sandbox_type,
-        on_failure_escalate,
-        ..
-    } = plan
-    else {
-        return Err(FunctionCallError::RespondToModel(
-            "execution failed without an approved plan".to_string(),
-        ));
-    };
-
-    if !on_failure_escalate {
-        return Err(FunctionCallError::RespondToModel(format!(
-            "failed in sandbox {sandbox_type:?} with execution error: {error:?}"
-        )));
-    }
-
-    // Note that when `error` is `SandboxErr::Denied`, it could be a false
-    // positive. That is, it may have exited with a non-zero exit code, not
-    // because the sandbox denied it, but because that is its expected behavior,
-    // i.e., a grep command that did not match anything. Ideally we would
-    // include additional metadata on the command to indicate whether non-zero
-    // exit codes merit a retry.
-
-    // For now, we categorically ask the user to retry without sandbox and
-    // emit the raw error as a background event.
-    sess.notify_background_event(&sub_id, format!("Execution failed: {error}"))
-        .await;
-
-    let command_for_retry = params.command.clone();
-    let decision = sess
-        .request_command_approval(
-            sub_id.clone(),
-            call_id.clone(),
-            command_for_retry.clone(),
-            cwd.clone(),
-            Some("command failed; retry without sandbox?".to_string()),
-        )
-        .await;
-
-    match decision {
-        ReviewDecision::Approved | ReviewDecision::ApprovedForSession => {
-            // Persist this command as pre‑approved for the
-            // remainder of the session so future
-            // executions skip the sandbox directly.
-            // TODO(ragona): Isn't this a bug? It always saves the command in an | fork?
-            sess.add_approved_command(command_for_retry.clone()).await;
-            // Inform UI we are retrying without sandbox.
-            sess.notify_background_event(&sub_id, "retrying command without sandbox")
-                .await;
-
-            // This is an escalated retry; the policy will not be
-            // examined and the sandbox has been set to `None`.
-            let retry_output_result = sess
-                .run_exec_with_events(
-                    turn_diff_tracker,
-                    exec_command_context.clone(),
-                    ExecInvokeArgs {
-                        params,
-                        plan: ExecPlan::approved(SandboxType::None, false, true),
-                        sandbox_policy: &turn_context.sandbox_policy,
-                        sandbox_cwd: &turn_context.cwd,
-                        codex_linux_sandbox_exe: &sess.services.codex_linux_sandbox_exe,
-                        stdout_stream: if exec_command_context.apply_patch.is_some() {
-                            None
-                        } else {
-                            Some(StdoutStream {
-                                sub_id: sub_id.clone(),
-                                call_id: call_id.clone(),
-                                tx_event: sess.tx_event.clone(),
-                            })
-                        },
-                    },
-                )
-                .await;
-
-            match retry_output_result {
-                Ok(retry_output) => {
-                    let ExecToolCallOutput { exit_code, .. } = &retry_output;
-                    let content = format_exec_output(&retry_output);
-                    if *exit_code == 0 {
-                        Ok(content)
-                    } else {
-                        Err(FunctionCallError::RespondToModel(content))
-                    }
-                }
-                Err(e) => Err(FunctionCallError::RespondToModel(format!(
-                    "retry failed: {e}"
-                ))),
-            }
-        }
-        ReviewDecision::Denied | ReviewDecision::Abort => {
-            // Fall through to original failure handling.
-            Err(FunctionCallError::RespondToModel(
-                "exec command rejected by user".to_string(),
-            ))
-        }
-    }
-}
-
 fn format_exec_output_str(exec_output: &ExecToolCallOutput) -> String {
     let ExecToolCallOutput {
         aggregated_output, ..
@@ -3366,6 +3208,11 @@ mod tests {
             user_shell: shell::Shell::Unknown,
             show_raw_agent_reasoning: config.show_raw_agent_reasoning,
         };
+        let executor = Executor::new(ExecutorConfig::new(
+            turn_context.sandbox_policy.clone(),
+            turn_context.cwd.clone(),
+            None,
+        ));
         let session = Session {
             conversation_id,
             tx_event,
@@ -3373,6 +3220,7 @@ mod tests {
             active_turn: Mutex::new(None),
             services,
             next_internal_sub_id: AtomicU64::new(0),
+            executor,
         };
         (session, turn_context)
     }
@@ -3433,6 +3281,11 @@ mod tests {
             user_shell: shell::Shell::Unknown,
             show_raw_agent_reasoning: config.show_raw_agent_reasoning,
         };
+        let executor = Executor::new(ExecutorConfig::new(
+            config.sandbox_policy.clone(),
+            config.cwd.clone(),
+            None,
+        ));
         let session = Arc::new(Session {
             conversation_id,
             tx_event,
@@ -3440,6 +3293,7 @@ mod tests {
             active_turn: Mutex::new(None),
             services,
             next_internal_sub_id: AtomicU64::new(0),
+            executor,
         });
         (session, turn_context, rx_event)
     }
diff --git a/codex-rs/core/src/executor/backends.rs b/codex-rs/core/src/executor/backends.rs
new file mode 100644
index 0000000000..aaa7f08030
--- /dev/null
+++ b/codex-rs/core/src/executor/backends.rs
@@ -0,0 +1,95 @@
+use std::sync::Arc;
+
+use async_trait::async_trait;
+
+use crate::apply_patch::ApplyPatchExec;
+use crate::exec::ExecParams;
+use crate::exec::ExecToolCallOutput;
+use crate::executor::sandbox::build_exec_params_for_apply_patch;
+use crate::function_tool::FunctionCallError;
+
+pub(crate) enum ExecutionMode {
+    Shell,
+    ApplyPatch(ApplyPatchExec),
+}
+
+#[async_trait]
+/// Backend-specific hooks that prepare and post-process execution requests for a
+/// given [`ExecutionMode`].
+pub(crate) trait ExecutionBackend: Send + Sync {
+    fn prepare(
+        &self,
+        params: ExecParams,
+        // Required for downcasting the apply_patch.
+        mode: &ExecutionMode,
+    ) -> Result<ExecParams, FunctionCallError>;
+
+    async fn finalize(
+        &self,
+        output: ExecToolCallOutput,
+        _mode: &ExecutionMode,
+    ) -> Result<ExecToolCallOutput, FunctionCallError> {
+        Ok(output)
+    }
+}
+
+pub(crate) struct BackendStore {
+    shell: Arc<dyn ExecutionBackend>,
+    apply_patch: Arc<dyn ExecutionBackend>,
+}
+
+impl BackendStore {
+    pub(crate) fn new() -> Self {
+        Self {
+            shell: Arc::new(ShellBackend),
+            apply_patch: Arc::new(ApplyPatchBackend),
+        }
+    }
+
+    pub(crate) fn for_mode(&self, mode: &ExecutionMode) -> Arc<dyn ExecutionBackend> {
+        match mode {
+            ExecutionMode::Shell => self.shell.clone(),
+            ExecutionMode::ApplyPatch(_) => self.apply_patch.clone(),
+        }
+    }
+}
+
+pub(crate) fn default_backends() -> BackendStore {
+    BackendStore::new()
+}
+
+struct ShellBackend;
+
+#[async_trait]
+impl ExecutionBackend for ShellBackend {
+    fn prepare(
+        &self,
+        params: ExecParams,
+        mode: &ExecutionMode,
+    ) -> Result<ExecParams, FunctionCallError> {
+        match mode {
+            ExecutionMode::Shell => Ok(params),
+            _ => Err(FunctionCallError::RespondToModel(
+                "shell backend invoked with non-shell mode".to_string(),
+            )),
+        }
+    }
+}
+
+struct ApplyPatchBackend;
+
+#[async_trait]
+impl ExecutionBackend for ApplyPatchBackend {
+    fn prepare(
+        &self,
+        params: ExecParams,
+        mode: &ExecutionMode,
+    ) -> Result<ExecParams, FunctionCallError> {
+        match mode {
+            ExecutionMode::ApplyPatch(exec) => build_exec_params_for_apply_patch(exec, &params),
+            ExecutionMode::Shell => Err(FunctionCallError::RespondToModel(
+                "apply_patch backend invoked without patch context".to_string(),
+            )),
+        }
+    }
+}
diff --git a/codex-rs/core/src/executor/cache.rs b/codex-rs/core/src/executor/cache.rs
new file mode 100644
index 0000000000..5629c8893a
--- /dev/null
+++ b/codex-rs/core/src/executor/cache.rs
@@ -0,0 +1,25 @@
+use std::collections::HashSet;
+use std::sync::Arc;
+use std::sync::Mutex;
+
+#[derive(Clone, Debug, Default)]
+/// Thread-safe store of user approvals so repeated commands can reuse
+/// previously granted trust.
+pub(crate) struct ApprovalCache {
+    inner: Arc<Mutex<HashSet<Vec<String>>>>,
+}
+
+impl ApprovalCache {
+    pub(crate) fn insert(&self, command: Vec<String>) {
+        if command.is_empty() {
+            return;
+        }
+        if let Ok(mut guard) = self.inner.lock() {
+            guard.insert(command);
+        }
+    }
+
+    pub(crate) fn snapshot(&self) -> HashSet<Vec<String>> {
+        self.inner.lock().map(|g| g.clone()).unwrap_or_default()
+    }
+}
diff --git a/codex-rs/core/src/executor/mod.rs b/codex-rs/core/src/executor/mod.rs
new file mode 100644
index 0000000000..c3dce856e5
--- /dev/null
+++ b/codex-rs/core/src/executor/mod.rs
@@ -0,0 +1,11 @@
+mod backends;
+mod cache;
+mod runner;
+mod sandbox;
+
+pub(crate) use backends::ExecutionMode;
+pub(crate) use runner::ExecError;
+pub(crate) use runner::ExecutionRequest;
+pub(crate) use runner::Executor;
+pub(crate) use runner::ExecutorConfig;
+pub(crate) use runner::normalize_exec_result;
diff --git a/codex-rs/core/src/executor/runner.rs b/codex-rs/core/src/executor/runner.rs
new file mode 100644
index 0000000000..708568a73b
--- /dev/null
+++ b/codex-rs/core/src/executor/runner.rs
@@ -0,0 +1,306 @@
+use std::path::PathBuf;
+use std::sync::Arc;
+use std::sync::RwLock;
+use std::time::Duration;
+
+use thiserror::Error;
+
+use super::backends::BackendStore;
+use super::backends::ExecutionBackend;
+use super::backends::ExecutionMode;
+use super::backends::default_backends;
+use super::cache::ApprovalCache;
+use crate::codex::Session;
+use crate::error::CodexErr;
+use crate::error::SandboxErr;
+use crate::error::get_error_message_ui;
+use crate::exec::ExecParams;
+use crate::exec::ExecToolCallOutput;
+use crate::exec::SandboxType;
+use crate::exec::StdoutStream;
+use crate::exec::StreamOutput;
+use crate::exec::process_exec_tool_call;
+use crate::executor::sandbox::select_sandbox;
+use crate::function_tool::FunctionCallError;
+use crate::protocol::AskForApproval;
+use crate::protocol::ReviewDecision;
+use crate::protocol::SandboxPolicy;
+
+#[derive(Clone, Debug)]
+pub(crate) struct ExecutorConfig {
+    pub(crate) sandbox_policy: SandboxPolicy,
+    pub(crate) sandbox_cwd: PathBuf,
+    codex_linux_sandbox_exe: Option<PathBuf>,
+}
+
+impl ExecutorConfig {
+    pub(crate) fn new(
+        sandbox_policy: SandboxPolicy,
+        sandbox_cwd: PathBuf,
+        codex_linux_sandbox_exe: Option<PathBuf>,
+    ) -> Self {
+        Self {
+            sandbox_policy,
+            sandbox_cwd,
+            codex_linux_sandbox_exe,
+        }
+    }
+}
+
+#[derive(Debug, Error)]
+pub enum ExecError {
+    #[error(transparent)]
+    Function(#[from] FunctionCallError),
+    #[error(transparent)]
+    Codex(#[from] CodexErr),
+}
+
+impl ExecError {
+    pub(crate) fn rejection(msg: impl Into<String>) -> Self {
+        FunctionCallError::RespondToModel(msg.into()).into()
+    }
+}
+
+/// Coordinates sandbox selection, backend-specific preparation, and command
+/// execution for tool calls requested by the model.
+pub(crate) struct Executor {
+    backends: BackendStore,
+    approval_cache: ApprovalCache,
+    config: Arc<RwLock<ExecutorConfig>>,
+}
+
+impl Executor {
+    pub(crate) fn new(config: ExecutorConfig) -> Self {
+        Self {
+            backends: default_backends(),
+            approval_cache: ApprovalCache::default(),
+            config: Arc::new(RwLock::new(config)),
+        }
+    }
+
+    /// Updates the sandbox policy and working directory used for future
+    /// executions without recreating the executor.
+    pub(crate) fn update_environment(
+        &self,
+        sandbox_policy: SandboxPolicy,
+        sandbox_cwd: PathBuf,
+        codex_linux_sandbox_exe: Option<PathBuf>,
+    ) {
+        if let Ok(mut cfg) = self.config.write() {
+            cfg.sandbox_policy = sandbox_policy;
+            cfg.sandbox_cwd = sandbox_cwd;
+            cfg.codex_linux_sandbox_exe = codex_linux_sandbox_exe;
+        }
+    }
+
+    /// Runs a prepared execution request end-to-end: prepares parameters, decides on
+    /// sandbox placement (prompting the user when necessary), launches the command,
+    /// and lets the backend post-process the final output.
+    pub(crate) async fn run(
+        &self,
+        mut request: ExecutionRequest,
+        session: &Session,
+        approval_policy: AskForApproval,
+        sub_id: &str,
+        call_id: &str,
+    ) -> Result<ExecToolCallOutput, ExecError> {
+        // Step 1: Normalise parameters via the selected backend.
+        let backend = self.backends.for_mode(&request.mode);
+        request.params = backend
+            .prepare(request.params, &request.mode)
+            .map_err(ExecError::from)?;
+
+        // Step 2: Snapshot sandbox configuration so it stays stable for this run.
+        let config = self
+            .config
+            .read()
+            .map_err(|_| ExecError::rejection("executor config poisoned"))?
+            .clone();
+
+        // Step 3: Decide sandbox placement, prompting for approval when needed.
+        let sandbox_decision = select_sandbox(
+            &request,
+            approval_policy,
+            self.approval_cache.snapshot(),
+            &config,
+            session,
+            sub_id,
+            call_id,
+        )
+        .await?;
+        if sandbox_decision.record_session_approval {
+            self.approval_cache.insert(request.approval_command.clone());
+        }
+
+        // Step 4: Launch the command within the chosen sandbox.
+        let first_attempt = self
+            .spawn(
+                request.params.clone(),
+                sandbox_decision.initial_sandbox,
+                &config,
+                request.stdout_stream.clone(),
+            )
+            .await;
+
+        // Step 5: Handle sandbox outcomes, optionally escalating to an unsandboxed retry.
+        let raw_output = match first_attempt {
+            Ok(output) => output,
+            Err(CodexErr::Sandbox(SandboxErr::Timeout { output })) => {
+                return Err(CodexErr::Sandbox(SandboxErr::Timeout { output }).into());
+            }
+            Err(CodexErr::Sandbox(error @ SandboxErr::Denied { .. })) => {
+                return if sandbox_decision.escalate_on_failure {
+                    self.retry_without_sandbox(
+                        &*backend, &request, &config, session, sub_id, call_id, error,
+                    )
+                    .await
+                } else {
+                    Err(ExecError::rejection(format!(
+                        "failed in sandbox {:?} with execution error: {error:?}",
+                        sandbox_decision.initial_sandbox
+                    )))
+                };
+            }
+            Err(err) => return Err(err.into()),
+        };
+
+        // Step 6: Allow the backend to post-process the raw output.
+        backend
+            .finalize(raw_output, &request.mode)
+            .await
+            .map_err(ExecError::from)
+    }
+
+    /// Fallback path invoked when a sandboxed run is denied so the user can
+    /// approve rerunning without isolation.
+    #[allow(clippy::too_many_arguments)]
+    async fn retry_without_sandbox(
+        &self,
+        backend: &dyn ExecutionBackend,
+        request: &ExecutionRequest,
+        config: &ExecutorConfig,
+        session: &Session,
+        sub_id: &str,
+        call_id: &str,
+        sandbox_error: SandboxErr,
+    ) -> Result<ExecToolCallOutput, ExecError> {
+        session
+            .notify_background_event(sub_id, format!("Execution failed: {sandbox_error}"))
+            .await;
+        let decision = session
+            .request_command_approval(
+                sub_id.to_string(),
+                call_id.to_string(),
+                request.approval_command.clone(),
+                request.params.cwd.clone(),
+                Some("command failed; retry without sandbox?".to_string()),
+            )
+            .await;
+
+        match decision {
+            ReviewDecision::Approved | ReviewDecision::ApprovedForSession => {
+                if matches!(decision, ReviewDecision::ApprovedForSession) {
+                    self.approval_cache.insert(request.approval_command.clone());
+                }
+                session
+                    .notify_background_event(sub_id, "retrying command without sandbox")
+                    .await;
+
+                let retry_output = self
+                    .spawn(
+                        request.params.clone(),
+                        SandboxType::None,
+                        config,
+                        request.stdout_stream.clone(),
+                    )
+                    .await?;
+
+                backend
+                    .finalize(retry_output, &request.mode)
+                    .await
+                    .map_err(ExecError::from)
+            }
+            ReviewDecision::Denied | ReviewDecision::Abort => {
+                Err(ExecError::rejection("exec command rejected by user"))
+            }
+        }
+    }
+
+    async fn spawn(
+        &self,
+        params: ExecParams,
+        sandbox: SandboxType,
+        config: &ExecutorConfig,
+        stdout_stream: Option<StdoutStream>,
+    ) -> Result<ExecToolCallOutput, CodexErr> {
+        process_exec_tool_call(
+            params,
+            sandbox,
+            &config.sandbox_policy,
+            &config.sandbox_cwd,
+            &config.codex_linux_sandbox_exe,
+            stdout_stream,
+        )
+        .await
+    }
+}
+
+pub(crate) struct ExecutionRequest {
+    pub params: ExecParams,
+    pub approval_command: Vec<String>,
+    pub mode: ExecutionMode,
+    pub stdout_stream: Option<StdoutStream>,
+}
+
+pub(crate) struct NormalizedExecOutput<'a> {
+    borrowed: Option<&'a ExecToolCallOutput>,
+    synthetic: Option<ExecToolCallOutput>,
+}
+
+impl<'a> NormalizedExecOutput<'a> {
+    pub(crate) fn event_output(&'a self) -> &'a ExecToolCallOutput {
+        match (self.borrowed, self.synthetic.as_ref()) {
+            (Some(output), _) => output,
+            (None, Some(output)) => output,
+            (None, None) => unreachable!("normalized exec output missing data"),
+        }
+    }
+}
+
+/// Converts a raw execution result into a uniform view that always exposes an
+/// [`ExecToolCallOutput`], synthesizing error output when the command fails
+/// before producing a response.
+pub(crate) fn normalize_exec_result(
+    result: &Result<ExecToolCallOutput, ExecError>,
+) -> NormalizedExecOutput<'_> {
+    match result {
+        Ok(output) => NormalizedExecOutput {
+            borrowed: Some(output),
+            synthetic: None,
+        },
+        Err(ExecError::Codex(CodexErr::Sandbox(SandboxErr::Timeout { output }))) => {
+            NormalizedExecOutput {
+                borrowed: Some(output.as_ref()),
+                synthetic: None,
+            }
+        }
+        Err(err) => {
+            let message = match err {
+                ExecError::Function(FunctionCallError::RespondToModel(msg)) => msg.clone(),
+                ExecError::Codex(e) => get_error_message_ui(e),
+            };
+            let synthetic = ExecToolCallOutput {
+                exit_code: -1,
+                stdout: StreamOutput::new(String::new()),
+                stderr: StreamOutput::new(message.clone()),
+                aggregated_output: StreamOutput::new(message),
+                duration: Duration::default(),
+                timed_out: false,
+            };
+            NormalizedExecOutput {
+                borrowed: None,
+                synthetic: Some(synthetic),
+            }
+        }
+    }
+}
diff --git a/codex-rs/core/src/executor/sandbox.rs b/codex-rs/core/src/executor/sandbox.rs
new file mode 100644
index 0000000000..1f87d961e6
--- /dev/null
+++ b/codex-rs/core/src/executor/sandbox.rs
@@ -0,0 +1,193 @@
+use crate::CODEX_APPLY_PATCH_ARG1;
+use crate::apply_patch::ApplyPatchExec;
+use crate::codex::Session;
+use crate::exec::ExecParams;
+use crate::exec::SandboxType;
+use crate::executor::ExecError;
+use crate::executor::ExecutionMode;
+use crate::executor::ExecutionRequest;
+use crate::executor::ExecutorConfig;
+use crate::function_tool::FunctionCallError;
+use crate::safety::SafetyCheck;
+use crate::safety::assess_command_safety;
+use crate::safety::assess_patch_safety;
+use codex_protocol::protocol::AskForApproval;
+use codex_protocol::protocol::ReviewDecision;
+use std::collections::HashMap;
+use std::collections::HashSet;
+use std::env;
+
+/// Sandbox placement options selected for an execution run, including whether
+/// to escalate after failures and whether approvals should persist.
+pub(crate) struct SandboxDecision {
+    pub(crate) initial_sandbox: SandboxType,
+    pub(crate) escalate_on_failure: bool,
+    pub(crate) record_session_approval: bool,
+}
+
+impl SandboxDecision {
+    fn auto(sandbox: SandboxType, escalate_on_failure: bool) -> Self {
+        Self {
+            initial_sandbox: sandbox,
+            escalate_on_failure,
+            record_session_approval: false,
+        }
+    }
+
+    fn user_override(record_session_approval: bool) -> Self {
+        Self {
+            initial_sandbox: SandboxType::None,
+            escalate_on_failure: false,
+            record_session_approval,
+        }
+    }
+}
+
+fn should_escalate_on_failure(approval: AskForApproval, sandbox: SandboxType) -> bool {
+    matches!(
+        (approval, sandbox),
+        (
+            AskForApproval::UnlessTrusted | AskForApproval::OnFailure,
+            SandboxType::MacosSeatbelt | SandboxType::LinuxSeccomp
+        )
+    )
+}
+
+/// Builds the command-line invocation that shells out to `codex apply_patch`
+/// using the provided apply-patch request details.
+pub(crate) fn build_exec_params_for_apply_patch(
+    exec: &ApplyPatchExec,
+    original: &ExecParams,
+) -> Result<ExecParams, FunctionCallError> {
+    let path_to_codex = env::current_exe()
+        .ok()
+        .map(|p| p.to_string_lossy().to_string())
+        .ok_or_else(|| {
+            FunctionCallError::RespondToModel(
+                "failed to determine path to codex executable".to_string(),
+            )
+        })?;
+
+    let patch = exec.action.patch.clone();
+    Ok(ExecParams {
+        command: vec![path_to_codex, CODEX_APPLY_PATCH_ARG1.to_string(), patch],
+        cwd: exec.action.cwd.clone(),
+        timeout_ms: original.timeout_ms,
+        // Run apply_patch with a minimal environment for determinism and to
+        // avoid leaking host environment variables into the patch process.
+        env: HashMap::new(),
+        with_escalated_permissions: original.with_escalated_permissions,
+        justification: original.justification.clone(),
+    })
+}
+
+/// Determines how a command should be sandboxed, prompting the user when
+/// policy requires explicit approval.
+pub async fn select_sandbox(
+    request: &ExecutionRequest,
+    approval_policy: AskForApproval,
+    approval_cache: HashSet<Vec<String>>,
+    config: &ExecutorConfig,
+    session: &Session,
+    sub_id: &str,
+    call_id: &str,
+) -> Result<SandboxDecision, ExecError> {
+    match &request.mode {
+        ExecutionMode::Shell => {
+            select_shell_sandbox(
+                request,
+                approval_policy,
+                approval_cache,
+                config,
+                session,
+                sub_id,
+                call_id,
+            )
+            .await
+        }
+        ExecutionMode::ApplyPatch(exec) => {
+            select_apply_patch_sandbox(exec, approval_policy, config)
+        }
+    }
+}
+
+async fn select_shell_sandbox(
+    request: &ExecutionRequest,
+    approval_policy: AskForApproval,
+    approved_snapshot: HashSet<Vec<String>>,
+    config: &ExecutorConfig,
+    session: &Session,
+    sub_id: &str,
+    call_id: &str,
+) -> Result<SandboxDecision, ExecError> {
+    let command_for_safety = if request.approval_command.is_empty() {
+        request.params.command.clone()
+    } else {
+        request.approval_command.clone()
+    };
+
+    let safety = assess_command_safety(
+        &command_for_safety,
+        approval_policy,
+        &config.sandbox_policy,
+        &approved_snapshot,
+        request.params.with_escalated_permissions.unwrap_or(false),
+    );
+
+    match safety {
+        SafetyCheck::AutoApprove { sandbox_type } => Ok(SandboxDecision::auto(
+            sandbox_type,
+            should_escalate_on_failure(approval_policy, sandbox_type),
+        )),
+        SafetyCheck::AskUser => {
+            let decision = session
+                .request_command_approval(
+                    sub_id.to_string(),
+                    call_id.to_string(),
+                    request.approval_command.clone(),
+                    request.params.cwd.clone(),
+                    request.params.justification.clone(),
+                )
+                .await;
+
+            match decision {
+                ReviewDecision::Approved => Ok(SandboxDecision::user_override(false)),
+                ReviewDecision::ApprovedForSession => Ok(SandboxDecision::user_override(true)),
+                ReviewDecision::Denied | ReviewDecision::Abort => {
+                    Err(ExecError::rejection("exec command rejected by user"))
+                }
+            }
+        }
+        SafetyCheck::Reject { reason } => Err(ExecError::rejection(format!(
+            "exec command rejected: {reason}"
+        ))),
+    }
+}
+
+fn select_apply_patch_sandbox(
+    exec: &ApplyPatchExec,
+    approval_policy: AskForApproval,
+    config: &ExecutorConfig,
+) -> Result<SandboxDecision, ExecError> {
+    if exec.user_explicitly_approved_this_action {
+        return Ok(SandboxDecision::user_override(false));
+    }
+
+    match assess_patch_safety(
+        &exec.action,
+        approval_policy,
+        &config.sandbox_policy,
+        &config.sandbox_cwd,
+    ) {
+        SafetyCheck::AutoApprove { sandbox_type } => Ok(SandboxDecision::auto(
+            sandbox_type,
+            should_escalate_on_failure(approval_policy, sandbox_type),
+        )),
+        SafetyCheck::AskUser => Err(ExecError::rejection(
+            "patch requires approval but none was recorded",
+        )),
+        SafetyCheck::Reject { reason } => {
+            Err(ExecError::rejection(format!("patch rejected: {reason}")))
+        }
+    }
+}
diff --git a/codex-rs/core/src/lib.rs b/codex-rs/core/src/lib.rs
index 9613e40b12..edc04ab942 100644
--- a/codex-rs/core/src/lib.rs
+++ b/codex-rs/core/src/lib.rs
@@ -27,6 +27,7 @@ pub mod error;
 pub mod exec;
 mod exec_command;
 pub mod exec_env;
+pub mod executor;
 mod flags;
 pub mod git_info;
 pub mod landlock;
@@ -60,7 +61,6 @@ pub mod plan_tool;
 pub mod project_doc;
 mod rollout;
 pub(crate) mod safety;
-pub mod sandbox;
 pub mod seatbelt;
 pub mod shell;
 pub mod spawn;
diff --git a/codex-rs/core/src/sandbox/apply_patch_adapter.rs b/codex-rs/core/src/sandbox/apply_patch_adapter.rs
deleted file mode 100644
index 7e55f43279..0000000000
--- a/codex-rs/core/src/sandbox/apply_patch_adapter.rs
+++ /dev/null
@@ -1,33 +0,0 @@
-use std::collections::HashMap;
-use std::env;
-
-use crate::apply_patch::ApplyPatchExec;
-use crate::apply_patch::CODEX_APPLY_PATCH_ARG1;
-use crate::exec::ExecParams;
-use crate::function_tool::FunctionCallError;
-
-pub(crate) fn build_exec_params_for_apply_patch(
-    exec: &ApplyPatchExec,
-    original: &ExecParams,
-) -> Result<ExecParams, FunctionCallError> {
-    let path_to_codex = env::current_exe()
-        .ok()
-        .map(|p| p.to_string_lossy().to_string())
-        .ok_or_else(|| {
-            FunctionCallError::RespondToModel(
-                "failed to determine path to codex executable".to_string(),
-            )
-        })?;
-
-    let patch = exec.action.patch.clone();
-    Ok(ExecParams {
-        command: vec![path_to_codex, CODEX_APPLY_PATCH_ARG1.to_string(), patch],
-        cwd: exec.action.cwd.clone(),
-        timeout_ms: original.timeout_ms,
-        // Run apply_patch with a minimal environment for determinism and to
-        // avoid leaking host environment variables into the patch process.
-        env: HashMap::new(),
-        with_escalated_permissions: original.with_escalated_permissions,
-        justification: original.justification.clone(),
-    })
-}
diff --git a/codex-rs/core/src/sandbox/backend.rs b/codex-rs/core/src/sandbox/backend.rs
deleted file mode 100644
index 02e5a87e4f..0000000000
--- a/codex-rs/core/src/sandbox/backend.rs
+++ /dev/null
@@ -1,87 +0,0 @@
-use std::path::Path;
-use std::path::PathBuf;
-
-use async_trait::async_trait;
-
-use crate::error::Result;
-use crate::exec::ExecParams;
-use crate::exec::ExecToolCallOutput;
-use crate::exec::SandboxType;
-use crate::exec::StdoutStream;
-use crate::exec::process_exec_tool_call;
-use crate::protocol::SandboxPolicy;
-
-#[async_trait]
-pub trait SpawnBackend: Send + Sync {
-    fn sandbox_type(&self) -> SandboxType;
-
-    async fn spawn(
-        &self,
-        params: ExecParams,
-        sandbox_policy: &SandboxPolicy,
-        sandbox_cwd: &Path,
-        codex_linux_sandbox_exe: &Option<PathBuf>,
-        stdout_stream: Option<StdoutStream>,
-    ) -> Result<ExecToolCallOutput> {
-        process_exec_tool_call(
-            params,
-            self.sandbox_type(),
-            sandbox_policy,
-            sandbox_cwd,
-            codex_linux_sandbox_exe,
-            stdout_stream,
-        )
-        .await
-    }
-}
-
-#[derive(Clone, Copy, Debug, Default)]
-pub struct DirectBackend;
-
-#[async_trait]
-impl SpawnBackend for DirectBackend {
-    fn sandbox_type(&self) -> SandboxType {
-        SandboxType::None
-    }
-}
-
-#[derive(Clone, Copy, Debug, Default)]
-pub struct SeatbeltBackend;
-
-#[async_trait]
-impl SpawnBackend for SeatbeltBackend {
-    fn sandbox_type(&self) -> SandboxType {
-        SandboxType::MacosSeatbelt
-    }
-}
-
-#[derive(Clone, Copy, Debug, Default)]
-pub struct LinuxBackend;
-
-#[async_trait]
-impl SpawnBackend for LinuxBackend {
-    fn sandbox_type(&self) -> SandboxType {
-        SandboxType::LinuxSeccomp
-    }
-}
-
-#[derive(Default)]
-pub struct BackendRegistry {
-    direct: DirectBackend,
-    seatbelt: SeatbeltBackend,
-    linux: LinuxBackend,
-}
-
-impl BackendRegistry {
-    pub fn new() -> Self {
-        Self::default()
-    }
-
-    pub fn for_type(&self, sandbox: SandboxType) -> &dyn SpawnBackend {
-        match sandbox {
-            SandboxType::None => &self.direct,
-            SandboxType::MacosSeatbelt => &self.seatbelt,
-            SandboxType::LinuxSeccomp => &self.linux,
-        }
-    }
-}
diff --git a/codex-rs/core/src/sandbox/mod.rs b/codex-rs/core/src/sandbox/mod.rs
deleted file mode 100644
index d1553f0d17..0000000000
--- a/codex-rs/core/src/sandbox/mod.rs
+++ /dev/null
@@ -1,51 +0,0 @@
-mod apply_patch_adapter;
-mod backend;
-mod planner;
-
-pub use backend::BackendRegistry;
-pub use backend::DirectBackend;
-pub use backend::LinuxBackend;
-pub use backend::SeatbeltBackend;
-pub use backend::SpawnBackend;
-pub use planner::ExecPlan;
-pub use planner::ExecRequest;
-pub use planner::PatchExecRequest;
-pub(crate) use planner::PreparedExec;
-pub use planner::plan_apply_patch;
-pub use planner::plan_exec;
-pub(crate) use planner::prepare_exec_invocation;
-
-use crate::error::Result;
-use crate::exec::ExecParams;
-use crate::exec::ExecToolCallOutput;
-use crate::exec::StdoutStream;
-use crate::protocol::SandboxPolicy;
-
-pub struct ExecRuntimeContext<'a> {
-    pub sandbox_policy: &'a SandboxPolicy,
-    pub sandbox_cwd: &'a std::path::Path,
-    pub codex_linux_sandbox_exe: &'a Option<std::path::PathBuf>,
-    pub stdout_stream: Option<StdoutStream>,
-}
-
-pub async fn run_with_plan(
-    params: ExecParams,
-    plan: &ExecPlan,
-    registry: &BackendRegistry,
-    runtime_ctx: &ExecRuntimeContext<'_>,
-) -> Result<ExecToolCallOutput> {
-    let ExecPlan::Approved { sandbox, .. } = plan else {
-        unreachable!("run_with_plan called without approved plan");
-    };
-
-    registry
-        .for_type(*sandbox)
-        .spawn(
-            params,
-            runtime_ctx.sandbox_policy,
-            runtime_ctx.sandbox_cwd,
-            runtime_ctx.codex_linux_sandbox_exe,
-            runtime_ctx.stdout_stream.clone(),
-        )
-        .await
-}
diff --git a/codex-rs/core/src/sandbox/planner.rs b/codex-rs/core/src/sandbox/planner.rs
deleted file mode 100644
index 2799087871..0000000000
--- a/codex-rs/core/src/sandbox/planner.rs
+++ /dev/null
@@ -1,217 +0,0 @@
-use std::collections::HashSet;
-use std::path::Path;
-
-use codex_apply_patch::ApplyPatchAction;
-
-use super::apply_patch_adapter::build_exec_params_for_apply_patch;
-use crate::apply_patch::ApplyPatchExec;
-use crate::codex::Session;
-use crate::codex::TurnContext;
-use crate::exec::ExecParams;
-use crate::exec::SandboxType;
-use crate::function_tool::FunctionCallError;
-use crate::protocol::AskForApproval;
-use crate::protocol::ReviewDecision;
-use crate::protocol::SandboxPolicy;
-use crate::safety::SafetyCheck;
-use crate::safety::assess_command_safety;
-use crate::safety::assess_patch_safety;
-
-#[derive(Clone, Debug)]
-pub struct ExecRequest<'a> {
-    pub params: &'a ExecParams,
-    pub approval: AskForApproval,
-    pub policy: &'a SandboxPolicy,
-    pub approved_session_commands: &'a HashSet<Vec<String>>,
-}
-
-#[derive(Clone, Debug)]
-pub enum ExecPlan {
-    Reject {
-        reason: String,
-    },
-    AskUser {
-        reason: Option<String>,
-    },
-    Approved {
-        sandbox: SandboxType,
-        on_failure_escalate: bool,
-        approved_by_user: bool,
-    },
-}
-
-impl ExecPlan {
-    pub fn approved(
-        sandbox: SandboxType,
-        on_failure_escalate: bool,
-        approved_by_user: bool,
-    ) -> Self {
-        ExecPlan::Approved {
-            sandbox,
-            on_failure_escalate,
-            approved_by_user,
-        }
-    }
-}
-
-pub fn plan_exec(req: &ExecRequest<'_>) -> ExecPlan {
-    let params = req.params;
-    let with_escalated_permissions = params.with_escalated_permissions.unwrap_or(false);
-    let safety = assess_command_safety(
-        &params.command,
-        req.approval,
-        req.policy,
-        req.approved_session_commands,
-        with_escalated_permissions,
-    );
-
-    match safety {
-        SafetyCheck::AutoApprove { sandbox_type } => ExecPlan::Approved {
-            sandbox: sandbox_type,
-            on_failure_escalate: should_escalate_on_failure(req.approval, sandbox_type),
-            approved_by_user: false,
-        },
-        SafetyCheck::AskUser => ExecPlan::AskUser {
-            reason: params.justification.clone(),
-        },
-        SafetyCheck::Reject { reason } => ExecPlan::Reject { reason },
-    }
-}
-
-#[derive(Clone, Debug)]
-pub struct PatchExecRequest<'a> {
-    pub action: &'a ApplyPatchAction,
-    pub approval: AskForApproval,
-    pub policy: &'a SandboxPolicy,
-    pub cwd: &'a Path,
-    pub user_explicitly_approved: bool,
-}
-
-pub fn plan_apply_patch(req: &PatchExecRequest<'_>) -> ExecPlan {
-    if req.user_explicitly_approved {
-        ExecPlan::Approved {
-            sandbox: SandboxType::None,
-            on_failure_escalate: false,
-            approved_by_user: true,
-        }
-    } else {
-        match assess_patch_safety(req.action, req.approval, req.policy, req.cwd) {
-            SafetyCheck::AutoApprove { sandbox_type } => ExecPlan::Approved {
-                sandbox: sandbox_type,
-                on_failure_escalate: should_escalate_on_failure(req.approval, sandbox_type),
-                approved_by_user: false,
-            },
-            SafetyCheck::AskUser => ExecPlan::AskUser { reason: None },
-            SafetyCheck::Reject { reason } => ExecPlan::Reject { reason },
-        }
-    }
-}
-
-#[derive(Debug)]
-pub(crate) struct PreparedExec {
-    pub(crate) params: ExecParams,
-    pub(crate) plan: ExecPlan,
-    pub(crate) command_for_display: Vec<String>,
-    pub(crate) apply_patch_exec: Option<ApplyPatchExec>,
-}
-
-pub(crate) async fn prepare_exec_invocation(
-    sess: &Session,
-    turn_context: &TurnContext,
-    sub_id: &str,
-    call_id: &str,
-    params: ExecParams,
-    apply_patch_exec: Option<ApplyPatchExec>,
-    approved_session_commands: HashSet<Vec<String>>,
-) -> Result<PreparedExec, FunctionCallError> {
-    let mut params = params;
-
-    let (plan, command_for_display) = if let Some(exec) = apply_patch_exec.as_ref() {
-        params = build_exec_params_for_apply_patch(exec, &params)?;
-        let command_for_display = vec!["apply_patch".to_string(), exec.action.patch.clone()];
-
-        let plan_req = PatchExecRequest {
-            action: &exec.action,
-            approval: turn_context.approval_policy,
-            policy: &turn_context.sandbox_policy,
-            cwd: &turn_context.cwd,
-            user_explicitly_approved: exec.user_explicitly_approved_this_action,
-        };
-
-        let plan = match plan_apply_patch(&plan_req) {
-            plan @ ExecPlan::Approved { .. } => plan,
-            ExecPlan::AskUser { .. } => {
-                return Err(FunctionCallError::RespondToModel(
-                    "patch requires approval but none was recorded".to_string(),
-                ));
-            }
-            ExecPlan::Reject { reason } => {
-                return Err(FunctionCallError::RespondToModel(format!(
-                    "patch rejected: {reason}"
-                )));
-            }
-        };
-
-        (plan, command_for_display)
-    } else {
-        let command_for_display = params.command.clone();
-
-        let initial_plan = plan_exec(&ExecRequest {
-            params: &params,
-            approval: turn_context.approval_policy,
-            policy: &turn_context.sandbox_policy,
-            approved_session_commands: &approved_session_commands,
-        });
-
-        let plan = match initial_plan {
-            plan @ ExecPlan::Approved { .. } => plan,
-            ExecPlan::AskUser { reason } => {
-                let decision = sess
-                    .request_command_approval(
-                        sub_id.to_string(),
-                        call_id.to_string(),
-                        params.command.clone(),
-                        params.cwd.clone(),
-                        reason,
-                    )
-                    .await;
-                match decision {
-                    ReviewDecision::Approved => ExecPlan::approved(SandboxType::None, false, true),
-                    ReviewDecision::ApprovedForSession => {
-                        sess.add_approved_command(params.command.clone()).await;
-                        ExecPlan::approved(SandboxType::None, false, true)
-                    }
-                    ReviewDecision::Denied | ReviewDecision::Abort => {
-                        return Err(FunctionCallError::RespondToModel(
-                            "exec command rejected by user".to_string(),
-                        ));
-                    }
-                }
-            }
-            ExecPlan::Reject { reason } => {
-                return Err(FunctionCallError::RespondToModel(format!(
-                    "exec command rejected: {reason:?}"
-                )));
-            }
-        };
-
-        (plan, command_for_display)
-    };
-
-    Ok(PreparedExec {
-        params,
-        plan,
-        command_for_display,
-        apply_patch_exec,
-    })
-}
-
-fn should_escalate_on_failure(approval: AskForApproval, sandbox: SandboxType) -> bool {
-    matches!(
-        (approval, sandbox),
-        (
-            AskForApproval::UnlessTrusted | AskForApproval::OnFailure,
-            SandboxType::MacosSeatbelt | SandboxType::LinuxSeccomp
-        )
-    )
-}
diff --git a/codex-rs/core/src/state/session.rs b/codex-rs/core/src/state/session.rs
index ee0c5fc976..f170a10c18 100644
--- a/codex-rs/core/src/state/session.rs
+++ b/codex-rs/core/src/state/session.rs
@@ -1,7 +1,5 @@
 //! Session-wide mutable state.
 
-use std::collections::HashSet;
-
 use codex_protocol::models::ResponseItem;
 
 use crate::conversation_history::ConversationHistory;
@@ -12,7 +10,6 @@ use crate::protocol::TokenUsageInfo;
 /// Persistent, session-scoped state previously stored directly on `Session`.
 #[derive(Default)]
 pub(crate) struct SessionState {
-    pub(crate) approved_commands: HashSet<Vec<String>>,
     pub(crate) history: ConversationHistory,
     pub(crate) token_info: Option<TokenUsageInfo>,
     pub(crate) latest_rate_limits: Option<RateLimitSnapshot>,
@@ -44,15 +41,6 @@ impl SessionState {
         self.history.replace(items);
     }
 
-    // Approved command helpers
-    pub(crate) fn add_approved_command(&mut self, cmd: Vec<String>) {
-        self.approved_commands.insert(cmd);
-    }
-
-    pub(crate) fn approved_commands_ref(&self) -> &HashSet<Vec<String>> {
-        &self.approved_commands
-    }
-
     // Token/rate limit helpers
     pub(crate) fn update_token_info_from_usage(
         &mut self,

From ed45f852093493f0137386bee5f701cb28262a3a Mon Sep 17 00:00:00 2001
From: jimmyfraiture <jif@openai.com>
Date: Tue, 30 Sep 2025 11:27:23 +0100
Subject: [PATCH 05/17] RV 1

---
 codex-rs/core/src/apply_patch.rs       |  25 +++
 codex-rs/core/src/codex.rs             |   3 +-
 codex-rs/core/src/executor/backends.rs |  35 ++--
 codex-rs/core/src/executor/cache.rs    |  26 +++
 codex-rs/core/src/executor/runner.rs   |  83 +++++++--
 codex-rs/core/src/executor/sandbox.rs  | 229 ++++++++++++++++++++++---
 6 files changed, 350 insertions(+), 51 deletions(-)

diff --git a/codex-rs/core/src/apply_patch.rs b/codex-rs/core/src/apply_patch.rs
index 8f372fb2d3..7efae19f15 100644
--- a/codex-rs/core/src/apply_patch.rs
+++ b/codex-rs/core/src/apply_patch.rs
@@ -109,3 +109,28 @@ pub(crate) fn convert_apply_patch_to_protocol(
     }
     result
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use pretty_assertions::assert_eq;
+    
+    use tempfile::tempdir;
+
+    #[test]
+    fn convert_apply_patch_maps_add_variant() {
+        let tmp = tempdir().expect("tmp");
+        let p = tmp.path().join("a.txt");
+        // Create an action with a single Add change
+        let action = ApplyPatchAction::new_add_for_test(&p, "hello".to_string());
+
+        let got = convert_apply_patch_to_protocol(&action);
+
+        assert_eq!(
+            got.get(&p),
+            Some(&FileChange::Add {
+                content: "hello".to_string()
+            })
+        );
+    }
+}
diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs
index 84b1c53b9c..cde64105f4 100644
--- a/codex-rs/core/src/codex.rs
+++ b/codex-rs/core/src/codex.rs
@@ -2616,10 +2616,9 @@ async fn handle_container_exec_with_params(
         stdout_stream,
     };
 
-    sess.executor.update_environment(
+    sess.executor.update_environment( // todo this should not be needed ? Not sure what it means
         turn_context.sandbox_policy.clone(),
         turn_context.cwd.clone(),
-        sess.services.codex_linux_sandbox_exe.clone(),
     );
 
     let output_result = sess
diff --git a/codex-rs/core/src/executor/backends.rs b/codex-rs/core/src/executor/backends.rs
index aaa7f08030..09c786a9d4 100644
--- a/codex-rs/core/src/executor/backends.rs
+++ b/codex-rs/core/src/executor/backends.rs
@@ -1,11 +1,13 @@
+use std::collections::HashMap;
+use std::env;
 use std::sync::Arc;
 
 use async_trait::async_trait;
 
 use crate::apply_patch::ApplyPatchExec;
+use crate::CODEX_APPLY_PATCH_ARG1;
 use crate::exec::ExecParams;
 use crate::exec::ExecToolCallOutput;
-use crate::executor::sandbox::build_exec_params_for_apply_patch;
 use crate::function_tool::FunctionCallError;
 
 pub(crate) enum ExecutionMode {
@@ -23,14 +25,6 @@ pub(crate) trait ExecutionBackend: Send + Sync {
         // Required for downcasting the apply_patch.
         mode: &ExecutionMode,
     ) -> Result<ExecParams, FunctionCallError>;
-
-    async fn finalize(
-        &self,
-        output: ExecToolCallOutput,
-        _mode: &ExecutionMode,
-    ) -> Result<ExecToolCallOutput, FunctionCallError> {
-        Ok(output)
-    }
 }
 
 pub(crate) struct BackendStore {
@@ -86,7 +80,28 @@ impl ExecutionBackend for ApplyPatchBackend {
         mode: &ExecutionMode,
     ) -> Result<ExecParams, FunctionCallError> {
         match mode {
-            ExecutionMode::ApplyPatch(exec) => build_exec_params_for_apply_patch(exec, &params),
+            ExecutionMode::ApplyPatch(exec) => {
+                let path_to_codex = env::current_exe()
+                    .ok()
+                    .map(|p| p.to_string_lossy().to_string())
+                    .ok_or_else(|| {
+                        FunctionCallError::RespondToModel(
+                            "failed to determine path to codex executable".to_string(),
+                        )
+                    })?;
+
+                let patch = exec.action.patch.clone();
+                Ok(ExecParams {
+                    command: vec![path_to_codex, CODEX_APPLY_PATCH_ARG1.to_string(), patch],
+                    cwd: exec.action.cwd.clone(),
+                    timeout_ms: params.timeout_ms,
+                    // Run apply_patch with a minimal environment for determinism and to
+                    // avoid leaking host environment variables into the patch process.
+                    env: HashMap::new(),
+                    with_escalated_permissions: params.with_escalated_permissions,
+                    justification: params.justification,
+                })
+            },
             ExecutionMode::Shell => Err(FunctionCallError::RespondToModel(
                 "apply_patch backend invoked without patch context".to_string(),
             )),
diff --git a/codex-rs/core/src/executor/cache.rs b/codex-rs/core/src/executor/cache.rs
index 5629c8893a..737ecb927c 100644
--- a/codex-rs/core/src/executor/cache.rs
+++ b/codex-rs/core/src/executor/cache.rs
@@ -23,3 +23,29 @@ impl ApprovalCache {
         self.inner.lock().map(|g| g.clone()).unwrap_or_default()
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use pretty_assertions::assert_eq;
+
+    #[test]
+    fn insert_ignores_empty_and_dedupes() {
+        let cache = ApprovalCache::default();
+
+        // Empty should be ignored
+        cache.insert(vec![]);
+        assert!(cache.snapshot().is_empty());
+
+        // Insert a command and verify snapshot contains it
+        let cmd = vec!["foo".to_string(), "bar".to_string()];
+        cache.insert(cmd.clone());
+        let snap1 = cache.snapshot();
+        assert!(snap1.contains(&cmd));
+
+        // Reinserting should not create duplicates
+        cache.insert(cmd);
+        let snap2 = cache.snapshot();
+        assert_eq!(snap1, snap2);
+    }
+}
diff --git a/codex-rs/core/src/executor/runner.rs b/codex-rs/core/src/executor/runner.rs
index 708568a73b..a65da13802 100644
--- a/codex-rs/core/src/executor/runner.rs
+++ b/codex-rs/core/src/executor/runner.rs
@@ -84,12 +84,10 @@ impl Executor {
         &self,
         sandbox_policy: SandboxPolicy,
         sandbox_cwd: PathBuf,
-        codex_linux_sandbox_exe: Option<PathBuf>,
     ) {
         if let Ok(mut cfg) = self.config.write() {
             cfg.sandbox_policy = sandbox_policy;
             cfg.sandbox_cwd = sandbox_cwd;
-            cfg.codex_linux_sandbox_exe = codex_linux_sandbox_exe;
         }
     }
 
@@ -164,11 +162,7 @@ impl Executor {
             Err(err) => return Err(err.into()),
         };
 
-        // Step 6: Allow the backend to post-process the raw output.
-        backend
-            .finalize(raw_output, &request.mode)
-            .await
-            .map_err(ExecError::from)
+        Ok(raw_output)
     }
 
     /// Fallback path invoked when a sandboxed run is denied so the user can
@@ -215,10 +209,7 @@ impl Executor {
                     )
                     .await?;
 
-                backend
-                    .finalize(retry_output, &request.mode)
-                    .await
-                    .map_err(ExecError::from)
+                Ok(retry_output)
             }
             ReviewDecision::Denied | ReviewDecision::Abort => {
                 Err(ExecError::rejection("exec command rejected by user"))
@@ -304,3 +295,73 @@ pub(crate) fn normalize_exec_result(
         }
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::error::CodexErr;
+    use crate::error::EnvVarError;
+    use crate::error::SandboxErr;
+    use crate::exec::StreamOutput;
+    use pretty_assertions::assert_eq;
+
+    fn make_output(text: &str) -> ExecToolCallOutput {
+        ExecToolCallOutput {
+            exit_code: 1,
+            stdout: StreamOutput::new(String::new()),
+            stderr: StreamOutput::new(String::new()),
+            aggregated_output: StreamOutput::new(text.to_string()),
+            duration: Duration::from_millis(123),
+            timed_out: false,
+        }
+    }
+
+    #[test]
+    fn normalize_success_borrows() {
+        let out = make_output("ok");
+        let result: Result<ExecToolCallOutput, ExecError> = Ok(out);
+        let normalized = normalize_exec_result(&result);
+        assert_eq!(normalized.event_output().aggregated_output.text, "ok");
+    }
+
+    #[test]
+    fn normalize_timeout_borrows_embedded_output() {
+        let out = make_output("timed out payload");
+        let err = CodexErr::Sandbox(SandboxErr::Timeout {
+            output: Box::new(out),
+        });
+        let result: Result<ExecToolCallOutput, ExecError> = Err(ExecError::Codex(err));
+        let normalized = normalize_exec_result(&result);
+        assert_eq!(
+            normalized.event_output().aggregated_output.text,
+            "timed out payload"
+        );
+    }
+
+    #[test]
+    fn normalize_function_error_synthesizes_payload() {
+        let err = FunctionCallError::RespondToModel("boom".to_string());
+        let result: Result<ExecToolCallOutput, ExecError> = Err(ExecError::Function(err));
+        let normalized = normalize_exec_result(&result);
+        assert_eq!(normalized.event_output().aggregated_output.text, "boom");
+    }
+
+    #[test]
+    fn normalize_codex_error_synthesizes_user_message() {
+        // Use a simple EnvVar error which formats to a clear message
+        let e = CodexErr::EnvVar(EnvVarError {
+            var: "FOO".to_string(),
+            instructions: Some("set it".to_string()),
+        });
+        let result: Result<ExecToolCallOutput, ExecError> = Err(ExecError::Codex(e));
+        let normalized = normalize_exec_result(&result);
+        assert!(
+            normalized
+                .event_output()
+                .aggregated_output
+                .text
+                .contains("Missing environment variable: `FOO`"),
+            "expected synthesized user-friendly message"
+        );
+    }
+}
diff --git a/codex-rs/core/src/executor/sandbox.rs b/codex-rs/core/src/executor/sandbox.rs
index 1f87d961e6..87fdc0f00a 100644
--- a/codex-rs/core/src/executor/sandbox.rs
+++ b/codex-rs/core/src/executor/sandbox.rs
@@ -53,34 +53,6 @@ fn should_escalate_on_failure(approval: AskForApproval, sandbox: SandboxType) ->
     )
 }
 
-/// Builds the command-line invocation that shells out to `codex apply_patch`
-/// using the provided apply-patch request details.
-pub(crate) fn build_exec_params_for_apply_patch(
-    exec: &ApplyPatchExec,
-    original: &ExecParams,
-) -> Result<ExecParams, FunctionCallError> {
-    let path_to_codex = env::current_exe()
-        .ok()
-        .map(|p| p.to_string_lossy().to_string())
-        .ok_or_else(|| {
-            FunctionCallError::RespondToModel(
-                "failed to determine path to codex executable".to_string(),
-            )
-        })?;
-
-    let patch = exec.action.patch.clone();
-    Ok(ExecParams {
-        command: vec![path_to_codex, CODEX_APPLY_PATCH_ARG1.to_string(), patch],
-        cwd: exec.action.cwd.clone(),
-        timeout_ms: original.timeout_ms,
-        // Run apply_patch with a minimal environment for determinism and to
-        // avoid leaking host environment variables into the patch process.
-        env: HashMap::new(),
-        with_escalated_permissions: original.with_escalated_permissions,
-        justification: original.justification.clone(),
-    })
-}
-
 /// Determines how a command should be sandboxed, prompting the user when
 /// policy requires explicit approval.
 pub async fn select_sandbox(
@@ -191,3 +163,204 @@ fn select_apply_patch_sandbox(
         }
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::codex::make_session_and_context;
+    use crate::exec::ExecParams;
+    use crate::protocol::SandboxPolicy;
+    use codex_apply_patch::ApplyPatchAction;
+    use pretty_assertions::assert_eq;
+
+    #[tokio::test]
+    async fn select_apply_patch_user_override_when_explicit() {
+        let (session, _ctx) = make_session_and_context();
+        let tmp = tempfile::tempdir().expect("tmp");
+        let p = tmp.path().join("a.txt");
+        let action = ApplyPatchAction::new_add_for_test(&p, "hello".to_string());
+        let exec = ApplyPatchExec {
+            action,
+            user_explicitly_approved_this_action: true,
+        };
+        let cfg = ExecutorConfig::new(SandboxPolicy::ReadOnly, std::env::temp_dir(), None);
+        let request = ExecutionRequest {
+            params: ExecParams {
+                command: vec!["apply_patch".into()],
+                cwd: std::env::temp_dir(),
+                timeout_ms: None,
+                env: std::collections::HashMap::new(),
+                with_escalated_permissions: None,
+                justification: None,
+            },
+            approval_command: vec!["apply_patch".into()],
+            mode: ExecutionMode::ApplyPatch(exec),
+            stdout_stream: None,
+        };
+        let decision = select_sandbox(
+            &request,
+            AskForApproval::OnRequest,
+            Default::default(),
+            &cfg,
+            &session,
+            "sub",
+            "call",
+        )
+        .await
+        .expect("ok");
+        // Explicit user override runs without sandbox
+        assert_eq!(decision.initial_sandbox, SandboxType::None);
+        assert_eq!(decision.escalate_on_failure, false);
+    }
+
+    #[tokio::test]
+    async fn select_apply_patch_autoapprove_in_danger() {
+        let (session, _ctx) = make_session_and_context();
+        let tmp = tempfile::tempdir().expect("tmp");
+        let p = tmp.path().join("a.txt");
+        let action = ApplyPatchAction::new_add_for_test(&p, "hello".to_string());
+        let exec = ApplyPatchExec {
+            action,
+            user_explicitly_approved_this_action: false,
+        };
+        let cfg = ExecutorConfig::new(SandboxPolicy::DangerFullAccess, std::env::temp_dir(), None);
+        let request = ExecutionRequest {
+            params: ExecParams {
+                command: vec!["apply_patch".into()],
+                cwd: std::env::temp_dir(),
+                timeout_ms: None,
+                env: std::collections::HashMap::new(),
+                with_escalated_permissions: None,
+                justification: None,
+            },
+            approval_command: vec!["apply_patch".into()],
+            mode: ExecutionMode::ApplyPatch(exec),
+            stdout_stream: None,
+        };
+        let decision = select_sandbox(
+            &request,
+            AskForApproval::OnRequest,
+            Default::default(),
+            &cfg,
+            &session,
+            "sub",
+            "call",
+        )
+        .await
+        .expect("ok");
+        // On platforms with a sandbox, DangerFullAccess still prefers it
+        let expected = crate::safety::get_platform_sandbox().unwrap_or(SandboxType::None);
+        assert_eq!(decision.initial_sandbox, expected);
+        assert_eq!(decision.escalate_on_failure, false);
+    }
+
+    #[tokio::test]
+    async fn select_apply_patch_requires_approval_on_unless_trusted() {
+        let (session, _ctx) = make_session_and_context();
+        let tempdir = tempfile::tempdir().expect("tmpdir");
+        let p = tempdir.path().join("a.txt");
+        let action = ApplyPatchAction::new_add_for_test(&p, "hello".to_string());
+        let exec = ApplyPatchExec {
+            action,
+            user_explicitly_approved_this_action: false,
+        };
+        let cfg = ExecutorConfig::new(SandboxPolicy::ReadOnly, std::env::temp_dir(), None);
+        let request = ExecutionRequest {
+            params: ExecParams {
+                command: vec!["apply_patch".into()],
+                cwd: std::env::temp_dir(),
+                timeout_ms: None,
+                env: std::collections::HashMap::new(),
+                with_escalated_permissions: None,
+                justification: None,
+            },
+            approval_command: vec!["apply_patch".into()],
+            mode: ExecutionMode::ApplyPatch(exec),
+            stdout_stream: None,
+        };
+        let result = select_sandbox(
+            &request,
+            AskForApproval::UnlessTrusted,
+            Default::default(),
+            &cfg,
+            &session,
+            "sub",
+            "call",
+        )
+        .await;
+        match result {
+            Ok(_) => panic!("expected error"),
+            Err(ExecError::Function(FunctionCallError::RespondToModel(msg))) => {
+                assert!(msg.contains("requires approval"))
+            }
+            Err(other) => panic!("unexpected error: {other:?}"),
+        }
+    }
+
+    #[tokio::test]
+    async fn select_shell_autoapprove_in_danger_mode() {
+        let (session, _ctx) = make_session_and_context();
+        let cfg = ExecutorConfig::new(SandboxPolicy::DangerFullAccess, std::env::temp_dir(), None);
+        let request = ExecutionRequest {
+            params: ExecParams {
+                command: vec!["some-unknown".into()],
+                cwd: std::env::temp_dir(),
+                timeout_ms: None,
+                env: std::collections::HashMap::new(),
+                with_escalated_permissions: None,
+                justification: None,
+            },
+            approval_command: vec!["some-unknown".into()],
+            mode: ExecutionMode::Shell,
+            stdout_stream: None,
+        };
+        let decision = select_sandbox(
+            &request,
+            AskForApproval::OnRequest,
+            Default::default(),
+            &cfg,
+            &session,
+            "sub",
+            "call",
+        )
+        .await
+        .expect("ok");
+        assert_eq!(decision.initial_sandbox, SandboxType::None);
+        assert_eq!(decision.escalate_on_failure, false);
+    }
+
+    #[cfg(any(target_os = "macos", target_os = "linux"))]
+    #[tokio::test]
+    async fn select_shell_escalates_on_failure_with_platform_sandbox() {
+        let (session, _ctx) = make_session_and_context();
+        let cfg = ExecutorConfig::new(SandboxPolicy::ReadOnly, std::env::temp_dir(), None);
+        let request = ExecutionRequest {
+            params: ExecParams {
+                // Unknown command => untrusted but not flagged dangerous
+                command: vec!["some-unknown".into()],
+                cwd: std::env::temp_dir(),
+                timeout_ms: None,
+                env: std::collections::HashMap::new(),
+                with_escalated_permissions: None,
+                justification: None,
+            },
+            approval_command: vec!["some-unknown".into()],
+            mode: ExecutionMode::Shell,
+            stdout_stream: None,
+        };
+        let decision = select_sandbox(
+            &request,
+            AskForApproval::OnFailure,
+            Default::default(),
+            &cfg,
+            &session,
+            "sub",
+            "call",
+        )
+        .await
+        .expect("ok");
+        // On macOS/Linux we should have a platform sandbox and escalate on failure
+        assert_ne!(decision.initial_sandbox, SandboxType::None);
+        assert_eq!(decision.escalate_on_failure, true);
+    }
+}

From 2dd226891a80bb8d821ee19ece65d365b71f5330 Mon Sep 17 00:00:00 2001
From: jimmyfraiture <jif@openai.com>
Date: Tue, 30 Sep 2025 11:36:51 +0100
Subject: [PATCH 06/17] RV 2

---
 codex-rs/core/src/apply_patch.rs       |  2 +-
 codex-rs/core/src/codex.rs             |  2 +-
 codex-rs/core/src/executor/backends.rs | 33 +++++++-------------------
 codex-rs/core/src/executor/runner.rs   | 21 ++++------------
 4 files changed, 15 insertions(+), 43 deletions(-)

diff --git a/codex-rs/core/src/apply_patch.rs b/codex-rs/core/src/apply_patch.rs
index 7efae19f15..f1653bf9e9 100644
--- a/codex-rs/core/src/apply_patch.rs
+++ b/codex-rs/core/src/apply_patch.rs
@@ -114,7 +114,7 @@ pub(crate) fn convert_apply_patch_to_protocol(
 mod tests {
     use super::*;
     use pretty_assertions::assert_eq;
-    
+
     use tempfile::tempdir;
 
     #[test]
diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs
index cde64105f4..2877098444 100644
--- a/codex-rs/core/src/codex.rs
+++ b/codex-rs/core/src/codex.rs
@@ -2616,7 +2616,7 @@ async fn handle_container_exec_with_params(
         stdout_stream,
     };
 
-    sess.executor.update_environment( // todo this should not be needed ? Not sure what it means
+    sess.executor.update_environment(
         turn_context.sandbox_policy.clone(),
         turn_context.cwd.clone(),
     );
diff --git a/codex-rs/core/src/executor/backends.rs b/codex-rs/core/src/executor/backends.rs
index 09c786a9d4..aa744b8174 100644
--- a/codex-rs/core/src/executor/backends.rs
+++ b/codex-rs/core/src/executor/backends.rs
@@ -1,13 +1,11 @@
 use std::collections::HashMap;
 use std::env;
-use std::sync::Arc;
 
 use async_trait::async_trait;
 
-use crate::apply_patch::ApplyPatchExec;
 use crate::CODEX_APPLY_PATCH_ARG1;
+use crate::apply_patch::ApplyPatchExec;
 use crate::exec::ExecParams;
-use crate::exec::ExecToolCallOutput;
 use crate::function_tool::FunctionCallError;
 
 pub(crate) enum ExecutionMode {
@@ -27,31 +25,16 @@ pub(crate) trait ExecutionBackend: Send + Sync {
     ) -> Result<ExecParams, FunctionCallError>;
 }
 
-pub(crate) struct BackendStore {
-    shell: Arc<dyn ExecutionBackend>,
-    apply_patch: Arc<dyn ExecutionBackend>,
-}
-
-impl BackendStore {
-    pub(crate) fn new() -> Self {
-        Self {
-            shell: Arc::new(ShellBackend),
-            apply_patch: Arc::new(ApplyPatchBackend),
-        }
-    }
+static SHELL_BACKEND: ShellBackend = ShellBackend;
+static APPLY_PATCH_BACKEND: ApplyPatchBackend = ApplyPatchBackend;
 
-    pub(crate) fn for_mode(&self, mode: &ExecutionMode) -> Arc<dyn ExecutionBackend> {
-        match mode {
-            ExecutionMode::Shell => self.shell.clone(),
-            ExecutionMode::ApplyPatch(_) => self.apply_patch.clone(),
-        }
+pub(crate) fn backend_for_mode(mode: &ExecutionMode) -> &'static dyn ExecutionBackend {
+    match mode {
+        ExecutionMode::Shell => &SHELL_BACKEND,
+        ExecutionMode::ApplyPatch(_) => &APPLY_PATCH_BACKEND,
     }
 }
 
-pub(crate) fn default_backends() -> BackendStore {
-    BackendStore::new()
-}
-
 struct ShellBackend;
 
 #[async_trait]
@@ -101,7 +84,7 @@ impl ExecutionBackend for ApplyPatchBackend {
                     with_escalated_permissions: params.with_escalated_permissions,
                     justification: params.justification,
                 })
-            },
+            }
             ExecutionMode::Shell => Err(FunctionCallError::RespondToModel(
                 "apply_patch backend invoked without patch context".to_string(),
             )),
diff --git a/codex-rs/core/src/executor/runner.rs b/codex-rs/core/src/executor/runner.rs
index a65da13802..9161e78659 100644
--- a/codex-rs/core/src/executor/runner.rs
+++ b/codex-rs/core/src/executor/runner.rs
@@ -5,10 +5,8 @@ use std::time::Duration;
 
 use thiserror::Error;
 
-use super::backends::BackendStore;
-use super::backends::ExecutionBackend;
 use super::backends::ExecutionMode;
-use super::backends::default_backends;
+use super::backends::backend_for_mode;
 use super::cache::ApprovalCache;
 use crate::codex::Session;
 use crate::error::CodexErr;
@@ -64,7 +62,6 @@ impl ExecError {
 /// Coordinates sandbox selection, backend-specific preparation, and command
 /// execution for tool calls requested by the model.
 pub(crate) struct Executor {
-    backends: BackendStore,
     approval_cache: ApprovalCache,
     config: Arc<RwLock<ExecutorConfig>>,
 }
@@ -72,7 +69,6 @@ pub(crate) struct Executor {
 impl Executor {
     pub(crate) fn new(config: ExecutorConfig) -> Self {
         Self {
-            backends: default_backends(),
             approval_cache: ApprovalCache::default(),
             config: Arc::new(RwLock::new(config)),
         }
@@ -80,11 +76,7 @@ impl Executor {
 
     /// Updates the sandbox policy and working directory used for future
     /// executions without recreating the executor.
-    pub(crate) fn update_environment(
-        &self,
-        sandbox_policy: SandboxPolicy,
-        sandbox_cwd: PathBuf,
-    ) {
+    pub(crate) fn update_environment(&self, sandbox_policy: SandboxPolicy, sandbox_cwd: PathBuf) {
         if let Ok(mut cfg) = self.config.write() {
             cfg.sandbox_policy = sandbox_policy;
             cfg.sandbox_cwd = sandbox_cwd;
@@ -103,7 +95,7 @@ impl Executor {
         call_id: &str,
     ) -> Result<ExecToolCallOutput, ExecError> {
         // Step 1: Normalise parameters via the selected backend.
-        let backend = self.backends.for_mode(&request.mode);
+        let backend = backend_for_mode(&request.mode);
         request.params = backend
             .prepare(request.params, &request.mode)
             .map_err(ExecError::from)?;
@@ -148,10 +140,8 @@ impl Executor {
             }
             Err(CodexErr::Sandbox(error @ SandboxErr::Denied { .. })) => {
                 return if sandbox_decision.escalate_on_failure {
-                    self.retry_without_sandbox(
-                        &*backend, &request, &config, session, sub_id, call_id, error,
-                    )
-                    .await
+                    self.retry_without_sandbox(&request, &config, session, sub_id, call_id, error)
+                        .await
                 } else {
                     Err(ExecError::rejection(format!(
                         "failed in sandbox {:?} with execution error: {error:?}",
@@ -170,7 +160,6 @@ impl Executor {
     #[allow(clippy::too_many_arguments)]
     async fn retry_without_sandbox(
         &self,
-        backend: &dyn ExecutionBackend,
         request: &ExecutionRequest,
         config: &ExecutorConfig,
         session: &Session,

From 4656160e31bdab4a773aac2e17a77fac92207156 Mon Sep 17 00:00:00 2001
From: jimmyfraiture <jif@openai.com>
Date: Tue, 30 Sep 2025 11:40:44 +0100
Subject: [PATCH 07/17] RV 3

---
 codex-rs/core/src/codex.rs         | 41 +++++++++++++-----------------
 codex-rs/core/src/state/service.rs |  4 +--
 2 files changed, 19 insertions(+), 26 deletions(-)

diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs
index 2877098444..40551bfe50 100644
--- a/codex-rs/core/src/codex.rs
+++ b/codex-rs/core/src/codex.rs
@@ -263,7 +263,6 @@ pub(crate) struct Session {
     pub(crate) active_turn: Mutex<Option<ActiveTurn>>,
     services: SessionServices,
     next_internal_sub_id: AtomicU64,
-    executor: Executor,
 }
 
 /// The context needed for a single turn of the conversation.
@@ -457,17 +456,15 @@ impl Session {
             unified_exec_manager: UnifiedExecSessionManager::default(),
             notifier: notify,
             rollout: Mutex::new(Some(rollout_recorder)),
-            codex_linux_sandbox_exe: config.codex_linux_sandbox_exe.clone(),
             user_shell: default_shell,
             show_raw_agent_reasoning: config.show_raw_agent_reasoning,
+            executor: Executor::new(ExecutorConfig::new(
+                turn_context.sandbox_policy.clone(),
+                turn_context.cwd.clone(),
+                config.codex_linux_sandbox_exe.clone(),
+            )),
         };
 
-        let executor = Executor::new(ExecutorConfig::new(
-            turn_context.sandbox_policy.clone(),
-            turn_context.cwd.clone(),
-            config.codex_linux_sandbox_exe.clone(),
-        ));
-
         let sess = Arc::new(Session {
             conversation_id,
             tx_event: tx_event.clone(),
@@ -475,7 +472,6 @@ impl Session {
             active_turn: Mutex::new(None),
             services,
             next_internal_sub_id: AtomicU64::new(0),
-            executor,
         });
 
         // Dispatch the SessionConfiguredEvent first and then report any errors.
@@ -921,6 +917,7 @@ impl Session {
             .await;
 
         let result = self
+            .services
             .executor
             .run(request, self, approval_policy, &sub_id, &call_id)
             .await;
@@ -2616,7 +2613,7 @@ async fn handle_container_exec_with_params(
         stdout_stream,
     };
 
-    sess.executor.update_environment(
+    sess.services.executor.update_environment(
         turn_context.sandbox_policy.clone(),
         turn_context.cwd.clone(),
     );
@@ -3203,15 +3200,14 @@ mod tests {
             unified_exec_manager: UnifiedExecSessionManager::default(),
             notifier: UserNotifier::default(),
             rollout: Mutex::new(None),
-            codex_linux_sandbox_exe: None,
             user_shell: shell::Shell::Unknown,
             show_raw_agent_reasoning: config.show_raw_agent_reasoning,
+            executor: Executor::new(ExecutorConfig::new(
+                turn_context.sandbox_policy.clone(),
+                turn_context.cwd.clone(),
+                None,
+            )),
         };
-        let executor = Executor::new(ExecutorConfig::new(
-            turn_context.sandbox_policy.clone(),
-            turn_context.cwd.clone(),
-            None,
-        ));
         let session = Session {
             conversation_id,
             tx_event,
@@ -3219,7 +3215,6 @@ mod tests {
             active_turn: Mutex::new(None),
             services,
             next_internal_sub_id: AtomicU64::new(0),
-            executor,
         };
         (session, turn_context)
     }
@@ -3276,15 +3271,14 @@ mod tests {
             unified_exec_manager: UnifiedExecSessionManager::default(),
             notifier: UserNotifier::default(),
             rollout: Mutex::new(None),
-            codex_linux_sandbox_exe: None,
             user_shell: shell::Shell::Unknown,
             show_raw_agent_reasoning: config.show_raw_agent_reasoning,
+            executor: Executor::new(ExecutorConfig::new(
+                config.sandbox_policy.clone(),
+                config.cwd.clone(),
+                None,
+            )),
         };
-        let executor = Executor::new(ExecutorConfig::new(
-            config.sandbox_policy.clone(),
-            config.cwd.clone(),
-            None,
-        ));
         let session = Arc::new(Session {
             conversation_id,
             tx_event,
@@ -3292,7 +3286,6 @@ mod tests {
             active_turn: Mutex::new(None),
             services,
             next_internal_sub_id: AtomicU64::new(0),
-            executor,
         });
         (session, turn_context, rx_event)
     }
diff --git a/codex-rs/core/src/state/service.rs b/codex-rs/core/src/state/service.rs
index a67b9dda93..994352eddf 100644
--- a/codex-rs/core/src/state/service.rs
+++ b/codex-rs/core/src/state/service.rs
@@ -1,9 +1,9 @@
 use crate::RolloutRecorder;
 use crate::exec_command::ExecSessionManager;
+use crate::executor::Executor;
 use crate::mcp_connection_manager::McpConnectionManager;
 use crate::unified_exec::UnifiedExecSessionManager;
 use crate::user_notification::UserNotifier;
-use std::path::PathBuf;
 use tokio::sync::Mutex;
 
 pub(crate) struct SessionServices {
@@ -12,7 +12,7 @@ pub(crate) struct SessionServices {
     pub(crate) unified_exec_manager: UnifiedExecSessionManager,
     pub(crate) notifier: UserNotifier,
     pub(crate) rollout: Mutex<Option<RolloutRecorder>>,
-    pub(crate) codex_linux_sandbox_exe: Option<PathBuf>,
     pub(crate) user_shell: crate::shell::Shell,
     pub(crate) show_raw_agent_reasoning: bool,
+    pub(crate) executor: Executor,
 }

From 1d87628d419c9a2fae08b227f612ad5d2b58b9d4 Mon Sep 17 00:00:00 2001
From: jimmyfraiture <jif@openai.com>
Date: Tue, 30 Sep 2025 11:55:43 +0100
Subject: [PATCH 08/17] RV 4

---
 codex-rs/core/src/codex.rs             | 62 ++++++++++----------------
 codex-rs/core/src/executor/backends.rs |  8 ++++
 codex-rs/core/src/executor/runner.rs   | 48 ++++++++++++++++++--
 codex-rs/core/src/executor/sandbox.rs  |  5 +++
 4 files changed, 80 insertions(+), 43 deletions(-)

diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs
index 40551bfe50..d5d3e22830 100644
--- a/codex-rs/core/src/codex.rs
+++ b/codex-rs/core/src/codex.rs
@@ -906,7 +906,11 @@ impl Session {
         &self,
         turn_diff_tracker: &mut TurnDiffTracker,
         begin_ctx: ExecCommandContext,
-        request: ExecutionRequest,
+        params: ExecParams,
+        approval_command: Vec<String>,
+        mode: ExecutionMode,
+        stdout_stream: Option<StdoutStream>,
+        use_shell_profile: bool,
         approval_policy: AskForApproval,
     ) -> Result<ExecToolCallOutput, ExecError> {
         let is_apply_patch = begin_ctx.apply_patch.is_some();
@@ -916,6 +920,14 @@ impl Session {
         self.on_exec_command_begin(turn_diff_tracker, begin_ctx.clone())
             .await;
 
+        let request = ExecutionRequest {
+            params,
+            approval_command,
+            mode,
+            stdout_stream,
+            use_shell_profile,
+        };
+
         let result = self
             .services
             .executor
@@ -1030,7 +1042,7 @@ impl Session {
         &self.services.notifier
     }
 
-    fn user_shell(&self) -> &shell::Shell {
+    pub(crate) fn user_shell(&self) -> &shell::Shell {
         &self.services.user_shell
     }
 
@@ -2508,24 +2520,6 @@ fn parse_container_exec_arguments(
         })
 }
 
-fn maybe_translate_shell_command(
-    params: ExecParams,
-    sess: &Session,
-    turn_context: &TurnContext,
-) -> ExecParams {
-    let should_translate = matches!(sess.user_shell(), crate::shell::Shell::PowerShell(_))
-        || turn_context.shell_environment_policy.use_profile;
-
-    if should_translate
-        && let Some(command) = sess
-            .user_shell()
-            .format_default_shell_invocation(params.command.clone())
-    {
-        return ExecParams { command, ..params };
-    }
-    params
-}
-
 async fn handle_container_exec_with_params(
     params: ExecParams,
     sess: &Session,
@@ -2590,29 +2584,11 @@ async fn handle_container_exec_with_params(
         ),
     };
 
-    let translated_params = maybe_translate_shell_command(params, sess, turn_context);
-    let stdout_stream = if exec_command_context.apply_patch.is_some() {
-        None
-    } else {
-        Some(StdoutStream {
-            sub_id: sub_id.clone(),
-            call_id: call_id.clone(),
-            tx_event: sess.tx_event.clone(),
-        })
-    };
-
     let mode = match apply_patch_exec {
         Some(exec) => ExecutionMode::ApplyPatch(exec),
         None => ExecutionMode::Shell,
     };
 
-    let request = ExecutionRequest {
-        params: translated_params,
-        approval_command: command_for_display,
-        mode,
-        stdout_stream,
-    };
-
     sess.services.executor.update_environment(
         turn_context.sandbox_policy.clone(),
         turn_context.cwd.clone(),
@@ -2622,7 +2598,15 @@ async fn handle_container_exec_with_params(
         .run_exec_with_events(
             turn_diff_tracker,
             exec_command_context,
-            request,
+            params,
+            command_for_display,
+            mode,
+            Some(StdoutStream {
+                sub_id: sub_id.clone(), // todo we should not nead the ids
+                call_id: call_id.clone(),
+                tx_event: sess.tx_event.clone(),
+            }),
+            turn_context.shell_environment_policy.use_profile,
             turn_context.approval_policy,
         )
         .await;
diff --git a/codex-rs/core/src/executor/backends.rs b/codex-rs/core/src/executor/backends.rs
index aa744b8174..95cdb3cacb 100644
--- a/codex-rs/core/src/executor/backends.rs
+++ b/codex-rs/core/src/executor/backends.rs
@@ -23,6 +23,10 @@ pub(crate) trait ExecutionBackend: Send + Sync {
         // Required for downcasting the apply_patch.
         mode: &ExecutionMode,
     ) -> Result<ExecParams, FunctionCallError>;
+
+    fn stream_stdout(&self, _mode: &ExecutionMode) -> bool {
+        true
+    }
 }
 
 static SHELL_BACKEND: ShellBackend = ShellBackend;
@@ -90,4 +94,8 @@ impl ExecutionBackend for ApplyPatchBackend {
             )),
         }
     }
+
+    fn stream_stdout(&self, _mode: &ExecutionMode) -> bool {
+        false
+    }
 }
diff --git a/codex-rs/core/src/executor/runner.rs b/codex-rs/core/src/executor/runner.rs
index 9161e78659..086bdcf2f4 100644
--- a/codex-rs/core/src/executor/runner.rs
+++ b/codex-rs/core/src/executor/runner.rs
@@ -23,6 +23,7 @@ use crate::function_tool::FunctionCallError;
 use crate::protocol::AskForApproval;
 use crate::protocol::ReviewDecision;
 use crate::protocol::SandboxPolicy;
+use crate::shell;
 
 #[derive(Clone, Debug)]
 pub(crate) struct ExecutorConfig {
@@ -94,8 +95,18 @@ impl Executor {
         sub_id: &str,
         call_id: &str,
     ) -> Result<ExecToolCallOutput, ExecError> {
+        if matches!(request.mode, ExecutionMode::Shell) {
+            request.params =
+                maybe_translate_shell_command(request.params, session, request.use_shell_profile);
+        }
+
         // Step 1: Normalise parameters via the selected backend.
         let backend = backend_for_mode(&request.mode);
+        let stdout_stream = if backend.stream_stdout(&request.mode) {
+            request.stdout_stream.clone()
+        } else {
+            None
+        };
         request.params = backend
             .prepare(request.params, &request.mode)
             .map_err(ExecError::from)?;
@@ -128,7 +139,7 @@ impl Executor {
                 request.params.clone(),
                 sandbox_decision.initial_sandbox,
                 &config,
-                request.stdout_stream.clone(),
+                stdout_stream.clone(),
             )
             .await;
 
@@ -140,8 +151,16 @@ impl Executor {
             }
             Err(CodexErr::Sandbox(error @ SandboxErr::Denied { .. })) => {
                 return if sandbox_decision.escalate_on_failure {
-                    self.retry_without_sandbox(&request, &config, session, sub_id, call_id, error)
-                        .await
+                    self.retry_without_sandbox(
+                        &request,
+                        &config,
+                        session,
+                        sub_id,
+                        call_id,
+                        stdout_stream.clone(),
+                        error,
+                    )
+                    .await
                 } else {
                     Err(ExecError::rejection(format!(
                         "failed in sandbox {:?} with execution error: {error:?}",
@@ -165,6 +184,7 @@ impl Executor {
         session: &Session,
         sub_id: &str,
         call_id: &str,
+        stdout_stream: Option<StdoutStream>,
         sandbox_error: SandboxErr,
     ) -> Result<ExecToolCallOutput, ExecError> {
         session
@@ -194,7 +214,7 @@ impl Executor {
                         request.params.clone(),
                         SandboxType::None,
                         config,
-                        request.stdout_stream.clone(),
+                        stdout_stream,
                     )
                     .await?;
 
@@ -225,11 +245,31 @@ impl Executor {
     }
 }
 
+fn maybe_translate_shell_command(
+    params: ExecParams,
+    session: &Session,
+    use_shell_profile: bool,
+) -> ExecParams {
+    let should_translate =
+        matches!(session.user_shell(), shell::Shell::PowerShell(_)) || use_shell_profile;
+
+    if should_translate
+        && let Some(command) = session
+            .user_shell()
+            .format_default_shell_invocation(params.command.clone())
+    {
+        return ExecParams { command, ..params };
+    }
+
+    params
+}
+
 pub(crate) struct ExecutionRequest {
     pub params: ExecParams,
     pub approval_command: Vec<String>,
     pub mode: ExecutionMode,
     pub stdout_stream: Option<StdoutStream>,
+    pub use_shell_profile: bool,
 }
 
 pub(crate) struct NormalizedExecOutput<'a> {
diff --git a/codex-rs/core/src/executor/sandbox.rs b/codex-rs/core/src/executor/sandbox.rs
index 87fdc0f00a..99507fc446 100644
--- a/codex-rs/core/src/executor/sandbox.rs
+++ b/codex-rs/core/src/executor/sandbox.rs
@@ -196,6 +196,7 @@ mod tests {
             approval_command: vec!["apply_patch".into()],
             mode: ExecutionMode::ApplyPatch(exec),
             stdout_stream: None,
+            use_shell_profile: false,
         };
         let decision = select_sandbox(
             &request,
@@ -236,6 +237,7 @@ mod tests {
             approval_command: vec!["apply_patch".into()],
             mode: ExecutionMode::ApplyPatch(exec),
             stdout_stream: None,
+            use_shell_profile: false,
         };
         let decision = select_sandbox(
             &request,
@@ -277,6 +279,7 @@ mod tests {
             approval_command: vec!["apply_patch".into()],
             mode: ExecutionMode::ApplyPatch(exec),
             stdout_stream: None,
+            use_shell_profile: false,
         };
         let result = select_sandbox(
             &request,
@@ -313,6 +316,7 @@ mod tests {
             approval_command: vec!["some-unknown".into()],
             mode: ExecutionMode::Shell,
             stdout_stream: None,
+            use_shell_profile: false,
         };
         let decision = select_sandbox(
             &request,
@@ -347,6 +351,7 @@ mod tests {
             approval_command: vec!["some-unknown".into()],
             mode: ExecutionMode::Shell,
             stdout_stream: None,
+            use_shell_profile: false,
         };
         let decision = select_sandbox(
             &request,

From 8c09db17c3d9aa9f79725cf000ee809028591246 Mon Sep 17 00:00:00 2001
From: jimmyfraiture <jif@openai.com>
Date: Tue, 30 Sep 2025 12:04:44 +0100
Subject: [PATCH 09/17] RV 5

---
 codex-rs/core/src/codex.rs            | 51 +++++++++++--------------
 codex-rs/core/src/executor/mod.rs     | 55 ++++++++++++++++++++++++++-
 codex-rs/core/src/executor/runner.rs  | 17 +--------
 codex-rs/core/src/executor/sandbox.rs |  2 +-
 4 files changed, 78 insertions(+), 47 deletions(-)

diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs
index d5d3e22830..8e4e4475b2 100644
--- a/codex-rs/core/src/codex.rs
+++ b/codex-rs/core/src/codex.rs
@@ -63,7 +63,6 @@ use crate::exec_command::ExecSessionManager;
 use crate::exec_command::WRITE_STDIN_TOOL_NAME;
 use crate::exec_command::WriteStdinParams;
 use crate::exec_env::create_env;
-use crate::executor::ExecError;
 use crate::executor::ExecutionMode;
 use crate::executor::ExecutionRequest;
 use crate::executor::Executor;
@@ -905,29 +904,17 @@ impl Session {
     async fn run_exec_with_events(
         &self,
         turn_diff_tracker: &mut TurnDiffTracker,
-        begin_ctx: ExecCommandContext,
-        params: ExecParams,
-        approval_command: Vec<String>,
-        mode: ExecutionMode,
-        stdout_stream: Option<StdoutStream>,
-        use_shell_profile: bool,
+        prepared: PreparedExec,
         approval_policy: AskForApproval,
     ) -> Result<ExecToolCallOutput, ExecError> {
-        let is_apply_patch = begin_ctx.apply_patch.is_some();
-        let sub_id = begin_ctx.sub_id.clone();
-        let call_id = begin_ctx.call_id.clone();
+        let PreparedExec { context, request } = prepared;
+        let is_apply_patch = context.apply_patch.is_some();
+        let sub_id = context.sub_id.clone();
+        let call_id = context.call_id.clone();
 
-        self.on_exec_command_begin(turn_diff_tracker, begin_ctx.clone())
+        self.on_exec_command_begin(turn_diff_tracker, context.clone())
             .await;
 
-        let request = ExecutionRequest {
-            params,
-            approval_command,
-            mode,
-            stdout_stream,
-            use_shell_profile,
-        };
-
         let result = self
             .services
             .executor
@@ -2594,19 +2581,23 @@ async fn handle_container_exec_with_params(
         turn_context.cwd.clone(),
     );
 
+    let prepared_exec = PreparedExec::new(
+        exec_command_context,
+        params,
+        command_for_display,
+        mode,
+        Some(StdoutStream {
+            sub_id: sub_id.clone(),
+            call_id: call_id.clone(),
+            tx_event: sess.tx_event.clone(),
+        }),
+        turn_context.shell_environment_policy.use_profile,
+    );
+
     let output_result = sess
         .run_exec_with_events(
             turn_diff_tracker,
-            exec_command_context,
-            params,
-            command_for_display,
-            mode,
-            Some(StdoutStream {
-                sub_id: sub_id.clone(), // todo we should not nead the ids
-                call_id: call_id.clone(),
-                tx_event: sess.tx_event.clone(),
-            }),
-            turn_context.shell_environment_policy.use_profile,
+            prepared_exec,
             turn_context.approval_policy,
         )
         .await;
@@ -2889,6 +2880,8 @@ pub(crate) async fn exit_review_mode(
         .await;
 }
 
+use crate::executor::errors::ExecError;
+use crate::executor::linkers::PreparedExec;
 #[cfg(test)]
 pub(crate) use tests::make_session_and_context;
 
diff --git a/codex-rs/core/src/executor/mod.rs b/codex-rs/core/src/executor/mod.rs
index c3dce856e5..3150f0176c 100644
--- a/codex-rs/core/src/executor/mod.rs
+++ b/codex-rs/core/src/executor/mod.rs
@@ -4,8 +4,61 @@ mod runner;
 mod sandbox;
 
 pub(crate) use backends::ExecutionMode;
-pub(crate) use runner::ExecError;
 pub(crate) use runner::ExecutionRequest;
 pub(crate) use runner::Executor;
 pub(crate) use runner::ExecutorConfig;
 pub(crate) use runner::normalize_exec_result;
+
+pub mod linkers {
+    use crate::codex::ExecCommandContext;
+    use crate::exec::ExecParams;
+    use crate::exec::StdoutStream;
+    use crate::executor::backends::ExecutionMode;
+    use crate::executor::runner::ExecutionRequest;
+
+    pub struct PreparedExec {
+        pub context: ExecCommandContext,
+        pub request: ExecutionRequest,
+    }
+
+    impl PreparedExec {
+        pub fn new(
+            context: ExecCommandContext,
+            params: ExecParams,
+            approval_command: Vec<String>,
+            mode: ExecutionMode,
+            stdout_stream: Option<StdoutStream>,
+            use_shell_profile: bool,
+        ) -> Self {
+            let request = ExecutionRequest {
+                params,
+                approval_command,
+                mode,
+                stdout_stream,
+                use_shell_profile,
+            };
+
+            Self { context, request }
+        }
+    }
+}
+
+pub mod errors {
+    use crate::error::CodexErr;
+    use crate::function_tool::FunctionCallError;
+    use thiserror::Error;
+
+    #[derive(Debug, Error)]
+    pub enum ExecError {
+        #[error(transparent)]
+        Function(#[from] FunctionCallError),
+        #[error(transparent)]
+        Codex(#[from] CodexErr),
+    }
+
+    impl ExecError {
+        pub(crate) fn rejection(msg: impl Into<String>) -> Self {
+            FunctionCallError::RespondToModel(msg.into()).into()
+        }
+    }
+}
diff --git a/codex-rs/core/src/executor/runner.rs b/codex-rs/core/src/executor/runner.rs
index 086bdcf2f4..86caf313a8 100644
--- a/codex-rs/core/src/executor/runner.rs
+++ b/codex-rs/core/src/executor/runner.rs
@@ -3,8 +3,6 @@ use std::sync::Arc;
 use std::sync::RwLock;
 use std::time::Duration;
 
-use thiserror::Error;
-
 use super::backends::ExecutionMode;
 use super::backends::backend_for_mode;
 use super::cache::ApprovalCache;
@@ -18,6 +16,7 @@ use crate::exec::SandboxType;
 use crate::exec::StdoutStream;
 use crate::exec::StreamOutput;
 use crate::exec::process_exec_tool_call;
+use crate::executor::errors::ExecError;
 use crate::executor::sandbox::select_sandbox;
 use crate::function_tool::FunctionCallError;
 use crate::protocol::AskForApproval;
@@ -46,20 +45,6 @@ impl ExecutorConfig {
     }
 }
 
-#[derive(Debug, Error)]
-pub enum ExecError {
-    #[error(transparent)]
-    Function(#[from] FunctionCallError),
-    #[error(transparent)]
-    Codex(#[from] CodexErr),
-}
-
-impl ExecError {
-    pub(crate) fn rejection(msg: impl Into<String>) -> Self {
-        FunctionCallError::RespondToModel(msg.into()).into()
-    }
-}
-
 /// Coordinates sandbox selection, backend-specific preparation, and command
 /// execution for tool calls requested by the model.
 pub(crate) struct Executor {
diff --git a/codex-rs/core/src/executor/sandbox.rs b/codex-rs/core/src/executor/sandbox.rs
index 99507fc446..d420dfed2f 100644
--- a/codex-rs/core/src/executor/sandbox.rs
+++ b/codex-rs/core/src/executor/sandbox.rs
@@ -3,7 +3,7 @@ use crate::apply_patch::ApplyPatchExec;
 use crate::codex::Session;
 use crate::exec::ExecParams;
 use crate::exec::SandboxType;
-use crate::executor::ExecError;
+use crate::executor::errors::ExecError;
 use crate::executor::ExecutionMode;
 use crate::executor::ExecutionRequest;
 use crate::executor::ExecutorConfig;

From 43c0abb31e29b97bb21083dabddff43fb25caf23 Mon Sep 17 00:00:00 2001
From: jimmyfraiture <jif@openai.com>
Date: Tue, 30 Sep 2025 12:42:36 +0100
Subject: [PATCH 10/17] RV 6

---
 codex-rs/core/src/codex.rs            | 1 -
 codex-rs/core/src/executor/mod.rs     | 6 +++---
 codex-rs/core/src/executor/sandbox.rs | 8 ++------
 codex-rs/core/tests/suite/seatbelt.rs | 6 ++++++
 4 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs
index 8e4e4475b2..20b3457562 100644
--- a/codex-rs/core/src/codex.rs
+++ b/codex-rs/core/src/codex.rs
@@ -64,7 +64,6 @@ use crate::exec_command::WRITE_STDIN_TOOL_NAME;
 use crate::exec_command::WriteStdinParams;
 use crate::exec_env::create_env;
 use crate::executor::ExecutionMode;
-use crate::executor::ExecutionRequest;
 use crate::executor::Executor;
 use crate::executor::ExecutorConfig;
 use crate::executor::normalize_exec_result;
diff --git a/codex-rs/core/src/executor/mod.rs b/codex-rs/core/src/executor/mod.rs
index 3150f0176c..a5a305c604 100644
--- a/codex-rs/core/src/executor/mod.rs
+++ b/codex-rs/core/src/executor/mod.rs
@@ -9,7 +9,7 @@ pub(crate) use runner::Executor;
 pub(crate) use runner::ExecutorConfig;
 pub(crate) use runner::normalize_exec_result;
 
-pub mod linkers {
+pub(crate) mod linkers {
     use crate::codex::ExecCommandContext;
     use crate::exec::ExecParams;
     use crate::exec::StdoutStream;
@@ -17,8 +17,8 @@ pub mod linkers {
     use crate::executor::runner::ExecutionRequest;
 
     pub struct PreparedExec {
-        pub context: ExecCommandContext,
-        pub request: ExecutionRequest,
+        pub(crate) context: ExecCommandContext,
+        pub(crate) request: ExecutionRequest,
     }
 
     impl PreparedExec {
diff --git a/codex-rs/core/src/executor/sandbox.rs b/codex-rs/core/src/executor/sandbox.rs
index d420dfed2f..a43760a9ff 100644
--- a/codex-rs/core/src/executor/sandbox.rs
+++ b/codex-rs/core/src/executor/sandbox.rs
@@ -1,21 +1,16 @@
-use crate::CODEX_APPLY_PATCH_ARG1;
 use crate::apply_patch::ApplyPatchExec;
 use crate::codex::Session;
-use crate::exec::ExecParams;
 use crate::exec::SandboxType;
-use crate::executor::errors::ExecError;
 use crate::executor::ExecutionMode;
 use crate::executor::ExecutionRequest;
 use crate::executor::ExecutorConfig;
-use crate::function_tool::FunctionCallError;
+use crate::executor::errors::ExecError;
 use crate::safety::SafetyCheck;
 use crate::safety::assess_command_safety;
 use crate::safety::assess_patch_safety;
 use codex_protocol::protocol::AskForApproval;
 use codex_protocol::protocol::ReviewDecision;
-use std::collections::HashMap;
 use std::collections::HashSet;
-use std::env;
 
 /// Sandbox placement options selected for an execution run, including whether
 /// to escalate after failures and whether approvals should persist.
@@ -169,6 +164,7 @@ mod tests {
     use super::*;
     use crate::codex::make_session_and_context;
     use crate::exec::ExecParams;
+    use crate::function_tool::FunctionCallError;
     use crate::protocol::SandboxPolicy;
     use codex_apply_patch::ApplyPatchAction;
     use pretty_assertions::assert_eq;
diff --git a/codex-rs/core/tests/suite/seatbelt.rs b/codex-rs/core/tests/suite/seatbelt.rs
index 78f599d42e..a879d3e952 100644
--- a/codex-rs/core/tests/suite/seatbelt.rs
+++ b/codex-rs/core/tests/suite/seatbelt.rs
@@ -169,6 +169,12 @@ async fn python_getpwuid_works_under_seatbelt() {
         return;
     }
 
+    // For local dev.
+    if which::which("python3").is_err() {
+        eprintln!("python3 not found in PATH, skipping test.");
+        return;
+    }
+
     // ReadOnly is sufficient here since we are only exercising user lookup.
     let policy = SandboxPolicy::ReadOnly;
     let command_cwd = std::env::current_dir().expect("getcwd");

From 9c194dc0f90b5229497da6bc8caa4a065e7b4168 Mon Sep 17 00:00:00 2001
From: jimmyfraiture <jif@openai.com>
Date: Tue, 30 Sep 2025 12:56:30 +0100
Subject: [PATCH 11/17] Fix merge

---
 codex-rs/core/src/executor/sandbox.rs | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/codex-rs/core/src/executor/sandbox.rs b/codex-rs/core/src/executor/sandbox.rs
index a43760a9ff..3d3eaceaee 100644
--- a/codex-rs/core/src/executor/sandbox.rs
+++ b/codex-rs/core/src/executor/sandbox.rs
@@ -102,10 +102,19 @@ async fn select_shell_sandbox(
     );
 
     match safety {
-        SafetyCheck::AutoApprove { sandbox_type } => Ok(SandboxDecision::auto(
+        SafetyCheck::AutoApprove {
             sandbox_type,
-            should_escalate_on_failure(approval_policy, sandbox_type),
-        )),
+            user_explicitly_approved,
+        } => {
+            let mut decision = SandboxDecision::auto(
+                sandbox_type,
+                should_escalate_on_failure(approval_policy, sandbox_type),
+            );
+            if user_explicitly_approved {
+                decision.record_session_approval = true;
+            }
+            Ok(decision)
+        }
         SafetyCheck::AskUser => {
             let decision = session
                 .request_command_approval(
@@ -146,7 +155,7 @@ fn select_apply_patch_sandbox(
         &config.sandbox_policy,
         &config.sandbox_cwd,
     ) {
-        SafetyCheck::AutoApprove { sandbox_type } => Ok(SandboxDecision::auto(
+        SafetyCheck::AutoApprove { sandbox_type, .. } => Ok(SandboxDecision::auto(
             sandbox_type,
             should_escalate_on_failure(approval_policy, sandbox_type),
         )),

From 5c00e1596a3adf0fd0683a2962475813cd38159f Mon Sep 17 00:00:00 2001
From: jimmyfraiture <jif@openai.com>
Date: Tue, 30 Sep 2025 13:28:25 +0100
Subject: [PATCH 12/17] Restore otel

---
 codex-rs/core/src/codex.rs            |  9 ++++---
 codex-rs/core/src/executor/runner.rs  | 34 ++++++++++++++---------
 codex-rs/core/src/executor/sandbox.rs | 39 +++++++++++++++++++++++----
 codex-rs/core/src/safety.rs           |  5 ++--
 4 files changed, 64 insertions(+), 23 deletions(-)

diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs
index 2fb6bbfc17..3fc0da7829 100644
--- a/codex-rs/core/src/codex.rs
+++ b/codex-rs/core/src/codex.rs
@@ -1,7 +1,6 @@
 use std::borrow::Cow;
 use std::collections::HashMap;
 use std::fmt::Debug;
-use std::path::Path;
 use std::path::PathBuf;
 use std::sync::Arc;
 use std::sync::atomic::AtomicU64;
@@ -125,7 +124,6 @@ use crate::user_instructions::UserInstructions;
 use crate::user_notification::UserNotification;
 use crate::util::backoff;
 use codex_otel::otel_event_manager::OtelEventManager;
-use codex_otel::otel_event_manager::ToolDecisionSource;
 use codex_protocol::config_types::ReasoningEffort as ReasoningEffortConfig;
 use codex_protocol::config_types::ReasoningSummary as ReasoningSummaryConfig;
 use codex_protocol::custom_prompts::CustomPrompt;
@@ -833,6 +831,7 @@ impl Session {
             command_for_display,
             cwd,
             apply_patch,
+            ..
         } = exec_command_context;
         let msg = match apply_patch {
             Some(ApplyPatchCommandContext {
@@ -946,7 +945,7 @@ impl Session {
         let result = self
             .services
             .executor
-            .run(request, self, approval_policy, &sub_id, &call_id)
+            .run(request, self, approval_policy, &context)
             .await;
 
         let normalized = normalize_exec_result(&result);
@@ -1079,6 +1078,8 @@ pub(crate) struct ExecCommandContext {
     pub(crate) command_for_display: Vec<String>,
     pub(crate) cwd: PathBuf,
     pub(crate) apply_patch: Option<ApplyPatchCommandContext>,
+    pub(crate) tool_name: String,
+    pub(crate) otel_event_manager: OtelEventManager,
 }
 
 #[derive(Clone, Debug)]
@@ -2662,6 +2663,8 @@ async fn handle_container_exec_with_params(
                 changes: convert_apply_patch_to_protocol(action),
             },
         ),
+        tool_name: tool_name.to_string(),
+        otel_event_manager,
     };
 
     let mode = match apply_patch_exec {
diff --git a/codex-rs/core/src/executor/runner.rs b/codex-rs/core/src/executor/runner.rs
index 86caf313a8..ede9ed49e7 100644
--- a/codex-rs/core/src/executor/runner.rs
+++ b/codex-rs/core/src/executor/runner.rs
@@ -6,6 +6,7 @@ use std::time::Duration;
 use super::backends::ExecutionMode;
 use super::backends::backend_for_mode;
 use super::cache::ApprovalCache;
+use crate::codex::ExecCommandContext;
 use crate::codex::Session;
 use crate::error::CodexErr;
 use crate::error::SandboxErr;
@@ -23,6 +24,7 @@ use crate::protocol::AskForApproval;
 use crate::protocol::ReviewDecision;
 use crate::protocol::SandboxPolicy;
 use crate::shell;
+use codex_otel::otel_event_manager::ToolDecisionSource;
 
 #[derive(Clone, Debug)]
 pub(crate) struct ExecutorConfig {
@@ -77,8 +79,7 @@ impl Executor {
         mut request: ExecutionRequest,
         session: &Session,
         approval_policy: AskForApproval,
-        sub_id: &str,
-        call_id: &str,
+        context: &ExecCommandContext,
     ) -> Result<ExecToolCallOutput, ExecError> {
         if matches!(request.mode, ExecutionMode::Shell) {
             request.params =
@@ -110,8 +111,9 @@ impl Executor {
             self.approval_cache.snapshot(),
             &config,
             session,
-            sub_id,
-            call_id,
+            &context.sub_id,
+            &context.call_id,
+            &context.otel_event_manager,
         )
         .await?;
         if sandbox_decision.record_session_approval {
@@ -140,8 +142,7 @@ impl Executor {
                         &request,
                         &config,
                         session,
-                        sub_id,
-                        call_id,
+                        context,
                         stdout_stream.clone(),
                         error,
                     )
@@ -161,37 +162,44 @@ impl Executor {
 
     /// Fallback path invoked when a sandboxed run is denied so the user can
     /// approve rerunning without isolation.
-    #[allow(clippy::too_many_arguments)]
     async fn retry_without_sandbox(
         &self,
         request: &ExecutionRequest,
         config: &ExecutorConfig,
         session: &Session,
-        sub_id: &str,
-        call_id: &str,
+        context: &ExecCommandContext,
         stdout_stream: Option<StdoutStream>,
         sandbox_error: SandboxErr,
     ) -> Result<ExecToolCallOutput, ExecError> {
         session
-            .notify_background_event(sub_id, format!("Execution failed: {sandbox_error}"))
+            .notify_background_event(
+                &context.sub_id,
+                format!("Execution failed: {sandbox_error}"),
+            )
             .await;
         let decision = session
             .request_command_approval(
-                sub_id.to_string(),
-                call_id.to_string(),
+                context.sub_id.to_string(),
+                context.call_id.to_string(),
                 request.approval_command.clone(),
                 request.params.cwd.clone(),
                 Some("command failed; retry without sandbox?".to_string()),
             )
             .await;
 
+        context.otel_event_manager.tool_decision(
+            &context.tool_name,
+            &context.call_id,
+            decision,
+            ToolDecisionSource::User,
+        );
         match decision {
             ReviewDecision::Approved | ReviewDecision::ApprovedForSession => {
                 if matches!(decision, ReviewDecision::ApprovedForSession) {
                     self.approval_cache.insert(request.approval_command.clone());
                 }
                 session
-                    .notify_background_event(sub_id, "retrying command without sandbox")
+                    .notify_background_event(&context.sub_id, "retrying command without sandbox")
                     .await;
 
                 let retry_output = self
diff --git a/codex-rs/core/src/executor/sandbox.rs b/codex-rs/core/src/executor/sandbox.rs
index 3d3eaceaee..4c619c3e6e 100644
--- a/codex-rs/core/src/executor/sandbox.rs
+++ b/codex-rs/core/src/executor/sandbox.rs
@@ -8,6 +8,8 @@ use crate::executor::errors::ExecError;
 use crate::safety::SafetyCheck;
 use crate::safety::assess_command_safety;
 use crate::safety::assess_patch_safety;
+use codex_otel::otel_event_manager::OtelEventManager;
+use codex_otel::otel_event_manager::ToolDecisionSource;
 use codex_protocol::protocol::AskForApproval;
 use codex_protocol::protocol::ReviewDecision;
 use std::collections::HashSet;
@@ -50,6 +52,7 @@ fn should_escalate_on_failure(approval: AskForApproval, sandbox: SandboxType) ->
 
 /// Determines how a command should be sandboxed, prompting the user when
 /// policy requires explicit approval.
+#[warn(clippy::too_many_arguments)]
 pub async fn select_sandbox(
     request: &ExecutionRequest,
     approval_policy: AskForApproval,
@@ -58,6 +61,7 @@ pub async fn select_sandbox(
     session: &Session,
     sub_id: &str,
     call_id: &str,
+    otel_event_manager: &OtelEventManager,
 ) -> Result<SandboxDecision, ExecError> {
     match &request.mode {
         ExecutionMode::Shell => {
@@ -69,6 +73,7 @@ pub async fn select_sandbox(
                 session,
                 sub_id,
                 call_id,
+                otel_event_manager,
             )
             .await
         }
@@ -78,6 +83,7 @@ pub async fn select_sandbox(
     }
 }
 
+#[warn(clippy::too_many_arguments)]
 async fn select_shell_sandbox(
     request: &ExecutionRequest,
     approval_policy: AskForApproval,
@@ -86,6 +92,7 @@ async fn select_shell_sandbox(
     session: &Session,
     sub_id: &str,
     call_id: &str,
+    otel_event_manager: &OtelEventManager,
 ) -> Result<SandboxDecision, ExecError> {
     let command_for_safety = if request.approval_command.is_empty() {
         request.params.command.clone()
@@ -113,6 +120,12 @@ async fn select_shell_sandbox(
             if user_explicitly_approved {
                 decision.record_session_approval = true;
             }
+            let (decision_for_event, source) = if user_explicitly_approved {
+                (ReviewDecision::ApprovedForSession, ToolDecisionSource::User)
+            } else {
+                (ReviewDecision::Approved, ToolDecisionSource::Config)
+            };
+            otel_event_manager.tool_decision("local_shell", call_id, decision_for_event, source);
             Ok(decision)
         }
         SafetyCheck::AskUser => {
@@ -126,6 +139,12 @@ async fn select_shell_sandbox(
                 )
                 .await;
 
+            otel_event_manager.tool_decision(
+                "local_shell",
+                call_id,
+                decision,
+                ToolDecisionSource::User,
+            );
             match decision {
                 ReviewDecision::Approved => Ok(SandboxDecision::user_override(false)),
                 ReviewDecision::ApprovedForSession => Ok(SandboxDecision::user_override(true)),
@@ -180,7 +199,7 @@ mod tests {
 
     #[tokio::test]
     async fn select_apply_patch_user_override_when_explicit() {
-        let (session, _ctx) = make_session_and_context();
+        let (session, ctx) = make_session_and_context();
         let tmp = tempfile::tempdir().expect("tmp");
         let p = tmp.path().join("a.txt");
         let action = ApplyPatchAction::new_add_for_test(&p, "hello".to_string());
@@ -203,6 +222,7 @@ mod tests {
             stdout_stream: None,
             use_shell_profile: false,
         };
+        let otel_event_manager = ctx.client.get_otel_event_manager();
         let decision = select_sandbox(
             &request,
             AskForApproval::OnRequest,
@@ -211,6 +231,7 @@ mod tests {
             &session,
             "sub",
             "call",
+            &otel_event_manager,
         )
         .await
         .expect("ok");
@@ -221,7 +242,7 @@ mod tests {
 
     #[tokio::test]
     async fn select_apply_patch_autoapprove_in_danger() {
-        let (session, _ctx) = make_session_and_context();
+        let (session, ctx) = make_session_and_context();
         let tmp = tempfile::tempdir().expect("tmp");
         let p = tmp.path().join("a.txt");
         let action = ApplyPatchAction::new_add_for_test(&p, "hello".to_string());
@@ -244,6 +265,7 @@ mod tests {
             stdout_stream: None,
             use_shell_profile: false,
         };
+        let otel_event_manager = ctx.client.get_otel_event_manager();
         let decision = select_sandbox(
             &request,
             AskForApproval::OnRequest,
@@ -252,6 +274,7 @@ mod tests {
             &session,
             "sub",
             "call",
+            &otel_event_manager,
         )
         .await
         .expect("ok");
@@ -263,7 +286,7 @@ mod tests {
 
     #[tokio::test]
     async fn select_apply_patch_requires_approval_on_unless_trusted() {
-        let (session, _ctx) = make_session_and_context();
+        let (session, ctx) = make_session_and_context();
         let tempdir = tempfile::tempdir().expect("tmpdir");
         let p = tempdir.path().join("a.txt");
         let action = ApplyPatchAction::new_add_for_test(&p, "hello".to_string());
@@ -286,6 +309,7 @@ mod tests {
             stdout_stream: None,
             use_shell_profile: false,
         };
+        let otel_event_manager = ctx.client.get_otel_event_manager();
         let result = select_sandbox(
             &request,
             AskForApproval::UnlessTrusted,
@@ -294,6 +318,7 @@ mod tests {
             &session,
             "sub",
             "call",
+            &otel_event_manager,
         )
         .await;
         match result {
@@ -307,7 +332,7 @@ mod tests {
 
     #[tokio::test]
     async fn select_shell_autoapprove_in_danger_mode() {
-        let (session, _ctx) = make_session_and_context();
+        let (session, ctx) = make_session_and_context();
         let cfg = ExecutorConfig::new(SandboxPolicy::DangerFullAccess, std::env::temp_dir(), None);
         let request = ExecutionRequest {
             params: ExecParams {
@@ -323,6 +348,7 @@ mod tests {
             stdout_stream: None,
             use_shell_profile: false,
         };
+        let otel_event_manager = ctx.client.get_otel_event_manager();
         let decision = select_sandbox(
             &request,
             AskForApproval::OnRequest,
@@ -331,6 +357,7 @@ mod tests {
             &session,
             "sub",
             "call",
+            &otel_event_manager,
         )
         .await
         .expect("ok");
@@ -341,7 +368,7 @@ mod tests {
     #[cfg(any(target_os = "macos", target_os = "linux"))]
     #[tokio::test]
     async fn select_shell_escalates_on_failure_with_platform_sandbox() {
-        let (session, _ctx) = make_session_and_context();
+        let (session, ctx) = make_session_and_context();
         let cfg = ExecutorConfig::new(SandboxPolicy::ReadOnly, std::env::temp_dir(), None);
         let request = ExecutionRequest {
             params: ExecParams {
@@ -358,6 +385,7 @@ mod tests {
             stdout_stream: None,
             use_shell_profile: false,
         };
+        let otel_event_manager = ctx.client.get_otel_event_manager();
         let decision = select_sandbox(
             &request,
             AskForApproval::OnFailure,
@@ -366,6 +394,7 @@ mod tests {
             &session,
             "sub",
             "call",
+            &otel_event_manager,
         )
         .await
         .expect("ok");
diff --git a/codex-rs/core/src/safety.rs b/codex-rs/core/src/safety.rs
index b976ae4a4c..0ed0f929ff 100644
--- a/codex-rs/core/src/safety.rs
+++ b/codex-rs/core/src/safety.rs
@@ -125,9 +125,10 @@ pub fn assess_command_safety(
     // the session _because_ they know it needs to run outside a sandbox.
 
     if is_known_safe_command(command) || approved.contains(command) {
+        let user_explicitly_approved = approved.contains(command);
         return SafetyCheck::AutoApprove {
             sandbox_type: SandboxType::None,
-            user_explicitly_approved: false,
+            user_explicitly_approved,
         };
     }
 
@@ -380,7 +381,7 @@ mod tests {
             safety_check,
             SafetyCheck::AutoApprove {
                 sandbox_type: SandboxType::None,
-                user_explicitly_approved: false,
+                user_explicitly_approved: true,
             }
         );
     }

From 3def127178b7392b08c86b0cf739316fc8c6cfc3 Mon Sep 17 00:00:00 2001
From: jimmyfraiture <jif@openai.com>
Date: Tue, 30 Sep 2025 13:31:48 +0100
Subject: [PATCH 13/17] Clippy

---
 codex-rs/core/src/executor/sandbox.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/codex-rs/core/src/executor/sandbox.rs b/codex-rs/core/src/executor/sandbox.rs
index 4c619c3e6e..5c01ff69b4 100644
--- a/codex-rs/core/src/executor/sandbox.rs
+++ b/codex-rs/core/src/executor/sandbox.rs
@@ -52,7 +52,7 @@ fn should_escalate_on_failure(approval: AskForApproval, sandbox: SandboxType) ->
 
 /// Determines how a command should be sandboxed, prompting the user when
 /// policy requires explicit approval.
-#[warn(clippy::too_many_arguments)]
+#[allow(clippy::too_many_arguments)]
 pub async fn select_sandbox(
     request: &ExecutionRequest,
     approval_policy: AskForApproval,
@@ -83,7 +83,7 @@ pub async fn select_sandbox(
     }
 }
 
-#[warn(clippy::too_many_arguments)]
+#[allow(clippy::too_many_arguments)]
 async fn select_shell_sandbox(
     request: &ExecutionRequest,
     approval_policy: AskForApproval,

From 60e7333575dd298863ffa0fa71da0d52744f96ab Mon Sep 17 00:00:00 2001
From: jimmyfraiture <jif@openai.com>
Date: Tue, 30 Sep 2025 17:50:30 +0100
Subject: [PATCH 14/17] Add integration tests

---
 .../suite/codex_message_processor_flow.rs     | 232 ++++++++++++++++++
 1 file changed, 232 insertions(+)

diff --git a/codex-rs/app-server/tests/suite/codex_message_processor_flow.rs b/codex-rs/app-server/tests/suite/codex_message_processor_flow.rs
index 7527411223..cc00298f28 100644
--- a/codex-rs/app-server/tests/suite/codex_message_processor_flow.rs
+++ b/codex-rs/app-server/tests/suite/codex_message_processor_flow.rs
@@ -10,6 +10,7 @@ use codex_core::protocol::SandboxPolicy;
 use codex_core::protocol_config_types::ReasoningEffort;
 use codex_core::protocol_config_types::ReasoningSummary;
 use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
+use codex_protocol::config_types::SandboxMode;
 use codex_protocol::mcp_protocol::AddConversationListenerParams;
 use codex_protocol::mcp_protocol::AddConversationSubscriptionResponse;
 use codex_protocol::mcp_protocol::EXEC_COMMAND_APPROVAL_METHOD;
@@ -21,6 +22,9 @@ use codex_protocol::mcp_protocol::SendUserMessageParams;
 use codex_protocol::mcp_protocol::SendUserMessageResponse;
 use codex_protocol::mcp_protocol::SendUserTurnParams;
 use codex_protocol::mcp_protocol::SendUserTurnResponse;
+use codex_protocol::protocol::Event;
+use codex_protocol::protocol::EventMsg;
+use codex_protocol::protocol::InputMessageKind;
 use mcp_types::JSONRPCNotification;
 use mcp_types::JSONRPCResponse;
 use mcp_types::RequestId;
@@ -349,6 +353,234 @@ async fn test_send_user_turn_changes_approval_policy_behavior() {
 }
 
 // Helper: minimal config.toml pointing at mock provider.
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+async fn test_send_user_turn_updates_sandbox_and_cwd_between_turns() {
+    if env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
+        println!(
+            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
+        );
+        return;
+    }
+
+    let tmp = TempDir::new().expect("tmp dir");
+    let codex_home = tmp.path().join("codex_home");
+    std::fs::create_dir(&codex_home).expect("create codex home dir");
+    let workspace_root = tmp.path().join("workspace");
+    std::fs::create_dir(&workspace_root).expect("create workspace root");
+    let first_cwd = workspace_root.join("turn1");
+    let second_cwd = workspace_root.join("turn2");
+    std::fs::create_dir(&first_cwd).expect("create first cwd");
+    std::fs::create_dir(&second_cwd).expect("create second cwd");
+
+    let responses = vec![
+        create_shell_sse_response(
+            vec![
+                "bash".to_string(),
+                "-lc".to_string(),
+                "echo first turn".to_string(),
+            ],
+            None,
+            Some(5000),
+            "call-first",
+        )
+        .expect("create first shell response"),
+        create_final_assistant_message_sse_response("done first")
+            .expect("create first final assistant message"),
+        create_shell_sse_response(
+            vec![
+                "bash".to_string(),
+                "-lc".to_string(),
+                "echo second turn".to_string(),
+            ],
+            None,
+            Some(5000),
+            "call-second",
+        )
+        .expect("create second shell response"),
+        create_final_assistant_message_sse_response("done second")
+            .expect("create second final assistant message"),
+    ];
+    let server = create_mock_chat_completions_server(responses).await;
+    create_config_toml(&codex_home, &server.uri()).expect("write config");
+
+    let mut mcp = McpProcess::new(&codex_home)
+        .await
+        .expect("spawn mcp process");
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize())
+        .await
+        .expect("init timeout")
+        .expect("init failed");
+
+    let new_conv_id = mcp
+        .send_new_conversation_request(NewConversationParams {
+            cwd: Some(first_cwd.to_string_lossy().into_owned()),
+            approval_policy: Some(AskForApproval::Never),
+            sandbox: Some(SandboxMode::WorkspaceWrite),
+            ..Default::default()
+        })
+        .await
+        .expect("send newConversation");
+    let new_conv_resp: JSONRPCResponse = timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_response_message(RequestId::Integer(new_conv_id)),
+    )
+    .await
+    .expect("newConversation timeout")
+    .expect("newConversation resp");
+    let NewConversationResponse {
+        conversation_id,
+        model,
+        ..
+    } = to_response::<NewConversationResponse>(new_conv_resp)
+        .expect("deserialize newConversation response");
+
+    let add_listener_id = mcp
+        .send_add_conversation_listener_request(AddConversationListenerParams { conversation_id })
+        .await
+        .expect("send addConversationListener");
+    timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_response_message(RequestId::Integer(add_listener_id)),
+    )
+    .await
+    .expect("addConversationListener timeout")
+    .expect("addConversationListener resp");
+
+    let first_turn_id = mcp
+        .send_send_user_turn_request(SendUserTurnParams {
+            conversation_id,
+            items: vec![codex_protocol::mcp_protocol::InputItem::Text {
+                text: "first turn".to_string(),
+            }],
+            cwd: first_cwd.clone(),
+            approval_policy: AskForApproval::Never,
+            sandbox_policy: SandboxPolicy::WorkspaceWrite {
+                writable_roots: vec![first_cwd.clone()],
+                network_access: false,
+                exclude_tmpdir_env_var: false,
+                exclude_slash_tmp: false,
+            },
+            model: model.clone(),
+            effort: Some(ReasoningEffort::Medium),
+            summary: ReasoningSummary::Auto,
+        })
+        .await
+        .expect("send first sendUserTurn");
+    timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_response_message(RequestId::Integer(first_turn_id)),
+    )
+    .await
+    .expect("sendUserTurn 1 timeout")
+    .expect("sendUserTurn 1 resp");
+    timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_notification_message("codex/event/task_complete"),
+    )
+    .await
+    .expect("task_complete 1 timeout")
+    .expect("task_complete 1 notification");
+
+    let second_turn_id = mcp
+        .send_send_user_turn_request(SendUserTurnParams {
+            conversation_id,
+            items: vec![codex_protocol::mcp_protocol::InputItem::Text {
+                text: "second turn".to_string(),
+            }],
+            cwd: second_cwd.clone(),
+            approval_policy: AskForApproval::Never,
+            sandbox_policy: SandboxPolicy::DangerFullAccess,
+            model: model.clone(),
+            effort: Some(ReasoningEffort::Medium),
+            summary: ReasoningSummary::Auto,
+        })
+        .await
+        .expect("send second sendUserTurn");
+    timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_response_message(RequestId::Integer(second_turn_id)),
+    )
+    .await
+    .expect("sendUserTurn 2 timeout")
+    .expect("sendUserTurn 2 resp");
+
+    let mut env_message: Option<String> = None;
+    let second_cwd_str = second_cwd.to_string_lossy().into_owned();
+    for _ in 0..10 {
+        let notification = timeout(
+            DEFAULT_READ_TIMEOUT,
+            mcp.read_stream_until_notification_message("codex/event/user_message"),
+        )
+        .await
+        .expect("user_message timeout")
+        .expect("user_message notification");
+        let params = notification
+            .params
+            .clone()
+            .expect("user_message should include params");
+        let event: Event = serde_json::from_value(params).expect("deserialize user_message event");
+        if let EventMsg::UserMessage(user) = event.msg
+            && matches!(user.kind, Some(InputMessageKind::EnvironmentContext))
+            && user.message.contains(&second_cwd_str)
+        {
+            env_message = Some(user.message);
+            break;
+        }
+    }
+    let env_message = env_message.expect("expected environment context update");
+    assert!(
+        env_message.contains("<sandbox_mode>danger-full-access</sandbox_mode>"),
+        "env context should reflect new sandbox mode: {env_message}"
+    );
+    assert!(
+        env_message.contains("<network_access>enabled</network_access>"),
+        "env context should enable network access for danger-full-access policy: {env_message}"
+    );
+    assert!(
+        env_message.contains(&second_cwd_str),
+        "env context should include updated cwd: {env_message}"
+    );
+
+    let exec_begin_notification = timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_notification_message("codex/event/exec_command_begin"),
+    )
+    .await
+    .expect("exec_command_begin timeout")
+    .expect("exec_command_begin notification");
+    let params = exec_begin_notification
+        .params
+        .clone()
+        .expect("exec_command_begin params");
+    let event: Event = serde_json::from_value(params).expect("deserialize exec begin event");
+    let exec_begin = match event.msg {
+        EventMsg::ExecCommandBegin(exec_begin) => exec_begin,
+        other => panic!("expected ExecCommandBegin event, got {other:?}"),
+    };
+    assert_eq!(
+        exec_begin.cwd, second_cwd,
+        "exec turn should run from updated cwd"
+    );
+    assert_eq!(
+        exec_begin.command,
+        vec![
+            "bash".to_string(),
+            "-lc".to_string(),
+            "echo second turn".to_string()
+        ],
+        "exec turn should run expected command"
+    );
+
+    timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_notification_message("codex/event/task_complete"),
+    )
+    .await
+    .expect("task_complete 2 timeout")
+    .expect("task_complete 2 notification");
+}
+
 fn create_config_toml(codex_home: &Path, server_uri: &str) -> std::io::Result<()> {
     let config_toml = codex_home.join("config.toml");
     std::fs::write(

From c5cf9535e3cbc5fe805047985fbdb4f1d85d9d2d Mon Sep 17 00:00:00 2001
From: jimmyfraiture <jif@openai.com>
Date: Tue, 30 Sep 2025 18:35:00 +0100
Subject: [PATCH 15/17] Fix test

---
 .../app-server/tests/common/mcp_process.rs    | 40 ++++++++++++++++---
 codex-rs/core/src/codex.rs                    | 13 +++++-
 2 files changed, 46 insertions(+), 7 deletions(-)

diff --git a/codex-rs/app-server/tests/common/mcp_process.rs b/codex-rs/app-server/tests/common/mcp_process.rs
index cbf54cecf0..3d76c7a115 100644
--- a/codex-rs/app-server/tests/common/mcp_process.rs
+++ b/codex-rs/app-server/tests/common/mcp_process.rs
@@ -1,3 +1,4 @@
+use std::collections::VecDeque;
 use std::path::Path;
 use std::process::Stdio;
 use std::sync::atomic::AtomicI64;
@@ -46,6 +47,7 @@ pub struct McpProcess {
     process: Child,
     stdin: ChildStdin,
     stdout: BufReader<ChildStdout>,
+    pending_user_messages: VecDeque<JSONRPCNotification>,
 }
 
 impl McpProcess {
@@ -116,6 +118,7 @@ impl McpProcess {
             process,
             stdin,
             stdout,
+            pending_user_messages: VecDeque::new(),
         })
     }
 
@@ -380,8 +383,9 @@ impl McpProcess {
             let message = self.read_jsonrpc_message().await?;
 
             match message {
-                JSONRPCMessage::Notification(_) => {
-                    eprintln!("notification: {message:?}");
+                JSONRPCMessage::Notification(notification) => {
+                    eprintln!("notification: {notification:?}");
+                    self.enqueue_user_message(notification);
                 }
                 JSONRPCMessage::Request(jsonrpc_request) => {
                     return Ok(jsonrpc_request);
@@ -405,8 +409,9 @@ impl McpProcess {
         loop {
             let message = self.read_jsonrpc_message().await?;
             match message {
-                JSONRPCMessage::Notification(_) => {
-                    eprintln!("notification: {message:?}");
+                JSONRPCMessage::Notification(notification) => {
+                    eprintln!("notification: {notification:?}");
+                    self.enqueue_user_message(notification);
                 }
                 JSONRPCMessage::Request(_) => {
                     anyhow::bail!("unexpected JSONRPCMessage::Request: {message:?}");
@@ -430,8 +435,9 @@ impl McpProcess {
         loop {
             let message = self.read_jsonrpc_message().await?;
             match message {
-                JSONRPCMessage::Notification(_) => {
-                    eprintln!("notification: {message:?}");
+                JSONRPCMessage::Notification(notification) => {
+                    eprintln!("notification: {notification:?}");
+                    self.enqueue_user_message(notification);
                 }
                 JSONRPCMessage::Request(_) => {
                     anyhow::bail!("unexpected JSONRPCMessage::Request: {message:?}");
@@ -454,6 +460,10 @@ impl McpProcess {
     ) -> anyhow::Result<JSONRPCNotification> {
         eprintln!("in read_stream_until_notification_message({method})");
 
+        if let Some(notification) = self.take_pending_notification_by_method(method) {
+            return Ok(notification);
+        }
+
         loop {
             let message = self.read_jsonrpc_message().await?;
             match message {
@@ -461,6 +471,7 @@ impl McpProcess {
                     if notification.method == method {
                         return Ok(notification);
                     }
+                    self.enqueue_user_message(notification);
                 }
                 JSONRPCMessage::Request(_) => {
                     anyhow::bail!("unexpected JSONRPCMessage::Request: {message:?}");
@@ -474,4 +485,21 @@ impl McpProcess {
             }
         }
     }
+
+    fn take_pending_notification_by_method(&mut self, method: &str) -> Option<JSONRPCNotification> {
+        if let Some(pos) = self
+            .pending_user_messages
+            .iter()
+            .position(|notification| notification.method == method)
+        {
+            return self.pending_user_messages.remove(pos);
+        }
+        None
+    }
+
+    fn enqueue_user_message(&mut self, notification: JSONRPCNotification) {
+        if notification.method == "codex/event/user_message" {
+            self.pending_user_messages.push_back(notification);
+        }
+    }
 }
diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs
index 3fc0da7829..3d7a0be639 100644
--- a/codex-rs/core/src/codex.rs
+++ b/codex-rs/core/src/codex.rs
@@ -1286,8 +1286,19 @@ async fn submission_loop(
                     let previous_env_context = EnvironmentContext::from(turn_context.as_ref());
                     let new_env_context = EnvironmentContext::from(&fresh_turn_context);
                     if !new_env_context.equals_except_shell(&previous_env_context) {
-                        sess.record_conversation_items(&[ResponseItem::from(new_env_context)])
+                        let env_response_item = ResponseItem::from(new_env_context);
+                        sess.record_conversation_items(std::slice::from_ref(&env_response_item))
                             .await;
+                        for msg in map_response_item_to_event_messages(
+                            &env_response_item,
+                            sess.show_raw_agent_reasoning(),
+                        ) {
+                            let event = Event {
+                                id: sub.id.clone(),
+                                msg,
+                            };
+                            sess.send_event(event).await;
+                        }
                     }
 
                     // Install the new persistent context for subsequent tasks/turns.

From c1fa66618612ec796e65be3f4d4c24df74f4a840 Mon Sep 17 00:00:00 2001
From: jimmyfraiture <jif@openai.com>
Date: Wed, 1 Oct 2025 10:13:30 +0100
Subject: [PATCH 16/17] Fix merge

---
 .../suite/codex_message_processor_flow.rs     | 20 +++----------------
 codex-rs/core/src/executor/runner.rs          | 20 +++++++++----------
 2 files changed, 12 insertions(+), 28 deletions(-)

diff --git a/codex-rs/app-server/tests/suite/codex_message_processor_flow.rs b/codex-rs/app-server/tests/suite/codex_message_processor_flow.rs
index 9f36521c73..f611e39856 100644
--- a/codex-rs/app-server/tests/suite/codex_message_processor_flow.rs
+++ b/codex-rs/app-server/tests/suite/codex_message_processor_flow.rs
@@ -5,7 +5,7 @@ use app_test_support::create_final_assistant_message_sse_response;
 use app_test_support::create_mock_chat_completions_server;
 use app_test_support::create_shell_sse_response;
 use app_test_support::to_response;
-use codex_app_server_protocol::AddConversationListenerParams;
+use codex_app_server_protocol::{AddConversationListenerParams, InputItem};
 use codex_app_server_protocol::AddConversationSubscriptionResponse;
 use codex_app_server_protocol::ExecCommandApprovalParams;
 use codex_app_server_protocol::JSONRPCNotification;
@@ -26,23 +26,9 @@ use codex_core::protocol_config_types::ReasoningEffort;
 use codex_core::protocol_config_types::ReasoningSummary;
 use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
 use codex_protocol::config_types::SandboxMode;
-use codex_protocol::mcp_protocol::AddConversationListenerParams;
-use codex_protocol::mcp_protocol::AddConversationSubscriptionResponse;
-use codex_protocol::mcp_protocol::EXEC_COMMAND_APPROVAL_METHOD;
-use codex_protocol::mcp_protocol::NewConversationParams;
-use codex_protocol::mcp_protocol::NewConversationResponse;
-use codex_protocol::mcp_protocol::RemoveConversationListenerParams;
-use codex_protocol::mcp_protocol::RemoveConversationSubscriptionResponse;
-use codex_protocol::mcp_protocol::SendUserMessageParams;
-use codex_protocol::mcp_protocol::SendUserMessageResponse;
-use codex_protocol::mcp_protocol::SendUserTurnParams;
-use codex_protocol::mcp_protocol::SendUserTurnResponse;
 use codex_protocol::protocol::Event;
 use codex_protocol::protocol::EventMsg;
 use codex_protocol::protocol::InputMessageKind;
-use mcp_types::JSONRPCNotification;
-use mcp_types::JSONRPCResponse;
-use mcp_types::RequestId;
 use pretty_assertions::assert_eq;
 use std::env;
 use tempfile::TempDir;
@@ -482,7 +468,7 @@ async fn test_send_user_turn_updates_sandbox_and_cwd_between_turns() {
     let first_turn_id = mcp
         .send_send_user_turn_request(SendUserTurnParams {
             conversation_id,
-            items: vec![codex_protocol::mcp_protocol::InputItem::Text {
+            items: vec![InputItem::Text {
                 text: "first turn".to_string(),
             }],
             cwd: first_cwd.clone(),
@@ -517,7 +503,7 @@ async fn test_send_user_turn_updates_sandbox_and_cwd_between_turns() {
     let second_turn_id = mcp
         .send_send_user_turn_request(SendUserTurnParams {
             conversation_id,
-            items: vec![codex_protocol::mcp_protocol::InputItem::Text {
+            items: vec![InputItem::Text {
                 text: "second turn".to_string(),
             }],
             cwd: second_cwd.clone(),
diff --git a/codex-rs/core/src/executor/runner.rs b/codex-rs/core/src/executor/runner.rs
index ede9ed49e7..befa83360d 100644
--- a/codex-rs/core/src/executor/runner.rs
+++ b/codex-rs/core/src/executor/runner.rs
@@ -131,19 +131,19 @@ impl Executor {
             .await;
 
         // Step 5: Handle sandbox outcomes, optionally escalating to an unsandboxed retry.
-        let raw_output = match first_attempt {
-            Ok(output) => output,
+        match first_attempt {
+            Ok(output) => Ok(output),
             Err(CodexErr::Sandbox(SandboxErr::Timeout { output })) => {
-                return Err(CodexErr::Sandbox(SandboxErr::Timeout { output }).into());
+                Err(CodexErr::Sandbox(SandboxErr::Timeout { output }).into())
             }
-            Err(CodexErr::Sandbox(error @ SandboxErr::Denied { .. })) => {
-                return if sandbox_decision.escalate_on_failure {
+            Err(CodexErr::Sandbox(error)) => {
+                if sandbox_decision.escalate_on_failure {
                     self.retry_without_sandbox(
                         &request,
                         &config,
                         session,
                         context,
-                        stdout_stream.clone(),
+                        stdout_stream,
                         error,
                     )
                     .await
@@ -152,12 +152,10 @@ impl Executor {
                         "failed in sandbox {:?} with execution error: {error:?}",
                         sandbox_decision.initial_sandbox
                     )))
-                };
+                }
             }
-            Err(err) => return Err(err.into()),
-        };
-
-        Ok(raw_output)
+            Err(err) => Err(err.into()),
+        }
     }
 
     /// Fallback path invoked when a sandboxed run is denied so the user can

From 16abbe34f82c9f3783eabbfeb4bd05b31ab2052c Mon Sep 17 00:00:00 2001
From: jimmyfraiture <jif@openai.com>
Date: Wed, 1 Oct 2025 10:16:58 +0100
Subject: [PATCH 17/17] FMT

---
 .../app-server/tests/suite/codex_message_processor_flow.rs     | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/codex-rs/app-server/tests/suite/codex_message_processor_flow.rs b/codex-rs/app-server/tests/suite/codex_message_processor_flow.rs
index f611e39856..4dff2a1575 100644
--- a/codex-rs/app-server/tests/suite/codex_message_processor_flow.rs
+++ b/codex-rs/app-server/tests/suite/codex_message_processor_flow.rs
@@ -5,9 +5,10 @@ use app_test_support::create_final_assistant_message_sse_response;
 use app_test_support::create_mock_chat_completions_server;
 use app_test_support::create_shell_sse_response;
 use app_test_support::to_response;
-use codex_app_server_protocol::{AddConversationListenerParams, InputItem};
+use codex_app_server_protocol::AddConversationListenerParams;
 use codex_app_server_protocol::AddConversationSubscriptionResponse;
 use codex_app_server_protocol::ExecCommandApprovalParams;
+use codex_app_server_protocol::InputItem;
 use codex_app_server_protocol::JSONRPCNotification;
 use codex_app_server_protocol::JSONRPCResponse;
 use codex_app_server_protocol::NewConversationParams;