From 49715e14812aec6232dd7ca47466364678aff406 Mon Sep 17 00:00:00 2001 From: Eric Traut Date: Wed, 22 Oct 2025 14:54:54 -0700 Subject: [PATCH 1/6] Added model summary and risk assessment for commands that violate sandbox policy and require user approval --- codex-rs/app-server-protocol/src/protocol.rs | 4 + .../app-server/src/codex_message_processor.rs | 3 + .../suite/codex_message_processor_flow.rs | 1 + codex-rs/core/src/client.rs | 8 + codex-rs/core/src/codex.rs | 28 ++ codex-rs/core/src/config.rs | 15 + codex-rs/core/src/config_profile.rs | 1 + codex-rs/core/src/sandboxing/assessment.rs | 319 ++++++++++++++++++ codex-rs/core/src/sandboxing/mod.rs | 3 + codex-rs/core/src/tools/orchestrator.rs | 23 ++ .../core/src/tools/runtimes/apply_patch.rs | 4 +- codex-rs/core/src/tools/runtimes/shell.rs | 13 +- .../core/src/tools/runtimes/unified_exec.rs | 11 +- codex-rs/core/src/tools/sandboxing.rs | 15 + codex-rs/exec/src/lib.rs | 1 + codex-rs/mcp-server/src/codex_tool_config.rs | 1 + codex-rs/mcp-server/src/codex_tool_runner.rs | 2 + codex-rs/mcp-server/src/exec_approval.rs | 5 + codex-rs/mcp-server/tests/suite/codex_tool.rs | 1 + codex-rs/otel/src/otel_event_manager.rs | 41 +++ codex-rs/protocol/src/protocol.rs | 55 +++ .../tui/src/bottom_pane/approval_overlay.rs | 98 +++++- codex-rs/tui/src/bottom_pane/mod.rs | 1 + codex-rs/tui/src/chatwidget.rs | 1 + codex-rs/tui/src/chatwidget/tests.rs | 6 + codex-rs/tui/src/lib.rs | 1 + docs/config.md | 1 + 27 files changed, 653 insertions(+), 9 deletions(-) create mode 100644 codex-rs/core/src/sandboxing/assessment.rs diff --git a/codex-rs/app-server-protocol/src/protocol.rs b/codex-rs/app-server-protocol/src/protocol.rs index b4cd358b224..362c216246f 100644 --- a/codex-rs/app-server-protocol/src/protocol.rs +++ b/codex-rs/app-server-protocol/src/protocol.rs @@ -16,6 +16,7 @@ use codex_protocol::protocol::EventMsg; use codex_protocol::protocol::FileChange; use codex_protocol::protocol::RateLimitSnapshot; use codex_protocol::protocol::ReviewDecision; +use codex_protocol::protocol::SandboxCommandAssessment; use codex_protocol::protocol::SandboxPolicy; use codex_protocol::protocol::TurnAbortReason; use paste::paste; @@ -784,6 +785,8 @@ pub struct ExecCommandApprovalParams { pub cwd: PathBuf, #[serde(skip_serializing_if = "Option::is_none")] pub reason: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub risk: Option, pub parsed_cmd: Vec, } @@ -992,6 +995,7 @@ mod tests { command: vec!["echo".to_string(), "hello".to_string()], cwd: PathBuf::from("/tmp"), reason: Some("because tests".to_string()), + risk: None, parsed_cmd: vec![ParsedCommand::Unknown { cmd: "echo hello".to_string(), }], diff --git a/codex-rs/app-server/src/codex_message_processor.rs b/codex-rs/app-server/src/codex_message_processor.rs index 39992e8391e..cc1abb9f23b 100644 --- a/codex-rs/app-server/src/codex_message_processor.rs +++ b/codex-rs/app-server/src/codex_message_processor.rs @@ -1417,6 +1417,7 @@ async fn apply_bespoke_event_handling( command, cwd, reason, + risk, parsed_cmd, }) => { let params = ExecCommandApprovalParams { @@ -1425,6 +1426,7 @@ async fn apply_bespoke_event_handling( command, cwd, reason, + risk, parsed_cmd, }; let rx = outgoing @@ -1484,6 +1486,7 @@ async fn derive_config_from_params( include_view_image_tool: None, show_raw_agent_reasoning: None, tools_web_search_request: None, + sandbox_command_assessment: None, additional_writable_roots: Vec::new(), }; diff --git a/codex-rs/app-server/tests/suite/codex_message_processor_flow.rs b/codex-rs/app-server/tests/suite/codex_message_processor_flow.rs index 30b90f6e4aa..6d3e2b42c42 100644 --- a/codex-rs/app-server/tests/suite/codex_message_processor_flow.rs +++ b/codex-rs/app-server/tests/suite/codex_message_processor_flow.rs @@ -311,6 +311,7 @@ async fn test_send_user_turn_changes_approval_policy_behavior() { ], cwd: working_directory.clone(), reason: None, + risk: None, parsed_cmd: vec![ParsedCommand::Unknown { cmd: "python3 -c 'print(42)'".to_string() }], diff --git a/codex-rs/core/src/client.rs b/codex-rs/core/src/client.rs index 1bf1ad0d8b8..698c79c66cb 100644 --- a/codex-rs/core/src/client.rs +++ b/codex-rs/core/src/client.rs @@ -133,6 +133,14 @@ impl ModelClient { self.stream_with_task_kind(prompt, TaskKind::Regular).await } + pub fn config(&self) -> Arc { + Arc::clone(&self.config) + } + + pub fn provider(&self) -> &ModelProviderInfo { + &self.provider + } + pub(crate) async fn stream_with_task_kind( &self, prompt: &Prompt, diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index 202e24b00ca..f31a65acdb9 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -87,6 +87,7 @@ use crate::protocol::Op; use crate::protocol::RateLimitSnapshot; use crate::protocol::ReviewDecision; use crate::protocol::ReviewOutputEvent; +use crate::protocol::SandboxCommandAssessment; use crate::protocol::SandboxPolicy; use crate::protocol::SessionConfiguredEvent; use crate::protocol::StreamErrorEvent; @@ -754,6 +755,31 @@ impl Session { } } + pub(crate) async fn assess_sandbox_command( + &self, + turn_context: &TurnContext, + call_id: &str, + command: &[String], + failure_message: Option<&str>, + ) -> Option { + let config = turn_context.client.config(); + let provider = turn_context.client.provider().clone(); + let auth_manager = Arc::clone(&self.services.auth_manager); + let otel = self.services.otel_event_manager.clone(); + crate::sandboxing::assessment::assess_command( + config, + provider, + auth_manager, + &otel, + call_id, + command, + &turn_context.sandbox_policy, + &turn_context.cwd, + failure_message, + ) + .await + } + /// Emit an exec approval request event and await the user's decision. /// /// The request is keyed by `sub_id`/`call_id` so matching responses are delivered @@ -766,6 +792,7 @@ impl Session { command: Vec, cwd: PathBuf, reason: Option, + risk: Option, ) -> ReviewDecision { let sub_id = turn_context.sub_id.clone(); // Add the tx_approve callback to the map before sending the request. @@ -791,6 +818,7 @@ impl Session { command, cwd, reason, + risk, parsed_cmd, }); self.send_event(turn_context, event).await; diff --git a/codex-rs/core/src/config.rs b/codex-rs/core/src/config.rs index 0af01f68571..dc1b0fe8c4d 100644 --- a/codex-rs/core/src/config.rs +++ b/codex-rs/core/src/config.rs @@ -223,6 +223,9 @@ pub struct Config { pub tools_web_search_request: bool, + /// When `true`, run a model-based assessment for commands denied by the sandbox. + pub sandbox_command_assessment: bool, + pub use_experimental_streamable_shell_tool: bool, /// If set to `true`, used only the experimental unified exec tool. @@ -958,6 +961,7 @@ pub struct ConfigToml { pub experimental_use_unified_exec_tool: Option, pub experimental_use_rmcp_client: Option, pub experimental_use_freeform_apply_patch: Option, + pub experimental_sandbox_command_assessment: Option, } impl From for UserSavedConfig { @@ -1118,6 +1122,7 @@ pub struct ConfigOverrides { pub include_view_image_tool: Option, pub show_raw_agent_reasoning: Option, pub tools_web_search_request: Option, + pub sandbox_command_assessment: Option, /// Additional directories that should be treated as writable roots for this session. pub additional_writable_roots: Vec, } @@ -1147,6 +1152,7 @@ impl Config { include_view_image_tool: include_view_image_tool_override, show_raw_agent_reasoning, tools_web_search_request: override_tools_web_search_request, + sandbox_command_assessment: sandbox_command_assessment_override, additional_writable_roots, } = overrides; @@ -1269,6 +1275,10 @@ impl Config { let use_experimental_streamable_shell_tool = features.enabled(Feature::StreamableShell); let use_experimental_unified_exec_tool = features.enabled(Feature::UnifiedExec); let use_experimental_use_rmcp_client = features.enabled(Feature::RmcpClient); + let sandbox_command_assessment = sandbox_command_assessment_override + .or(config_profile.experimental_sandbox_command_assessment) + .or(cfg.experimental_sandbox_command_assessment) + .unwrap_or(false); let forced_chatgpt_workspace_id = cfg.forced_chatgpt_workspace_id.as_ref().and_then(|value| { @@ -1390,6 +1400,7 @@ impl Config { forced_login_method, include_apply_patch_tool: include_apply_patch_tool_flag, tools_web_search_request, + sandbox_command_assessment, use_experimental_streamable_shell_tool, use_experimental_unified_exec_tool, use_experimental_use_rmcp_client, @@ -2873,6 +2884,7 @@ model_verbosity = "high" forced_login_method: None, include_apply_patch_tool: false, tools_web_search_request: false, + sandbox_command_assessment: false, use_experimental_streamable_shell_tool: false, use_experimental_unified_exec_tool: false, use_experimental_use_rmcp_client: false, @@ -2941,6 +2953,7 @@ model_verbosity = "high" forced_login_method: None, include_apply_patch_tool: false, tools_web_search_request: false, + sandbox_command_assessment: false, use_experimental_streamable_shell_tool: false, use_experimental_unified_exec_tool: false, use_experimental_use_rmcp_client: false, @@ -3024,6 +3037,7 @@ model_verbosity = "high" forced_login_method: None, include_apply_patch_tool: false, tools_web_search_request: false, + sandbox_command_assessment: false, use_experimental_streamable_shell_tool: false, use_experimental_unified_exec_tool: false, use_experimental_use_rmcp_client: false, @@ -3093,6 +3107,7 @@ model_verbosity = "high" forced_login_method: None, include_apply_patch_tool: false, tools_web_search_request: false, + sandbox_command_assessment: false, use_experimental_streamable_shell_tool: false, use_experimental_unified_exec_tool: false, use_experimental_use_rmcp_client: false, diff --git a/codex-rs/core/src/config_profile.rs b/codex-rs/core/src/config_profile.rs index 1986d42ffa4..84b90f5adb8 100644 --- a/codex-rs/core/src/config_profile.rs +++ b/codex-rs/core/src/config_profile.rs @@ -26,6 +26,7 @@ pub struct ConfigProfile { pub experimental_use_exec_command_tool: Option, pub experimental_use_rmcp_client: Option, pub experimental_use_freeform_apply_patch: Option, + pub experimental_sandbox_command_assessment: Option, pub tools_web_search: Option, pub tools_view_image: Option, /// Optional feature toggles scoped to this profile. diff --git a/codex-rs/core/src/sandboxing/assessment.rs b/codex-rs/core/src/sandboxing/assessment.rs new file mode 100644 index 00000000000..e84c3dd7e50 --- /dev/null +++ b/codex-rs/core/src/sandboxing/assessment.rs @@ -0,0 +1,319 @@ +use std::path::Path; +use std::path::PathBuf; +use std::sync::Arc; +use std::time::Duration; +use std::time::Instant; + +use crate::AuthManager; +use crate::ModelProviderInfo; +use crate::auth::CodexAuth; +use crate::client::ModelClient; +use crate::client_common::Prompt; +use crate::client_common::ResponseEvent; +use crate::config::Config; +use crate::error::CodexErr; +use crate::protocol::SandboxPolicy; +use crate::terminal; +use codex_otel::otel_event_manager::OtelEventManager; +use codex_protocol::ConversationId; +use codex_protocol::models::ContentItem; +use codex_protocol::models::ResponseItem; +use codex_protocol::protocol::SandboxCommandAssessment; +use futures::StreamExt; +use serde_json::json; +use tokio::time::timeout; +use tracing::debug; +use tracing::warn; + +const SANDBOX_ASSESSMENT_SYSTEM_PROMPT: &str = r#"You are a security analyst evaluating shell commands that were blocked by a sandbox. Given the provided metadata, summarize the command's likely intent and assess the risk. Return strictly valid JSON with the keys: +- description (concise summary, at most two sentences) +- risk_level ("low", "medium", or "high") +- risk_categories (optional array of zero or more category strings) +Risk level examples: +- low: read-only inspections, listing files, printing configuration +- medium: modifying project files, installing dependencies, fetching artifacts from trusted sources +- high: deleting or overwriting data, exfiltrating secrets, escalating privileges, or disabling security controls +Recognized risk_categories: data_deletion, data_exfiltration, privilege_escalation, system_modification, network_access, resource_exhaustion, compliance. +Use multiple categories when appropriate. +Placeholders such as or indicate redacted sensitive paths. +If information is insufficient, choose the most cautious risk level supported by the evidence. +Respond with JSON only, without markdown code fences or extra commentary."#; + +const SANDBOX_ASSESSMENT_TIMEOUT: Duration = Duration::from_secs(5); + +const SANDBOX_RISK_CATEGORY_VALUES: &[&str] = &[ + "data_deletion", + "data_exfiltration", + "privilege_escalation", + "system_modification", + "network_access", + "resource_exhaustion", + "compliance", +]; + +#[allow(clippy::too_many_arguments)] +pub(crate) async fn assess_command( + config: Arc, + provider: ModelProviderInfo, + auth_manager: Arc, + parent_otel: &OtelEventManager, + call_id: &str, + command: &[String], + sandbox_policy: &SandboxPolicy, + cwd: &Path, + failure_message: Option<&str>, +) -> Option { + if !config.sandbox_command_assessment || command.is_empty() { + return None; + } + + let command_json = serde_json::to_string(command).unwrap_or_else(|_| "[]".to_string()); + let command_joined = + shlex::try_join(command.iter().map(String::as_str)).unwrap_or_else(|_| command.join(" ")); + let failure = failure_message + .map(str::trim) + .filter(|msg| !msg.is_empty()) + .map(str::to_string); + + let cwd_str = cwd.to_string_lossy().to_string(); + let sandbox_summary = summarize_sandbox_policy(sandbox_policy); + let mut roots = sandbox_roots_for_prompt(sandbox_policy, cwd); + roots.sort(); + roots.dedup(); + + let platform = std::env::consts::OS; + let mut prompt_sections = Vec::new(); + prompt_sections.push(format!("Platform: {platform}")); + prompt_sections.push(format!("Sandbox policy: {sandbox_summary}")); + if !roots.is_empty() { + let formatted = roots + .iter() + .map(|root| root.to_string_lossy()) + .collect::>() + .join(", "); + prompt_sections.push(format!("Filesystem roots: {formatted}")); + } + prompt_sections.push(format!("Working directory: {cwd_str}")); + prompt_sections.push(format!("Command argv: {command_json}")); + prompt_sections.push(format!("Command (joined): {command_joined}")); + if let Some(msg) = failure.as_ref() { + prompt_sections.push(format!("Sandbox failure message: {msg}")); + } + let metadata = prompt_sections.join("\n"); + let user_prompt = format!("Command metadata:\n{metadata}"); + + let prompt = Prompt { + input: vec![ResponseItem::Message { + id: None, + role: "user".to_string(), + content: vec![ContentItem::InputText { text: user_prompt }], + }], + tools: Vec::new(), + parallel_tool_calls: false, + base_instructions_override: Some(SANDBOX_ASSESSMENT_SYSTEM_PROMPT.to_string()), + output_schema: Some(sandbox_assessment_schema()), + }; + + let auth_snapshot = auth_manager.auth(); + let conversation_id = ConversationId::new(); + let child_otel = OtelEventManager::new( + conversation_id, + config.model.as_str(), + config.model_family.slug.as_str(), + auth_snapshot.as_ref().and_then(CodexAuth::get_account_id), + auth_snapshot + .as_ref() + .and_then(CodexAuth::get_account_email), + auth_snapshot.as_ref().map(|a| a.mode), + config.otel.log_user_prompt, + terminal::user_agent(), + ); + child_otel.conversation_starts( + config.model_provider.name.as_str(), + config.model_reasoning_effort, + config.model_reasoning_summary, + config.model_context_window, + config.model_max_output_tokens, + config.model_auto_compact_token_limit, + config.approval_policy, + config.sandbox_policy.clone(), + config.mcp_servers.keys().map(String::as_str).collect(), + config.active_profile.clone(), + ); + + let client = ModelClient::new( + Arc::clone(&config), + Some(auth_manager), + child_otel, + provider, + config.model_reasoning_effort, + config.model_reasoning_summary, + conversation_id, + ); + + let start = Instant::now(); + let assessment_result = timeout(SANDBOX_ASSESSMENT_TIMEOUT, async move { + let mut stream = client.stream(&prompt).await?; + let mut last_json: Option = None; + while let Some(event) = stream.next().await { + match event { + Ok(ResponseEvent::OutputItemDone(item)) => { + if let Some(text) = response_item_text(&item) { + last_json = Some(text); + } + } + Ok(ResponseEvent::RateLimits(_)) => {} + Ok(ResponseEvent::Completed { .. }) => break, + Ok(_) => continue, + Err(err) => return Err(err), + } + } + Ok(last_json) + }) + .await; + let duration = start.elapsed(); + + match assessment_result { + Ok(Ok(Some(raw))) => { + if let Some(json_slice) = extract_assessment_json(&raw) { + match serde_json::from_str::(json_slice) { + Ok(assessment) => { + parent_otel.sandbox_assessment( + call_id, + "success", + Some(assessment.risk_level), + &assessment.risk_categories, + duration, + ); + return Some(assessment); + } + Err(err) => { + warn!("failed to parse sandbox assessment JSON: {err}"); + parent_otel.sandbox_assessment(call_id, "parse_error", None, &[], duration); + } + } + } else { + warn!("sandbox assessment response missing JSON object"); + parent_otel.sandbox_assessment(call_id, "parse_error", None, &[], duration); + } + } + Ok(Ok(None)) => { + warn!("sandbox assessment response did not include any message"); + parent_otel.sandbox_assessment(call_id, "no_output", None, &[], duration); + } + Ok(Err(err)) => { + if let CodexErr::UnexpectedStatus(unexpected) = &err { + debug!( + "sandbox assessment failed: {err} (status: {}, body: {})", + unexpected.status, unexpected.body + ); + } else { + debug!("sandbox assessment failed: {err}"); + } + parent_otel.sandbox_assessment(call_id, "model_error", None, &[], duration); + } + Err(_) => { + debug!("sandbox assessment timed out"); + parent_otel.sandbox_assessment(call_id, "timeout", None, &[], duration); + } + } + + None +} + +fn summarize_sandbox_policy(policy: &SandboxPolicy) -> String { + match policy { + SandboxPolicy::DangerFullAccess => "danger-full-access".to_string(), + SandboxPolicy::ReadOnly => "read-only".to_string(), + SandboxPolicy::WorkspaceWrite { network_access, .. } => { + let network = if *network_access { + "network" + } else { + "no-network" + }; + format!("workspace-write (network_access={network})") + } + } +} + +fn sandbox_roots_for_prompt(policy: &SandboxPolicy, cwd: &Path) -> Vec { + let mut roots = vec![cwd.to_path_buf()]; + if let SandboxPolicy::WorkspaceWrite { writable_roots, .. } = policy { + roots.extend(writable_roots.iter().cloned()); + } + roots +} + +fn sandbox_assessment_schema() -> serde_json::Value { + json!({ + "type": "object", + "required": ["description", "risk_level", "risk_categories"], + "properties": { + "description": { + "type": "string", + "minLength": 1, + "maxLength": 500 + }, + "risk_level": { + "type": "string", + "enum": ["low", "medium", "high"] + }, + "risk_categories": { + "type": "array", + "items": { + "type": "string", + "enum": SANDBOX_RISK_CATEGORY_VALUES + } + } + }, + "additionalProperties": false + }) +} + +fn extract_assessment_json(raw: &str) -> Option<&str> { + let mut slice = raw.trim(); + if let Some(stripped) = slice.strip_prefix("```json") { + slice = stripped.trim_start(); + } + if let Some(stripped) = slice.strip_prefix("```") { + slice = stripped.trim_start(); + } + if let Some(stripped) = slice.strip_suffix("```") { + slice = stripped.trim_end(); + } + let slice = slice.trim(); + if slice.starts_with('{') && slice.ends_with('}') { + return Some(slice); + } + let start = slice.find('{')?; + let end = slice.rfind('}')?; + if end <= start { + return None; + } + slice.get(start..=end) +} + +fn response_item_text(item: &ResponseItem) -> Option { + match item { + ResponseItem::Message { content, .. } => { + let mut buffers: Vec<&str> = Vec::new(); + for segment in content { + match segment { + ContentItem::InputText { text } | ContentItem::OutputText { text } => { + if !text.is_empty() { + buffers.push(text); + } + } + ContentItem::InputImage { .. } => {} + } + } + if buffers.is_empty() { + None + } else { + Some(buffers.join("\n")) + } + } + ResponseItem::FunctionCallOutput { output, .. } => Some(output.content.clone()), + _ => None, + } +} diff --git a/codex-rs/core/src/sandboxing/mod.rs b/codex-rs/core/src/sandboxing/mod.rs index d632b5da19b..c11f3588fe6 100644 --- a/codex-rs/core/src/sandboxing/mod.rs +++ b/codex-rs/core/src/sandboxing/mod.rs @@ -5,6 +5,9 @@ Build platform wrappers and produce ExecEnv for execution. Owns low‑level sandbox placement and transformation of portable CommandSpec into a ready‑to‑spawn environment. */ + +pub mod assessment; + use crate::exec::ExecToolCallOutput; use crate::exec::SandboxType; use crate::exec::StdoutStream; diff --git a/codex-rs/core/src/tools/orchestrator.rs b/codex-rs/core/src/tools/orchestrator.rs index bdc4e3aff70..278d6214db6 100644 --- a/codex-rs/core/src/tools/orchestrator.rs +++ b/codex-rs/core/src/tools/orchestrator.rs @@ -7,6 +7,7 @@ retry without sandbox on denial (no re‑approval thanks to caching). */ use crate::error::CodexErr; use crate::error::SandboxErr; +use crate::error::get_error_message_ui; use crate::exec::ExecToolCallOutput; use crate::sandboxing::SandboxManager; use crate::tools::sandboxing::ApprovalCtx; @@ -56,6 +57,7 @@ impl ToolOrchestrator { turn: turn_ctx, call_id: &tool_ctx.call_id, retry_reason: None, + risk: None, }; let decision = tool.start_approval_async(req, approval_ctx).await; @@ -107,12 +109,33 @@ impl ToolOrchestrator { // Ask for approval before retrying without sandbox. if !tool.should_bypass_approval(approval_policy, already_approved) { + let mut risk = None; + + if let Some(metadata) = tool.sandbox_retry_data(req) { + let err = SandboxErr::Denied { + output: output.clone(), + }; + let friendly = get_error_message_ui(&CodexErr::Sandbox(err)); + let failure_summary = format!("failed in sandbox: {friendly}"); + + risk = tool_ctx + .session + .assess_sandbox_command( + turn_ctx, + &tool_ctx.call_id, + &metadata.command, + Some(failure_summary.as_str()), + ) + .await; + } + let reason_msg = build_denial_reason_from_output(output.as_ref()); let approval_ctx = ApprovalCtx { session: tool_ctx.session, turn: turn_ctx, call_id: &tool_ctx.call_id, retry_reason: Some(reason_msg), + risk, }; let decision = tool.start_approval_async(req, approval_ctx).await; diff --git a/codex-rs/core/src/tools/runtimes/apply_patch.rs b/codex-rs/core/src/tools/runtimes/apply_patch.rs index eb1cda4e5b2..15b77873de8 100644 --- a/codex-rs/core/src/tools/runtimes/apply_patch.rs +++ b/codex-rs/core/src/tools/runtimes/apply_patch.rs @@ -105,9 +105,10 @@ impl Approvable for ApplyPatchRuntime { let call_id = ctx.call_id.to_string(); let cwd = req.cwd.clone(); let retry_reason = ctx.retry_reason.clone(); + let risk = ctx.risk.clone(); let user_explicitly_approved = req.user_explicitly_approved; Box::pin(async move { - with_cached_approval(&session.services, key, || async move { + with_cached_approval(&session.services, key, move || async move { if let Some(reason) = retry_reason { session .request_command_approval( @@ -116,6 +117,7 @@ impl Approvable for ApplyPatchRuntime { vec!["apply_patch".to_string()], cwd, Some(reason), + risk, ) .await } else if user_explicitly_approved { diff --git a/codex-rs/core/src/tools/runtimes/shell.rs b/codex-rs/core/src/tools/runtimes/shell.rs index bfc2114fb37..79940b970b2 100644 --- a/codex-rs/core/src/tools/runtimes/shell.rs +++ b/codex-rs/core/src/tools/runtimes/shell.rs @@ -13,6 +13,7 @@ use crate::tools::runtimes::build_command_spec; use crate::tools::sandboxing::Approvable; use crate::tools::sandboxing::ApprovalCtx; use crate::tools::sandboxing::SandboxAttempt; +use crate::tools::sandboxing::SandboxRetryData; use crate::tools::sandboxing::Sandboxable; use crate::tools::sandboxing::SandboxablePreference; use crate::tools::sandboxing::ToolCtx; @@ -90,13 +91,14 @@ impl Approvable for ShellRuntime { .retry_reason .clone() .or_else(|| req.justification.clone()); + let risk = ctx.risk.clone(); let session = ctx.session; let turn = ctx.turn; let call_id = ctx.call_id.to_string(); Box::pin(async move { - with_cached_approval(&session.services, key, || async move { + with_cached_approval(&session.services, key, move || async move { session - .request_command_approval(turn, call_id, command, cwd, reason) + .request_command_approval(turn, call_id, command, cwd, reason, risk) .await }) .await @@ -139,6 +141,13 @@ impl Approvable for ShellRuntime { } impl ToolRuntime for ShellRuntime { + fn sandbox_retry_data(&self, req: &ShellRequest) -> Option { + Some(SandboxRetryData { + command: req.command.clone(), + cwd: req.cwd.clone(), + }) + } + async fn run( &mut self, req: &ShellRequest, diff --git a/codex-rs/core/src/tools/runtimes/unified_exec.rs b/codex-rs/core/src/tools/runtimes/unified_exec.rs index c7d136ebfdc..b75ec849cde 100644 --- a/codex-rs/core/src/tools/runtimes/unified_exec.rs +++ b/codex-rs/core/src/tools/runtimes/unified_exec.rs @@ -10,6 +10,7 @@ use crate::tools::runtimes::build_command_spec; use crate::tools::sandboxing::Approvable; use crate::tools::sandboxing::ApprovalCtx; use crate::tools::sandboxing::SandboxAttempt; +use crate::tools::sandboxing::SandboxRetryData; use crate::tools::sandboxing::Sandboxable; use crate::tools::sandboxing::SandboxablePreference; use crate::tools::sandboxing::ToolCtx; @@ -85,10 +86,11 @@ impl Approvable for UnifiedExecRuntime<'_> { let command = req.command.clone(); let cwd = req.cwd.clone(); let reason = ctx.retry_reason.clone(); + let risk = ctx.risk.clone(); Box::pin(async move { with_cached_approval(&session.services, key, || async move { session - .request_command_approval(turn, call_id, command, cwd, reason) + .request_command_approval(turn, call_id, command, cwd, reason, risk) .await }) .await @@ -97,6 +99,13 @@ impl Approvable for UnifiedExecRuntime<'_> { } impl<'a> ToolRuntime for UnifiedExecRuntime<'a> { + fn sandbox_retry_data(&self, req: &UnifiedExecRequest) -> Option { + Some(SandboxRetryData { + command: req.command.clone(), + cwd: req.cwd.clone(), + }) + } + async fn run( &mut self, req: &UnifiedExecRequest, diff --git a/codex-rs/core/src/tools/sandboxing.rs b/codex-rs/core/src/tools/sandboxing.rs index 7c4d65ca800..198bdf05050 100644 --- a/codex-rs/core/src/tools/sandboxing.rs +++ b/codex-rs/core/src/tools/sandboxing.rs @@ -7,6 +7,7 @@ use crate::codex::Session; use crate::codex::TurnContext; use crate::error::CodexErr; +use crate::protocol::SandboxCommandAssessment; use crate::protocol::SandboxPolicy; use crate::sandboxing::CommandSpec; use crate::sandboxing::SandboxManager; @@ -18,6 +19,7 @@ use std::collections::HashMap; use std::fmt::Debug; use std::hash::Hash; use std::path::Path; +use std::path::PathBuf; use futures::Future; use futures::future::BoxFuture; @@ -81,6 +83,7 @@ pub(crate) struct ApprovalCtx<'a> { pub turn: &'a TurnContext, pub call_id: &'a str, pub retry_reason: Option, + pub risk: Option, } pub(crate) trait Approvable { @@ -151,6 +154,13 @@ pub(crate) struct ToolCtx<'a> { pub tool_name: String, } +/// Captures the command metadata needed to re-run a tool request without sandboxing. +#[derive(Clone, Debug, PartialEq, Eq)] +pub(crate) struct SandboxRetryData { + pub command: Vec, + pub cwd: PathBuf, +} + #[derive(Debug)] pub(crate) enum ToolError { Rejected(String), @@ -165,6 +175,11 @@ pub(crate) trait ToolRuntime: Approvable + Sandboxable { attempt: &SandboxAttempt<'_>, ctx: &ToolCtx, ) -> Result; + + /// Allow a runtime to supply metadata for risk assessment when the orchestrator retries without isolation. + fn sandbox_retry_data(&self, _req: &Req) -> Option { + None + } } pub(crate) struct SandboxAttempt<'a> { diff --git a/codex-rs/exec/src/lib.rs b/codex-rs/exec/src/lib.rs index e470194cad8..975591bf50f 100644 --- a/codex-rs/exec/src/lib.rs +++ b/codex-rs/exec/src/lib.rs @@ -179,6 +179,7 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option) -> any include_view_image_tool: None, show_raw_agent_reasoning: oss.then_some(true), tools_web_search_request: None, + sandbox_command_assessment: None, additional_writable_roots: Vec::new(), }; // Parse `-c` overrides. diff --git a/codex-rs/mcp-server/src/codex_tool_config.rs b/codex-rs/mcp-server/src/codex_tool_config.rs index b4c4eb749dc..a51de207a0c 100644 --- a/codex-rs/mcp-server/src/codex_tool_config.rs +++ b/codex-rs/mcp-server/src/codex_tool_config.rs @@ -158,6 +158,7 @@ impl CodexToolCallParam { include_view_image_tool: None, show_raw_agent_reasoning: None, tools_web_search_request: None, + sandbox_command_assessment: None, additional_writable_roots: Vec::new(), }; diff --git a/codex-rs/mcp-server/src/codex_tool_runner.rs b/codex-rs/mcp-server/src/codex_tool_runner.rs index a59755008d6..05d653aff97 100644 --- a/codex-rs/mcp-server/src/codex_tool_runner.rs +++ b/codex-rs/mcp-server/src/codex_tool_runner.rs @@ -178,6 +178,7 @@ async fn run_codex_tool_session_inner( cwd, call_id, reason: _, + risk, parsed_cmd, }) => { handle_exec_approval_request( @@ -190,6 +191,7 @@ async fn run_codex_tool_session_inner( event.id.clone(), call_id, parsed_cmd, + risk, ) .await; continue; diff --git a/codex-rs/mcp-server/src/exec_approval.rs b/codex-rs/mcp-server/src/exec_approval.rs index 44607b754d7..033523ac0df 100644 --- a/codex-rs/mcp-server/src/exec_approval.rs +++ b/codex-rs/mcp-server/src/exec_approval.rs @@ -4,6 +4,7 @@ use std::sync::Arc; use codex_core::CodexConversation; use codex_core::protocol::Op; use codex_core::protocol::ReviewDecision; +use codex_core::protocol::SandboxCommandAssessment; use codex_protocol::parse_command::ParsedCommand; use mcp_types::ElicitRequest; use mcp_types::ElicitRequestParamsRequestedSchema; @@ -37,6 +38,8 @@ pub struct ExecApprovalElicitRequestParams { pub codex_command: Vec, pub codex_cwd: PathBuf, pub codex_parsed_cmd: Vec, + #[serde(skip_serializing_if = "Option::is_none")] + pub codex_risk: Option, } // TODO(mbolin): ExecApprovalResponse does not conform to ElicitResult. See: @@ -59,6 +62,7 @@ pub(crate) async fn handle_exec_approval_request( event_id: String, call_id: String, codex_parsed_cmd: Vec, + codex_risk: Option, ) { let escaped_command = shlex::try_join(command.iter().map(String::as_str)).unwrap_or_else(|_| command.join(" ")); @@ -81,6 +85,7 @@ pub(crate) async fn handle_exec_approval_request( codex_command: command, codex_cwd: cwd, codex_parsed_cmd, + codex_risk, }; let params_json = match serde_json::to_value(¶ms) { Ok(value) => value, diff --git a/codex-rs/mcp-server/tests/suite/codex_tool.rs b/codex-rs/mcp-server/tests/suite/codex_tool.rs index d7cd200f074..4ab83ca24e9 100644 --- a/codex-rs/mcp-server/tests/suite/codex_tool.rs +++ b/codex-rs/mcp-server/tests/suite/codex_tool.rs @@ -196,6 +196,7 @@ fn create_expected_elicitation_request( codex_cwd: workdir.to_path_buf(), codex_call_id: "call1234".to_string(), codex_parsed_cmd, + codex_risk: None, })?), }) } diff --git a/codex-rs/otel/src/otel_event_manager.rs b/codex-rs/otel/src/otel_event_manager.rs index 486683dae0e..55ce88f947c 100644 --- a/codex-rs/otel/src/otel_event_manager.rs +++ b/codex-rs/otel/src/otel_event_manager.rs @@ -8,6 +8,8 @@ use codex_protocol::models::ResponseItem; use codex_protocol::protocol::AskForApproval; use codex_protocol::protocol::ReviewDecision; use codex_protocol::protocol::SandboxPolicy; +use codex_protocol::protocol::SandboxRiskCategory; +use codex_protocol::protocol::SandboxRiskLevel; use codex_protocol::user_input::UserInput; use eventsource_stream::Event as StreamEvent; use eventsource_stream::EventStreamError as StreamError; @@ -366,6 +368,45 @@ impl OtelEventManager { ); } + pub fn sandbox_assessment( + &self, + call_id: &str, + status: &str, + risk_level: Option, + risk_categories: &[SandboxRiskCategory], + duration: Duration, + ) { + let level = risk_level.map(|level| level.as_str()); + let categories = if risk_categories.is_empty() { + String::new() + } else { + risk_categories + .iter() + .map(SandboxRiskCategory::as_str) + .collect::>() + .join(", ") + }; + + tracing::event!( + tracing::Level::INFO, + event.name = "codex.sandbox_assessment", + event.timestamp = %timestamp(), + conversation.id = %self.metadata.conversation_id, + app.version = %self.metadata.app_version, + auth_mode = self.metadata.auth_mode, + user.account_id = self.metadata.account_id, + user.email = self.metadata.account_email, + terminal.type = %self.metadata.terminal_type, + model = %self.metadata.model, + slug = %self.metadata.slug, + call_id = %call_id, + status = %status, + risk_level = level, + risk_categories = categories, + duration_ms = %duration.as_millis(), + ); + } + pub async fn log_tool_result( &self, tool_name: &str, diff --git a/codex-rs/protocol/src/protocol.rs b/codex-rs/protocol/src/protocol.rs index 105f0280497..09a8e25565b 100644 --- a/codex-rs/protocol/src/protocol.rs +++ b/codex-rs/protocol/src/protocol.rs @@ -1126,6 +1126,58 @@ pub struct ExecCommandOutputDeltaEvent { pub chunk: Vec, } +#[derive(Debug, Clone, Copy, Deserialize, Serialize, PartialEq, Eq, Hash, JsonSchema, TS)] +#[serde(rename_all = "snake_case")] +pub enum SandboxRiskLevel { + Low, + Medium, + High, +} + +#[derive(Debug, Clone, Copy, Deserialize, Serialize, PartialEq, Eq, Hash, JsonSchema, TS)] +#[serde(rename_all = "snake_case")] +pub enum SandboxRiskCategory { + DataDeletion, + DataExfiltration, + PrivilegeEscalation, + SystemModification, + NetworkAccess, + ResourceExhaustion, + Compliance, +} + +#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq, JsonSchema, TS)] +pub struct SandboxCommandAssessment { + pub description: String, + pub risk_level: SandboxRiskLevel, + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub risk_categories: Vec, +} + +impl SandboxRiskLevel { + pub fn as_str(&self) -> &'static str { + match self { + Self::Low => "low", + Self::Medium => "medium", + Self::High => "high", + } + } +} + +impl SandboxRiskCategory { + pub fn as_str(&self) -> &'static str { + match self { + Self::DataDeletion => "data_deletion", + Self::DataExfiltration => "data_exfiltration", + Self::PrivilegeEscalation => "privilege_escalation", + Self::SystemModification => "system_modification", + Self::NetworkAccess => "network_access", + Self::ResourceExhaustion => "resource_exhaustion", + Self::Compliance => "compliance", + } + } +} + #[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)] pub struct ExecApprovalRequestEvent { /// Identifier for the associated exec call, if available. @@ -1137,6 +1189,9 @@ pub struct ExecApprovalRequestEvent { /// Optional human-readable reason for the approval (e.g. retry without sandbox). #[serde(skip_serializing_if = "Option::is_none")] pub reason: Option, + /// Optional model-provided risk assessment describing the blocked command. + #[serde(skip_serializing_if = "Option::is_none")] + pub risk: Option, pub parsed_cmd: Vec, } diff --git a/codex-rs/tui/src/bottom_pane/approval_overlay.rs b/codex-rs/tui/src/bottom_pane/approval_overlay.rs index e8ecc084c4a..1c6af6baf0a 100644 --- a/codex-rs/tui/src/bottom_pane/approval_overlay.rs +++ b/codex-rs/tui/src/bottom_pane/approval_overlay.rs @@ -19,6 +19,9 @@ use crate::render::renderable::Renderable; use codex_core::protocol::FileChange; use codex_core::protocol::Op; use codex_core::protocol::ReviewDecision; +use codex_core::protocol::SandboxCommandAssessment; +use codex_core::protocol::SandboxRiskCategory; +use codex_core::protocol::SandboxRiskLevel; use crossterm::event::KeyCode; use crossterm::event::KeyEvent; use crossterm::event::KeyEventKind; @@ -38,6 +41,7 @@ pub(crate) enum ApprovalRequest { id: String, command: Vec, reason: Option, + risk: Option, }, ApplyPatch { id: String, @@ -104,6 +108,13 @@ impl ApprovalOverlay { ), }; + let subtitle = match &variant { + ApprovalVariant::Exec { + risk: Some(risk), .. + } => Some(risk_summary_subtitle(risk)), + _ => None, + }; + let header = Box::new(ColumnRenderable::with([ Line::from(title.bold()).into(), Line::from("").into(), @@ -130,6 +141,7 @@ impl ApprovalOverlay { ])), items, header, + subtitle, ..Default::default() }; @@ -145,7 +157,7 @@ impl ApprovalOverlay { }; if let Some(variant) = self.current_variant.as_ref() { match (&variant, option.decision) { - (ApprovalVariant::Exec { id, command }, decision) => { + (ApprovalVariant::Exec { id, command, .. }, decision) => { self.handle_exec_decision(id, command, decision); } (ApprovalVariant::ApplyPatch { id, .. }, decision) => { @@ -233,7 +245,7 @@ impl BottomPaneView for ApprovalOverlay { && let Some(variant) = self.current_variant.as_ref() { match &variant { - ApprovalVariant::Exec { id, command } => { + ApprovalVariant::Exec { id, command, .. } => { self.handle_exec_decision(id, command, ReviewDecision::Abort); } ApprovalVariant::ApplyPatch { id, .. } => { @@ -285,8 +297,12 @@ impl From for ApprovalRequestState { id, command, reason, + risk, } => { let mut header: Vec> = Vec::new(); + if let Some(risk) = risk.as_ref() { + header.extend(render_risk_lines(risk)); + } if let Some(reason) = reason && !reason.is_empty() { @@ -300,7 +316,7 @@ impl From for ApprovalRequestState { } header.extend(full_cmd_lines); Self { - variant: ApprovalVariant::Exec { id, command }, + variant: ApprovalVariant::Exec { id, command, risk }, header: Box::new(Paragraph::new(header).wrap(Wrap { trim: false })), } } @@ -330,10 +346,80 @@ impl From for ApprovalRequestState { } } +fn render_risk_lines(risk: &SandboxCommandAssessment) -> Vec> { + let level_span = match risk.risk_level { + SandboxRiskLevel::Low => "LOW".green().bold(), + SandboxRiskLevel::Medium => "MEDIUM".cyan().bold(), + SandboxRiskLevel::High => "HIGH".red().bold(), + }; + + let mut spans: Vec> = vec!["Risk: ".into(), level_span]; + if !risk.risk_categories.is_empty() { + spans.push(" (".into()); + for (idx, category) in risk.risk_categories.iter().enumerate() { + if idx > 0 { + spans.push(", ".into()); + } + spans.push(risk_category_label(*category).into()); + } + spans.push(")".into()); + } + + let mut lines = vec![Line::from(spans)]; + let description = risk.description.trim(); + if !description.is_empty() { + lines.push(Line::from(vec![ + "Summary: ".into(), + description.to_string().into(), + ])); + } + lines.push(Line::from("")); + lines +} + +fn risk_category_label(category: SandboxRiskCategory) -> &'static str { + match category { + SandboxRiskCategory::DataDeletion => "data deletion", + SandboxRiskCategory::DataExfiltration => "data exfiltration", + SandboxRiskCategory::PrivilegeEscalation => "privilege escalation", + SandboxRiskCategory::SystemModification => "system modification", + SandboxRiskCategory::NetworkAccess => "network access", + SandboxRiskCategory::ResourceExhaustion => "resource exhaustion", + SandboxRiskCategory::Compliance => "compliance", + } +} + +fn risk_summary_subtitle(risk: &SandboxCommandAssessment) -> String { + let level = risk.risk_level.as_str().to_uppercase(); + let mut parts = vec![format!("{level} risk")]; + if !risk.risk_categories.is_empty() { + let cats = risk + .risk_categories + .iter() + .map(|category| risk_category_label(*category)) + .collect::>() + .join(", "); + parts.push(format!("Categories: {cats}")); + } + let summary = parts.join(" · "); + let description = risk.description.trim(); + if description.is_empty() { + summary + } else { + format!("{summary} – {description}") + } +} + #[derive(Clone)] enum ApprovalVariant { - Exec { id: String, command: Vec }, - ApplyPatch { id: String }, + Exec { + id: String, + command: Vec, + risk: Option, + }, + ApplyPatch { + id: String, + }, } #[derive(Clone)] @@ -404,6 +490,7 @@ mod tests { id: "test".to_string(), command: vec!["echo".to_string(), "hi".to_string()], reason: Some("reason".to_string()), + risk: None, } } @@ -445,6 +532,7 @@ mod tests { id: "test".into(), command, reason: None, + risk: None, }; let view = ApprovalOverlay::new(exec_request, tx); diff --git a/codex-rs/tui/src/bottom_pane/mod.rs b/codex-rs/tui/src/bottom_pane/mod.rs index 69405cd823d..48bc0e9d4ca 100644 --- a/codex-rs/tui/src/bottom_pane/mod.rs +++ b/codex-rs/tui/src/bottom_pane/mod.rs @@ -557,6 +557,7 @@ mod tests { id: "1".to_string(), command: vec!["echo".into(), "ok".into()], reason: None, + risk: None, } } diff --git a/codex-rs/tui/src/chatwidget.rs b/codex-rs/tui/src/chatwidget.rs index c1a6dd67694..6b504407d2c 100644 --- a/codex-rs/tui/src/chatwidget.rs +++ b/codex-rs/tui/src/chatwidget.rs @@ -778,6 +778,7 @@ impl ChatWidget { id, command: ev.command, reason: ev.reason, + risk: ev.risk, }; self.bottom_pane.push_approval_request(request); self.request_redraw(); diff --git a/codex-rs/tui/src/chatwidget/tests.rs b/codex-rs/tui/src/chatwidget/tests.rs index 60f1ab047a7..33e2596fd3f 100644 --- a/codex-rs/tui/src/chatwidget/tests.rs +++ b/codex-rs/tui/src/chatwidget/tests.rs @@ -394,6 +394,7 @@ fn exec_approval_emits_proposed_command_and_decision_history() { reason: Some( "this is a test reason such as one that would be produced by the model".into(), ), + risk: None, parsed_cmd: vec![], }; chat.handle_codex_event(Event { @@ -436,6 +437,7 @@ fn exec_approval_decision_truncates_multiline_and_long_commands() { reason: Some( "this is a test reason such as one that would be produced by the model".into(), ), + risk: None, parsed_cmd: vec![], }; chat.handle_codex_event(Event { @@ -484,6 +486,7 @@ fn exec_approval_decision_truncates_multiline_and_long_commands() { command: vec!["bash".into(), "-lc".into(), long], cwd: std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")), reason: None, + risk: None, parsed_cmd: vec![], }; chat.handle_codex_event(Event { @@ -1413,6 +1416,7 @@ fn approval_modal_exec_snapshot() { reason: Some( "this is a test reason such as one that would be produced by the model".into(), ), + risk: None, parsed_cmd: vec![], }; chat.handle_codex_event(Event { @@ -1457,6 +1461,7 @@ fn approval_modal_exec_without_reason_snapshot() { command: vec!["bash".into(), "-lc".into(), "echo hello world".into()], cwd: std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")), reason: None, + risk: None, parsed_cmd: vec![], }; chat.handle_codex_event(Event { @@ -1667,6 +1672,7 @@ fn status_widget_and_approval_modal_snapshot() { reason: Some( "this is a test reason such as one that would be produced by the model".into(), ), + risk: None, parsed_cmd: vec![], }; chat.handle_codex_event(Event { diff --git a/codex-rs/tui/src/lib.rs b/codex-rs/tui/src/lib.rs index 681b4c6ca12..d1c07957a6e 100644 --- a/codex-rs/tui/src/lib.rs +++ b/codex-rs/tui/src/lib.rs @@ -148,6 +148,7 @@ pub async fn run_main( include_view_image_tool: None, show_raw_agent_reasoning: cli.oss.then_some(true), tools_web_search_request: cli.web_search.then_some(true), + sandbox_command_assessment: None, additional_writable_roots: additional_dirs, }; let raw_overrides = cli.config_overrides.raw_overrides.clone(); diff --git a/docs/config.md b/docs/config.md index 4186c4ff348..200e0bc8b6c 100644 --- a/docs/config.md +++ b/docs/config.md @@ -906,6 +906,7 @@ If `forced_chatgpt_workspace_id` is set but `forced_login_method` is not set, AP | `chatgpt_base_url` | string | Base URL for ChatGPT auth flow. | | `experimental_instructions_file` | string (path) | Replace built‑in instructions (experimental). | | `experimental_use_exec_command_tool` | boolean | Use experimental exec command tool. | +| `experimental_sandbox_command_assessment` | boolean | Enable model-based risk summaries when sandboxed commands are denied. | | `projects..trust_level` | string | Mark project/worktree as trusted (only `"trusted"` is recognized). | | `tools.web_search` | boolean | Enable web search tool (alias: `web_search_request`) (default: false). | | `forced_login_method` | `chatgpt` \| `api` | Only allow Codex to be used with ChatGPT or API keys. | From 3e294f245848aa3b420581ddeb9b65f699b702f5 Mon Sep 17 00:00:00 2001 From: Eric Traut Date: Wed, 22 Oct 2025 15:10:03 -0700 Subject: [PATCH 2/6] Refined TUI --- .../tui/src/bottom_pane/approval_overlay.rs | 51 +++---------------- docs/config.md | 2 +- 2 files changed, 9 insertions(+), 44 deletions(-) diff --git a/codex-rs/tui/src/bottom_pane/approval_overlay.rs b/codex-rs/tui/src/bottom_pane/approval_overlay.rs index 1c6af6baf0a..ce3ba660f14 100644 --- a/codex-rs/tui/src/bottom_pane/approval_overlay.rs +++ b/codex-rs/tui/src/bottom_pane/approval_overlay.rs @@ -108,13 +108,6 @@ impl ApprovalOverlay { ), }; - let subtitle = match &variant { - ApprovalVariant::Exec { - risk: Some(risk), .. - } => Some(risk_summary_subtitle(risk)), - _ => None, - }; - let header = Box::new(ColumnRenderable::with([ Line::from(title.bold()).into(), Line::from("").into(), @@ -141,7 +134,6 @@ impl ApprovalOverlay { ])), items, header, - subtitle, ..Default::default() }; @@ -157,7 +149,7 @@ impl ApprovalOverlay { }; if let Some(variant) = self.current_variant.as_ref() { match (&variant, option.decision) { - (ApprovalVariant::Exec { id, command, .. }, decision) => { + (ApprovalVariant::Exec { id, command }, decision) => { self.handle_exec_decision(id, command, decision); } (ApprovalVariant::ApplyPatch { id, .. }, decision) => { @@ -245,7 +237,7 @@ impl BottomPaneView for ApprovalOverlay { && let Some(variant) = self.current_variant.as_ref() { match &variant { - ApprovalVariant::Exec { id, command, .. } => { + ApprovalVariant::Exec { id, command } => { self.handle_exec_decision(id, command, ReviewDecision::Abort); } ApprovalVariant::ApplyPatch { id, .. } => { @@ -300,15 +292,15 @@ impl From for ApprovalRequestState { risk, } => { let mut header: Vec> = Vec::new(); - if let Some(risk) = risk.as_ref() { - header.extend(render_risk_lines(risk)); - } if let Some(reason) = reason && !reason.is_empty() { header.push(Line::from(vec!["Reason: ".into(), reason.italic()])); header.push(Line::from("")); } + if let Some(risk) = risk.as_ref() { + header.extend(render_risk_lines(risk)); + } let full_cmd = strip_bash_lc_and_escape(&command); let mut full_cmd_lines = highlight_bash_to_lines(&full_cmd); if let Some(first) = full_cmd_lines.first_mut() { @@ -316,7 +308,7 @@ impl From for ApprovalRequestState { } header.extend(full_cmd_lines); Self { - variant: ApprovalVariant::Exec { id, command, risk }, + variant: ApprovalVariant::Exec { id, command }, header: Box::new(Paragraph::new(header).wrap(Wrap { trim: false })), } } @@ -389,37 +381,10 @@ fn risk_category_label(category: SandboxRiskCategory) -> &'static str { } } -fn risk_summary_subtitle(risk: &SandboxCommandAssessment) -> String { - let level = risk.risk_level.as_str().to_uppercase(); - let mut parts = vec![format!("{level} risk")]; - if !risk.risk_categories.is_empty() { - let cats = risk - .risk_categories - .iter() - .map(|category| risk_category_label(*category)) - .collect::>() - .join(", "); - parts.push(format!("Categories: {cats}")); - } - let summary = parts.join(" · "); - let description = risk.description.trim(); - if description.is_empty() { - summary - } else { - format!("{summary} – {description}") - } -} - #[derive(Clone)] enum ApprovalVariant { - Exec { - id: String, - command: Vec, - risk: Option, - }, - ApplyPatch { - id: String, - }, + Exec { id: String, command: Vec }, + ApplyPatch { id: String }, } #[derive(Clone)] diff --git a/docs/config.md b/docs/config.md index 200e0bc8b6c..e09e621ad64 100644 --- a/docs/config.md +++ b/docs/config.md @@ -906,7 +906,7 @@ If `forced_chatgpt_workspace_id` is set but `forced_login_method` is not set, AP | `chatgpt_base_url` | string | Base URL for ChatGPT auth flow. | | `experimental_instructions_file` | string (path) | Replace built‑in instructions (experimental). | | `experimental_use_exec_command_tool` | boolean | Use experimental exec command tool. | -| `experimental_sandbox_command_assessment` | boolean | Enable model-based risk summaries when sandboxed commands are denied. | +| `experimental_sandbox_command_assessment` | boolean | Enable model-based risk summaries when sandboxed commands are denied. | | `projects..trust_level` | string | Mark project/worktree as trusted (only `"trusted"` is recognized). | | `tools.web_search` | boolean | Enable web search tool (alias: `web_search_request`) (default: false). | | `forced_login_method` | `chatgpt` \| `api` | Only allow Codex to be used with ChatGPT or API keys. | From c1e68c110f6d68f059b3f171c95487db10bea831 Mon Sep 17 00:00:00 2001 From: Eric Traut Date: Wed, 22 Oct 2025 15:24:00 -0700 Subject: [PATCH 3/6] Removed inaccurate part of prompt --- codex-rs/core/src/sandboxing/assessment.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/codex-rs/core/src/sandboxing/assessment.rs b/codex-rs/core/src/sandboxing/assessment.rs index e84c3dd7e50..43eeab040ff 100644 --- a/codex-rs/core/src/sandboxing/assessment.rs +++ b/codex-rs/core/src/sandboxing/assessment.rs @@ -35,7 +35,6 @@ Risk level examples: - high: deleting or overwriting data, exfiltrating secrets, escalating privileges, or disabling security controls Recognized risk_categories: data_deletion, data_exfiltration, privilege_escalation, system_modification, network_access, resource_exhaustion, compliance. Use multiple categories when appropriate. -Placeholders such as or indicate redacted sensitive paths. If information is insufficient, choose the most cautious risk level supported by the evidence. Respond with JSON only, without markdown code fences or extra commentary."#; From 5c130c88e4d93dd1cffbf1338b57017e78e7aa40 Mon Sep 17 00:00:00 2001 From: Eric Traut Date: Fri, 24 Oct 2025 13:29:02 -0700 Subject: [PATCH 4/6] Code review feedback: * Moved prompt into its own file and switched it to use askama for templating * Refactored sandbox_retry_data trait for simplification * Fixed otel telemetry so assessment conversation doesn't appear as a new task * Added otel telemetry point for recording latency of assessment * Removed defensive JSON parsing of assessment response Removed new experimental config key from public documentation for now. We're going to roll this out internally first to get feedback. --- codex-rs/core/src/codex.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index f31a65acdb9..ada207fa52e 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -771,6 +771,7 @@ impl Session { provider, auth_manager, &otel, + self.conversation_id, call_id, command, &turn_context.sandbox_policy, From f050c8e310a9959ca0577ecd6dc43746f594dfc6 Mon Sep 17 00:00:00 2001 From: Eric Traut Date: Fri, 24 Oct 2025 13:59:11 -0700 Subject: [PATCH 5/6] More code review feedback: * Simplified config handling by leveraging "features" mechanism * Moved approvals-related schemas from protocol.rs to simplify --- codex-rs/core/src/config.rs | 7 +- codex-rs/core/src/config_profile.rs | 2 +- codex-rs/core/src/features.rs | 9 + codex-rs/core/src/sandboxing/assessment.rs | 189 +++++++----------- codex-rs/core/src/tools/orchestrator.rs | 4 +- .../core/src/tools/runtimes/apply_patch.rs | 8 + codex-rs/core/src/tools/runtimes/shell.rs | 17 +- .../core/src/tools/runtimes/unified_exec.rs | 17 +- codex-rs/core/src/tools/sandboxing.rs | 9 +- .../templates/sandboxing/assessment_prompt.md | 25 +++ codex-rs/otel/src/otel_event_manager.rs | 18 ++ codex-rs/protocol/src/approvals.rs | 91 +++++++++ codex-rs/protocol/src/lib.rs | 1 + codex-rs/protocol/src/protocol.rs | 88 +------- docs/config.md | 1 - 15 files changed, 261 insertions(+), 225 deletions(-) create mode 100644 codex-rs/core/templates/sandboxing/assessment_prompt.md create mode 100644 codex-rs/protocol/src/approvals.rs diff --git a/codex-rs/core/src/config.rs b/codex-rs/core/src/config.rs index dc1b0fe8c4d..5d0c7363405 100644 --- a/codex-rs/core/src/config.rs +++ b/codex-rs/core/src/config.rs @@ -961,7 +961,6 @@ pub struct ConfigToml { pub experimental_use_unified_exec_tool: Option, pub experimental_use_rmcp_client: Option, pub experimental_use_freeform_apply_patch: Option, - pub experimental_sandbox_command_assessment: Option, } impl From for UserSavedConfig { @@ -1178,6 +1177,7 @@ impl Config { include_apply_patch_tool: include_apply_patch_tool_override, include_view_image_tool: include_view_image_tool_override, web_search_request: override_tools_web_search_request, + sandbox_command_assessment: sandbox_command_assessment_override, }; let features = Features::from_config(&cfg, &config_profile, feature_overrides); @@ -1275,10 +1275,7 @@ impl Config { let use_experimental_streamable_shell_tool = features.enabled(Feature::StreamableShell); let use_experimental_unified_exec_tool = features.enabled(Feature::UnifiedExec); let use_experimental_use_rmcp_client = features.enabled(Feature::RmcpClient); - let sandbox_command_assessment = sandbox_command_assessment_override - .or(config_profile.experimental_sandbox_command_assessment) - .or(cfg.experimental_sandbox_command_assessment) - .unwrap_or(false); + let sandbox_command_assessment = features.enabled(Feature::SandboxCommandAssessment); let forced_chatgpt_workspace_id = cfg.forced_chatgpt_workspace_id.as_ref().and_then(|value| { diff --git a/codex-rs/core/src/config_profile.rs b/codex-rs/core/src/config_profile.rs index 84b90f5adb8..b45c6882d08 100644 --- a/codex-rs/core/src/config_profile.rs +++ b/codex-rs/core/src/config_profile.rs @@ -26,7 +26,7 @@ pub struct ConfigProfile { pub experimental_use_exec_command_tool: Option, pub experimental_use_rmcp_client: Option, pub experimental_use_freeform_apply_patch: Option, - pub experimental_sandbox_command_assessment: Option, + pub sandbox_command_assessment: Option, pub tools_web_search: Option, pub tools_view_image: Option, /// Optional feature toggles scoped to this profile. diff --git a/codex-rs/core/src/features.rs b/codex-rs/core/src/features.rs index ead4604d547..7ec39391295 100644 --- a/codex-rs/core/src/features.rs +++ b/codex-rs/core/src/features.rs @@ -39,6 +39,8 @@ pub enum Feature { ViewImageTool, /// Allow the model to request web searches. WebSearchRequest, + /// Enable the model-based risk assessments for sandboxed commands. + SandboxCommandAssessment, } impl Feature { @@ -73,6 +75,7 @@ pub struct FeatureOverrides { pub include_apply_patch_tool: Option, pub include_view_image_tool: Option, pub web_search_request: Option, + pub sandbox_command_assessment: Option, } impl FeatureOverrides { @@ -236,4 +239,10 @@ pub const FEATURES: &[FeatureSpec] = &[ stage: Stage::Stable, default_enabled: false, }, + FeatureSpec { + id: Feature::SandboxCommandAssessment, + key: "sandbox_command_assessment", + stage: Stage::Experimental, + default_enabled: false, + }, ]; diff --git a/codex-rs/core/src/sandboxing/assessment.rs b/codex-rs/core/src/sandboxing/assessment.rs index 43eeab040ff..ea87074a738 100644 --- a/codex-rs/core/src/sandboxing/assessment.rs +++ b/codex-rs/core/src/sandboxing/assessment.rs @@ -6,14 +6,12 @@ use std::time::Instant; use crate::AuthManager; use crate::ModelProviderInfo; -use crate::auth::CodexAuth; use crate::client::ModelClient; use crate::client_common::Prompt; use crate::client_common::ResponseEvent; use crate::config::Config; -use crate::error::CodexErr; use crate::protocol::SandboxPolicy; -use crate::terminal; +use askama::Template; use codex_otel::otel_event_manager::OtelEventManager; use codex_protocol::ConversationId; use codex_protocol::models::ContentItem; @@ -22,22 +20,8 @@ use codex_protocol::protocol::SandboxCommandAssessment; use futures::StreamExt; use serde_json::json; use tokio::time::timeout; -use tracing::debug; use tracing::warn; -const SANDBOX_ASSESSMENT_SYSTEM_PROMPT: &str = r#"You are a security analyst evaluating shell commands that were blocked by a sandbox. Given the provided metadata, summarize the command's likely intent and assess the risk. Return strictly valid JSON with the keys: -- description (concise summary, at most two sentences) -- risk_level ("low", "medium", or "high") -- risk_categories (optional array of zero or more category strings) -Risk level examples: -- low: read-only inspections, listing files, printing configuration -- medium: modifying project files, installing dependencies, fetching artifacts from trusted sources -- high: deleting or overwriting data, exfiltrating secrets, escalating privileges, or disabling security controls -Recognized risk_categories: data_deletion, data_exfiltration, privilege_escalation, system_modification, network_access, resource_exhaustion, compliance. -Use multiple categories when appropriate. -If information is insufficient, choose the most cautious risk level supported by the evidence. -Respond with JSON only, without markdown code fences or extra commentary."#; - const SANDBOX_ASSESSMENT_TIMEOUT: Duration = Duration::from_secs(5); const SANDBOX_RISK_CATEGORY_VALUES: &[&str] = &[ @@ -50,12 +34,25 @@ const SANDBOX_RISK_CATEGORY_VALUES: &[&str] = &[ "compliance", ]; +#[derive(Template)] +#[template(path = "sandboxing/assessment_prompt.md", escape = "none")] +struct SandboxAssessmentPromptTemplate<'a> { + platform: &'a str, + sandbox_policy: &'a str, + filesystem_roots: Option<&'a str>, + working_directory: &'a str, + command_argv: &'a str, + command_joined: &'a str, + sandbox_failure_message: Option<&'a str>, +} + #[allow(clippy::too_many_arguments)] pub(crate) async fn assess_command( config: Arc, provider: ModelProviderInfo, auth_manager: Arc, parent_otel: &OtelEventManager, + conversation_id: ConversationId, call_id: &str, command: &[String], sandbox_policy: &SandboxPolicy, @@ -81,25 +78,45 @@ pub(crate) async fn assess_command( roots.dedup(); let platform = std::env::consts::OS; - let mut prompt_sections = Vec::new(); - prompt_sections.push(format!("Platform: {platform}")); - prompt_sections.push(format!("Sandbox policy: {sandbox_summary}")); - if !roots.is_empty() { - let formatted = roots - .iter() - .map(|root| root.to_string_lossy()) - .collect::>() - .join(", "); - prompt_sections.push(format!("Filesystem roots: {formatted}")); - } - prompt_sections.push(format!("Working directory: {cwd_str}")); - prompt_sections.push(format!("Command argv: {command_json}")); - prompt_sections.push(format!("Command (joined): {command_joined}")); - if let Some(msg) = failure.as_ref() { - prompt_sections.push(format!("Sandbox failure message: {msg}")); - } - let metadata = prompt_sections.join("\n"); - let user_prompt = format!("Command metadata:\n{metadata}"); + let roots_formatted = roots.iter().map(|root| root.to_string_lossy().to_string()); + let filesystem_roots = match roots_formatted.collect::>() { + collected if collected.is_empty() => None, + collected => Some(collected.join(", ")), + }; + + let prompt_template = SandboxAssessmentPromptTemplate { + platform, + sandbox_policy: sandbox_summary.as_str(), + filesystem_roots: filesystem_roots.as_deref(), + working_directory: cwd_str.as_str(), + command_argv: command_json.as_str(), + command_joined: command_joined.as_str(), + sandbox_failure_message: failure.as_deref(), + }; + let rendered_prompt = match prompt_template.render() { + Ok(rendered) => rendered, + Err(err) => { + warn!("failed to render sandbox assessment prompt: {err}"); + return None; + } + }; + let (system_prompt_section, user_prompt_section) = match rendered_prompt.split_once("\n---\n") { + Some(split) => split, + None => { + warn!("rendered sandbox assessment prompt missing separator"); + return None; + } + }; + let system_prompt = system_prompt_section + .strip_prefix("System Prompt:\n") + .unwrap_or(system_prompt_section) + .trim() + .to_string(); + let user_prompt = user_prompt_section + .strip_prefix("User Prompt:\n") + .unwrap_or(user_prompt_section) + .trim() + .to_string(); let prompt = Prompt { input: vec![ResponseItem::Message { @@ -109,36 +126,12 @@ pub(crate) async fn assess_command( }], tools: Vec::new(), parallel_tool_calls: false, - base_instructions_override: Some(SANDBOX_ASSESSMENT_SYSTEM_PROMPT.to_string()), + base_instructions_override: Some(system_prompt), output_schema: Some(sandbox_assessment_schema()), }; - let auth_snapshot = auth_manager.auth(); - let conversation_id = ConversationId::new(); - let child_otel = OtelEventManager::new( - conversation_id, - config.model.as_str(), - config.model_family.slug.as_str(), - auth_snapshot.as_ref().and_then(CodexAuth::get_account_id), - auth_snapshot - .as_ref() - .and_then(CodexAuth::get_account_email), - auth_snapshot.as_ref().map(|a| a.mode), - config.otel.log_user_prompt, - terminal::user_agent(), - ); - child_otel.conversation_starts( - config.model_provider.name.as_str(), - config.model_reasoning_effort, - config.model_reasoning_summary, - config.model_context_window, - config.model_max_output_tokens, - config.model_auto_compact_token_limit, - config.approval_policy, - config.sandbox_policy.clone(), - config.mcp_servers.keys().map(String::as_str).collect(), - config.active_profile.clone(), - ); + let child_otel = + parent_otel.with_model(config.model.as_str(), config.model_family.slug.as_str()); let client = ModelClient::new( Arc::clone(&config), @@ -171,48 +164,35 @@ pub(crate) async fn assess_command( }) .await; let duration = start.elapsed(); + parent_otel.sandbox_assessment_latency(call_id, duration); match assessment_result { - Ok(Ok(Some(raw))) => { - if let Some(json_slice) = extract_assessment_json(&raw) { - match serde_json::from_str::(json_slice) { - Ok(assessment) => { - parent_otel.sandbox_assessment( - call_id, - "success", - Some(assessment.risk_level), - &assessment.risk_categories, - duration, - ); - return Some(assessment); - } - Err(err) => { - warn!("failed to parse sandbox assessment JSON: {err}"); - parent_otel.sandbox_assessment(call_id, "parse_error", None, &[], duration); - } - } - } else { - warn!("sandbox assessment response missing JSON object"); + Ok(Ok(Some(raw))) => match serde_json::from_str::(raw.trim()) { + Ok(assessment) => { + parent_otel.sandbox_assessment( + call_id, + "success", + Some(assessment.risk_level), + &assessment.risk_categories, + duration, + ); + return Some(assessment); + } + Err(err) => { + warn!("failed to parse sandbox assessment JSON: {err}"); parent_otel.sandbox_assessment(call_id, "parse_error", None, &[], duration); } - } + }, Ok(Ok(None)) => { warn!("sandbox assessment response did not include any message"); parent_otel.sandbox_assessment(call_id, "no_output", None, &[], duration); } Ok(Err(err)) => { - if let CodexErr::UnexpectedStatus(unexpected) = &err { - debug!( - "sandbox assessment failed: {err} (status: {}, body: {})", - unexpected.status, unexpected.body - ); - } else { - debug!("sandbox assessment failed: {err}"); - } + warn!("sandbox assessment failed: {err}"); parent_otel.sandbox_assessment(call_id, "model_error", None, &[], duration); } Err(_) => { - debug!("sandbox assessment timed out"); + warn!("sandbox assessment timed out"); parent_otel.sandbox_assessment(call_id, "timeout", None, &[], duration); } } @@ -269,29 +249,6 @@ fn sandbox_assessment_schema() -> serde_json::Value { }) } -fn extract_assessment_json(raw: &str) -> Option<&str> { - let mut slice = raw.trim(); - if let Some(stripped) = slice.strip_prefix("```json") { - slice = stripped.trim_start(); - } - if let Some(stripped) = slice.strip_prefix("```") { - slice = stripped.trim_start(); - } - if let Some(stripped) = slice.strip_suffix("```") { - slice = stripped.trim_end(); - } - let slice = slice.trim(); - if slice.starts_with('{') && slice.ends_with('}') { - return Some(slice); - } - let start = slice.find('{')?; - let end = slice.rfind('}')?; - if end <= start { - return None; - } - slice.get(start..=end) -} - fn response_item_text(item: &ResponseItem) -> Option { match item { ResponseItem::Message { content, .. } => { diff --git a/codex-rs/core/src/tools/orchestrator.rs b/codex-rs/core/src/tools/orchestrator.rs index 278d6214db6..90b0145eadb 100644 --- a/codex-rs/core/src/tools/orchestrator.rs +++ b/codex-rs/core/src/tools/orchestrator.rs @@ -11,6 +11,7 @@ use crate::error::get_error_message_ui; use crate::exec::ExecToolCallOutput; use crate::sandboxing::SandboxManager; use crate::tools::sandboxing::ApprovalCtx; +use crate::tools::sandboxing::ProvidesSandboxRetryData; use crate::tools::sandboxing::SandboxAttempt; use crate::tools::sandboxing::ToolCtx; use crate::tools::sandboxing::ToolError; @@ -39,6 +40,7 @@ impl ToolOrchestrator { ) -> Result where T: ToolRuntime, + Rq: ProvidesSandboxRetryData, { let otel = turn_ctx.client.get_otel_event_manager(); let otel_tn = &tool_ctx.tool_name; @@ -111,7 +113,7 @@ impl ToolOrchestrator { if !tool.should_bypass_approval(approval_policy, already_approved) { let mut risk = None; - if let Some(metadata) = tool.sandbox_retry_data(req) { + if let Some(metadata) = req.sandbox_retry_data() { let err = SandboxErr::Denied { output: output.clone(), }; diff --git a/codex-rs/core/src/tools/runtimes/apply_patch.rs b/codex-rs/core/src/tools/runtimes/apply_patch.rs index 15b77873de8..2fcc5b056e4 100644 --- a/codex-rs/core/src/tools/runtimes/apply_patch.rs +++ b/codex-rs/core/src/tools/runtimes/apply_patch.rs @@ -10,7 +10,9 @@ use crate::sandboxing::CommandSpec; use crate::sandboxing::execute_env; use crate::tools::sandboxing::Approvable; use crate::tools::sandboxing::ApprovalCtx; +use crate::tools::sandboxing::ProvidesSandboxRetryData; use crate::tools::sandboxing::SandboxAttempt; +use crate::tools::sandboxing::SandboxRetryData; use crate::tools::sandboxing::Sandboxable; use crate::tools::sandboxing::SandboxablePreference; use crate::tools::sandboxing::ToolCtx; @@ -31,6 +33,12 @@ pub struct ApplyPatchRequest { pub codex_exe: Option, } +impl ProvidesSandboxRetryData for ApplyPatchRequest { + fn sandbox_retry_data(&self) -> Option { + None + } +} + #[derive(Default)] pub struct ApplyPatchRuntime; diff --git a/codex-rs/core/src/tools/runtimes/shell.rs b/codex-rs/core/src/tools/runtimes/shell.rs index 79940b970b2..f29224fcc1c 100644 --- a/codex-rs/core/src/tools/runtimes/shell.rs +++ b/codex-rs/core/src/tools/runtimes/shell.rs @@ -12,6 +12,7 @@ use crate::sandboxing::execute_env; use crate::tools::runtimes::build_command_spec; use crate::tools::sandboxing::Approvable; use crate::tools::sandboxing::ApprovalCtx; +use crate::tools::sandboxing::ProvidesSandboxRetryData; use crate::tools::sandboxing::SandboxAttempt; use crate::tools::sandboxing::SandboxRetryData; use crate::tools::sandboxing::Sandboxable; @@ -35,6 +36,15 @@ pub struct ShellRequest { pub justification: Option, } +impl ProvidesSandboxRetryData for ShellRequest { + fn sandbox_retry_data(&self) -> Option { + Some(SandboxRetryData { + command: self.command.clone(), + cwd: self.cwd.clone(), + }) + } +} + #[derive(Default)] pub struct ShellRuntime; @@ -141,13 +151,6 @@ impl Approvable for ShellRuntime { } impl ToolRuntime for ShellRuntime { - fn sandbox_retry_data(&self, req: &ShellRequest) -> Option { - Some(SandboxRetryData { - command: req.command.clone(), - cwd: req.cwd.clone(), - }) - } - async fn run( &mut self, req: &ShellRequest, diff --git a/codex-rs/core/src/tools/runtimes/unified_exec.rs b/codex-rs/core/src/tools/runtimes/unified_exec.rs index b75ec849cde..85c996387ab 100644 --- a/codex-rs/core/src/tools/runtimes/unified_exec.rs +++ b/codex-rs/core/src/tools/runtimes/unified_exec.rs @@ -9,6 +9,7 @@ use crate::error::SandboxErr; use crate::tools::runtimes::build_command_spec; use crate::tools::sandboxing::Approvable; use crate::tools::sandboxing::ApprovalCtx; +use crate::tools::sandboxing::ProvidesSandboxRetryData; use crate::tools::sandboxing::SandboxAttempt; use crate::tools::sandboxing::SandboxRetryData; use crate::tools::sandboxing::Sandboxable; @@ -32,6 +33,15 @@ pub struct UnifiedExecRequest { pub env: HashMap, } +impl ProvidesSandboxRetryData for UnifiedExecRequest { + fn sandbox_retry_data(&self) -> Option { + Some(SandboxRetryData { + command: self.command.clone(), + cwd: self.cwd.clone(), + }) + } +} + #[derive(serde::Serialize, Clone, Debug, Eq, PartialEq, Hash)] pub struct UnifiedExecApprovalKey { pub command: Vec, @@ -99,13 +109,6 @@ impl Approvable for UnifiedExecRuntime<'_> { } impl<'a> ToolRuntime for UnifiedExecRuntime<'a> { - fn sandbox_retry_data(&self, req: &UnifiedExecRequest) -> Option { - Some(SandboxRetryData { - command: req.command.clone(), - cwd: req.cwd.clone(), - }) - } - async fn run( &mut self, req: &UnifiedExecRequest, diff --git a/codex-rs/core/src/tools/sandboxing.rs b/codex-rs/core/src/tools/sandboxing.rs index 198bdf05050..d2b974bce8e 100644 --- a/codex-rs/core/src/tools/sandboxing.rs +++ b/codex-rs/core/src/tools/sandboxing.rs @@ -161,6 +161,10 @@ pub(crate) struct SandboxRetryData { pub cwd: PathBuf, } +pub(crate) trait ProvidesSandboxRetryData { + fn sandbox_retry_data(&self) -> Option; +} + #[derive(Debug)] pub(crate) enum ToolError { Rejected(String), @@ -175,11 +179,6 @@ pub(crate) trait ToolRuntime: Approvable + Sandboxable { attempt: &SandboxAttempt<'_>, ctx: &ToolCtx, ) -> Result; - - /// Allow a runtime to supply metadata for risk assessment when the orchestrator retries without isolation. - fn sandbox_retry_data(&self, _req: &Req) -> Option { - None - } } pub(crate) struct SandboxAttempt<'a> { diff --git a/codex-rs/core/templates/sandboxing/assessment_prompt.md b/codex-rs/core/templates/sandboxing/assessment_prompt.md new file mode 100644 index 00000000000..51345d5ebc7 --- /dev/null +++ b/codex-rs/core/templates/sandboxing/assessment_prompt.md @@ -0,0 +1,25 @@ +You are a security analyst evaluating shell commands that were blocked by a sandbox. Given the provided metadata, summarize the command's likely intent and assess the risk. Return strictly valid JSON with the keys: +- description (concise summary, at most two sentences) +- risk_level ("low", "medium", or "high") +- risk_categories (optional array of zero or more category strings) +Risk level examples: +- low: read-only inspections, listing files, printing configuration +- medium: modifying project files, installing dependencies, fetching artifacts from trusted sources +- high: deleting or overwriting data, exfiltrating secrets, escalating privileges, or disabling security controls +Recognized risk_categories: data_deletion, data_exfiltration, privilege_escalation, system_modification, network_access, resource_exhaustion, compliance. +Use multiple categories when appropriate. +If information is insufficient, choose the most cautious risk level supported by the evidence. +Respond with JSON only, without markdown code fences or extra commentary. + +Command metadata: +Platform: {{ platform }} +Sandbox policy: {{ sandbox_policy }} +{% if let Some(roots) = filesystem_roots %} +Filesystem roots: {{ roots }} +{% endif %} +Working directory: {{ working_directory }} +Command argv: {{ command_argv }} +Command (joined): {{ command_joined }} +{% if let Some(message) = sandbox_failure_message %} +Sandbox failure message: {{ message }} +{% endif %} diff --git a/codex-rs/otel/src/otel_event_manager.rs b/codex-rs/otel/src/otel_event_manager.rs index 55ce88f947c..4006df17d9a 100644 --- a/codex-rs/otel/src/otel_event_manager.rs +++ b/codex-rs/otel/src/otel_event_manager.rs @@ -407,6 +407,24 @@ impl OtelEventManager { ); } + pub fn sandbox_assessment_latency(&self, call_id: &str, duration: Duration) { + tracing::event!( + tracing::Level::INFO, + event.name = "codex.sandbox_assessment_latency", + event.timestamp = %timestamp(), + conversation.id = %self.metadata.conversation_id, + app.version = %self.metadata.app_version, + auth_mode = self.metadata.auth_mode, + user.account_id = self.metadata.account_id, + user.email = self.metadata.account_email, + terminal.type = %self.metadata.terminal_type, + model = %self.metadata.model, + slug = %self.metadata.slug, + call_id = %call_id, + duration_ms = %duration.as_millis(), + ); + } + pub async fn log_tool_result( &self, tool_name: &str, diff --git a/codex-rs/protocol/src/approvals.rs b/codex-rs/protocol/src/approvals.rs new file mode 100644 index 00000000000..d608dba639e --- /dev/null +++ b/codex-rs/protocol/src/approvals.rs @@ -0,0 +1,91 @@ +use std::collections::HashMap; +use std::path::PathBuf; + +use crate::parse_command::ParsedCommand; +use crate::protocol::FileChange; +use schemars::JsonSchema; +use serde::Deserialize; +use serde::Serialize; +use ts_rs::TS; + +#[derive(Debug, Clone, Copy, Deserialize, Serialize, PartialEq, Eq, Hash, JsonSchema, TS)] +#[serde(rename_all = "snake_case")] +pub enum SandboxRiskLevel { + Low, + Medium, + High, +} + +#[derive(Debug, Clone, Copy, Deserialize, Serialize, PartialEq, Eq, Hash, JsonSchema, TS)] +#[serde(rename_all = "snake_case")] +pub enum SandboxRiskCategory { + DataDeletion, + DataExfiltration, + PrivilegeEscalation, + SystemModification, + NetworkAccess, + ResourceExhaustion, + Compliance, +} + +#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq, JsonSchema, TS)] +pub struct SandboxCommandAssessment { + pub description: String, + pub risk_level: SandboxRiskLevel, + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub risk_categories: Vec, +} + +impl SandboxRiskLevel { + pub fn as_str(&self) -> &'static str { + match self { + Self::Low => "low", + Self::Medium => "medium", + Self::High => "high", + } + } +} + +impl SandboxRiskCategory { + pub fn as_str(&self) -> &'static str { + match self { + Self::DataDeletion => "data_deletion", + Self::DataExfiltration => "data_exfiltration", + Self::PrivilegeEscalation => "privilege_escalation", + Self::SystemModification => "system_modification", + Self::NetworkAccess => "network_access", + Self::ResourceExhaustion => "resource_exhaustion", + Self::Compliance => "compliance", + } + } +} + +#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)] +pub struct ExecApprovalRequestEvent { + /// Identifier for the associated exec call, if available. + pub call_id: String, + /// The command to be executed. + pub command: Vec, + /// The command's working directory. + pub cwd: PathBuf, + /// Optional human-readable reason for the approval (e.g. retry without sandbox). + #[serde(skip_serializing_if = "Option::is_none")] + pub reason: Option, + /// Optional model-provided risk assessment describing the blocked command. + #[serde(skip_serializing_if = "Option::is_none")] + pub risk: Option, + pub parsed_cmd: Vec, +} + +#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)] +pub struct ApplyPatchApprovalRequestEvent { + /// Responses API call id for the associated patch apply call, if available. + pub call_id: String, + pub changes: HashMap, + /// Optional explanatory reason (e.g. request for extra write access). + #[serde(skip_serializing_if = "Option::is_none")] + pub reason: Option, + /// When set, the agent is asking the user to allow writes under this root for the remainder of the session. + #[serde(skip_serializing_if = "Option::is_none")] + pub grant_root: Option, +} diff --git a/codex-rs/protocol/src/lib.rs b/codex-rs/protocol/src/lib.rs index e79eff3f56e..781a7f559ad 100644 --- a/codex-rs/protocol/src/lib.rs +++ b/codex-rs/protocol/src/lib.rs @@ -1,5 +1,6 @@ mod conversation_id; pub use conversation_id::ConversationId; +pub mod approvals; pub mod config_types; pub mod custom_prompts; pub mod items; diff --git a/codex-rs/protocol/src/protocol.rs b/codex-rs/protocol/src/protocol.rs index 09a8e25565b..cd10c2786b4 100644 --- a/codex-rs/protocol/src/protocol.rs +++ b/codex-rs/protocol/src/protocol.rs @@ -34,6 +34,12 @@ use serde_with::serde_as; use strum_macros::Display; use ts_rs::TS; +pub use crate::approvals::ApplyPatchApprovalRequestEvent; +pub use crate::approvals::ExecApprovalRequestEvent; +pub use crate::approvals::SandboxCommandAssessment; +pub use crate::approvals::SandboxRiskCategory; +pub use crate::approvals::SandboxRiskLevel; + /// Open/close tags for special user-input blocks. Used across crates to avoid /// duplicated hardcoded strings. pub const USER_INSTRUCTIONS_OPEN_TAG: &str = ""; @@ -1126,88 +1132,6 @@ pub struct ExecCommandOutputDeltaEvent { pub chunk: Vec, } -#[derive(Debug, Clone, Copy, Deserialize, Serialize, PartialEq, Eq, Hash, JsonSchema, TS)] -#[serde(rename_all = "snake_case")] -pub enum SandboxRiskLevel { - Low, - Medium, - High, -} - -#[derive(Debug, Clone, Copy, Deserialize, Serialize, PartialEq, Eq, Hash, JsonSchema, TS)] -#[serde(rename_all = "snake_case")] -pub enum SandboxRiskCategory { - DataDeletion, - DataExfiltration, - PrivilegeEscalation, - SystemModification, - NetworkAccess, - ResourceExhaustion, - Compliance, -} - -#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq, JsonSchema, TS)] -pub struct SandboxCommandAssessment { - pub description: String, - pub risk_level: SandboxRiskLevel, - #[serde(default, skip_serializing_if = "Vec::is_empty")] - pub risk_categories: Vec, -} - -impl SandboxRiskLevel { - pub fn as_str(&self) -> &'static str { - match self { - Self::Low => "low", - Self::Medium => "medium", - Self::High => "high", - } - } -} - -impl SandboxRiskCategory { - pub fn as_str(&self) -> &'static str { - match self { - Self::DataDeletion => "data_deletion", - Self::DataExfiltration => "data_exfiltration", - Self::PrivilegeEscalation => "privilege_escalation", - Self::SystemModification => "system_modification", - Self::NetworkAccess => "network_access", - Self::ResourceExhaustion => "resource_exhaustion", - Self::Compliance => "compliance", - } - } -} - -#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)] -pub struct ExecApprovalRequestEvent { - /// Identifier for the associated exec call, if available. - pub call_id: String, - /// The command to be executed. - pub command: Vec, - /// The command's working directory. - pub cwd: PathBuf, - /// Optional human-readable reason for the approval (e.g. retry without sandbox). - #[serde(skip_serializing_if = "Option::is_none")] - pub reason: Option, - /// Optional model-provided risk assessment describing the blocked command. - #[serde(skip_serializing_if = "Option::is_none")] - pub risk: Option, - pub parsed_cmd: Vec, -} - -#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)] -pub struct ApplyPatchApprovalRequestEvent { - /// Responses API call id for the associated patch apply call, if available. - pub call_id: String, - pub changes: HashMap, - /// Optional explanatory reason (e.g. request for extra write access). - #[serde(skip_serializing_if = "Option::is_none")] - pub reason: Option, - /// When set, the agent is asking the user to allow writes under this root for the remainder of the session. - #[serde(skip_serializing_if = "Option::is_none")] - pub grant_root: Option, -} - #[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)] pub struct BackgroundEventEvent { pub message: String, diff --git a/docs/config.md b/docs/config.md index e09e621ad64..4186c4ff348 100644 --- a/docs/config.md +++ b/docs/config.md @@ -906,7 +906,6 @@ If `forced_chatgpt_workspace_id` is set but `forced_login_method` is not set, AP | `chatgpt_base_url` | string | Base URL for ChatGPT auth flow. | | `experimental_instructions_file` | string (path) | Replace built‑in instructions (experimental). | | `experimental_use_exec_command_tool` | boolean | Use experimental exec command tool. | -| `experimental_sandbox_command_assessment` | boolean | Enable model-based risk summaries when sandboxed commands are denied. | | `projects..trust_level` | string | Mark project/worktree as trusted (only `"trusted"` is recognized). | | `tools.web_search` | boolean | Enable web search tool (alias: `web_search_request`) (default: false). | | `forced_login_method` | `chatgpt` \| `api` | Only allow Codex to be used with ChatGPT or API keys. | From 46fc68987c5b480ba7c3d747e3273334159cb856 Mon Sep 17 00:00:00 2001 From: Eric Traut Date: Fri, 24 Oct 2025 14:42:52 -0700 Subject: [PATCH 6/6] Bug fixes --- .../app-server/src/codex_message_processor.rs | 2 +- codex-rs/core/src/config.rs | 22 ++++++++------- codex-rs/core/src/config_profile.rs | 2 +- codex-rs/core/src/features.rs | 7 +++-- codex-rs/core/src/features/legacy.rs | 11 ++++++++ codex-rs/core/src/sandboxing/assessment.rs | 2 +- .../templates/sandboxing/assessment_prompt.md | 2 ++ codex-rs/exec/src/lib.rs | 2 +- codex-rs/mcp-server/src/codex_tool_config.rs | 2 +- .../tui/src/bottom_pane/approval_overlay.rs | 28 +++++++++++-------- codex-rs/tui/src/lib.rs | 2 +- 11 files changed, 52 insertions(+), 30 deletions(-) diff --git a/codex-rs/app-server/src/codex_message_processor.rs b/codex-rs/app-server/src/codex_message_processor.rs index cc1abb9f23b..f373ce7029e 100644 --- a/codex-rs/app-server/src/codex_message_processor.rs +++ b/codex-rs/app-server/src/codex_message_processor.rs @@ -1486,7 +1486,7 @@ async fn derive_config_from_params( include_view_image_tool: None, show_raw_agent_reasoning: None, tools_web_search_request: None, - sandbox_command_assessment: None, + experimental_sandbox_command_assessment: None, additional_writable_roots: Vec::new(), }; diff --git a/codex-rs/core/src/config.rs b/codex-rs/core/src/config.rs index 5d0c7363405..b04aba5db55 100644 --- a/codex-rs/core/src/config.rs +++ b/codex-rs/core/src/config.rs @@ -224,7 +224,7 @@ pub struct Config { pub tools_web_search_request: bool, /// When `true`, run a model-based assessment for commands denied by the sandbox. - pub sandbox_command_assessment: bool, + pub experimental_sandbox_command_assessment: bool, pub use_experimental_streamable_shell_tool: bool, @@ -961,6 +961,7 @@ pub struct ConfigToml { pub experimental_use_unified_exec_tool: Option, pub experimental_use_rmcp_client: Option, pub experimental_use_freeform_apply_patch: Option, + pub experimental_sandbox_command_assessment: Option, } impl From for UserSavedConfig { @@ -1121,7 +1122,7 @@ pub struct ConfigOverrides { pub include_view_image_tool: Option, pub show_raw_agent_reasoning: Option, pub tools_web_search_request: Option, - pub sandbox_command_assessment: Option, + pub experimental_sandbox_command_assessment: Option, /// Additional directories that should be treated as writable roots for this session. pub additional_writable_roots: Vec, } @@ -1151,7 +1152,7 @@ impl Config { include_view_image_tool: include_view_image_tool_override, show_raw_agent_reasoning, tools_web_search_request: override_tools_web_search_request, - sandbox_command_assessment: sandbox_command_assessment_override, + experimental_sandbox_command_assessment: sandbox_command_assessment_override, additional_writable_roots, } = overrides; @@ -1177,7 +1178,7 @@ impl Config { include_apply_patch_tool: include_apply_patch_tool_override, include_view_image_tool: include_view_image_tool_override, web_search_request: override_tools_web_search_request, - sandbox_command_assessment: sandbox_command_assessment_override, + experimental_sandbox_command_assessment: sandbox_command_assessment_override, }; let features = Features::from_config(&cfg, &config_profile, feature_overrides); @@ -1275,7 +1276,8 @@ impl Config { let use_experimental_streamable_shell_tool = features.enabled(Feature::StreamableShell); let use_experimental_unified_exec_tool = features.enabled(Feature::UnifiedExec); let use_experimental_use_rmcp_client = features.enabled(Feature::RmcpClient); - let sandbox_command_assessment = features.enabled(Feature::SandboxCommandAssessment); + let experimental_sandbox_command_assessment = + features.enabled(Feature::SandboxCommandAssessment); let forced_chatgpt_workspace_id = cfg.forced_chatgpt_workspace_id.as_ref().and_then(|value| { @@ -1397,7 +1399,7 @@ impl Config { forced_login_method, include_apply_patch_tool: include_apply_patch_tool_flag, tools_web_search_request, - sandbox_command_assessment, + experimental_sandbox_command_assessment, use_experimental_streamable_shell_tool, use_experimental_unified_exec_tool, use_experimental_use_rmcp_client, @@ -2881,7 +2883,7 @@ model_verbosity = "high" forced_login_method: None, include_apply_patch_tool: false, tools_web_search_request: false, - sandbox_command_assessment: false, + experimental_sandbox_command_assessment: false, use_experimental_streamable_shell_tool: false, use_experimental_unified_exec_tool: false, use_experimental_use_rmcp_client: false, @@ -2950,7 +2952,7 @@ model_verbosity = "high" forced_login_method: None, include_apply_patch_tool: false, tools_web_search_request: false, - sandbox_command_assessment: false, + experimental_sandbox_command_assessment: false, use_experimental_streamable_shell_tool: false, use_experimental_unified_exec_tool: false, use_experimental_use_rmcp_client: false, @@ -3034,7 +3036,7 @@ model_verbosity = "high" forced_login_method: None, include_apply_patch_tool: false, tools_web_search_request: false, - sandbox_command_assessment: false, + experimental_sandbox_command_assessment: false, use_experimental_streamable_shell_tool: false, use_experimental_unified_exec_tool: false, use_experimental_use_rmcp_client: false, @@ -3104,7 +3106,7 @@ model_verbosity = "high" forced_login_method: None, include_apply_patch_tool: false, tools_web_search_request: false, - sandbox_command_assessment: false, + experimental_sandbox_command_assessment: false, use_experimental_streamable_shell_tool: false, use_experimental_unified_exec_tool: false, use_experimental_use_rmcp_client: false, diff --git a/codex-rs/core/src/config_profile.rs b/codex-rs/core/src/config_profile.rs index b45c6882d08..84b90f5adb8 100644 --- a/codex-rs/core/src/config_profile.rs +++ b/codex-rs/core/src/config_profile.rs @@ -26,7 +26,7 @@ pub struct ConfigProfile { pub experimental_use_exec_command_tool: Option, pub experimental_use_rmcp_client: Option, pub experimental_use_freeform_apply_patch: Option, - pub sandbox_command_assessment: Option, + pub experimental_sandbox_command_assessment: Option, pub tools_web_search: Option, pub tools_view_image: Option, /// Optional feature toggles scoped to this profile. diff --git a/codex-rs/core/src/features.rs b/codex-rs/core/src/features.rs index 7ec39391295..8e8a203062b 100644 --- a/codex-rs/core/src/features.rs +++ b/codex-rs/core/src/features.rs @@ -75,7 +75,7 @@ pub struct FeatureOverrides { pub include_apply_patch_tool: Option, pub include_view_image_tool: Option, pub web_search_request: Option, - pub sandbox_command_assessment: Option, + pub experimental_sandbox_command_assessment: Option, } impl FeatureOverrides { @@ -140,6 +140,7 @@ impl Features { let mut features = Features::with_defaults(); let base_legacy = LegacyFeatureToggles { + experimental_sandbox_command_assessment: cfg.experimental_sandbox_command_assessment, experimental_use_freeform_apply_patch: cfg.experimental_use_freeform_apply_patch, experimental_use_exec_command_tool: cfg.experimental_use_exec_command_tool, experimental_use_unified_exec_tool: cfg.experimental_use_unified_exec_tool, @@ -157,6 +158,8 @@ impl Features { let profile_legacy = LegacyFeatureToggles { include_apply_patch_tool: config_profile.include_apply_patch_tool, include_view_image_tool: config_profile.include_view_image_tool, + experimental_sandbox_command_assessment: config_profile + .experimental_sandbox_command_assessment, experimental_use_freeform_apply_patch: config_profile .experimental_use_freeform_apply_patch, experimental_use_exec_command_tool: config_profile.experimental_use_exec_command_tool, @@ -241,7 +244,7 @@ pub const FEATURES: &[FeatureSpec] = &[ }, FeatureSpec { id: Feature::SandboxCommandAssessment, - key: "sandbox_command_assessment", + key: "experimental_sandbox_command_assessment", stage: Stage::Experimental, default_enabled: false, }, diff --git a/codex-rs/core/src/features/legacy.rs b/codex-rs/core/src/features/legacy.rs index 22d8442ace0..54f6a2d5490 100644 --- a/codex-rs/core/src/features/legacy.rs +++ b/codex-rs/core/src/features/legacy.rs @@ -9,6 +9,10 @@ struct Alias { } const ALIASES: &[Alias] = &[ + Alias { + legacy_key: "experimental_sandbox_command_assessment", + feature: Feature::SandboxCommandAssessment, + }, Alias { legacy_key: "experimental_use_unified_exec_tool", feature: Feature::UnifiedExec, @@ -53,6 +57,7 @@ pub(crate) fn feature_for_key(key: &str) -> Option { pub struct LegacyFeatureToggles { pub include_apply_patch_tool: Option, pub include_view_image_tool: Option, + pub experimental_sandbox_command_assessment: Option, pub experimental_use_freeform_apply_patch: Option, pub experimental_use_exec_command_tool: Option, pub experimental_use_unified_exec_tool: Option, @@ -69,6 +74,12 @@ impl LegacyFeatureToggles { self.include_apply_patch_tool, "include_apply_patch_tool", ); + set_if_some( + features, + Feature::SandboxCommandAssessment, + self.experimental_sandbox_command_assessment, + "experimental_sandbox_command_assessment", + ); set_if_some( features, Feature::ApplyPatchFreeform, diff --git a/codex-rs/core/src/sandboxing/assessment.rs b/codex-rs/core/src/sandboxing/assessment.rs index ea87074a738..f02a90b46e3 100644 --- a/codex-rs/core/src/sandboxing/assessment.rs +++ b/codex-rs/core/src/sandboxing/assessment.rs @@ -59,7 +59,7 @@ pub(crate) async fn assess_command( cwd: &Path, failure_message: Option<&str>, ) -> Option { - if !config.sandbox_command_assessment || command.is_empty() { + if !config.experimental_sandbox_command_assessment || command.is_empty() { return None; } diff --git a/codex-rs/core/templates/sandboxing/assessment_prompt.md b/codex-rs/core/templates/sandboxing/assessment_prompt.md index 51345d5ebc7..7e5664a0ad6 100644 --- a/codex-rs/core/templates/sandboxing/assessment_prompt.md +++ b/codex-rs/core/templates/sandboxing/assessment_prompt.md @@ -11,6 +11,8 @@ Use multiple categories when appropriate. If information is insufficient, choose the most cautious risk level supported by the evidence. Respond with JSON only, without markdown code fences or extra commentary. +--- + Command metadata: Platform: {{ platform }} Sandbox policy: {{ sandbox_policy }} diff --git a/codex-rs/exec/src/lib.rs b/codex-rs/exec/src/lib.rs index 975591bf50f..2e615df0c6b 100644 --- a/codex-rs/exec/src/lib.rs +++ b/codex-rs/exec/src/lib.rs @@ -179,7 +179,7 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option) -> any include_view_image_tool: None, show_raw_agent_reasoning: oss.then_some(true), tools_web_search_request: None, - sandbox_command_assessment: None, + experimental_sandbox_command_assessment: None, additional_writable_roots: Vec::new(), }; // Parse `-c` overrides. diff --git a/codex-rs/mcp-server/src/codex_tool_config.rs b/codex-rs/mcp-server/src/codex_tool_config.rs index a51de207a0c..24a5eec4b89 100644 --- a/codex-rs/mcp-server/src/codex_tool_config.rs +++ b/codex-rs/mcp-server/src/codex_tool_config.rs @@ -158,7 +158,7 @@ impl CodexToolCallParam { include_view_image_tool: None, show_raw_agent_reasoning: None, tools_web_search_request: None, - sandbox_command_assessment: None, + experimental_sandbox_command_assessment: None, additional_writable_roots: Vec::new(), }; diff --git a/codex-rs/tui/src/bottom_pane/approval_overlay.rs b/codex-rs/tui/src/bottom_pane/approval_overlay.rs index ce3ba660f14..ba36870005a 100644 --- a/codex-rs/tui/src/bottom_pane/approval_overlay.rs +++ b/codex-rs/tui/src/bottom_pane/approval_overlay.rs @@ -291,15 +291,16 @@ impl From for ApprovalRequestState { reason, risk, } => { + let reason = reason.filter(|item| !item.is_empty()); + let has_reason = reason.is_some(); let mut header: Vec> = Vec::new(); - if let Some(reason) = reason - && !reason.is_empty() - { + if let Some(reason) = reason { header.push(Line::from(vec!["Reason: ".into(), reason.italic()])); - header.push(Line::from("")); } if let Some(risk) = risk.as_ref() { header.extend(render_risk_lines(risk)); + } else if has_reason { + header.push(Line::from("")); } let full_cmd = strip_bash_lc_and_escape(&command); let mut full_cmd_lines = highlight_bash_to_lines(&full_cmd); @@ -345,6 +346,16 @@ fn render_risk_lines(risk: &SandboxCommandAssessment) -> Vec> { SandboxRiskLevel::High => "HIGH".red().bold(), }; + let mut lines = Vec::new(); + + let description = risk.description.trim(); + if !description.is_empty() { + lines.push(Line::from(vec![ + "Summary: ".into(), + description.to_string().into(), + ])); + } + let mut spans: Vec> = vec!["Risk: ".into(), level_span]; if !risk.risk_categories.is_empty() { spans.push(" (".into()); @@ -357,14 +368,7 @@ fn render_risk_lines(risk: &SandboxCommandAssessment) -> Vec> { spans.push(")".into()); } - let mut lines = vec![Line::from(spans)]; - let description = risk.description.trim(); - if !description.is_empty() { - lines.push(Line::from(vec![ - "Summary: ".into(), - description.to_string().into(), - ])); - } + lines.push(Line::from(spans)); lines.push(Line::from("")); lines } diff --git a/codex-rs/tui/src/lib.rs b/codex-rs/tui/src/lib.rs index d1c07957a6e..50f6b1a0ddf 100644 --- a/codex-rs/tui/src/lib.rs +++ b/codex-rs/tui/src/lib.rs @@ -148,7 +148,7 @@ pub async fn run_main( include_view_image_tool: None, show_raw_agent_reasoning: cli.oss.then_some(true), tools_web_search_request: cli.web_search.then_some(true), - sandbox_command_assessment: None, + experimental_sandbox_command_assessment: None, additional_writable_roots: additional_dirs, }; let raw_overrides = cli.config_overrides.raw_overrides.clone();