diff --git a/codex-rs/app-server-protocol/src/protocol.rs b/codex-rs/app-server-protocol/src/protocol.rs index ceeeb489190..7340871f0f8 100644 --- a/codex-rs/app-server-protocol/src/protocol.rs +++ b/codex-rs/app-server-protocol/src/protocol.rs @@ -17,6 +17,7 @@ use codex_protocol::protocol::EventMsg; use codex_protocol::protocol::FileChange; use codex_protocol::protocol::RateLimitSnapshot; use codex_protocol::protocol::ReviewDecision; +use codex_protocol::protocol::SandboxCommandAssessment; use codex_protocol::protocol::SandboxPolicy; use codex_protocol::protocol::TurnAbortReason; use paste::paste; @@ -847,6 +848,8 @@ pub struct ExecCommandApprovalParams { pub cwd: PathBuf, #[serde(skip_serializing_if = "Option::is_none")] pub reason: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub risk: Option, pub parsed_cmd: Vec, } @@ -1063,6 +1066,7 @@ mod tests { command: vec!["echo".to_string(), "hello".to_string()], cwd: PathBuf::from("/tmp"), reason: Some("because tests".to_string()), + risk: None, parsed_cmd: vec![ParsedCommand::Unknown { cmd: "echo hello".to_string(), }], diff --git a/codex-rs/app-server/src/codex_message_processor.rs b/codex-rs/app-server/src/codex_message_processor.rs index 5c10a89c385..ff1a43f5e55 100644 --- a/codex-rs/app-server/src/codex_message_processor.rs +++ b/codex-rs/app-server/src/codex_message_processor.rs @@ -1447,6 +1447,7 @@ async fn apply_bespoke_event_handling( command, cwd, reason, + risk, parsed_cmd, }) => { let params = ExecCommandApprovalParams { @@ -1455,6 +1456,7 @@ async fn apply_bespoke_event_handling( command, cwd, reason, + risk, parsed_cmd, }; let rx = outgoing @@ -1523,6 +1525,7 @@ async fn derive_config_from_params( include_view_image_tool: None, show_raw_agent_reasoning: None, tools_web_search_request: None, + experimental_sandbox_command_assessment: None, additional_writable_roots: Vec::new(), }; diff --git a/codex-rs/app-server/tests/suite/codex_message_processor_flow.rs b/codex-rs/app-server/tests/suite/codex_message_processor_flow.rs index 30b90f6e4aa..6d3e2b42c42 100644 --- a/codex-rs/app-server/tests/suite/codex_message_processor_flow.rs +++ b/codex-rs/app-server/tests/suite/codex_message_processor_flow.rs @@ -311,6 +311,7 @@ async fn test_send_user_turn_changes_approval_policy_behavior() { ], cwd: working_directory.clone(), reason: None, + risk: None, parsed_cmd: vec![ParsedCommand::Unknown { cmd: "python3 -c 'print(42)'".to_string() }], diff --git a/codex-rs/core/src/client.rs b/codex-rs/core/src/client.rs index 62e003a9154..de10d6406fd 100644 --- a/codex-rs/core/src/client.rs +++ b/codex-rs/core/src/client.rs @@ -134,6 +134,14 @@ impl ModelClient { self.stream_with_task_kind(prompt, TaskKind::Regular).await } + pub fn config(&self) -> Arc { + Arc::clone(&self.config) + } + + pub fn provider(&self) -> &ModelProviderInfo { + &self.provider + } + pub(crate) async fn stream_with_task_kind( &self, prompt: &Prompt, diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index b9e98e0cd8c..4eba28c4aaf 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -88,6 +88,7 @@ use crate::protocol::Op; use crate::protocol::RateLimitSnapshot; use crate::protocol::ReviewDecision; use crate::protocol::ReviewOutputEvent; +use crate::protocol::SandboxCommandAssessment; use crate::protocol::SandboxPolicy; use crate::protocol::SessionConfiguredEvent; use crate::protocol::StreamErrorEvent; @@ -755,6 +756,32 @@ impl Session { } } + pub(crate) async fn assess_sandbox_command( + &self, + turn_context: &TurnContext, + call_id: &str, + command: &[String], + failure_message: Option<&str>, + ) -> Option { + let config = turn_context.client.config(); + let provider = turn_context.client.provider().clone(); + let auth_manager = Arc::clone(&self.services.auth_manager); + let otel = self.services.otel_event_manager.clone(); + crate::sandboxing::assessment::assess_command( + config, + provider, + auth_manager, + &otel, + self.conversation_id, + call_id, + command, + &turn_context.sandbox_policy, + &turn_context.cwd, + failure_message, + ) + .await + } + /// Emit an exec approval request event and await the user's decision. /// /// The request is keyed by `sub_id`/`call_id` so matching responses are delivered @@ -767,6 +794,7 @@ impl Session { command: Vec, cwd: PathBuf, reason: Option, + risk: Option, ) -> ReviewDecision { let sub_id = turn_context.sub_id.clone(); // Add the tx_approve callback to the map before sending the request. @@ -792,6 +820,7 @@ impl Session { command, cwd, reason, + risk, parsed_cmd, }); self.send_event(turn_context, event).await; diff --git a/codex-rs/core/src/config.rs b/codex-rs/core/src/config.rs index 0af01f68571..b04aba5db55 100644 --- a/codex-rs/core/src/config.rs +++ b/codex-rs/core/src/config.rs @@ -223,6 +223,9 @@ pub struct Config { pub tools_web_search_request: bool, + /// When `true`, run a model-based assessment for commands denied by the sandbox. + pub experimental_sandbox_command_assessment: bool, + pub use_experimental_streamable_shell_tool: bool, /// If set to `true`, used only the experimental unified exec tool. @@ -958,6 +961,7 @@ pub struct ConfigToml { pub experimental_use_unified_exec_tool: Option, pub experimental_use_rmcp_client: Option, pub experimental_use_freeform_apply_patch: Option, + pub experimental_sandbox_command_assessment: Option, } impl From for UserSavedConfig { @@ -1118,6 +1122,7 @@ pub struct ConfigOverrides { pub include_view_image_tool: Option, pub show_raw_agent_reasoning: Option, pub tools_web_search_request: Option, + pub experimental_sandbox_command_assessment: Option, /// Additional directories that should be treated as writable roots for this session. pub additional_writable_roots: Vec, } @@ -1147,6 +1152,7 @@ impl Config { include_view_image_tool: include_view_image_tool_override, show_raw_agent_reasoning, tools_web_search_request: override_tools_web_search_request, + experimental_sandbox_command_assessment: sandbox_command_assessment_override, additional_writable_roots, } = overrides; @@ -1172,6 +1178,7 @@ impl Config { include_apply_patch_tool: include_apply_patch_tool_override, include_view_image_tool: include_view_image_tool_override, web_search_request: override_tools_web_search_request, + experimental_sandbox_command_assessment: sandbox_command_assessment_override, }; let features = Features::from_config(&cfg, &config_profile, feature_overrides); @@ -1269,6 +1276,8 @@ impl Config { let use_experimental_streamable_shell_tool = features.enabled(Feature::StreamableShell); let use_experimental_unified_exec_tool = features.enabled(Feature::UnifiedExec); let use_experimental_use_rmcp_client = features.enabled(Feature::RmcpClient); + let experimental_sandbox_command_assessment = + features.enabled(Feature::SandboxCommandAssessment); let forced_chatgpt_workspace_id = cfg.forced_chatgpt_workspace_id.as_ref().and_then(|value| { @@ -1390,6 +1399,7 @@ impl Config { forced_login_method, include_apply_patch_tool: include_apply_patch_tool_flag, tools_web_search_request, + experimental_sandbox_command_assessment, use_experimental_streamable_shell_tool, use_experimental_unified_exec_tool, use_experimental_use_rmcp_client, @@ -2873,6 +2883,7 @@ model_verbosity = "high" forced_login_method: None, include_apply_patch_tool: false, tools_web_search_request: false, + experimental_sandbox_command_assessment: false, use_experimental_streamable_shell_tool: false, use_experimental_unified_exec_tool: false, use_experimental_use_rmcp_client: false, @@ -2941,6 +2952,7 @@ model_verbosity = "high" forced_login_method: None, include_apply_patch_tool: false, tools_web_search_request: false, + experimental_sandbox_command_assessment: false, use_experimental_streamable_shell_tool: false, use_experimental_unified_exec_tool: false, use_experimental_use_rmcp_client: false, @@ -3024,6 +3036,7 @@ model_verbosity = "high" forced_login_method: None, include_apply_patch_tool: false, tools_web_search_request: false, + experimental_sandbox_command_assessment: false, use_experimental_streamable_shell_tool: false, use_experimental_unified_exec_tool: false, use_experimental_use_rmcp_client: false, @@ -3093,6 +3106,7 @@ model_verbosity = "high" forced_login_method: None, include_apply_patch_tool: false, tools_web_search_request: false, + experimental_sandbox_command_assessment: false, use_experimental_streamable_shell_tool: false, use_experimental_unified_exec_tool: false, use_experimental_use_rmcp_client: false, diff --git a/codex-rs/core/src/config_profile.rs b/codex-rs/core/src/config_profile.rs index 1986d42ffa4..84b90f5adb8 100644 --- a/codex-rs/core/src/config_profile.rs +++ b/codex-rs/core/src/config_profile.rs @@ -26,6 +26,7 @@ pub struct ConfigProfile { pub experimental_use_exec_command_tool: Option, pub experimental_use_rmcp_client: Option, pub experimental_use_freeform_apply_patch: Option, + pub experimental_sandbox_command_assessment: Option, pub tools_web_search: Option, pub tools_view_image: Option, /// Optional feature toggles scoped to this profile. diff --git a/codex-rs/core/src/features.rs b/codex-rs/core/src/features.rs index ead4604d547..8e8a203062b 100644 --- a/codex-rs/core/src/features.rs +++ b/codex-rs/core/src/features.rs @@ -39,6 +39,8 @@ pub enum Feature { ViewImageTool, /// Allow the model to request web searches. WebSearchRequest, + /// Enable the model-based risk assessments for sandboxed commands. + SandboxCommandAssessment, } impl Feature { @@ -73,6 +75,7 @@ pub struct FeatureOverrides { pub include_apply_patch_tool: Option, pub include_view_image_tool: Option, pub web_search_request: Option, + pub experimental_sandbox_command_assessment: Option, } impl FeatureOverrides { @@ -137,6 +140,7 @@ impl Features { let mut features = Features::with_defaults(); let base_legacy = LegacyFeatureToggles { + experimental_sandbox_command_assessment: cfg.experimental_sandbox_command_assessment, experimental_use_freeform_apply_patch: cfg.experimental_use_freeform_apply_patch, experimental_use_exec_command_tool: cfg.experimental_use_exec_command_tool, experimental_use_unified_exec_tool: cfg.experimental_use_unified_exec_tool, @@ -154,6 +158,8 @@ impl Features { let profile_legacy = LegacyFeatureToggles { include_apply_patch_tool: config_profile.include_apply_patch_tool, include_view_image_tool: config_profile.include_view_image_tool, + experimental_sandbox_command_assessment: config_profile + .experimental_sandbox_command_assessment, experimental_use_freeform_apply_patch: config_profile .experimental_use_freeform_apply_patch, experimental_use_exec_command_tool: config_profile.experimental_use_exec_command_tool, @@ -236,4 +242,10 @@ pub const FEATURES: &[FeatureSpec] = &[ stage: Stage::Stable, default_enabled: false, }, + FeatureSpec { + id: Feature::SandboxCommandAssessment, + key: "experimental_sandbox_command_assessment", + stage: Stage::Experimental, + default_enabled: false, + }, ]; diff --git a/codex-rs/core/src/features/legacy.rs b/codex-rs/core/src/features/legacy.rs index 22d8442ace0..54f6a2d5490 100644 --- a/codex-rs/core/src/features/legacy.rs +++ b/codex-rs/core/src/features/legacy.rs @@ -9,6 +9,10 @@ struct Alias { } const ALIASES: &[Alias] = &[ + Alias { + legacy_key: "experimental_sandbox_command_assessment", + feature: Feature::SandboxCommandAssessment, + }, Alias { legacy_key: "experimental_use_unified_exec_tool", feature: Feature::UnifiedExec, @@ -53,6 +57,7 @@ pub(crate) fn feature_for_key(key: &str) -> Option { pub struct LegacyFeatureToggles { pub include_apply_patch_tool: Option, pub include_view_image_tool: Option, + pub experimental_sandbox_command_assessment: Option, pub experimental_use_freeform_apply_patch: Option, pub experimental_use_exec_command_tool: Option, pub experimental_use_unified_exec_tool: Option, @@ -69,6 +74,12 @@ impl LegacyFeatureToggles { self.include_apply_patch_tool, "include_apply_patch_tool", ); + set_if_some( + features, + Feature::SandboxCommandAssessment, + self.experimental_sandbox_command_assessment, + "experimental_sandbox_command_assessment", + ); set_if_some( features, Feature::ApplyPatchFreeform, diff --git a/codex-rs/core/src/sandboxing/assessment.rs b/codex-rs/core/src/sandboxing/assessment.rs new file mode 100644 index 00000000000..f02a90b46e3 --- /dev/null +++ b/codex-rs/core/src/sandboxing/assessment.rs @@ -0,0 +1,275 @@ +use std::path::Path; +use std::path::PathBuf; +use std::sync::Arc; +use std::time::Duration; +use std::time::Instant; + +use crate::AuthManager; +use crate::ModelProviderInfo; +use crate::client::ModelClient; +use crate::client_common::Prompt; +use crate::client_common::ResponseEvent; +use crate::config::Config; +use crate::protocol::SandboxPolicy; +use askama::Template; +use codex_otel::otel_event_manager::OtelEventManager; +use codex_protocol::ConversationId; +use codex_protocol::models::ContentItem; +use codex_protocol::models::ResponseItem; +use codex_protocol::protocol::SandboxCommandAssessment; +use futures::StreamExt; +use serde_json::json; +use tokio::time::timeout; +use tracing::warn; + +const SANDBOX_ASSESSMENT_TIMEOUT: Duration = Duration::from_secs(5); + +const SANDBOX_RISK_CATEGORY_VALUES: &[&str] = &[ + "data_deletion", + "data_exfiltration", + "privilege_escalation", + "system_modification", + "network_access", + "resource_exhaustion", + "compliance", +]; + +#[derive(Template)] +#[template(path = "sandboxing/assessment_prompt.md", escape = "none")] +struct SandboxAssessmentPromptTemplate<'a> { + platform: &'a str, + sandbox_policy: &'a str, + filesystem_roots: Option<&'a str>, + working_directory: &'a str, + command_argv: &'a str, + command_joined: &'a str, + sandbox_failure_message: Option<&'a str>, +} + +#[allow(clippy::too_many_arguments)] +pub(crate) async fn assess_command( + config: Arc, + provider: ModelProviderInfo, + auth_manager: Arc, + parent_otel: &OtelEventManager, + conversation_id: ConversationId, + call_id: &str, + command: &[String], + sandbox_policy: &SandboxPolicy, + cwd: &Path, + failure_message: Option<&str>, +) -> Option { + if !config.experimental_sandbox_command_assessment || command.is_empty() { + return None; + } + + let command_json = serde_json::to_string(command).unwrap_or_else(|_| "[]".to_string()); + let command_joined = + shlex::try_join(command.iter().map(String::as_str)).unwrap_or_else(|_| command.join(" ")); + let failure = failure_message + .map(str::trim) + .filter(|msg| !msg.is_empty()) + .map(str::to_string); + + let cwd_str = cwd.to_string_lossy().to_string(); + let sandbox_summary = summarize_sandbox_policy(sandbox_policy); + let mut roots = sandbox_roots_for_prompt(sandbox_policy, cwd); + roots.sort(); + roots.dedup(); + + let platform = std::env::consts::OS; + let roots_formatted = roots.iter().map(|root| root.to_string_lossy().to_string()); + let filesystem_roots = match roots_formatted.collect::>() { + collected if collected.is_empty() => None, + collected => Some(collected.join(", ")), + }; + + let prompt_template = SandboxAssessmentPromptTemplate { + platform, + sandbox_policy: sandbox_summary.as_str(), + filesystem_roots: filesystem_roots.as_deref(), + working_directory: cwd_str.as_str(), + command_argv: command_json.as_str(), + command_joined: command_joined.as_str(), + sandbox_failure_message: failure.as_deref(), + }; + let rendered_prompt = match prompt_template.render() { + Ok(rendered) => rendered, + Err(err) => { + warn!("failed to render sandbox assessment prompt: {err}"); + return None; + } + }; + let (system_prompt_section, user_prompt_section) = match rendered_prompt.split_once("\n---\n") { + Some(split) => split, + None => { + warn!("rendered sandbox assessment prompt missing separator"); + return None; + } + }; + let system_prompt = system_prompt_section + .strip_prefix("System Prompt:\n") + .unwrap_or(system_prompt_section) + .trim() + .to_string(); + let user_prompt = user_prompt_section + .strip_prefix("User Prompt:\n") + .unwrap_or(user_prompt_section) + .trim() + .to_string(); + + let prompt = Prompt { + input: vec![ResponseItem::Message { + id: None, + role: "user".to_string(), + content: vec![ContentItem::InputText { text: user_prompt }], + }], + tools: Vec::new(), + parallel_tool_calls: false, + base_instructions_override: Some(system_prompt), + output_schema: Some(sandbox_assessment_schema()), + }; + + let child_otel = + parent_otel.with_model(config.model.as_str(), config.model_family.slug.as_str()); + + let client = ModelClient::new( + Arc::clone(&config), + Some(auth_manager), + child_otel, + provider, + config.model_reasoning_effort, + config.model_reasoning_summary, + conversation_id, + ); + + let start = Instant::now(); + let assessment_result = timeout(SANDBOX_ASSESSMENT_TIMEOUT, async move { + let mut stream = client.stream(&prompt).await?; + let mut last_json: Option = None; + while let Some(event) = stream.next().await { + match event { + Ok(ResponseEvent::OutputItemDone(item)) => { + if let Some(text) = response_item_text(&item) { + last_json = Some(text); + } + } + Ok(ResponseEvent::RateLimits(_)) => {} + Ok(ResponseEvent::Completed { .. }) => break, + Ok(_) => continue, + Err(err) => return Err(err), + } + } + Ok(last_json) + }) + .await; + let duration = start.elapsed(); + parent_otel.sandbox_assessment_latency(call_id, duration); + + match assessment_result { + Ok(Ok(Some(raw))) => match serde_json::from_str::(raw.trim()) { + Ok(assessment) => { + parent_otel.sandbox_assessment( + call_id, + "success", + Some(assessment.risk_level), + &assessment.risk_categories, + duration, + ); + return Some(assessment); + } + Err(err) => { + warn!("failed to parse sandbox assessment JSON: {err}"); + parent_otel.sandbox_assessment(call_id, "parse_error", None, &[], duration); + } + }, + Ok(Ok(None)) => { + warn!("sandbox assessment response did not include any message"); + parent_otel.sandbox_assessment(call_id, "no_output", None, &[], duration); + } + Ok(Err(err)) => { + warn!("sandbox assessment failed: {err}"); + parent_otel.sandbox_assessment(call_id, "model_error", None, &[], duration); + } + Err(_) => { + warn!("sandbox assessment timed out"); + parent_otel.sandbox_assessment(call_id, "timeout", None, &[], duration); + } + } + + None +} + +fn summarize_sandbox_policy(policy: &SandboxPolicy) -> String { + match policy { + SandboxPolicy::DangerFullAccess => "danger-full-access".to_string(), + SandboxPolicy::ReadOnly => "read-only".to_string(), + SandboxPolicy::WorkspaceWrite { network_access, .. } => { + let network = if *network_access { + "network" + } else { + "no-network" + }; + format!("workspace-write (network_access={network})") + } + } +} + +fn sandbox_roots_for_prompt(policy: &SandboxPolicy, cwd: &Path) -> Vec { + let mut roots = vec![cwd.to_path_buf()]; + if let SandboxPolicy::WorkspaceWrite { writable_roots, .. } = policy { + roots.extend(writable_roots.iter().cloned()); + } + roots +} + +fn sandbox_assessment_schema() -> serde_json::Value { + json!({ + "type": "object", + "required": ["description", "risk_level", "risk_categories"], + "properties": { + "description": { + "type": "string", + "minLength": 1, + "maxLength": 500 + }, + "risk_level": { + "type": "string", + "enum": ["low", "medium", "high"] + }, + "risk_categories": { + "type": "array", + "items": { + "type": "string", + "enum": SANDBOX_RISK_CATEGORY_VALUES + } + } + }, + "additionalProperties": false + }) +} + +fn response_item_text(item: &ResponseItem) -> Option { + match item { + ResponseItem::Message { content, .. } => { + let mut buffers: Vec<&str> = Vec::new(); + for segment in content { + match segment { + ContentItem::InputText { text } | ContentItem::OutputText { text } => { + if !text.is_empty() { + buffers.push(text); + } + } + ContentItem::InputImage { .. } => {} + } + } + if buffers.is_empty() { + None + } else { + Some(buffers.join("\n")) + } + } + ResponseItem::FunctionCallOutput { output, .. } => Some(output.content.clone()), + _ => None, + } +} diff --git a/codex-rs/core/src/sandboxing/mod.rs b/codex-rs/core/src/sandboxing/mod.rs index d632b5da19b..c11f3588fe6 100644 --- a/codex-rs/core/src/sandboxing/mod.rs +++ b/codex-rs/core/src/sandboxing/mod.rs @@ -5,6 +5,9 @@ Build platform wrappers and produce ExecEnv for execution. Owns low‑level sandbox placement and transformation of portable CommandSpec into a ready‑to‑spawn environment. */ + +pub mod assessment; + use crate::exec::ExecToolCallOutput; use crate::exec::SandboxType; use crate::exec::StdoutStream; diff --git a/codex-rs/core/src/tools/orchestrator.rs b/codex-rs/core/src/tools/orchestrator.rs index 38aac5c1edf..e39c0703f8d 100644 --- a/codex-rs/core/src/tools/orchestrator.rs +++ b/codex-rs/core/src/tools/orchestrator.rs @@ -7,9 +7,11 @@ retry without sandbox on denial (no re‑approval thanks to caching). */ use crate::error::CodexErr; use crate::error::SandboxErr; +use crate::error::get_error_message_ui; use crate::exec::ExecToolCallOutput; use crate::sandboxing::SandboxManager; use crate::tools::sandboxing::ApprovalCtx; +use crate::tools::sandboxing::ProvidesSandboxRetryData; use crate::tools::sandboxing::SandboxAttempt; use crate::tools::sandboxing::ToolCtx; use crate::tools::sandboxing::ToolError; @@ -38,6 +40,7 @@ impl ToolOrchestrator { ) -> Result where T: ToolRuntime, + Rq: ProvidesSandboxRetryData, { let otel = turn_ctx.client.get_otel_event_manager(); let otel_tn = &tool_ctx.tool_name; @@ -56,6 +59,7 @@ impl ToolOrchestrator { turn: turn_ctx, call_id: &tool_ctx.call_id, retry_reason: None, + risk: None, }; let decision = tool.start_approval_async(req, approval_ctx).await; @@ -107,12 +111,33 @@ impl ToolOrchestrator { // Ask for approval before retrying without sandbox. if !tool.should_bypass_approval(approval_policy, already_approved) { + let mut risk = None; + + if let Some(metadata) = req.sandbox_retry_data() { + let err = SandboxErr::Denied { + output: output.clone(), + }; + let friendly = get_error_message_ui(&CodexErr::Sandbox(err)); + let failure_summary = format!("failed in sandbox: {friendly}"); + + risk = tool_ctx + .session + .assess_sandbox_command( + turn_ctx, + &tool_ctx.call_id, + &metadata.command, + Some(failure_summary.as_str()), + ) + .await; + } + let reason_msg = build_denial_reason_from_output(output.as_ref()); let approval_ctx = ApprovalCtx { session: tool_ctx.session, turn: turn_ctx, call_id: &tool_ctx.call_id, retry_reason: Some(reason_msg), + risk, }; let decision = tool.start_approval_async(req, approval_ctx).await; diff --git a/codex-rs/core/src/tools/runtimes/apply_patch.rs b/codex-rs/core/src/tools/runtimes/apply_patch.rs index 18abbfee4a4..0cdddd50877 100644 --- a/codex-rs/core/src/tools/runtimes/apply_patch.rs +++ b/codex-rs/core/src/tools/runtimes/apply_patch.rs @@ -10,7 +10,9 @@ use crate::sandboxing::CommandSpec; use crate::sandboxing::execute_env; use crate::tools::sandboxing::Approvable; use crate::tools::sandboxing::ApprovalCtx; +use crate::tools::sandboxing::ProvidesSandboxRetryData; use crate::tools::sandboxing::SandboxAttempt; +use crate::tools::sandboxing::SandboxRetryData; use crate::tools::sandboxing::Sandboxable; use crate::tools::sandboxing::SandboxablePreference; use crate::tools::sandboxing::ToolCtx; @@ -32,6 +34,12 @@ pub struct ApplyPatchRequest { pub codex_exe: Option, } +impl ProvidesSandboxRetryData for ApplyPatchRequest { + fn sandbox_retry_data(&self) -> Option { + None + } +} + #[derive(Default)] pub struct ApplyPatchRuntime; @@ -106,9 +114,10 @@ impl Approvable for ApplyPatchRuntime { let call_id = ctx.call_id.to_string(); let cwd = req.cwd.clone(); let retry_reason = ctx.retry_reason.clone(); + let risk = ctx.risk.clone(); let user_explicitly_approved = req.user_explicitly_approved; Box::pin(async move { - with_cached_approval(&session.services, key, || async move { + with_cached_approval(&session.services, key, move || async move { if let Some(reason) = retry_reason { session .request_command_approval( @@ -117,6 +126,7 @@ impl Approvable for ApplyPatchRuntime { vec!["apply_patch".to_string()], cwd, Some(reason), + risk, ) .await } else if user_explicitly_approved { diff --git a/codex-rs/core/src/tools/runtimes/shell.rs b/codex-rs/core/src/tools/runtimes/shell.rs index bfc2114fb37..f29224fcc1c 100644 --- a/codex-rs/core/src/tools/runtimes/shell.rs +++ b/codex-rs/core/src/tools/runtimes/shell.rs @@ -12,7 +12,9 @@ use crate::sandboxing::execute_env; use crate::tools::runtimes::build_command_spec; use crate::tools::sandboxing::Approvable; use crate::tools::sandboxing::ApprovalCtx; +use crate::tools::sandboxing::ProvidesSandboxRetryData; use crate::tools::sandboxing::SandboxAttempt; +use crate::tools::sandboxing::SandboxRetryData; use crate::tools::sandboxing::Sandboxable; use crate::tools::sandboxing::SandboxablePreference; use crate::tools::sandboxing::ToolCtx; @@ -34,6 +36,15 @@ pub struct ShellRequest { pub justification: Option, } +impl ProvidesSandboxRetryData for ShellRequest { + fn sandbox_retry_data(&self) -> Option { + Some(SandboxRetryData { + command: self.command.clone(), + cwd: self.cwd.clone(), + }) + } +} + #[derive(Default)] pub struct ShellRuntime; @@ -90,13 +101,14 @@ impl Approvable for ShellRuntime { .retry_reason .clone() .or_else(|| req.justification.clone()); + let risk = ctx.risk.clone(); let session = ctx.session; let turn = ctx.turn; let call_id = ctx.call_id.to_string(); Box::pin(async move { - with_cached_approval(&session.services, key, || async move { + with_cached_approval(&session.services, key, move || async move { session - .request_command_approval(turn, call_id, command, cwd, reason) + .request_command_approval(turn, call_id, command, cwd, reason, risk) .await }) .await diff --git a/codex-rs/core/src/tools/runtimes/unified_exec.rs b/codex-rs/core/src/tools/runtimes/unified_exec.rs index c7d136ebfdc..85c996387ab 100644 --- a/codex-rs/core/src/tools/runtimes/unified_exec.rs +++ b/codex-rs/core/src/tools/runtimes/unified_exec.rs @@ -9,7 +9,9 @@ use crate::error::SandboxErr; use crate::tools::runtimes::build_command_spec; use crate::tools::sandboxing::Approvable; use crate::tools::sandboxing::ApprovalCtx; +use crate::tools::sandboxing::ProvidesSandboxRetryData; use crate::tools::sandboxing::SandboxAttempt; +use crate::tools::sandboxing::SandboxRetryData; use crate::tools::sandboxing::Sandboxable; use crate::tools::sandboxing::SandboxablePreference; use crate::tools::sandboxing::ToolCtx; @@ -31,6 +33,15 @@ pub struct UnifiedExecRequest { pub env: HashMap, } +impl ProvidesSandboxRetryData for UnifiedExecRequest { + fn sandbox_retry_data(&self) -> Option { + Some(SandboxRetryData { + command: self.command.clone(), + cwd: self.cwd.clone(), + }) + } +} + #[derive(serde::Serialize, Clone, Debug, Eq, PartialEq, Hash)] pub struct UnifiedExecApprovalKey { pub command: Vec, @@ -85,10 +96,11 @@ impl Approvable for UnifiedExecRuntime<'_> { let command = req.command.clone(); let cwd = req.cwd.clone(); let reason = ctx.retry_reason.clone(); + let risk = ctx.risk.clone(); Box::pin(async move { with_cached_approval(&session.services, key, || async move { session - .request_command_approval(turn, call_id, command, cwd, reason) + .request_command_approval(turn, call_id, command, cwd, reason, risk) .await }) .await diff --git a/codex-rs/core/src/tools/sandboxing.rs b/codex-rs/core/src/tools/sandboxing.rs index 0dd7a99f470..ad483e2f937 100644 --- a/codex-rs/core/src/tools/sandboxing.rs +++ b/codex-rs/core/src/tools/sandboxing.rs @@ -7,6 +7,7 @@ use crate::codex::Session; use crate::codex::TurnContext; use crate::error::CodexErr; +use crate::protocol::SandboxCommandAssessment; use crate::protocol::SandboxPolicy; use crate::sandboxing::CommandSpec; use crate::sandboxing::SandboxManager; @@ -18,6 +19,7 @@ use std::collections::HashMap; use std::fmt::Debug; use std::hash::Hash; use std::path::Path; +use std::path::PathBuf; use futures::Future; use futures::future::BoxFuture; @@ -81,6 +83,7 @@ pub(crate) struct ApprovalCtx<'a> { pub turn: &'a TurnContext, pub call_id: &'a str, pub retry_reason: Option, + pub risk: Option, } pub(crate) trait Approvable { @@ -156,6 +159,17 @@ pub(crate) struct ToolCtx<'a> { pub tool_name: String, } +/// Captures the command metadata needed to re-run a tool request without sandboxing. +#[derive(Clone, Debug, PartialEq, Eq)] +pub(crate) struct SandboxRetryData { + pub command: Vec, + pub cwd: PathBuf, +} + +pub(crate) trait ProvidesSandboxRetryData { + fn sandbox_retry_data(&self) -> Option; +} + #[derive(Debug)] pub(crate) enum ToolError { Rejected(String), diff --git a/codex-rs/core/templates/sandboxing/assessment_prompt.md b/codex-rs/core/templates/sandboxing/assessment_prompt.md new file mode 100644 index 00000000000..7e5664a0ad6 --- /dev/null +++ b/codex-rs/core/templates/sandboxing/assessment_prompt.md @@ -0,0 +1,27 @@ +You are a security analyst evaluating shell commands that were blocked by a sandbox. Given the provided metadata, summarize the command's likely intent and assess the risk. Return strictly valid JSON with the keys: +- description (concise summary, at most two sentences) +- risk_level ("low", "medium", or "high") +- risk_categories (optional array of zero or more category strings) +Risk level examples: +- low: read-only inspections, listing files, printing configuration +- medium: modifying project files, installing dependencies, fetching artifacts from trusted sources +- high: deleting or overwriting data, exfiltrating secrets, escalating privileges, or disabling security controls +Recognized risk_categories: data_deletion, data_exfiltration, privilege_escalation, system_modification, network_access, resource_exhaustion, compliance. +Use multiple categories when appropriate. +If information is insufficient, choose the most cautious risk level supported by the evidence. +Respond with JSON only, without markdown code fences or extra commentary. + +--- + +Command metadata: +Platform: {{ platform }} +Sandbox policy: {{ sandbox_policy }} +{% if let Some(roots) = filesystem_roots %} +Filesystem roots: {{ roots }} +{% endif %} +Working directory: {{ working_directory }} +Command argv: {{ command_argv }} +Command (joined): {{ command_joined }} +{% if let Some(message) = sandbox_failure_message %} +Sandbox failure message: {{ message }} +{% endif %} diff --git a/codex-rs/exec/src/lib.rs b/codex-rs/exec/src/lib.rs index e470194cad8..2e615df0c6b 100644 --- a/codex-rs/exec/src/lib.rs +++ b/codex-rs/exec/src/lib.rs @@ -179,6 +179,7 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option) -> any include_view_image_tool: None, show_raw_agent_reasoning: oss.then_some(true), tools_web_search_request: None, + experimental_sandbox_command_assessment: None, additional_writable_roots: Vec::new(), }; // Parse `-c` overrides. diff --git a/codex-rs/mcp-server/src/codex_tool_config.rs b/codex-rs/mcp-server/src/codex_tool_config.rs index b4c4eb749dc..24a5eec4b89 100644 --- a/codex-rs/mcp-server/src/codex_tool_config.rs +++ b/codex-rs/mcp-server/src/codex_tool_config.rs @@ -158,6 +158,7 @@ impl CodexToolCallParam { include_view_image_tool: None, show_raw_agent_reasoning: None, tools_web_search_request: None, + experimental_sandbox_command_assessment: None, additional_writable_roots: Vec::new(), }; diff --git a/codex-rs/mcp-server/src/codex_tool_runner.rs b/codex-rs/mcp-server/src/codex_tool_runner.rs index a59755008d6..05d653aff97 100644 --- a/codex-rs/mcp-server/src/codex_tool_runner.rs +++ b/codex-rs/mcp-server/src/codex_tool_runner.rs @@ -178,6 +178,7 @@ async fn run_codex_tool_session_inner( cwd, call_id, reason: _, + risk, parsed_cmd, }) => { handle_exec_approval_request( @@ -190,6 +191,7 @@ async fn run_codex_tool_session_inner( event.id.clone(), call_id, parsed_cmd, + risk, ) .await; continue; diff --git a/codex-rs/mcp-server/src/exec_approval.rs b/codex-rs/mcp-server/src/exec_approval.rs index 44607b754d7..033523ac0df 100644 --- a/codex-rs/mcp-server/src/exec_approval.rs +++ b/codex-rs/mcp-server/src/exec_approval.rs @@ -4,6 +4,7 @@ use std::sync::Arc; use codex_core::CodexConversation; use codex_core::protocol::Op; use codex_core::protocol::ReviewDecision; +use codex_core::protocol::SandboxCommandAssessment; use codex_protocol::parse_command::ParsedCommand; use mcp_types::ElicitRequest; use mcp_types::ElicitRequestParamsRequestedSchema; @@ -37,6 +38,8 @@ pub struct ExecApprovalElicitRequestParams { pub codex_command: Vec, pub codex_cwd: PathBuf, pub codex_parsed_cmd: Vec, + #[serde(skip_serializing_if = "Option::is_none")] + pub codex_risk: Option, } // TODO(mbolin): ExecApprovalResponse does not conform to ElicitResult. See: @@ -59,6 +62,7 @@ pub(crate) async fn handle_exec_approval_request( event_id: String, call_id: String, codex_parsed_cmd: Vec, + codex_risk: Option, ) { let escaped_command = shlex::try_join(command.iter().map(String::as_str)).unwrap_or_else(|_| command.join(" ")); @@ -81,6 +85,7 @@ pub(crate) async fn handle_exec_approval_request( codex_command: command, codex_cwd: cwd, codex_parsed_cmd, + codex_risk, }; let params_json = match serde_json::to_value(¶ms) { Ok(value) => value, diff --git a/codex-rs/mcp-server/tests/suite/codex_tool.rs b/codex-rs/mcp-server/tests/suite/codex_tool.rs index d7cd200f074..4ab83ca24e9 100644 --- a/codex-rs/mcp-server/tests/suite/codex_tool.rs +++ b/codex-rs/mcp-server/tests/suite/codex_tool.rs @@ -196,6 +196,7 @@ fn create_expected_elicitation_request( codex_cwd: workdir.to_path_buf(), codex_call_id: "call1234".to_string(), codex_parsed_cmd, + codex_risk: None, })?), }) } diff --git a/codex-rs/otel/src/otel_event_manager.rs b/codex-rs/otel/src/otel_event_manager.rs index 486683dae0e..4006df17d9a 100644 --- a/codex-rs/otel/src/otel_event_manager.rs +++ b/codex-rs/otel/src/otel_event_manager.rs @@ -8,6 +8,8 @@ use codex_protocol::models::ResponseItem; use codex_protocol::protocol::AskForApproval; use codex_protocol::protocol::ReviewDecision; use codex_protocol::protocol::SandboxPolicy; +use codex_protocol::protocol::SandboxRiskCategory; +use codex_protocol::protocol::SandboxRiskLevel; use codex_protocol::user_input::UserInput; use eventsource_stream::Event as StreamEvent; use eventsource_stream::EventStreamError as StreamError; @@ -366,6 +368,63 @@ impl OtelEventManager { ); } + pub fn sandbox_assessment( + &self, + call_id: &str, + status: &str, + risk_level: Option, + risk_categories: &[SandboxRiskCategory], + duration: Duration, + ) { + let level = risk_level.map(|level| level.as_str()); + let categories = if risk_categories.is_empty() { + String::new() + } else { + risk_categories + .iter() + .map(SandboxRiskCategory::as_str) + .collect::>() + .join(", ") + }; + + tracing::event!( + tracing::Level::INFO, + event.name = "codex.sandbox_assessment", + event.timestamp = %timestamp(), + conversation.id = %self.metadata.conversation_id, + app.version = %self.metadata.app_version, + auth_mode = self.metadata.auth_mode, + user.account_id = self.metadata.account_id, + user.email = self.metadata.account_email, + terminal.type = %self.metadata.terminal_type, + model = %self.metadata.model, + slug = %self.metadata.slug, + call_id = %call_id, + status = %status, + risk_level = level, + risk_categories = categories, + duration_ms = %duration.as_millis(), + ); + } + + pub fn sandbox_assessment_latency(&self, call_id: &str, duration: Duration) { + tracing::event!( + tracing::Level::INFO, + event.name = "codex.sandbox_assessment_latency", + event.timestamp = %timestamp(), + conversation.id = %self.metadata.conversation_id, + app.version = %self.metadata.app_version, + auth_mode = self.metadata.auth_mode, + user.account_id = self.metadata.account_id, + user.email = self.metadata.account_email, + terminal.type = %self.metadata.terminal_type, + model = %self.metadata.model, + slug = %self.metadata.slug, + call_id = %call_id, + duration_ms = %duration.as_millis(), + ); + } + pub async fn log_tool_result( &self, tool_name: &str, diff --git a/codex-rs/protocol/src/approvals.rs b/codex-rs/protocol/src/approvals.rs new file mode 100644 index 00000000000..d608dba639e --- /dev/null +++ b/codex-rs/protocol/src/approvals.rs @@ -0,0 +1,91 @@ +use std::collections::HashMap; +use std::path::PathBuf; + +use crate::parse_command::ParsedCommand; +use crate::protocol::FileChange; +use schemars::JsonSchema; +use serde::Deserialize; +use serde::Serialize; +use ts_rs::TS; + +#[derive(Debug, Clone, Copy, Deserialize, Serialize, PartialEq, Eq, Hash, JsonSchema, TS)] +#[serde(rename_all = "snake_case")] +pub enum SandboxRiskLevel { + Low, + Medium, + High, +} + +#[derive(Debug, Clone, Copy, Deserialize, Serialize, PartialEq, Eq, Hash, JsonSchema, TS)] +#[serde(rename_all = "snake_case")] +pub enum SandboxRiskCategory { + DataDeletion, + DataExfiltration, + PrivilegeEscalation, + SystemModification, + NetworkAccess, + ResourceExhaustion, + Compliance, +} + +#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq, JsonSchema, TS)] +pub struct SandboxCommandAssessment { + pub description: String, + pub risk_level: SandboxRiskLevel, + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub risk_categories: Vec, +} + +impl SandboxRiskLevel { + pub fn as_str(&self) -> &'static str { + match self { + Self::Low => "low", + Self::Medium => "medium", + Self::High => "high", + } + } +} + +impl SandboxRiskCategory { + pub fn as_str(&self) -> &'static str { + match self { + Self::DataDeletion => "data_deletion", + Self::DataExfiltration => "data_exfiltration", + Self::PrivilegeEscalation => "privilege_escalation", + Self::SystemModification => "system_modification", + Self::NetworkAccess => "network_access", + Self::ResourceExhaustion => "resource_exhaustion", + Self::Compliance => "compliance", + } + } +} + +#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)] +pub struct ExecApprovalRequestEvent { + /// Identifier for the associated exec call, if available. + pub call_id: String, + /// The command to be executed. + pub command: Vec, + /// The command's working directory. + pub cwd: PathBuf, + /// Optional human-readable reason for the approval (e.g. retry without sandbox). + #[serde(skip_serializing_if = "Option::is_none")] + pub reason: Option, + /// Optional model-provided risk assessment describing the blocked command. + #[serde(skip_serializing_if = "Option::is_none")] + pub risk: Option, + pub parsed_cmd: Vec, +} + +#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)] +pub struct ApplyPatchApprovalRequestEvent { + /// Responses API call id for the associated patch apply call, if available. + pub call_id: String, + pub changes: HashMap, + /// Optional explanatory reason (e.g. request for extra write access). + #[serde(skip_serializing_if = "Option::is_none")] + pub reason: Option, + /// When set, the agent is asking the user to allow writes under this root for the remainder of the session. + #[serde(skip_serializing_if = "Option::is_none")] + pub grant_root: Option, +} diff --git a/codex-rs/protocol/src/lib.rs b/codex-rs/protocol/src/lib.rs index 3b8747e63af..08ea7533473 100644 --- a/codex-rs/protocol/src/lib.rs +++ b/codex-rs/protocol/src/lib.rs @@ -1,6 +1,7 @@ pub mod account; mod conversation_id; pub use conversation_id::ConversationId; +pub mod approvals; pub mod config_types; pub mod custom_prompts; pub mod items; diff --git a/codex-rs/protocol/src/protocol.rs b/codex-rs/protocol/src/protocol.rs index 105f0280497..cd10c2786b4 100644 --- a/codex-rs/protocol/src/protocol.rs +++ b/codex-rs/protocol/src/protocol.rs @@ -34,6 +34,12 @@ use serde_with::serde_as; use strum_macros::Display; use ts_rs::TS; +pub use crate::approvals::ApplyPatchApprovalRequestEvent; +pub use crate::approvals::ExecApprovalRequestEvent; +pub use crate::approvals::SandboxCommandAssessment; +pub use crate::approvals::SandboxRiskCategory; +pub use crate::approvals::SandboxRiskLevel; + /// Open/close tags for special user-input blocks. Used across crates to avoid /// duplicated hardcoded strings. pub const USER_INSTRUCTIONS_OPEN_TAG: &str = ""; @@ -1126,33 +1132,6 @@ pub struct ExecCommandOutputDeltaEvent { pub chunk: Vec, } -#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)] -pub struct ExecApprovalRequestEvent { - /// Identifier for the associated exec call, if available. - pub call_id: String, - /// The command to be executed. - pub command: Vec, - /// The command's working directory. - pub cwd: PathBuf, - /// Optional human-readable reason for the approval (e.g. retry without sandbox). - #[serde(skip_serializing_if = "Option::is_none")] - pub reason: Option, - pub parsed_cmd: Vec, -} - -#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)] -pub struct ApplyPatchApprovalRequestEvent { - /// Responses API call id for the associated patch apply call, if available. - pub call_id: String, - pub changes: HashMap, - /// Optional explanatory reason (e.g. request for extra write access). - #[serde(skip_serializing_if = "Option::is_none")] - pub reason: Option, - /// When set, the agent is asking the user to allow writes under this root for the remainder of the session. - #[serde(skip_serializing_if = "Option::is_none")] - pub grant_root: Option, -} - #[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)] pub struct BackgroundEventEvent { pub message: String, diff --git a/codex-rs/tui/src/bottom_pane/approval_overlay.rs b/codex-rs/tui/src/bottom_pane/approval_overlay.rs index e8ecc084c4a..ba36870005a 100644 --- a/codex-rs/tui/src/bottom_pane/approval_overlay.rs +++ b/codex-rs/tui/src/bottom_pane/approval_overlay.rs @@ -19,6 +19,9 @@ use crate::render::renderable::Renderable; use codex_core::protocol::FileChange; use codex_core::protocol::Op; use codex_core::protocol::ReviewDecision; +use codex_core::protocol::SandboxCommandAssessment; +use codex_core::protocol::SandboxRiskCategory; +use codex_core::protocol::SandboxRiskLevel; use crossterm::event::KeyCode; use crossterm::event::KeyEvent; use crossterm::event::KeyEventKind; @@ -38,6 +41,7 @@ pub(crate) enum ApprovalRequest { id: String, command: Vec, reason: Option, + risk: Option, }, ApplyPatch { id: String, @@ -285,12 +289,17 @@ impl From for ApprovalRequestState { id, command, reason, + risk, } => { + let reason = reason.filter(|item| !item.is_empty()); + let has_reason = reason.is_some(); let mut header: Vec> = Vec::new(); - if let Some(reason) = reason - && !reason.is_empty() - { + if let Some(reason) = reason { header.push(Line::from(vec!["Reason: ".into(), reason.italic()])); + } + if let Some(risk) = risk.as_ref() { + header.extend(render_risk_lines(risk)); + } else if has_reason { header.push(Line::from("")); } let full_cmd = strip_bash_lc_and_escape(&command); @@ -330,6 +339,52 @@ impl From for ApprovalRequestState { } } +fn render_risk_lines(risk: &SandboxCommandAssessment) -> Vec> { + let level_span = match risk.risk_level { + SandboxRiskLevel::Low => "LOW".green().bold(), + SandboxRiskLevel::Medium => "MEDIUM".cyan().bold(), + SandboxRiskLevel::High => "HIGH".red().bold(), + }; + + let mut lines = Vec::new(); + + let description = risk.description.trim(); + if !description.is_empty() { + lines.push(Line::from(vec![ + "Summary: ".into(), + description.to_string().into(), + ])); + } + + let mut spans: Vec> = vec!["Risk: ".into(), level_span]; + if !risk.risk_categories.is_empty() { + spans.push(" (".into()); + for (idx, category) in risk.risk_categories.iter().enumerate() { + if idx > 0 { + spans.push(", ".into()); + } + spans.push(risk_category_label(*category).into()); + } + spans.push(")".into()); + } + + lines.push(Line::from(spans)); + lines.push(Line::from("")); + lines +} + +fn risk_category_label(category: SandboxRiskCategory) -> &'static str { + match category { + SandboxRiskCategory::DataDeletion => "data deletion", + SandboxRiskCategory::DataExfiltration => "data exfiltration", + SandboxRiskCategory::PrivilegeEscalation => "privilege escalation", + SandboxRiskCategory::SystemModification => "system modification", + SandboxRiskCategory::NetworkAccess => "network access", + SandboxRiskCategory::ResourceExhaustion => "resource exhaustion", + SandboxRiskCategory::Compliance => "compliance", + } +} + #[derive(Clone)] enum ApprovalVariant { Exec { id: String, command: Vec }, @@ -404,6 +459,7 @@ mod tests { id: "test".to_string(), command: vec!["echo".to_string(), "hi".to_string()], reason: Some("reason".to_string()), + risk: None, } } @@ -445,6 +501,7 @@ mod tests { id: "test".into(), command, reason: None, + risk: None, }; let view = ApprovalOverlay::new(exec_request, tx); diff --git a/codex-rs/tui/src/bottom_pane/mod.rs b/codex-rs/tui/src/bottom_pane/mod.rs index 69405cd823d..48bc0e9d4ca 100644 --- a/codex-rs/tui/src/bottom_pane/mod.rs +++ b/codex-rs/tui/src/bottom_pane/mod.rs @@ -557,6 +557,7 @@ mod tests { id: "1".to_string(), command: vec!["echo".into(), "ok".into()], reason: None, + risk: None, } } diff --git a/codex-rs/tui/src/chatwidget.rs b/codex-rs/tui/src/chatwidget.rs index 75dad91372d..64f680d2ff0 100644 --- a/codex-rs/tui/src/chatwidget.rs +++ b/codex-rs/tui/src/chatwidget.rs @@ -777,6 +777,7 @@ impl ChatWidget { id, command: ev.command, reason: ev.reason, + risk: ev.risk, }; self.bottom_pane.push_approval_request(request); self.request_redraw(); diff --git a/codex-rs/tui/src/chatwidget/tests.rs b/codex-rs/tui/src/chatwidget/tests.rs index 738fc31e427..e25e04e09ef 100644 --- a/codex-rs/tui/src/chatwidget/tests.rs +++ b/codex-rs/tui/src/chatwidget/tests.rs @@ -402,6 +402,7 @@ fn exec_approval_emits_proposed_command_and_decision_history() { reason: Some( "this is a test reason such as one that would be produced by the model".into(), ), + risk: None, parsed_cmd: vec![], }; chat.handle_codex_event(Event { @@ -444,6 +445,7 @@ fn exec_approval_decision_truncates_multiline_and_long_commands() { reason: Some( "this is a test reason such as one that would be produced by the model".into(), ), + risk: None, parsed_cmd: vec![], }; chat.handle_codex_event(Event { @@ -492,6 +494,7 @@ fn exec_approval_decision_truncates_multiline_and_long_commands() { command: vec!["bash".into(), "-lc".into(), long], cwd: std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")), reason: None, + risk: None, parsed_cmd: vec![], }; chat.handle_codex_event(Event { @@ -1421,6 +1424,7 @@ fn approval_modal_exec_snapshot() { reason: Some( "this is a test reason such as one that would be produced by the model".into(), ), + risk: None, parsed_cmd: vec![], }; chat.handle_codex_event(Event { @@ -1465,6 +1469,7 @@ fn approval_modal_exec_without_reason_snapshot() { command: vec!["bash".into(), "-lc".into(), "echo hello world".into()], cwd: std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")), reason: None, + risk: None, parsed_cmd: vec![], }; chat.handle_codex_event(Event { @@ -1675,6 +1680,7 @@ fn status_widget_and_approval_modal_snapshot() { reason: Some( "this is a test reason such as one that would be produced by the model".into(), ), + risk: None, parsed_cmd: vec![], }; chat.handle_codex_event(Event { diff --git a/codex-rs/tui/src/lib.rs b/codex-rs/tui/src/lib.rs index 681b4c6ca12..50f6b1a0ddf 100644 --- a/codex-rs/tui/src/lib.rs +++ b/codex-rs/tui/src/lib.rs @@ -148,6 +148,7 @@ pub async fn run_main( include_view_image_tool: None, show_raw_agent_reasoning: cli.oss.then_some(true), tools_web_search_request: cli.web_search.then_some(true), + experimental_sandbox_command_assessment: None, additional_writable_roots: additional_dirs, }; let raw_overrides = cli.config_overrides.raw_overrides.clone();