diff --git a/codex-rs/Cargo.lock b/codex-rs/Cargo.lock index 43916d1e14f..383a9cf48e0 100644 --- a/codex-rs/Cargo.lock +++ b/codex-rs/Cargo.lock @@ -1077,6 +1077,7 @@ dependencies = [ "codex-chatgpt", "codex-common", "codex-core", + "codex-execpolicy", "codex-feedback", "codex-file-search", "codex-login", @@ -1756,6 +1757,7 @@ name = "codex-protocol" version = "0.0.0" dependencies = [ "anyhow", + "codex-execpolicy", "codex-git", "codex-utils-absolute-path", "codex-utils-image", diff --git a/codex-rs/app-server/Cargo.toml b/codex-rs/app-server/Cargo.toml index 9fa20440581..af2f30c49ca 100644 --- a/codex-rs/app-server/Cargo.toml +++ b/codex-rs/app-server/Cargo.toml @@ -56,6 +56,7 @@ axum = { workspace = true, default-features = false, features = [ "tokio", ] } base64 = { workspace = true } +codex-execpolicy = { workspace = true } core_test_support = { workspace = true } mcp-types = { workspace = true } os_info = { workspace = true } diff --git a/codex-rs/app-server/tests/suite/send_message.rs b/codex-rs/app-server/tests/suite/send_message.rs index 0c713de87ce..814352a007a 100644 --- a/codex-rs/app-server/tests/suite/send_message.rs +++ b/codex-rs/app-server/tests/suite/send_message.rs @@ -11,6 +11,7 @@ use codex_app_server_protocol::NewConversationResponse; use codex_app_server_protocol::RequestId; use codex_app_server_protocol::SendUserMessageParams; use codex_app_server_protocol::SendUserMessageResponse; +use codex_execpolicy::Policy; use codex_protocol::ThreadId; use codex_protocol::models::ContentItem; use codex_protocol::models::DeveloperInstructions; @@ -358,6 +359,8 @@ fn assert_permissions_message(item: &ResponseItem) { let expected = DeveloperInstructions::from_policy( &SandboxPolicy::DangerFullAccess, AskForApproval::Never, + &Policy::empty(), + false, &PathBuf::from("/tmp"), ) .into_text(); diff --git a/codex-rs/core/config.schema.json b/codex-rs/core/config.schema.json index 1fe5f9e75d6..c11256c70d9 100644 --- a/codex-rs/core/config.schema.json +++ b/codex-rs/core/config.schema.json @@ -189,6 +189,9 @@ "remote_models": { "type": "boolean" }, + "request_rule": { + "type": "boolean" + }, "responses_websockets": { "type": "boolean" }, @@ -1184,6 +1187,9 @@ "remote_models": { "type": "boolean" }, + "request_rule": { + "type": "boolean" + }, "responses_websockets": { "type": "boolean" }, diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index d4f2822a735..e9fbecfa90d 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -190,6 +190,7 @@ use codex_protocol::models::ContentItem; use codex_protocol::models::DeveloperInstructions; use codex_protocol::models::ResponseInputItem; use codex_protocol::models::ResponseItem; +use codex_protocol::models::render_command_prefix_list; use codex_protocol::protocol::CodexErrorInfo; use codex_protocol::protocol::InitialHistory; use codex_protocol::user_input::UserInput; @@ -1226,6 +1227,8 @@ impl Session { DeveloperInstructions::from_policy( &next.sandbox_policy, next.approval_policy, + self.services.exec_policy.current().as_ref(), + self.features.enabled(Feature::RequestRule), &next.cwd, ) .into(), @@ -1416,6 +1419,44 @@ impl Session { Ok(()) } + async fn turn_context_for_sub_id(&self, sub_id: &str) -> Option> { + let active = self.active_turn.lock().await; + active + .as_ref() + .and_then(|turn| turn.tasks.get(sub_id)) + .map(|task| Arc::clone(&task.turn_context)) + } + + pub(crate) async fn record_execpolicy_amendment_message( + &self, + sub_id: &str, + amendment: &ExecPolicyAmendment, + ) { + let Some(prefixes) = render_command_prefix_list([amendment.command.as_slice()]) else { + warn!("execpolicy amendment for {sub_id} had no command prefix"); + return; + }; + let text = format!("Approved command prefix saved:\n{prefixes}"); + let message: ResponseItem = DeveloperInstructions::new(text.clone()).into(); + + if let Some(turn_context) = self.turn_context_for_sub_id(sub_id).await { + self.record_conversation_items(&turn_context, std::slice::from_ref(&message)) + .await; + return; + } + + if self + .inject_response_items(vec![ResponseInputItem::Message { + role: "developer".to_string(), + content: vec![ContentItem::InputText { text }], + }]) + .await + .is_err() + { + warn!("no active turn found to record execpolicy amendment message for {sub_id}"); + } + } + /// Emit an exec approval request event and await the user's decision. /// /// The request is keyed by `sub_id`/`call_id` so matching responses are delivered @@ -1749,6 +1790,8 @@ impl Session { DeveloperInstructions::from_policy( &turn_context.sandbox_policy, turn_context.approval_policy, + self.services.exec_policy.current().as_ref(), + self.features.enabled(Feature::RequestRule), &turn_context.cwd, ) .into(), @@ -2595,18 +2638,26 @@ mod handlers { if let ReviewDecision::ApprovedExecpolicyAmendment { proposed_execpolicy_amendment, } = &decision - && let Err(err) = sess + { + match sess .persist_execpolicy_amendment(proposed_execpolicy_amendment) .await - { - let message = format!("Failed to apply execpolicy amendment: {err}"); - tracing::warn!("{message}"); - let warning = EventMsg::Warning(WarningEvent { message }); - sess.send_event_raw(Event { - id: id.clone(), - msg: warning, - }) - .await; + { + Ok(()) => { + sess.record_execpolicy_amendment_message(&id, proposed_execpolicy_amendment) + .await; + } + Err(err) => { + let message = format!("Failed to apply execpolicy amendment: {err}"); + tracing::warn!("{message}"); + let warning = EventMsg::Warning(WarningEvent { message }); + sess.send_event_raw(Event { + id: id.clone(), + msg: warning, + }) + .await; + } + } } match decision { ReviewDecision::Abort => { diff --git a/codex-rs/core/src/exec_policy.rs b/codex-rs/core/src/exec_policy.rs index 61c00702392..07a9ebceba1 100644 --- a/codex-rs/core/src/exec_policy.rs +++ b/codex-rs/core/src/exec_policy.rs @@ -87,6 +87,15 @@ pub(crate) struct ExecPolicyManager { policy: ArcSwap, } +pub(crate) struct ExecApprovalRequest<'a> { + pub(crate) features: &'a Features, + pub(crate) command: &'a [String], + pub(crate) approval_policy: AskForApproval, + pub(crate) sandbox_policy: &'a SandboxPolicy, + pub(crate) sandbox_permissions: SandboxPermissions, + pub(crate) prefix_rule: Option>, +} + impl ExecPolicyManager { pub(crate) fn new(policy: Arc) -> Self { Self { @@ -112,12 +121,16 @@ impl ExecPolicyManager { pub(crate) async fn create_exec_approval_requirement_for_command( &self, - features: &Features, - command: &[String], - approval_policy: AskForApproval, - sandbox_policy: &SandboxPolicy, - sandbox_permissions: SandboxPermissions, + req: ExecApprovalRequest<'_>, ) -> ExecApprovalRequirement { + let ExecApprovalRequest { + features, + command, + approval_policy, + sandbox_policy, + sandbox_permissions, + prefix_rule, + } = req; let exec_policy = self.current(); let commands = parse_shell_lc_plain_commands(command).unwrap_or_else(|| vec![command.to_vec()]); @@ -131,6 +144,12 @@ impl ExecPolicyManager { }; let evaluation = exec_policy.check_multiple(commands.iter(), &exec_policy_fallback); + let requested_amendment = derive_requested_execpolicy_amendment( + features, + prefix_rule.as_ref(), + &evaluation.matched_rules, + ); + match evaluation.decision { Decision::Forbidden => ExecApprovalRequirement::Forbidden { reason: derive_forbidden_reason(command, &evaluation), @@ -144,9 +163,11 @@ impl ExecPolicyManager { ExecApprovalRequirement::NeedsApproval { reason: derive_prompt_reason(command, &evaluation), proposed_execpolicy_amendment: if features.enabled(Feature::ExecPolicy) { - try_derive_execpolicy_amendment_for_prompt_rules( - &evaluation.matched_rules, - ) + requested_amendment.or_else(|| { + try_derive_execpolicy_amendment_for_prompt_rules( + &evaluation.matched_rules, + ) + }) } else { None }, @@ -382,6 +403,30 @@ fn try_derive_execpolicy_amendment_for_allow_rules( }) } +fn derive_requested_execpolicy_amendment( + features: &Features, + prefix_rule: Option<&Vec>, + matched_rules: &[RuleMatch], +) -> Option { + if !features.enabled(Feature::ExecPolicy) { + return None; + } + + let prefix_rule = prefix_rule?; + if prefix_rule.is_empty() { + return None; + } + + if matched_rules + .iter() + .any(|rule_match| is_policy_match(rule_match) && rule_match.decision() == Decision::Prompt) + { + return None; + } + + Some(ExecPolicyAmendment::new(prefix_rule.clone())) +} + /// Only return a reason when a policy rule drove the prompt decision. fn derive_prompt_reason(command_args: &[String], evaluation: &Evaluation) -> Option { let command = render_shlex_command(command_args); @@ -756,13 +801,14 @@ prefix_rule(pattern=["rm"], decision="forbidden") let manager = ExecPolicyManager::new(policy); let requirement = manager - .create_exec_approval_requirement_for_command( - &Features::with_defaults(), - &forbidden_script, - AskForApproval::OnRequest, - &SandboxPolicy::DangerFullAccess, - SandboxPermissions::UseDefault, - ) + .create_exec_approval_requirement_for_command(ExecApprovalRequest { + features: &Features::with_defaults(), + command: &forbidden_script, + approval_policy: AskForApproval::OnRequest, + sandbox_policy: &SandboxPolicy::DangerFullAccess, + sandbox_permissions: SandboxPermissions::UseDefault, + prefix_rule: None, + }) .await; assert_eq!( @@ -790,17 +836,18 @@ prefix_rule( let manager = ExecPolicyManager::new(policy); let requirement = manager - .create_exec_approval_requirement_for_command( - &Features::with_defaults(), - &[ + .create_exec_approval_requirement_for_command(ExecApprovalRequest { + features: &Features::with_defaults(), + command: &[ "rm".to_string(), "-rf".to_string(), "/some/important/folder".to_string(), ], - AskForApproval::OnRequest, - &SandboxPolicy::DangerFullAccess, - SandboxPermissions::UseDefault, - ) + approval_policy: AskForApproval::OnRequest, + sandbox_policy: &SandboxPolicy::DangerFullAccess, + sandbox_permissions: SandboxPermissions::UseDefault, + prefix_rule: None, + }) .await; assert_eq!( @@ -823,13 +870,14 @@ prefix_rule( let manager = ExecPolicyManager::new(policy); let requirement = manager - .create_exec_approval_requirement_for_command( - &Features::with_defaults(), - &command, - AskForApproval::OnRequest, - &SandboxPolicy::DangerFullAccess, - SandboxPermissions::UseDefault, - ) + .create_exec_approval_requirement_for_command(ExecApprovalRequest { + features: &Features::with_defaults(), + command: &command, + approval_policy: AskForApproval::OnRequest, + sandbox_policy: &SandboxPolicy::DangerFullAccess, + sandbox_permissions: SandboxPermissions::UseDefault, + prefix_rule: None, + }) .await; assert_eq!( @@ -853,13 +901,14 @@ prefix_rule( let manager = ExecPolicyManager::new(policy); let requirement = manager - .create_exec_approval_requirement_for_command( - &Features::with_defaults(), - &command, - AskForApproval::Never, - &SandboxPolicy::DangerFullAccess, - SandboxPermissions::UseDefault, - ) + .create_exec_approval_requirement_for_command(ExecApprovalRequest { + features: &Features::with_defaults(), + command: &command, + approval_policy: AskForApproval::Never, + sandbox_policy: &SandboxPolicy::DangerFullAccess, + sandbox_permissions: SandboxPermissions::UseDefault, + prefix_rule: None, + }) .await; assert_eq!( @@ -876,13 +925,14 @@ prefix_rule( let manager = ExecPolicyManager::default(); let requirement = manager - .create_exec_approval_requirement_for_command( - &Features::with_defaults(), - &command, - AskForApproval::UnlessTrusted, - &SandboxPolicy::ReadOnly, - SandboxPermissions::UseDefault, - ) + .create_exec_approval_requirement_for_command(ExecApprovalRequest { + features: &Features::with_defaults(), + command: &command, + approval_policy: AskForApproval::UnlessTrusted, + sandbox_policy: &SandboxPolicy::ReadOnly, + sandbox_permissions: SandboxPermissions::UseDefault, + prefix_rule: None, + }) .await; assert_eq!( @@ -894,6 +944,40 @@ prefix_rule( ); } + #[tokio::test] + async fn request_rule_uses_prefix_rule() { + let command = vec![ + "cargo".to_string(), + "install".to_string(), + "cargo-insta".to_string(), + ]; + let manager = ExecPolicyManager::default(); + let mut features = Features::with_defaults(); + features.enable(Feature::RequestRule); + + let requirement = manager + .create_exec_approval_requirement_for_command(ExecApprovalRequest { + features: &features, + command: &command, + approval_policy: AskForApproval::OnRequest, + sandbox_policy: &SandboxPolicy::ReadOnly, + sandbox_permissions: SandboxPermissions::RequireEscalated, + prefix_rule: Some(vec!["cargo".to_string(), "install".to_string()]), + }) + .await; + + assert_eq!( + requirement, + ExecApprovalRequirement::NeedsApproval { + reason: None, + proposed_execpolicy_amendment: Some(ExecPolicyAmendment::new(vec![ + "cargo".to_string(), + "install".to_string(), + ])), + } + ); + } + #[tokio::test] async fn heuristics_apply_when_other_commands_match_policy() { let policy_src = r#"prefix_rule(pattern=["apple"], decision="allow")"#; @@ -910,13 +994,14 @@ prefix_rule( assert_eq!( ExecPolicyManager::new(policy) - .create_exec_approval_requirement_for_command( - &Features::with_defaults(), - &command, - AskForApproval::UnlessTrusted, - &SandboxPolicy::DangerFullAccess, - SandboxPermissions::UseDefault, - ) + .create_exec_approval_requirement_for_command(ExecApprovalRequest { + features: &Features::with_defaults(), + command: &command, + approval_policy: AskForApproval::UnlessTrusted, + sandbox_policy: &SandboxPolicy::DangerFullAccess, + sandbox_permissions: SandboxPermissions::UseDefault, + prefix_rule: None, + }) .await, ExecApprovalRequirement::NeedsApproval { reason: None, @@ -984,13 +1069,14 @@ prefix_rule( let manager = ExecPolicyManager::default(); let requirement = manager - .create_exec_approval_requirement_for_command( - &Features::with_defaults(), - &command, - AskForApproval::UnlessTrusted, - &SandboxPolicy::ReadOnly, - SandboxPermissions::UseDefault, - ) + .create_exec_approval_requirement_for_command(ExecApprovalRequest { + features: &Features::with_defaults(), + command: &command, + approval_policy: AskForApproval::UnlessTrusted, + sandbox_policy: &SandboxPolicy::ReadOnly, + sandbox_permissions: SandboxPermissions::UseDefault, + prefix_rule: None, + }) .await; assert_eq!( @@ -1011,13 +1097,14 @@ prefix_rule( let manager = ExecPolicyManager::default(); let requirement = manager - .create_exec_approval_requirement_for_command( - &features, - &command, - AskForApproval::UnlessTrusted, - &SandboxPolicy::ReadOnly, - SandboxPermissions::UseDefault, - ) + .create_exec_approval_requirement_for_command(ExecApprovalRequest { + features: &features, + command: &command, + approval_policy: AskForApproval::UnlessTrusted, + sandbox_policy: &SandboxPolicy::ReadOnly, + sandbox_permissions: SandboxPermissions::UseDefault, + prefix_rule: None, + }) .await; assert_eq!( @@ -1041,13 +1128,14 @@ prefix_rule( let manager = ExecPolicyManager::new(policy); let requirement = manager - .create_exec_approval_requirement_for_command( - &Features::with_defaults(), - &command, - AskForApproval::OnRequest, - &SandboxPolicy::DangerFullAccess, - SandboxPermissions::UseDefault, - ) + .create_exec_approval_requirement_for_command(ExecApprovalRequest { + features: &Features::with_defaults(), + command: &command, + approval_policy: AskForApproval::OnRequest, + sandbox_policy: &SandboxPolicy::DangerFullAccess, + sandbox_permissions: SandboxPermissions::UseDefault, + prefix_rule: None, + }) .await; assert_eq!( @@ -1068,13 +1156,14 @@ prefix_rule( ]; let manager = ExecPolicyManager::default(); let requirement = manager - .create_exec_approval_requirement_for_command( - &Features::with_defaults(), - &command, - AskForApproval::UnlessTrusted, - &SandboxPolicy::ReadOnly, - SandboxPermissions::UseDefault, - ) + .create_exec_approval_requirement_for_command(ExecApprovalRequest { + features: &Features::with_defaults(), + command: &command, + approval_policy: AskForApproval::UnlessTrusted, + sandbox_policy: &SandboxPolicy::ReadOnly, + sandbox_permissions: SandboxPermissions::UseDefault, + prefix_rule: None, + }) .await; assert_eq!( @@ -1106,13 +1195,14 @@ prefix_rule( assert_eq!( ExecPolicyManager::new(policy) - .create_exec_approval_requirement_for_command( - &Features::with_defaults(), - &command, - AskForApproval::UnlessTrusted, - &SandboxPolicy::ReadOnly, - SandboxPermissions::UseDefault, - ) + .create_exec_approval_requirement_for_command(ExecApprovalRequest { + features: &Features::with_defaults(), + command: &command, + approval_policy: AskForApproval::UnlessTrusted, + sandbox_policy: &SandboxPolicy::ReadOnly, + sandbox_permissions: SandboxPermissions::UseDefault, + prefix_rule: None, + }) .await, ExecApprovalRequirement::NeedsApproval { reason: None, @@ -1129,13 +1219,14 @@ prefix_rule( let manager = ExecPolicyManager::default(); let requirement = manager - .create_exec_approval_requirement_for_command( - &Features::with_defaults(), - &command, - AskForApproval::OnRequest, - &SandboxPolicy::ReadOnly, - SandboxPermissions::UseDefault, - ) + .create_exec_approval_requirement_for_command(ExecApprovalRequest { + features: &Features::with_defaults(), + command: &command, + approval_policy: AskForApproval::OnRequest, + sandbox_policy: &SandboxPolicy::ReadOnly, + sandbox_permissions: SandboxPermissions::UseDefault, + prefix_rule: None, + }) .await; assert_eq!( @@ -1159,13 +1250,14 @@ prefix_rule( let manager = ExecPolicyManager::new(policy); let requirement = manager - .create_exec_approval_requirement_for_command( - &Features::with_defaults(), - &command, - AskForApproval::OnRequest, - &SandboxPolicy::ReadOnly, - SandboxPermissions::UseDefault, - ) + .create_exec_approval_requirement_for_command(ExecApprovalRequest { + features: &Features::with_defaults(), + command: &command, + approval_policy: AskForApproval::OnRequest, + sandbox_policy: &SandboxPolicy::ReadOnly, + sandbox_permissions: SandboxPermissions::UseDefault, + prefix_rule: None, + }) .await; assert_eq!( @@ -1226,13 +1318,14 @@ prefix_rule( assert_eq!( expected_req, policy - .create_exec_approval_requirement_for_command( - &features, - &sneaky_command, - AskForApproval::OnRequest, - &SandboxPolicy::ReadOnly, - permissions, - ) + .create_exec_approval_requirement_for_command(ExecApprovalRequest { + features: &features, + command: &sneaky_command, + approval_policy: AskForApproval::OnRequest, + sandbox_policy: &SandboxPolicy::ReadOnly, + sandbox_permissions: permissions, + prefix_rule: None, + }) .await, "{pwsh_approval_reason}" ); @@ -1249,13 +1342,14 @@ prefix_rule( ]))), }, policy - .create_exec_approval_requirement_for_command( - &features, - &dangerous_command, - AskForApproval::OnRequest, - &SandboxPolicy::ReadOnly, - permissions, - ) + .create_exec_approval_requirement_for_command(ExecApprovalRequest { + features: &features, + command: &dangerous_command, + approval_policy: AskForApproval::OnRequest, + sandbox_policy: &SandboxPolicy::ReadOnly, + sandbox_permissions: permissions, + prefix_rule: None, + }) .await, r#"On all platforms, a forbidden command should require approval (unless AskForApproval::Never is specified)."# @@ -1268,13 +1362,14 @@ prefix_rule( reason: "`rm -rf /important/data` rejected: blocked by policy".to_string(), }, policy - .create_exec_approval_requirement_for_command( - &features, - &dangerous_command, - AskForApproval::Never, - &SandboxPolicy::ReadOnly, - permissions, - ) + .create_exec_approval_requirement_for_command(ExecApprovalRequest { + features: &features, + command: &dangerous_command, + approval_policy: AskForApproval::Never, + sandbox_policy: &SandboxPolicy::ReadOnly, + sandbox_permissions: permissions, + prefix_rule: None, + }) .await, r#"On all platforms, a forbidden command should require approval (unless AskForApproval::Never is specified)."# diff --git a/codex-rs/core/src/features.rs b/codex-rs/core/src/features.rs index cc56970276e..bd739e5a75c 100644 --- a/codex-rs/core/src/features.rs +++ b/codex-rs/core/src/features.rs @@ -89,6 +89,8 @@ pub enum Feature { WebSearchCached, /// Gate the execpolicy enforcement for shell/unified exec. ExecPolicy, + /// Allow the model to request approval and propose exec rules. + RequestRule, /// Enable Windows sandbox (restricted token) on Windows. WindowsSandbox, /// Use the elevated Windows sandbox pipeline (setup + runner). @@ -393,6 +395,12 @@ pub const FEATURES: &[FeatureSpec] = &[ stage: Stage::UnderDevelopment, default_enabled: true, }, + FeatureSpec { + id: Feature::RequestRule, + key: "request_rule", + stage: Stage::UnderDevelopment, + default_enabled: false, + }, FeatureSpec { id: Feature::WindowsSandbox, key: "experimental_windows_sandbox", diff --git a/codex-rs/core/src/tools/handlers/shell.rs b/codex-rs/core/src/tools/handlers/shell.rs index c3628e8c347..dbfb0364bf2 100644 --- a/codex-rs/core/src/tools/handlers/shell.rs +++ b/codex-rs/core/src/tools/handlers/shell.rs @@ -6,6 +6,7 @@ use std::sync::Arc; use crate::codex::TurnContext; use crate::exec::ExecParams; use crate::exec_env::create_env; +use crate::exec_policy::ExecApprovalRequest; use crate::function_tool::FunctionCallError; use crate::is_safe_command::is_known_safe_command; use crate::protocol::ExecCommandSource; @@ -28,16 +29,27 @@ pub struct ShellHandler; pub struct ShellCommandHandler; +struct RunExecLikeArgs { + tool_name: String, + exec_params: ExecParams, + prefix_rule: Option>, + session: Arc, + turn: Arc, + tracker: crate::tools::context::SharedTurnDiffTracker, + call_id: String, + freeform: bool, +} + impl ShellHandler { - fn to_exec_params(params: ShellToolCallParams, turn_context: &TurnContext) -> ExecParams { + fn to_exec_params(params: &ShellToolCallParams, turn_context: &TurnContext) -> ExecParams { ExecParams { - command: params.command, + command: params.command.clone(), cwd: turn_context.resolve_path(params.workdir.clone()), expiration: params.timeout_ms.into(), env: create_env(&turn_context.shell_environment_policy), sandbox_permissions: params.sandbox_permissions.unwrap_or_default(), windows_sandbox_level: turn_context.windows_sandbox_level, - justification: params.justification, + justification: params.justification.clone(), arg0: None, } } @@ -50,7 +62,7 @@ impl ShellCommandHandler { } fn to_exec_params( - params: ShellCommandToolCallParams, + params: &ShellCommandToolCallParams, session: &crate::codex::Session, turn_context: &TurnContext, ) -> ExecParams { @@ -64,7 +76,7 @@ impl ShellCommandHandler { env: create_env(&turn_context.shell_environment_policy), sandbox_permissions: params.sandbox_permissions.unwrap_or_default(), windows_sandbox_level: turn_context.windows_sandbox_level, - justification: params.justification, + justification: params.justification.clone(), arg0: None, } } @@ -108,29 +120,32 @@ impl ToolHandler for ShellHandler { match payload { ToolPayload::Function { arguments } => { let params: ShellToolCallParams = parse_arguments(&arguments)?; - let exec_params = Self::to_exec_params(params, turn.as_ref()); - Self::run_exec_like( - tool_name.as_str(), + let prefix_rule = params.prefix_rule.clone(); + let exec_params = Self::to_exec_params(¶ms, turn.as_ref()); + Self::run_exec_like(RunExecLikeArgs { + tool_name: tool_name.clone(), exec_params, + prefix_rule, session, turn, tracker, call_id, - false, - ) + freeform: false, + }) .await } ToolPayload::LocalShell { params } => { - let exec_params = Self::to_exec_params(params, turn.as_ref()); - Self::run_exec_like( - tool_name.as_str(), + let exec_params = Self::to_exec_params(¶ms, turn.as_ref()); + Self::run_exec_like(RunExecLikeArgs { + tool_name: tool_name.clone(), exec_params, + prefix_rule: None, session, turn, tracker, call_id, - false, - ) + freeform: false, + }) .await } _ => Err(FunctionCallError::RespondToModel(format!( @@ -181,30 +196,43 @@ impl ToolHandler for ShellCommandHandler { }; let params: ShellCommandToolCallParams = parse_arguments(&arguments)?; - let exec_params = Self::to_exec_params(params, session.as_ref(), turn.as_ref()); - ShellHandler::run_exec_like( - tool_name.as_str(), + let prefix_rule = params.prefix_rule.clone(); + let exec_params = Self::to_exec_params(¶ms, session.as_ref(), turn.as_ref()); + ShellHandler::run_exec_like(RunExecLikeArgs { + tool_name, exec_params, + prefix_rule, session, turn, tracker, call_id, - true, - ) + freeform: true, + }) .await } } impl ShellHandler { - async fn run_exec_like( - tool_name: &str, - exec_params: ExecParams, - session: Arc, - turn: Arc, - tracker: crate::tools::context::SharedTurnDiffTracker, - call_id: String, - freeform: bool, - ) -> Result { + async fn run_exec_like(args: RunExecLikeArgs) -> Result { + let RunExecLikeArgs { + tool_name, + exec_params, + prefix_rule, + session, + turn, + tracker, + call_id, + freeform, + } = args; + + let features = session.features(); + let request_rule_enabled = features.enabled(crate::features::Feature::RequestRule); + let prefix_rule = if request_rule_enabled { + prefix_rule + } else { + None + }; + // Approval policy guard for explicit escalation in non-OnRequest modes. if exec_params .sandbox_permissions @@ -214,9 +242,9 @@ impl ShellHandler { codex_protocol::protocol::AskForApproval::OnRequest ) { + let approval_policy = turn.approval_policy; return Err(FunctionCallError::RespondToModel(format!( - "approval policy is {policy:?}; reject command — you should not ask for escalated permissions if the approval policy is {policy:?}", - policy = turn.approval_policy + "approval policy is {approval_policy:?}; reject command — you should not ask for escalated permissions if the approval policy is {approval_policy:?}" ))); } @@ -229,7 +257,7 @@ impl ShellHandler { turn.as_ref(), Some(&tracker), &call_id, - tool_name, + tool_name.as_str(), ) .await? { @@ -246,17 +274,17 @@ impl ShellHandler { let event_ctx = ToolEventCtx::new(session.as_ref(), turn.as_ref(), &call_id, None); emitter.begin(event_ctx).await; - let features = session.features(); let exec_approval_requirement = session .services .exec_policy - .create_exec_approval_requirement_for_command( - &features, - &exec_params.command, - turn.approval_policy, - &turn.sandbox_policy, - exec_params.sandbox_permissions, - ) + .create_exec_approval_requirement_for_command(ExecApprovalRequest { + features: &features, + command: &exec_params.command, + approval_policy: turn.approval_policy, + sandbox_policy: &turn.sandbox_policy, + sandbox_permissions: exec_params.sandbox_permissions, + prefix_rule, + }) .await; let req = ShellRequest { @@ -274,7 +302,7 @@ impl ShellHandler { session: session.as_ref(), turn: turn.as_ref(), call_id: call_id.clone(), - tool_name: tool_name.to_string(), + tool_name, }; let out = orchestrator .run(&mut runtime, &req, &tool_ctx, &turn, turn.approval_policy) @@ -377,10 +405,11 @@ mod tests { login, timeout_ms, sandbox_permissions: Some(sandbox_permissions), + prefix_rule: None, justification: justification.clone(), }; - let exec_params = ShellCommandHandler::to_exec_params(params, &session, &turn_context); + let exec_params = ShellCommandHandler::to_exec_params(¶ms, &session, &turn_context); // ExecParams cannot derive Eq due to the CancellationToken field, so we manually compare the fields. assert_eq!(exec_params.command, expected_command); diff --git a/codex-rs/core/src/tools/handlers/unified_exec.rs b/codex-rs/core/src/tools/handlers/unified_exec.rs index c9c5a3a71d3..2e331bae591 100644 --- a/codex-rs/core/src/tools/handlers/unified_exec.rs +++ b/codex-rs/core/src/tools/handlers/unified_exec.rs @@ -43,6 +43,8 @@ struct ExecCommandArgs { sandbox_permissions: SandboxPermissions, #[serde(default)] justification: Option, + #[serde(default)] + prefix_rule: Option>, } #[derive(Debug, Deserialize)] @@ -135,19 +137,28 @@ impl ToolHandler for UnifiedExecHandler { max_output_tokens, sandbox_permissions, justification, + prefix_rule, .. } = args; + let features = session.features(); + let request_rule_enabled = features.enabled(crate::features::Feature::RequestRule); + let prefix_rule = if request_rule_enabled { + prefix_rule + } else { + None + }; + if sandbox_permissions.requires_escalated_permissions() && !matches!( context.turn.approval_policy, codex_protocol::protocol::AskForApproval::OnRequest ) { + let approval_policy = context.turn.approval_policy; manager.release_process_id(&process_id).await; return Err(FunctionCallError::RespondToModel(format!( - "approval policy is {policy:?}; reject command — you cannot ask for escalated permissions if the approval policy is {policy:?}", - policy = context.turn.approval_policy + "approval policy is {approval_policy:?}; reject command — you cannot ask for escalated permissions if the approval policy is {approval_policy:?}" ))); } @@ -183,6 +194,7 @@ impl ToolHandler for UnifiedExecHandler { tty, sandbox_permissions, justification, + prefix_rule, }, &context, ) diff --git a/codex-rs/core/src/tools/router.rs b/codex-rs/core/src/tools/router.rs index b1f41923bcc..d3ff84a2b04 100644 --- a/codex-rs/core/src/tools/router.rs +++ b/codex-rs/core/src/tools/router.rs @@ -114,6 +114,7 @@ impl ToolRouter { workdir: exec.working_directory, timeout_ms: exec.timeout_ms, sandbox_permissions: Some(SandboxPermissions::UseDefault), + prefix_rule: None, justification: None, }; Ok(Some(ToolCall { diff --git a/codex-rs/core/src/tools/spec.rs b/codex-rs/core/src/tools/spec.rs index a128ceeaf68..df65a94d8a7 100644 --- a/codex-rs/core/src/tools/spec.rs +++ b/codex-rs/core/src/tools/spec.rs @@ -30,6 +30,7 @@ pub(crate) struct ToolsConfig { pub web_search_mode: Option, pub collab_tools: bool, pub collaboration_modes_tools: bool, + pub request_rule_enabled: bool, pub experimental_supported_tools: Vec, } @@ -49,6 +50,7 @@ impl ToolsConfig { let include_apply_patch_tool = features.enabled(Feature::ApplyPatchFreeform); let include_collab_tools = features.enabled(Feature::Collab); let include_collaboration_modes_tools = features.enabled(Feature::CollaborationModes); + let request_rule_enabled = features.enabled(Feature::RequestRule); let shell_type = if !features.enabled(Feature::ShellTool) { ConfigShellToolType::Disabled @@ -81,6 +83,7 @@ impl ToolsConfig { web_search_mode: *web_search_mode, collab_tools: include_collab_tools, collaboration_modes_tools: include_collaboration_modes_tools, + request_rule_enabled, experimental_supported_tools: model_info.experimental_supported_tools.clone(), } } @@ -142,8 +145,50 @@ impl From for AdditionalProperties { } } -fn create_exec_command_tool() -> ToolSpec { - let properties = BTreeMap::from([ +fn create_approval_parameters(include_prefix_rule: bool) -> BTreeMap { + let mut properties = BTreeMap::from([ + ( + "sandbox_permissions".to_string(), + JsonSchema::String { + description: Some( + "Sandbox permissions for the command. Set to \"require_escalated\" to request running without sandbox restrictions; defaults to \"use_default\"." + .to_string(), + ), + }, + ), + ( + "justification".to_string(), + JsonSchema::String { + description: Some( + r#"Only set if sandbox_permissions is \"require_escalated\". + Request approval from the user to run this command outside the sandbox. + Phrased as a simple question that summarizes the purpose of the + command as it relates to the task at hand - e.g. 'Do you want to + fetch and pull the latest version of this git branch?'"# + .to_string(), + ), + }, + ), + ]); + + if include_prefix_rule { + properties.insert( + "prefix_rule".to_string(), + JsonSchema::Array { + items: Box::new(JsonSchema::String { description: None }), + description: Some( + r#"Only specify when sandbox_permissions is `require_escalated`. + Suggest a prefix command pattern that will allow you to fulfill similar requests from the user in the future. + Should be a short but reasonable prefix, e.g. [\"git\", \"pull\"] or [\"uv\", \"run\"] or [\"pytest\"]."#.to_string(), + ), + }); + } + + properties +} + +fn create_exec_command_tool(include_prefix_rule: bool) -> ToolSpec { + let mut properties = BTreeMap::from([ ( "cmd".to_string(), JsonSchema::String { @@ -199,25 +244,8 @@ fn create_exec_command_tool() -> ToolSpec { ), }, ), - ( - "sandbox_permissions".to_string(), - JsonSchema::String { - description: Some( - "Sandbox permissions for the command. Set to \"require_escalated\" to request running without sandbox restrictions; defaults to \"use_default\"." - .to_string(), - ), - }, - ), - ( - "justification".to_string(), - JsonSchema::String { - description: Some( - "Only set if sandbox_permissions is \"require_escalated\". 1-sentence explanation of why we want to run this command." - .to_string(), - ), - }, - ), ]); + properties.extend(create_approval_parameters(include_prefix_rule)); ToolSpec::Function(ResponsesApiTool { name: "exec_command".to_string(), @@ -280,8 +308,8 @@ fn create_write_stdin_tool() -> ToolSpec { }) } -fn create_shell_tool() -> ToolSpec { - let properties = BTreeMap::from([ +fn create_shell_tool(include_prefix_rule: bool) -> ToolSpec { + let mut properties = BTreeMap::from([ ( "command".to_string(), JsonSchema::Array { @@ -301,19 +329,8 @@ fn create_shell_tool() -> ToolSpec { description: Some("The timeout for the command in milliseconds".to_string()), }, ), - ( - "sandbox_permissions".to_string(), - JsonSchema::String { - description: Some("Sandbox permissions for the command. Set to \"require_escalated\" to request running without sandbox restrictions; defaults to \"use_default\".".to_string()), - }, - ), - ( - "justification".to_string(), - JsonSchema::String { - description: Some("Only set if sandbox_permissions is \"require_escalated\". 1-sentence explanation of why we want to run this command.".to_string()), - }, - ), ]); + properties.extend(create_approval_parameters(include_prefix_rule)); let description = if cfg!(windows) { r#"Runs a Powershell command (Windows) and returns its output. Arguments to `shell` will be passed to CreateProcessW(). Most commands should be prefixed with ["powershell.exe", "-Command"]. @@ -344,8 +361,8 @@ Examples of valid command strings: }) } -fn create_shell_command_tool() -> ToolSpec { - let properties = BTreeMap::from([ +fn create_shell_command_tool(include_prefix_rule: bool) -> ToolSpec { + let mut properties = BTreeMap::from([ ( "command".to_string(), JsonSchema::String { @@ -375,19 +392,8 @@ fn create_shell_command_tool() -> ToolSpec { description: Some("The timeout for the command in milliseconds".to_string()), }, ), - ( - "sandbox_permissions".to_string(), - JsonSchema::String { - description: Some("Sandbox permissions for the command. Set to \"require_escalated\" to request running without sandbox restrictions; defaults to \"use_default\".".to_string()), - }, - ), - ( - "justification".to_string(), - JsonSchema::String { - description: Some("Only set if sandbox_permissions is \"require_escalated\". 1-sentence explanation of why we want to run this command.".to_string()), - }, - ), ]); + properties.extend(create_approval_parameters(include_prefix_rule)); let description = if cfg!(windows) { r#"Runs a Powershell command (Windows) and returns its output. @@ -1292,13 +1298,13 @@ pub(crate) fn build_specs( match &config.shell_type { ConfigShellToolType::Default => { - builder.push_spec(create_shell_tool()); + builder.push_spec(create_shell_tool(config.request_rule_enabled)); } ConfigShellToolType::Local => { builder.push_spec(ToolSpec::LocalShell {}); } ConfigShellToolType::UnifiedExec => { - builder.push_spec(create_exec_command_tool()); + builder.push_spec(create_exec_command_tool(config.request_rule_enabled)); builder.push_spec(create_write_stdin_tool()); builder.register_handler("exec_command", unified_exec_handler.clone()); builder.register_handler("write_stdin", unified_exec_handler); @@ -1307,7 +1313,7 @@ pub(crate) fn build_specs( // Do nothing. } ConfigShellToolType::ShellCommand => { - builder.push_spec(create_shell_command_tool()); + builder.push_spec(create_shell_command_tool(config.request_rule_enabled)); } } @@ -1579,7 +1585,7 @@ mod tests { // Build expected from the same helpers used by the builder. let mut expected: BTreeMap = BTreeMap::from([]); for spec in [ - create_exec_command_tool(), + create_exec_command_tool(false), create_write_stdin_tool(), create_list_mcp_resources_tool(), create_list_mcp_resource_templates_tool(), @@ -2413,7 +2419,7 @@ mod tests { #[test] fn test_shell_tool() { - let tool = super::create_shell_tool(); + let tool = super::create_shell_tool(false); let ToolSpec::Function(ResponsesApiTool { description, name, .. }) = &tool @@ -2443,7 +2449,7 @@ Examples of valid command strings: #[test] fn test_shell_command_tool() { - let tool = super::create_shell_command_tool(); + let tool = super::create_shell_command_tool(false); let ToolSpec::Function(ResponsesApiTool { description, name, .. }) = &tool diff --git a/codex-rs/core/src/unified_exec/mod.rs b/codex-rs/core/src/unified_exec/mod.rs index 3f359c38662..4c45f1cf421 100644 --- a/codex-rs/core/src/unified_exec/mod.rs +++ b/codex-rs/core/src/unified_exec/mod.rs @@ -82,6 +82,7 @@ pub(crate) struct ExecCommandRequest { pub tty: bool, pub sandbox_permissions: SandboxPermissions, pub justification: Option, + pub prefix_rule: Option>, } #[derive(Debug)] @@ -205,6 +206,7 @@ mod tests { tty: true, sandbox_permissions: SandboxPermissions::UseDefault, justification: None, + prefix_rule: None, }, &context, ) diff --git a/codex-rs/core/src/unified_exec/process_manager.rs b/codex-rs/core/src/unified_exec/process_manager.rs index 3230e75b15f..5b3e83a5dc5 100644 --- a/codex-rs/core/src/unified_exec/process_manager.rs +++ b/codex-rs/core/src/unified_exec/process_manager.rs @@ -11,9 +11,9 @@ use tokio::time::Instant; use tokio_util::sync::CancellationToken; use crate::exec_env::create_env; +use crate::exec_policy::ExecApprovalRequest; use crate::protocol::ExecCommandSource; use crate::sandboxing::ExecEnv; -use crate::sandboxing::SandboxPermissions; use crate::tools::events::ToolEmitter; use crate::tools::events::ToolEventCtx; use crate::tools::events::ToolEventStage; @@ -123,14 +123,7 @@ impl UnifiedExecProcessManager { .unwrap_or_else(|| context.turn.cwd.clone()); let process = self - .open_session_with_sandbox( - &request.command, - cwd.clone(), - request.sandbox_permissions, - request.justification, - request.tty, - context, - ) + .open_session_with_sandbox(&request, cwd.clone(), context) .await; let process = match process { @@ -486,11 +479,8 @@ impl UnifiedExecProcessManager { pub(super) async fn open_session_with_sandbox( &self, - command: &[String], + request: &ExecCommandRequest, cwd: PathBuf, - sandbox_permissions: SandboxPermissions, - justification: Option, - tty: bool, context: &UnifiedExecContext, ) -> Result { let env = apply_unified_exec_env(create_env(&context.turn.shell_environment_policy)); @@ -501,21 +491,22 @@ impl UnifiedExecProcessManager { .session .services .exec_policy - .create_exec_approval_requirement_for_command( - &features, - command, - context.turn.approval_policy, - &context.turn.sandbox_policy, - sandbox_permissions, - ) + .create_exec_approval_requirement_for_command(ExecApprovalRequest { + features: &features, + command: &request.command, + approval_policy: context.turn.approval_policy, + sandbox_policy: &context.turn.sandbox_policy, + sandbox_permissions: request.sandbox_permissions, + prefix_rule: request.prefix_rule.clone(), + }) .await; let req = UnifiedExecToolRequest::new( - command.to_vec(), + request.command.clone(), cwd, env, - tty, - sandbox_permissions, - justification, + request.tty, + request.sandbox_permissions, + request.justification.clone(), exec_approval_requirement, ); let tool_ctx = ToolCtx { diff --git a/codex-rs/core/tests/suite/approvals.rs b/codex-rs/core/tests/suite/approvals.rs index ad1881f9459..1b295964e86 100644 --- a/codex-rs/core/tests/suite/approvals.rs +++ b/codex-rs/core/tests/suite/approvals.rs @@ -1754,6 +1754,16 @@ async fn approving_execpolicy_amendment_persists_policy_and_skips_future_prompts .await?; wait_for_completion(&test).await; + let developer_messages = first_results + .single_request() + .message_input_texts("developer"); + assert!( + developer_messages + .iter() + .any(|message| message.contains(r#"["touch", "allow-prefix.txt"]"#)), + "expected developer message documenting saved rule, got: {developer_messages:?}" + ); + let policy_path = test.home.path().join("rules").join("default.rules"); let policy_contents = fs::read_to_string(&policy_path)?; assert!( diff --git a/codex-rs/core/tests/suite/permissions_messages.rs b/codex-rs/core/tests/suite/permissions_messages.rs index 679c0c1f6c4..e4eded15ffe 100644 --- a/codex-rs/core/tests/suite/permissions_messages.rs +++ b/codex-rs/core/tests/suite/permissions_messages.rs @@ -4,6 +4,8 @@ use codex_core::protocol::AskForApproval; use codex_core::protocol::EventMsg; use codex_core::protocol::Op; use codex_core::protocol::SandboxPolicy; +use codex_execpolicy::Policy; +use codex_protocol::models::DeveloperInstructions; use codex_protocol::user_input::UserInput; use codex_utils_absolute_path::AbsolutePathBuf; use core_test_support::responses::ev_completed; @@ -411,10 +413,11 @@ async fn permissions_message_includes_writable_roots() -> Result<()> { exclude_tmpdir_env_var: false, exclude_slash_tmp: false, }; + let sandbox_policy_for_config = sandbox_policy.clone(); let mut builder = test_codex().with_config(move |config| { config.approval_policy = Constrained::allow_any(AskForApproval::OnRequest); - config.sandbox_policy = Constrained::allow_any(sandbox_policy); + config.sandbox_policy = Constrained::allow_any(sandbox_policy_for_config); }); let test = builder.build(&server).await?; @@ -432,39 +435,14 @@ async fn permissions_message_includes_writable_roots() -> Result<()> { let body = req.single_request().body_json(); let input = body["input"].as_array().expect("input array"); let permissions = permissions_texts(input); - let sandbox_text = "Filesystem sandboxing defines which files can be read or written. `sandbox_mode` is `workspace-write`: The sandbox permits reading files, and editing files in `cwd` and `writable_roots`. Editing files in other directories requires approval. Network access is restricted."; - let approval_text = " Approvals are your mechanism to get user consent to run shell commands without the sandbox. `approval_policy` is `on-request`: Commands will be run in the sandbox by default, and you can specify in your tool call if you want to escalate a command to run without sandboxing. If the completing the task requires escalated permissions, Do not let these settings or the sandbox deter you from attempting to accomplish the user's task.\n\nHere are scenarios where you'll need to request approval:\n- You need to run a command that writes to a directory that requires it (e.g. running tests that write to /var)\n- You need to run a GUI app (e.g., open/xdg-open/osascript) to open browsers or files.\n- You are running sandboxed and need to run a command that requires network access (e.g. installing packages)\n- If you run a command that is important to solving the user's query, but it fails because of sandboxing, rerun the command with approval. ALWAYS proceed to use the `sandbox_permissions` and `justification` parameters - do not message the user before requesting approval for the command.\n- You are about to take a potentially destructive action such as an `rm` or `git reset` that the user did not explicitly ask for.\n\nWhen requesting approval to execute a command that will require escalated privileges:\n - Provide the `sandbox_permissions` parameter with the value `\"require_escalated\"`\n - Include a short, 1 sentence explanation for why you need escalated permissions in the justification parameter"; - // Normalize paths by removing trailing slashes to match AbsolutePathBuf behavior - let normalize_path = - |p: &std::path::Path| -> String { p.to_string_lossy().trim_end_matches('/').to_string() }; - let mut roots = vec![ - normalize_path(writable.path()), - normalize_path(test.config.cwd.as_path()), - ]; - if cfg!(unix) && std::path::Path::new("/tmp").is_dir() { - roots.push("/tmp".to_string()); - } - if let Some(tmpdir) = std::env::var_os("TMPDIR") { - let tmpdir_path = std::path::PathBuf::from(&tmpdir); - if tmpdir_path.is_absolute() && !tmpdir.is_empty() { - roots.push(normalize_path(&tmpdir_path)); - } - } - let roots_text = if roots.len() == 1 { - format!(" The writable root is `{}`.", roots[0]) - } else { - format!( - " The writable roots are {}.", - roots - .iter() - .map(|root| format!("`{root}`")) - .collect::>() - .join(", ") - ) - }; - let expected = format!( - "{sandbox_text}{approval_text}{roots_text}" - ); + let expected = DeveloperInstructions::from_policy( + &sandbox_policy, + AskForApproval::OnRequest, + &Policy::empty(), + false, + test.config.cwd.as_path(), + ) + .into_text(); // Normalize line endings to handle Windows vs Unix differences let normalize_line_endings = |s: &str| s.replace("\r\n", "\n"); let expected_normalized = normalize_line_endings(&expected); diff --git a/codex-rs/execpolicy/src/policy.rs b/codex-rs/execpolicy/src/policy.rs index 1e758277b84..0da0332d0b8 100644 --- a/codex-rs/execpolicy/src/policy.rs +++ b/codex-rs/execpolicy/src/policy.rs @@ -31,6 +31,30 @@ impl Policy { &self.rules_by_program } + pub fn get_allowed_prefixes(&self) -> Vec> { + let mut prefixes = Vec::new(); + + for (_program, rules) in self.rules_by_program.iter_all() { + for rule in rules { + let Some(prefix_rule) = rule.as_any().downcast_ref::() else { + continue; + }; + if prefix_rule.decision != Decision::Allow { + continue; + } + + let mut prefix = Vec::with_capacity(prefix_rule.pattern.rest.len() + 1); + prefix.push(prefix_rule.pattern.first.as_ref().to_string()); + prefix.extend(prefix_rule.pattern.rest.iter().map(render_pattern_token)); + prefixes.push(prefix); + } + } + + prefixes.sort(); + prefixes.dedup(); + prefixes + } + pub fn add_prefix_rule(&mut self, prefix: &[String], decision: Decision) -> Result<()> { let (first_token, rest) = prefix .split_first() @@ -116,6 +140,13 @@ impl Policy { } } +fn render_pattern_token(token: &PatternToken) -> String { + match token { + PatternToken::Single(value) => value.clone(), + PatternToken::Alts(alternatives) => format!("[{}]", alternatives.join("|")), + } +} + #[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] pub struct Evaluation { diff --git a/codex-rs/execpolicy/src/rule.rs b/codex-rs/execpolicy/src/rule.rs index de78a5fda91..b7c1a7cfde2 100644 --- a/codex-rs/execpolicy/src/rule.rs +++ b/codex-rs/execpolicy/src/rule.rs @@ -96,6 +96,8 @@ pub trait Rule: Any + Debug + Send + Sync { fn program(&self) -> &str; fn matches(&self, cmd: &[String]) -> Option; + + fn as_any(&self) -> &dyn Any; } pub type RuleRef = Arc; @@ -114,6 +116,10 @@ impl Rule for PrefixRule { justification: self.justification.clone(), }) } + + fn as_any(&self) -> &dyn Any { + self + } } /// Count how many rules match each provided example and error if any example is unmatched. diff --git a/codex-rs/protocol/Cargo.toml b/codex-rs/protocol/Cargo.toml index 19ec1fcc369..c3d9d309152 100644 --- a/codex-rs/protocol/Cargo.toml +++ b/codex-rs/protocol/Cargo.toml @@ -13,6 +13,7 @@ workspace = true [dependencies] codex-git = { workspace = true } +codex-execpolicy = { workspace = true } codex-utils-absolute-path = { workspace = true } codex-utils-image = { workspace = true } icu_decimal = { workspace = true } diff --git a/codex-rs/protocol/src/models.rs b/codex-rs/protocol/src/models.rs index b0b81569164..50e8ee342f4 100644 --- a/codex-rs/protocol/src/models.rs +++ b/codex-rs/protocol/src/models.rs @@ -19,6 +19,7 @@ use crate::protocol::NetworkAccess; use crate::protocol::SandboxPolicy; use crate::protocol::WritableRoot; use crate::user_input::UserInput; +use codex_execpolicy::Policy; use codex_git::GhostCommit; use codex_utils_image::error::ImageProcessingError; use schemars::JsonSchema; @@ -205,6 +206,8 @@ const APPROVAL_POLICY_ON_FAILURE: &str = include_str!("prompts/permissions/approval_policy/on_failure.md"); const APPROVAL_POLICY_ON_REQUEST: &str = include_str!("prompts/permissions/approval_policy/on_request.md"); +const APPROVAL_POLICY_ON_REQUEST_RULE: &str = + include_str!("prompts/permissions/approval_policy/on_request_rule.md"); const SANDBOX_MODE_DANGER_FULL_ACCESS: &str = include_str!("prompts/permissions/sandbox_mode/danger_full_access.md"); @@ -217,12 +220,42 @@ impl DeveloperInstructions { Self { text: text.into() } } + pub fn from( + approval_policy: AskForApproval, + exec_policy: &Policy, + request_rule_enabled: bool, + ) -> DeveloperInstructions { + let text = match approval_policy { + AskForApproval::Never => APPROVAL_POLICY_NEVER.to_string(), + AskForApproval::UnlessTrusted => APPROVAL_POLICY_UNLESS_TRUSTED.to_string(), + AskForApproval::OnFailure => APPROVAL_POLICY_ON_FAILURE.to_string(), + AskForApproval::OnRequest => { + if !request_rule_enabled { + APPROVAL_POLICY_ON_REQUEST.to_string() + } else { + let command_prefixes = format_allow_prefixes(exec_policy); + match command_prefixes { + Some(prefixes) => { + format!("{APPROVAL_POLICY_ON_REQUEST_RULE}\n{prefixes}") + } + None => APPROVAL_POLICY_ON_REQUEST_RULE.to_string(), + } + } + } + }; + + DeveloperInstructions::new(text) + } + pub fn into_text(self) -> String { self.text } pub fn concat(self, other: impl Into) -> Self { let mut text = self.text; + if !text.ends_with('\n') { + text.push('\n'); + } text.push_str(&other.into().text); Self { text } } @@ -237,6 +270,8 @@ impl DeveloperInstructions { pub fn from_policy( sandbox_policy: &SandboxPolicy, approval_policy: AskForApproval, + exec_policy: &Policy, + request_rule_enabled: bool, cwd: &Path, ) -> Self { let network_access = if sandbox_policy.has_full_network_access() { @@ -259,6 +294,8 @@ impl DeveloperInstructions { sandbox_mode, network_access, approval_policy, + exec_policy, + request_rule_enabled, writable_roots, ) } @@ -281,6 +318,8 @@ impl DeveloperInstructions { sandbox_mode: SandboxMode, network_access: NetworkAccess, approval_policy: AskForApproval, + exec_policy: &Policy, + request_rule_enabled: bool, writable_roots: Option>, ) -> Self { let start_tag = DeveloperInstructions::new(""); @@ -290,7 +329,11 @@ impl DeveloperInstructions { sandbox_mode, network_access, )) - .concat(DeveloperInstructions::from(approval_policy)) + .concat(DeveloperInstructions::from( + approval_policy, + exec_policy, + request_rule_enabled, + )) .concat(DeveloperInstructions::from_writable_roots(writable_roots)) .concat(end_tag) } @@ -328,6 +371,37 @@ impl DeveloperInstructions { } } +pub fn render_command_prefix_list(prefixes: I) -> Option +where + I: IntoIterator, + P: AsRef<[String]>, +{ + let lines = prefixes + .into_iter() + .map(|prefix| format!("- {}", render_command_prefix(prefix.as_ref()))) + .collect::>(); + if lines.is_empty() { + return None; + } + + Some(lines.join("\n")) +} + +fn render_command_prefix(prefix: &[String]) -> String { + let tokens = prefix + .iter() + .map(|token| serde_json::to_string(token).unwrap_or_else(|_| format!("{token:?}"))) + .collect::>() + .join(", "); + format!("[{tokens}]") +} + +fn format_allow_prefixes(exec_policy: &Policy) -> Option { + let prefixes = exec_policy.get_allowed_prefixes(); + let lines = render_command_prefix_list(prefixes)?; + Some(format!("Approved command prefixes:\n{lines}")) +} + impl From for ResponseItem { fn from(di: DeveloperInstructions) -> Self { ResponseItem::Message { @@ -352,19 +426,6 @@ impl From for DeveloperInstructions { } } -impl From for DeveloperInstructions { - fn from(mode: AskForApproval) -> Self { - let text = match mode { - AskForApproval::Never => APPROVAL_POLICY_NEVER.trim_end(), - AskForApproval::UnlessTrusted => APPROVAL_POLICY_UNLESS_TRUSTED.trim_end(), - AskForApproval::OnFailure => APPROVAL_POLICY_ON_FAILURE.trim_end(), - AskForApproval::OnRequest => APPROVAL_POLICY_ON_REQUEST.trim_end(), - }; - - DeveloperInstructions::new(text) - } -} - fn should_serialize_reasoning_content(content: &Option>) -> bool { match content { Some(content) => !content @@ -633,6 +694,10 @@ pub struct ShellToolCallParams { #[serde(default, skip_serializing_if = "Option::is_none")] #[ts(optional)] pub sandbox_permissions: Option, + /// Suggests a command prefix to persist for future sessions + #[serde(default, skip_serializing_if = "Option::is_none")] + #[ts(optional)] + pub prefix_rule: Option>, #[serde(skip_serializing_if = "Option::is_none")] pub justification: Option, } @@ -653,6 +718,9 @@ pub struct ShellCommandToolCallParams { #[serde(default, skip_serializing_if = "Option::is_none")] #[ts(optional)] pub sandbox_permissions: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + #[ts(optional)] + pub prefix_rule: Option>, #[serde(skip_serializing_if = "Option::is_none")] pub justification: Option, } @@ -836,6 +904,7 @@ mod tests { use crate::config_types::SandboxMode; use crate::protocol::AskForApproval; use anyhow::Result; + use codex_execpolicy::Policy; use mcp_types::ImageContent; use mcp_types::TextContent; use pretty_assertions::assert_eq; @@ -844,15 +913,17 @@ mod tests { #[test] fn converts_sandbox_mode_into_developer_instructions() { + let workspace_write: DeveloperInstructions = SandboxMode::WorkspaceWrite.into(); assert_eq!( - DeveloperInstructions::from(SandboxMode::WorkspaceWrite), + workspace_write, DeveloperInstructions::new( "Filesystem sandboxing defines which files can be read or written. `sandbox_mode` is `workspace-write`: The sandbox permits reading files, and editing files in `cwd` and `writable_roots`. Editing files in other directories requires approval. Network access is restricted." ) ); + let read_only: DeveloperInstructions = SandboxMode::ReadOnly.into(); assert_eq!( - DeveloperInstructions::from(SandboxMode::ReadOnly), + read_only, DeveloperInstructions::new( "Filesystem sandboxing defines which files can be read or written. `sandbox_mode` is `read-only`: The sandbox only permits reading files. Network access is restricted." ) @@ -865,6 +936,8 @@ mod tests { SandboxMode::WorkspaceWrite, NetworkAccess::Enabled, AskForApproval::OnRequest, + &Policy::empty(), + false, None, ); @@ -891,6 +964,8 @@ mod tests { let instructions = DeveloperInstructions::from_policy( &policy, AskForApproval::UnlessTrusted, + &Policy::empty(), + false, &PathBuf::from("/tmp"), ); let text = instructions.into_text(); @@ -898,6 +973,30 @@ mod tests { assert!(text.contains("`approval_policy` is `unless-trusted`")); } + #[test] + fn includes_request_rule_instructions_when_enabled() { + let mut exec_policy = Policy::empty(); + exec_policy + .add_prefix_rule( + &["git".to_string(), "pull".to_string()], + codex_execpolicy::Decision::Allow, + ) + .expect("add rule"); + let instructions = DeveloperInstructions::from_permissions_with_network( + SandboxMode::WorkspaceWrite, + NetworkAccess::Enabled, + AskForApproval::OnRequest, + &exec_policy, + true, + None, + ); + + let text = instructions.into_text(); + assert!(text.contains("prefix_rule")); + assert!(text.contains("Approved command prefixes")); + assert!(text.contains(r#"["git", "pull"]"#)); + } + #[test] fn serializes_success_as_plain_string() -> Result<()> { let item = ResponseInputItem::FunctionCallOutput { @@ -1126,6 +1225,7 @@ mod tests { workdir: Some("/tmp".to_string()), timeout_ms: Some(1000), sandbox_permissions: None, + prefix_rule: None, justification: None, }, params diff --git a/codex-rs/protocol/src/prompts/permissions/approval_policy/on_request_rule.md b/codex-rs/protocol/src/prompts/permissions/approval_policy/on_request_rule.md new file mode 100644 index 00000000000..336f1fb748a --- /dev/null +++ b/codex-rs/protocol/src/prompts/permissions/approval_policy/on_request_rule.md @@ -0,0 +1,61 @@ +# Escalation Requests + +Commands are run outside the sandbox if they are approved by the user, or match an existing rule that allows it to run unrestricted. The command string is split into independent command segments at shell control operators, including but not limited to: + +- Pipes: | +- Logical operators: &&, || +- Command separators: ; +- Subshell boundaries: (...), $(...) + +Each resulting segment is evaluated independently for sandbox restrictions and approval requirements. + +Example: + +git pull | tee output.txt + +This is treated as two command segments: + +["git", "pull"] + +["tee", "output.txt"] + +## How to request escalation + +IMPORTANT: To request approval to execute a command that will require escalated privileges: + +- Provide the `sandbox_permissions` parameter with the value `"require_escalated"` +- Include a short question asking the user if they want to allow the action in `justification` parameter. e.g. "Do you want to download and install dependencies for this project?" +- Suggest a `prefix_rule` - this will be shown to the user with an option to persist the rule approval for future sessions. + +If you run a command that is important to solving the user's query, but it fails because of sandboxing, rerun the command with "require_escalated". ALWAYS proceed to use the `justification` and `prefix_rule` parameters - do not message the user before requesting approval for the command. + +## When to request escalation + +While commands are running inside the sandbox, here are some scenarios that will require escalation outside the sandbox: + +- You need to run a command that writes to a directory that requires it (e.g. running tests that write to /var) +- You need to run a GUI app (e.g., open/xdg-open/osascript) to open browsers or files. +- If you run a command that is important to solving the user's query, but it fails because of sandboxing, rerun the command with `require_escalated`. ALWAYS proceed to use the `sandbox_permissions` and `justification` parameters. do not message the user before requesting approval for the command. +- You are about to take a potentially destructive action such as an `rm` or `git reset` that the user did not explicitly ask for. + +Only run commands that require approval if it is absolutely necessary to solve the user's query, don't try and circumvent approvals by using other tools. + +## prefix_rule guidance + +When choosing a `prefix_rule`, request one that will allow you to fulfill similar requests from the user in the future without re-requesting escalation. It should be categorical and reasonably scoped to similar capabilities. You MUST NOT pass the entire command into `prefix_rule`. + + +["npm", "run", "dev"] + + +["gh", "pr", "checks"] + + +["pytest"] + + +["cargo", "test", "-p", "codex-app-server"] + +["cargo", "test"] + +