Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions codex-rs/app-server-protocol/src/protocol.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ use codex_protocol::protocol::EventMsg;
use codex_protocol::protocol::FileChange;
use codex_protocol::protocol::RateLimitSnapshot;
use codex_protocol::protocol::ReviewDecision;
use codex_protocol::protocol::SandboxCommandAssessment;
use codex_protocol::protocol::SandboxPolicy;
use codex_protocol::protocol::TurnAbortReason;
use paste::paste;
Expand Down Expand Up @@ -847,6 +848,8 @@ pub struct ExecCommandApprovalParams {
pub cwd: PathBuf,
#[serde(skip_serializing_if = "Option::is_none")]
pub reason: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub risk: Option<SandboxCommandAssessment>,
pub parsed_cmd: Vec<ParsedCommand>,
}

Expand Down Expand Up @@ -1063,6 +1066,7 @@ mod tests {
command: vec!["echo".to_string(), "hello".to_string()],
cwd: PathBuf::from("/tmp"),
reason: Some("because tests".to_string()),
risk: None,
parsed_cmd: vec![ParsedCommand::Unknown {
cmd: "echo hello".to_string(),
}],
Expand Down
3 changes: 3 additions & 0 deletions codex-rs/app-server/src/codex_message_processor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1447,6 +1447,7 @@ async fn apply_bespoke_event_handling(
command,
cwd,
reason,
risk,
parsed_cmd,
}) => {
let params = ExecCommandApprovalParams {
Expand All @@ -1455,6 +1456,7 @@ async fn apply_bespoke_event_handling(
command,
cwd,
reason,
risk,
parsed_cmd,
};
let rx = outgoing
Expand Down Expand Up @@ -1523,6 +1525,7 @@ async fn derive_config_from_params(
include_view_image_tool: None,
show_raw_agent_reasoning: None,
tools_web_search_request: None,
experimental_sandbox_command_assessment: None,
additional_writable_roots: Vec::new(),
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,7 @@ async fn test_send_user_turn_changes_approval_policy_behavior() {
],
cwd: working_directory.clone(),
reason: None,
risk: None,
parsed_cmd: vec![ParsedCommand::Unknown {
cmd: "python3 -c 'print(42)'".to_string()
}],
Expand Down
8 changes: 8 additions & 0 deletions codex-rs/core/src/client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,14 @@ impl ModelClient {
self.stream_with_task_kind(prompt, TaskKind::Regular).await
}

pub fn config(&self) -> Arc<Config> {
Arc::clone(&self.config)
}

pub fn provider(&self) -> &ModelProviderInfo {
&self.provider
}

pub(crate) async fn stream_with_task_kind(
&self,
prompt: &Prompt,
Expand Down
29 changes: 29 additions & 0 deletions codex-rs/core/src/codex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ use crate::protocol::Op;
use crate::protocol::RateLimitSnapshot;
use crate::protocol::ReviewDecision;
use crate::protocol::ReviewOutputEvent;
use crate::protocol::SandboxCommandAssessment;
use crate::protocol::SandboxPolicy;
use crate::protocol::SessionConfiguredEvent;
use crate::protocol::StreamErrorEvent;
Expand Down Expand Up @@ -755,6 +756,32 @@ impl Session {
}
}

pub(crate) async fn assess_sandbox_command(
&self,
turn_context: &TurnContext,
call_id: &str,
command: &[String],
failure_message: Option<&str>,
) -> Option<SandboxCommandAssessment> {
let config = turn_context.client.config();
let provider = turn_context.client.provider().clone();
let auth_manager = Arc::clone(&self.services.auth_manager);
let otel = self.services.otel_event_manager.clone();
crate::sandboxing::assessment::assess_command(
config,
provider,
auth_manager,
&otel,
self.conversation_id,
call_id,
command,
&turn_context.sandbox_policy,
&turn_context.cwd,
failure_message,
)
.await
}

/// Emit an exec approval request event and await the user's decision.
///
/// The request is keyed by `sub_id`/`call_id` so matching responses are delivered
Expand All @@ -767,6 +794,7 @@ impl Session {
command: Vec<String>,
cwd: PathBuf,
reason: Option<String>,
risk: Option<SandboxCommandAssessment>,
) -> ReviewDecision {
let sub_id = turn_context.sub_id.clone();
// Add the tx_approve callback to the map before sending the request.
Expand All @@ -792,6 +820,7 @@ impl Session {
command,
cwd,
reason,
risk,
parsed_cmd,
});
self.send_event(turn_context, event).await;
Expand Down
14 changes: 14 additions & 0 deletions codex-rs/core/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,9 @@ pub struct Config {

pub tools_web_search_request: bool,

/// When `true`, run a model-based assessment for commands denied by the sandbox.
pub experimental_sandbox_command_assessment: bool,

pub use_experimental_streamable_shell_tool: bool,

/// If set to `true`, used only the experimental unified exec tool.
Expand Down Expand Up @@ -958,6 +961,7 @@ pub struct ConfigToml {
pub experimental_use_unified_exec_tool: Option<bool>,
pub experimental_use_rmcp_client: Option<bool>,
pub experimental_use_freeform_apply_patch: Option<bool>,
pub experimental_sandbox_command_assessment: Option<bool>,
}

impl From<ConfigToml> for UserSavedConfig {
Expand Down Expand Up @@ -1118,6 +1122,7 @@ pub struct ConfigOverrides {
pub include_view_image_tool: Option<bool>,
pub show_raw_agent_reasoning: Option<bool>,
pub tools_web_search_request: Option<bool>,
pub experimental_sandbox_command_assessment: Option<bool>,
/// Additional directories that should be treated as writable roots for this session.
pub additional_writable_roots: Vec<PathBuf>,
}
Expand Down Expand Up @@ -1147,6 +1152,7 @@ impl Config {
include_view_image_tool: include_view_image_tool_override,
show_raw_agent_reasoning,
tools_web_search_request: override_tools_web_search_request,
experimental_sandbox_command_assessment: sandbox_command_assessment_override,
additional_writable_roots,
} = overrides;

Expand All @@ -1172,6 +1178,7 @@ impl Config {
include_apply_patch_tool: include_apply_patch_tool_override,
include_view_image_tool: include_view_image_tool_override,
web_search_request: override_tools_web_search_request,
experimental_sandbox_command_assessment: sandbox_command_assessment_override,
};

let features = Features::from_config(&cfg, &config_profile, feature_overrides);
Expand Down Expand Up @@ -1269,6 +1276,8 @@ impl Config {
let use_experimental_streamable_shell_tool = features.enabled(Feature::StreamableShell);
let use_experimental_unified_exec_tool = features.enabled(Feature::UnifiedExec);
let use_experimental_use_rmcp_client = features.enabled(Feature::RmcpClient);
let experimental_sandbox_command_assessment =
features.enabled(Feature::SandboxCommandAssessment);

let forced_chatgpt_workspace_id =
cfg.forced_chatgpt_workspace_id.as_ref().and_then(|value| {
Expand Down Expand Up @@ -1390,6 +1399,7 @@ impl Config {
forced_login_method,
include_apply_patch_tool: include_apply_patch_tool_flag,
tools_web_search_request,
experimental_sandbox_command_assessment,
use_experimental_streamable_shell_tool,
use_experimental_unified_exec_tool,
use_experimental_use_rmcp_client,
Expand Down Expand Up @@ -2873,6 +2883,7 @@ model_verbosity = "high"
forced_login_method: None,
include_apply_patch_tool: false,
tools_web_search_request: false,
experimental_sandbox_command_assessment: false,
use_experimental_streamable_shell_tool: false,
use_experimental_unified_exec_tool: false,
use_experimental_use_rmcp_client: false,
Expand Down Expand Up @@ -2941,6 +2952,7 @@ model_verbosity = "high"
forced_login_method: None,
include_apply_patch_tool: false,
tools_web_search_request: false,
experimental_sandbox_command_assessment: false,
use_experimental_streamable_shell_tool: false,
use_experimental_unified_exec_tool: false,
use_experimental_use_rmcp_client: false,
Expand Down Expand Up @@ -3024,6 +3036,7 @@ model_verbosity = "high"
forced_login_method: None,
include_apply_patch_tool: false,
tools_web_search_request: false,
experimental_sandbox_command_assessment: false,
use_experimental_streamable_shell_tool: false,
use_experimental_unified_exec_tool: false,
use_experimental_use_rmcp_client: false,
Expand Down Expand Up @@ -3093,6 +3106,7 @@ model_verbosity = "high"
forced_login_method: None,
include_apply_patch_tool: false,
tools_web_search_request: false,
experimental_sandbox_command_assessment: false,
use_experimental_streamable_shell_tool: false,
use_experimental_unified_exec_tool: false,
use_experimental_use_rmcp_client: false,
Expand Down
1 change: 1 addition & 0 deletions codex-rs/core/src/config_profile.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ pub struct ConfigProfile {
pub experimental_use_exec_command_tool: Option<bool>,
pub experimental_use_rmcp_client: Option<bool>,
pub experimental_use_freeform_apply_patch: Option<bool>,
pub experimental_sandbox_command_assessment: Option<bool>,
pub tools_web_search: Option<bool>,
pub tools_view_image: Option<bool>,
/// Optional feature toggles scoped to this profile.
Expand Down
12 changes: 12 additions & 0 deletions codex-rs/core/src/features.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ pub enum Feature {
ViewImageTool,
/// Allow the model to request web searches.
WebSearchRequest,
/// Enable the model-based risk assessments for sandboxed commands.
SandboxCommandAssessment,
}

impl Feature {
Expand Down Expand Up @@ -73,6 +75,7 @@ pub struct FeatureOverrides {
pub include_apply_patch_tool: Option<bool>,
pub include_view_image_tool: Option<bool>,
pub web_search_request: Option<bool>,
pub experimental_sandbox_command_assessment: Option<bool>,
}

impl FeatureOverrides {
Expand Down Expand Up @@ -137,6 +140,7 @@ impl Features {
let mut features = Features::with_defaults();

let base_legacy = LegacyFeatureToggles {
experimental_sandbox_command_assessment: cfg.experimental_sandbox_command_assessment,
experimental_use_freeform_apply_patch: cfg.experimental_use_freeform_apply_patch,
experimental_use_exec_command_tool: cfg.experimental_use_exec_command_tool,
experimental_use_unified_exec_tool: cfg.experimental_use_unified_exec_tool,
Expand All @@ -154,6 +158,8 @@ impl Features {
let profile_legacy = LegacyFeatureToggles {
include_apply_patch_tool: config_profile.include_apply_patch_tool,
include_view_image_tool: config_profile.include_view_image_tool,
experimental_sandbox_command_assessment: config_profile
.experimental_sandbox_command_assessment,
experimental_use_freeform_apply_patch: config_profile
.experimental_use_freeform_apply_patch,
experimental_use_exec_command_tool: config_profile.experimental_use_exec_command_tool,
Expand Down Expand Up @@ -236,4 +242,10 @@ pub const FEATURES: &[FeatureSpec] = &[
stage: Stage::Stable,
default_enabled: false,
},
FeatureSpec {
id: Feature::SandboxCommandAssessment,
key: "experimental_sandbox_command_assessment",
stage: Stage::Experimental,
default_enabled: false,
},
];
11 changes: 11 additions & 0 deletions codex-rs/core/src/features/legacy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ struct Alias {
}

const ALIASES: &[Alias] = &[
Alias {
legacy_key: "experimental_sandbox_command_assessment",
feature: Feature::SandboxCommandAssessment,
},
Alias {
legacy_key: "experimental_use_unified_exec_tool",
feature: Feature::UnifiedExec,
Expand Down Expand Up @@ -53,6 +57,7 @@ pub(crate) fn feature_for_key(key: &str) -> Option<Feature> {
pub struct LegacyFeatureToggles {
pub include_apply_patch_tool: Option<bool>,
pub include_view_image_tool: Option<bool>,
pub experimental_sandbox_command_assessment: Option<bool>,
pub experimental_use_freeform_apply_patch: Option<bool>,
pub experimental_use_exec_command_tool: Option<bool>,
pub experimental_use_unified_exec_tool: Option<bool>,
Expand All @@ -69,6 +74,12 @@ impl LegacyFeatureToggles {
self.include_apply_patch_tool,
"include_apply_patch_tool",
);
set_if_some(
features,
Feature::SandboxCommandAssessment,
self.experimental_sandbox_command_assessment,
"experimental_sandbox_command_assessment",
);
set_if_some(
features,
Feature::ApplyPatchFreeform,
Expand Down
Loading
Loading