diff --git a/codex-rs/core/src/arc_monitor.rs b/codex-rs/core/src/arc_monitor.rs deleted file mode 100644 index a4f7e038e020..000000000000 --- a/codex-rs/core/src/arc_monitor.rs +++ /dev/null @@ -1,416 +0,0 @@ -use std::env; -use std::time::Duration; - -use serde::Deserialize; -use serde::Serialize; -use tracing::warn; - -use crate::compact::content_items_to_text; -use crate::event_mapping::is_contextual_user_message_content; -use crate::session::session::Session; -use crate::session::turn_context::TurnContext; -use codex_login::default_client::build_reqwest_client; -use codex_protocol::models::MessagePhase; -use codex_protocol::models::ResponseItem; - -const ARC_MONITOR_TIMEOUT: Duration = Duration::from_secs(30); -const CODEX_ARC_MONITOR_ENDPOINT_OVERRIDE: &str = "CODEX_ARC_MONITOR_ENDPOINT_OVERRIDE"; -const CODEX_ARC_MONITOR_TOKEN: &str = "CODEX_ARC_MONITOR_TOKEN"; - -#[derive(Debug, Clone, PartialEq, Eq)] -pub(crate) enum ArcMonitorOutcome { - Ok, - SteerModel(String), - AskUser(String), -} - -#[derive(Debug, Serialize, PartialEq)] -struct ArcMonitorRequest { - metadata: ArcMonitorMetadata, - #[serde(skip_serializing_if = "Option::is_none")] - messages: Option>, - #[serde(skip_serializing_if = "Option::is_none")] - input: Option>, - #[serde(skip_serializing_if = "Option::is_none")] - policies: Option, - action: serde_json::Map, -} - -#[derive(Debug, Deserialize)] -#[serde(deny_unknown_fields)] -struct ArcMonitorResult { - outcome: ArcMonitorResultOutcome, - short_reason: String, - rationale: String, - risk_score: u8, - risk_level: ArcMonitorRiskLevel, - evidence: Vec, -} - -#[derive(Debug, Serialize, PartialEq)] -struct ArcMonitorChatMessage { - role: String, - content: serde_json::Value, -} - -#[derive(Debug, Serialize, PartialEq)] -struct ArcMonitorPolicies { - user: Option, - developer: Option, -} - -#[derive(Debug, Serialize, PartialEq)] -#[serde(deny_unknown_fields)] -struct ArcMonitorMetadata { - codex_thread_id: String, - codex_turn_id: String, - #[serde(skip_serializing_if = "Option::is_none")] - conversation_id: Option, - #[serde(skip_serializing_if = "Option::is_none")] - protection_client_callsite: Option, -} - -#[derive(Debug, Deserialize)] -#[serde(deny_unknown_fields)] -#[allow(dead_code)] -struct ArcMonitorEvidence { - message: String, - why: String, -} - -#[derive(Debug, Deserialize)] -#[serde(rename_all = "kebab-case")] -enum ArcMonitorResultOutcome { - Ok, - SteerModel, - AskUser, -} - -#[derive(Debug, Deserialize)] -#[serde(rename_all = "lowercase")] -enum ArcMonitorRiskLevel { - Low, - Medium, - High, - Critical, -} - -pub(crate) async fn monitor_action( - sess: &Session, - turn_context: &TurnContext, - action: serde_json::Value, - protection_client_callsite: &'static str, -) -> ArcMonitorOutcome { - let auth = match turn_context.auth_manager.as_ref() { - Some(auth_manager) => match auth_manager.auth().await { - Some(auth) if auth.uses_codex_backend() => Some(auth), - _ => None, - }, - None => None, - }; - let env_token = read_non_empty_env_var(CODEX_ARC_MONITOR_TOKEN); - if env_token.is_none() && auth.is_none() { - return ArcMonitorOutcome::Ok; - } - - let url = read_non_empty_env_var(CODEX_ARC_MONITOR_ENDPOINT_OVERRIDE).unwrap_or_else(|| { - format!( - "{}/codex/safety/arc", - turn_context.config.chatgpt_base_url.trim_end_matches('/') - ) - }); - let action = match action { - serde_json::Value::Object(action) => action, - _ => { - warn!("skipping safety monitor because action payload is not an object"); - return ArcMonitorOutcome::Ok; - } - }; - let body = - build_arc_monitor_request(sess, turn_context, action, protection_client_callsite).await; - let client = build_reqwest_client(); - let mut request = client.post(&url).timeout(ARC_MONITOR_TIMEOUT).json(&body); - if let Some(token) = env_token { - request = request.bearer_auth(token); - } else if let Some(auth) = auth.as_ref() { - request = - request.headers(codex_model_provider::auth_provider_from_auth(auth).to_auth_headers()); - } - - let response = match request.send().await { - Ok(response) => response, - Err(err) => { - warn!(error = %err, %url, "safety monitor request failed"); - return ArcMonitorOutcome::Ok; - } - }; - let status = response.status(); - if !status.is_success() { - let response_text = response.text().await.unwrap_or_default(); - warn!( - %status, - %url, - response_text, - "safety monitor returned non-success status" - ); - return ArcMonitorOutcome::Ok; - } - - let response = match response.json::().await { - Ok(response) => response, - Err(err) => { - warn!(error = %err, %url, "failed to parse safety monitor response"); - return ArcMonitorOutcome::Ok; - } - }; - tracing::debug!( - risk_score = response.risk_score, - risk_level = ?response.risk_level, - evidence_count = response.evidence.len(), - "safety monitor completed" - ); - - let short_reason = response.short_reason.trim(); - let rationale = response.rationale.trim(); - match response.outcome { - ArcMonitorResultOutcome::Ok => ArcMonitorOutcome::Ok, - ArcMonitorResultOutcome::AskUser => { - if !short_reason.is_empty() { - ArcMonitorOutcome::AskUser(short_reason.to_string()) - } else if !rationale.is_empty() { - ArcMonitorOutcome::AskUser(rationale.to_string()) - } else { - ArcMonitorOutcome::AskUser( - "Additional confirmation is required before this tool call can continue." - .to_string(), - ) - } - } - ArcMonitorResultOutcome::SteerModel => { - if !rationale.is_empty() { - ArcMonitorOutcome::SteerModel(rationale.to_string()) - } else if !short_reason.is_empty() { - ArcMonitorOutcome::SteerModel(short_reason.to_string()) - } else { - ArcMonitorOutcome::SteerModel( - "Tool call was cancelled because of safety risks.".to_string(), - ) - } - } - } -} - -fn read_non_empty_env_var(key: &str) -> Option { - match env::var(key) { - Ok(value) => { - let value = value.trim(); - (!value.is_empty()).then(|| value.to_string()) - } - Err(env::VarError::NotPresent) => None, - Err(env::VarError::NotUnicode(_)) => { - warn!( - env_var = key, - "ignoring non-unicode safety monitor env override" - ); - None - } - } -} - -async fn build_arc_monitor_request( - sess: &Session, - turn_context: &TurnContext, - action: serde_json::Map, - protection_client_callsite: &'static str, -) -> ArcMonitorRequest { - let history = sess.clone_history().await; - let mut messages = build_arc_monitor_messages(history.raw_items()); - if messages.is_empty() { - messages.push(build_arc_monitor_message( - "user", - serde_json::Value::String( - "No prior conversation history is available for this ARC evaluation.".to_string(), - ), - )); - } - - let conversation_id = sess.conversation_id.to_string(); - ArcMonitorRequest { - metadata: ArcMonitorMetadata { - codex_thread_id: conversation_id.clone(), - codex_turn_id: turn_context.sub_id.clone(), - conversation_id: Some(conversation_id), - protection_client_callsite: Some(protection_client_callsite.to_string()), - }, - messages: Some(messages), - input: None, - policies: Some(ArcMonitorPolicies { - user: None, - developer: None, - }), - action, - } -} - -fn build_arc_monitor_messages(items: &[ResponseItem]) -> Vec { - let last_tool_call_index = items - .iter() - .enumerate() - .rev() - .find(|(_, item)| { - matches!( - item, - ResponseItem::LocalShellCall { .. } - | ResponseItem::FunctionCall { .. } - | ResponseItem::CustomToolCall { .. } - | ResponseItem::WebSearchCall { .. } - ) - }) - .map(|(index, _)| index); - let last_encrypted_reasoning_index = items - .iter() - .enumerate() - .rev() - .find(|(_, item)| { - matches!( - item, - ResponseItem::Reasoning { - encrypted_content: Some(encrypted_content), - .. - } if !encrypted_content.trim().is_empty() - ) - }) - .map(|(index, _)| index); - - items - .iter() - .enumerate() - .filter_map(|(index, item)| { - build_arc_monitor_message_item( - item, - index, - last_tool_call_index, - last_encrypted_reasoning_index, - ) - }) - .collect() -} - -fn build_arc_monitor_message_item( - item: &ResponseItem, - index: usize, - last_tool_call_index: Option, - last_encrypted_reasoning_index: Option, -) -> Option { - match item { - ResponseItem::Message { role, content, .. } if role == "user" => { - if is_contextual_user_message_content(content) { - None - } else { - content_items_to_text(content) - .map(|text| build_arc_monitor_text_message("user", "input_text", text)) - } - } - ResponseItem::Message { - role, - content, - phase: Some(MessagePhase::FinalAnswer), - .. - } if role == "assistant" => content_items_to_text(content) - .map(|text| build_arc_monitor_text_message("assistant", "output_text", text)), - ResponseItem::Message { .. } => None, - ResponseItem::Reasoning { - encrypted_content: Some(encrypted_content), - .. - } if Some(index) == last_encrypted_reasoning_index - && !encrypted_content.trim().is_empty() => - { - Some(build_arc_monitor_message( - "assistant", - serde_json::json!([{ - "type": "encrypted_reasoning", - "encrypted_content": encrypted_content, - }]), - )) - } - ResponseItem::Reasoning { .. } => None, - ResponseItem::LocalShellCall { action, .. } if Some(index) == last_tool_call_index => { - Some(build_arc_monitor_message( - "assistant", - serde_json::json!([{ - "type": "tool_call", - "tool_name": "shell", - "action": action, - }]), - )) - } - ResponseItem::FunctionCall { - name, arguments, .. - } if Some(index) == last_tool_call_index => Some(build_arc_monitor_message( - "assistant", - serde_json::json!([{ - "type": "tool_call", - "tool_name": name, - "arguments": arguments, - }]), - )), - ResponseItem::CustomToolCall { name, input, .. } if Some(index) == last_tool_call_index => { - Some(build_arc_monitor_message( - "assistant", - serde_json::json!([{ - "type": "tool_call", - "tool_name": name, - "input": input, - }]), - )) - } - ResponseItem::WebSearchCall { action, .. } if Some(index) == last_tool_call_index => { - Some(build_arc_monitor_message( - "assistant", - serde_json::json!([{ - "type": "tool_call", - "tool_name": "web_search", - "action": action, - }]), - )) - } - ResponseItem::LocalShellCall { .. } - | ResponseItem::FunctionCall { .. } - | ResponseItem::CustomToolCall { .. } - | ResponseItem::ToolSearchCall { .. } - | ResponseItem::WebSearchCall { .. } - | ResponseItem::FunctionCallOutput { .. } - | ResponseItem::CustomToolCallOutput { .. } - | ResponseItem::ToolSearchOutput { .. } - | ResponseItem::ImageGenerationCall { .. } - | ResponseItem::Compaction { .. } - | ResponseItem::CompactionTrigger - | ResponseItem::ContextCompaction { .. } - | ResponseItem::Other => None, - } -} - -fn build_arc_monitor_text_message( - role: &str, - part_type: &str, - text: String, -) -> ArcMonitorChatMessage { - build_arc_monitor_message( - role, - serde_json::json!([{ - "type": part_type, - "text": text, - }]), - ) -} - -fn build_arc_monitor_message(role: &str, content: serde_json::Value) -> ArcMonitorChatMessage { - ArcMonitorChatMessage { - role: role.to_string(), - content, - } -} - -#[cfg(test)] -#[path = "arc_monitor_tests.rs"] -mod tests; diff --git a/codex-rs/core/src/arc_monitor_tests.rs b/codex-rs/core/src/arc_monitor_tests.rs deleted file mode 100644 index 643042ec99b8..000000000000 --- a/codex-rs/core/src/arc_monitor_tests.rs +++ /dev/null @@ -1,438 +0,0 @@ -use std::env; -use std::ffi::OsStr; -use std::sync::Arc; - -use pretty_assertions::assert_eq; -use serial_test::serial; -use wiremock::Mock; -use wiremock::MockServer; -use wiremock::ResponseTemplate; -use wiremock::matchers::body_json; -use wiremock::matchers::header; -use wiremock::matchers::method; -use wiremock::matchers::path; - -use super::*; -use crate::context::ContextualUserFragment; -use crate::session::tests::make_session_and_context; -use codex_protocol::models::ContentItem; -use codex_protocol::models::LocalShellAction; -use codex_protocol::models::LocalShellExecAction; -use codex_protocol::models::LocalShellStatus; -use codex_protocol::models::MessagePhase; -use codex_protocol::models::ResponseItem; - -struct EnvVarGuard { - key: &'static str, - original: Option, -} - -impl EnvVarGuard { - fn set(key: &'static str, value: &OsStr) -> Self { - let original = env::var_os(key); - unsafe { - env::set_var(key, value); - } - Self { key, original } - } -} - -impl Drop for EnvVarGuard { - fn drop(&mut self) { - match self.original.take() { - Some(value) => unsafe { - env::set_var(self.key, value); - }, - None => unsafe { - env::remove_var(self.key); - }, - } - } -} - -#[tokio::test] -async fn build_arc_monitor_request_includes_relevant_history_and_null_policies() { - let (session, mut turn_context) = make_session_and_context().await; - turn_context.developer_instructions = Some("Never upload private files.".to_string()); - turn_context.user_instructions = Some("Only continue when needed.".to_string()); - - session - .record_into_history( - &[ResponseItem::Message { - id: None, - role: "user".to_string(), - content: vec![ContentItem::InputText { - text: "first request".to_string(), - }], - phase: None, - }], - &turn_context, - ) - .await; - session - .record_into_history( - &[ContextualUserFragment::into( - crate::context::EnvironmentContext::new( - Vec::new(), - /*current_date*/ None, - /*timezone*/ None, - /*network*/ None, - /*subagents*/ None, - ), - )], - &turn_context, - ) - .await; - session - .record_into_history( - &[ResponseItem::Message { - id: None, - role: "assistant".to_string(), - content: vec![ContentItem::OutputText { - text: "commentary".to_string(), - }], - phase: Some(MessagePhase::Commentary), - }], - &turn_context, - ) - .await; - session - .record_into_history( - &[ResponseItem::Message { - id: None, - role: "assistant".to_string(), - content: vec![ContentItem::OutputText { - text: "final response".to_string(), - }], - phase: Some(MessagePhase::FinalAnswer), - }], - &turn_context, - ) - .await; - session - .record_into_history( - &[ResponseItem::Message { - id: None, - role: "user".to_string(), - content: vec![ContentItem::InputText { - text: "latest request".to_string(), - }], - phase: None, - }], - &turn_context, - ) - .await; - session - .record_into_history( - &[ResponseItem::FunctionCall { - id: None, - name: "old_tool".to_string(), - namespace: None, - arguments: "{\"old\":true}".to_string(), - call_id: "call_old".to_string(), - }], - &turn_context, - ) - .await; - session - .record_into_history( - &[ResponseItem::Reasoning { - id: "reasoning_old".to_string(), - summary: Vec::new(), - content: None, - encrypted_content: Some("encrypted-old".to_string()), - }], - &turn_context, - ) - .await; - session - .record_into_history( - &[ResponseItem::LocalShellCall { - id: None, - call_id: Some("shell_call".to_string()), - status: LocalShellStatus::Completed, - action: LocalShellAction::Exec(LocalShellExecAction { - command: vec!["pwd".to_string()], - timeout_ms: Some(1000), - working_directory: Some("/tmp".to_string()), - env: None, - user: None, - }), - }], - &turn_context, - ) - .await; - session - .record_into_history( - &[ResponseItem::Reasoning { - id: "reasoning_latest".to_string(), - summary: Vec::new(), - content: None, - encrypted_content: Some("encrypted-latest".to_string()), - }], - &turn_context, - ) - .await; - - let request = build_arc_monitor_request( - &session, - &turn_context, - serde_json::from_value(serde_json::json!({ "tool": "mcp_tool_call" })) - .expect("action should deserialize"), - "normal", - ) - .await; - - assert_eq!( - request, - ArcMonitorRequest { - metadata: ArcMonitorMetadata { - codex_thread_id: session.conversation_id.to_string(), - codex_turn_id: turn_context.sub_id.clone(), - conversation_id: Some(session.conversation_id.to_string()), - protection_client_callsite: Some("normal".to_string()), - }, - messages: Some(vec![ - ArcMonitorChatMessage { - role: "user".to_string(), - content: serde_json::json!([{ - "type": "input_text", - "text": "first request", - }]), - }, - ArcMonitorChatMessage { - role: "assistant".to_string(), - content: serde_json::json!([{ - "type": "output_text", - "text": "final response", - }]), - }, - ArcMonitorChatMessage { - role: "user".to_string(), - content: serde_json::json!([{ - "type": "input_text", - "text": "latest request", - }]), - }, - ArcMonitorChatMessage { - role: "assistant".to_string(), - content: serde_json::json!([{ - "type": "tool_call", - "tool_name": "shell", - "action": { - "type": "exec", - "command": ["pwd"], - "timeout_ms": 1000, - "working_directory": "/tmp", - "env": null, - "user": null, - }, - }]), - }, - ArcMonitorChatMessage { - role: "assistant".to_string(), - content: serde_json::json!([{ - "type": "encrypted_reasoning", - "encrypted_content": "encrypted-latest", - }]), - }, - ]), - input: None, - policies: Some(ArcMonitorPolicies { - user: None, - developer: None, - }), - action: serde_json::from_value(serde_json::json!({ "tool": "mcp_tool_call" })) - .expect("action should deserialize"), - } - ); -} - -#[tokio::test] -#[serial(arc_monitor_env)] -async fn monitor_action_posts_expected_arc_request() { - let server = MockServer::start().await; - let (session, mut turn_context) = make_session_and_context().await; - turn_context.auth_manager = Some(crate::test_support::auth_manager_from_auth( - codex_login::CodexAuth::create_dummy_chatgpt_auth_for_testing(), - )); - turn_context.developer_instructions = Some("Developer policy".to_string()); - turn_context.user_instructions = Some("User policy".to_string()); - - let mut config = (*turn_context.config).clone(); - config.chatgpt_base_url = server.uri(); - turn_context.config = Arc::new(config); - - session - .record_into_history( - &[ResponseItem::Message { - id: None, - role: "user".to_string(), - content: vec![ContentItem::InputText { - text: "please run the tool".to_string(), - }], - phase: None, - }], - &turn_context, - ) - .await; - - Mock::given(method("POST")) - .and(path("/codex/safety/arc")) - .and(header("authorization", "Bearer Access Token")) - .and(header("chatgpt-account-id", "account_id")) - .and(body_json(serde_json::json!({ - "metadata": { - "codex_thread_id": session.conversation_id.to_string(), - "codex_turn_id": turn_context.sub_id.clone(), - "conversation_id": session.conversation_id.to_string(), - "protection_client_callsite": "normal", - }, - "messages": [{ - "role": "user", - "content": [{ - "type": "input_text", - "text": "please run the tool", - }], - }], - "policies": { - "developer": null, - "user": null, - }, - "action": { - "tool": "mcp_tool_call", - }, - }))) - .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!({ - "outcome": "ask-user", - "short_reason": "needs confirmation", - "rationale": "tool call needs additional review", - "risk_score": 42, - "risk_level": "medium", - "evidence": [{ - "message": "browser_navigate", - "why": "tool call needs additional review", - }], - }))) - .expect(1) - .mount(&server) - .await; - - let outcome = monitor_action( - &session, - &turn_context, - serde_json::json!({ "tool": "mcp_tool_call" }), - "normal", - ) - .await; - - assert_eq!( - outcome, - ArcMonitorOutcome::AskUser("needs confirmation".to_string()) - ); -} - -#[tokio::test] -#[serial(arc_monitor_env)] -async fn monitor_action_uses_env_url_and_token_overrides() { - let server = MockServer::start().await; - let _url_guard = EnvVarGuard::set( - CODEX_ARC_MONITOR_ENDPOINT_OVERRIDE, - OsStr::new(&format!("{}/override/arc", server.uri())), - ); - let _token_guard = EnvVarGuard::set(CODEX_ARC_MONITOR_TOKEN, OsStr::new("override-token")); - - let (session, turn_context) = make_session_and_context().await; - session - .record_into_history( - &[ResponseItem::Message { - id: None, - role: "user".to_string(), - content: vec![ContentItem::InputText { - text: "please run the tool".to_string(), - }], - phase: None, - }], - &turn_context, - ) - .await; - - Mock::given(method("POST")) - .and(path("/override/arc")) - .and(header("authorization", "Bearer override-token")) - .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!({ - "outcome": "steer-model", - "short_reason": "needs approval", - "rationale": "high-risk action", - "risk_score": 96, - "risk_level": "critical", - "evidence": [{ - "message": "browser_navigate", - "why": "high-risk action", - }], - }))) - .expect(1) - .mount(&server) - .await; - - let outcome = monitor_action( - &session, - &turn_context, - serde_json::json!({ "tool": "mcp_tool_call" }), - "normal", - ) - .await; - - assert_eq!( - outcome, - ArcMonitorOutcome::SteerModel("high-risk action".to_string()) - ); -} - -#[tokio::test] -#[serial(arc_monitor_env)] -async fn monitor_action_rejects_legacy_response_fields() { - let server = MockServer::start().await; - Mock::given(method("POST")) - .and(path("/codex/safety/arc")) - .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!({ - "outcome": "steer-model", - "reason": "legacy high-risk action", - "monitorRequestId": "arc_456", - }))) - .expect(1) - .mount(&server) - .await; - - let (session, mut turn_context) = make_session_and_context().await; - turn_context.auth_manager = Some(crate::test_support::auth_manager_from_auth( - codex_login::CodexAuth::create_dummy_chatgpt_auth_for_testing(), - )); - let mut config = (*turn_context.config).clone(); - config.chatgpt_base_url = server.uri(); - turn_context.config = Arc::new(config); - - session - .record_into_history( - &[ResponseItem::Message { - id: None, - role: "user".to_string(), - content: vec![ContentItem::InputText { - text: "please run the tool".to_string(), - }], - phase: None, - }], - &turn_context, - ) - .await; - - let outcome = monitor_action( - &session, - &turn_context, - serde_json::json!({ "tool": "mcp_tool_call" }), - "normal", - ) - .await; - - assert_eq!(outcome, ArcMonitorOutcome::Ok); -} diff --git a/codex-rs/core/src/guardian/mod.rs b/codex-rs/core/src/guardian/mod.rs index 131433208a93..058c19d008e3 100644 --- a/codex-rs/core/src/guardian/mod.rs +++ b/codex-rs/core/src/guardian/mod.rs @@ -26,6 +26,7 @@ use serde::Serialize; pub(crate) use approval_request::GuardianApprovalRequest; pub(crate) use approval_request::GuardianMcpAnnotations; pub(crate) use approval_request::GuardianNetworkAccessTrigger; +#[cfg(test)] pub(crate) use approval_request::guardian_approval_request_to_json; pub(crate) use review::guardian_rejection_message; pub(crate) use review::guardian_timeout_message; diff --git a/codex-rs/core/src/lib.rs b/codex-rs/core/src/lib.rs index aca6049698e2..d5c123763857 100644 --- a/codex-rs/core/src/lib.rs +++ b/codex-rs/core/src/lib.rs @@ -7,7 +7,6 @@ mod apply_patch; mod apps; -mod arc_monitor; mod client; mod client_common; mod realtime_context; diff --git a/codex-rs/core/src/mcp_tool_call.rs b/codex-rs/core/src/mcp_tool_call.rs index 902ede574ef9..dda8a17b431e 100644 --- a/codex-rs/core/src/mcp_tool_call.rs +++ b/codex-rs/core/src/mcp_tool_call.rs @@ -3,22 +3,12 @@ use std::collections::HashMap; use std::time::Duration; use std::time::Instant; -use codex_app_server_protocol::ConfigLayerSource; -use codex_app_server_protocol::McpElicitationObjectType; -use codex_app_server_protocol::McpElicitationSchema; -use codex_app_server_protocol::McpServerElicitationRequest; -use codex_app_server_protocol::McpServerElicitationRequestParams; -use tracing::error; - -use crate::arc_monitor::ArcMonitorOutcome; -use crate::arc_monitor::monitor_action; use crate::config::Config; use crate::config::edit::ConfigEdit; use crate::config::edit::ConfigEditsBuilder; use crate::connectors; use crate::guardian::GuardianApprovalRequest; use crate::guardian::GuardianMcpAnnotations; -use crate::guardian::guardian_approval_request_to_json; use crate::guardian::guardian_rejection_message; use crate::guardian::guardian_timeout_message; use crate::guardian::new_guardian_review_id; @@ -36,6 +26,11 @@ use crate::turn_metadata::McpTurnMetadataContext; use codex_analytics::AppInvocation; use codex_analytics::InvocationType; use codex_analytics::build_track_events_context; +use codex_app_server_protocol::ConfigLayerSource; +use codex_app_server_protocol::McpElicitationObjectType; +use codex_app_server_protocol::McpElicitationSchema; +use codex_app_server_protocol::McpServerElicitationRequest; +use codex_app_server_protocol::McpServerElicitationRequestParams; use codex_config::types::AppToolApproval; use codex_features::Feature; use codex_hooks::PermissionRequestDecision; @@ -91,6 +86,7 @@ use std::sync::Arc; use toml_edit::value; use tracing::Instrument; use tracing::Span; +use tracing::error; use tracing::field::Empty; use url::Url; @@ -259,18 +255,6 @@ pub(crate) async fn handle_mcp_tool_call( ) .await } - McpToolApprovalDecision::BlockedBySafetyMonitor(message) => { - notify_mcp_tool_call_skip( - sess.as_ref(), - turn_context.as_ref(), - &call_id, - invocation, - mcp_app_resource_uri.clone(), - message, - /*already_started*/ true, - ) - .await - } }; let status = if result.is_ok() { "ok" } else { "error" }; @@ -966,7 +950,6 @@ enum McpToolApprovalDecision { AcceptAndRemember, Decline { message: Option }, Cancel, - BlockedBySafetyMonitor(String), } pub(crate) struct McpToolApprovalMetadata { @@ -1130,8 +1113,6 @@ pub(crate) const MCP_TOOL_APPROVAL_ACCEPT_FOR_SESSION: &str = "Allow for this se pub(crate) const MCP_TOOL_APPROVAL_DECLINE_SYNTHETIC: &str = "__codex_mcp_decline__"; const MCP_TOOL_APPROVAL_ACCEPT_AND_REMEMBER: &str = "Allow and don't ask me again"; const MCP_TOOL_APPROVAL_CANCEL: &str = "Cancel"; -const MCP_TOOL_CALL_ARC_MONITOR_CALLSITE_DEFAULT: &str = "mcp_tool_call__default"; -const MCP_TOOL_CALL_ARC_MONITOR_CALLSITE_ALWAYS_ALLOW: &str = "mcp_tool_call__always_allow"; pub(crate) fn is_mcp_tool_approval_question_id(question_id: &str) -> bool { question_id @@ -1184,31 +1165,6 @@ async fn maybe_request_mcp_tool_approval( return None; } - let mut monitor_reason = None; - let auto_approved_by_policy = approval_mode == AppToolApproval::Approve; - - if auto_approved_by_policy { - match maybe_monitor_auto_approved_mcp_tool_call( - sess, - turn_context, - invocation, - metadata, - approval_mode, - ) - .await - { - ArcMonitorOutcome::Ok => return None, - ArcMonitorOutcome::AskUser(reason) => { - monitor_reason = Some(reason); - } - ArcMonitorOutcome::SteerModel(reason) => { - return Some(McpToolApprovalDecision::BlockedBySafetyMonitor( - arc_monitor_interrupt_message(&reason), - )); - } - } - } - let session_approval_key = session_mcp_tool_approval_key(invocation, metadata, approval_mode); let persistent_approval_key = persistent_mcp_tool_approval_key(invocation, metadata, approval_mode); @@ -1255,7 +1211,7 @@ async fn maybe_request_mcp_tool_approval( turn_context, review_id.clone(), build_guardian_mcp_tool_review_request(call_id, invocation, metadata), - monitor_reason.clone(), + /*retry_reason*/ None, ) .await; let decision = mcp_tool_approval_decision_from_guardian(sess, &review_id, decision).await; @@ -1287,7 +1243,7 @@ async fn maybe_request_mcp_tool_approval( .as_ref() .map(|rendered_template| rendered_template.tool_params_display.clone()) .or_else(|| build_mcp_tool_approval_display_params(invocation.arguments.as_ref())); - let mut question = build_mcp_tool_approval_question( + let question = build_mcp_tool_approval_question( question_id.clone(), &invocation.server, &invocation.tool, @@ -1297,8 +1253,6 @@ async fn maybe_request_mcp_tool_approval( .as_ref() .map(|rendered_template| rendered_template.question.as_str()), ); - question.question = - mcp_tool_approval_question_text(question.question, monitor_reason.as_deref()); if tool_call_mcp_elicitation_enabled { let request_id = rmcp::model::RequestId::String( format!("{MCP_TOOL_APPROVAL_QUESTION_ID_PREFIX}_{call_id}").into(), @@ -1315,11 +1269,9 @@ async fn maybe_request_mcp_tool_approval( .or(invocation.arguments.as_ref()), tool_params_display: tool_params_display.as_deref(), question, - message_override: rendered_template.as_ref().and_then(|rendered_template| { - monitor_reason - .is_none() - .then_some(rendered_template.elicitation_message.as_str()) - }), + message_override: rendered_template + .as_ref() + .map(|rendered_template| rendered_template.elicitation_message.as_str()), prompt_options, }, ); @@ -1361,37 +1313,6 @@ async fn maybe_request_mcp_tool_approval( Some(decision) } -async fn maybe_monitor_auto_approved_mcp_tool_call( - sess: &Session, - turn_context: &TurnContext, - invocation: &McpInvocation, - metadata: Option<&McpToolApprovalMetadata>, - approval_mode: AppToolApproval, -) -> ArcMonitorOutcome { - let action = prepare_arc_request_action(invocation, metadata); - monitor_action( - sess, - turn_context, - action, - mcp_tool_approval_callsite_mode(approval_mode, turn_context), - ) - .await -} - -fn prepare_arc_request_action( - invocation: &McpInvocation, - metadata: Option<&McpToolApprovalMetadata>, -) -> serde_json::Value { - let request = build_guardian_mcp_tool_review_request("arc-monitor", invocation, metadata); - match guardian_approval_request_to_json(&request) { - Ok(action) => action, - Err(error) => { - error!(error = %error, "failed to serialize guardian MCP approval request for ARC"); - serde_json::Value::Null - } - } -} - fn session_mcp_tool_approval_key( invocation: &McpInvocation, metadata: Option<&McpToolApprovalMetadata>, @@ -1466,18 +1387,6 @@ async fn mcp_tool_approval_decision_from_guardian( } } -fn mcp_tool_approval_callsite_mode( - approval_mode: AppToolApproval, - _turn_context: &TurnContext, -) -> &'static str { - match approval_mode { - AppToolApproval::Approve => MCP_TOOL_CALL_ARC_MONITOR_CALLSITE_ALWAYS_ALLOW, - AppToolApproval::Auto | AppToolApproval::Prompt => { - MCP_TOOL_CALL_ARC_MONITOR_CALLSITE_DEFAULT - } - } -} - #[expect( clippy::await_holding_invalid_type, reason = "MCP approval metadata reads through the session-owned manager guard" @@ -1667,24 +1576,6 @@ fn build_mcp_tool_approval_fallback_message( format!("Allow {actor} to run tool \"{tool_name}\"?") } -fn mcp_tool_approval_question_text(question: String, monitor_reason: Option<&str>) -> String { - match monitor_reason.map(str::trim) { - Some(reason) if !reason.is_empty() => { - format!("Tool call needs your approval. Reason: {reason}") - } - _ => question, - } -} - -fn arc_monitor_interrupt_message(reason: &str) -> String { - let reason = reason.trim(); - if reason.is_empty() { - "Tool call was cancelled because of safety risks.".to_string() - } else { - format!("Tool call was cancelled because of safety risks: {reason}") - } -} - fn build_mcp_tool_approval_elicitation_request( sess: &Session, turn_context: &TurnContext, @@ -1986,8 +1877,7 @@ async fn apply_mcp_tool_approval_decision( } McpToolApprovalDecision::Accept | McpToolApprovalDecision::Decline { .. } - | McpToolApprovalDecision::Cancel - | McpToolApprovalDecision::BlockedBySafetyMonitor(_) => {} + | McpToolApprovalDecision::Cancel => {} } } diff --git a/codex-rs/core/src/mcp_tool_call_tests.rs b/codex-rs/core/src/mcp_tool_call_tests.rs index 25f19c87ddae..084bf57e2079 100644 --- a/codex-rs/core/src/mcp_tool_call_tests.rs +++ b/codex-rs/core/src/mcp_tool_call_tests.rs @@ -405,17 +405,6 @@ fn prompt_mode_does_not_allow_persistent_remember() { ); } -#[test] -fn approval_question_text_prepends_safety_reason() { - assert_eq!( - mcp_tool_approval_question_text( - "Allow this action?".to_string(), - Some("This tool may contact an external system."), - ), - "Tool call needs your approval. Reason: This tool may contact an external system." - ); -} - #[tokio::test] async fn mcp_tool_call_span_records_expected_fields() { let buffer: &'static std::sync::Mutex> = @@ -1629,42 +1618,6 @@ fn guardian_mcp_review_request_includes_annotations_when_present() { ); } -#[test] -fn prepare_arc_request_action_serializes_mcp_tool_call_shape() { - let invocation = McpInvocation { - server: CODEX_APPS_MCP_SERVER_NAME.to_string(), - tool: "browser_navigate".to_string(), - arguments: Some(serde_json::json!({ - "url": "https://example.com", - })), - }; - - let action = prepare_arc_request_action( - &invocation, - Some(&approval_metadata( - /*connector_id*/ None, - Some("Playwright"), - /*connector_description*/ None, - Some("Navigate"), - /*tool_description*/ None, - )), - ); - - assert_eq!( - action, - serde_json::json!({ - "tool": "mcp_tool_call", - "server": CODEX_APPS_MCP_SERVER_NAME, - "tool_name": "browser_navigate", - "arguments": { - "url": "https://example.com", - }, - "connector_name": "Playwright", - "tool_title": "Navigate", - }) - ); -} - #[tokio::test(flavor = "current_thread")] async fn guardian_review_decision_maps_to_mcp_tool_decision() { let (session, _) = make_session_and_context().await; @@ -1799,24 +1752,6 @@ fn approval_elicitation_meta_merges_session_and_always_persist_with_connector_so ); } -#[tokio::test] -async fn approval_callsite_mode_distinguishes_default_and_always_allow() { - let (_session, turn_context) = make_session_and_context().await; - - assert_eq!( - mcp_tool_approval_callsite_mode(AppToolApproval::Auto, &turn_context), - "mcp_tool_call__default" - ); - assert_eq!( - mcp_tool_approval_callsite_mode(AppToolApproval::Prompt, &turn_context), - "mcp_tool_call__default" - ); - assert_eq!( - mcp_tool_approval_callsite_mode(AppToolApproval::Approve, &turn_context), - "mcp_tool_call__always_allow" - ); -} - #[test] fn declined_elicitation_response_stays_decline() { let response = parse_mcp_tool_approval_elicitation_response( @@ -2759,247 +2694,13 @@ async fn prompt_mode_waits_for_approval_when_annotations_do_not_require_approval } #[tokio::test] -async fn approve_mode_skips_arc_interrupt_for_model() { - use wiremock::Mock; - use wiremock::MockServer; - use wiremock::ResponseTemplate; - use wiremock::matchers::method; - use wiremock::matchers::path; - - let server = MockServer::start().await; - Mock::given(method("POST")) - .and(path("/codex/safety/arc")) - .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!({ - "outcome": "steer-model", - "short_reason": "needs approval", - "rationale": "high-risk action", - "risk_score": 96, - "risk_level": "critical", - "evidence": [{ - "message": "dangerous_tool", - "why": "high-risk action", - }], - }))) - .expect(0) - .mount(&server) - .await; - - let (session, mut turn_context) = make_session_and_context().await; - turn_context.auth_manager = Some(crate::test_support::auth_manager_from_auth( - codex_login::CodexAuth::create_dummy_chatgpt_auth_for_testing(), - )); - let mut config = (*turn_context.config).clone(); - config.chatgpt_base_url = server.uri(); - turn_context.config = Arc::new(config); - - let session = Arc::new(session); - let turn_context = Arc::new(turn_context); - let invocation = McpInvocation { - server: CODEX_APPS_MCP_SERVER_NAME.to_string(), - tool: "dangerous_tool".to_string(), - arguments: Some(serde_json::json!({ "id": 1 })), - }; - let metadata = McpToolApprovalMetadata { - annotations: Some(annotations(Some(false), Some(true), Some(true))), - connector_id: Some("calendar".to_string()), - connector_name: Some("Calendar".to_string()), - connector_description: Some("Manage events".to_string()), - plugin_id: None, - tool_title: Some("Dangerous Tool".to_string()), - tool_description: Some("Performs a risky action.".to_string()), - mcp_app_resource_uri: None, - codex_apps_meta: None, - openai_file_input_params: None, - }; - - let decision = maybe_request_mcp_tool_approval( - &session, - &turn_context, - "call-2", - &invocation, - "mcp__test__tool", - Some(&metadata), - AppToolApproval::Approve, - ) - .await; - - assert_eq!(decision, None); -} - -#[tokio::test] -async fn custom_approve_mode_skips_arc_interrupt_for_model() { - use wiremock::Mock; - use wiremock::MockServer; - use wiremock::ResponseTemplate; - use wiremock::matchers::method; - use wiremock::matchers::path; - - let server = MockServer::start().await; - Mock::given(method("POST")) - .and(path("/codex/safety/arc")) - .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!({ - "outcome": "steer-model", - "short_reason": "needs approval", - "rationale": "high-risk action", - "risk_score": 96, - "risk_level": "critical", - "evidence": [{ - "message": "dangerous_tool", - "why": "high-risk action", - }], - }))) - .expect(0) - .mount(&server) - .await; - - let (session, mut turn_context) = make_session_and_context().await; - turn_context.auth_manager = Some(crate::test_support::auth_manager_from_auth( - codex_login::CodexAuth::create_dummy_chatgpt_auth_for_testing(), - )); - let mut config = (*turn_context.config).clone(); - config.chatgpt_base_url = server.uri(); - turn_context.config = Arc::new(config); - - let session = Arc::new(session); - let turn_context = Arc::new(turn_context); - let invocation = McpInvocation { - server: "docs".to_string(), - tool: "dangerous_tool".to_string(), - arguments: Some(serde_json::json!({ "id": 1 })), - }; - let metadata = McpToolApprovalMetadata { - annotations: Some(annotations(Some(false), Some(true), Some(true))), - connector_id: None, - connector_name: None, - connector_description: None, - plugin_id: None, - tool_title: Some("Dangerous Tool".to_string()), - tool_description: Some("Performs a risky action.".to_string()), - mcp_app_resource_uri: None, - codex_apps_meta: None, - openai_file_input_params: None, - }; - - let decision = maybe_request_mcp_tool_approval( - &session, - &turn_context, - "call-2-custom", - &invocation, - "mcp__test__tool", - Some(&metadata), - AppToolApproval::Approve, - ) - .await; - - assert_eq!(decision, None); -} - -#[tokio::test] -async fn approve_mode_skips_arc_interrupt_without_annotations() { - use wiremock::Mock; - use wiremock::MockServer; - use wiremock::ResponseTemplate; - use wiremock::matchers::method; - use wiremock::matchers::path; - - let server = MockServer::start().await; - Mock::given(method("POST")) - .and(path("/codex/safety/arc")) - .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!({ - "outcome": "steer-model", - "short_reason": "needs approval", - "rationale": "high-risk action", - "risk_score": 96, - "risk_level": "critical", - "evidence": [{ - "message": "dangerous_tool", - "why": "high-risk action", - }], - }))) - .expect(0) - .mount(&server) - .await; - - let (session, mut turn_context) = make_session_and_context().await; - turn_context.auth_manager = Some(crate::test_support::auth_manager_from_auth( - codex_login::CodexAuth::create_dummy_chatgpt_auth_for_testing(), - )); - let mut config = (*turn_context.config).clone(); - config.chatgpt_base_url = server.uri(); - turn_context.config = Arc::new(config); - - let session = Arc::new(session); - let turn_context = Arc::new(turn_context); - let invocation = McpInvocation { - server: CODEX_APPS_MCP_SERVER_NAME.to_string(), - tool: "dangerous_tool".to_string(), - arguments: Some(serde_json::json!({ "id": 1 })), - }; - let metadata = McpToolApprovalMetadata { - annotations: None, - connector_id: Some("calendar".to_string()), - connector_name: Some("Calendar".to_string()), - connector_description: Some("Manage events".to_string()), - plugin_id: None, - tool_title: Some("Dangerous Tool".to_string()), - tool_description: Some("Performs a risky action.".to_string()), - mcp_app_resource_uri: None, - codex_apps_meta: None, - openai_file_input_params: None, - }; - - let decision = maybe_request_mcp_tool_approval( - &session, - &turn_context, - "call-3", - &invocation, - "mcp__test__tool", - Some(&metadata), - AppToolApproval::Approve, - ) - .await; - - assert_eq!(decision, None); -} - -#[tokio::test] -async fn full_access_mode_skips_arc_monitor_for_all_approval_modes() { - use wiremock::Mock; - use wiremock::MockServer; - use wiremock::ResponseTemplate; - use wiremock::matchers::method; - use wiremock::matchers::path; - - let server = MockServer::start().await; - Mock::given(method("POST")) - .and(path("/codex/safety/arc")) - .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!({ - "outcome": "steer-model", - "short_reason": "needs approval", - "rationale": "high-risk action", - "risk_score": 96, - "risk_level": "critical", - "evidence": [{ - "message": "dangerous_tool", - "why": "high-risk action", - }], - }))) - .expect(0) - .mount(&server) - .await; - +async fn full_access_mode_skips_mcp_tool_approval_for_all_approval_modes() { let (session, mut turn_context) = make_session_and_context().await; - turn_context.auth_manager = Some(crate::test_support::auth_manager_from_auth( - codex_login::CodexAuth::create_dummy_chatgpt_auth_for_testing(), - )); turn_context .approval_policy .set(AskForApproval::Never) .expect("test setup should allow updating approval policy"); turn_context.permission_profile = PermissionProfile::Disabled; - let mut config = (*turn_context.config).clone(); - config.chatgpt_base_url = server.uri(); - turn_context.config = Arc::new(config); let session = Arc::new(session); let turn_context = Arc::new(turn_context); @@ -3042,7 +2743,7 @@ async fn full_access_mode_skips_arc_monitor_for_all_approval_modes() { } #[tokio::test] -async fn approve_mode_skips_arc_and_guardian_in_every_permission_mode() { +async fn approve_mode_skips_guardian_in_every_permission_mode() { use wiremock::Mock; use wiremock::ResponseTemplate; use wiremock::matchers::method; @@ -3055,22 +2756,6 @@ async fn approve_mode_skips_arc_and_guardian_in_every_permission_mode() { .expect(0) .mount(&server) .await; - Mock::given(method("POST")) - .and(path("/codex/safety/arc")) - .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!({ - "outcome": "ask-user", - "short_reason": "needs confirmation", - "rationale": "ARC wants a second review", - "risk_score": 65, - "risk_level": "medium", - "evidence": [{ - "message": "dangerous_tool", - "why": "requires review", - }], - }))) - .expect(0) - .mount(&server) - .await; let invocation = McpInvocation { server: CODEX_APPS_MCP_SERVER_NAME.to_string(),