From 47b5a8f4ef8fbb82d45d8b0c19a477d6d76a4253 Mon Sep 17 00:00:00 2001 From: Dylan Hurd Date: Fri, 17 Apr 2026 18:03:25 -0700 Subject: [PATCH] chore(guardian) Disable skills in auto-review --- codex-rs/core/src/guardian/review_session.rs | 17 +++ codex-rs/core/tests/suite/guardian.rs | 147 +++++++++++++++++++ codex-rs/core/tests/suite/mod.rs | 1 + 3 files changed, 165 insertions(+) create mode 100644 codex-rs/core/tests/suite/guardian.rs diff --git a/codex-rs/core/src/guardian/review_session.rs b/codex-rs/core/src/guardian/review_session.rs index 3721dc8dfe53..55246428dfcc 100644 --- a/codex-rs/core/src/guardian/review_session.rs +++ b/codex-rs/core/src/guardian/review_session.rs @@ -727,6 +727,7 @@ pub(crate) fn build_guardian_review_session_config( .map(guardian_policy_prompt_with_config) .unwrap_or_else(guardian_policy_prompt), ); + guardian_config.inject_skills_message = false; guardian_config.permissions.approval_policy = Constrained::allow_only(AskForApproval::Never); guardian_config.permissions.sandbox_policy = Constrained::allow_only(SandboxPolicy::new_read_only_policy()); @@ -874,6 +875,22 @@ mod tests { assert!(!guardian_config.features.enabled(Feature::CodexHooks)); } + #[tokio::test] + async fn guardian_review_session_config_disables_skills_message() { + let mut parent_config = crate::config::test_config().await; + parent_config.inject_skills_message = true; + + let guardian_config = build_guardian_review_session_config( + &parent_config, + /*live_network_config*/ None, + "active-model", + /*reasoning_effort*/ None, + ) + .expect("guardian config"); + + assert!(!guardian_config.inject_skills_message); + } + #[tokio::test(flavor = "current_thread")] async fn run_before_review_deadline_times_out_before_future_completes() { let outcome = run_before_review_deadline( diff --git a/codex-rs/core/tests/suite/guardian.rs b/codex-rs/core/tests/suite/guardian.rs new file mode 100644 index 000000000000..775b991fa540 --- /dev/null +++ b/codex-rs/core/tests/suite/guardian.rs @@ -0,0 +1,147 @@ +#![allow(clippy::unwrap_used, clippy::expect_used)] + +use anyhow::Result; +use codex_core::config::Constrained; +use codex_protocol::config_types::ApprovalsReviewer; +use codex_protocol::protocol::AskForApproval; +use codex_protocol::protocol::EventMsg; +use codex_protocol::protocol::Op; +use codex_protocol::protocol::SandboxPolicy; +use codex_protocol::user_input::UserInput; +use core_test_support::responses::ev_assistant_message; +use core_test_support::responses::ev_completed; +use core_test_support::responses::ev_function_call; +use core_test_support::responses::ev_response_created; +use core_test_support::responses::mount_sse_sequence; +use core_test_support::responses::sse; +use core_test_support::responses::start_mock_server; +use core_test_support::skip_if_no_network; +use core_test_support::test_codex::test_codex; +use core_test_support::wait_for_event; +use serde_json::json; +use std::sync::Arc; +use tempfile::TempDir; + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn guardian_mode_does_not_inject_skills_message() -> Result<()> { + skip_if_no_network!(Ok(())); + + let server = start_mock_server().await; + let codex_home = Arc::new(TempDir::new()?); + let skill_dir = codex_home.path().join("skills/demo"); + std::fs::create_dir_all(&skill_dir)?; + std::fs::write( + skill_dir.join("SKILL.md"), + "---\nname: demo\ndescription: build charts\n---\n\n# body\n", + )?; + + let builder = test_codex().with_home(codex_home); + let test = builder + .with_config(|config| { + config.permissions.approval_policy = Constrained::allow_any(AskForApproval::OnRequest); + config.permissions.sandbox_policy = + Constrained::allow_any(SandboxPolicy::new_read_only_policy()); + config.approvals_reviewer = ApprovalsReviewer::GuardianSubagent; + config.inject_skills_message = true; + }) + .build(&server) + .await?; + + let call_id = "guardian-shell-call"; + let command = "echo guardian"; + let response_log = mount_sse_sequence( + &server, + vec![ + sse(vec![ + ev_response_created("resp-parent-1"), + ev_function_call( + call_id, + "shell_command", + &serde_json::to_string(&json!({ + "command": command, + "timeout_ms": 1_000_u64, + }))?, + ), + ev_completed("resp-parent-1"), + ]), + sse(vec![ + ev_response_created("resp-guardian"), + ev_assistant_message( + "msg-guardian", + &json!({ + "risk_level": "low", + "user_authorization": "high", + "outcome": "allow", + "rationale": "The planned command is a benign echo requested by the test.", + }) + .to_string(), + ), + ev_completed("resp-guardian"), + ]), + sse(vec![ + ev_assistant_message("msg-parent-2", "done"), + ev_completed("resp-parent-2"), + ]), + ], + ) + .await; + + test.codex + .submit(Op::UserTurn { + items: vec![UserInput::Text { + text: "run a benign command".into(), + text_elements: Vec::new(), + }], + final_output_json_schema: None, + cwd: test.cwd.path().to_path_buf(), + approval_policy: AskForApproval::OnRequest, + approvals_reviewer: Some(ApprovalsReviewer::GuardianSubagent), + sandbox_policy: SandboxPolicy::new_read_only_policy(), + model: test.session_configured.model.clone(), + effort: None, + summary: None, + service_tier: None, + collaboration_mode: None, + personality: None, + }) + .await?; + + wait_for_event(&test.codex, |event| { + matches!(event, EventMsg::TurnComplete(_)) + }) + .await; + + let requests = response_log.requests(); + let guardian_request = requests + .iter() + .find(|request| { + request + .message_input_texts("developer") + .iter() + .any(|text| text.contains("You are judging one planned coding-agent action.")) + }) + .expect("guardian review request should be captured"); + + assert!( + requests + .iter() + .filter(|request| request.body_json()["model"] == test.session_configured.model) + .flat_map(|request| request.message_input_texts("developer")) + .any(|text| text.contains("demo: build charts")), + "parent request should include the test skill" + ); + + let guardian_developer_text = guardian_request + .message_input_texts("developer") + .join("\n\n"); + assert!( + !guardian_developer_text.contains("## Skills"), + "guardian request should not include skills section: {guardian_developer_text}" + ); + assert!( + !guardian_developer_text.contains("demo: build charts"), + "guardian request should not include skill summary: {guardian_developer_text}" + ); + + Ok(()) +} diff --git a/codex-rs/core/tests/suite/mod.rs b/codex-rs/core/tests/suite/mod.rs index 9f8389c45e2d..ef134a12bd27 100644 --- a/codex-rs/core/tests/suite/mod.rs +++ b/codex-rs/core/tests/suite/mod.rs @@ -44,6 +44,7 @@ mod deprecation_notice; mod exec; mod exec_policy; mod fork_thread; +mod guardian; mod hierarchical_agents; #[cfg(not(target_os = "windows"))] mod hooks;