Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
171 changes: 37 additions & 134 deletions codex-rs/core/tests/suite/safety_check_downgrade.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
use anyhow::Result;
use codex_protocol::models::ContentItem;
use codex_protocol::models::PermissionProfile;
use codex_protocol::models::ResponseItem;
use codex_protocol::protocol::AskForApproval;
use codex_protocol::protocol::CodexErrorInfo;
use codex_protocol::protocol::EventMsg;
use codex_protocol::protocol::ModelRerouteReason;
use codex_protocol::protocol::ModelVerification;
use codex_protocol::protocol::Op;
use codex_protocol::protocol::SandboxPolicy;
use codex_protocol::user_input::UserInput;
use core_test_support::responses::ev_assistant_message;
use core_test_support::responses::ev_function_call;
Expand All @@ -20,7 +20,9 @@ use core_test_support::responses::sse_completed;
use core_test_support::responses::sse_response;
use core_test_support::responses::start_mock_server;
use core_test_support::skip_if_no_network;
use core_test_support::test_codex::TestCodex;
use core_test_support::test_codex::test_codex;
use core_test_support::test_codex::turn_permission_fields;
use core_test_support::wait_for_event;
use pretty_assertions::assert_eq;
use wiremock::ResponseTemplate;
Expand All @@ -32,6 +34,30 @@ const TRUSTED_ACCESS_FOR_CYBER_VERIFICATION: &str = "trusted_access_for_cyber";
const CYBER_POLICY_MESSAGE: &str =
"This request has been flagged for potentially high-risk cyber activity.";

/// Builds the `Op::UserTurn` submitted by the tests in this file.
///
/// The turn carries a single `UserInput::Text` item containing `text`, runs in
/// the test's working directory, requests `REQUESTED_MODEL`, and never asks for
/// approval. The sandbox policy and permission profile are whatever
/// `turn_permission_fields` returns for `PermissionProfile::Disabled` and the
/// test's cwd. The reasoning effort is copied from the test config; every
/// remaining optional field is left as `None`.
fn disabled_text_turn(test: &TestCodex, text: &str) -> Op {
    // Resolve both permission-related fields together from the `Disabled`
    // profile so the two values passed to the turn come from the same call.
    let (policy, profile) =
        turn_permission_fields(PermissionProfile::Disabled, test.cwd_path());

    // The only user input for the turn: plain text with no text elements.
    let prompt = UserInput::Text {
        text: text.to_owned(),
        text_elements: Vec::new(),
    };

    Op::UserTurn {
        environments: None,
        items: vec![prompt],
        final_output_json_schema: None,
        cwd: test.cwd_path().to_path_buf(),
        approval_policy: AskForApproval::Never,
        approvals_reviewer: None,
        sandbox_policy: policy,
        permission_profile: profile,
        model: REQUESTED_MODEL.to_string(),
        effort: test.config.model_reasoning_effort,
        summary: None,
        service_tier: None,
        collaboration_mode: None,
        personality: None,
    }
}

#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn openai_model_header_mismatch_emits_warning_event_and_warning_item() -> Result<()> {
skip_if_no_network!(Ok(()));
Expand All @@ -45,25 +71,7 @@ async fn openai_model_header_mismatch_emits_warning_event_and_warning_item() ->
let test = builder.build(&server).await?;

test.codex
.submit(Op::UserTurn {
environments: None,
items: vec![UserInput::Text {
text: "trigger safety check".to_string(),
text_elements: Vec::new(),
}],
final_output_json_schema: None,
cwd: test.cwd_path().to_path_buf(),
approval_policy: AskForApproval::Never,
approvals_reviewer: None,
sandbox_policy: SandboxPolicy::DangerFullAccess,
permission_profile: None,
model: REQUESTED_MODEL.to_string(),
effort: test.config.model_reasoning_effort,
summary: None,
service_tier: None,
collaboration_mode: None,
personality: None,
})
.submit(disabled_text_turn(&test, "trigger safety check"))
.await?;

let reroute = wait_for_event(&test.codex, |event| {
Expand Down Expand Up @@ -141,25 +149,7 @@ async fn cyber_policy_response_emits_typed_error_without_retry() -> Result<()> {
let test = builder.build(&server).await?;

test.codex
.submit(Op::UserTurn {
environments: None,
items: vec![UserInput::Text {
text: "trigger cyber policy error".to_string(),
text_elements: Vec::new(),
}],
final_output_json_schema: None,
cwd: test.cwd_path().to_path_buf(),
approval_policy: AskForApproval::Never,
approvals_reviewer: None,
sandbox_policy: SandboxPolicy::DangerFullAccess,
permission_profile: None,
model: REQUESTED_MODEL.to_string(),
effort: test.config.model_reasoning_effort,
summary: None,
service_tier: None,
collaboration_mode: None,
personality: None,
})
.submit(disabled_text_turn(&test, "trigger cyber policy error"))
.await?;

let error = wait_for_event(&test.codex, |event| matches!(event, EventMsg::Error(_))).await;
Expand Down Expand Up @@ -198,25 +188,7 @@ async fn response_model_field_mismatch_emits_warning_when_header_matches_request
let test = builder.build(&server).await?;

test.codex
.submit(Op::UserTurn {
environments: None,
items: vec![UserInput::Text {
text: "trigger response model check".to_string(),
text_elements: Vec::new(),
}],
final_output_json_schema: None,
cwd: test.cwd_path().to_path_buf(),
approval_policy: AskForApproval::Never,
approvals_reviewer: None,
sandbox_policy: SandboxPolicy::DangerFullAccess,
permission_profile: None,
model: REQUESTED_MODEL.to_string(),
effort: test.config.model_reasoning_effort,
summary: None,
service_tier: None,
collaboration_mode: None,
personality: None,
})
.submit(disabled_text_turn(&test, "trigger response model check"))
.await?;

let reroute = wait_for_event(&test.codex, |event| {
Expand Down Expand Up @@ -286,25 +258,7 @@ async fn openai_model_header_mismatch_only_emits_one_warning_per_turn() -> Resul
let test = builder.build(&server).await?;

test.codex
.submit(Op::UserTurn {
environments: None,
items: vec![UserInput::Text {
text: "trigger follow-up turn".to_string(),
text_elements: Vec::new(),
}],
final_output_json_schema: None,
cwd: test.cwd_path().to_path_buf(),
approval_policy: AskForApproval::Never,
approvals_reviewer: None,
sandbox_policy: SandboxPolicy::DangerFullAccess,
permission_profile: None,
model: REQUESTED_MODEL.to_string(),
effort: test.config.model_reasoning_effort,
summary: None,
service_tier: None,
collaboration_mode: None,
personality: None,
})
.submit(disabled_text_turn(&test, "trigger follow-up turn"))
.await?;

let mut warning_count = 0;
Expand Down Expand Up @@ -338,25 +292,7 @@ async fn openai_model_header_casing_only_mismatch_does_not_warn() -> Result<()>
let test = builder.build(&server).await?;

test.codex
.submit(Op::UserTurn {
environments: None,
items: vec![UserInput::Text {
text: "trigger casing check".to_string(),
text_elements: Vec::new(),
}],
final_output_json_schema: None,
cwd: test.cwd_path().to_path_buf(),
approval_policy: AskForApproval::Never,
approvals_reviewer: None,
sandbox_policy: SandboxPolicy::DangerFullAccess,
permission_profile: None,
model: REQUESTED_MODEL.to_string(),
effort: test.config.model_reasoning_effort,
summary: None,
service_tier: None,
collaboration_mode: None,
personality: None,
})
.submit(disabled_text_turn(&test, "trigger casing check"))
.await?;

let mut reroute_count = 0;
Expand Down Expand Up @@ -399,25 +335,7 @@ async fn model_verification_emits_structured_event_without_reroute_or_warning()
let test = builder.build(&server).await?;

test.codex
.submit(Op::UserTurn {
environments: None,
items: vec![UserInput::Text {
text: "trigger model verification".to_string(),
text_elements: Vec::new(),
}],
final_output_json_schema: None,
cwd: test.cwd_path().to_path_buf(),
approval_policy: AskForApproval::Never,
approvals_reviewer: None,
sandbox_policy: SandboxPolicy::DangerFullAccess,
permission_profile: None,
model: REQUESTED_MODEL.to_string(),
effort: test.config.model_reasoning_effort,
summary: None,
service_tier: None,
collaboration_mode: None,
personality: None,
})
.submit(disabled_text_turn(&test, "trigger model verification"))
.await?;

let mut verification_count = 0;
Expand Down Expand Up @@ -493,25 +411,10 @@ async fn model_verification_only_emits_once_per_turn() -> Result<()> {
let test = builder.build(&server).await?;

test.codex
.submit(Op::UserTurn {
environments: None,
items: vec![UserInput::Text {
text: "trigger follow-up model verification".to_string(),
text_elements: Vec::new(),
}],
final_output_json_schema: None,
cwd: test.cwd_path().to_path_buf(),
approval_policy: AskForApproval::Never,
approvals_reviewer: None,
sandbox_policy: SandboxPolicy::DangerFullAccess,
permission_profile: None,
model: REQUESTED_MODEL.to_string(),
effort: test.config.model_reasoning_effort,
summary: None,
service_tier: None,
collaboration_mode: None,
personality: None,
})
.submit(disabled_text_turn(
&test,
"trigger follow-up model verification",
))
.await?;

let mut verification_count = 0;
Expand Down
Loading