diff --git a/codex-rs/Cargo.lock b/codex-rs/Cargo.lock index 164e8a085774..e3f2ac1a1f67 100644 --- a/codex-rs/Cargo.lock +++ b/codex-rs/Cargo.lock @@ -2957,6 +2957,7 @@ version = "0.0.0" dependencies = [ "anyhow", "chrono", + "codex-backend-client", "codex-config", "codex-core", "codex-features", diff --git a/codex-rs/config/src/types.rs b/codex-rs/config/src/types.rs index f46a7fc009a6..b1adc1fd7338 100644 --- a/codex-rs/config/src/types.rs +++ b/codex-rs/config/src/types.rs @@ -32,6 +32,7 @@ pub const DEFAULT_OTEL_ENVIRONMENT: &str = "dev"; pub const DEFAULT_MEMORIES_MAX_ROLLOUTS_PER_STARTUP: usize = 2; pub const DEFAULT_MEMORIES_MAX_ROLLOUT_AGE_DAYS: i64 = 10; pub const DEFAULT_MEMORIES_MIN_ROLLOUT_IDLE_HOURS: i64 = 6; +pub const DEFAULT_MEMORIES_MIN_RATE_LIMIT_REMAINING_PERCENT: i64 = 25; pub const DEFAULT_MEMORIES_MAX_RAW_MEMORIES_FOR_CONSOLIDATION: usize = 256; pub const DEFAULT_MEMORIES_MAX_UNUSED_DAYS: i64 = 30; const MIN_MEMORIES_MAX_RAW_MEMORIES_FOR_CONSOLIDATION: usize = 1; @@ -204,6 +205,9 @@ pub struct MemoriesToml { pub max_rollouts_per_startup: Option, /// Minimum idle time between last thread activity and memory creation (hours). > 12h recommended. pub min_rollout_idle_hours: Option, + /// Minimum remaining percentage required in Codex rate-limit windows before memory startup runs. + #[schemars(range(min = 0, max = 100))] + pub min_rate_limit_remaining_percent: Option, /// Model used for thread summarisation. pub extract_model: Option, /// Model used for memory consolidation. 
@@ -221,6 +225,7 @@ pub struct MemoriesConfig { pub max_rollout_age_days: i64, pub max_rollouts_per_startup: usize, pub min_rollout_idle_hours: i64, + pub min_rate_limit_remaining_percent: i64, pub extract_model: Option, pub consolidation_model: Option, } @@ -236,6 +241,7 @@ impl Default for MemoriesConfig { max_rollout_age_days: DEFAULT_MEMORIES_MAX_ROLLOUT_AGE_DAYS, max_rollouts_per_startup: DEFAULT_MEMORIES_MAX_ROLLOUTS_PER_STARTUP, min_rollout_idle_hours: DEFAULT_MEMORIES_MIN_ROLLOUT_IDLE_HOURS, + min_rate_limit_remaining_percent: DEFAULT_MEMORIES_MIN_RATE_LIMIT_REMAINING_PERCENT, extract_model: None, consolidation_model: None, } @@ -277,6 +283,10 @@ impl From for MemoriesConfig { .min_rollout_idle_hours .unwrap_or(defaults.min_rollout_idle_hours) .clamp(1, 48), + min_rate_limit_remaining_percent: toml + .min_rate_limit_remaining_percent + .unwrap_or(defaults.min_rate_limit_remaining_percent) + .clamp(0, 100), extract_model: toml.extract_model, consolidation_model: toml.consolidation_model, } diff --git a/codex-rs/config/src/types_tests.rs b/codex-rs/config/src/types_tests.rs index b18c1cc645fc..2c3f69d9867e 100644 --- a/codex-rs/config/src/types_tests.rs +++ b/codex-rs/config/src/types_tests.rs @@ -59,3 +59,30 @@ fn memories_config_clamps_count_limits_to_nonzero_values() { } ); } + +#[test] +fn memories_config_clamps_rate_limit_remaining_threshold() { + let config = MemoriesConfig::from(MemoriesToml { + min_rate_limit_remaining_percent: Some(101), + ..Default::default() + }); + assert_eq!( + config, + MemoriesConfig { + min_rate_limit_remaining_percent: 100, + ..MemoriesConfig::default() + } + ); + + let config = MemoriesConfig::from(MemoriesToml { + min_rate_limit_remaining_percent: Some(-1), + ..Default::default() + }); + assert_eq!( + config, + MemoriesConfig { + min_rate_limit_remaining_percent: 0, + ..MemoriesConfig::default() + } + ); +} diff --git a/codex-rs/core/config.schema.json b/codex-rs/core/config.schema.json index c4a72c90b0a6..9cb23a4434ff 
100644 --- a/codex-rs/core/config.schema.json +++ b/codex-rs/core/config.schema.json @@ -1114,6 +1114,13 @@ "format": "int64", "type": "integer" }, + "min_rate_limit_remaining_percent": { + "description": "Minimum remaining percentage required in Codex rate-limit windows before memory startup runs.", + "format": "int64", + "maximum": 100.0, + "minimum": 0.0, + "type": "integer" + }, "min_rollout_idle_hours": { "description": "Minimum idle time between last thread activity and memory creation (hours). > 12h recommended.", "format": "int64", diff --git a/codex-rs/core/src/config/config_tests.rs b/codex-rs/core/src/config/config_tests.rs index a2b5312103e9..0e51ac37e96b 100644 --- a/codex-rs/core/src/config/config_tests.rs +++ b/codex-rs/core/src/config/config_tests.rs @@ -266,6 +266,7 @@ max_unused_days = 21 max_rollout_age_days = 42 max_rollouts_per_startup = 9 min_rollout_idle_hours = 24 +min_rate_limit_remaining_percent = 12 extract_model = "gpt-5-mini" consolidation_model = "gpt-5.2" "#; @@ -281,6 +282,7 @@ consolidation_model = "gpt-5.2" max_rollout_age_days: Some(42), max_rollouts_per_startup: Some(9), min_rollout_idle_hours: Some(24), + min_rate_limit_remaining_percent: Some(12), extract_model: Some("gpt-5-mini".to_string()), consolidation_model: Some("gpt-5.2".to_string()), }), @@ -305,6 +307,7 @@ consolidation_model = "gpt-5.2" max_rollout_age_days: 42, max_rollouts_per_startup: 9, min_rollout_idle_hours: 24, + min_rate_limit_remaining_percent: 12, extract_model: Some("gpt-5-mini".to_string()), consolidation_model: Some("gpt-5.2".to_string()), } diff --git a/codex-rs/memories/write/Cargo.toml b/codex-rs/memories/write/Cargo.toml index 86ed83129584..53d870596437 100644 --- a/codex-rs/memories/write/Cargo.toml +++ b/codex-rs/memories/write/Cargo.toml @@ -14,6 +14,7 @@ workspace = true [dependencies] anyhow = { workspace = true } chrono = { workspace = true } +codex-backend-client = { workspace = true } codex-core = { workspace = true } codex-config = { 
workspace = true } codex-features = { workspace = true } diff --git a/codex-rs/memories/write/src/guard.rs b/codex-rs/memories/write/src/guard.rs new file mode 100644 index 000000000000..7deb74517059 --- /dev/null +++ b/codex-rs/memories/write/src/guard.rs @@ -0,0 +1,70 @@ +use codex_backend_client::Client as BackendClient; +use codex_core::config::Config; +use codex_login::AuthManager; +use codex_protocol::protocol::RateLimitSnapshot; +use codex_protocol::protocol::RateLimitWindow; +use tracing::info; +use tracing::warn; + +const CODEX_LIMIT_ID: &str = "codex"; + +pub(crate) async fn rate_limits_ok(auth_manager: &AuthManager, config: &Config) -> bool { + rate_limits_check(auth_manager, config) + .await + .unwrap_or(true) +} + +async fn rate_limits_check(auth_manager: &AuthManager, config: &Config) -> Option { + let auth = auth_manager.auth().await?; + if !auth.uses_codex_backend() { + return None; + } + + let client = BackendClient::from_auth(config.chatgpt_base_url.clone(), &auth) + .map_err(|err| warn!(%err, "failed to construct backend client")) + .ok()?; + + let snapshots = client + .get_rate_limits_many() + .await + .map_err(|err| warn!(%err, "failed to fetch rate limits")) + .ok()?; + + let snapshot = snapshots + .iter() + .find(|s| s.limit_id.as_deref() == Some(CODEX_LIMIT_ID)) + .or_else(|| snapshots.first())?; + + let min_remaining_percent = config.memories.min_rate_limit_remaining_percent; + let allowed = snapshot_allows_startup(snapshot, min_remaining_percent); + + if !allowed { + info!( + min_remaining_percent, + "skipping memories startup because Codex rate limits are below the configured threshold" + ); + } + + Some(allowed) +} + +fn snapshot_allows_startup(snapshot: &RateLimitSnapshot, min_remaining_percent: i64) -> bool { + if snapshot.rate_limit_reached_type.is_some() { + return false; + } + + let max_used_percent = 100.0 - min_remaining_percent.clamp(0, 100) as f64; + window_allows_startup(snapshot.primary.as_ref(), max_used_percent) + && 
window_allows_startup(snapshot.secondary.as_ref(), max_used_percent) +} + +fn window_allows_startup(window: Option<&RateLimitWindow>, max_used_percent: f64) -> bool { + match window { + Some(window) => window.used_percent <= max_used_percent, + None => true, + } +} + +#[cfg(test)] +#[path = "guard_tests.rs"] +mod tests; diff --git a/codex-rs/memories/write/src/guard_tests.rs b/codex-rs/memories/write/src/guard_tests.rs new file mode 100644 index 000000000000..6c22b0681c02 --- /dev/null +++ b/codex-rs/memories/write/src/guard_tests.rs @@ -0,0 +1,78 @@ +use super::*; +use codex_protocol::protocol::RateLimitReachedType; + +fn snapshot( + primary_used_percent: Option, + secondary_used_percent: Option, +) -> RateLimitSnapshot { + RateLimitSnapshot { + limit_id: Some(CODEX_LIMIT_ID.to_string()), + limit_name: None, + primary: primary_used_percent.map(window), + secondary: secondary_used_percent.map(window), + credits: None, + plan_type: None, + rate_limit_reached_type: None, + } +} + +fn window(used_percent: f64) -> RateLimitWindow { + RateLimitWindow { + used_percent, + window_minutes: None, + resets_at: None, + } +} + +#[test] +fn startup_check_uses_configured_remaining_threshold() { + let snapshot = snapshot( + /*primary_used_percent*/ Some(89.9), + /*secondary_used_percent*/ Some(50.0), + ); + + assert!(snapshot_allows_startup( + &snapshot, /*min_remaining_percent*/ 10 + )); + assert!(!snapshot_allows_startup( + &snapshot, /*min_remaining_percent*/ 11 + )); +} + +#[test] +fn startup_check_skips_when_primary_or_secondary_is_too_low() { + assert!(!snapshot_allows_startup( + &snapshot( + /*primary_used_percent*/ Some(75.1), + /*secondary_used_percent*/ Some(10.0), + ), + /*min_remaining_percent*/ 25, + )); + assert!(!snapshot_allows_startup( + &snapshot( + /*primary_used_percent*/ Some(10.0), + /*secondary_used_percent*/ Some(75.1), + ), + /*min_remaining_percent*/ 25, + )); + assert!(snapshot_allows_startup( + &snapshot( + /*primary_used_percent*/ Some(74.9), + 
/*secondary_used_percent*/ Some(74.9), + ), + /*min_remaining_percent*/ 25, + )); +} + +#[test] +fn startup_check_skips_when_limit_is_reached() { + let mut snapshot = snapshot( + /*primary_used_percent*/ Some(10.0), + /*secondary_used_percent*/ Some(10.0), + ); + snapshot.rate_limit_reached_type = Some(RateLimitReachedType::RateLimitReached); + + assert!(!snapshot_allows_startup( + &snapshot, /*min_remaining_percent*/ 25, + )); +} diff --git a/codex-rs/memories/write/src/lib.rs b/codex-rs/memories/write/src/lib.rs index be796630371e..c92d5b652490 100644 --- a/codex-rs/memories/write/src/lib.rs +++ b/codex-rs/memories/write/src/lib.rs @@ -6,6 +6,7 @@ mod control; mod extensions; +mod guard; mod phase1; mod phase2; mod prompts; diff --git a/codex-rs/memories/write/src/start.rs b/codex-rs/memories/write/src/start.rs index 362ce9fa4328..0ad8b2730179 100644 --- a/codex-rs/memories/write/src/start.rs +++ b/codex-rs/memories/write/src/start.rs @@ -1,3 +1,4 @@ +use crate::guard; use crate::phase1; use crate::phase2; use crate::runtime::MemoryStartupContext; @@ -11,6 +12,8 @@ use codex_protocol::protocol::SessionSource; use std::sync::Arc; use tracing::warn; +const MEMORY_STARTUP: &str = "codex.memory.startup"; + /// Starts the asynchronous startup memory pipeline for an eligible root session. /// /// The pipeline is skipped for ephemeral sessions, disabled feature flags, and @@ -32,7 +35,7 @@ pub fn start_memories_startup_task( let context = Arc::new(MemoryStartupContext::new( thread_manager, - auth_manager, + Arc::clone(&auth_manager), thread_id, thread, config.as_ref(), @@ -45,8 +48,19 @@ pub fn start_memories_startup_task( } tokio::spawn(async move { - // Clean memories to make preserve DB size + // Clean memories to preserve DB size. This does not consume tokens, so it can be + // done before the quota check.
phase1::prune(context.as_ref(), &config).await; + + if !guard::rate_limits_ok(&auth_manager, &config).await { + context.counter( + MEMORY_STARTUP, + /*inc*/ 1, + &[("status", "skipped_rate_limit")], + ); + return; + } + // Run phase 1. phase1::run(Arc::clone(&context), Arc::clone(&config)).await; // Run phase 2.