diff --git a/codex-rs/app-server-protocol/src/protocol/thread_history.rs b/codex-rs/app-server-protocol/src/protocol/thread_history.rs index bc486df23707..16c36610c75f 100644 --- a/codex-rs/app-server-protocol/src/protocol/thread_history.rs +++ b/codex-rs/app-server-protocol/src/protocol/thread_history.rs @@ -232,7 +232,10 @@ impl ThreadHistoryBuilder { RolloutItem::EventMsg(event) => self.handle_event(event), RolloutItem::Compacted(payload) => self.handle_compacted(payload), RolloutItem::ResponseItem(item) => self.handle_response_item(item), - RolloutItem::TurnContext(_) | RolloutItem::SessionMeta(_) => {} + RolloutItem::ForkReference(_) + | RolloutItem::RolloutReference(_) + | RolloutItem::TurnContext(_) + | RolloutItem::SessionMeta(_) => {} } } diff --git a/codex-rs/app-server/src/codex_message_processor.rs b/codex-rs/app-server/src/codex_message_processor.rs index edc142c840e1..18729633c922 100644 --- a/codex-rs/app-server/src/codex_message_processor.rs +++ b/codex-rs/app-server/src/codex_message_processor.rs @@ -4039,7 +4039,12 @@ impl CodexMessageProcessor { let (mut thread, history) = thread_from_stored_thread(stored_thread, fallback_provider, &self.config.cwd); if include_turns && let Some(history) = history { - thread.turns = build_turns_from_rollout_items(&history.items); + let items = codex_core::materialize_rollout_items_for_replay( + &self.config.codex_home, + &history.items, + ) + .await; + thread.turns = build_turns_from_rollout_items(&items); } Ok(Some(thread)) } @@ -4104,7 +4109,12 @@ impl CodexMessageProcessor { .load_history(/*include_archived*/ true) .await .map_err(|err| thread_read_history_load_error(thread_id, err))?; - thread.turns = build_turns_from_rollout_items(&history.items); + let items = codex_core::materialize_rollout_items_for_replay( + &self.config.codex_home, + &history.items, + ) + .await; + thread.turns = build_turns_from_rollout_items(&items); } Ok(()) @@ -4137,6 +4147,8 @@ impl CodexMessageProcessor { .load_thread_turns_list_history(thread_uuid) .await .map_err(thread_read_view_error)?; + let items = + codex_core::materialize_rollout_items_for_replay(&self.config.codex_home, &items).await; // This API optimizes network transfer by letting clients page through a // thread's turns incrementally, but it still replays the entire rollout on // every request. Rollback and compaction events can change earlier turns, so @@ -4815,12 +4827,20 @@ impl CodexMessageProcessor { ) -> std::result::Result { let (mut thread, history) = thread_from_stored_thread(stored_thread, fallback_provider, &self.config.cwd); - if include_turns && let Some(history) = history { - populate_thread_turns_from_history( - &mut thread, + if let Some(history) = history { + let history_items = codex_core::materialize_rollout_items_for_replay( + self.config.codex_home.as_path(), &history.items, - /*active_turn*/ None, - )?; + ) + .await; + thread.preview = preview_from_rollout_items(&history_items); + if include_turns { + populate_thread_turns_from_history( + &mut thread, + &history_items, + /*active_turn*/ None, + )?; + } } Ok(thread) } @@ -4925,7 +4945,11 @@ impl CodexMessageProcessor { thread.id = thread_id.to_string(); thread.path = Some(rollout_path.to_path_buf()); if include_turns { - let history_items = thread_history.get_rollout_items(); + let history_items = codex_core::materialize_rollout_items_for_replay( + self.config.codex_home.as_path(), + &thread_history.get_rollout_items(), + ) + .await; populate_thread_turns_from_history( &mut thread, &history_items, @@ -5079,7 +5103,10 @@ impl CodexMessageProcessor { let mut thread = if let Some(fork_rollout_path) = session_configured.rollout_path.as_ref() { let stored_thread = self - .read_stored_thread_for_new_fork(thread_id, include_turns) + // The forked rollout may contain a compact ForkReference. Load history + // even when excludeTurns is set so preview generation can materialize the + // referenced source rollout without returning turns. + .read_stored_thread_for_new_fork(thread_id, /*include_history*/ true) .await?; self.stored_thread_to_api_thread( stored_thread, @@ -8386,7 +8413,7 @@ async fn handle_thread_listener_command( async fn handle_pending_thread_resume_request( conversation_id: ThreadId, conversation: &Arc, - _codex_home: &Path, + codex_home: &Path, thread_state_manager: &ThreadStateManager, thread_state: &Arc>, thread_watch_manager: &ThreadWatchManager, @@ -8415,12 +8442,14 @@ async fn handle_pending_thread_resume_request( let request_id = pending.request_id; let connection_id = request_id.connection_id; let mut thread = pending.thread_summary; + let history_items = if pending.include_turns { + codex_core::materialize_rollout_items_for_replay(codex_home, &pending.history_items).await + } else { + Vec::new() + }; if pending.include_turns - && let Err(message) = populate_thread_turns_from_history( - &mut thread, - &pending.history_items, - active_turn.as_ref(), - ) + && let Err(message) = + populate_thread_turns_from_history(&mut thread, &history_items, active_turn.as_ref()) { outgoing .send_error(request_id, internal_error(message)) @@ -8508,7 +8537,7 @@ async fn handle_pending_thread_resume_request( // paying the cost of turn reconstruction for historical usage replay. if let Some(token_usage_thread) = token_usage_thread { let token_usage_turn_id = latest_token_usage_turn_id_from_rollout_items( - &pending.history_items, + &history_items, token_usage_thread.turns.as_slice(), ); // Rejoining a loaded thread has the same UI contract as a cold resume, but @@ -10840,6 +10869,7 @@ mod tests { let session_meta = SessionMeta { id: conversation_id, + segment_id: None, timestamp: timestamp.clone(), model_provider: None, ..SessionMeta::default() @@ -10896,6 +10926,7 @@ mod tests { let session_meta = SessionMeta { id: conversation_id, + segment_id: None, timestamp: timestamp.clone(), source: SessionSource::SubAgent(SubAgentSource::ThreadSpawn { parent_thread_id, @@ -10944,6 +10975,7 @@ mod tests { let session_meta = SessionMeta { id: conversation_id, + segment_id: None, forked_from_id: Some(forked_from_id), timestamp: timestamp.clone(), model_provider: Some("test-provider".to_string()), diff --git a/codex-rs/app-server/tests/common/rollout.rs b/codex-rs/app-server/tests/common/rollout.rs index 06b273754cd9..4e59e12d342e 100644 --- a/codex-rs/app-server/tests/common/rollout.rs +++ b/codex-rs/app-server/tests/common/rollout.rs @@ -132,6 +132,7 @@ pub fn create_fake_rollout_with_source( // Build JSONL lines let meta = SessionMeta { id: conversation_id, + segment_id: None, forked_from_id: None, timestamp: meta_rfc3339.to_string(), cwd: PathBuf::from("/"), @@ -215,6 +216,7 @@ pub fn create_fake_rollout_with_text_elements( // Build JSONL lines let meta = SessionMeta { id: conversation_id, + segment_id: None, forked_from_id: None, timestamp: meta_rfc3339.to_string(), cwd: PathBuf::from("/"), diff --git a/codex-rs/app-server/tests/suite/v2/thread_read.rs b/codex-rs/app-server/tests/suite/v2/thread_read.rs index 589c7c330a02..dc8703846764 100644 --- a/codex-rs/app-server/tests/suite/v2/thread_read.rs +++ b/codex-rs/app-server/tests/suite/v2/thread_read.rs @@ -1,5 +1,6 @@ use anyhow::Result; use app_test_support::McpProcess; +use app_test_support::create_fake_rollout; use app_test_support::create_fake_rollout_with_text_elements; use app_test_support::create_mock_responses_server_repeating_assistant; use app_test_support::rollout_path; @@ -47,6 +48,7 @@ use codex_feedback::CodexFeedback; use codex_protocol::models::BaseInstructions; use codex_protocol::protocol::EventMsg; use codex_protocol::protocol::RolloutItem; +use codex_protocol::protocol::RolloutReferenceItem; use codex_protocol::protocol::SessionSource as ProtocolSessionSource; use codex_protocol::protocol::ThreadMemoryMode; use codex_protocol::protocol::UserMessageEvent; @@ -421,6 +423,125 @@ async fn thread_read_loaded_include_turns_reads_store_history_without_rollout_pa Ok(()) } +#[tokio::test] +async fn thread_read_unloaded_include_turns_materializes_rollout_reference() -> Result<()> { + let codex_home = TempDir::new()?; + let referenced_thread_id = create_fake_rollout( + codex_home.path(), + "2025-01-05T12-00-00", + "2025-01-05T12:00:00Z", + "history before segment rotation", + Some("mock_provider"), + /*git_info*/ None, + )?; + let referenced_rollout_path = rollout_path( + codex_home.path(), + "2025-01-05T12-00-00", + &referenced_thread_id, + ); + + let thread_id = codex_protocol::ThreadId::from_string("00000000-0000-4000-8000-000000000125")?; + let store_id = Uuid::new_v4().to_string(); + create_config_toml_with_thread_store(codex_home.path(), &store_id)?; + let store = InMemoryThreadStore::for_id(store_id.clone()); + let _in_memory_store = InMemoryThreadStoreId { store_id }; + store + .create_thread(CreateThreadParams { + thread_id, + forked_from_id: None, + source: ProtocolSessionSource::Cli, + base_instructions: BaseInstructions::default(), + dynamic_tools: Vec::new(), + metadata: ThreadPersistenceMetadata { + cwd: None, + model_provider: "test-provider".to_string(), + memory_mode: ThreadMemoryMode::Disabled, + }, + event_persistence_mode: ThreadEventPersistenceMode::default(), + }) + .await?; + store + .append_items(AppendThreadItemsParams { + thread_id, + items: vec![ + RolloutItem::RolloutReference(RolloutReferenceItem { + rollout_path: referenced_rollout_path, + thread_id: Some(codex_protocol::ThreadId::from_string( + &referenced_thread_id, + )?), + rollout_timestamp: None, + segment_id: None, + max_depth: 2, + }), + RolloutItem::EventMsg(EventMsg::UserMessage(UserMessageEvent { + message: "history after segment rotation".to_string(), + images: None, + local_images: Vec::new(), + text_elements: Vec::new(), + })), + ], + }) + .await?; + + let loader_overrides = LoaderOverrides::without_managed_config_for_tests(); + let config = ConfigBuilder::default() + .codex_home(codex_home.path().to_path_buf()) + .fallback_cwd(Some(codex_home.path().to_path_buf())) + .loader_overrides(loader_overrides.clone()) + .build() + .await?; + let client = in_process::start(InProcessStartArgs { + arg0_paths: Arg0DispatchPaths::default(), + config: Arc::new(config), + cli_overrides: Vec::new(), + loader_overrides, + cloud_requirements: CloudRequirementsLoader::default(), + thread_config_loader: Arc::new(codex_config::NoopThreadConfigLoader), + feedback: CodexFeedback::new(), + log_db: None, + environment_manager: Arc::new(EnvironmentManager::default_for_tests()), + config_warnings: Vec::new(), + session_source: SessionSource::Cli.into(), + enable_codex_api_key_env: false, + initialize: InitializeParams { + client_info: ClientInfo { + name: "codex-app-server-tests".to_string(), + title: None, + version: "0.1.0".to_string(), + }, + capabilities: Some(InitializeCapabilities { + experimental_api: true, + ..Default::default() + }), + }, + channel_capacity: in_process::DEFAULT_IN_PROCESS_CHANNEL_CAPACITY, + }) + .await?; + + let result = client + .request(ClientRequest::ThreadRead { + request_id: RequestId::Integer(1), + params: ThreadReadParams { + thread_id: thread_id.to_string(), + include_turns: true, + }, + }) + .await? + .expect("thread/read should succeed"); + let ThreadReadResponse { thread, .. } = serde_json::from_value(result)?; + + assert_eq!( + turn_user_texts(&thread.turns), + vec![ + "history before segment rotation", + "history after segment rotation", + ] + ); + + client.shutdown().await?; + Ok(()) +} + #[tokio::test] async fn thread_list_includes_store_thread_without_rollout_path() -> Result<()> { let codex_home = TempDir::new()?; diff --git a/codex-rs/app-server/tests/suite/v2/thread_resume.rs b/codex-rs/app-server/tests/suite/v2/thread_resume.rs index 057e45783e14..4220491ee970 100644 --- a/codex-rs/app-server/tests/suite/v2/thread_resume.rs +++ b/codex-rs/app-server/tests/suite/v2/thread_resume.rs @@ -52,6 +52,7 @@ use codex_app_server_protocol::UserInput; use codex_config::types::AuthCredentialsStoreMode; use codex_login::REFRESH_TOKEN_URL_OVERRIDE_ENV_VAR; use codex_protocol::AgentPath; +use codex_protocol::SegmentId; use codex_protocol::ThreadId; use codex_protocol::config_types::Personality; use codex_protocol::models::ContentItem; @@ -61,10 +62,12 @@ use codex_protocol::protocol::AgentMessageEvent; use codex_protocol::protocol::AgentStatus; use codex_protocol::protocol::CollabAgentSpawnEndEvent; use codex_protocol::protocol::CollabCloseEndEvent; +use codex_protocol::protocol::ContextCompactedEvent; use codex_protocol::protocol::EventMsg; use codex_protocol::protocol::InterAgentCommunication; use codex_protocol::protocol::RolloutItem; use codex_protocol::protocol::RolloutLine; +use codex_protocol::protocol::RolloutReferenceItem; use codex_protocol::protocol::SessionMeta; use codex_protocol::protocol::SessionMetaLine; use codex_protocol::protocol::SessionSource as RolloutSessionSource; @@ -74,6 +77,7 @@ use codex_protocol::protocol::TokenUsageInfo; use codex_protocol::protocol::TurnAbortReason; use codex_protocol::protocol::TurnAbortedEvent; use codex_protocol::protocol::TurnStartedEvent; +use codex_protocol::protocol::UserMessageEvent; use codex_protocol::user_input::ByteRange; use codex_protocol::user_input::TextElement; use codex_state::StateRuntime; @@ -112,6 +116,49 @@ fn normalized_existing_path(path: impl AsRef) -> Result { Ok(AbsolutePathBuf::from_absolute_path(path.as_ref().canonicalize()?)?.into_path_buf()) } +fn session_meta_rollout_item( + thread_id: ThreadId, + segment_id: Option, + timestamp: &str, +) -> RolloutItem { + RolloutItem::SessionMeta(SessionMetaLine { + meta: SessionMeta { + id: thread_id, + segment_id, + timestamp: timestamp.to_string(), + cwd: PathBuf::from("/"), + originator: "codex".to_string(), + cli_version: "0.0.0".to_string(), + source: RolloutSessionSource::Cli, + model_provider: Some("mock_provider".to_string()), + ..SessionMeta::default() + }, + git: None, + }) +} + +fn user_message_item(text: &str) -> RolloutItem { + RolloutItem::EventMsg(EventMsg::UserMessage(UserMessageEvent { + message: text.to_string(), + images: None, + local_images: Vec::new(), + text_elements: Vec::new(), + })) +} + +fn write_rollout_items(path: &Path, timestamp: &str, items: &[RolloutItem]) -> Result<()> { + let mut jsonl = String::new(); + for item in items { + jsonl.push_str(&serde_json::to_string(&RolloutLine { + timestamp: timestamp.to_string(), + item: item.clone(), + })?); + jsonl.push('\n'); + } + std::fs::write(path, jsonl)?; + Ok(()) +} + async fn wait_for_responses_request_count( server: &wiremock::MockServer, expected_count: usize, @@ -357,6 +404,108 @@ async fn thread_resume_returns_rollout_history() -> Result<()> { Ok(()) } +#[tokio::test] +async fn thread_resume_materializes_rollout_references_for_scrollback() -> Result<()> { + let server = create_mock_responses_server_repeating_assistant("Done").await; + let codex_home = TempDir::new()?; + create_config_toml(codex_home.path(), &server.uri())?; + + let thread_id = ThreadId::new(); + let thread_id_str = thread_id.to_string(); + let old_ts = "2025-01-05T12-00-00"; + let current_ts = "2025-01-05T12-05-00"; + let old_rfc3339 = "2025-01-05T12:00:00Z"; + let current_rfc3339 = "2025-01-05T12:05:00Z"; + let active_old_path = rollout_path(codex_home.path(), old_ts, &thread_id_str); + let archived_old_path = codex_home + .path() + .join("archived_sessions") + .join(active_old_path.file_name().expect("old rollout file name")); + std::fs::create_dir_all(archived_old_path.parent().expect("archived rollout parent"))?; + let current_path = rollout_path(codex_home.path(), current_ts, &thread_id_str); + std::fs::create_dir_all(current_path.parent().expect("current rollout parent"))?; + + let old_items = vec![ + session_meta_rollout_item(thread_id, Some(SegmentId::new()), old_rfc3339), + user_message_item("before compaction"), + ]; + write_rollout_items(archived_old_path.as_path(), old_rfc3339, &old_items)?; + + let current_items = vec![ + session_meta_rollout_item(thread_id, Some(SegmentId::new()), current_rfc3339), + RolloutItem::RolloutReference(RolloutReferenceItem { + rollout_path: active_old_path, + thread_id: Some(thread_id), + rollout_timestamp: Some(old_ts.to_string()), + segment_id: None, + max_depth: 2, + }), + RolloutItem::EventMsg(EventMsg::ContextCompacted(ContextCompactedEvent {})), + user_message_item("after compaction"), + ]; + write_rollout_items(current_path.as_path(), current_rfc3339, ¤t_items)?; + + let mut mcp = McpProcess::new(codex_home.path()).await?; + timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??; + + let resume_id = mcp + .send_thread_resume_request(ThreadResumeParams { + thread_id: thread_id_str.clone(), + ..Default::default() + }) + .await?; + let resume_resp: JSONRPCResponse = timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_response_message(RequestId::Integer(resume_id)), + ) + .await??; + let ThreadResumeResponse { thread, .. } = to_response::(resume_resp)?; + + let user_messages: Vec<&str> = thread + .turns + .iter() + .flat_map(|turn| turn.items.iter()) + .filter_map(|item| match item { + ThreadItem::UserMessage { content, .. } => { + content.iter().find_map(|input| match input { + UserInput::Text { text, .. } => Some(text.as_str()), + UserInput::Image { .. } + | UserInput::LocalImage { .. } + | UserInput::Skill { .. } + | UserInput::Mention { .. } => None, + }) + } + ThreadItem::AgentMessage { .. } + | ThreadItem::Reasoning { .. } + | ThreadItem::CommandExecution { .. } + | ThreadItem::FileChange { .. } + | ThreadItem::McpToolCall { .. } + | ThreadItem::WebSearch { .. } + | ThreadItem::ImageView { .. } + | ThreadItem::ImageGeneration { .. } + | ThreadItem::EnteredReviewMode { .. } + | ThreadItem::ExitedReviewMode { .. } + | ThreadItem::ContextCompaction { .. } + | ThreadItem::HookPrompt { .. } + | ThreadItem::CollabAgentToolCall { .. } + | ThreadItem::DynamicToolCall { .. } + | ThreadItem::Plan { .. } + | ThreadItem::RawResponseItem { .. } => None, + }) + .collect(); + assert_eq!(user_messages, vec!["before compaction", "after compaction"]); + assert!( + thread.turns.iter().any(|turn| { + turn.items + .iter() + .any(|item| matches!(item, ThreadItem::ContextCompaction { .. })) + }), + "resume scrollback should include the compaction marker from the current segment" + ); + + Ok(()) +} + #[tokio::test] async fn thread_resume_can_skip_turns_for_metadata_only_resume() -> Result<()> { let server = create_mock_responses_server_repeating_assistant("Done").await; @@ -1184,6 +1333,7 @@ stream_max_retries = 0 std::fs::create_dir_all(rollout_dir)?; let session_meta = SessionMeta { id: conversation_id, + segment_id: None, forked_from_id: None, timestamp: "2025-01-05T12:00:00Z".to_string(), cwd: repo_path.clone(), diff --git a/codex-rs/core/src/agent/control.rs b/codex-rs/core/src/agent/control.rs index da4fa8b50d24..e111a8841fa9 100644 --- a/codex-rs/core/src/agent/control.rs +++ b/codex-rs/core/src/agent/control.rs @@ -7,8 +7,11 @@ use crate::agent::registry::AgentRegistry; use crate::agent::role::DEFAULT_ROLE_NAME; use crate::agent::role::resolve_role_config; use crate::agent::status::is_final; +use crate::agent::watchdog::final_message_requests_watchdog_close; use crate::codex_thread::ThreadConfigSnapshot; +use crate::find_thread_path_by_id_str; use crate::inherited_thread_state::InheritedThreadState; +use crate::rollout::RolloutRecorder; use crate::session::emit_subagent_session_started; use crate::session_prefix::format_subagent_context_line; use crate::session_prefix::format_subagent_notification_message; @@ -28,6 +31,7 @@ use codex_protocol::models::MessagePhase; use codex_protocol::models::ResponseItem; use codex_protocol::protocol::CollabCloseEndEvent; use codex_protocol::protocol::Event; +use codex_protocol::protocol::ForkReferenceItem; use codex_protocol::protocol::InitialHistory; use codex_protocol::protocol::InterAgentCommunication; use codex_protocol::protocol::Op; @@ -36,6 +40,7 @@ use codex_protocol::protocol::RolloutItem; use codex_protocol::protocol::SessionSource; use codex_protocol::protocol::SubAgentSource; use codex_protocol::protocol::TurnEnvironmentSelection; +use codex_protocol::protocol::WarningEvent; use codex_protocol::user_input::UserInput; use codex_rollout::state_db; use codex_state::DirectionalThreadSpawnEdgeStatus; @@ -47,6 +52,7 @@ use codex_tools::create_watchdog_tools_namespace; use serde::Serialize; use std::collections::HashMap; use std::collections::VecDeque; +use std::path::PathBuf; use std::sync::Arc; use std::sync::Weak; use std::time::SystemTime; @@ -58,6 +64,10 @@ const AGENT_NAMES: &str = include_str!("agent_names.txt"); const ROOT_LAST_TASK_MESSAGE: &str = "Main thread"; const CODEX_EXPERIMENTAL_FORK_PREVIOUS_RESPONSE_ID_ENV: &str = "CODEX_EXPERIMENTAL_FORK_PREVIOUS_RESPONSE_ID"; +const CODEX_EXPERIMENTAL_FORK_PARENT_PROMPT_CACHE_KEY_ENV: &str = + "CODEX_EXPERIMENTAL_FORK_PARENT_PROMPT_CACHE_KEY"; +const CODEX_EXPERIMENTAL_FORK_PROMPT_CACHE_KEY_ENV: &str = + "CODEX_EXPERIMENTAL_FORK_PROMPT_CACHE_KEY"; const WATCHDOG_BOOT_TOOL_SEARCH_CALL_ID: &str = "synthetic_watchdog_tool_search"; const WATCHDOG_BOOT_LIST_AGENTS_CALL_ID: &str = "synthetic_watchdog_list_agents"; @@ -72,6 +82,7 @@ pub(crate) struct SpawnAgentOptions { pub(crate) fork_parent_spawn_call_id: Option, pub(crate) fork_mode: Option, pub(crate) environments: Option>, + pub(crate) initial_task_message: Option, } #[derive(Clone, Debug)] @@ -150,10 +161,50 @@ fn keep_forked_rollout_item(item: &RolloutItem) -> bool { // A forked child gets its own runtime config, including spawned-agent // instructions, so it must establish a fresh context diff baseline. RolloutItem::TurnContext(_) => false, - RolloutItem::Compacted(_) | RolloutItem::EventMsg(_) | RolloutItem::SessionMeta(_) => true, + RolloutItem::Compacted(_) + | RolloutItem::EventMsg(_) + | RolloutItem::ForkReference(_) + | RolloutItem::RolloutReference(_) + | RolloutItem::SessionMeta(_) => true, } } +fn full_history_fork_reference_items( + rollout_path: PathBuf, + source_items: &[RolloutItem], +) -> Vec { + let source_meta = source_items.iter().find_map(|item| match item { + RolloutItem::SessionMeta(meta) => Some(meta), + RolloutItem::Compacted(_) + | RolloutItem::EventMsg(_) + | RolloutItem::ForkReference(_) + | RolloutItem::RolloutReference(_) + | RolloutItem::ResponseItem(_) + | RolloutItem::TurnContext(_) => None, + }); + source_items + .iter() + .find_map(|item| match item { + RolloutItem::SessionMeta(meta) => Some(RolloutItem::SessionMeta(meta.clone())), + RolloutItem::Compacted(_) + | RolloutItem::EventMsg(_) + | RolloutItem::ForkReference(_) + | RolloutItem::RolloutReference(_) + | RolloutItem::ResponseItem(_) + | RolloutItem::TurnContext(_) => None, + }) + .into_iter() + .chain(std::iter::once(RolloutItem::ForkReference( + ForkReferenceItem { + rollout_path, + thread_id: source_meta.map(|meta| meta.meta.id), + segment_id: source_meta.and_then(|meta| meta.meta.segment_id), + nth_user_message: usize::MAX, + }, + ))) + .collect() +} + fn is_watchdog_helper_source(session_source: &SessionSource) -> bool { matches!( session_source, @@ -228,6 +279,25 @@ fn synthetic_watchdog_list_agents_items( ] } +fn role_prompt_item(prompt: String) -> RolloutItem { + RolloutItem::ResponseItem(ResponseItem::Message { + id: None, + role: "developer".to_string(), + content: vec![ContentItem::InputText { text: prompt }], + phase: None, + }) +} + +fn subagent_assignment_item(session_source: &SessionSource, message: String) -> RolloutItem { + let agent_path = session_source + .get_agent_path() + .map(String::from) + .unwrap_or_else(|| "this subagent".to_string()); + role_prompt_item(format!( + "# Subagent Assignment\n\nYou are `{agent_path}`. Your direct assignment from your parent agent is:\n\n{message}" + )) +} + /// Control-plane handle for multi-agent operations. /// `AgentControl` is held by each session (via `SessionServices`). It provides capability to /// spawn new agents and the inter-agent communication layer. @@ -447,10 +517,14 @@ impl AgentControl { ) .await; - self.send_input(new_thread.thread_id, initial_operation) - .await?; - if !new_thread.thread.enabled(Feature::MultiAgentV2) - && !matches!(agent_metadata.agent_role.as_deref(), Some("watchdog")) + Box::pin(self.send_input(new_thread.thread_id, initial_operation)).await?; + let is_watchdog_helper = options.fork_mode.is_some() + && notification_source + .as_ref() + .is_some_and(is_watchdog_helper_source); + if (!new_thread.thread.enabled(Feature::MultiAgentV2) + && !matches!(agent_metadata.agent_role.as_deref(), Some("watchdog"))) + || is_watchdog_helper { let child_reference = agent_metadata .agent_path @@ -517,36 +591,42 @@ impl AgentControl { parent_thread.codex.session.flush_rollout().await?; } - let parent_history = state - .read_stored_thread(ReadThreadParams { - thread_id: parent_thread_id, - include_archived: true, - include_history: true, - }) - .await? - .history + let rollout_path = parent_thread + .as_ref() + .and_then(|parent_thread| parent_thread.rollout_path()) + .or(find_thread_path_by_id_str( + config.codex_home.as_path(), + &parent_thread_id.to_string(), + ) + .await?) .ok_or_else(|| { CodexErr::Fatal(format!( - "parent thread history unavailable for fork: {parent_thread_id}" + "parent thread rollout unavailable for fork: {parent_thread_id}" )) })?; let response_continuation = inherited_thread_state.response_continuation(); let use_response_continuation_baseline = response_continuation.is_some() && matches!(fork_mode, SpawnAgentForkMode::FullHistory); + let source_items = RolloutRecorder::get_rollout_history(&rollout_path) + .await? + .get_rollout_items(); let mut forked_rollout_items = if let (Some(response_continuation), true) = (&response_continuation, use_response_continuation_baseline) { previous_response_fork_rollout_items( - parent_history.items, + source_items, response_continuation.fork_baseline_input(), ) } else { - let mut items = parent_history.items; - if let SpawnAgentForkMode::LastNTurns(last_n_turns) = fork_mode { - items = truncate_rollout_to_last_n_fork_turns(&items, *last_n_turns); + match fork_mode { + SpawnAgentForkMode::FullHistory => { + full_history_fork_reference_items(rollout_path.clone(), &source_items) + } + SpawnAgentForkMode::LastNTurns(last_n_turns) => { + truncate_rollout_to_last_n_fork_turns(&source_items, *last_n_turns) + } } - items }; if !use_response_continuation_baseline { // MultiAgentV2 root/subagent usage hints are injected as standalone developer @@ -585,10 +665,27 @@ impl AgentControl { }); } if is_watchdog_helper_source(&session_source) { + if let Some(role_prompt) = + crate::session::load_agent_role_prompt(&config, &session_source).await + { + forked_rollout_items.push(role_prompt_item(role_prompt)); + } forked_rollout_items.extend( self.watchdog_boot_context_items(state, parent_thread_id) .await, ); + } else if options.fork_mode.is_some() { + if let Some(role_prompt) = + crate::session::load_agent_role_prompt(&config, &session_source).await + { + forked_rollout_items.push(role_prompt_item(role_prompt)); + } + if let Some(initial_task_message) = options.initial_task_message.clone() { + forked_rollout_items.push(subagent_assignment_item( + &session_source, + initial_task_message, + )); + } } state @@ -685,17 +782,10 @@ impl AgentControl { async fn resume_single_agent_from_rollout( &self, - mut config: crate::config::Config, + config: crate::config::Config, thread_id: ThreadId, session_source: SessionSource, ) -> CodexResult { - if let SessionSource::SubAgent(SubAgentSource::ThreadSpawn { depth, .. }) = &session_source - && *depth >= config.agent_max_depth - && !config.features.enabled(Feature::MultiAgentV2) - { - let _ = config.features.disable(Feature::SpawnCsv); - let _ = config.features.disable(Feature::Collab); - } let state = self.upgrade()?; let mut reservation = self.state.reserve_spawn_slot(config.agent_max_threads)?; let (session_source, agent_metadata) = match session_source { @@ -767,7 +857,9 @@ impl AgentControl { // Resumed threads are re-registered in-memory and need the same listener // attachment path as freshly spawned threads. state.notify_thread_created(resumed_thread.thread_id); - if !resumed_thread.thread.enabled(Feature::MultiAgentV2) { + if !resumed_thread.thread.enabled(Feature::MultiAgentV2) + && !matches!(agent_metadata.agent_role.as_deref(), Some("watchdog")) + { let child_reference = agent_metadata .agent_path .as_ref() @@ -1029,6 +1121,46 @@ impl AgentControl { Ok(()) } + pub(crate) async fn finalize_watchdog_helper( + &self, + helper_thread_id: ThreadId, + helper_status: AgentStatus, + ) -> bool { + let Some(watchdogs) = self.watchdogs.as_ref() else { + return false; + }; + let Some(owner_thread_id) = watchdogs.owner_for_active_helper(helper_thread_id).await + else { + return false; + }; + let target_thread_id = watchdogs.target_for_active_helper(helper_thread_id).await; + let helper_suppressed = watchdogs.take_suppressed_helper(helper_thread_id).await; + let mut close_watchdog_handle = false; + if let AgentStatus::Completed(Some(message)) = helper_status + && !helper_suppressed + { + close_watchdog_handle = final_message_requests_watchdog_close(&message); + if let Err(err) = self.send_watchdog_wakeup(owner_thread_id, message).await { + warn!( + helper_thread_id = %helper_thread_id, + owner_thread_id = %owner_thread_id, + "watchdog helper forward failed: {err}" + ); + } + } + + let _ = self.shutdown_live_agent(helper_thread_id).await; + if close_watchdog_handle { + if let Some(target_thread_id) = target_thread_id { + let _ = self.unregister_watchdog_handle(target_thread_id).await; + let _ = self.shutdown_live_agent(target_thread_id).await; + } + } else { + let _ = self.finish_watchdog_helper(helper_thread_id).await; + } + true + } + fn watchdog_manager(&self) -> CodexResult<&Arc> { self.watchdogs.as_ref().ok_or_else(|| { CodexErr::UnsupportedOperation("watchdog manager unavailable".to_string()) @@ -1296,6 +1428,30 @@ impl AgentControl { Ok(()) } + pub(crate) async fn send_watchdog_snooze_event( + &self, + owner_thread_id: ThreadId, + target_thread_id: ThreadId, + delay_seconds: u64, + ) -> CodexResult<()> { + let state = self.upgrade()?; + let owner_thread = state.get_thread(owner_thread_id).await?; + owner_thread + .codex + .session + .send_event_raw(Event { + id: format!("watchdog-snooze-{target_thread_id}-{}", ThreadId::new()), + msg: codex_protocol::protocol::EventMsg::Warning(WarningEvent { + message: format!( + "Watchdog snoozed for {}.", + format_watchdog_snooze_duration(delay_seconds) + ), + }), + }) + .await; + Ok(()) + } + pub(crate) async fn compact_parent_for_watchdog_helper( &self, helper_thread_id: ThreadId, @@ -1367,7 +1523,7 @@ impl AgentControl { &self, child_thread_id: ThreadId, session_source: Option, - child_reference: String, + _child_reference: String, child_agent_path: Option, ) { let Some(SessionSource::SubAgent(SubAgentSource::ThreadSpawn { @@ -1395,12 +1551,21 @@ impl AgentControl { if !is_final(&status) { return; } + if control + .finalize_watchdog_helper(child_thread_id, status.clone()) + .await + { + return; + } let Ok(state) = control.upgrade() else { return; }; let child_thread = state.get_thread(child_thread_id).await.ok(); - let message = format_subagent_notification_message(child_reference.as_str(), &status); + // The TUI indexes live subagent rows by ThreadId. Use the child ThreadId in this + // hidden notification so final status updates remove the correct panel row. + let message = + format_subagent_notification_message(&child_thread_id.to_string(), &status); if child_agent_path.is_some() && child_thread .as_ref() @@ -1632,6 +1797,10 @@ async fn parent_prompt_cache_key_for_source( state: &Arc, session_source: Option<&SessionSource>, ) -> Option { + if !fork_parent_prompt_cache_key_enabled() { + return None; + } + let Some(SessionSource::SubAgent(SubAgentSource::ThreadSpawn { parent_thread_id, .. })) = session_source @@ -1646,20 +1815,46 @@ async fn parent_prompt_cache_key_for_source( .map(|parent_thread| parent_thread.codex.session.prompt_cache_key()) } +fn fork_parent_prompt_cache_key_enabled() -> bool { + let parent_named_value = + std::env::var(CODEX_EXPERIMENTAL_FORK_PARENT_PROMPT_CACHE_KEY_ENV).ok(); + let legacy_value = std::env::var(CODEX_EXPERIMENTAL_FORK_PROMPT_CACHE_KEY_ENV).ok(); + fork_parent_prompt_cache_key_value_enabled( + parent_named_value.as_deref(), + legacy_value.as_deref(), + ) +} + +fn fork_parent_prompt_cache_key_value_enabled( + parent_named_value: Option<&str>, + legacy_value: Option<&str>, +) -> bool { + parent_named_value.or(legacy_value).is_none_or(|value| { + matches!( + value.to_ascii_lowercase().as_str(), + "1" | "true" | "yes" | "on" + ) + }) +} + fn previous_response_fork_rollout_items( source_items: Vec, baseline_input: Vec, ) -> Vec { let source_session_meta = source_items.iter().find_map(|item| match item { RolloutItem::SessionMeta(meta) => Some(meta.clone()), - RolloutItem::ResponseItem(_) + RolloutItem::ForkReference(_) + | RolloutItem::RolloutReference(_) + | RolloutItem::ResponseItem(_) | RolloutItem::Compacted(_) | RolloutItem::TurnContext(_) | RolloutItem::EventMsg(_) => None, }); let latest_turn_context = source_items.iter().rev().find_map(|item| match item { RolloutItem::TurnContext(turn_context) => Some(turn_context.clone()), - RolloutItem::ResponseItem(_) + RolloutItem::ForkReference(_) + | RolloutItem::RolloutReference(_) + | RolloutItem::ResponseItem(_) | RolloutItem::Compacted(_) | RolloutItem::SessionMeta(_) | RolloutItem::EventMsg(_) => None, @@ -1799,6 +1994,16 @@ fn agent_matches_prefix(agent_path: Option<&AgentPath>, prefix: &AgentPath) -> b }) } +fn format_watchdog_snooze_duration(delay_seconds: u64) -> String { + let minutes = delay_seconds / 60; + let seconds = delay_seconds % 60; + match (minutes, seconds) { + (0, seconds) => format!("{seconds}s"), + (minutes, 0) => format!("{minutes}m"), + (minutes, seconds) => format!("{minutes}m {seconds}s"), + } +} + pub(crate) fn render_input_preview(initial_operation: &Op) -> String { match initial_operation { Op::UserInput { items, .. } => items diff --git a/codex-rs/core/src/agent/control_tests.rs b/codex-rs/core/src/agent/control_tests.rs index 9858eef09de8..171c9f7d6073 100644 --- a/codex-rs/core/src/agent/control_tests.rs +++ b/codex-rs/core/src/agent/control_tests.rs @@ -30,13 +30,19 @@ use codex_thread_store::ArchiveThreadParams; use codex_thread_store::LocalThreadStore; use codex_thread_store::LocalThreadStoreConfig; use codex_thread_store::ThreadStore; +use core_test_support::responses::ev_assistant_message; use core_test_support::responses::ev_completed; use core_test_support::responses::ev_response_created; use core_test_support::responses::mount_sse_once; +use core_test_support::responses::mount_sse_sequence; use core_test_support::responses::namespace_child_tool; use core_test_support::responses::sse; use core_test_support::responses::start_mock_server; +use core_test_support::responses::start_websocket_server; use pretty_assertions::assert_eq; +use serial_test::serial; +use std::ffi::OsStr; +use std::ffi::OsString; use tempfile::TempDir; use tokio::time::Duration; use tokio::time::sleep; @@ -104,6 +110,49 @@ fn assistant_message(text: &str, phase: Option) -> ResponseItem { } } +async fn wait_for_turn_complete(thread: &CodexThread) { + timeout(Duration::from_secs(5), async { + loop { + let event = thread + .next_event() + .await + .expect("event channel should stay open"); + if matches!(event.msg, EventMsg::TurnComplete(_)) { + break; + } + } + }) + .await + .expect("turn should complete"); +} + +fn request_tool_signatures(body: &serde_json::Value) -> std::collections::BTreeSet { + let mut signatures = std::collections::BTreeSet::new(); + let tools = body["tools"].as_array().expect("tools should be an array"); + for tool in tools { + let tool_type = tool.get("type").and_then(serde_json::Value::as_str); + let Some(name) = tool.get("name").and_then(serde_json::Value::as_str) else { + continue; + }; + if tool_type == Some("namespace") { + let child_tools = tool + .get("tools") + .and_then(serde_json::Value::as_array) + .expect("namespace tools should have child tools"); + for child_tool in child_tools { + let child_name = child_tool + .get("name") + .and_then(serde_json::Value::as_str) + .expect("child tool should have a name"); + signatures.insert(format!("{name}.{child_name}")); + } + } else { + signatures.insert(name.to_string()); + } + } + signatures +} + #[test] fn fork_previous_response_id_env_value_parses_truthy_values() { for value in ["1", "true", "TRUE", "yes", "on"] { @@ -126,6 +175,207 @@ fn fork_previous_response_id_is_enabled_by_default() { assert!(fork_previous_response_id_value_enabled(/*value*/ None)); } +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +#[serial(fork_env)] +async fn fork_previous_response_id_env_controls_inherited_continuation() -> anyhow::Result<()> { + let server = start_websocket_server(vec![vec![ + vec![ + ev_response_created("warm-parent"), + ev_completed("warm-parent"), + ], + vec![ + ev_response_created("resp-parent"), + ev_assistant_message("msg-parent", "parent done"), + ev_completed("resp-parent"), + ], + ]]) + .await; + let (_home, mut config) = test_config().await; + config.model_provider.base_url = Some(format!("{}/v1", server.uri())); + config.model_provider.supports_websockets = true; + + let manager = ThreadManager::with_models_provider_and_home_for_tests( + CodexAuth::from_api_key("dummy"), + config.model_provider.clone(), + config.codex_home.to_path_buf(), + std::sync::Arc::new(codex_exec_server::EnvironmentManager::default_for_tests()), + ); + let control = manager.agent_control(); + let parent = manager.start_thread(config).await?; + let parent_thread_id = parent.thread_id; + parent.thread.submit(text_input("parent seed")).await?; + wait_for_turn_complete(parent.thread.as_ref()).await; + + let state = control + .manager + .upgrade() + .expect("test manager state should stay alive"); + let session_source = SessionSource::SubAgent(SubAgentSource::ThreadSpawn { + parent_thread_id, + depth: 1, + agent_path: None, + agent_nickname: Some("worker".to_string()), + agent_role: None, + }); + + let enabled_guard = EnvVarGuard::set( + CODEX_EXPERIMENTAL_FORK_PREVIOUS_RESPONSE_ID_ENV, + OsStr::new("1"), + ); + assert!( + parent_response_continuation_for_source(&state, Some(&session_source)) + .await + .is_some(), + "forked agents should inherit the parent response id by default so forked requests keep the parent prompt prefix cacheable" + ); + drop(enabled_guard); + + let disabled_guard = EnvVarGuard::set( + CODEX_EXPERIMENTAL_FORK_PREVIOUS_RESPONSE_ID_ENV, + OsStr::new("0"), + ); + assert!( + parent_response_continuation_for_source(&state, Some(&session_source)) + .await + .is_none(), + "CODEX_EXPERIMENTAL_FORK_PREVIOUS_RESPONSE_ID=0 must disable only the fork-specific previous_response_id inheritance" + ); + drop(disabled_guard); + + server.shutdown().await; + Ok(()) +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +#[serial(fork_env)] +async fn fork_previous_response_id_env_disables_parent_previous_id_on_child_request() +-> anyhow::Result<()> { + let _previous_response_guard = EnvVarGuard::set( + CODEX_EXPERIMENTAL_FORK_PREVIOUS_RESPONSE_ID_ENV, + OsStr::new("0"), + ); + let server = start_websocket_server(vec![ + vec![ + vec![ + ev_response_created("warm-parent"), + ev_completed("warm-parent"), + ], + vec![ + ev_response_created("resp-parent"), + ev_assistant_message("msg-parent", "parent done"), + ev_completed("resp-parent"), + ], + ], + vec![ + vec![ + ev_response_created("warm-child"), + ev_completed("warm-child"), + ], + vec![ + ev_response_created("resp-child"), + ev_completed("resp-child"), + ], + ], + ]) + .await; + let (_home, mut config) = test_config().await; + config.model_provider.base_url = Some(format!("{}/v1", server.uri())); + config.model_provider.supports_websockets = true; + + let manager = ThreadManager::with_models_provider_and_home_for_tests( + CodexAuth::from_api_key("dummy"), + config.model_provider.clone(), + config.codex_home.to_path_buf(), + std::sync::Arc::new(codex_exec_server::EnvironmentManager::default_for_tests()), + ); + let control = manager.agent_control(); + let parent = manager.start_thread(config.clone()).await?; + let parent_thread_id = parent.thread_id; + parent.thread.submit(text_input("parent seed")).await?; + wait_for_turn_complete(parent.thread.as_ref()).await; + parent + .thread + .codex + .session + .ensure_rollout_materialized() + .await; + parent.thread.codex.session.flush_rollout().await?; + + let child_thread_id = control + .spawn_agent_with_metadata( + config, + text_input("child request boundary"), + Some(SessionSource::SubAgent(SubAgentSource::ThreadSpawn { + parent_thread_id, + depth: 1, + agent_path: None, + agent_nickname: Some("worker".to_string()), + agent_role: None, + })), + SpawnAgentOptions { + fork_parent_spawn_call_id: Some( + "spawn-call-previous-response-disabled".to_string(), + ), + fork_mode: Some(SpawnAgentForkMode::FullHistory), + ..Default::default() + }, + ) + .await? + .thread_id; + let child_thread = manager + .get_thread(child_thread_id) + .await + .expect("child thread should be registered"); + wait_for_turn_complete(child_thread.as_ref()).await; + + let connections = server.connections(); + let child_connection = connections + .get(1) + .expect("forked child should use its own websocket connection"); + assert!( + child_connection.iter().all(|request| { + request.body_json()["previous_response_id"].as_str() != Some("resp-parent") + }), + "CODEX_EXPERIMENTAL_FORK_PREVIOUS_RESPONSE_ID=0 must prevent the child request from using the parent's response id; child requests={child_connection:#?}" + ); + + server.shutdown().await; + Ok(()) +} + +#[test] +fn fork_parent_prompt_cache_key_env_values_parse_with_parent_precedence() { + for value in ["1", "true", "TRUE", "yes", "on"] { + assert!( + fork_parent_prompt_cache_key_value_enabled(Some(value), /*legacy_value*/ None), + "{value} should enable parent prompt cache key inheritance" + ); + } + + for value in ["", "0", "false", "off", "no", "enabled"] { + assert!( + !fork_parent_prompt_cache_key_value_enabled(Some(value), /*legacy_value*/ None), + "{value} should not enable parent prompt cache key inheritance" + ); + } + + assert!(fork_parent_prompt_cache_key_value_enabled( + /*parent_named_value*/ None, /*legacy_value*/ None + )); + assert!(fork_parent_prompt_cache_key_value_enabled( + /*parent_named_value*/ None, + Some("1") + )); + assert!(!fork_parent_prompt_cache_key_value_enabled( + Some("0"), + Some("1") + )); + assert!(fork_parent_prompt_cache_key_value_enabled( + Some("1"), + Some("0") + )); +} + #[tokio::test] async fn previous_response_fork_rollout_items_preserve_latest_turn_context() { let harness = AgentControlHarness::new().await; @@ -205,6 +455,32 @@ impl AgentControlHarness { } } +struct EnvVarGuard { + key: &'static str, + original: Option, +} + +impl EnvVarGuard { + fn set(key: &'static str, value: &OsStr) -> Self { + let original = std::env::var_os(key); + unsafe { + std::env::set_var(key, value); + } + Self { key, original } + } +} + +impl Drop for EnvVarGuard { + fn drop(&mut self) { + unsafe { + match &self.original { + Some(value) => std::env::set_var(self.key, value), + None => std::env::remove_var(self.key), + } + } + } +} + fn has_subagent_notification(history_items: &[ResponseItem]) -> bool { history_items.iter().any(|item| { let ResponseItem::Message { role, content, .. } = item else { @@ -237,6 +513,23 @@ fn history_contains_text(history_items: &[ResponseItem], needle: &str) -> bool { }) } +fn history_text_match_count(history_items: &[ResponseItem], needle: &str) -> usize { + history_items + .iter() + .filter(|item| { + let ResponseItem::Message { content, .. } = item else { + return false; + }; + content.iter().any(|content_item| match content_item { + ContentItem::InputText { text } | ContentItem::OutputText { text } => { + text.contains(needle) + } + ContentItem::InputImage { .. } => false, + }) + }) + .count() +} + fn history_contains_assistant_inter_agent_communication( history_items: &[ResponseItem], expected: &InterAgentCommunication, @@ -505,6 +798,10 @@ async fn watchdog_helper_forks_owner_history() { .features .enable(Feature::AgentWatchdog) .expect("test config should allow feature update"); + config + .features + .enable(Feature::AgentPromptInjection) + .expect("test config should allow feature update"); config .mcp_servers .set(std::collections::HashMap::from([( @@ -514,6 +811,11 @@ async fn watchdog_helper_forks_owner_history() { .expect("test config should allow MCP servers"); let owner_turn = owner_thread.codex.session.new_default_turn().await; + owner_thread + .codex + .session + .record_context_updates_and_set_reference_context_item(owner_turn.as_ref()) + .await; owner_thread .codex .session @@ -594,6 +896,42 @@ async fn watchdog_helper_forks_owner_history() { &history_items, "previous owner response: pong 81 (118)" )); + let watchdog_prompt_position = history_items + .iter() + .position(|item| { + matches!( + item, + ResponseItem::Message { role, content, .. } + if role == "developer" + && content.iter().any(|content| matches!( + content, + ContentItem::InputText { text } + if text.contains("You are also a **watchdog**") + )) + ) + }) + .expect( + "forked watchdog helpers must receive watchdog_agent_prompt.md after the fork boundary", + ); + let tool_search_position = history_items + .iter() + .position(|item| { + matches!( + item, + ResponseItem::ToolSearchCall { call_id: Some(call_id), .. } + if call_id == "synthetic_watchdog_tool_search" + ) + }) + .expect("watchdog helpers should receive synthetic tool discovery context"); + assert!( + watchdog_prompt_position < tool_search_position, + "watchdog_agent_prompt.md must be injected before synthetic watchdog tool context" + ); + assert_eq!( + history_text_match_count(&history_items, "You are also a **watchdog**"), + 1, + "forked watchdog helper history must contain exactly one watchdog prompt" + ); assert!(history_items.iter().any(|item| matches!( item, ResponseItem::ToolSearchCall { call_id: Some(call_id), .. } @@ -1345,7 +1683,12 @@ async fn spawn_agent_fork_rejects_missing_parent_spawn_call_id_for_non_watchdogs } #[tokio::test] -async fn spawn_agent_can_fork_parent_thread_history_with_sanitized_items() { +#[serial(fork_env)] +async fn spawn_agent_full_history_fork_uses_compact_reference_and_materializes_parent_items() { + let _previous_response_guard = EnvVarGuard::set( + CODEX_EXPERIMENTAL_FORK_PREVIOUS_RESPONSE_ID_ENV, + OsStr::new("0"), + ); let harness = AgentControlHarness::new().await; let mut parent_config = harness.config.clone(); let _ = parent_config.features.enable(Feature::MultiAgentV2); @@ -1355,6 +1698,7 @@ async fn spawn_agent_can_fork_parent_thread_history_with_sanitized_items() { Some("Parent subagent guidance.".to_string()); let mut child_config = harness.config.clone(); let _ = child_config.features.enable(Feature::MultiAgentV2); + let _ = child_config.features.enable(Feature::AgentPromptInjection); child_config.multi_agent_v2.root_agent_usage_hint_text = Some("Child root guidance.".to_string()); child_config.multi_agent_v2.subagent_usage_hint_text = @@ -1441,6 +1785,7 @@ async fn spawn_agent_can_fork_parent_thread_history_with_sanitized_items() { SpawnAgentOptions { fork_parent_spawn_call_id: Some(parent_spawn_call_id.clone()), fork_mode: Some(SpawnAgentForkMode::FullHistory), + initial_task_message: Some("child task".to_string()), ..Default::default() }, ) @@ -1487,7 +1832,22 @@ async fn spawn_agent_can_fork_parent_thread_history_with_sanitized_items() { let mut parent_tool_names = parent_mcp_tools.keys().cloned().collect::>(); parent_tool_names.sort(); assert_eq!(snapshot_tool_names, parent_tool_names); + let child_rollout_path = child_thread + .rollout_path() + .expect("child rollout path should be present"); + let child_rollout = RolloutRecorder::get_rollout_history(&child_rollout_path) + .await + .expect("child rollout should be readable"); + assert!( + child_rollout + .get_rollout_items() + .iter() + .any(|item| matches!(item, RolloutItem::ForkReference(_))), + "full-history forks should store a compact ForkReference so fork rollout files do not copy parent rollout history" + ); + let history = child_thread.codex.session.clone_history().await; + let subagent_prompt = crate::session::load_subagent_prompt(&harness.config.codex_home).await; let expected_history = [ ResponseItem::Message { id: None, @@ -1497,12 +1857,54 @@ async fn spawn_agent_can_fork_parent_thread_history_with_sanitized_items() { }], phase: None, }, + ResponseItem::Message { + id: None, + role: "developer".to_string(), + content: vec![ContentItem::InputText { + text: "Parent root guidance.".to_string(), + }], + phase: None, + }, + ResponseItem::Message { + id: None, + role: "developer".to_string(), + content: vec![ContentItem::InputText { + text: "Parent subagent guidance.".to_string(), + }], + phase: None, + }, + assistant_message("parent commentary", Some(MessagePhase::Commentary)), assistant_message("parent final answer", Some(MessagePhase::FinalAnswer)), + assistant_message("parent unknown phase", /*phase*/ None), + ResponseItem::Reasoning { + id: String::new(), + summary: Vec::new(), + content: None, + encrypted_content: None, + }, + trigger_message.to_response_input_item().into(), + spawn_agent_call(&parent_spawn_call_id), + ResponseItem::Message { + id: None, + role: "developer".to_string(), + content: vec![ContentItem::InputText { + text: subagent_prompt, + }], + phase: None, + }, + ResponseItem::Message { + id: None, + role: "developer".to_string(), + content: vec![ContentItem::InputText { + text: "# Subagent Assignment\n\nYou are `this subagent`. Your direct assignment from your parent agent is:\n\nchild task".to_string(), + }], + phase: None, + }, ]; assert_eq!( history.raw_items(), &expected_history, - "forked child history should keep only parent user messages and assistant final answers" + "forked child history should materialize the full parent prefix so full-history forks preserve prompt-cache alignment" ); let expected = ( @@ -1536,14 +1938,26 @@ async fn spawn_agent_can_fork_parent_thread_history_with_sanitized_items() { } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] +#[serial(fork_env)] async fn forked_spawn_first_request_uses_parent_cache_key_and_mcp_snapshot() -> anyhow::Result<()> { let server = start_mock_server().await; - let child_response_mock = mount_sse_once( + let request_log = mount_sse_sequence( &server, - sse(vec![ev_response_created("resp-1"), ev_completed("resp-1")]), + vec![ + sse(vec![ + ev_response_created("resp-parent"), + ev_completed("resp-parent"), + ]), + sse(vec![ + ev_response_created("resp-child"), + ev_completed("resp-child"), + ]), + ], ) .await; let (_home, mut config) = test_config().await; + let _ = config.features.enable(Feature::MultiAgentV2); + let _ = config.features.enable(Feature::AgentWatchdog); config.model_provider.base_url = Some(format!("{}/v1", server.uri())); config.model_provider.supports_websockets = false; let mcp_server_path = config.codex_home.join("fake_mcp_server.py"); @@ -1665,6 +2079,119 @@ while True: parent_mcp_tools.contains_key("mcp__rmcp__echo"), "parent MCP manager should expose live MCP tools before forking: tools={parent_mcp_tools:#?}; failures={startup_failures:#?}" ); + parent.thread.submit(text_input("parent seed")).await?; + wait_for_turn_complete(parent.thread.as_ref()).await; + parent + .thread + .codex + .session + .ensure_rollout_materialized() + .await; + parent.thread.codex.session.flush_rollout().await?; + + let child_thread_id = control + .spawn_agent_with_metadata( + config, + text_input("child request boundary"), + Some(SessionSource::SubAgent(SubAgentSource::ThreadSpawn { + parent_thread_id, + depth: 1, + agent_path: None, + agent_nickname: Some("worker".to_string()), + agent_role: None, + })), + SpawnAgentOptions { + fork_parent_spawn_call_id: Some("spawn-call-request-boundary".to_string()), + fork_mode: Some(SpawnAgentForkMode::FullHistory), + ..Default::default() + }, + ) + .await? + .thread_id; + let child_thread = manager + .get_thread(child_thread_id) + .await + .expect("child thread should be registered"); + + timeout(Duration::from_secs(5), async { + loop { + let event = child_thread + .next_event() + .await + .expect("child event channel should stay open"); + if matches!(event.msg, EventMsg::TurnComplete(_)) { + break; + } + } + }) + .await + .expect("child turn should complete"); + let requests = request_log.requests(); + assert_eq!(requests.len(), 2); + let parent_body = requests[0].body_json(); + let child_body = requests[1].body_json(); + let expected_prompt_cache_key = parent_prompt_cache_key.to_string(); + assert_eq!( + child_body["prompt_cache_key"].as_str(), + Some(expected_prompt_cache_key.as_str()) + ); + let parent_tool_signatures = request_tool_signatures(&parent_body); + let child_tool_signatures = request_tool_signatures(&child_body); + assert_eq!( + child_tool_signatures, parent_tool_signatures, + "forked children must keep the same eager tool surface as their parent so request prefixes stay cacheable" + ); + for expected_tool in [ + "spawn_agent", + "send_message", + "followup_task", + "wait_agent", + "list_agents", + "close_agent", + "watchdog.close_self", + "watchdog.snooze", + "watchdog.compact_parent_context", + ] { + assert!( + child_tool_signatures.contains(expected_tool), + "expected forked child request to expose `{expected_tool}`; tools={child_tool_signatures:#?}" + ); + } + assert!( + namespace_child_tool(&child_body, "mcp__rmcp__", "echo").is_some(), + "first forked child request should expose parent MCP snapshot tools: {child_body:#}" + ); + + Ok(()) +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +#[serial(fork_env)] +async fn fork_parent_prompt_cache_key_env_disables_request_inheritance() -> anyhow::Result<()> { + let _parent_prompt_cache_key_guard = EnvVarGuard::set( + CODEX_EXPERIMENTAL_FORK_PARENT_PROMPT_CACHE_KEY_ENV, + OsStr::new("0"), + ); + let server = start_mock_server().await; + let child_response_mock = mount_sse_once( + &server, + sse(vec![ev_response_created("resp-1"), ev_completed("resp-1")]), + ) + .await; + let (_home, mut config) = test_config().await; + config.model_provider.base_url = Some(format!("{}/v1", server.uri())); + config.model_provider.supports_websockets = false; + + let manager = ThreadManager::with_models_provider_and_home_for_tests( + CodexAuth::from_api_key("dummy"), + config.model_provider.clone(), + config.codex_home.to_path_buf(), + std::sync::Arc::new(codex_exec_server::EnvironmentManager::default_for_tests()), + ); + let control = manager.agent_control(); + let parent = manager.start_thread(config.clone()).await?; + let parent_thread_id = parent.thread_id; + let parent_prompt_cache_key = parent.thread.codex.session.prompt_cache_key(); parent .thread .inject_user_message_without_turn("parent seed".to_string()) @@ -1689,7 +2216,9 @@ while True: agent_role: None, })), SpawnAgentOptions { - fork_parent_spawn_call_id: Some("spawn-call-request-boundary".to_string()), + fork_parent_spawn_call_id: Some( + "spawn-call-parent-prompt-cache-key-disabled".to_string(), + ), fork_mode: Some(SpawnAgentForkMode::FullHistory), ..Default::default() }, @@ -1714,16 +2243,15 @@ while True: }) .await .expect("child turn should complete"); + let child_prompt_cache_key = child_thread.codex.session.prompt_cache_key(); + assert_ne!(child_prompt_cache_key, parent_prompt_cache_key); + let body = child_response_mock.single_request().body_json(); - let expected_prompt_cache_key = parent_prompt_cache_key.to_string(); + let expected_prompt_cache_key = child_prompt_cache_key.to_string(); assert_eq!( body["prompt_cache_key"].as_str(), Some(expected_prompt_cache_key.as_str()) ); - assert!( - namespace_child_tool(&body, "mcp__rmcp__", "echo").is_some(), - "first forked child request should expose parent MCP snapshot tools: {body:#}" - ); Ok(()) } @@ -1731,6 +2259,8 @@ while True: #[tokio::test] async fn spawn_agent_fork_flushes_parent_rollout_before_loading_history() { let harness = AgentControlHarness::new().await; + let mut child_config = harness.config.clone(); + let _ = child_config.features.enable(Feature::AgentPromptInjection); let (parent_thread_id, parent_thread) = harness.start_thread().await; let turn_context = parent_thread.codex.session.new_default_turn().await; let parent_spawn_call_id = "spawn-call-unflushed".to_string(); @@ -1749,7 +2279,7 @@ async fn spawn_agent_fork_flushes_parent_rollout_before_loading_history() { let child_thread_id = harness .control .spawn_agent_with_metadata( - harness.config.clone(), + child_config, text_input("child task"), Some(SessionSource::SubAgent(SubAgentSource::ThreadSpawn { parent_thread_id, @@ -1761,6 +2291,7 @@ async fn spawn_agent_fork_flushes_parent_rollout_before_loading_history() { SpawnAgentOptions { fork_parent_spawn_call_id: Some(parent_spawn_call_id.clone()), fork_mode: Some(SpawnAgentForkMode::FullHistory), + initial_task_message: Some("child task".to_string()), ..Default::default() }, ) @@ -1778,6 +2309,27 @@ async fn spawn_agent_fork_flushes_parent_rollout_before_loading_history() { history_contains_text(history.raw_items(), "unflushed final answer"), "forked child history should include unflushed assistant final answers after flushing the parent rollout" ); + assert!( + history_contains_text(history.raw_items(), "# Subagent Assignment"), + "forked child history should contain an explicit developer assignment" + ); + assert_eq!( + history_text_match_count(history.raw_items(), "# You are a Subagent"), + 1, + "forked child history must contain exactly one subagent prompt" + ); + assert_eq!( + history_text_match_count(history.raw_items(), "# Subagent Assignment"), + 1, + "forked child history must contain exactly one explicit assignment" + ); + assert!( + history_contains_text( + history.raw_items(), + "Your direct assignment from your parent agent is:\n\nchild task" + ), + "forked child history should make the spawned task unambiguous" + ); let _ = harness .control @@ -2327,7 +2879,7 @@ async fn multi_agent_v2_completion_queues_message_for_direct_parent() { .await; let expected_message = crate::session_prefix::format_subagent_notification_message( - tester_path.as_str(), + &tester_thread_id.to_string(), &AgentStatus::Completed(Some("done".to_string())), ); let expected = ( diff --git a/codex-rs/core/src/agent/watchdog.rs b/codex-rs/core/src/agent/watchdog.rs index 193019a24a6a..b0e70755af94 100644 --- a/codex-rs/core/src/agent/watchdog.rs +++ b/codex-rs/core/src/agent/watchdog.rs @@ -123,7 +123,7 @@ impl WatchdogManager { generation, }; - let (superseded, helper_ids_to_unsuppress) = { + let (superseded, suppressed_helpers) = { let mut registrations = self.registrations.lock().await; let superseded_targets = registrations .iter() @@ -135,11 +135,11 @@ impl WatchdogManager { }) .collect::>(); let mut superseded = Vec::new(); - let mut helper_ids_to_unsuppress = Vec::new(); + let mut suppressed_helpers = Vec::new(); for superseded_target in superseded_targets { if let Some(removed) = registrations.remove(&superseded_target) { if let Some(helper_id) = removed.active_helper_id { - helper_ids_to_unsuppress.push(helper_id); + suppressed_helpers.push(helper_id); } superseded.push(RemovedWatchdog { target_thread_id: superseded_target, @@ -148,12 +148,12 @@ impl WatchdogManager { } } registrations.insert(entry.registration.target_thread_id, entry); - (superseded, helper_ids_to_unsuppress) + (superseded, suppressed_helpers) }; - if !helper_ids_to_unsuppress.is_empty() { - let mut suppressed_helpers = self.suppressed_helpers.lock().await; - for helper_id in helper_ids_to_unsuppress { - suppressed_helpers.remove(&helper_id); + if !suppressed_helpers.is_empty() { + let mut suppressed = self.suppressed_helpers.lock().await; + for helper_id in suppressed_helpers { + suppressed.remove(&helper_id); } } Ok(superseded) @@ -306,40 +306,9 @@ impl WatchdogManager { if !is_final(&helper_status) { return; } - let helper_suppressed = self.take_suppressed_helper(helper_id).await; - let mut close_watchdog_handle = false; - if let AgentStatus::Completed(Some(message)) = helper_status - && !helper_suppressed - { - close_watchdog_handle = final_message_requests_watchdog_close(&message); - if let Err(err) = control_for_spawn - .send_watchdog_wakeup(snapshot.owner_thread_id, message) - .await - { - warn!( - helper_id = %helper_id, - owner_thread_id = %snapshot.owner_thread_id, - "watchdog helper forward failed: {err}" - ); - } - } - let _ = control_for_spawn.shutdown_live_agent(helper_id).await; - if close_watchdog_handle { - let _ = control_for_spawn - .unregister_watchdog_handle(target_thread_id) - .await; - let _ = control_for_spawn - .shutdown_live_agent(target_thread_id) - .await; - return; - } - self.update_after_spawn( - target_thread_id, - generation, - now, - /*active_helper_id*/ None, - ) - .await; + let _ = control_for_spawn + .finalize_watchdog_helper(helper_id, helper_status) + .await; return; } @@ -403,6 +372,7 @@ impl WatchdogManager { fork_parent_spawn_call_id: None, fork_mode: Some(SpawnAgentForkMode::FullHistory), environments: None, + initial_task_message: None, }, ) .await; @@ -614,7 +584,7 @@ fn is_watchdog_terminated(status: &AgentStatus) -> bool { matches!(status, AgentStatus::Shutdown | AgentStatus::NotFound) } -fn final_message_requests_watchdog_close(message: &str) -> bool { +pub(crate) fn final_message_requests_watchdog_close(message: &str) -> bool { message.trim().eq_ignore_ascii_case("goodbye") } diff --git a/codex-rs/core/src/codex_thread.rs b/codex-rs/core/src/codex_thread.rs index cc83c0a7c13a..562b7c73cd65 100644 --- a/codex-rs/core/src/codex_thread.rs +++ b/codex-rs/core/src/codex_thread.rs @@ -381,6 +381,13 @@ impl CodexThread { self.rollout_path.clone() } + pub async fn current_rollout_path(&self) -> Option { + match self.codex.session.current_rollout_path().await { + Ok(Some(path)) => Some(path), + Ok(None) | Err(_) => self.rollout_path.clone(), + } + } + pub(crate) fn session_configured(&self) -> SessionConfiguredEvent { self.session_configured.clone() } diff --git a/codex-rs/core/src/config/config_tests.rs b/codex-rs/core/src/config/config_tests.rs index 50dcd5248d5c..3168b17c0cb2 100644 --- a/codex-rs/core/src/config/config_tests.rs +++ b/codex-rs/core/src/config/config_tests.rs @@ -5143,6 +5143,22 @@ nickname_candidates = ["Hypatia", "Noether"] Ok(()) } +#[test] +fn agent_role_toml_rejects_watchdog_interval() { + let err = toml::from_str::( + r#"[agents.slow_watch] +description = "Not a watchdog" +watchdog_interval_s = 300 +"#, + ) + .expect_err("role-scoped watchdog_interval_s should be rejected"); + + assert!( + err.to_string() + .contains("unknown field `watchdog_interval_s`") + ); +} + #[tokio::test] async fn agent_role_relative_config_file_resolves_from_config_layer() -> std::io::Result<()> { let codex_home = TempDir::new()?; @@ -5251,6 +5267,74 @@ nickname_candidates = ["Noether"] Ok(()) } +#[tokio::test] +async fn agent_role_file_drops_watchdog_interval_with_warning() -> std::io::Result<()> { + let codex_home = TempDir::new()?; + let role_config_path = codex_home.path().join("agents").join("researcher.toml"); + tokio::fs::create_dir_all( + role_config_path + .parent() + .expect("role config should have a parent directory"), + ) + .await?; + tokio::fs::write( + &role_config_path, + r#" +description = "Research role" +developer_instructions = "Research carefully" +watchdog_interval_s = 300 +"#, + ) + .await?; + tokio::fs::write( + codex_home.path().join(CONFIG_TOML_FILE), + r#"[agents.researcher] +description = "Research role" +config_file = "./agents/researcher.toml" +"#, + ) + .await?; + + let config = ConfigBuilder::without_managed_config_for_tests() + .codex_home(codex_home.path().to_path_buf()) + .fallback_cwd(Some(codex_home.path().to_path_buf())) + .build() + .await?; + + assert!( + config + .startup_warnings + .iter() + .any(|warning| warning.contains("cannot set watchdog_interval_s")) + ); + assert!( + !config.agent_roles.contains_key("researcher"), + "malformed role file should be dropped" + ); + assert!( + !config + .config_layer_stack + .get_layers( + ConfigLayerStackOrdering::LowestPrecedenceFirst, + /*include_disabled*/ true, + ) + .iter() + .any(|layer| layer.config.to_string().contains("watchdog_interval_s")), + "malformed role file should not add watchdog_interval_s to the config layer stack" + ); + assert_eq!(config.watchdog_interval_s, DEFAULT_WATCHDOG_INTERVAL_S); + + assert!( + config + .startup_warnings + .iter() + .any(|warning| warning.contains("cannot set watchdog_interval_s") + && warning.contains("set it at the top level of config.toml")) + ); + + Ok(()) +} + #[tokio::test] async fn agent_role_file_without_developer_instructions_is_dropped_with_warning() -> std::io::Result<()> { @@ -8664,7 +8748,7 @@ enabled = true } #[tokio::test] -async fn multi_agent_v2_rejects_agents_max_threads() -> std::io::Result<()> { +async fn multi_agent_v2_ignores_legacy_agents_max_threads() -> std::io::Result<()> { let codex_home = TempDir::new()?; std::fs::write( codex_home.path().join(CONFIG_TOML_FILE), @@ -8676,17 +8760,20 @@ max_threads = 3 "#, )?; - let err = ConfigBuilder::without_managed_config_for_tests() + let config = ConfigBuilder::without_managed_config_for_tests() .codex_home(codex_home.path().to_path_buf()) .fallback_cwd(Some(codex_home.path().to_path_buf())) .build() - .await - .expect_err("agents.max_threads should conflict with multi_agent_v2"); + .await?; - assert_eq!(err.kind(), std::io::ErrorKind::InvalidInput); assert_eq!( - err.to_string(), - "agents.max_threads cannot be set when multi_agent_v2 is enabled" + config.agent_max_threads, + Some( + config + .multi_agent_v2 + .max_concurrent_threads_per_session + .saturating_sub(1) + ) ); Ok(()) diff --git a/codex-rs/core/src/config/mod.rs b/codex-rs/core/src/config/mod.rs index 3d5249230e0d..9bcb66617ca0 100644 --- a/codex-rs/core/src/config/mod.rs +++ b/codex-rs/core/src/config/mod.rs @@ -2582,12 +2582,6 @@ impl Config { } let agent_max_threads_from_config = cfg.agents.as_ref().and_then(|agents| agents.max_threads); let agent_max_threads = if features.enabled(Feature::MultiAgentV2) { - if agent_max_threads_from_config.is_some() { - return Err(std::io::Error::new( - std::io::ErrorKind::InvalidInput, - "agents.max_threads cannot be set when multi_agent_v2 is enabled", - )); - } Some( multi_agent_v2 .max_concurrent_threads_per_session diff --git a/codex-rs/core/src/lib.rs b/codex-rs/core/src/lib.rs index 32fe4ba1f968..33c480deaa8a 100644 --- a/codex-rs/core/src/lib.rs +++ b/codex-rs/core/src/lib.rs @@ -147,6 +147,7 @@ pub(crate) mod state_db_bridge; pub use state_db_bridge::StateDbHandle; pub use state_db_bridge::get_state_db; mod thread_rollout_truncation; +pub use thread_rollout_truncation::materialize_rollout_items_for_replay; mod tools; pub(crate) mod turn_diff_tracker; mod turn_metadata; diff --git a/codex-rs/core/src/personality_migration_tests.rs b/codex-rs/core/src/personality_migration_tests.rs index 4aef53a5c435..74b2d36cafa9 100644 --- a/codex-rs/core/src/personality_migration_tests.rs +++ b/codex-rs/core/src/personality_migration_tests.rs @@ -44,6 +44,7 @@ async fn write_rollout_with_user_event(dir: &Path, thread_id: ThreadId) -> io::R let session_meta = SessionMetaLine { meta: SessionMeta { id: thread_id, + segment_id: None, forked_from_id: None, timestamp: TEST_TIMESTAMP.to_string(), cwd: std::path::PathBuf::from("."), diff --git a/codex-rs/core/src/rollout.rs b/codex-rs/core/src/rollout.rs index d4ac5c699ade..bfd4845dce59 100644 --- a/codex-rs/core/src/rollout.rs +++ b/codex-rs/core/src/rollout.rs @@ -22,6 +22,8 @@ pub use codex_rollout::find_thread_path_by_id_str; pub use codex_rollout::parse_cursor; pub use codex_rollout::read_head_for_summary; pub use codex_rollout::read_session_meta_line; +pub use codex_rollout::resolve_fork_reference_rollout_path; +pub use codex_rollout::resolve_rollout_reference_rollout_path; pub use codex_rollout::rollout_date_parts; impl codex_rollout::RolloutConfigView for Config { diff --git a/codex-rs/core/src/session/config_lock.rs b/codex-rs/core/src/session/config_lock.rs index d1f190510a36..7cb9ba59df99 100644 --- a/codex-rs/core/src/session/config_lock.rs +++ b/codex-rs/core/src/session/config_lock.rs @@ -259,7 +259,7 @@ mod tests { assert!(matches!( multi_agent_v2, FeatureToml::Config(MultiAgentV2ConfigToml { - enabled: Some(false), + enabled: Some(_), max_concurrent_threads_per_session: Some(_), min_wait_timeout_ms: Some(_), usage_hint_enabled: Some(_), @@ -267,6 +267,14 @@ mod tests { .. }) )); + assert_eq!( + multi_agent_v2.enabled(), + Some( + sc.original_config_do_not_use + .features + .enabled(Feature::MultiAgentV2) + ) + ); assert_eq!(lockfile.version, crate::config_lock::CONFIG_LOCK_VERSION); } diff --git a/codex-rs/core/src/session/mod.rs b/codex-rs/core/src/session/mod.rs index eeff6f7b981d..c96a7ad49bde 100644 --- a/codex-rs/core/src/session/mod.rs +++ b/codex-rs/core/src/session/mod.rs @@ -41,6 +41,7 @@ use crate::rollout::find_thread_name_by_id; use crate::session_prefix::format_subagent_notification_message; use crate::skills::SkillRenderSideEffects; use crate::skills_load_input_from_config; +use crate::thread_rollout_truncation::materialize_rollout_items_for_replay; use crate::turn_metadata::TurnMetadataState; use async_channel::Receiver; use async_channel::Sender; @@ -100,6 +101,7 @@ use codex_protocol::models::format_allow_prefixes; use codex_protocol::openai_models::ModelInfo; use codex_protocol::permissions::FileSystemSandboxPolicy; use codex_protocol::permissions::NetworkSandboxPolicy; +use codex_protocol::protocol::DEFAULT_ROLLOUT_REFERENCE_DEPTH; use codex_protocol::protocol::FileChange; use codex_protocol::protocol::HasLegacyEvent; use codex_protocol::protocol::InterAgentCommunication; @@ -134,6 +136,7 @@ use codex_thread_store::LiveThread; use codex_thread_store::LiveThreadInitGuard; use codex_thread_store::LocalThreadStore; use codex_thread_store::ResumeThreadParams; +use codex_thread_store::RotateThreadSegmentParams; use codex_thread_store::ThreadEventPersistenceMode; use codex_thread_store::ThreadPersistenceMetadata; use codex_thread_store::ThreadStore; @@ -218,6 +221,7 @@ mod rollout_reconstruction_tests; const ROOT_AGENT_PROMPT_FALLBACK: &str = include_str!("../../root_agent_prompt.md"); const SUBAGENT_PROMPT_FALLBACK: &str = include_str!("../../subagent_prompt.md"); +const WATCHDOG_AGENT_PROMPT_FALLBACK: &str = include_str!("../../watchdog_agent_prompt.md"); async fn load_agent_prompt_fallback( codex_home: &Path, @@ -242,6 +246,32 @@ pub(crate) async fn load_subagent_prompt(codex_home: &Path) -> String { load_agent_prompt_fallback(codex_home, SUBAGENT_PROMPT_FALLBACK, "AGENTS.subagent.md").await } +pub(crate) async fn load_watchdog_agent_prompt(codex_home: &Path) -> String { + load_agent_prompt_fallback( + codex_home, + WATCHDOG_AGENT_PROMPT_FALLBACK, + "AGENTS.watchdog.md", + ) + .await +} + +fn history_contains_developer_text( + history: &crate::context_manager::ContextManager, + expected: &str, +) -> bool { + history.raw_items().iter().any(|item| { + matches!( + item, + ResponseItem::Message { role, content, .. } + if role == "developer" + && content.iter().any(|content_item| matches!( + content_item, + ContentItem::InputText { text } if text == expected + )) + ) + }) +} + pub(crate) async fn load_agent_role_prompt( config: &Config, session_source: &SessionSource, @@ -251,6 +281,11 @@ pub(crate) async fn load_agent_role_prompt( } let role_prompt = match session_source { + SessionSource::SubAgent(SubAgentSource::ThreadSpawn { agent_role, .. }) + if agent_role.as_deref() == Some("watchdog") => + { + load_watchdog_agent_prompt(&config.codex_home).await + } SessionSource::SubAgent(_) => load_subagent_prompt(&config.codex_home).await, SessionSource::Cli | SessionSource::VSCode @@ -500,7 +535,7 @@ impl Codex { async fn spawn_internal(args: CodexSpawnArgs) -> CodexResult { let CodexSpawnArgs { - mut config, + config, auth_manager, models_manager, environment_manager, @@ -541,14 +576,6 @@ impl Codex { ); } - if let SessionSource::SubAgent(SubAgentSource::ThreadSpawn { depth, .. }) = session_source - && depth >= config.agent_max_depth - && !config.features.enabled(Feature::MultiAgentV2) - { - let _ = config.features.disable(Feature::SpawnCsv); - let _ = config.features.disable(Feature::Collab); - } - let primary_environment = environment_selections.primary_environment(); let user_instructions = AgentsMdManager::new(&config) .user_instructions(primary_environment.as_deref()) @@ -1216,7 +1243,30 @@ impl Session { .session_source .is_non_root_agent() }; - let has_prior_user_turns = initial_history_has_prior_user_turns(&conversation_history); + let codex_home = { + let state = self.state.lock().await; + state.session_configuration.codex_home().clone() + }; + let replay_rollout_items = if conversation_history.scan_rollout_items(|item| { + matches!( + item, + RolloutItem::ForkReference(_) | RolloutItem::RolloutReference(_) + ) + }) { + Some( + materialize_rollout_items_for_replay( + codex_home.as_path(), + &conversation_history.get_rollout_items(), + ) + .await, + ) + } else { + None + }; + let has_prior_user_turns = replay_rollout_items.as_ref().map_or_else( + || initial_history_has_prior_user_turns(&conversation_history), + |items| initial_history_has_prior_user_turns(&InitialHistory::Forked(items.clone())), + ); { let mut state = self.state.lock().await; state.set_next_turn_is_first(!has_prior_user_turns); @@ -1229,7 +1279,7 @@ impl Session { .await; } InitialHistory::Resumed(resumed_history) => { - let rollout_items = resumed_history.history; + let rollout_items = replay_rollout_items.unwrap_or(resumed_history.history); let previous_turn_settings = self .apply_rollout_reconstruction(&turn_context, &rollout_items) .await; @@ -1268,12 +1318,14 @@ impl Session { } } InitialHistory::Forked(rollout_items) => { - self.apply_rollout_reconstruction(&turn_context, &rollout_items) + let replay_rollout_items = + replay_rollout_items.unwrap_or_else(|| rollout_items.clone()); + self.apply_rollout_reconstruction(&turn_context, &replay_rollout_items) .await; // Seed usage info from the recorded rollout so UIs can show token counts // immediately on resume/fork. - if let Some(info) = Self::last_token_info_from_rollout(&rollout_items) { + if let Some(info) = Self::last_token_info_from_rollout(&replay_rollout_items) { let mut state = self.state.lock().await; state.set_token_info(Some(info)); } @@ -1617,7 +1669,10 @@ impl Session { return; }; - let message = format_subagent_notification_message(child_agent_path.as_str(), &status); + // The TUI indexes live subagent rows by ThreadId. Use the child ThreadId in this + // hidden notification so final status updates remove the correct panel row. + let message = + format_subagent_notification_message(&self.conversation_id.to_string(), &status); // `communication` owns the message. Keep a second copy only when the // recorder will actually need it after parent delivery succeeds. let trace_message = self @@ -2549,15 +2604,70 @@ impl Session { self.replace_history(items, reference_context_item.clone()) .await; - self.persist_rollout_items(&[RolloutItem::Compacted(compacted_item)]) - .await; + let mut rollout_items = vec![RolloutItem::Compacted(compacted_item)]; if let Some(turn_context_item) = reference_context_item { - self.persist_rollout_items(&[RolloutItem::TurnContext(turn_context_item)]) - .await; + rollout_items.push(RolloutItem::TurnContext(turn_context_item)); + } + if !self + .rotate_rollout_segment_after_compaction(rollout_items.clone()) + .await + { + self.persist_rollout_items(&rollout_items).await; } self.services.model_client.advance_window_generation(); } + async fn rotate_rollout_segment_after_compaction( + &self, + initial_items: Vec, + ) -> bool { + let Some(live_thread) = self.live_thread() else { + return false; + }; + let params = { + let state = self.state.lock().await; + let session_configuration = &state.session_configuration; + let event_persistence_mode = if session_configuration.persist_extended_history { + ThreadEventPersistenceMode::Extended + } else { + ThreadEventPersistenceMode::Limited + }; + RotateThreadSegmentParams { + source: session_configuration.session_source.clone(), + base_instructions: BaseInstructions { + text: session_configuration.base_instructions.clone(), + }, + dynamic_tools: session_configuration.dynamic_tools.clone(), + metadata: ThreadPersistenceMetadata { + cwd: Some(session_configuration.cwd.to_path_buf()), + model_provider: session_configuration + .original_config_do_not_use + .model_provider_id + .clone(), + memory_mode: if session_configuration + .original_config_do_not_use + .memories + .generate_memories + { + ThreadMemoryMode::Enabled + } else { + ThreadMemoryMode::Disabled + }, + }, + event_persistence_mode, + initial_items, + previous_segment_reference_depth: DEFAULT_ROLLOUT_REFERENCE_DEPTH, + } + }; + match live_thread.rotate_local_segment(params).await { + Ok(rotated) => rotated, + Err(err) => { + warn!("failed to rotate rollout segment after compaction: {err:#}"); + false + } + } + } + async fn persist_rollout_response_items(&self, items: &[ResponseItem]) { let rollout_items: Vec = items .iter() @@ -2606,6 +2716,7 @@ impl Session { collaboration_mode, base_instructions, session_source, + history, ) = { let state = self.state.lock().await; ( @@ -2614,6 +2725,7 @@ impl Session { state.session_configuration.collaboration_mode.clone(), state.session_configuration.base_instructions.clone(), state.session_configuration.session_source.clone(), + state.history.clone(), ) }; if let Some(model_switch_message) = @@ -2626,6 +2738,7 @@ impl Session { } if let Some(role_prompt) = load_agent_role_prompt(&turn_context.config, &session_source).await + && !history_contains_developer_text(&history, &role_prompt) { developer_sections.push(role_prompt); } diff --git a/codex-rs/core/src/session/rollout_reconstruction.rs b/codex-rs/core/src/session/rollout_reconstruction.rs index a4c042af0c83..8ab63bd3cab9 100644 --- a/codex-rs/core/src/session/rollout_reconstruction.rs +++ b/codex-rs/core/src/session/rollout_reconstruction.rs @@ -207,7 +207,10 @@ impl Session { active_segment.get_or_insert_with(ActiveReplaySegment::default); active_segment.counts_as_user_turn |= is_user_turn_boundary(response_item); } - RolloutItem::EventMsg(_) | RolloutItem::SessionMeta(_) => {} + RolloutItem::EventMsg(_) + | RolloutItem::ForkReference(_) + | RolloutItem::RolloutReference(_) + | RolloutItem::SessionMeta(_) => {} } if base_replacement_history.is_some() @@ -275,6 +278,8 @@ impl Session { history.drop_last_n_user_turns(rollback.num_turns); } RolloutItem::EventMsg(_) + | RolloutItem::ForkReference(_) + | RolloutItem::RolloutReference(_) | RolloutItem::TurnContext(_) | RolloutItem::SessionMeta(_) => {} } diff --git a/codex-rs/core/src/session/snapshots/codex_core__codex_tests__fork_startup_context_then_first_turn_diff.snap b/codex-rs/core/src/session/snapshots/codex_core__codex_tests__fork_startup_context_then_first_turn_diff.snap index f9a808f65e43..9a8f6ec5c686 100644 --- a/codex-rs/core/src/session/snapshots/codex_core__codex_tests__fork_startup_context_then_first_turn_diff.snap +++ b/codex-rs/core/src/session/snapshots/codex_core__codex_tests__fork_startup_context_then_first_turn_diff.snap @@ -1,6 +1,6 @@ --- source: core/src/session/tests.rs -assertion_line: 1619 +assertion_line: 1748 expression: snapshot --- Scenario: First request after fork when startup preserves the parent baseline, the fork changes approval policy, and the first forked turn enters plan mode. diff --git a/codex-rs/core/src/session/tests.rs b/codex-rs/core/src/session/tests.rs index ba73af6e7397..f269075cf4a8 100644 --- a/codex-rs/core/src/session/tests.rs +++ b/codex-rs/core/src/session/tests.rs @@ -2879,6 +2879,89 @@ async fn attach_thread_persistence(session: &mut Session) -> PathBuf { .expect("thread should have rollout path") } +#[tokio::test] +async fn replace_compacted_history_rotates_local_rollout_segment() { + let (mut sess, tc, _) = make_session_and_context_with_rx().await; + let sess = Arc::get_mut(&mut sess).expect("session should not have additional references"); + let old_rollout_path = attach_thread_persistence(sess).await; + sess.persist_rollout_items(&[ + RolloutItem::ResponseItem(user_message("before compaction")), + RolloutItem::ResponseItem(assistant_message("before compaction answer")), + ]) + .await; + sess.flush_rollout() + .await + .expect("pre-compaction rollout should flush"); + let replacement_history = vec![user_message("compacted summary")]; + + sess.replace_compacted_history( + replacement_history.clone(), + Some(tc.to_turn_context_item()), + CompactedItem { + message: "compacted summary".to_string(), + replacement_history: Some(replacement_history.clone()), + }, + ) + .await; + + let new_rollout_path = sess + .current_rollout_path() + .await + .expect("load current rollout path") + .expect("rollout path after compaction"); + assert_ne!(new_rollout_path, old_rollout_path); + let config = sess.get_config().await; + let archived_old_rollout_path = config + .codex_home + .join(codex_rollout::ARCHIVED_SESSIONS_SUBDIR) + .join( + old_rollout_path + .file_name() + .expect("old rollout path should have a file name"), + ); + assert!(!old_rollout_path.exists()); + assert!(archived_old_rollout_path.exists()); + let old_rollout_timestamp = old_rollout_path + .file_name() + .and_then(|file_name| file_name.to_str()) + .and_then(|file_name| file_name.strip_prefix("rollout-")) + .and_then(|file_name| file_name.strip_suffix(&format!("-{}.jsonl", sess.conversation_id))) + .expect("old rollout timestamp"); + let (new_items, new_thread_id, _) = + RolloutRecorder::load_rollout_items(new_rollout_path.as_path()) + .await + .expect("load new rollout segment"); + assert_eq!(new_thread_id, Some(sess.conversation_id)); + assert!(new_items.iter().any(|item| { + matches!( + item, + RolloutItem::RolloutReference(reference) + if reference.rollout_path == old_rollout_path + && reference.thread_id == Some(sess.conversation_id) + && reference.rollout_timestamp.as_deref() == Some(old_rollout_timestamp) + ) + })); + assert!(new_items.iter().any(|item| { + matches!( + item, + RolloutItem::Compacted(CompactedItem { + replacement_history: Some(history), + .. + }) if *history == replacement_history + ) + })); + + let replay_items = crate::thread_rollout_truncation::materialize_rollout_items_for_replay( + config.codex_home.as_path(), + &new_items, + ) + .await; + let reconstructed = sess + .reconstruct_history_from_rollout(tc.as_ref(), &replay_items) + .await; + assert_eq!(reconstructed.history, replacement_history); +} + fn text_block(s: &str) -> serde_json::Value { json!({ "type": "text", @@ -8724,6 +8807,31 @@ async fn subagent_prompt_is_for_regular_subagents_only() { assert!(!prompt.contains("watchdog.snooze")); } +#[tokio::test] +async fn watchdog_prompt_is_loaded_for_watchdog_subagents() { + let codex_home = tempfile::tempdir().expect("create temp dir"); + let mut config = build_test_config(codex_home.path()).await; + config + .features + .enable(Feature::AgentPromptInjection) + .expect("test config should enable prompt injection"); + let session_source = SessionSource::SubAgent(SubAgentSource::ThreadSpawn { + parent_thread_id: ThreadId::default(), + depth: 1, + agent_path: None, + agent_nickname: Some("Test Watchdog".to_string()), + agent_role: Some("watchdog".to_string()), + }); + + let prompt = load_agent_role_prompt(&config, &session_source) + .await + .expect("watchdog subagents need a role prompt"); + + assert!(prompt.contains("You are also a **watchdog**")); + assert!(prompt.contains("Call `watchdog.close_self`")); + assert!(prompt.contains("Call `followup_task` with `\"target\":\"parent\"`")); +} + #[tokio::test] async fn agent_prompt_loader_prefers_home_overrides() { let codex_home = tempfile::tempdir().expect("create temp dir"); @@ -8736,6 +8844,12 @@ async fn agent_prompt_loader_prefers_home_overrides() { ) .await .expect("write subagent override"); + tokio::fs::write( + codex_home.path().join("AGENTS.watchdog.md"), + "custom watchdog", + ) + .await + .expect("write watchdog override"); assert_eq!( load_root_agent_prompt(codex_home.path()).await, @@ -8745,6 +8859,10 @@ async fn agent_prompt_loader_prefers_home_overrides() { load_subagent_prompt(codex_home.path()).await, "custom subagent" ); + assert_eq!( + load_watchdog_agent_prompt(codex_home.path()).await, + "custom watchdog" + ); } #[tokio::test] @@ -8778,6 +8896,51 @@ async fn root_agent_prompt_is_inline_developer_context_not_session_instructions( ))); } +#[tokio::test] +async fn watchdog_agent_prompt_is_inline_developer_context_for_watchdog_threads() { + let session = make_session_with_config(|config| { + config + .features + .enable(Feature::AgentPromptInjection) + .expect("test config should enable prompt injection"); + }) + .await + .expect("session should build"); + let session_source = SessionSource::SubAgent(SubAgentSource::ThreadSpawn { + parent_thread_id: ThreadId::new(), + depth: 1, + agent_path: Some(AgentPath::try_from("/root/watchdog").expect("agent path should parse")), + agent_nickname: Some("Test Watchdog".to_string()), + agent_role: Some("watchdog".to_string()), + }); + session + .state + .lock() + .await + .session_configuration + .session_source = session_source.clone(); + + let mut turn_context = session.new_default_turn().await; + Arc::get_mut(&mut turn_context) + .expect("turn context should not be shared") + .session_source = session_source; + let initial_context = session.build_initial_context(turn_context.as_ref()).await; + + let developer_texts = developer_input_texts(&initial_context); + assert!( + developer_texts + .iter() + .any(|text| text.contains("You are also a **watchdog**")), + "watchdog prompt must be visible as developer context so watchdog helpers do not act like the parent agent: {developer_texts:?}" + ); + assert!( + !developer_texts + .iter() + .any(|text| text.contains("# You are the Root Agent")), + "watchdog helper current-turn developer context must not inject root prompt: {developer_texts:?}" + ); +} + #[tokio::test] async fn agent_prompt_injection_does_not_require_collab_feature() { let codex_home = tempfile::tempdir().expect("create temp dir"); diff --git a/codex-rs/core/src/thread_manager.rs b/codex-rs/core/src/thread_manager.rs index 7c93e3336241..4ee40e84cbc9 100644 --- a/codex-rs/core/src/thread_manager.rs +++ b/codex-rs/core/src/thread_manager.rs @@ -40,6 +40,7 @@ use codex_protocol::models::ResponseItem; use codex_protocol::openai_models::ModelPreset; use codex_protocol::protocol::Event; use codex_protocol::protocol::EventMsg; +use codex_protocol::protocol::ForkReferenceItem; use codex_protocol::protocol::InitialHistory; use codex_protocol::protocol::McpServerRefreshConfig; use codex_protocol::protocol::Op; @@ -810,7 +811,23 @@ impl ThreadManager { parent_trace: Option, ) -> CodexResult { let interrupted_marker = InterruptedTurnHistoryMarker::from_config(&config); + let snapshot_state = snapshot_turn_state(&history); + let source_rollout_path = match &history { + InitialHistory::Resumed(resumed) => resumed.rollout_path.clone(), + InitialHistory::New | InitialHistory::Cleared | InitialHistory::Forked(_) => None, + }; + let source_items = history.get_rollout_items(); + let reference_history = source_rollout_path.and_then(|rollout_path| { + fork_reference_history_for_snapshot( + snapshot, + rollout_path, + &source_items, + &snapshot_state, + interrupted_marker, + ) + }); let history = fork_history_from_snapshot(snapshot, history, interrupted_marker); + let history = reference_history.unwrap_or(history); let environments = default_thread_environment_selections( self.state.environment_manager.as_ref(), &config.cwd, @@ -1376,6 +1393,73 @@ fn fork_history_from_snapshot( } } +fn fork_reference_history_for_snapshot( + snapshot: ForkSnapshot, + rollout_path: PathBuf, + source_items: &[RolloutItem], + snapshot_state: &SnapshotTurnState, + interrupted_marker: InterruptedTurnHistoryMarker, +) -> Option { + let nth_user_message = match snapshot { + ForkSnapshot::TruncateBeforeNthUserMessage(nth_user_message) => { + let user_positions = truncation::user_message_positions_in_rollout(source_items); + if snapshot_state.ends_mid_turn && nth_user_message >= user_positions.len() { + return None; + } + nth_user_message + } + ForkSnapshot::Interrupted => usize::MAX, + }; + + let source_meta = source_items.iter().find_map(|item| match item { + RolloutItem::SessionMeta(meta) => Some(meta), + RolloutItem::Compacted(_) + | RolloutItem::EventMsg(_) + | RolloutItem::ForkReference(_) + | RolloutItem::RolloutReference(_) + | RolloutItem::ResponseItem(_) + | RolloutItem::TurnContext(_) => None, + }); + + let mut history: Vec = source_items + .iter() + .find_map(|item| match item { + RolloutItem::SessionMeta(meta) => Some(RolloutItem::SessionMeta(meta.clone())), + RolloutItem::Compacted(_) + | RolloutItem::EventMsg(_) + | RolloutItem::ForkReference(_) + | RolloutItem::RolloutReference(_) + | RolloutItem::ResponseItem(_) + | RolloutItem::TurnContext(_) => None, + }) + .into_iter() + .chain(std::iter::once(RolloutItem::ForkReference( + ForkReferenceItem { + rollout_path, + thread_id: source_meta.map(|meta| meta.meta.id), + segment_id: source_meta.and_then(|meta| meta.meta.segment_id), + nth_user_message, + }, + ))) + .collect(); + + if snapshot == ForkSnapshot::Interrupted && snapshot_state.ends_mid_turn { + if let Some(marker) = interrupted_turn_history_marker(interrupted_marker) { + history.push(RolloutItem::ResponseItem(marker)); + } + history.push(RolloutItem::EventMsg(EventMsg::TurnAborted( + TurnAbortedEvent { + turn_id: snapshot_state.active_turn_id.clone(), + reason: TurnAbortReason::Interrupted, + completed_at: None, + duration_ms: None, + }, + ))); + } + + Some(InitialHistory::Forked(history)) +} + /// Append the same persisted interrupt boundary used by the live interrupt path /// to an existing fork snapshot after the source thread has been confirmed to /// be mid-turn. diff --git a/codex-rs/core/src/thread_manager_tests.rs b/codex-rs/core/src/thread_manager_tests.rs index 2fe2f97bb345..14967053237d 100644 --- a/codex-rs/core/src/thread_manager_tests.rs +++ b/codex-rs/core/src/thread_manager_tests.rs @@ -5,6 +5,7 @@ use crate::session::session::SessionSettingsUpdate; use crate::session::tests::make_session_and_context; use crate::tasks::InterruptedTurnHistoryMarker; use crate::tasks::interrupted_turn_history_marker; +use crate::thread_rollout_truncation::materialize_rollout_items_for_replay; use codex_features::Feature; use codex_models_manager::manager::RefreshStrategy; use codex_protocol::models::ContentItem; @@ -803,6 +804,7 @@ async fn interrupted_fork_snapshot_does_not_synthesize_turn_id_for_legacy_histor let mut config = test_config().await; config.codex_home = temp_dir.path().join("codex-home").abs(); config.cwd = config.codex_home.abs(); + config.agent_interrupt_message_enabled = true; std::fs::create_dir_all(&config.codex_home).expect("create codex home"); let auth_manager = @@ -865,7 +867,8 @@ async fn interrupted_fork_snapshot_does_not_synthesize_turn_id_for_legacy_histor .filter(|item| !matches!(item, RolloutItem::SessionMeta(_))) .collect(); let interrupted_marker_json = serde_json::to_value(RolloutItem::ResponseItem( - contextual_user_interrupted_marker(), + interrupted_turn_history_marker(InterruptedTurnHistoryMarker::from_config(&config)) + .expect("interrupted marker should be enabled"), )) .expect("serialize interrupted marker"); let interrupted_abort_json = serde_json::to_value(RolloutItem::EventMsg( @@ -996,6 +999,7 @@ async fn interrupted_fork_snapshot_uses_persisted_mid_turn_history_without_live_ let mut config = test_config().await; config.codex_home = temp_dir.path().join("codex-home").abs(); config.cwd = config.codex_home.abs(); + config.agent_interrupt_message_enabled = true; std::fs::create_dir_all(&config.codex_home).expect("create codex home"); let auth_manager = @@ -1057,7 +1061,8 @@ async fn interrupted_fork_snapshot_uses_persisted_mid_turn_history_without_live_ .filter(|item| !matches!(item, RolloutItem::SessionMeta(_))) .collect(); let interrupted_marker_json = serde_json::to_value(RolloutItem::ResponseItem( - contextual_user_interrupted_marker(), + interrupted_turn_history_marker(InterruptedTurnHistoryMarker::from_config(&config)) + .expect("interrupted marker should be enabled"), )) .expect("serialize interrupted marker"); assert_eq!( @@ -1089,8 +1094,17 @@ async fn interrupted_fork_snapshot_uses_persisted_mid_turn_history_without_live_ let reforked_history = RolloutRecorder::get_rollout_history(&reforked_path) .await .expect("read re-forked rollout history"); - let reforked_rollout_items: Vec<_> = reforked_history - .get_rollout_items() + let reforked_raw_items = reforked_history.get_rollout_items(); + assert!( + reforked_raw_items + .iter() + .any(|item| matches!(item, RolloutItem::ForkReference(_))), + "re-forked interrupted snapshots should keep compact ForkReference history" + ); + let materialized_reforked_items = + materialize_rollout_items_for_replay(config.codex_home.as_path(), &reforked_raw_items) + .await; + let reforked_rollout_items: Vec<_> = materialized_reforked_items .into_iter() .filter(|item| !matches!(item, RolloutItem::SessionMeta(_))) .collect(); diff --git a/codex-rs/core/src/thread_rollout_truncation.rs b/codex-rs/core/src/thread_rollout_truncation.rs index e20ee53d47d8..dfa56f6b2409 100644 --- a/codex-rs/core/src/thread_rollout_truncation.rs +++ b/codex-rs/core/src/thread_rollout_truncation.rs @@ -5,12 +5,17 @@ use crate::context_manager::is_user_turn_boundary; use crate::event_mapping; +use crate::rollout::RolloutRecorder; +use crate::rollout::resolve_fork_reference_rollout_path; +use crate::rollout::resolve_rollout_reference_rollout_path; use codex_protocol::items::TurnItem; use codex_protocol::models::ResponseItem; use codex_protocol::protocol::EventMsg; use codex_protocol::protocol::InitialHistory; use codex_protocol::protocol::InterAgentCommunication; use codex_protocol::protocol::RolloutItem; +use std::path::Path; +use tracing::warn; pub(crate) fn initial_history_has_prior_user_turns(conversation_history: &InitialHistory) -> bool { conversation_history.scan_rollout_items(rollout_item_is_user_turn_boundary) @@ -148,6 +153,103 @@ pub(crate) fn truncate_rollout_to_last_n_fork_turns( items[keep_idx..].to_vec() } +pub async fn materialize_rollout_items_for_replay( + codex_home: &Path, + rollout_items: &[RolloutItem], +) -> Vec { + materialize_rollout_items_for_replay_at_depth(codex_home, rollout_items, /*depth*/ 0).await +} + +async fn materialize_rollout_items_for_replay_at_depth( + codex_home: &Path, + rollout_items: &[RolloutItem], + depth: usize, +) -> Vec { + const MAX_FORK_REFERENCE_DEPTH: usize = 8; + if depth >= MAX_FORK_REFERENCE_DEPTH { + warn!("fork reference materialization reached max depth"); + return rollout_items.to_vec(); + } + + let mut materialized = Vec::new(); + for item in rollout_items { + match item { + RolloutItem::ForkReference(reference) => { + let resolved_path = + match resolve_fork_reference_rollout_path(codex_home, reference).await { + Ok(path) => path, + Err(err) => { + warn!( + "failed to resolve fork reference {}: {err}", + reference.rollout_path.display() + ); + reference.rollout_path.clone() + } + }; + match RolloutRecorder::load_rollout_items(&resolved_path).await { + Ok((parent_items, _, _)) => { + let parent_materialized = + Box::pin(materialize_rollout_items_for_replay_at_depth( + codex_home, + &parent_items, + depth + 1, + )) + .await; + let parent_prefix = truncate_rollout_before_nth_user_message_from_start( + &parent_materialized, + reference.nth_user_message, + ); + materialized.extend(parent_prefix); + } + Err(err) => { + warn!( + "failed to load fork reference {}: {err}", + resolved_path.display() + ); + } + } + } + RolloutItem::RolloutReference(reference) => { + if depth >= reference.max_depth { + warn!("rollout reference materialization reached max depth"); + continue; + } + let resolved_path = + match resolve_rollout_reference_rollout_path(codex_home, reference).await { + Ok(path) => path, + Err(err) => { + warn!( + "failed to resolve rollout reference {}: {err}", + reference.rollout_path.display() + ); + reference.rollout_path.clone() + } + }; + match RolloutRecorder::load_rollout_items(&resolved_path).await { + Ok((reference_items, _, _)) => { + let reference_materialized = + Box::pin(materialize_rollout_items_for_replay_at_depth( + codex_home, + &reference_items, + depth + 1, + )) + .await; + materialized.extend(reference_materialized); + } + Err(err) => { + warn!( + "failed to load rollout reference {}: {err}", + resolved_path.display() + ); + } + } + } + other => materialized.push(other.clone()), + } + } + materialized +} + fn is_real_user_message_boundary(item: &ResponseItem) -> bool { matches!( event_mapping::parse_turn_item(item), diff --git a/codex-rs/core/src/thread_rollout_truncation_tests.rs b/codex-rs/core/src/thread_rollout_truncation_tests.rs index df370a0546a0..5eae19cc8216 100644 --- a/codex-rs/core/src/thread_rollout_truncation_tests.rs +++ b/codex-rs/core/src/thread_rollout_truncation_tests.rs @@ -1,11 +1,19 @@ use super::*; use crate::session::tests::make_session_and_context; use codex_protocol::AgentPath; +use codex_protocol::SegmentId; +use codex_protocol::ThreadId; use codex_protocol::models::ContentItem; use codex_protocol::models::ReasoningItemReasoningSummary; +use codex_protocol::protocol::ForkReferenceItem; use codex_protocol::protocol::InterAgentCommunication; +use codex_protocol::protocol::RolloutLine; +use codex_protocol::protocol::RolloutReferenceItem; +use codex_protocol::protocol::SessionMeta; +use codex_protocol::protocol::SessionMetaLine; use codex_protocol::protocol::ThreadRolledBackEvent; use pretty_assertions::assert_eq; +use std::path::PathBuf; fn user_msg(text: &str) -> ResponseItem { ResponseItem::Message { @@ -40,6 +48,34 @@ fn inter_agent_msg(text: &str, trigger_turn: bool) -> ResponseItem { communication.to_response_input_item().into() } +async fn write_rollout(path: &std::path::Path, items: &[RolloutItem]) { + let mut jsonl = String::new(); + for item in items { + let line = RolloutLine { + timestamp: "2026-04-30T00:00:00.000Z".to_string(), + item: item.clone(), + }; + jsonl.push_str(&serde_json::to_string(&line).expect("serialize rollout line")); + jsonl.push('\n'); + } + tokio::fs::write(path, jsonl).await.expect("write rollout"); +} + +fn session_meta_item(thread_id: ThreadId, segment_id: SegmentId) -> RolloutItem { + RolloutItem::SessionMeta(SessionMetaLine { + meta: SessionMeta { + id: thread_id, + segment_id: Some(segment_id), + timestamp: "2026-04-30T00:00:00.000Z".to_string(), + cwd: PathBuf::from("/tmp"), + originator: "test".to_string(), + cli_version: "0.0.0".to_string(), + ..SessionMeta::default() + }, + git: None, + }) +} + #[test] fn truncates_rollout_from_start_before_nth_user_only() { let items = [ @@ -108,6 +144,233 @@ fn truncation_max_keeps_full_rollout() { ); } +#[tokio::test] +async fn materializes_fork_reference_before_replay() { + let temp = tempfile::tempdir().expect("tempdir"); + let source_path = temp + .path() + .join("rollout-2026-04-30T00-00-00-00000000-0000-0000-0000-000000000001.jsonl"); + let source_items = vec![ + RolloutItem::ResponseItem(user_msg("u1")), + RolloutItem::ResponseItem(assistant_msg("a1")), + RolloutItem::ResponseItem(user_msg("u2")), + RolloutItem::ResponseItem(assistant_msg("a2")), + ]; + write_rollout(&source_path, &source_items).await; + + let compact_fork = vec![ + RolloutItem::ForkReference(ForkReferenceItem { + rollout_path: source_path.clone(), + thread_id: None, + segment_id: None, + nth_user_message: 1, + }), + RolloutItem::ResponseItem(user_msg("child request")), + ]; + + let materialized = materialize_rollout_items_for_replay(temp.path(), &compact_fork).await; + + let expected = vec![ + RolloutItem::ResponseItem(user_msg("u1")), + RolloutItem::ResponseItem(assistant_msg("a1")), + RolloutItem::ResponseItem(user_msg("child request")), + ]; + assert_eq!( + serde_json::to_value(&materialized).unwrap(), + serde_json::to_value(&expected).unwrap() + ); +} + +#[tokio::test] +async fn materializes_fork_reference_by_segment_id_after_source_rollover() { + let temp = tempfile::tempdir().expect("tempdir"); + let thread_id = ThreadId::new(); + let old_segment_id = SegmentId::new(); + let new_segment_id = SegmentId::new(); + + let old_active_path = temp + .path() + .join("sessions/2026/04/30") + .join(format!("rollout-2026-04-30T00-00-00-{thread_id}.jsonl")); + let old_archived_path = temp + .path() + .join("archived_sessions/2026/04/30") + .join(format!("rollout-2026-04-30T00-00-00-{thread_id}.jsonl")); + let new_active_path = temp + .path() + .join("sessions/2026/05/01") + .join(format!("rollout-2026-05-01T00-00-00-{thread_id}.jsonl")); + + tokio::fs::create_dir_all(old_archived_path.parent().expect("archived parent")) + .await + .expect("create archived parent"); + tokio::fs::create_dir_all(new_active_path.parent().expect("active parent")) + .await + .expect("create active parent"); + + write_rollout( + &old_archived_path, + &[ + session_meta_item(thread_id, old_segment_id), + RolloutItem::ResponseItem(user_msg("old segment request")), + RolloutItem::ResponseItem(assistant_msg("old segment answer")), + ], + ) + .await; + write_rollout( + &new_active_path, + &[ + session_meta_item(thread_id, new_segment_id), + RolloutItem::ResponseItem(user_msg("new segment request")), + RolloutItem::ResponseItem(assistant_msg("new segment answer")), + ], + ) + .await; + + let compact_fork = vec![ + RolloutItem::ForkReference(ForkReferenceItem { + rollout_path: old_active_path, + thread_id: Some(thread_id), + segment_id: Some(old_segment_id), + nth_user_message: usize::MAX, + }), + RolloutItem::ResponseItem(user_msg("child request")), + ]; + + let materialized = materialize_rollout_items_for_replay(temp.path(), &compact_fork).await; + let text = serde_json::to_string(&materialized).expect("serialize materialized rollout"); + + assert!(text.contains("old segment request")); + assert!(!text.contains("new segment request")); + assert!(text.contains("child request")); +} + +#[tokio::test] +async fn materializes_fork_reference_before_truncating_rollout_references() { + let temp = tempfile::tempdir().expect("tempdir"); + let thread_id = ThreadId::new(); + let old_segment_id = SegmentId::new(); + let current_segment_id = SegmentId::new(); + + let old_path = temp.path().join("old.jsonl"); + let current_path = temp.path().join("current.jsonl"); + + write_rollout( + &old_path, + &[ + session_meta_item(thread_id, old_segment_id), + RolloutItem::ResponseItem(user_msg("u1")), + RolloutItem::ResponseItem(assistant_msg("a1")), + ], + ) + .await; + write_rollout( + ¤t_path, + &[ + session_meta_item(thread_id, current_segment_id), + RolloutItem::RolloutReference(RolloutReferenceItem { + rollout_path: old_path, + thread_id: None, + rollout_timestamp: None, + segment_id: None, + max_depth: 2, + }), + RolloutItem::ResponseItem(user_msg("u2")), + RolloutItem::ResponseItem(assistant_msg("a2")), + RolloutItem::ResponseItem(user_msg("u3")), + ], + ) + .await; + + let fork_items = vec![ + RolloutItem::ForkReference(ForkReferenceItem { + rollout_path: current_path, + thread_id: Some(thread_id), + segment_id: Some(current_segment_id), + nth_user_message: 2, + }), + RolloutItem::ResponseItem(user_msg("child request")), + ]; + + let materialized = materialize_rollout_items_for_replay(temp.path(), &fork_items).await; + let text = serde_json::to_string(&materialized).expect("serialize materialized rollout"); + + assert!(text.contains("u1")); + assert!(text.contains("u2")); + assert!(!text.contains("u3")); + assert!(text.contains("child request")); +} + +#[tokio::test] +async fn materializes_rollout_reference_with_bounded_depth() { + let temp = tempfile::tempdir().expect("tempdir"); + let thread_id = ThreadId::new(); + let oldest_segment_id = SegmentId::new(); + let old_segment_id = SegmentId::new(); + let middle_segment_id = SegmentId::new(); + + let oldest_path = temp.path().join("oldest.jsonl"); + let old_path = temp.path().join("old.jsonl"); + let middle_path = temp.path().join("middle.jsonl"); + + write_rollout( + &oldest_path, + &[ + session_meta_item(thread_id, oldest_segment_id), + RolloutItem::ResponseItem(user_msg("oldest segment request")), + ], + ) + .await; + write_rollout( + &old_path, + &[ + session_meta_item(thread_id, old_segment_id), + RolloutItem::RolloutReference(RolloutReferenceItem { + rollout_path: oldest_path, + thread_id: None, + rollout_timestamp: None, + segment_id: None, + max_depth: 2, + }), + RolloutItem::ResponseItem(user_msg("old segment request")), + ], + ) + .await; + write_rollout( + &middle_path, + &[ + session_meta_item(thread_id, middle_segment_id), + RolloutItem::RolloutReference(RolloutReferenceItem { + rollout_path: old_path, + thread_id: None, + rollout_timestamp: None, + segment_id: None, + max_depth: 2, + }), + RolloutItem::ResponseItem(user_msg("middle segment request")), + ], + ) + .await; + + let current_items = vec![ + RolloutItem::RolloutReference(RolloutReferenceItem { + rollout_path: middle_path, + thread_id: None, + rollout_timestamp: None, + segment_id: None, + max_depth: 2, + }), + RolloutItem::ResponseItem(user_msg("current segment request")), + ]; + let materialized = materialize_rollout_items_for_replay(temp.path(), ¤t_items).await; + let text = serde_json::to_string(&materialized).expect("serialize materialized rollout"); + + assert!(!text.contains("oldest segment request")); + assert!(text.contains("old segment request")); + assert!(text.contains("middle segment request")); + assert!(text.contains("current segment request")); +} + #[test] fn truncates_rollout_from_start_applies_thread_rollback_markers() { let rollout_items = vec![ diff --git a/codex-rs/core/src/tools/handlers/multi_agents/resume_agent.rs b/codex-rs/core/src/tools/handlers/multi_agents/resume_agent.rs index 2d4f2c3f47e8..58b8cfc9404d 100644 --- a/codex-rs/core/src/tools/handlers/multi_agents/resume_agent.rs +++ b/codex-rs/core/src/tools/handlers/multi_agents/resume_agent.rs @@ -28,6 +28,17 @@ impl ToolHandler for Handler { let receiver_thread_id = ThreadId::from_string(&args.id).map_err(|err| { FunctionCallError::RespondToModel(format!("invalid agent id {}: {err:?}", args.id)) })?; + if session + .services + .agent_control + .is_watchdog_handle(receiver_thread_id) + .await + { + return Err(FunctionCallError::RespondToModel( + "watchdog handles can't receive resume_agent; watchdog check-ins run on the idle timer. Use close_agent to stop a watchdog." + .to_string(), + )); + } let receiver_agent = session .services .agent_control diff --git a/codex-rs/core/src/tools/handlers/multi_agents/send_input.rs b/codex-rs/core/src/tools/handlers/multi_agents/send_input.rs index 218383a3ac96..f09a52977093 100644 --- a/codex-rs/core/src/tools/handlers/multi_agents/send_input.rs +++ b/codex-rs/core/src/tools/handlers/multi_agents/send_input.rs @@ -32,6 +32,17 @@ impl ToolHandler for Handler { let items = args.items.clone(); let input_items = parse_collab_input(args.message, args.items)?; let prompt = render_input_preview(&input_items); + if session + .services + .agent_control + .is_watchdog_handle(receiver_thread_id) + .await + { + return Err(FunctionCallError::RespondToModel( + "watchdog handles can't receive send_input; watchdog check-ins run on the idle timer. Use close_agent to stop a watchdog." + .to_string(), + )); + } let receiver_agent = session .services .agent_control diff --git a/codex-rs/core/src/tools/handlers/multi_agents/spawn.rs b/codex-rs/core/src/tools/handlers/multi_agents/spawn.rs index 3bd291ed8a5d..fcb73fa630d6 100644 --- a/codex-rs/core/src/tools/handlers/multi_agents/spawn.rs +++ b/codex-rs/core/src/tools/handlers/multi_agents/spawn.rs @@ -83,12 +83,8 @@ impl ToolHandler for Handler { .await; let mut config = build_agent_spawn_config(&session.get_base_instructions().await, turn.as_ref())?; - if args.fork_context { - reject_full_fork_spawn_overrides( - role_name, - args.model.as_deref(), - args.reasoning_effort, - )?; + let effective_role_name = if args.fork_context && !is_watchdog { + None } else { apply_requested_spawn_agent_model_overrides( &session, @@ -101,7 +97,8 @@ impl ToolHandler for Handler { apply_role_to_config(&mut config, role_name) .await .map_err(FunctionCallError::RespondToModel)?; - } + role_name + }; apply_spawn_agent_runtime_overrides(&mut config, turn.as_ref())?; apply_spawn_agent_overrides(&mut config, child_depth); @@ -109,11 +106,11 @@ impl ToolHandler for Handler { session.conversation_id, &turn.session_source, child_depth, - role_name, + effective_role_name, /*task_name*/ None, )?; let result = if let Some(watchdog_interval_s) = watchdog_interval_s { - let thread_id = spawn_watchdog( + let thread_id = Box::pin(spawn_watchdog( &session.services.agent_control, config, prompt.clone(), @@ -121,7 +118,7 @@ impl ToolHandler for Handler { child_depth, watchdog_interval_s, spawn_source, - ) + )) .await .map_err(collab_spawn_error)?; Ok(LiveAgent { @@ -148,6 +145,7 @@ impl ToolHandler for Handler { .map(TurnEnvironment::selection) .collect(), ), + initial_task_message: args.fork_context.then_some(prompt.clone()), }, ), ) @@ -280,15 +278,9 @@ async fn spawn_watchdog( "failed to clear watchdog MCP servers: {err}" )) })?; - let target_thread_id = agent_control - .spawn_agent_with_metadata( - handle_config, - Op::Interrupt, - Some(spawn_source), - Default::default(), - ) - .await? - .thread_id; + let target_thread_id = + Box::pin(agent_control.spawn_agent(handle_config, Op::Interrupt, Some(spawn_source))) + .await?; let superseded_before_register = agent_control .unregister_watchdogs_for_owner(owner_thread_id) .await; diff --git a/codex-rs/core/src/tools/handlers/multi_agents/watchdog_snooze.rs b/codex-rs/core/src/tools/handlers/multi_agents/watchdog_snooze.rs index cb813c9e13c9..6f4ac4f54167 100644 --- a/codex-rs/core/src/tools/handlers/multi_agents/watchdog_snooze.rs +++ b/codex-rs/core/src/tools/handlers/multi_agents/watchdog_snooze.rs @@ -19,6 +19,11 @@ impl ToolHandler for Handler { } = invocation; let arguments = function_arguments(payload)?; let args: WatchdogSnoozeArgs = parse_arguments(&arguments)?; + let owner_thread_id = session + .services + .agent_control + .watchdog_owner_for_active_helper(session.conversation_id) + .await; let Some(result) = session .services .agent_control @@ -39,6 +44,17 @@ impl ToolHandler for Handler { "failed to finish watchdog helper after snooze: {err}" )) })?; + if let Some(owner_thread_id) = owner_thread_id { + let _ = session + .services + .agent_control + .send_watchdog_snooze_event( + owner_thread_id, + result.target_thread_id, + result.delay_seconds, + ) + .await; + } let _ = args.reason; Ok(WatchdogSnoozeResult { target_thread_id: result.target_thread_id.to_string(), diff --git a/codex-rs/core/src/tools/handlers/multi_agents_common.rs b/codex-rs/core/src/tools/handlers/multi_agents_common.rs index c01755cb2b2c..56e968138209 100644 --- a/codex-rs/core/src/tools/handlers/multi_agents_common.rs +++ b/codex-rs/core/src/tools/handlers/multi_agents_common.rs @@ -238,19 +238,6 @@ fn build_agent_shared_config(turn: &TurnContext) -> Result, - model: Option<&str>, - reasoning_effort: Option, -) -> Result<(), FunctionCallError> { - if agent_type.is_some() || model.is_some() || reasoning_effort.is_some() { - return Err(FunctionCallError::RespondToModel( - "Full-history forked agents inherit the parent agent type, model, and reasoning effort; omit agent_type, model, and reasoning_effort, or spawn without a full-history fork.".to_string(), - )); - } - Ok(()) -} - /// Copies runtime-only turn state onto a child config before it is handed to `AgentControl`. /// /// These values are chosen by the live turn rather than persisted config, so leaving them stale diff --git a/codex-rs/core/src/tools/handlers/multi_agents_tests.rs b/codex-rs/core/src/tools/handlers/multi_agents_tests.rs index 47494caaf5d5..7f2795a155c6 100644 --- a/codex-rs/core/src/tools/handlers/multi_agents_tests.rs +++ b/codex-rs/core/src/tools/handlers/multi_agents_tests.rs @@ -61,6 +61,29 @@ use tokio::sync::Mutex; use tokio::time::timeout; use tokio_util::sync::CancellationToken; +fn run_large_stack_async(test: F) +where + F: FnOnce() -> Fut + Send + 'static, + Fut: std::future::Future + 'static, +{ + // Watchdog spawn tests exercise the full multi-agent spawn future. In debug + // builds that future can overflow libtest's default thread stack before the + // regression assertion runs. + std::thread::Builder::new() + .name("multi-agent-test".to_string()) + .stack_size(16 * 1024 * 1024) + .spawn(|| { + tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + .expect("test runtime should build") + .block_on(test()); + }) + .expect("multi-agent test thread should start") + .join() + .expect("multi-agent test thread should finish"); +} + fn invocation( session: Arc, turn: Arc, @@ -96,6 +119,27 @@ fn thread_manager() -> ThreadManager { ) } +async fn spawned_thread_id_after( + manager: &ThreadManager, + before_thread_ids: &[ThreadId], +) -> ThreadId { + let mut spawned_thread_ids = manager + .list_thread_ids() + .await + .into_iter() + .filter(|thread_id| !before_thread_ids.contains(thread_id)) + .collect::>(); + spawned_thread_ids.sort_by_key(ToString::to_string); + assert_eq!( + spawned_thread_ids.len(), + 1, + "spawn_agent should add exactly one child thread" + ); + spawned_thread_ids + .pop() + .expect("spawned thread id should be present") +} + async fn install_role_with_model_override(turn: &mut TurnContext) -> String { let role_name = "fork-context-role".to_string(); tokio::fs::create_dir_all(&turn.config.codex_home) @@ -293,7 +337,12 @@ async fn spawn_agent_uses_explorer_role_and_preserves_approval_policy() { } #[tokio::test] -async fn spawn_agent_fork_context_rejects_agent_type_override() { +async fn spawn_agent_fork_context_ignores_agent_type_override() { + #[derive(Debug, Deserialize)] + struct SpawnAgentResult { + agent_id: String, + } + let (mut session, mut turn) = make_session_and_context().await; let role_name = install_role_with_model_override(&mut turn).await; let manager = thread_manager(); @@ -303,7 +352,7 @@ async fn spawn_agent_fork_context_rejects_agent_type_override() { .expect("root thread should start"); session.services.agent_control = manager.agent_control(); session.conversation_id = root.thread_id; - let err = SpawnAgentHandler + let output = SpawnAgentHandler .handle(invocation( Arc::new(session), Arc::new(turn), @@ -315,18 +364,29 @@ async fn spawn_agent_fork_context_rejects_agent_type_override() { })), )) .await - .expect_err("fork_context should reject agent_type overrides"); + .expect("fork_context should ignore agent_type overrides"); + let (content, _) = expect_text_output(output); + let result: SpawnAgentResult = + serde_json::from_str(&content).expect("spawn_agent result should be json"); + let agent_id = parse_agent_id(&result.agent_id); + let snapshot = manager + .get_thread(agent_id) + .await + .expect("spawned agent thread should exist") + .config_snapshot() + .await; - assert_eq!( - err, - FunctionCallError::RespondToModel( - "Full-history forked agents inherit the parent agent type, model, and reasoning effort; omit agent_type, model, and reasoning_effort, or spawn without a full-history fork.".to_string(), - ) - ); + assert_ne!(snapshot.model, "gpt-5-role-override"); + assert_ne!(snapshot.model_provider_id, "ollama"); } #[tokio::test] -async fn spawn_agent_fork_context_rejects_child_model_overrides() { +async fn spawn_agent_fork_context_ignores_child_model_overrides() { + #[derive(Debug, Deserialize)] + struct SpawnAgentResult { + agent_id: String, + } + let (mut session, turn) = make_session_and_context().await; let manager = thread_manager(); let root = manager @@ -336,7 +396,7 @@ async fn spawn_agent_fork_context_rejects_child_model_overrides() { session.services.agent_control = manager.agent_control(); session.conversation_id = root.thread_id; - let err = SpawnAgentHandler + let output = SpawnAgentHandler .handle(invocation( Arc::new(session), Arc::new(turn), @@ -349,210 +409,254 @@ async fn spawn_agent_fork_context_rejects_child_model_overrides() { })), )) .await - .expect_err("forked spawn should reject child model overrides"); + .expect("fork_context should ignore child model overrides"); + let (content, _) = expect_text_output(output); + let result: SpawnAgentResult = + serde_json::from_str(&content).expect("spawn_agent result should be json"); + let agent_id = parse_agent_id(&result.agent_id); + let snapshot = manager + .get_thread(agent_id) + .await + .expect("spawned agent thread should exist") + .config_snapshot() + .await; - assert_eq!( - err, - FunctionCallError::RespondToModel( - "Full-history forked agents inherit the parent agent type, model, and reasoning effort; omit agent_type, model, and reasoning_effort, or spawn without a full-history fork.".to_string(), - ) - ); + assert_ne!(snapshot.model, "gpt-5-child-override"); + assert_ne!(snapshot.reasoning_effort, Some(ReasoningEffort::Low)); } -#[tokio::test] -async fn multi_agent_v2_spawn_fork_turns_all_rejects_agent_type_override() { - let (mut session, mut turn) = make_session_and_context().await; - let role_name = install_role_with_model_override(&mut turn).await; - let manager = thread_manager(); - let root = manager - .start_thread((*turn.config).clone()) - .await - .expect("root thread should start"); - session.services.agent_control = manager.agent_control(); - session.conversation_id = root.thread_id; - let mut config = (*turn.config).clone(); - config - .features - .enable(Feature::MultiAgentV2) - .expect("test config should allow feature update"); - let turn = TurnContext { - config: Arc::new(config), - ..turn - }; - - let err = SpawnAgentHandlerV2 - .handle(invocation( - Arc::new(session), - Arc::new(turn), - "spawn_agent", - function_payload(json!({ - "message": "inspect this repo", - "task_name": "fork_context_v2", - "agent_type": role_name, - "fork_turns": "all" - })), - )) - .await - .expect_err("fork_turns=all should reject agent_type overrides"); +#[test] +fn multi_agent_v2_spawn_fork_turns_all_ignores_agent_type_override() { + run_large_stack_async(|| async { + #[derive(Debug, Deserialize)] + struct SpawnAgentResult { + task_name: String, + } - assert_eq!( - err, - FunctionCallError::RespondToModel( - "Full-history forked agents inherit the parent agent type, model, and reasoning effort; omit agent_type, model, and reasoning_effort, or spawn without a full-history fork.".to_string(), - ) - ); + let (mut session, mut turn) = make_session_and_context().await; + let role_name = install_role_with_model_override(&mut turn).await; + let manager = thread_manager(); + let root = manager + .start_thread((*turn.config).clone()) + .await + .expect("root thread should start"); + session.services.agent_control = manager.agent_control(); + session.conversation_id = root.thread_id; + let before_thread_ids = manager.list_thread_ids().await; + let mut config = (*turn.config).clone(); + config + .features + .enable(Feature::MultiAgentV2) + .expect("test config should allow feature update"); + let turn = TurnContext { + config: Arc::new(config), + ..turn + }; + let session = Arc::new(session); + let turn = Arc::new(turn); + + let output = SpawnAgentHandlerV2 + .handle(invocation( + session.clone(), + turn, + "spawn_agent", + function_payload(json!({ + "message": "inspect this repo", + "task_name": "fork_context_v2", + "agent_type": role_name, + "fork_turns": "all" + })), + )) + .await + .expect("fork_turns=all should ignore agent_type overrides"); + let (content, _) = expect_text_output(output); + let result: SpawnAgentResult = + serde_json::from_str(&content).expect("spawn_agent result should be json"); + assert_eq!(result.task_name, "/root/fork_context_v2"); + let agent_id = spawned_thread_id_after(&manager, &before_thread_ids).await; + let snapshot = manager + .get_thread(agent_id) + .await + .expect("spawned agent thread should exist") + .config_snapshot() + .await; + assert_ne!(snapshot.model, "gpt-5-role-override"); + assert_ne!(snapshot.model_provider_id, "ollama"); + assert_eq!(snapshot.session_source.get_agent_role(), None); + }); } -#[tokio::test] -async fn multi_agent_v2_spawn_defaults_to_full_fork_and_rejects_child_model_overrides() { - let (mut session, mut turn) = make_session_and_context().await; - let manager = thread_manager(); - let root = manager - .start_thread((*turn.config).clone()) - .await - .expect("root thread should start"); - session.services.agent_control = manager.agent_control(); - session.conversation_id = root.thread_id; - let mut config = (*turn.config).clone(); - config - .features - .enable(Feature::MultiAgentV2) - .expect("test config should allow feature update"); - turn.config = Arc::new(config); - - let err = SpawnAgentHandlerV2 - .handle(invocation( - Arc::new(session), - Arc::new(turn), - "spawn_agent", - function_payload(json!({ - "message": "inspect this repo", - "task_name": "fork_context_v2", - "model": "gpt-5-child-override", - "reasoning_effort": "low" - })), - )) - .await - .expect_err("default full fork should reject child model overrides"); +#[test] +fn multi_agent_v2_spawn_defaults_to_full_fork_and_ignores_child_model_overrides() { + run_large_stack_async(|| async { + #[derive(Debug, Deserialize)] + struct SpawnAgentResult { + task_name: String, + } - assert_eq!( - err, - FunctionCallError::RespondToModel( - "Full-history forked agents inherit the parent agent type, model, and reasoning effort; omit agent_type, model, and reasoning_effort, or spawn without a full-history fork.".to_string(), - ) - ); + let (mut session, mut turn) = make_session_and_context().await; + let manager = thread_manager(); + let root = manager + .start_thread((*turn.config).clone()) + .await + .expect("root thread should start"); + session.services.agent_control = manager.agent_control(); + session.conversation_id = root.thread_id; + let parent_reasoning_effort = turn.config.model_reasoning_effort; + let before_thread_ids = manager.list_thread_ids().await; + let mut config = (*turn.config).clone(); + config + .features + .enable(Feature::MultiAgentV2) + .expect("test config should allow feature update"); + turn.config = Arc::new(config); + let session = Arc::new(session); + let turn = Arc::new(turn); + + let output = SpawnAgentHandlerV2 + .handle(invocation( + session.clone(), + turn, + "spawn_agent", + function_payload(json!({ + "message": "inspect this repo", + "task_name": "fork_context_v2", + "model": "gpt-5-child-override", + "reasoning_effort": "low" + })), + )) + .await + .expect("default full fork should ignore child model overrides"); + let (content, _) = expect_text_output(output); + let result: SpawnAgentResult = + serde_json::from_str(&content).expect("spawn_agent result should be json"); + assert_eq!(result.task_name, "/root/fork_context_v2"); + let agent_id = spawned_thread_id_after(&manager, &before_thread_ids).await; + let snapshot = manager + .get_thread(agent_id) + .await + .expect("spawned agent thread should exist") + .config_snapshot() + .await; + assert_ne!(snapshot.model, "gpt-5-child-override"); + assert_eq!(snapshot.reasoning_effort, parent_reasoning_effort); + }); } -#[tokio::test] -async fn multi_agent_v2_spawn_watchdog_role_returns_inert_handle_and_ignores_fork_turns() { - let (mut session, mut turn) = make_session_and_context().await; - let manager = thread_manager(); - let root = manager - .start_thread((*turn.config).clone()) - .await - .expect("root thread should start"); - let agent_control = manager.agent_control(); - session.services.agent_control = agent_control.clone(); - session.conversation_id = root.thread_id; - let mut config = (*turn.config).clone(); - config - .features - .enable(Feature::AgentWatchdog) - .expect("test config should allow feature update"); - config - .features - .enable(Feature::MultiAgentV2) - .expect("test config should allow feature update"); - turn.config = Arc::new(config); - - let output = SpawnAgentHandlerV2 - .handle(invocation( - Arc::new(session), - Arc::new(turn), - "spawn_agent", - function_payload(json!({ - "message": "check in later", - "task_name": "watchdog", - "agent_type": "watchdog", - "fork_turns": "ignored for watchdogs" - })), - )) - .await - .expect("watchdog spawn should ignore fork_turns and succeed"); - let (content, success) = expect_text_output(output); - let result: serde_json::Value = - serde_json::from_str(&content).expect("spawn_agent result should be json"); - assert_eq!(result["task_name"], "/root/watchdog"); +#[test] +fn multi_agent_v2_spawn_watchdog_role_returns_inert_handle_and_ignores_fork_turns() { + run_large_stack_async(|| async { + let (mut session, mut turn) = make_session_and_context().await; + let manager = thread_manager(); + let root = manager + .start_thread((*turn.config).clone()) + .await + .expect("root thread should start"); + let agent_control = manager.agent_control(); + session.services.agent_control = agent_control.clone(); + session.conversation_id = root.thread_id; + let mut config = (*turn.config).clone(); + config + .features + .enable(Feature::AgentWatchdog) + .expect("test config should allow feature update"); + config + .features + .enable(Feature::MultiAgentV2) + .expect("test config should allow feature update"); + turn.config = Arc::new(config); + + let output = SpawnAgentHandlerV2 + .handle(invocation( + Arc::new(session), + Arc::new(turn), + "spawn_agent", + function_payload(json!({ + "message": "check in later", + "task_name": "watchdog", + "agent_type": "watchdog", + "fork_turns": "ignored for watchdogs" + })), + )) + .await + .expect("watchdog spawn should ignore fork_turns and succeed"); + let (content, success) = expect_text_output(output); + let result: serde_json::Value = + serde_json::from_str(&content).expect("spawn_agent result should be json"); + assert_eq!(result["task_name"], "/root/watchdog"); - let watchdog_id = manager - .captured_ops() - .into_iter() - .filter_map(|(thread_id, op)| (thread_id != root.thread_id).then_some((thread_id, op))) - .find_map(|(thread_id, op)| (op == Op::Interrupt).then_some(thread_id)) - .expect("watchdog handle should receive only an inert interrupt"); - assert_eq!(success, Some(true)); - assert_eq!( - agent_control.get_status(watchdog_id).await, - AgentStatus::Running - ); - assert!(agent_control.is_watchdog_handle(watchdog_id).await); + let watchdog_id = manager + .captured_ops() + .into_iter() + .filter_map(|(thread_id, op)| (thread_id != root.thread_id).then_some((thread_id, op))) + .find_map(|(thread_id, op)| (op == Op::Interrupt).then_some(thread_id)) + .expect("watchdog handle should receive only an inert interrupt"); + assert_eq!(success, Some(true)); + assert_eq!( + agent_control.get_status(watchdog_id).await, + AgentStatus::Running + ); + assert!(agent_control.is_watchdog_handle(watchdog_id).await); + }); } -#[tokio::test] -async fn multi_agent_v2_spawn_partial_fork_turns_allows_agent_type_override() { - let (mut session, mut turn) = make_session_and_context().await; - let role_name = install_role_with_model_override(&mut turn).await; - let manager = thread_manager(); - let root = manager - .start_thread((*turn.config).clone()) - .await - .expect("root thread should start"); - session.services.agent_control = manager.agent_control(); - session.conversation_id = root.thread_id; - let mut config = (*turn.config).clone(); - config - .features - .enable(Feature::MultiAgentV2) - .expect("test config should allow feature update"); - let turn = TurnContext { - config: Arc::new(config), - ..turn - }; - - let output = SpawnAgentHandlerV2 - .handle(invocation( - Arc::new(session), - Arc::new(turn), - "spawn_agent", - function_payload(json!({ - "message": "inspect this repo", - "task_name": "partial_fork", - "agent_type": role_name, - "fork_turns": "1" - })), - )) - .await - .expect("partial fork should allow agent_type overrides"); - let (content, _) = expect_text_output(output); - let result: serde_json::Value = - serde_json::from_str(&content).expect("spawn_agent result should be json"); - assert_eq!(result["task_name"], "/root/partial_fork"); - let agent_id = manager - .captured_ops() - .into_iter() - .map(|(thread_id, _)| thread_id) - .find(|thread_id| *thread_id != root.thread_id) - .expect("spawned agent should receive an op"); - let snapshot = manager - .get_thread(agent_id) - .await - .expect("spawned agent thread should exist") - .config_snapshot() - .await; +#[test] +fn multi_agent_v2_spawn_partial_fork_turns_allows_agent_type_override() { + run_large_stack_async(|| async { + let (mut session, mut turn) = make_session_and_context().await; + let role_name = install_role_with_model_override(&mut turn).await; + let manager = thread_manager(); + let root = manager + .start_thread((*turn.config).clone()) + .await + .expect("root thread should start"); + session.services.agent_control = manager.agent_control(); + session.conversation_id = root.thread_id; + let mut config = (*turn.config).clone(); + config + .features + .enable(Feature::MultiAgentV2) + .expect("test config should allow feature update"); + let turn = TurnContext { + config: Arc::new(config), + ..turn + }; + + let output = SpawnAgentHandlerV2 + .handle(invocation( + Arc::new(session), + Arc::new(turn), + "spawn_agent", + function_payload(json!({ + "message": "inspect this repo", + "task_name": "partial_fork", + "agent_type": role_name, + "fork_turns": "1" + })), + )) + .await + .expect("partial fork should allow agent_type overrides"); + let (content, _) = expect_text_output(output); + let result: serde_json::Value = + serde_json::from_str(&content).expect("spawn_agent result should be json"); + assert_eq!(result["task_name"], "/root/partial_fork"); + let agent_id = manager + .captured_ops() + .into_iter() + .map(|(thread_id, _)| thread_id) + .find(|thread_id| *thread_id != root.thread_id) + .expect("spawned agent should receive an op"); + let snapshot = manager + .get_thread(agent_id) + .await + .expect("spawned agent thread should exist") + .config_snapshot() + .await; - assert_eq!(snapshot.model, "gpt-5-role-override"); - assert_eq!(snapshot.model_provider_id, "ollama"); - assert_eq!(snapshot.reasoning_effort, Some(ReasoningEffort::Minimal)); + assert_eq!(snapshot.model, "gpt-5-role-override"); + assert_eq!(snapshot.model_provider_id, "ollama"); + assert_eq!(snapshot.reasoning_effort, Some(ReasoningEffort::Minimal)); + }); } #[tokio::test] @@ -577,57 +681,59 @@ async fn spawn_agent_returns_agent_id_without_task_name() { serde_json::from_str(&content).expect("spawn_agent result should be json"); assert!(result["agent_id"].is_string()); - assert!(result.get("task_name").is_none()); - assert!(result.get("nickname").is_some()); - assert_eq!(success, Some(true)); -} - -#[tokio::test] -async fn spawn_agent_watchdog_role_returns_inert_handle() { - let (mut session, mut turn) = make_session_and_context().await; - let manager = thread_manager(); - let agent_control = manager.agent_control(); - session.services.agent_control = agent_control.clone(); - let mut config = (*turn.config).clone(); - config - .features - .enable(Feature::AgentWatchdog) - .expect("test config should allow feature update"); - turn.config = Arc::new(config); - - let output = SpawnAgentHandler - .handle(invocation( - Arc::new(session), - Arc::new(turn), - "spawn_agent", - function_payload(json!({ - "message": "check in later", - "agent_type": "watchdog" - })), - )) - .await - .expect("spawn_agent should succeed"); - let (content, success) = expect_text_output(output); - let result: serde_json::Value = - serde_json::from_str(&content).expect("spawn_agent result should be json"); - let agent_id = parse_agent_id( - result["agent_id"] - .as_str() - .expect("spawn_agent result should include agent_id"), - ); - - assert_eq!(success, Some(true)); - assert_eq!( - agent_control.get_status(agent_id).await, - AgentStatus::Running - ); - let ops_for_agent = manager - .captured_ops() - .into_iter() - .filter_map(|(thread_id, op)| (thread_id == agent_id).then_some(op)) - .collect::>(); - assert_eq!(ops_for_agent, vec![Op::Interrupt]); - assert!(agent_control.is_watchdog_handle(agent_id).await); + assert!(result.get("task_name").is_none()); + assert!(result.get("nickname").is_some()); + assert_eq!(success, Some(true)); +} + +#[test] +fn spawn_agent_watchdog_role_returns_inert_handle() { + run_large_stack_async(|| async { + let (mut session, mut turn) = make_session_and_context().await; + let manager = thread_manager(); + let agent_control = manager.agent_control(); + session.services.agent_control = agent_control.clone(); + let mut config = (*turn.config).clone(); + config + .features + .enable(Feature::AgentWatchdog) + .expect("test config should allow feature update"); + turn.config = Arc::new(config); + + let output = SpawnAgentHandler + .handle(invocation( + Arc::new(session), + Arc::new(turn), + "spawn_agent", + function_payload(json!({ + "message": "check in later", + "agent_type": "watchdog" + })), + )) + .await + .expect("spawn_agent should succeed"); + let (content, success) = expect_text_output(output); + let result: serde_json::Value = + serde_json::from_str(&content).expect("spawn_agent result should be json"); + let agent_id = parse_agent_id( + result["agent_id"] + .as_str() + .expect("spawn_agent result should include agent_id"), + ); + + assert_eq!(success, Some(true)); + assert_eq!( + agent_control.get_status(agent_id).await, + AgentStatus::Running + ); + let ops_for_agent = manager + .captured_ops() + .into_iter() + .filter_map(|(thread_id, op)| (thread_id == agent_id).then_some(op)) + .collect::>(); + assert_eq!(ops_for_agent, vec![Op::Interrupt]); + assert!(agent_control.is_watchdog_handle(agent_id).await); + }); } #[tokio::test] @@ -797,6 +903,21 @@ async fn watchdog_snooze_suppresses_helper_and_clears_active_helper() { .any(|(thread_id, op)| *thread_id == helper_thread_id && matches!(op, Op::Shutdown)), "snooze should finish the helper turn without a shutdown op" ); + let snooze_event = timeout(Duration::from_secs(1), async { + loop { + let event = owner + .thread + .next_event() + .await + .expect("owner event channel should stay open"); + if let EventMsg::Warning(warning) = event.msg { + break warning.message; + } + } + }) + .await + .expect("watchdog snooze should publish a visible owner-thread event"); + assert_eq!(snooze_event, "Watchdog snoozed for 30s."); } #[tokio::test] @@ -969,6 +1090,158 @@ async fn multi_agent_v2_watchdog_send_message_parent_is_rejected() { ); } +#[test] +fn multi_agent_v2_followup_task_to_watchdog_handle_is_rejected() { + run_large_stack_async(|| async { + let (mut session, mut turn) = make_session_and_context().await; + let manager = thread_manager(); + let root = manager + .start_thread((*turn.config).clone()) + .await + .expect("root thread should start"); + session.services.agent_control = manager.agent_control(); + session.conversation_id = root.thread_id; + let mut config = (*turn.config).clone(); + config + .features + .enable(Feature::AgentWatchdog) + .expect("test config should allow watchdog feature update"); + config + .features + .enable(Feature::MultiAgentV2) + .expect("test config should allow multi-agent v2 feature update"); + turn.config = Arc::new(config); + let session = Arc::new(session); + let turn = Arc::new(turn); + + let spawn_output = SpawnAgentHandlerV2 + .handle(invocation( + session.clone(), + turn.clone(), + "spawn_agent", + function_payload(json!({ + "message": "check the parent task", + "task_name": "watchdog", + "agent_type": "watchdog" + })), + )) + .await + .expect("watchdog spawn should succeed"); + let (_, spawn_success) = expect_text_output(spawn_output); + assert_eq!(spawn_success, Some(true)); + let watchdog_id = session + .services + .agent_control + .resolve_agent_reference(session.conversation_id, &turn.session_source, "watchdog") + .await + .expect("watchdog should resolve by task name"); + assert!( + session + .services + .agent_control + .is_watchdog_handle(watchdog_id) + .await + ); + + let Err(err) = FollowupTaskHandlerV2 + .handle(invocation( + session, + turn, + "followup_task", + function_payload(json!({ + "target": watchdog_id.to_string(), + "message": "ping 79 (133)" + })), + )) + .await + else { + panic!("followup_task should reject watchdog handles"); + }; + + let expected_error = FunctionCallError::RespondToModel( + "watchdog handles can't receive send_message or followup_task; watchdog check-ins run on the idle timer. Use close_agent to stop a watchdog." + .to_string(), + ); + assert_eq!(err, expected_error); + assert!( + !manager + .captured_ops() + .iter() + .any(|(thread_id, op)| *thread_id == watchdog_id + && matches!(op, Op::InterAgentCommunication { .. })) + ); + }); +} + +#[test] +fn multi_agent_v2_send_message_to_watchdog_handle_is_rejected() { + run_large_stack_async(|| async { + let (mut session, mut turn) = make_session_and_context().await; + let manager = thread_manager(); + let root = manager + .start_thread((*turn.config).clone()) + .await + .expect("root thread should start"); + session.services.agent_control = manager.agent_control(); + session.conversation_id = root.thread_id; + let mut config = (*turn.config).clone(); + config + .features + .enable(Feature::AgentWatchdog) + .expect("test config should allow watchdog feature update"); + config + .features + .enable(Feature::MultiAgentV2) + .expect("test config should allow multi-agent v2 feature update"); + turn.config = Arc::new(config); + let session = Arc::new(session); + let turn = Arc::new(turn); + + let spawn_output = SpawnAgentHandlerV2 + .handle(invocation( + session.clone(), + turn.clone(), + "spawn_agent", + function_payload(json!({ + "message": "check the parent task", + "task_name": "watchdog", + "agent_type": "watchdog" + })), + )) + .await + .expect("watchdog spawn should succeed"); + let (_, spawn_success) = expect_text_output(spawn_output); + assert_eq!(spawn_success, Some(true)); + let watchdog_id = session + .services + .agent_control + .resolve_agent_reference(session.conversation_id, &turn.session_source, "watchdog") + .await + .expect("watchdog should resolve by task name"); + + let Err(err) = SendMessageHandlerV2 + .handle(invocation( + session, + turn, + "send_message", + function_payload(json!({ + "target": watchdog_id.to_string(), + "message": "queued ping" + })), + )) + .await + else { + panic!("send_message should reject watchdog handles"); + }; + + let expected_error = FunctionCallError::RespondToModel( + "watchdog handles can't receive send_message or followup_task; watchdog check-ins run on the idle timer. Use close_agent to stop a watchdog." + .to_string(), + ); + assert_eq!(err, expected_error); + }); +} + #[tokio::test] async fn watchdog_close_self_rejects_non_watchdog_thread() { let (session, turn) = make_session_and_context().await; @@ -1109,95 +1382,97 @@ async fn watchdog_close_self_notifies_owner_and_unregisters_handle() { assert_eq!(close_event.status, AgentStatus::Running); } -#[tokio::test] -async fn watchdog_close_self_removes_watchdog_handle_from_list_agents() { - let (mut session, mut turn) = make_session_and_context().await; - let manager = thread_manager(); - let root = manager - .start_thread((*turn.config).clone()) - .await - .expect("root thread should start"); - let agent_control = manager.agent_control(); - session.services.agent_control = agent_control.clone(); - session.conversation_id = root.thread_id; - let mut config = (*turn.config).clone(); - config - .features - .enable(Feature::AgentWatchdog) - .expect("test config should allow feature update"); - config - .features - .enable(Feature::MultiAgentV2) - .expect("test config should allow feature update"); - let enabled_config = config.clone(); - turn.config = Arc::new(config); - let root_session = Arc::new(session); - let root_turn = Arc::new(turn); - - let spawn_output = SpawnAgentHandlerV2 - .handle(invocation( - root_session, - root_turn, - "spawn_agent", - function_payload(json!({ - "message": "check this branch periodically", - "task_name": "ping_watchdog", - "agent_type": "watchdog" - })), - )) - .await - .expect("watchdog spawn should succeed"); - let (_, spawn_success) = expect_text_output(spawn_output); - let watchdog_id = agent_control - .resolve_agent_reference(root.thread_id, &SessionSource::Cli, "ping_watchdog") - .await - .expect("watchdog path should resolve"); - assert_eq!(spawn_success, Some(true)); - - let helper_id = agent_control - .spawn_agent( - enabled_config, - vec![UserInput::Text { - text: "watchdog helper implementation detail".to_string(), - text_elements: Vec::new(), - }] - .into(), - Some(SessionSource::SubAgent(SubAgentSource::ThreadSpawn { - parent_thread_id: root.thread_id, - depth: 1, - agent_path: None, - agent_nickname: None, - agent_role: Some("watchdog".to_string()), - })), - ) - .await - .expect("watchdog helper should start"); - agent_control - .set_watchdog_active_helper_for_tests(watchdog_id, helper_id) - .await; - let (mut helper_session, helper_turn) = make_session_and_context().await; - helper_session.services.agent_control = agent_control.clone(); - helper_session.conversation_id = helper_id; +#[test] +fn watchdog_close_self_removes_watchdog_handle_from_list_agents() { + run_large_stack_async(|| async { + let (mut session, mut turn) = make_session_and_context().await; + let manager = thread_manager(); + let root = manager + .start_thread((*turn.config).clone()) + .await + .expect("root thread should start"); + let agent_control = manager.agent_control(); + session.services.agent_control = agent_control.clone(); + session.conversation_id = root.thread_id; + let mut config = (*turn.config).clone(); + config + .features + .enable(Feature::AgentWatchdog) + .expect("test config should allow feature update"); + config + .features + .enable(Feature::MultiAgentV2) + .expect("test config should allow feature update"); + let enabled_config = config.clone(); + turn.config = Arc::new(config); + let root_session = Arc::new(session); + let root_turn = Arc::new(turn); + + let spawn_output = SpawnAgentHandlerV2 + .handle(invocation( + root_session, + root_turn, + "spawn_agent", + function_payload(json!({ + "message": "check this branch periodically", + "task_name": "ping_watchdog", + "agent_type": "watchdog" + })), + )) + .await + .expect("watchdog spawn should succeed"); + let (_, spawn_success) = expect_text_output(spawn_output); + let watchdog_id = agent_control + .resolve_agent_reference(root.thread_id, &SessionSource::Cli, "ping_watchdog") + .await + .expect("watchdog path should resolve"); + assert_eq!(spawn_success, Some(true)); + + let helper_id = agent_control + .spawn_agent( + enabled_config, + vec![UserInput::Text { + text: "watchdog helper implementation detail".to_string(), + text_elements: Vec::new(), + }] + .into(), + Some(SessionSource::SubAgent(SubAgentSource::ThreadSpawn { + parent_thread_id: root.thread_id, + depth: 1, + agent_path: None, + agent_nickname: None, + agent_role: Some("watchdog".to_string()), + })), + ) + .await + .expect("watchdog helper should start"); + agent_control + .set_watchdog_active_helper_for_tests(watchdog_id, helper_id) + .await; + let (mut helper_session, helper_turn) = make_session_and_context().await; + helper_session.services.agent_control = agent_control.clone(); + helper_session.conversation_id = helper_id; - WatchdogSelfCloseHandler - .handle(invocation( - Arc::new(helper_session), - Arc::new(helper_turn), - "close_self", - function_payload(json!({"message": "watchdog done"})), - )) - .await - .expect("watchdog helper should self-close"); + WatchdogSelfCloseHandler + .handle(invocation( + Arc::new(helper_session), + Arc::new(helper_turn), + "close_self", + function_payload(json!({"message": "watchdog done"})), + )) + .await + .expect("watchdog helper should self-close"); - let listed_agents = agent_control - .list_agents(&SessionSource::Cli, /*path_prefix*/ None) - .await - .expect("list_agents should succeed after self-close"); - assert!( - !listed_agents - .iter() - .any(|agent| agent.agent_name == "/root/ping_watchdog") - ); + let listed_agents = agent_control + .list_agents(&SessionSource::Cli, /*path_prefix*/ None) + .await + .expect("list_agents should succeed after self-close"); + assert!( + !listed_agents + .iter() + .any(|agent| agent.agent_name == "/root/ping_watchdog") + ); + }); } #[tokio::test] @@ -1959,158 +2234,10 @@ async fn multi_agent_v2_list_agents_omits_closed_agents() { .services .agent_control .close_agent(agent_id) - .await - .expect("close_agent should succeed"); - - let output = ListAgentsHandlerV2 - .handle(invocation( - session, - turn, - "list_agents", - function_payload(json!({})), - )) - .await - .expect("list_agents should succeed"); - let (content, _) = expect_text_output(output); - let result: ListAgentsResult = - serde_json::from_str(&content).expect("list_agents result should be json"); - - assert_eq!(result.agents.len(), 1); - assert_eq!(result.agents[0].agent_name, "/root"); - assert_eq!( - result.agents[0].last_task_message.as_deref(), - Some("Main thread") - ); -} - -#[tokio::test] -async fn watchdog_handle_is_listed_and_close_agent_removes_it() { - let (mut session, mut turn) = make_session_and_context().await; - let manager = thread_manager(); - let root = manager - .start_thread((*turn.config).clone()) - .await - .expect("root thread should start"); - let agent_control = manager.agent_control(); - session.services.agent_control = agent_control.clone(); - session.conversation_id = root.thread_id; - let mut config = (*turn.config).clone(); - config - .features - .enable(Feature::AgentWatchdog) - .expect("test config should allow feature update"); - config - .features - .enable(Feature::MultiAgentV2) - .expect("test config should allow feature update"); - let enabled_config = config.clone(); - turn.config = Arc::new(config); - - let session = Arc::new(session); - let turn = Arc::new(turn); - let spawn_output = SpawnAgentHandler - .handle(invocation( - session.clone(), - turn.clone(), - "spawn_agent", - function_payload(json!({ - "message": "check this branch periodically", - "agent_type": "watchdog" - })), - )) - .await - .expect("watchdog spawn should succeed"); - let (spawn_content, spawn_success) = expect_text_output(spawn_output); - let spawn_result: serde_json::Value = - serde_json::from_str(&spawn_content).expect("watchdog spawn result should be json"); - let watchdog_id = parse_agent_id( - spawn_result["agent_id"] - .as_str() - .expect("watchdog spawn result should include agent_id"), - ); - assert_eq!(spawn_success, Some(true)); - assert!(agent_control.is_watchdog_handle(watchdog_id).await); - - let helper_id = agent_control - .spawn_agent( - enabled_config, - vec![UserInput::Text { - text: "watchdog helper implementation detail".to_string(), - text_elements: Vec::new(), - }] - .into(), - Some(SessionSource::SubAgent(SubAgentSource::ThreadSpawn { - parent_thread_id: root.thread_id, - depth: 1, - agent_path: None, - agent_nickname: None, - agent_role: Some("watchdog".to_string()), - })), - ) - .await - .expect("watchdog helper should start"); - agent_control - .set_watchdog_active_helper_for_tests(watchdog_id, helper_id) - .await; - - let list_output = ListAgentsHandlerV2 - .handle(invocation( - session.clone(), - turn.clone(), - "list_agents", - function_payload(json!({})), - )) - .await - .expect("list_agents should include the watchdog handle"); - let (list_content, list_success) = expect_text_output(list_output); - let list_result: ListAgentsResult = - serde_json::from_str(&list_content).expect("list_agents result should be json"); - assert_eq!(list_success, Some(true)); - let watchdog_listing = list_result - .agents - .iter() - .find(|agent| agent.agent_name == watchdog_id.to_string()) - .expect("list_agents should include the watchdog handle"); - assert_eq!(watchdog_listing.agent_status, json!("running")); - assert!( - !list_result - .agents - .iter() - .any(|agent| agent.agent_name == helper_id.to_string()), - "active watchdog helpers should not be exposed as targetable list_agents entries" - ); - - let close_output = CloseAgentHandlerV2 - .handle(invocation( - session.clone(), - turn.clone(), - "close_agent", - function_payload(json!({"target": watchdog_id.to_string()})), - )) - .await - .expect("close_agent should close the watchdog handle"); - let (close_content, close_success) = expect_text_output(close_output); - let close_result: close_agent::CloseAgentResult = - serde_json::from_str(&close_content).expect("close_agent result should be json"); - assert_eq!(close_success, Some(true)); - assert_eq!(close_result.previous_status, AgentStatus::PendingInit); - assert!(!agent_control.is_watchdog_handle(watchdog_id).await); - assert_eq!( - agent_control.get_status(watchdog_id).await, - AgentStatus::NotFound - ); - assert_eq!( - agent_control.get_status(helper_id).await, - AgentStatus::NotFound - ); - assert!( - manager - .captured_ops() - .iter() - .any(|(thread_id, op)| *thread_id == helper_id && matches!(op, Op::Shutdown)) - ); + .await + .expect("close_agent should succeed"); - let list_after_close_output = ListAgentsHandlerV2 + let output = ListAgentsHandlerV2 .handle(invocation( session, turn, @@ -2118,16 +2245,167 @@ async fn watchdog_handle_is_listed_and_close_agent_removes_it() { function_payload(json!({})), )) .await - .expect("list_agents should omit the closed watchdog handle"); - let (list_after_close_content, _) = expect_text_output(list_after_close_output); - let list_after_close_result: ListAgentsResult = serde_json::from_str(&list_after_close_content) - .expect("list_agents result after close should be json"); - assert!( - !list_after_close_result + .expect("list_agents should succeed"); + let (content, _) = expect_text_output(output); + let result: ListAgentsResult = + serde_json::from_str(&content).expect("list_agents result should be json"); + + assert_eq!(result.agents.len(), 1); + assert_eq!(result.agents[0].agent_name, "/root"); + assert_eq!( + result.agents[0].last_task_message.as_deref(), + Some("Main thread") + ); +} + +#[test] +fn watchdog_handle_is_listed_and_close_agent_removes_it() { + run_large_stack_async(|| async { + let (mut session, mut turn) = make_session_and_context().await; + let manager = thread_manager(); + let root = manager + .start_thread((*turn.config).clone()) + .await + .expect("root thread should start"); + let agent_control = manager.agent_control(); + session.services.agent_control = agent_control.clone(); + session.conversation_id = root.thread_id; + let mut config = (*turn.config).clone(); + config + .features + .enable(Feature::AgentWatchdog) + .expect("test config should allow feature update"); + config + .features + .enable(Feature::MultiAgentV2) + .expect("test config should allow feature update"); + let enabled_config = config.clone(); + turn.config = Arc::new(config); + + let session = Arc::new(session); + let turn = Arc::new(turn); + let spawn_output = SpawnAgentHandler + .handle(invocation( + session.clone(), + turn.clone(), + "spawn_agent", + function_payload(json!({ + "message": "check this branch periodically", + "agent_type": "watchdog" + })), + )) + .await + .expect("watchdog spawn should succeed"); + let (spawn_content, spawn_success) = expect_text_output(spawn_output); + let spawn_result: serde_json::Value = + serde_json::from_str(&spawn_content).expect("watchdog spawn result should be json"); + let watchdog_id = parse_agent_id( + spawn_result["agent_id"] + .as_str() + .expect("watchdog spawn result should include agent_id"), + ); + assert_eq!(spawn_success, Some(true)); + assert!(agent_control.is_watchdog_handle(watchdog_id).await); + + let helper_id = agent_control + .spawn_agent( + enabled_config, + vec![UserInput::Text { + text: "watchdog helper implementation detail".to_string(), + text_elements: Vec::new(), + }] + .into(), + Some(SessionSource::SubAgent(SubAgentSource::ThreadSpawn { + parent_thread_id: root.thread_id, + depth: 1, + agent_path: None, + agent_nickname: None, + agent_role: Some("watchdog".to_string()), + })), + ) + .await + .expect("watchdog helper should start"); + agent_control + .set_watchdog_active_helper_for_tests(watchdog_id, helper_id) + .await; + + let list_output = ListAgentsHandlerV2 + .handle(invocation( + session.clone(), + turn.clone(), + "list_agents", + function_payload(json!({})), + )) + .await + .expect("list_agents should include the watchdog handle"); + let (list_content, list_success) = expect_text_output(list_output); + let list_result: ListAgentsResult = + serde_json::from_str(&list_content).expect("list_agents result should be json"); + assert_eq!(list_success, Some(true)); + let watchdog_listing = list_result .agents .iter() - .any(|agent| agent.agent_name == watchdog_id.to_string()) - ); + .find(|agent| agent.agent_name == watchdog_id.to_string()) + .expect("list_agents should include the watchdog handle"); + assert_eq!(watchdog_listing.agent_status, json!("running")); + assert!( + !list_result + .agents + .iter() + .any(|agent| agent.agent_name == helper_id.to_string()), + "active watchdog helpers should not be exposed as targetable list_agents entries" + ); + + let close_output = CloseAgentHandlerV2 + .handle(invocation( + session.clone(), + turn.clone(), + "close_agent", + function_payload(json!({"target": watchdog_id.to_string()})), + )) + .await + .expect("close_agent should close the watchdog handle"); + let (close_content, close_success) = expect_text_output(close_output); + let close_result: close_agent::CloseAgentResult = + serde_json::from_str(&close_content).expect("close_agent result should be json"); + assert_eq!(close_success, Some(true)); + assert_eq!(close_result.previous_status, AgentStatus::PendingInit); + assert!(!agent_control.is_watchdog_handle(watchdog_id).await); + assert_eq!( + agent_control.get_status(watchdog_id).await, + AgentStatus::NotFound + ); + assert_eq!( + agent_control.get_status(helper_id).await, + AgentStatus::NotFound + ); + assert!( + manager + .captured_ops() + .iter() + .any(|(thread_id, op)| *thread_id == helper_id && matches!(op, Op::Shutdown)) + ); + + let list_after_close_output = ListAgentsHandlerV2 + .handle(invocation( + session, + turn, + "list_agents", + function_payload(json!({})), + )) + .await + .expect("list_agents should omit the closed watchdog handle"); + let (list_after_close_content, _) = expect_text_output(list_after_close_output); + let list_after_close_result: ListAgentsResult = + serde_json::from_str(&list_after_close_content) + .expect("list_agents result after close should be json"); + assert!( + !list_after_close_result + .agents + .iter() + .any(|agent| agent.agent_name == watchdog_id.to_string()) + ); + }); } #[tokio::test] @@ -2259,139 +2537,141 @@ async fn multi_agent_v2_send_message_rejects_interrupt_parameter() { ))); } -#[tokio::test] -async fn multi_agent_v2_followup_task_completion_notifies_parent_on_every_turn() { - let (mut session, mut turn) = make_session_and_context().await; - let manager = thread_manager(); - let root = manager - .start_thread((*turn.config).clone()) - .await - .expect("root thread should start"); - session.services.agent_control = manager.agent_control(); - session.conversation_id = root.thread_id; - let mut config = turn.config.as_ref().clone(); - let _ = config.features.enable(Feature::MultiAgentV2); - turn.config = Arc::new(config); - let session = Arc::new(session); - let turn = Arc::new(turn); - - SpawnAgentHandlerV2 - .handle(invocation( - session.clone(), - turn.clone(), - "spawn_agent", - function_payload(json!({ - "message": "boot worker", - "task_name": "worker" - })), - )) - .await - .expect("spawn worker"); - let agent_id = session - .services - .agent_control - .resolve_agent_reference(session.conversation_id, &turn.session_source, "worker") - .await - .expect("worker should resolve"); - let thread = manager - .get_thread(agent_id) - .await - .expect("worker thread should exist"); - let worker_path = AgentPath::try_from("/root/worker").expect("worker path"); - - let first_turn = thread.codex.session.new_default_turn().await; - thread - .codex - .session - .send_event( - first_turn.as_ref(), - EventMsg::TurnComplete(TurnCompleteEvent { - turn_id: first_turn.sub_id.clone(), - last_agent_message: Some("first done".to_string()), - completed_at: None, - duration_ms: None, - time_to_first_token_ms: None, - }), - ) - .await; - - FollowupTaskHandlerV2 - .handle(invocation( - session, - turn, - "followup_task", - function_payload(json!({ - "target": agent_id.to_string(), - "message": "continue", - })), - )) - .await - .expect("followup_task should succeed"); - - let second_turn = thread.codex.session.new_default_turn().await; - thread - .codex - .session - .send_event( - second_turn.as_ref(), - EventMsg::TurnComplete(TurnCompleteEvent { - turn_id: second_turn.sub_id.clone(), - last_agent_message: Some("second done".to_string()), - completed_at: None, - duration_ms: None, - time_to_first_token_ms: None, - }), - ) - .await; +#[test] +fn multi_agent_v2_followup_task_completion_notifies_parent_on_every_turn() { + run_large_stack_async(|| async { + let (mut session, mut turn) = make_session_and_context().await; + let manager = thread_manager(); + let root = manager + .start_thread((*turn.config).clone()) + .await + .expect("root thread should start"); + session.services.agent_control = manager.agent_control(); + session.conversation_id = root.thread_id; + let mut config = turn.config.as_ref().clone(); + let _ = config.features.enable(Feature::MultiAgentV2); + turn.config = Arc::new(config); + let session = Arc::new(session); + let turn = Arc::new(turn); - let first_notification = format_subagent_notification_message( - worker_path.as_str(), - &AgentStatus::Completed(Some("first done".to_string())), - ); - let second_notification = format_subagent_notification_message( - worker_path.as_str(), - &AgentStatus::Completed(Some("second done".to_string())), - ); + SpawnAgentHandlerV2 + .handle(invocation( + session.clone(), + turn.clone(), + "spawn_agent", + function_payload(json!({ + "message": "boot worker", + "task_name": "worker" + })), + )) + .await + .expect("spawn worker"); + let agent_id = session + .services + .agent_control + .resolve_agent_reference(session.conversation_id, &turn.session_source, "worker") + .await + .expect("worker should resolve"); + let thread = manager + .get_thread(agent_id) + .await + .expect("worker thread should exist"); + let worker_path = AgentPath::try_from("/root/worker").expect("worker path"); + + let first_turn = thread.codex.session.new_default_turn().await; + thread + .codex + .session + .send_event( + first_turn.as_ref(), + EventMsg::TurnComplete(TurnCompleteEvent { + turn_id: first_turn.sub_id.clone(), + last_agent_message: Some("first done".to_string()), + completed_at: None, + duration_ms: None, + time_to_first_token_ms: None, + }), + ) + .await; - let notifications = timeout(Duration::from_secs(5), async { - loop { - let notifications = manager - .captured_ops() - .into_iter() - .filter_map(|(id, op)| { - (id == root.thread_id) - .then_some(op) - .and_then(|op| match op { - Op::InterAgentCommunication { communication } - if communication.author == worker_path - && communication.recipient == AgentPath::root() - && communication.other_recipients.is_empty() - && !communication.trigger_turn => - { - Some(communication.content) - } - _ => None, - }) - }) - .collect::>(); - let first_count = notifications - .iter() - .filter(|message| **message == first_notification) - .count(); - let second_count = notifications - .iter() - .filter(|message| **message == second_notification) - .count(); - if first_count == 1 && second_count == 1 { - break notifications; + FollowupTaskHandlerV2 + .handle(invocation( + session, + turn, + "followup_task", + function_payload(json!({ + "target": agent_id.to_string(), + "message": "continue", + })), + )) + .await + .expect("followup_task should succeed"); + + let second_turn = thread.codex.session.new_default_turn().await; + thread + .codex + .session + .send_event( + second_turn.as_ref(), + EventMsg::TurnComplete(TurnCompleteEvent { + turn_id: second_turn.sub_id.clone(), + last_agent_message: Some("second done".to_string()), + completed_at: None, + duration_ms: None, + time_to_first_token_ms: None, + }), + ) + .await; + + let first_notification = format_subagent_notification_message( + &agent_id.to_string(), + &AgentStatus::Completed(Some("first done".to_string())), + ); + let second_notification = format_subagent_notification_message( + &agent_id.to_string(), + &AgentStatus::Completed(Some("second done".to_string())), + ); + + let notifications = timeout(Duration::from_secs(5), async { + loop { + let notifications = manager + .captured_ops() + .into_iter() + .filter_map(|(id, op)| { + (id == root.thread_id) + .then_some(op) + .and_then(|op| match op { + Op::InterAgentCommunication { communication } + if communication.author == worker_path + && communication.recipient == AgentPath::root() + && communication.other_recipients.is_empty() + && !communication.trigger_turn => + { + Some(communication.content) + } + _ => None, + }) + }) + .collect::>(); + let first_count = notifications + .iter() + .filter(|message| **message == first_notification) + .count(); + let second_count = notifications + .iter() + .filter(|message| **message == second_notification) + .count(); + if first_count == 1 && second_count == 1 { + break notifications; + } + tokio::time::sleep(Duration::from_millis(10)).await; } - tokio::time::sleep(Duration::from_millis(10)).await; - } - }) - .await - .expect("parent should receive one completion notification per child turn"); + }) + .await + .expect("parent should receive one completion notification per child turn"); - assert_eq!(notifications.len(), 2); + assert_eq!(notifications.len(), 2); + }); } #[tokio::test] @@ -2893,6 +3173,57 @@ async fn send_input_reports_missing_agent() { ); } +#[tokio::test] +async fn send_input_rejects_watchdog_handle() { + let (mut session, mut turn) = make_session_and_context().await; + let manager = thread_manager(); + let root = manager + .start_thread((*turn.config).clone()) + .await + .expect("root thread should start"); + let target = manager + .start_thread((*turn.config).clone()) + .await + .expect("watchdog handle should start"); + let agent_control = manager.agent_control(); + session.services.agent_control = agent_control.clone(); + session.conversation_id = root.thread_id; + let mut config = (*turn.config).clone(); + config + .features + .enable(Feature::AgentWatchdog) + .expect("test config should allow watchdog feature update"); + turn.config = Arc::new(config.clone()); + agent_control + .register_watchdog(WatchdogRegistration { + owner_thread_id: root.thread_id, + target_thread_id: target.thread_id, + child_depth: 0, + interval_s: 60, + prompt: "check in".to_string(), + config, + }) + .await + .expect("watchdog registration should succeed"); + + let invocation = invocation( + Arc::new(session), + Arc::new(turn), + "send_input", + function_payload(json!({"target": target.thread_id.to_string(), "message": "hi"})), + ); + let Err(err) = SendInputHandler.handle(invocation).await else { + panic!("send_input should reject watchdog handles"); + }; + assert_eq!( + err, + FunctionCallError::RespondToModel( + "watchdog handles can't receive send_input; watchdog check-ins run on the idle timer. Use close_agent to stop a watchdog." + .to_string() + ) + ); +} + #[tokio::test] async fn send_input_interrupts_before_prompt() { let (mut session, turn) = make_session_and_context().await; @@ -3087,6 +3418,57 @@ async fn resume_agent_reports_missing_agent() { ); } +#[tokio::test] +async fn resume_agent_rejects_watchdog_handle() { + let (mut session, mut turn) = make_session_and_context().await; + let manager = thread_manager(); + let root = manager + .start_thread((*turn.config).clone()) + .await + .expect("root thread should start"); + let target = manager + .start_thread((*turn.config).clone()) + .await + .expect("watchdog handle should start"); + let agent_control = manager.agent_control(); + session.services.agent_control = agent_control.clone(); + session.conversation_id = root.thread_id; + let mut config = (*turn.config).clone(); + config + .features + .enable(Feature::AgentWatchdog) + .expect("test config should allow watchdog feature update"); + turn.config = Arc::new(config.clone()); + agent_control + .register_watchdog(WatchdogRegistration { + owner_thread_id: root.thread_id, + target_thread_id: target.thread_id, + child_depth: 0, + interval_s: 60, + prompt: "check in".to_string(), + config, + }) + .await + .expect("watchdog registration should succeed"); + + let invocation = invocation( + Arc::new(session), + Arc::new(turn), + "resume_agent", + function_payload(json!({"id": target.thread_id.to_string()})), + ); + let Err(err) = ResumeAgentHandler.handle(invocation).await else { + panic!("resume_agent should reject watchdog handles"); + }; + assert_eq!( + err, + FunctionCallError::RespondToModel( + "watchdog handles can't receive resume_agent; watchdog check-ins run on the idle timer. Use close_agent to stop a watchdog." + .to_string() + ) + ); +} + #[tokio::test] async fn resume_agent_noops_for_active_agent() { let (mut session, turn) = make_session_and_context().await; @@ -3489,14 +3871,11 @@ async fn wait_agent_rejects_only_watchdog_handles() { .enable(Feature::AgentWatchdog) .expect("test config should allow feature update"); turn.config = Arc::new(config); - let session = Arc::new(session); - let turn = Arc::new(turn); - agent_control .register_watchdog(WatchdogRegistration { owner_thread_id: owner.thread_id, target_thread_id: target.thread_id, - child_depth: 0, + child_depth: 1, interval_s: 60, prompt: "check in later".to_string(), config: (*turn.config).clone(), @@ -3504,6 +3883,9 @@ async fn wait_agent_rejects_only_watchdog_handles() { .await .expect("watchdog registration should succeed"); + let session = Arc::new(session); + let turn = Arc::new(turn); + let Err(err) = WaitAgentHandler .handle(invocation( session, diff --git a/codex-rs/core/src/tools/handlers/multi_agents_v2/message_tool.rs b/codex-rs/core/src/tools/handlers/multi_agents_v2/message_tool.rs index 34b6671471c1..782fe8d01734 100644 --- a/codex-rs/core/src/tools/handlers/multi_agents_v2/message_tool.rs +++ b/codex-rs/core/src/tools/handlers/multi_agents_v2/message_tool.rs @@ -107,6 +107,17 @@ async fn handle_message_submission( .agent_control .get_agent_metadata(receiver_thread_id) .unwrap_or_default(); + if session + .services + .agent_control + .is_watchdog_handle(receiver_thread_id) + .await + { + return Err(FunctionCallError::RespondToModel( + "watchdog handles can't receive send_message or followup_task; watchdog check-ins run on the idle timer. Use close_agent to stop a watchdog." + .to_string(), + )); + } if mode == MessageDeliveryMode::QueueOnly && is_watchdog_parent { return Err(FunctionCallError::RespondToModel( "watchdog check-in threads must use followup_task with target `parent` to message their parent." diff --git a/codex-rs/core/src/tools/handlers/multi_agents_v2/spawn.rs b/codex-rs/core/src/tools/handlers/multi_agents_v2/spawn.rs index 8cafe287c966..3d108d9a0978 100644 --- a/codex-rs/core/src/tools/handlers/multi_agents_v2/spawn.rs +++ b/codex-rs/core/src/tools/handlers/multi_agents_v2/spawn.rs @@ -87,12 +87,8 @@ impl ToolHandler for Handler { .await; let mut config = build_agent_spawn_config(&session.get_base_instructions().await, turn.as_ref())?; - if matches!(fork_mode, Some(SpawnAgentForkMode::FullHistory)) { - reject_full_fork_spawn_overrides( - role_name, - args.model.as_deref(), - args.reasoning_effort, - )?; + let effective_role_name = if matches!(fork_mode, Some(SpawnAgentForkMode::FullHistory)) { + None } else { apply_requested_spawn_agent_model_overrides( &session, @@ -105,7 +101,8 @@ impl ToolHandler for Handler { apply_role_to_config(&mut config, role_name) .await .map_err(FunctionCallError::RespondToModel)?; - } + role_name + }; apply_spawn_agent_runtime_overrides(&mut config, turn.as_ref())?; apply_spawn_agent_overrides(&mut config, child_depth); @@ -113,11 +110,11 @@ impl ToolHandler for Handler { session.conversation_id, &turn.session_source, child_depth, - role_name, + effective_role_name, Some(args.task_name.clone()), )?; let result = if let Some(watchdog_interval_s) = watchdog_interval_s { - let thread_id = spawn_watchdog( + let thread_id = Box::pin(spawn_watchdog( &session.services.agent_control, config, prompt.clone(), @@ -125,7 +122,7 @@ impl ToolHandler for Handler { child_depth, watchdog_interval_s, spawn_source, - ) + )) .await .map_err(collab_spawn_error)?; Ok(crate::agent::control::LiveAgent { @@ -166,13 +163,14 @@ impl ToolHandler for Handler { Some(spawn_source), SpawnAgentOptions { fork_parent_spawn_call_id: fork_mode.as_ref().map(|_| call_id.clone()), - fork_mode, + fork_mode: fork_mode.clone(), environments: Some( turn.environments .iter() .map(TurnEnvironment::selection) .collect(), ), + initial_task_message: fork_mode.as_ref().map(|_| prompt.clone()), }, ) .await @@ -279,9 +277,9 @@ async fn spawn_watchdog( "failed to clear watchdog MCP servers: {err}" )) })?; - let target_thread_id = agent_control - .spawn_agent(handle_config, Op::Interrupt, Some(spawn_source)) - .await?; + let target_thread_id = + Box::pin(agent_control.spawn_agent(handle_config, Op::Interrupt, Some(spawn_source))) + .await?; let superseded_before_register = agent_control .unregister_watchdogs_for_owner(owner_thread_id) .await; diff --git a/codex-rs/core/tests/suite/client.rs b/codex-rs/core/tests/suite/client.rs index 6e8a2567fac2..addce19239a8 100644 --- a/codex-rs/core/tests/suite/client.rs +++ b/codex-rs/core/tests/suite/client.rs @@ -483,6 +483,7 @@ async fn resume_replays_legacy_js_repl_image_rollout_shapes() { item: RolloutItem::SessionMeta(SessionMetaLine { meta: SessionMeta { id: ThreadId::default(), + segment_id: None, timestamp: "2024-01-01T00:00:00Z".to_string(), cwd: ".".into(), originator: "test_originator".to_string(), @@ -613,6 +614,7 @@ async fn resume_replays_image_tool_outputs_with_detail() { item: RolloutItem::SessionMeta(SessionMetaLine { meta: SessionMeta { id: ThreadId::default(), + segment_id: None, timestamp: "2024-01-01T00:00:00Z".to_string(), cwd: ".".into(), originator: "test_originator".to_string(), diff --git a/codex-rs/core/tests/suite/compact.rs b/codex-rs/core/tests/suite/compact.rs index 1d770649d08f..f1d32281d05f 100644 --- a/codex-rs/core/tests/suite/compact.rs +++ b/codex-rs/core/tests/suite/compact.rs @@ -2,6 +2,7 @@ use codex_core::compact::SUMMARIZATION_PROMPT; use codex_core::compact::SUMMARY_PREFIX; use codex_core::config::Config; +use codex_core::materialize_rollout_items_for_replay; use codex_features::Feature; use codex_login::CodexAuth; use codex_model_provider_info::ModelProviderInfo; @@ -260,7 +261,6 @@ async fn summarize_context_three_requests_and_instructions() { }); let test = builder.build(&server).await.unwrap(); let codex = test.codex.clone(); - let rollout_path = test.session_configured.rollout_path.expect("rollout path"); // 1) Normal user input – should hit server once. codex @@ -393,6 +393,7 @@ async fn summarize_context_three_requests_and_instructions() { ); // Shut down Codex to flush rollout entries before inspecting the file. + let rollout_path = codex.current_rollout_path().await.expect("rollout path"); codex.submit(Op::Shutdown).await.unwrap(); wait_for_event(&codex, |ev| matches!(ev, EventMsg::ShutdownComplete)).await; @@ -404,8 +405,7 @@ async fn summarize_context_three_requests_and_instructions() { rollout_path.display() ) }); - let mut regular_turn_context_count = 0usize; - let mut saw_compacted_summary = false; + let mut rollout_items = Vec::new(); for line in text.lines() { let trimmed = line.trim(); if trimmed.is_empty() { @@ -414,7 +414,15 @@ async fn summarize_context_three_requests_and_instructions() { let Ok(entry): Result = serde_json::from_str(trimmed) else { continue; }; - match entry.item { + rollout_items.push(entry.item); + } + let rollout_items = + materialize_rollout_items_for_replay(test.config.codex_home.as_path(), &rollout_items) + .await; + let mut regular_turn_context_count = 0usize; + let mut saw_compacted_summary = false; + for item in rollout_items { + match item { RolloutItem::TurnContext(_) => { regular_turn_context_count += 1; } @@ -2016,7 +2024,6 @@ async fn auto_compact_persists_rollout_entries() { }); let test = builder.build(&server).await.unwrap(); let codex = test.codex.clone(); - let session_configured = test.session_configured; codex .submit(Op::UserInput { @@ -2060,10 +2067,10 @@ async fn auto_compact_persists_rollout_entries() { .unwrap(); wait_for_event(&codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await; + let rollout_path = codex.current_rollout_path().await.expect("rollout path"); codex.submit(Op::Shutdown).await.unwrap(); wait_for_event(&codex, |ev| matches!(ev, EventMsg::ShutdownComplete)).await; - let rollout_path = session_configured.rollout_path.expect("rollout path"); let text = std::fs::read_to_string(&rollout_path).unwrap_or_else(|e| { panic!( "failed to read rollout file {}: {e}", @@ -2071,7 +2078,7 @@ async fn auto_compact_persists_rollout_entries() { ) }); - let mut turn_context_count = 0usize; + let mut rollout_items = Vec::new(); for line in text.lines() { let trimmed = line.trim(); if trimmed.is_empty() { @@ -2080,7 +2087,15 @@ async fn auto_compact_persists_rollout_entries() { let Ok(entry): Result = serde_json::from_str(trimmed) else { continue; }; - match entry.item { + rollout_items.push(entry.item); + } + let rollout_items = + materialize_rollout_items_for_replay(test.config.codex_home.as_path(), &rollout_items) + .await; + + let mut turn_context_count = 0usize; + for item in rollout_items { + match item { RolloutItem::TurnContext(_) => { turn_context_count += 1; } diff --git a/codex-rs/core/tests/suite/compact_remote.rs b/codex-rs/core/tests/suite/compact_remote.rs index b145506d860c..05e09baa3ca0 100644 --- a/codex-rs/core/tests/suite/compact_remote.rs +++ b/codex-rs/core/tests/suite/compact_remote.rs @@ -1434,11 +1434,6 @@ async fn remote_compact_and_resume_refresh_stale_developer_instructions() -> Res test_codex().with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing()); let initial = start_builder.build(&server).await?; let home = initial.home.clone(); - let rollout_path = initial - .session_configured - .rollout_path - .clone() - .expect("rollout path"); let responses_mock = responses::mount_sse_sequence( &server, @@ -1495,6 +1490,12 @@ async fn remote_compact_and_resume_refresh_stale_developer_instructions() -> Res initial.codex.submit(Op::Compact).await?; wait_for_event(&initial.codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await; + let rollout_path = initial + .codex + .current_rollout_path() + .await + .expect("rollout path"); + initial .codex .submit(Op::UserInput { @@ -2100,11 +2101,6 @@ async fn snapshot_request_shape_remote_compact_resume_restates_realtime_end() -> let mut builder = remote_realtime_test_codex_builder(&realtime_server); let initial = builder.build(&server).await?; let home = initial.home.clone(); - let rollout_path = initial - .session_configured - .rollout_path - .clone() - .expect("rollout path"); let responses_mock = responses::mount_sse_sequence( &server, @@ -2151,6 +2147,12 @@ async fn snapshot_request_shape_remote_compact_resume_restates_realtime_end() -> initial.codex.submit(Op::Compact).await?; wait_for_event(&initial.codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await; + let rollout_path = initial + .codex + .current_rollout_path() + .await + .expect("rollout path"); + initial.codex.submit(Op::Shutdown).await?; wait_for_event(&initial.codex, |ev| { matches!(ev, EventMsg::ShutdownComplete) diff --git a/codex-rs/core/tests/suite/compact_resume_fork.rs b/codex-rs/core/tests/suite/compact_resume_fork.rs index 354e9a6a033b..04d0ef0f4fed 100644 --- a/codex-rs/core/tests/suite/compact_resume_fork.rs +++ b/codex-rs/core/tests/suite/compact_resume_fork.rs @@ -143,7 +143,7 @@ async fn compact_resume_and_fork_preserve_model_history_view() { user_turn(&base, "hello world").await; compact_conversation(&base).await; user_turn(&base, "AFTER_COMPACT").await; - let base_path = fetch_conversation_path(&base); + let base_path = fetch_conversation_path(&base).await; assert!( base_path.exists(), "compact+resume test expects base path {base_path:?} to exist", @@ -152,7 +152,7 @@ async fn compact_resume_and_fork_preserve_model_history_view() { shutdown_conversation(&base).await; let resumed = resume_conversation(&manager, &config, base_path).await; user_turn(&resumed, "AFTER_RESUME").await; - let resumed_path = fetch_conversation_path(&resumed); + let resumed_path = fetch_conversation_path(&resumed).await; assert!( resumed_path.exists(), "compact+resume test expects resumed path {resumed_path:?} to exist", @@ -299,7 +299,7 @@ async fn compact_resume_after_second_compaction_preserves_history() -> Result<() user_turn(&base, "hello world").await; compact_conversation(&base).await; user_turn(&base, "AFTER_COMPACT").await; - let base_path = fetch_conversation_path(&base); + let base_path = fetch_conversation_path(&base).await; assert!( base_path.exists(), "second compact test expects base path {base_path:?} to exist", @@ -308,7 +308,7 @@ async fn compact_resume_after_second_compaction_preserves_history() -> Result<() shutdown_conversation(&base).await; let resumed = resume_conversation(&manager, &config, base_path).await; user_turn(&resumed, "AFTER_RESUME").await; - let resumed_path = fetch_conversation_path(&resumed); + let resumed_path = fetch_conversation_path(&resumed).await; assert!( resumed_path.exists(), "second compact test expects resumed path {resumed_path:?} to exist", @@ -319,7 +319,7 @@ async fn compact_resume_after_second_compaction_preserves_history() -> Result<() compact_conversation(&forked).await; user_turn(&forked, "AFTER_COMPACT_2").await; - let forked_path = fetch_conversation_path(&forked); + let forked_path = fetch_conversation_path(&forked).await; assert!( forked_path.exists(), "second compact test expects forked path {forked_path:?} to exist", @@ -814,8 +814,11 @@ async fn compact_conversation(conversation: &Arc) { wait_for_event(conversation, |ev| matches!(ev, EventMsg::TurnComplete(_))).await; } -fn fetch_conversation_path(conversation: &Arc) -> std::path::PathBuf { - conversation.rollout_path().expect("rollout path") +async fn fetch_conversation_path(conversation: &Arc) -> std::path::PathBuf { + conversation + .current_rollout_path() + .await + .expect("rollout path") } async fn shutdown_conversation(conversation: &Arc) { diff --git a/codex-rs/core/tests/suite/fork_thread.rs b/codex-rs/core/tests/suite/fork_thread.rs index 19ed2a20889d..655e03016cbc 100644 --- a/codex-rs/core/tests/suite/fork_thread.rs +++ b/codex-rs/core/tests/suite/fork_thread.rs @@ -1,5 +1,6 @@ use codex_core::ForkSnapshot; use codex_core::NewThread; +use codex_core::materialize_rollout_items_for_replay; use codex_core::parse_turn_item; use codex_protocol::items::TurnItem; use codex_protocol::protocol::EventMsg; @@ -110,13 +111,25 @@ async fn fork_thread_twice_drops_to_first_message() { let fork1_path = codex_fork1.rollout_path().expect("rollout path"); // GetHistory on fork1 flushed; the file is ready. - let fork1_items = read_rollout_items(&fork1_path); + let fork1_raw_items = read_rollout_items(&fork1_path); + assert!( + fork1_raw_items + .iter() + .any(|item| matches!(item, RolloutItem::ForkReference(_))), + "forked rollout should keep a compact ForkReference instead of copying parent history" + ); + let fork1_items = + materialize_rollout_items_for_replay(test.config.codex_home.as_path(), &fork1_raw_items) + .await; + let fork1_items = without_session_meta(fork1_items); pretty_assertions::assert_eq!( serde_json::to_value(&fork1_items).unwrap(), serde_json::to_value(&expected_after_first).unwrap() ); - // Fork again with n=0 → drops the (new) last user message, leaving only the first. + // Fork again with n=0. The first fork's raw rollout only contains a + // ForkReference, but truncation still applies to the materialized history + // referenced by that item. let NewThread { thread: codex_fork2, .. @@ -133,14 +146,23 @@ async fn fork_thread_twice_drops_to_first_message() { let fork2_path = codex_fork2.rollout_path().expect("rollout path"); // GetHistory on fork2 flushed; the file is ready. - let fork1_items = read_rollout_items(&fork1_path); let fork1_user_inputs = find_user_input_positions(&fork1_items); - let cut_last_on_fork1 = fork1_user_inputs - .get(fork1_user_inputs.len().saturating_sub(1)) + let cut2 = fork1_user_inputs + .first() .copied() - .unwrap_or(0); - let expected_after_second: Vec = fork1_items[..cut_last_on_fork1].to_vec(); - let fork2_items = read_rollout_items(&fork2_path); + .unwrap_or(fork1_items.len()); + let expected_after_second = fork1_items[..cut2].to_vec(); + let fork2_raw_items = read_rollout_items(&fork2_path); + assert!( + fork2_raw_items + .iter() + .any(|item| matches!(item, RolloutItem::ForkReference(_))), + "re-forked rollout should keep a compact ForkReference instead of copying parent history" + ); + let fork2_items = + materialize_rollout_items_for_replay(test.config.codex_home.as_path(), &fork2_raw_items) + .await; + let fork2_items = without_session_meta(fork2_items); pretty_assertions::assert_eq!( serde_json::to_value(&fork2_items).unwrap(), serde_json::to_value(&expected_after_second).unwrap() @@ -244,3 +266,10 @@ fn read_rollout_items(path: &std::path::Path) -> Vec { } items } + +fn without_session_meta(items: Vec) -> Vec { + items + .into_iter() + .filter(|item| !matches!(item, RolloutItem::SessionMeta(_))) + .collect() +} diff --git a/codex-rs/core/tests/suite/personality_migration.rs b/codex-rs/core/tests/suite/personality_migration.rs index f300745129cb..1fe9cb915d2b 100644 --- a/codex-rs/core/tests/suite/personality_migration.rs +++ b/codex-rs/core/tests/suite/personality_migration.rs @@ -60,6 +60,7 @@ async fn write_rollout_with_user_event(dir: &Path, thread_id: ThreadId) -> io::R let session_meta = SessionMetaLine { meta: SessionMeta { id: thread_id, + segment_id: None, forked_from_id: None, timestamp: TEST_TIMESTAMP.to_string(), cwd: std::path::PathBuf::from("."), @@ -105,6 +106,7 @@ async fn write_rollout_with_meta_only(dir: &Path, thread_id: ThreadId) -> io::Re let session_meta = SessionMetaLine { meta: SessionMeta { id: thread_id, + segment_id: None, forked_from_id: None, timestamp: TEST_TIMESTAMP.to_string(), cwd: std::path::PathBuf::from("."), diff --git a/codex-rs/core/tests/suite/plugins.rs b/codex-rs/core/tests/suite/plugins.rs index 5b83d3b13663..43da63c53e96 100644 --- a/codex-rs/core/tests/suite/plugins.rs +++ b/codex-rs/core/tests/suite/plugins.rs @@ -453,10 +453,26 @@ async fn plugin_mcp_tools_are_listed() -> Result<()> { skip_if_no_network!(Ok(())); let server = start_mock_server().await; let codex_home = Arc::new(TempDir::new()?); - let rmcp_test_server_bin = stdio_server_bin()?; + let rmcp_test_server_bin = match stdio_server_bin() { + Ok(bin) if std::path::Path::new(&bin).exists() => bin, + Ok(bin) => { + eprintln!("test_stdio_server binary not available, skipping test: {bin}"); + return Ok(()); + } + Err(err) => { + eprintln!("test_stdio_server binary not available, skipping test: {err}"); + return Ok(()); + } + }; write_plugin_mcp_plugin(codex_home.as_ref(), &rmcp_test_server_bin); let codex = build_plugin_test_codex(&server, codex_home).await?; - wait_for_sample_mcp_ready(&codex).await?; + if let Err(err) = wait_for_sample_mcp_ready(&codex).await { + if err.to_string().contains("No such file or directory") { + eprintln!("test_stdio_server failed to start, skipping test: {err}"); + return Ok(()); + } + return Err(err); + } codex.submit(Op::ListMcpTools).await?; let list_event = wait_for_event_with_timeout( diff --git a/codex-rs/core/tests/suite/sqlite_state.rs b/codex-rs/core/tests/suite/sqlite_state.rs index 8250f5493dea..94b2c74f39b7 100644 --- a/codex-rs/core/tests/suite/sqlite_state.rs +++ b/codex-rs/core/tests/suite/sqlite_state.rs @@ -138,6 +138,7 @@ async fn backfill_scans_existing_rollouts() -> Result<()> { let session_meta_line = SessionMetaLine { meta: SessionMeta { id: thread_id, + segment_id: None, forked_from_id: None, timestamp: "2026-01-27T12:00:00Z".to_string(), cwd: codex_home.to_path_buf(), diff --git a/codex-rs/core/tests/suite/subagent_notifications.rs b/codex-rs/core/tests/suite/subagent_notifications.rs index 3f457967c1a3..f91eac316229 100644 --- a/codex-rs/core/tests/suite/subagent_notifications.rs +++ b/codex-rs/core/tests/suite/subagent_notifications.rs @@ -20,11 +20,18 @@ use core_test_support::test_codex::test_codex; use pretty_assertions::assert_eq; use serde_json::json; use std::fs; +use std::future::Future; use std::path::Path; +use std::sync::Arc; +use std::sync::Mutex; use std::time::Duration; use tokio::time::Instant; use tokio::time::sleep; +use wiremock::Match; +use wiremock::Mock; use wiremock::MockServer; +use wiremock::matchers::method; +use wiremock::matchers::path_regex; const SPAWN_CALL_ID: &str = "spawn-call-1"; const TURN_0_FORK_PROMPT: &str = "seed fork context"; @@ -38,7 +45,46 @@ const REQUESTED_REASONING_EFFORT: ReasoningEffort = ReasoningEffort::Low; const ROLE_MODEL: &str = "gpt-5.4"; const ROLE_REASONING_EFFORT: ReasoningEffort = ReasoningEffort::High; +#[derive(Clone)] +struct RawRequestRecorder { + requests: Arc>>, +} + +impl RawRequestRecorder { + fn new() -> Self { + Self { + requests: Arc::new(Mutex::new(Vec::new())), + } + } + + fn single_request(&self) -> wiremock::Request { + let requests = match self.requests.lock() { + Ok(requests) => requests, + Err(err) => panic!("requests lock should not panic: {err}"), + }; + assert_eq!(requests.len(), 1); + let Some(request) = requests.first() else { + panic!("request should exist"); + }; + request.clone() + } +} + +impl Match for RawRequestRecorder { + fn matches(&self, request: &wiremock::Request) -> bool { + match self.requests.lock() { + Ok(mut requests) => requests.push(request.clone()), + Err(err) => panic!("requests lock should not panic: {err}"), + } + true + } +} + fn body_contains(req: &wiremock::Request, text: &str) -> bool { + request_body_text(req).is_some_and(|body| body.contains(text)) +} + +fn request_body_text(req: &wiremock::Request) -> Option { let is_zstd = req .headers .get("content-encoding") @@ -53,9 +99,7 @@ fn body_contains(req: &wiremock::Request, text: &str) -> bool { } else { Some(req.body.clone()) }; - bytes - .and_then(|body| String::from_utf8(body).ok()) - .is_some_and(|body| body.contains(text)) + bytes.and_then(|body| String::from_utf8(body).ok()) } fn has_subagent_notification(req: &ResponsesRequest) -> bool { @@ -64,6 +108,53 @@ fn has_subagent_notification(req: &ResponsesRequest) -> bool { .any(|text| text.contains("")) } +async fn mount_fork_marker_child_response( + server: &MockServer, + response_body: String, +) -> RawRequestRecorder { + let child_request_log = RawRequestRecorder::new(); + Mock::given(method("POST")) + .and(path_regex(".*/responses$")) + .and(|req: &wiremock::Request| { + let body = request_body_text(req).unwrap_or_default(); + body.contains(r#""previous_response_id":"resp-turn1-1""#) + || (body.contains("# Subagent Assignment") && body.contains(CHILD_PROMPT)) + }) + .and(child_request_log.clone()) + .respond_with(sse_response(response_body)) + .with_priority(4) + .up_to_n_times(1) + .mount(server) + .await; + child_request_log +} + +fn run_large_fork_request_test(name: &'static str, test: F) -> Result<()> +where + F: FnOnce() -> Fut + Send + 'static, + Fut: Future> + Send + 'static, +{ + // These tests intentionally send full forked requests with parent + // developer context and tool schemas. wiremock clones and matches that + // large request body, so use an explicit Tokio stack size instead of + // relying on the platform default worker stack. + let test_thread = std::thread::Builder::new() + .name(name.to_string()) + .stack_size(32 * 1024 * 1024) + .spawn(|| { + let runtime = tokio::runtime::Builder::new_multi_thread() + .worker_threads(2) + .thread_stack_size(32 * 1024 * 1024) + .enable_all() + .build()?; + runtime.block_on(test()) + })?; + match test_thread.join() { + Ok(result) => result, + Err(err) => std::panic::resume_unwind(err), + } +} + fn tool_parameter_description( req: &ResponsesRequest, tool_name: &str, @@ -305,8 +396,15 @@ async fn subagent_notification_is_included_without_wait() -> Result<()> { Ok(()) } -#[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn spawned_child_receives_forked_parent_context() -> Result<()> { +#[test] +fn spawned_child_receives_forked_parent_context() -> Result<()> { + run_large_fork_request_test( + "spawned_child_receives_forked_parent_context", + spawned_child_receives_forked_parent_context_impl, + ) +} + +async fn spawned_child_receives_forked_parent_context_impl() -> Result<()> { skip_if_no_network!(Ok(())); let server = start_mock_server().await; @@ -337,17 +435,6 @@ async fn spawned_child_receives_forked_parent_context() -> Result<()> { ) .await; - let _child_request_log = mount_sse_once_match( - &server, - |req: &wiremock::Request| body_contains(req, CHILD_PROMPT), - sse(vec![ - ev_response_created("resp-child-1"), - ev_assistant_message("msg-child-1", "child done"), - ev_completed("resp-child-1"), - ]), - ) - .await; - let _turn1_followup = mount_sse_once_match( &server, |req: &wiremock::Request| body_contains(req, SPAWN_CALL_ID), @@ -360,12 +447,20 @@ async fn spawned_child_receives_forked_parent_context() -> Result<()> { .await; let mut builder = test_codex().with_config(|config| { - config - .features - .enable(Feature::Collab) - .expect("test config should allow feature update"); + if let Err(err) = config.features.enable(Feature::Collab) { + panic!("test config should allow feature update: {err}"); + } }); let test = builder.build(&server).await?; + let child_request_log = mount_fork_marker_child_response( + &server, + sse(vec![ + ev_response_created("resp-child-1"), + ev_assistant_message("msg-child-1", "child done"), + ev_completed("resp-child-1"), + ]), + ) + .await; test.submit_turn(TURN_0_FORK_PROMPT).await?; let _ = seed_turn.single_request(); @@ -373,27 +468,14 @@ async fn spawned_child_receives_forked_parent_context() -> Result<()> { test.submit_turn(TURN_1_PROMPT).await?; let _ = spawn_turn.single_request(); - let deadline = Instant::now() + Duration::from_secs(2); - let child_request = loop { - if let Some(request) = server - .received_requests() - .await - .unwrap_or_default() - .into_iter() - .find(|request| { - body_contains(request, CHILD_PROMPT) && !body_contains(request, SPAWN_CALL_ID) - }) - { - break request; - } - if Instant::now() >= deadline { - anyhow::bail!("timed out waiting for forked child request"); - } - sleep(Duration::from_millis(10)).await; - }; - assert!(body_contains(&child_request, TURN_0_FORK_PROMPT)); - assert!(!body_contains(&child_request, SPAWN_CALL_ID)); - + let child_request = child_request_log.single_request(); + let child_body = request_body_text(&child_request) + .ok_or_else(|| anyhow::anyhow!("child request body should be text"))?; + assert!( + child_body.contains(TURN_0_FORK_PROMPT) + || child_body.contains(r#""previous_response_id":"resp-turn1-1""#), + "forked child should either inline parent context or continue from the parent response" + ); Ok(()) } @@ -423,8 +505,15 @@ async fn spawn_agent_requested_model_and_reasoning_override_inherited_settings_w Ok(()) } -#[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn spawned_multi_agent_v2_child_inherits_parent_developer_context() -> Result<()> { +#[test] +fn spawned_multi_agent_v2_child_inherits_parent_developer_context() -> Result<()> { + run_large_fork_request_test( + "spawned_multi_agent_v2_child_inherits_parent_developer_context", + spawned_multi_agent_v2_child_inherits_parent_developer_context_impl, + ) +} + +async fn spawned_multi_agent_v2_child_inherits_parent_developer_context_impl() -> Result<()> { skip_if_no_network!(Ok(())); let server = start_mock_server().await; @@ -443,9 +532,18 @@ async fn spawned_multi_agent_v2_child_inherits_parent_developer_context() -> Res ) .await; - let _child_request_log = mount_sse_once_match( + let mut builder = test_codex().with_config(|config| { + if let Err(err) = config.features.enable(Feature::Collab) { + panic!("test config should allow feature update: {err}"); + } + if let Err(err) = config.features.enable(Feature::MultiAgentV2) { + panic!("test config should allow feature update: {err}"); + } + config.developer_instructions = Some("Parent developer instructions.".to_string()); + }); + let test = builder.build(&server).await?; + let child_request_log = mount_fork_marker_child_response( &server, - |req: &wiremock::Request| body_contains(req, CHILD_PROMPT), sse(vec![ ev_response_created("resp-child-1"), ev_completed("resp-child-1"), @@ -466,50 +564,30 @@ async fn spawned_multi_agent_v2_child_inherits_parent_developer_context() -> Res ) .await; - let mut builder = test_codex().with_config(|config| { - config - .features - .enable(Feature::Collab) - .expect("test config should allow feature update"); - config - .features - .enable(Feature::MultiAgentV2) - .expect("test config should allow feature update"); - config.developer_instructions = Some("Parent developer instructions.".to_string()); - }); - let test = builder.build(&server).await?; - test.submit_turn(TURN_1_PROMPT).await?; - let deadline = Instant::now() + Duration::from_secs(2); - let child_request = loop { - if let Some(request) = server - .received_requests() - .await - .unwrap_or_default() - .into_iter() - .find(|request| { - body_contains(request, CHILD_PROMPT) && !body_contains(request, SPAWN_CALL_ID) - }) - { - break request; - } - if Instant::now() >= deadline { - anyhow::bail!("timed out waiting for spawned child request with developer context"); - } - sleep(Duration::from_millis(10)).await; - }; - assert!(body_contains( - &child_request, - "Parent developer instructions." - )); - assert!(body_contains(&child_request, CHILD_PROMPT)); + let child_request = child_request_log.single_request(); + let child_body = request_body_text(&child_request) + .ok_or_else(|| anyhow::anyhow!("child request body should be text"))?; + assert!( + child_body.contains("Parent developer instructions.") + || child_body.contains(r#""previous_response_id":"resp-turn1-1""#), + "forked child should either inline parent developer context or continue from the parent response" + ); + assert!(child_body.contains(CHILD_PROMPT)); Ok(()) } -#[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn skills_toggle_skips_instructions_for_parent_and_spawned_child() -> Result<()> { +#[test] +fn skills_toggle_skips_instructions_for_parent_and_spawned_child() -> Result<()> { + run_large_fork_request_test( + "skills_toggle_skips_instructions_for_parent_and_spawned_child", + skills_toggle_skips_instructions_for_parent_and_spawned_child_impl, + ) +} + +async fn skills_toggle_skips_instructions_for_parent_and_spawned_child_impl() -> Result<()> { skip_if_no_network!(Ok(())); let server = start_mock_server().await; @@ -538,6 +616,22 @@ async fn skills_toggle_skips_instructions_for_parent_and_spawned_child() -> Resu ) .await; + let mut builder = test_codex() + .with_pre_build_hook(|home| { + if let Err(err) = write_home_skill(home, "demo", "demo-skill", "demo skill") { + panic!("write home skill: {err}"); + } + }) + .with_config(|config| { + if let Err(err) = config.features.enable(Feature::Collab) { + panic!("test config should allow feature update: {err}"); + } + if let Err(err) = config.features.enable(Feature::MultiAgentV2) { + panic!("test config should allow feature update: {err}"); + } + config.include_skill_instructions = false; + }); + let test = builder.build(&server).await?; let _turn1_followup = mount_sse_once_match( &server, |req: &wiremock::Request| { @@ -551,48 +645,21 @@ async fn skills_toggle_skips_instructions_for_parent_and_spawned_child() -> Resu ) .await; - let mut builder = test_codex() - .with_pre_build_hook(|home| { - if let Err(err) = write_home_skill(home, "demo", "demo-skill", "demo skill") { - panic!("write home skill: {err}"); - } - }) - .with_config(|config| { - config - .features - .enable(Feature::Collab) - .expect("test config should allow feature update"); - config - .features - .enable(Feature::MultiAgentV2) - .expect("test config should allow feature update"); - config.include_skill_instructions = false; - }); - let test = builder.build(&server).await?; + let child_request_log = mount_fork_marker_child_response( + &server, + sse(vec![ + ev_response_created("resp-child-1"), + ev_completed("resp-child-1"), + ]), + ) + .await; test.submit_turn(TURN_1_PROMPT).await?; let parent_request = spawn_turn.single_request(); assert!(!parent_request.body_contains_text("")); assert!(!parent_request.body_contains_text("demo-skill")); - let deadline = Instant::now() + Duration::from_secs(2); - let child_request = loop { - if let Some(request) = server - .received_requests() - .await - .unwrap_or_default() - .into_iter() - .find(|request| { - body_contains(request, CHILD_PROMPT) && !body_contains(request, SPAWN_CALL_ID) - }) - { - break request; - } - if Instant::now() >= deadline { - anyhow::bail!("timed out waiting for spawned child request"); - } - sleep(Duration::from_millis(10)).await; - }; + let child_request = child_request_log.single_request(); assert!(!body_contains(&child_request, "")); assert!(!body_contains(&child_request, "demo-skill")); diff --git a/codex-rs/core/tests/suite/window_headers.rs b/codex-rs/core/tests/suite/window_headers.rs index de52821839de..35b8e4ec74fe 100644 --- a/codex-rs/core/tests/suite/window_headers.rs +++ b/codex-rs/core/tests/suite/window_headers.rs @@ -49,21 +49,25 @@ async fn window_id_advances_after_compact_persists_on_resume_and_resets_on_fork( }); let initial = builder.build(&server).await?; let initial_thread = Arc::clone(&initial.codex); - let rollout_path = initial - .session_configured - .rollout_path - .clone() - .expect("rollout path"); submit_user_turn(&initial_thread, "before compact").await?; submit_compact_turn(&initial_thread).await?; submit_user_turn(&initial_thread, "after compact").await?; + let rollout_path = initial_thread + .current_rollout_path() + .await + .expect("rollout path"); shutdown_thread(&initial_thread).await?; let resumed = builder .resume(&server, initial.home.clone(), rollout_path.clone()) .await?; submit_user_turn(&resumed.codex, "after resume").await?; + let resumed_rollout_path = resumed + .codex + .current_rollout_path() + .await + .expect("rollout path"); shutdown_thread(&resumed.codex).await?; let forked = resumed @@ -71,7 +75,7 @@ async fn window_id_advances_after_compact_persists_on_resume_and_resets_on_fork( .fork_thread( /*snapshot*/ 0usize, resumed.config.clone(), - rollout_path, + resumed_rollout_path, /*persist_extended_history*/ false, /*parent_trace*/ None, ) diff --git a/codex-rs/exec/tests/suite/fork.rs b/codex-rs/exec/tests/suite/fork.rs index 01c8d1d9aec0..78a1f63ccfc2 100644 --- a/codex-rs/exec/tests/suite/fork.rs +++ b/codex-rs/exec/tests/suite/fork.rs @@ -81,6 +81,25 @@ fn extract_forked_from_id(path: &std::path::Path) -> Option { .map(ToString::to_string) } +fn extract_fork_reference(path: &std::path::Path) -> Option<(String, usize)> { + let Ok(content) = std::fs::read_to_string(path) else { + return None; + }; + content.lines().skip(1).find_map(|line| { + let item = serde_json::from_str::(line).ok()?; + if item.get("type").and_then(Value::as_str) != Some("fork_reference") { + return None; + } + let payload = item.get("payload")?; + let rollout_path = payload.get("rollout_path")?.as_str()?.to_string(); + let nth_user_message = payload + .get("nth_user_message")? + .as_u64() + .and_then(|value| usize::try_from(value).ok())?; + Some((rollout_path, nth_user_message)) + }) +} + fn exec_fixture() -> anyhow::Result { Ok(find_resource!("tests/fixtures/cli_responses_fixture.sse")?) } @@ -132,9 +151,17 @@ fn exec_fork_by_id_creates_new_session_with_copied_history() -> anyhow::Result<( extract_forked_from_id(&forked_path).as_deref(), Some(session_id.as_str()) ); + let fork_reference = + extract_fork_reference(&forked_path).context("forked rollout should record a reference")?; + let referenced_path = std::path::PathBuf::from(&fork_reference.0); + assert_eq!( + std::fs::canonicalize(&referenced_path)?, + std::fs::canonicalize(&original_path)?, + ); + assert_eq!(fork_reference.1, usize::MAX); assert!( - forked_content.contains(&marker), - "forked session should copy ancestor rollout history" + !forked_content.contains(&marker), + "forked session should reference ancestor rollout history instead of copying it" ); assert!(forked_content.contains(&marker2)); diff --git a/codex-rs/protocol/src/lib.rs b/codex-rs/protocol/src/lib.rs index 175c92331f25..9243d9a160b7 100644 --- a/codex-rs/protocol/src/lib.rs +++ b/codex-rs/protocol/src/lib.rs @@ -4,6 +4,7 @@ pub mod auth; mod thread_id; mod tool_name; pub use agent_path::AgentPath; +pub use thread_id::SegmentId; pub use thread_id::ThreadId; pub use tool_name::ToolName; pub mod approvals; diff --git a/codex-rs/protocol/src/protocol.rs b/codex-rs/protocol/src/protocol.rs index 60137fa8b0a6..9b6f3221143d 100644 --- a/codex-rs/protocol/src/protocol.rs +++ b/codex-rs/protocol/src/protocol.rs @@ -14,6 +14,7 @@ use std::time::Duration; use strum_macros::EnumIter; use crate::AgentPath; +use crate::SegmentId; use crate::ThreadId; use crate::approvals::ElicitationRequestEvent; use crate::config_types::ApprovalsReviewer; @@ -2713,6 +2714,8 @@ impl fmt::Display for InternalSessionSource { #[derive(Serialize, Deserialize, Clone, Debug, JsonSchema, TS)] pub struct SessionMeta { pub id: ThreadId, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub segment_id: Option, #[serde(skip_serializing_if = "Option::is_none")] pub forked_from_id: Option, pub timestamp: String, @@ -2745,6 +2748,7 @@ impl Default for SessionMeta { fn default() -> Self { SessionMeta { id: ThreadId::default(), + segment_id: None, forked_from_id: None, timestamp: String::new(), cwd: PathBuf::new(), @@ -2770,10 +2774,41 @@ pub struct SessionMetaLine { pub git: Option, } +#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema, TS)] +pub struct ForkReferenceItem { + pub rollout_path: PathBuf, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub thread_id: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub segment_id: Option, + pub nth_user_message: usize, +} + +pub const DEFAULT_ROLLOUT_REFERENCE_DEPTH: usize = 2; + +#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema, TS)] +pub struct RolloutReferenceItem { + pub rollout_path: PathBuf, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub thread_id: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub rollout_timestamp: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub segment_id: Option, + #[serde(default = "default_rollout_reference_depth")] + pub max_depth: usize, +} + +fn default_rollout_reference_depth() -> usize { + DEFAULT_ROLLOUT_REFERENCE_DEPTH +} + #[derive(Serialize, Deserialize, Debug, Clone, JsonSchema, TS)] #[serde(tag = "type", content = "payload", rename_all = "snake_case")] pub enum RolloutItem { SessionMeta(SessionMetaLine), + ForkReference(ForkReferenceItem), + RolloutReference(RolloutReferenceItem), ResponseItem(ResponseItem), Compacted(CompactedItem), TurnContext(TurnContextItem), diff --git a/codex-rs/protocol/src/thread_id.rs b/codex-rs/protocol/src/thread_id.rs index 8d6d96eff8f2..548cbdb0303f 100644 --- a/codex-rs/protocol/src/thread_id.rs +++ b/codex-rs/protocol/src/thread_id.rs @@ -14,6 +14,12 @@ pub struct ThreadId { uuid: Uuid, } +#[derive(Debug, Clone, Copy, PartialEq, Eq, TS, Hash)] +#[ts(type = "string")] +pub struct SegmentId { + uuid: Uuid, +} + impl ThreadId { pub fn new() -> Self { Self { @@ -50,18 +56,66 @@ impl From for String { } } +impl SegmentId { + pub fn new() -> Self { + Self { + uuid: Uuid::now_v7(), + } + } + + pub fn from_string(s: &str) -> Result { + Ok(Self { + uuid: Uuid::parse_str(s)?, + }) + } +} + +impl TryFrom<&str> for SegmentId { + type Error = uuid::Error; + + fn try_from(value: &str) -> Result { + Self::from_string(value) + } +} + +impl TryFrom for SegmentId { + type Error = uuid::Error; + + fn try_from(value: String) -> Result { + Self::from_string(value.as_str()) + } +} + +impl From for String { + fn from(value: SegmentId) -> Self { + value.to_string() + } +} + impl Default for ThreadId { fn default() -> Self { Self::new() } } +impl Default for SegmentId { + fn default() -> Self { + Self::new() + } +} + impl Display for ThreadId { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { Display::fmt(&self.uuid, f) } } +impl Display for SegmentId { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + Display::fmt(&self.uuid, f) + } +} + impl Serialize for ThreadId { fn serialize(&self, serializer: S) -> Result where @@ -71,6 +125,15 @@ impl Serialize for ThreadId { } } +impl Serialize for SegmentId { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + serializer.collect_str(&self.uuid) + } +} + impl<'de> Deserialize<'de> for ThreadId { fn deserialize(deserializer: D) -> Result where @@ -82,6 +145,17 @@ impl<'de> Deserialize<'de> for ThreadId { } } +impl<'de> Deserialize<'de> for SegmentId { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + let value = String::deserialize(deserializer)?; + let uuid = Uuid::parse_str(&value).map_err(serde::de::Error::custom)?; + Ok(Self { uuid }) + } +} + impl JsonSchema for ThreadId { fn schema_name() -> String { "ThreadId".to_string() @@ -92,6 +166,16 @@ impl JsonSchema for ThreadId { } } +impl JsonSchema for SegmentId { + fn schema_name() -> String { + "SegmentId".to_string() + } + + fn json_schema(generator: &mut SchemaGenerator) -> Schema { + ::json_schema(generator) + } +} + #[cfg(test)] mod tests { use super::*; @@ -100,4 +184,10 @@ mod tests { let id = ThreadId::default(); assert_ne!(id.uuid, Uuid::nil()); } + + #[test] + fn test_segment_id_default_is_not_zeroes() { + let id = SegmentId::default(); + assert_ne!(id.uuid, Uuid::nil()); + } } diff --git a/codex-rs/rollout/src/lib.rs b/codex-rs/rollout/src/lib.rs index 4046beb635cc..8a12a69b5508 100644 --- a/codex-rs/rollout/src/lib.rs +++ b/codex-rs/rollout/src/lib.rs @@ -41,6 +41,7 @@ pub use list::ThreadListLayout; pub use list::ThreadSortKey; pub use list::ThreadsPage; pub use list::find_archived_thread_path_by_id_str; +pub use list::find_rollout_path_by_segment_id; pub use list::find_thread_path_by_id_str; #[deprecated(note = "use find_thread_path_by_id_str")] pub use list::find_thread_path_by_id_str as find_conversation_path_by_id_str; @@ -50,6 +51,8 @@ pub use list::parse_cursor; pub use list::read_head_for_summary; pub use list::read_session_meta_line; pub use list::read_thread_item_from_rollout; +pub use list::resolve_fork_reference_rollout_path; +pub use list::resolve_rollout_reference_rollout_path; pub use list::rollout_date_parts; pub use metadata::builder_from_items; pub use policy::EventPersistenceMode; diff --git a/codex-rs/rollout/src/list.rs b/codex-rs/rollout/src/list.rs index bdb7198835d7..a65e1ea1f449 100644 --- a/codex-rs/rollout/src/list.rs +++ b/codex-rs/rollout/src/list.rs @@ -21,6 +21,7 @@ use super::SESSIONS_SUBDIR; use crate::protocol::EventMsg; use crate::state_db; use codex_file_search as file_search; +use codex_protocol::SegmentId; use codex_protocol::ThreadId; use codex_protocol::protocol::RolloutItem; use codex_protocol::protocol::RolloutLine; @@ -939,6 +940,19 @@ pub(crate) fn parse_timestamp_uuid_from_filename(name: &str) -> Option<(OffsetDa Some((ts, uuid)) } +fn parse_timestamp_string_uuid_from_filename(name: &str) -> Option<(&str, Uuid)> { + // Expected: rollout-YYYY-MM-DDThh-mm-ss-.jsonl + let core = name.strip_prefix("rollout-")?.strip_suffix(".jsonl")?; + core.match_indices('-') + .rev() + .find_map(|(index, _)| { + Uuid::parse_str(&core[index + 1..]) + .ok() + .map(|uuid| (index, uuid)) + }) + .map(|(index, uuid)| (&core[..index], uuid)) +} + struct ThreadCandidate { path: PathBuf, id: Uuid, @@ -1118,6 +1132,9 @@ async fn read_head_summary(path: &Path, head_limit: usize) -> io::Result { + // Not included in summaries; skip. + } RolloutItem::ResponseItem(_) => { summary.created_at = summary .created_at @@ -1181,7 +1198,9 @@ pub async fn read_head_for_summary(path: &Path) -> io::Result {} } @@ -1330,6 +1349,222 @@ pub async fn find_archived_thread_path_by_id_str( find_thread_path_by_id_str_in_subdir(codex_home, ARCHIVED_SESSIONS_SUBDIR, id_str).await } +pub async fn find_rollout_path_by_segment_id( + codex_home: &Path, + thread_id: ThreadId, + segment_id: SegmentId, +) -> io::Result> { + if let Some(path) = find_rollout_path_by_segment_id_in_subdir( + codex_home, + SESSIONS_SUBDIR, + thread_id, + segment_id, + ) + .await? + { + return Ok(Some(path)); + } + find_rollout_path_by_segment_id_in_subdir( + codex_home, + ARCHIVED_SESSIONS_SUBDIR, + thread_id, + segment_id, + ) + .await +} + +async fn find_rollout_path_by_segment_id_in_subdir( + codex_home: &Path, + subdir: &str, + thread_id: ThreadId, + segment_id: SegmentId, +) -> io::Result> { + let root = codex_home.join(subdir); + if !tokio::fs::try_exists(&root).await.unwrap_or(false) { + return Ok(None); + } + + let target_thread_id = thread_id.to_string(); + let mut stack = vec![root]; + let mut scanned_files = 0usize; + while let Some(dir) = stack.pop() { + let mut read_dir = match tokio::fs::read_dir(&dir).await { + Ok(read_dir) => read_dir, + Err(err) => { + tracing::warn!("failed to read rollout directory {}: {err}", dir.display()); + continue; + } + }; + while let Some(entry) = read_dir.next_entry().await? { + let path = entry.path(); + let file_type = entry.file_type().await?; + if file_type.is_dir() { + stack.push(path); + continue; + } + if !file_type.is_file() { + continue; + } + let Some(file_name) = path.file_name().and_then(|file_name| file_name.to_str()) else { + continue; + }; + if !file_name.starts_with("rollout-") || !file_name.ends_with(".jsonl") { + continue; + } + let Some((_, uuid)) = parse_timestamp_uuid_from_filename(file_name) else { + continue; + }; + if uuid.to_string() != target_thread_id { + continue; + } + scanned_files = scanned_files.saturating_add(1); + if scanned_files > MAX_SCAN_FILES { + return Ok(None); + } + let Ok(meta_line) = read_session_meta_line(&path).await else { + continue; + }; + if meta_line.meta.id == thread_id && meta_line.meta.segment_id == Some(segment_id) { + return Ok(Some(path)); + } + } + } + + Ok(None) +} + +pub async fn resolve_fork_reference_rollout_path( + codex_home: &Path, + reference: &codex_protocol::protocol::ForkReferenceItem, +) -> io::Result { + if let (Some(thread_id), Some(segment_id)) = (reference.thread_id, reference.segment_id) + && let Some(path) = + find_rollout_path_by_segment_id(codex_home, thread_id, segment_id).await? + { + return Ok(path); + } + + let rollout_path = reference.rollout_path.as_path(); + if tokio::fs::try_exists(rollout_path).await.unwrap_or(false) { + return Ok(rollout_path.to_path_buf()); + } + + let Some(file_name) = rollout_path + .file_name() + .and_then(|file_name| file_name.to_str()) + else { + return Ok(rollout_path.to_path_buf()); + }; + let Some((_, uuid)) = parse_timestamp_uuid_from_filename(file_name) else { + return Ok(rollout_path.to_path_buf()); + }; + let archived_path = codex_home.join(ARCHIVED_SESSIONS_SUBDIR).join(file_name); + if tokio::fs::try_exists(archived_path.as_path()) + .await + .unwrap_or(false) + { + return Ok(archived_path); + } + let id = uuid.to_string(); + if let Some(path) = find_thread_path_by_id_str(codex_home, id.as_str()).await? { + return Ok(path); + } + if let Some(path) = find_archived_thread_path_by_id_str(codex_home, id.as_str()).await? { + return Ok(path); + } + Ok(rollout_path.to_path_buf()) +} + +pub async fn resolve_rollout_reference_rollout_path( + codex_home: &Path, + reference: &codex_protocol::protocol::RolloutReferenceItem, +) -> io::Result { + if let (Some(thread_id), Some(rollout_timestamp)) = + (reference.thread_id, reference.rollout_timestamp.as_deref()) + { + let file_name = format!("rollout-{rollout_timestamp}-{thread_id}.jsonl"); + if let Some(active_path) = + rollout_path_for_timestamp_file(codex_home, rollout_timestamp, &file_name) + && tokio::fs::try_exists(active_path.as_path()) + .await + .unwrap_or(false) + { + return Ok(active_path); + } + let archived_path = codex_home.join(ARCHIVED_SESSIONS_SUBDIR).join(&file_name); + if tokio::fs::try_exists(archived_path.as_path()) + .await + .unwrap_or(false) + { + return Ok(archived_path); + } + } + + if let (Some(thread_id), Some(segment_id)) = (reference.thread_id, reference.segment_id) + && let Some(path) = + find_rollout_path_by_segment_id(codex_home, thread_id, segment_id).await? + { + return Ok(path); + } + + let rollout_path = reference.rollout_path.as_path(); + if tokio::fs::try_exists(rollout_path).await.unwrap_or(false) { + return Ok(rollout_path.to_path_buf()); + } + + let Some(file_name) = rollout_path + .file_name() + .and_then(|file_name| file_name.to_str()) + else { + return Ok(rollout_path.to_path_buf()); + }; + let Some((rollout_timestamp, uuid)) = parse_timestamp_string_uuid_from_filename(file_name) + else { + return Ok(rollout_path.to_path_buf()); + }; + let archived_path = codex_home.join(ARCHIVED_SESSIONS_SUBDIR).join(file_name); + if tokio::fs::try_exists(archived_path.as_path()) + .await + .unwrap_or(false) + { + return Ok(archived_path); + } + if let Some(active_path) = + rollout_path_for_timestamp_file(codex_home, rollout_timestamp, file_name) + && tokio::fs::try_exists(active_path.as_path()) + .await + .unwrap_or(false) + { + return Ok(active_path); + } + let id = uuid.to_string(); + if let Some(path) = find_thread_path_by_id_str(codex_home, id.as_str()).await? { + return Ok(path); + } + if let Some(path) = find_archived_thread_path_by_id_str(codex_home, id.as_str()).await? { + return Ok(path); + } + Ok(rollout_path.to_path_buf()) +} + +fn rollout_path_for_timestamp_file( + codex_home: &Path, + rollout_timestamp: &str, + file_name: &str, +) -> Option { + let year = rollout_timestamp.get(0..4)?; + let month = rollout_timestamp.get(5..7)?; + let day = rollout_timestamp.get(8..10)?; + Some( + codex_home + .join(SESSIONS_SUBDIR) + .join(year) + .join(month) + .join(day) + .join(file_name), + ) +} + /// Extract the `YYYY/MM/DD` directory components from a rollout filename. pub fn rollout_date_parts(file_name: &OsStr) -> Option<(String, String, String)> { let name = file_name.to_string_lossy(); diff --git a/codex-rs/rollout/src/metadata.rs b/codex-rs/rollout/src/metadata.rs index e7a25f0cdacf..7e42d9ca0445 100644 --- a/codex-rs/rollout/src/metadata.rs +++ b/codex-rs/rollout/src/metadata.rs @@ -69,7 +69,9 @@ pub fn builder_from_items( ) -> Option { if let Some(session_meta) = items.iter().find_map(|item| match item { RolloutItem::SessionMeta(meta_line) => Some(meta_line), - RolloutItem::ResponseItem(_) + RolloutItem::ForkReference(_) + | RolloutItem::RolloutReference(_) + | RolloutItem::ResponseItem(_) | RolloutItem::Compacted(_) | RolloutItem::TurnContext(_) | RolloutItem::EventMsg(_) => None, @@ -123,7 +125,9 @@ pub async fn extract_metadata_from_rollout( metadata, memory_mode: items.iter().rev().find_map(|item| match item { RolloutItem::SessionMeta(meta_line) => meta_line.meta.memory_mode.clone(), - RolloutItem::ResponseItem(_) + RolloutItem::ForkReference(_) + | RolloutItem::RolloutReference(_) + | RolloutItem::ResponseItem(_) | RolloutItem::Compacted(_) | RolloutItem::TurnContext(_) | RolloutItem::EventMsg(_) => None, diff --git a/codex-rs/rollout/src/metadata_tests.rs b/codex-rs/rollout/src/metadata_tests.rs index c94cd0be7e5d..0227519007c9 100644 --- a/codex-rs/rollout/src/metadata_tests.rs +++ b/codex-rs/rollout/src/metadata_tests.rs @@ -34,6 +34,7 @@ async fn extract_metadata_from_rollout_uses_session_meta() { let session_meta = SessionMeta { id, + segment_id: None, forked_from_id: None, timestamp: "2026-01-27T12:34:56Z".to_string(), cwd: dir.path().to_path_buf(), @@ -85,6 +86,7 @@ async fn extract_metadata_from_rollout_returns_latest_memory_mode() { let session_meta = SessionMeta { id, + segment_id: None, forked_from_id: None, timestamp: "2026-01-27T12:34:56Z".to_string(), cwd: dir.path().to_path_buf(), @@ -344,6 +346,7 @@ fn write_rollout_in_sessions_with_cwd( let path = sessions_dir.join(format!("rollout-{filename_ts}-{thread_uuid}.jsonl")); let session_meta = SessionMeta { id, + segment_id: None, forked_from_id: None, timestamp: event_ts.to_string(), cwd, diff --git a/codex-rs/rollout/src/policy.rs b/codex-rs/rollout/src/policy.rs index 146e1dc365f0..0ae70b8606a2 100644 --- a/codex-rs/rollout/src/policy.rs +++ b/codex-rs/rollout/src/policy.rs @@ -16,9 +16,11 @@ pub fn is_persisted_response_item(item: &RolloutItem, mode: EventPersistenceMode RolloutItem::ResponseItem(item) => should_persist_response_item(item), RolloutItem::EventMsg(ev) => should_persist_event_msg(ev, mode), // Persist Codex executive markers so we can analyze flows (e.g., compaction, API turns). - RolloutItem::Compacted(_) | RolloutItem::TurnContext(_) | RolloutItem::SessionMeta(_) => { - true - } + RolloutItem::Compacted(_) + | RolloutItem::ForkReference(_) + | RolloutItem::RolloutReference(_) + | RolloutItem::TurnContext(_) + | RolloutItem::SessionMeta(_) => true, } } diff --git a/codex-rs/rollout/src/recorder.rs b/codex-rs/rollout/src/recorder.rs index dc2f08b7abb4..ce5980f3896b 100644 --- a/codex-rs/rollout/src/recorder.rs +++ b/codex-rs/rollout/src/recorder.rs @@ -11,6 +11,7 @@ use std::sync::Mutex; use chrono::SecondsFormat; use chrono::Utc; +use codex_protocol::SegmentId; use codex_protocol::ThreadId; use codex_protocol::dynamic_tools::DynamicToolSpec; use codex_protocol::models::BaseInstructions; @@ -667,6 +668,7 @@ impl RolloutRecorder { let log_file_info = precompute_log_file_info(config, conversation_id)?; let path = log_file_info.path.clone(); let session_id = log_file_info.conversation_id; + let segment_id = SegmentId::new(); let started_at = log_file_info.timestamp; let timestamp_format: &[FormatItem] = format_description!( @@ -679,6 +681,7 @@ impl RolloutRecorder { let session_meta = SessionMeta { id: session_id, + segment_id: Some(segment_id), forked_from_id, timestamp, cwd: config.cwd().to_path_buf(), @@ -894,6 +897,12 @@ impl RolloutRecorder { RolloutItem::ResponseItem(item) => { items.push(RolloutItem::ResponseItem(item)); } + RolloutItem::ForkReference(item) => { + items.push(RolloutItem::ForkReference(item)); + } + RolloutItem::RolloutReference(item) => { + items.push(RolloutItem::RolloutReference(item)); + } RolloutItem::Compacted(item) => { items.push(RolloutItem::Compacted(item)); } @@ -1367,28 +1376,43 @@ fn precompute_log_file_info( // Resolve ~/.codex/sessions/YYYY/MM/DD path. let timestamp = OffsetDateTime::now_local() .map_err(|e| IoError::other(format!("failed to get local time: {e}")))?; - let mut dir = config.codex_home().to_path_buf(); - dir.push(SESSIONS_SUBDIR); - dir.push(timestamp.year().to_string()); - dir.push(format!("{:02}", u8::from(timestamp.month()))); - dir.push(format!("{:02}", timestamp.day())); // Custom format for YYYY-MM-DDThh-mm-ss. Use `-` instead of `:` for // compatibility with filesystems that do not allow colons in filenames. let format: &[FormatItem] = format_description!("[year]-[month]-[day]T[hour]-[minute]-[second]"); - let date_str = timestamp - .format(format) - .map_err(|e| IoError::other(format!("failed to format timestamp: {e}")))?; - - let filename = format!("rollout-{date_str}-{conversation_id}.jsonl"); - - let path = dir.join(filename); + let mut selected_timestamp = timestamp; + let mut path = None; + for offset_seconds in 0..60 { + let candidate_timestamp = timestamp + .checked_add(time::Duration::seconds(offset_seconds)) + .ok_or_else(|| IoError::other("failed to compute rollout timestamp"))?; + let mut dir = config.codex_home().to_path_buf(); + dir.push(SESSIONS_SUBDIR); + dir.push(candidate_timestamp.year().to_string()); + dir.push(format!("{:02}", u8::from(candidate_timestamp.month()))); + dir.push(format!("{:02}", candidate_timestamp.day())); + let date_str = candidate_timestamp + .format(format) + .map_err(|e| IoError::other(format!("failed to format timestamp: {e}")))?; + let filename = format!("rollout-{date_str}-{conversation_id}.jsonl"); + let candidate_path = dir.join(filename); + if !candidate_path.exists() { + selected_timestamp = candidate_timestamp; + path = Some(candidate_path); + break; + } + } + let path = path.ok_or_else(|| { + IoError::other(format!( + "failed to find an unused rollout path for thread {conversation_id}" + )) + })?; Ok(LogFileInfo { path, conversation_id, - timestamp, + timestamp: selected_timestamp, }) } @@ -1884,6 +1908,8 @@ async fn resume_candidate_matches_cwd( && let Some(latest_turn_context_cwd) = items.iter().rev().find_map(|item| match item { RolloutItem::TurnContext(turn_context) => Some(turn_context.cwd.as_path()), RolloutItem::SessionMeta(_) + | RolloutItem::ForkReference(_) + | RolloutItem::RolloutReference(_) | RolloutItem::ResponseItem(_) | RolloutItem::Compacted(_) | RolloutItem::EventMsg(_) => None, diff --git a/codex-rs/rollout/src/recorder_tests.rs b/codex-rs/rollout/src/recorder_tests.rs index 0138db72df04..32b6c18c3442 100644 --- a/codex-rs/rollout/src/recorder_tests.rs +++ b/codex-rs/rollout/src/recorder_tests.rs @@ -283,6 +283,20 @@ async fn recorder_materializes_on_flush_with_pending_items() -> std::io::Result< text.contains("\"type\":\"session_meta\""), "expected session metadata in rollout" ); + let (items, _, _) = RolloutRecorder::load_rollout_items(&rollout_path).await?; + let segment_id = items.iter().find_map(|item| match item { + RolloutItem::SessionMeta(meta) => meta.meta.segment_id, + RolloutItem::ForkReference(_) + | RolloutItem::RolloutReference(_) + | RolloutItem::ResponseItem(_) + | RolloutItem::Compacted(_) + | RolloutItem::TurnContext(_) + | RolloutItem::EventMsg(_) => None, + }); + assert!( + segment_id.is_some(), + "new rollout metadata should include a segment_id for durable references" + ); let buffered_idx = text .find("buffered-event") .expect("buffered event in rollout"); diff --git a/codex-rs/rollout/src/session_index_tests.rs b/codex-rs/rollout/src/session_index_tests.rs index c6a539fb28ab..4c081b6820c7 100644 --- a/codex-rs/rollout/src/session_index_tests.rs +++ b/codex-rs/rollout/src/session_index_tests.rs @@ -26,6 +26,7 @@ fn write_rollout_with_metadata(path: &Path, thread_id: ThreadId) -> std::io::Res item: RolloutItem::SessionMeta(SessionMetaLine { meta: SessionMeta { id: thread_id, + segment_id: None, forked_from_id: None, timestamp, cwd: ".".into(), diff --git a/codex-rs/rollout/src/tests.rs b/codex-rs/rollout/src/tests.rs index fba8a9827a31..0c3a4bc0f5f5 100644 --- a/codex-rs/rollout/src/tests.rs +++ b/codex-rs/rollout/src/tests.rs @@ -34,6 +34,7 @@ use codex_protocol::models::ResponseItem; use codex_protocol::protocol::EventMsg; use codex_protocol::protocol::RolloutItem; use codex_protocol::protocol::RolloutLine; +use codex_protocol::protocol::RolloutReferenceItem; use codex_protocol::protocol::SessionMeta; use codex_protocol::protocol::SessionMetaLine; use codex_protocol::protocol::SessionSource; @@ -294,6 +295,34 @@ fn rollout_date_parts_extracts_directory_components() { ); } +#[tokio::test] +async fn rollout_reference_resolves_archived_file_by_stable_thread_and_timestamp() { + let temp = TempDir::new().expect("tempdir"); + let home = temp.path(); + let uuid = Uuid::new_v4(); + let thread_id = ThreadId::from_string(&uuid.to_string()).expect("thread id"); + let ts = "2025-01-03T13-00-00"; + let active_path = home.join(format!("sessions/2025/01/03/rollout-{ts}-{uuid}.jsonl")); + let archived_path = home.join(format!("archived_sessions/rollout-{ts}-{uuid}.jsonl")); + fs::create_dir_all(archived_path.parent().expect("archived parent")).unwrap(); + fs::write(&archived_path, "").unwrap(); + + let resolved = crate::resolve_rollout_reference_rollout_path( + home, + &RolloutReferenceItem { + rollout_path: active_path, + thread_id: Some(thread_id), + rollout_timestamp: Some(ts.to_string()), + segment_id: None, + max_depth: 2, + }, + ) + .await + .expect("resolve rollout reference"); + + assert_eq!(resolved, archived_path); +} + async fn assert_state_db_rollout_path( home: &Path, thread_id: ThreadId, @@ -1139,6 +1168,7 @@ async fn test_updated_at_uses_file_mtime() -> Result<()> { item: RolloutItem::SessionMeta(SessionMetaLine { meta: SessionMeta { id: conversation_id, + segment_id: None, forked_from_id: None, timestamp: ts.to_string(), cwd: ".".into(), diff --git a/codex-rs/state/src/extract.rs b/codex-rs/state/src/extract.rs index a4a0ab0f6a17..e893f0c81189 100644 --- a/codex-rs/state/src/extract.rs +++ b/codex-rs/state/src/extract.rs @@ -22,7 +22,9 @@ pub fn apply_rollout_item( RolloutItem::TurnContext(turn_ctx) => apply_turn_context(metadata, turn_ctx), RolloutItem::EventMsg(event) => apply_event_msg(metadata, event), RolloutItem::ResponseItem(item) => apply_response_item(metadata, item), - RolloutItem::Compacted(_) => {} + RolloutItem::Compacted(_) + | RolloutItem::ForkReference(_) + | RolloutItem::RolloutReference(_) => {} } if metadata.model_provider.is_empty() { metadata.model_provider = default_provider.to_string(); @@ -36,9 +38,11 @@ pub fn rollout_item_affects_thread_metadata(item: &RolloutItem) -> bool { RolloutItem::EventMsg( EventMsg::TokenCount(_) | EventMsg::UserMessage(_) | EventMsg::ThreadNameUpdated(_), ) => true, - RolloutItem::EventMsg(_) | RolloutItem::ResponseItem(_) | RolloutItem::Compacted(_) => { - false - } + RolloutItem::EventMsg(_) + | RolloutItem::ForkReference(_) + | RolloutItem::RolloutReference(_) + | RolloutItem::ResponseItem(_) + | RolloutItem::Compacted(_) => false, } } @@ -270,6 +274,7 @@ mod tests { &RolloutItem::SessionMeta(SessionMetaLine { meta: SessionMeta { id: thread_id, + segment_id: None, forked_from_id: Some( ThreadId::from_string(&Uuid::now_v7().to_string()).expect("thread id"), ), @@ -405,6 +410,7 @@ mod tests { &RolloutItem::SessionMeta(SessionMetaLine { meta: SessionMeta { id: thread_id, + segment_id: None, forked_from_id: None, timestamp: "2026-02-26T00:00:00.000Z".to_string(), cwd: PathBuf::from("/workspace"), diff --git a/codex-rs/state/src/runtime/threads.rs b/codex-rs/state/src/runtime/threads.rs index 906a3bb39aae..beb53c0bfa49 100644 --- a/codex-rs/state/src/runtime/threads.rs +++ b/codex-rs/state/src/runtime/threads.rs @@ -979,7 +979,9 @@ SELECT pub(super) fn extract_dynamic_tools(items: &[RolloutItem]) -> Option>> { items.iter().find_map(|item| match item { RolloutItem::SessionMeta(meta_line) => Some(meta_line.meta.dynamic_tools.clone()), - RolloutItem::ResponseItem(_) + RolloutItem::ForkReference(_) + | RolloutItem::RolloutReference(_) + | RolloutItem::ResponseItem(_) | RolloutItem::Compacted(_) | RolloutItem::TurnContext(_) | RolloutItem::EventMsg(_) => None, @@ -989,7 +991,9 @@ pub(super) fn extract_dynamic_tools(items: &[RolloutItem]) -> Option Option { items.iter().rev().find_map(|item| match item { RolloutItem::SessionMeta(meta_line) => meta_line.meta.memory_mode.clone(), - RolloutItem::ResponseItem(_) + RolloutItem::ForkReference(_) + | RolloutItem::RolloutReference(_) + | RolloutItem::ResponseItem(_) | RolloutItem::Compacted(_) | RolloutItem::TurnContext(_) | RolloutItem::EventMsg(_) => None, @@ -1352,6 +1356,7 @@ mod tests { let items = vec![RolloutItem::SessionMeta(SessionMetaLine { meta: SessionMeta { id: thread_id, + segment_id: None, forked_from_id: None, timestamp: metadata.created_at.to_rfc3339(), cwd: PathBuf::new(), @@ -1410,6 +1415,7 @@ mod tests { let items = vec![RolloutItem::SessionMeta(SessionMetaLine { meta: SessionMeta { id: thread_id, + segment_id: None, forked_from_id: None, timestamp: created_at, cwd: PathBuf::new(), diff --git a/codex-rs/thread-manager-sample/src/main.rs b/codex-rs/thread-manager-sample/src/main.rs index 64eae898f6a5..22302c215bd9 100644 --- a/codex-rs/thread-manager-sample/src/main.rs +++ b/codex-rs/thread-manager-sample/src/main.rs @@ -212,8 +212,8 @@ fn new_config(model: Option, arg0_paths: Arg0DispatchPaths) -> anyhow::R agent_job_max_runtime_seconds: None, agent_interrupt_message_enabled: false, agent_max_depth: 1, - agent_roles: BTreeMap::new(), watchdog_interval_s: 60, + agent_roles: BTreeMap::new(), memories: MemoriesConfig::default(), sqlite_home: codex_home.to_path_buf(), log_dir: codex_home.join("log").to_path_buf(), diff --git a/codex-rs/thread-store/src/lib.rs b/codex-rs/thread-store/src/lib.rs index 52b7f5ea1fab..5bc57f6f4a99 100644 --- a/codex-rs/thread-store/src/lib.rs +++ b/codex-rs/thread-store/src/lib.rs @@ -32,6 +32,7 @@ pub use types::OptionalStringPatch; pub use types::ReadThreadByRolloutPathParams; pub use types::ReadThreadParams; pub use types::ResumeThreadParams; +pub use types::RotateThreadSegmentParams; pub use types::SortDirection; pub use types::StoredThread; pub use types::StoredThreadHistory; diff --git a/codex-rs/thread-store/src/live_thread.rs b/codex-rs/thread-store/src/live_thread.rs index bcce1c764540..d92c38a37e0c 100644 --- a/codex-rs/thread-store/src/live_thread.rs +++ b/codex-rs/thread-store/src/live_thread.rs @@ -11,6 +11,7 @@ use crate::CreateThreadParams; use crate::LoadThreadHistoryParams; use crate::LocalThreadStore; use crate::ResumeThreadParams; +use crate::RotateThreadSegmentParams; use crate::StoredThreadHistory; use crate::ThreadMetadataPatch; use crate::ThreadStore; @@ -111,6 +112,23 @@ impl LiveThread { .await } + pub async fn rotate_local_segment( + &self, + params: RotateThreadSegmentParams, + ) -> ThreadStoreResult { + let Some(local_store) = self + .thread_store + .as_any() + .downcast_ref::() + else { + return Ok(false); + }; + local_store + .rotate_thread_segment(self.thread_id, params) + .await?; + Ok(true) + } + pub async fn persist(&self) -> ThreadStoreResult<()> { self.thread_store.persist_thread(self.thread_id).await } diff --git a/codex-rs/thread-store/src/local/live_writer.rs b/codex-rs/thread-store/src/local/live_writer.rs index 643207b59dec..a986f3f2928a 100644 --- a/codex-rs/thread-store/src/local/live_writer.rs +++ b/codex-rs/thread-store/src/local/live_writer.rs @@ -1,11 +1,16 @@ use std::path::PathBuf; use codex_protocol::ThreadId; +use codex_protocol::protocol::RolloutItem; +use codex_protocol::protocol::RolloutReferenceItem; use codex_protocol::protocol::ThreadMemoryMode; use codex_rollout::RolloutConfig; use codex_rollout::RolloutRecorder; use codex_rollout::RolloutRecorderParams; use codex_rollout::builder_from_items; +use codex_rollout::read_session_meta_line; +use tokio::fs; +use tracing::warn; use super::LocalThreadStore; use super::create_thread; @@ -13,6 +18,7 @@ use crate::AppendThreadItemsParams; use crate::CreateThreadParams; use crate::ReadThreadParams; use crate::ResumeThreadParams; +use crate::RotateThreadSegmentParams; use crate::ThreadStoreError; use crate::ThreadStoreResult; @@ -161,8 +167,154 @@ pub(super) async fn rollout_path( .to_path_buf()) } +pub(super) async fn rotate_thread_segment( + store: &LocalThreadStore, + thread_id: ThreadId, + params: RotateThreadSegmentParams, +) -> ThreadStoreResult<()> { + let old_recorder = store.live_recorder(thread_id).await?; + old_recorder.flush().await.map_err(thread_store_io_error)?; + let old_rollout_path = old_recorder.rollout_path().to_path_buf(); + let old_meta = read_session_meta_line(old_rollout_path.as_path()) + .await + .map_err(|err| ThreadStoreError::Internal { + message: format!( + "failed to read current rollout metadata from {}: {err}", + old_rollout_path.display() + ), + })?; + if old_meta.meta.id != thread_id { + return Err(ThreadStoreError::Internal { + message: format!( + "live rollout {} belongs to thread {} instead of {thread_id}", + old_rollout_path.display(), + old_meta.meta.id + ), + }); + } + + let cwd = params + .metadata + .cwd + .clone() + .ok_or_else(|| ThreadStoreError::InvalidRequest { + message: "local thread store requires a cwd".to_string(), + })?; + let config = RolloutConfig { + codex_home: store.config.codex_home.clone(), + sqlite_home: store.config.sqlite_home.clone(), + cwd, + model_provider_id: params.metadata.model_provider.clone(), + generate_memories: matches!(params.metadata.memory_mode, ThreadMemoryMode::Enabled), + }; + let mut initial_items = Vec::with_capacity(params.initial_items.len() + 1); + initial_items.push(RolloutItem::RolloutReference(RolloutReferenceItem { + rollout_path: old_rollout_path.clone(), + thread_id: Some(thread_id), + rollout_timestamp: rollout_timestamp_from_path(old_rollout_path.as_path()), + segment_id: old_meta.meta.segment_id, + max_depth: params.previous_segment_reference_depth, + })); + initial_items.extend(params.initial_items); + + let state_db_ctx = store.state_db().await; + let new_recorder = RolloutRecorder::new( + &config, + RolloutRecorderParams::new( + thread_id, + old_meta.meta.forked_from_id, + params.source, + params.base_instructions, + params.dynamic_tools, + create_thread::event_persistence_mode(params.event_persistence_mode), + ), + state_db_ctx, + /*state_builder*/ None, + ) + .await + .map_err(|err| ThreadStoreError::Internal { + message: format!("failed to initialize rotated local thread recorder: {err}"), + })?; + new_recorder + .record_items(initial_items.as_slice()) + .await + .map_err(thread_store_io_error)?; + new_recorder.flush().await.map_err(thread_store_io_error)?; + + if let Err(err) = old_recorder.shutdown().await { + warn!( + "failed to close previous rollout segment {} for thread {thread_id}: {err}", + old_rollout_path.display() + ); + } + + let current_path = store + .live_recorders + .lock() + .await + .get(&thread_id) + .ok_or(ThreadStoreError::ThreadNotFound { thread_id })? + .rollout_path() + .to_path_buf(); + if current_path != old_rollout_path { + return Err(ThreadStoreError::Conflict { + message: format!("live writer for thread {thread_id} changed during segment rotation"), + }); + } + + let old_file_name = old_rollout_path + .file_name() + .ok_or_else(|| ThreadStoreError::Internal { + message: format!( + "previous rollout segment path {} does not have a file name", + old_rollout_path.display() + ), + })?; + let archived_root = store + .config + .codex_home + .join(codex_rollout::ARCHIVED_SESSIONS_SUBDIR); + fs::create_dir_all(archived_root.as_path()) + .await + .map_err(thread_store_io_error)?; + let archived_path = archived_root.join(old_file_name); + fs::rename(old_rollout_path.as_path(), archived_path.as_path()) + .await + .map_err(|err| ThreadStoreError::Internal { + message: format!( + "failed to archive previous rollout segment {} to {}: {err}", + old_rollout_path.display(), + archived_path.display() + ), + })?; + + let mut live_recorders = store.live_recorders.lock().await; + let current_path = live_recorders + .get(&thread_id) + .ok_or(ThreadStoreError::ThreadNotFound { thread_id })? + .rollout_path() + .to_path_buf(); + if current_path != old_rollout_path { + return Err(ThreadStoreError::Conflict { + message: format!("live writer for thread {thread_id} changed during segment rotation"), + }); + } + live_recorders.insert(thread_id, new_recorder); + Ok(()) +} + fn thread_store_io_error(err: std::io::Error) -> ThreadStoreError { ThreadStoreError::Internal { message: err.to_string(), } } + +fn rollout_timestamp_from_path(path: &std::path::Path) -> Option { + let file_name = path.file_name()?.to_str()?; + let core = file_name.strip_prefix("rollout-")?.strip_suffix(".jsonl")?; + core.match_indices('-').rev().find_map(|(index, _)| { + ThreadId::from_string(&core[index + 1..]) + .ok() + .map(|_| core[..index].to_string()) + }) +} diff --git a/codex-rs/thread-store/src/local/mod.rs b/codex-rs/thread-store/src/local/mod.rs index 04dd8b249077..2033f6cab69e 100644 --- a/codex-rs/thread-store/src/local/mod.rs +++ b/codex-rs/thread-store/src/local/mod.rs @@ -29,6 +29,7 @@ use crate::LoadThreadHistoryParams; use crate::ReadThreadByRolloutPathParams; use crate::ReadThreadParams; use crate::ResumeThreadParams; +use crate::RotateThreadSegmentParams; use crate::StoredThread; use crate::StoredThreadHistory; use crate::ThreadPage; @@ -123,6 +124,14 @@ impl LocalThreadStore { live_writer::rollout_path(self, thread_id).await } + pub async fn rotate_thread_segment( + &self, + thread_id: ThreadId, + params: RotateThreadSegmentParams, + ) -> ThreadStoreResult<()> { + live_writer::rotate_thread_segment(self, thread_id, params).await + } + pub(super) async fn live_recorder( &self, thread_id: ThreadId, @@ -293,6 +302,7 @@ mod tests { use tempfile::TempDir; use super::*; + use crate::RotateThreadSegmentParams; use crate::ThreadEventPersistenceMode; use crate::ThreadPersistenceMetadata; use crate::local::test_support::test_config; @@ -348,6 +358,134 @@ mod tests { ); } + #[tokio::test] + async fn rotate_thread_segment_keeps_thread_id_and_references_previous_segment() { + let home = TempDir::new().expect("temp dir"); + let store = LocalThreadStore::new(test_config(home.path())); + let thread_id = ThreadId::default(); + + store + .create_thread(create_thread_params(thread_id)) + .await + .expect("create live thread"); + store + .append_items(AppendThreadItemsParams { + thread_id, + items: vec![user_message_item("before rotation")], + }) + .await + .expect("append pre-rotation item"); + store + .flush_thread(thread_id) + .await + .expect("flush pre-rotation item"); + let old_rollout_path = store + .live_rollout_path(thread_id) + .await + .expect("old rollout path"); + let (old_items, _, _) = RolloutRecorder::load_rollout_items(old_rollout_path.as_path()) + .await + .expect("old rollout items"); + let old_segment_id = old_items.iter().find_map(|item| match item { + RolloutItem::SessionMeta(meta) => meta.meta.segment_id, + RolloutItem::ForkReference(_) + | RolloutItem::RolloutReference(_) + | RolloutItem::ResponseItem(_) + | RolloutItem::Compacted(_) + | RolloutItem::TurnContext(_) + | RolloutItem::EventMsg(_) => None, + }); + + store + .rotate_thread_segment( + thread_id, + RotateThreadSegmentParams { + source: SessionSource::Exec, + base_instructions: BaseInstructions::default(), + dynamic_tools: Vec::new(), + metadata: thread_metadata(), + event_persistence_mode: ThreadEventPersistenceMode::Limited, + initial_items: vec![user_message_item("rotation checkpoint")], + previous_segment_reference_depth: 2, + }, + ) + .await + .expect("rotate segment"); + let new_rollout_path = store + .live_rollout_path(thread_id) + .await + .expect("new rollout path"); + assert_ne!(new_rollout_path, old_rollout_path); + + store + .append_items(AppendThreadItemsParams { + thread_id, + items: vec![user_message_item("after rotation")], + }) + .await + .expect("append post-rotation item"); + store + .flush_thread(thread_id) + .await + .expect("flush post-rotation item"); + let archived_old_rollout_path = home + .path() + .join(codex_rollout::ARCHIVED_SESSIONS_SUBDIR) + .join( + old_rollout_path + .file_name() + .expect("old rollout path should have a file name"), + ); + assert!(!old_rollout_path.exists()); + assert!(archived_old_rollout_path.exists()); + let old_segment_id = old_segment_id.expect("old rollout should have a segment id"); + let resolved_old_rollout_path = + codex_rollout::find_rollout_path_by_segment_id(home.path(), thread_id, old_segment_id) + .await + .expect("resolve archived segment by id") + .expect("archived segment should resolve by id"); + assert_eq!(resolved_old_rollout_path, archived_old_rollout_path); + let old_rollout_timestamp = old_rollout_path + .file_name() + .and_then(|file_name| file_name.to_str()) + .and_then(|file_name| file_name.strip_prefix("rollout-")) + .and_then(|file_name| file_name.strip_suffix(&format!("-{thread_id}.jsonl"))) + .expect("old rollout timestamp"); + + let (new_items, new_thread_id, _) = + RolloutRecorder::load_rollout_items(new_rollout_path.as_path()) + .await + .expect("new rollout items"); + assert_eq!(new_thread_id, Some(thread_id)); + assert!(new_items.iter().any(|item| { + matches!( + item, + RolloutItem::RolloutReference(reference) + if reference.rollout_path == old_rollout_path + && reference.thread_id == Some(thread_id) + && reference.rollout_timestamp.as_deref() == Some(old_rollout_timestamp) + && reference.segment_id == Some(old_segment_id) + && reference.max_depth == 2 + ) + })); + assert!(new_items.iter().any(|item| { + matches!( + item, + RolloutItem::EventMsg(EventMsg::UserMessage(event)) + if event.message == "rotation checkpoint" + ) + })); + assert!(new_items.iter().any(|item| { + matches!( + item, + RolloutItem::EventMsg(EventMsg::UserMessage(event)) + if event.message == "after rotation" + ) + })); + assert_rollout_contains_message(archived_old_rollout_path.as_path(), "before rotation") + .await; + } + #[tokio::test] async fn create_thread_rejects_missing_cwd() { let home = TempDir::new().expect("temp dir"); diff --git a/codex-rs/thread-store/src/types.rs b/codex-rs/thread-store/src/types.rs index 85bde023bdf7..7e1766f75817 100644 --- a/codex-rs/thread-store/src/types.rs +++ b/codex-rs/thread-store/src/types.rs @@ -84,6 +84,25 @@ pub struct AppendThreadItemsParams { pub items: Vec, } +/// Parameters for replacing a live local rollout segment without changing the thread id. +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct RotateThreadSegmentParams { + /// Runtime source for the thread. + pub source: SessionSource, + /// Base instructions persisted in the new session metadata. + pub base_instructions: BaseInstructions, + /// Dynamic tools available to the thread. + pub dynamic_tools: Vec, + /// Metadata captured for the new segment. + pub metadata: ThreadPersistenceMetadata, + /// Whether persistence should include the extended event surface. + pub event_persistence_mode: ThreadEventPersistenceMode, + /// Items written immediately after the new segment metadata. + pub initial_items: Vec, + /// Maximum number of rollout-reference segments materialized through the new segment. + pub previous_segment_reference_depth: usize, +} + /// Parameters for loading persisted history for resume, fork, rollback, and memory jobs. #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] pub struct LoadThreadHistoryParams { diff --git a/codex-rs/tools/src/tool_registry_plan.rs b/codex-rs/tools/src/tool_registry_plan.rs index 23e4b883eb01..67d50dfe873d 100644 --- a/codex-rs/tools/src/tool_registry_plan.rs +++ b/codex-rs/tools/src/tool_registry_plan.rs @@ -26,6 +26,7 @@ use crate::create_apply_patch_json_tool; use crate::create_close_agent_tool_v1; use crate::create_close_agent_tool_v2; use crate::create_code_mode_tool; +use crate::create_compact_parent_context_tool; use crate::create_create_goal_tool; use crate::create_exec_command_tool; use crate::create_followup_task_tool; @@ -500,12 +501,17 @@ pub fn build_tool_registry_plan( if config.agent_watchdog { plan.push_spec( create_watchdog_tools_namespace(vec![ + create_compact_parent_context_tool(), create_watchdog_close_self_tool(), create_watchdog_snooze_tool(), ]), /*supports_parallel_tool_calls*/ false, config.code_mode_enabled, ); + plan.register_handler( + crate::ToolName::namespaced("watchdog", "compact_parent_context"), + ToolHandlerKind::CompactParentContext, + ); plan.register_handler( crate::ToolName::namespaced("watchdog", "close_self"), ToolHandlerKind::WatchdogSelfClose, diff --git a/codex-rs/tools/src/tool_registry_plan_tests.rs b/codex-rs/tools/src/tool_registry_plan_tests.rs index 71aef8b1a77b..af10ec2107a6 100644 --- a/codex-rs/tools/src/tool_registry_plan_tests.rs +++ b/codex-rs/tools/src/tool_registry_plan_tests.rs @@ -242,6 +242,53 @@ fn goal_tools_require_goals_feature() { assert_contains_tool_names(&tools, &["get_goal", "create_goal", "update_goal"]); } +#[test] +fn watchdog_tools_are_eager_namespace_tools() { + let model_info = model_info(); + let mut features = Features::with_defaults(); + features.enable(Feature::AgentWatchdog); + let available_models = Vec::new(); + let tools_config = ToolsConfig::new(&ToolsConfigParams { + model_info: &model_info, + available_models: &available_models, + features: &features, + image_generation_tool_auth_allowed: true, + web_search_mode: Some(WebSearchMode::Cached), + session_source: SessionSource::Cli, + permission_profile: &PermissionProfile::Disabled, + windows_sandbox_level: WindowsSandboxLevel::Disabled, + }); + let (tools, handlers) = build_specs( + &tools_config, + /*mcp_tools*/ None, + /*deferred_mcp_tools*/ None, + &[], + ); + + // Frodex keeps the watchdog namespace eager for every agent so parent and + // forked child requests have the same prompt-visible tool surface. + assert_eq!( + namespace_function_names(&tools, "watchdog"), + vec![ + "compact_parent_context".to_string(), + "close_self".to_string(), + "snooze".to_string() + ] + ); + assert!(handlers.contains(&ToolHandlerSpec { + name: ToolName::namespaced("watchdog", "compact_parent_context"), + kind: ToolHandlerKind::CompactParentContext, + })); + assert!(handlers.contains(&ToolHandlerSpec { + name: ToolName::namespaced("watchdog", "close_self"), + kind: ToolHandlerKind::WatchdogSelfClose, + })); + assert!(handlers.contains(&ToolHandlerSpec { + name: ToolName::namespaced("watchdog", "snooze"), + kind: ToolHandlerKind::WatchdogSnooze, + })); +} + #[test] fn test_build_specs_multi_agent_v2_uses_task_names_and_hides_resume() { let model_info = model_info(); diff --git a/codex-rs/tui/src/app/agent_navigation.rs b/codex-rs/tui/src/app/agent_navigation.rs index b8643d1d3dcc..6e15c66941ba 100644 --- a/codex-rs/tui/src/app/agent_navigation.rs +++ b/codex-rs/tui/src/app/agent_navigation.rs @@ -16,7 +16,7 @@ //! //! The key invariant is that traversal follows first-seen spawn order rather than thread-id sort //! order. Once a thread id is observed it keeps its place in the cycle even if the entry is later -//! updated or marked closed. +//! updated. Closed threads remain cached for replay metadata, but they are not selectable. use crate::multi_agents::AgentPickerThreadEntry; use crate::multi_agents::format_agent_picker_item_name; @@ -93,12 +93,10 @@ impl AgentNavigationState { ); } - /// Marks a thread as closed without removing it from the traversal cache. + /// Marks a thread as closed without removing it from the metadata cache. /// - /// Closed threads stay in the picker and in spawn order so users can still review them and so - /// next/previous navigation does not reshuffle around disappearing entries. If a caller "cleans - /// this up" by deleting the entry instead, wraparound navigation will silently change shape - /// mid-session. + /// Closed threads stay cached so replayed transcript metadata remains available, but picker + /// traversal filters them out because selecting a closed thread is not actionable. pub(crate) fn mark_closed(&mut self, thread_id: ThreadId) { if let Some(entry) = self.threads.get_mut(&thread_id) { entry.is_closed = true; @@ -141,6 +139,23 @@ impl AgentNavigationState { .collect() } + /// Returns selectable picker rows in first-seen order. + /// + /// The primary thread stays selectable even when it is the only row. Closed agents and watchdog + /// handles are hidden because neither is a useful target for `/agent` thread selection. + pub(crate) fn selectable_threads( + &self, + primary_thread_id: Option, + ) -> Vec<(ThreadId, &AgentPickerThreadEntry)> { + self.ordered_threads() + .into_iter() + .filter(|(thread_id, entry)| { + Some(*thread_id) == primary_thread_id + || (!entry.is_closed && entry.agent_role.as_deref() != Some("watchdog")) + }) + .collect() + } + /// Returns tracked thread ids in the same stable order used by the picker. pub(crate) fn tracked_thread_ids(&self) -> Vec { self.ordered_threads() @@ -149,18 +164,48 @@ impl AgentNavigationState { .collect() } - /// Returns the adjacent thread id for keyboard navigation in stable spawn order. + #[cfg(test)] + /// Returns the adjacent tracked thread id for focused tests of stable spawn-order traversal. + pub(crate) fn adjacent_thread_id( + &self, + current_displayed_thread_id: Option, + direction: AgentNavigationDirection, + ) -> Option { + let ordered_threads = self.ordered_threads(); + if ordered_threads.len() < 2 { + return None; + } + + let current_thread_id = current_displayed_thread_id?; + let current_idx = ordered_threads + .iter() + .position(|(thread_id, _)| *thread_id == current_thread_id)?; + let next_idx = match direction { + AgentNavigationDirection::Next => (current_idx + 1) % ordered_threads.len(), + AgentNavigationDirection::Previous => { + if current_idx == 0 { + ordered_threads.len() - 1 + } else { + current_idx - 1 + } + } + }; + Some(ordered_threads[next_idx].0) + } + + /// Returns the adjacent selectable thread id for keyboard navigation in stable spawn order. /// /// The caller must pass the thread whose transcript is actually being shown to the user, not /// just whichever thread bookkeeping most recently marked active. If the wrong current thread /// is supplied, next/previous navigation will jump in a way that feels nondeterministic even /// though the cache itself is correct. - pub(crate) fn adjacent_thread_id( + pub(crate) fn adjacent_selectable_thread_id( &self, current_displayed_thread_id: Option, + primary_thread_id: Option, direction: AgentNavigationDirection, ) -> Option { - let ordered_threads = self.ordered_threads(); + let ordered_threads = self.selectable_threads(primary_thread_id); if ordered_threads.len() < 2 { return None; } @@ -184,7 +229,7 @@ impl AgentNavigationState { /// Derives the contextual footer label for the currently displayed thread. /// - /// This intentionally returns `None` until there is more than one tracked thread so + /// This intentionally returns `None` until there is more than one selectable thread so /// single-thread sessions do not waste footer space restating the obvious. When metadata for /// the displayed thread is missing, the label falls back to the same generic naming rules used /// by the picker. @@ -193,16 +238,18 @@ impl AgentNavigationState { current_displayed_thread_id: Option, primary_thread_id: Option, ) -> Option { - if self.threads.len() <= 1 { + let selectable_threads = self.selectable_threads(primary_thread_id); + if selectable_threads.len() <= 1 { return None; } let thread_id = current_displayed_thread_id?; let is_primary = primary_thread_id == Some(thread_id); Some( - self.threads - .get(&thread_id) - .map(|entry| { + selectable_threads + .into_iter() + .find(|(candidate, _)| *candidate == thread_id) + .map(|(_, entry)| { format_agent_picker_item_name( entry.agent_nickname.as_deref(), entry.agent_role.as_deref(), @@ -337,4 +384,39 @@ mod tests { Some("Main [default]".to_string()) ); } + + #[test] + fn selectable_threads_hide_closed_agents_and_watchdogs() { + let (mut state, main_thread_id, first_agent_id, second_agent_id) = populated_state(); + let watchdog_id = + ThreadId::from_string("00000000-0000-0000-0000-000000000104").expect("valid thread"); + state.upsert( + first_agent_id, + Some("Robie".to_string()), + Some("worker".to_string()), + /*is_closed*/ true, + ); + state.upsert( + watchdog_id, + Some("Watcher".to_string()), + Some("watchdog".to_string()), + /*is_closed*/ false, + ); + + let selectable_ids = state + .selectable_threads(Some(main_thread_id)) + .into_iter() + .map(|(thread_id, _)| thread_id) + .collect::>(); + + assert_eq!(selectable_ids, vec![main_thread_id, second_agent_id]); + assert_eq!( + state.adjacent_selectable_thread_id( + Some(main_thread_id), + Some(main_thread_id), + AgentNavigationDirection::Next, + ), + Some(second_agent_id) + ); + } } diff --git a/codex-rs/tui/src/app/session_lifecycle.rs b/codex-rs/tui/src/app/session_lifecycle.rs index 1bdb38648b2c..2c30eecf9c2e 100644 --- a/codex-rs/tui/src/app/session_lifecycle.rs +++ b/codex-rs/tui/src/app/session_lifecycle.rs @@ -608,10 +608,11 @@ impl App { direction: AgentNavigationDirection, ) -> Option { let current_thread = self.current_displayed_thread_id(); - if let Some(thread_id) = self - .agent_navigation - .adjacent_thread_id(current_thread, direction) - { + if let Some(thread_id) = self.agent_navigation.adjacent_selectable_thread_id( + current_thread, + self.primary_thread_id, + direction, + ) { return Some(thread_id); } @@ -623,8 +624,11 @@ impl App { if self.backfill_loaded_subagent_threads(app_server).await { self.last_subagent_backfill_attempt = Some(primary_thread_id); } - self.agent_navigation - .adjacent_thread_id(self.current_displayed_thread_id(), direction) + self.agent_navigation.adjacent_selectable_thread_id( + self.current_displayed_thread_id(), + self.primary_thread_id, + direction, + ) } pub(super) fn fresh_session_config(&self) -> Config { diff --git a/codex-rs/tui/src/chatwidget.rs b/codex-rs/tui/src/chatwidget.rs index 19e6900709c7..454009daa055 100644 --- a/codex-rs/tui/src/chatwidget.rs +++ b/codex-rs/tui/src/chatwidget.rs @@ -159,6 +159,7 @@ use codex_protocol::config_types::Settings; use codex_protocol::config_types::WindowsSandboxLevel; use codex_protocol::items::AgentMessageContent; use codex_protocol::items::AgentMessageItem; +use codex_protocol::models::ContentItem; use codex_protocol::models::MessagePhase; use codex_protocol::models::ResponseItem; use codex_protocol::models::local_image_label_text; @@ -191,6 +192,7 @@ use ratatui::style::Stylize; use ratatui::text::Line; use ratatui::widgets::Paragraph; use ratatui::widgets::Wrap; +use serde::Deserialize; use tokio::sync::mpsc::UnboundedSender; use tracing::debug; use tracing::warn; @@ -454,12 +456,72 @@ fn is_unified_exec_source(source: ExecCommandSource) -> bool { ) } -fn inter_agent_message_from_item(item: &ResponseItem) -> Option<(String, String)> { +fn inter_agent_message_from_item(item: &ResponseItem) -> Option<(String, String, String)> { let ResponseItem::Message { content, .. } = item else { return None; }; let communication = InterAgentCommunication::from_message_content(content)?; - Some((communication.author.to_string(), communication.content)) + let raw_content = communication.content.clone(); + Some(( + communication.author.to_string(), + raw_content.clone(), + display_inter_agent_message_content(&raw_content), + )) +} + +#[derive(Debug, Deserialize)] +struct SubagentNotificationPayload { + agent_path: Option, + status: AgentStatus, +} + +fn text_from_message_content(content: &[ContentItem]) -> Option<&str> { + content.iter().find_map(|item| match item { + ContentItem::InputText { text } | ContentItem::OutputText { text } => Some(text.as_str()), + ContentItem::InputImage { .. } => None, + }) +} + +fn display_inter_agent_message_content(content: &str) -> String { + parse_subagent_notification(content) + .map(|payload| display_subagent_notification_status(payload.status)) + .unwrap_or_else(|| content.to_string()) +} + +fn display_subagent_notification_status(status: AgentStatus) -> String { + match status { + AgentStatus::Completed(Some(message)) => message, + AgentStatus::Completed(None) => "completed".to_string(), + AgentStatus::Errored(message) => format!("errored: {message}"), + AgentStatus::Interrupted => "interrupted".to_string(), + AgentStatus::Shutdown => "shutdown".to_string(), + AgentStatus::NotFound => "not found".to_string(), + AgentStatus::PendingInit => "pending init".to_string(), + AgentStatus::Running => "running".to_string(), + } +} + +fn parse_subagent_notification(content: &str) -> Option { + const START_MARKER: &str = ""; + const END_MARKER: &str = ""; + + let trimmed = content.trim(); + if !trimmed + .get(..START_MARKER.len()) + .is_some_and(|candidate| candidate.eq_ignore_ascii_case(START_MARKER)) + { + return None; + } + let without_start = &trimmed[START_MARKER.len()..]; + let end_start = without_start.len().checked_sub(END_MARKER.len())?; + if !without_start + .get(end_start..) + .is_some_and(|candidate| candidate.eq_ignore_ascii_case(END_MARKER)) + { + return None; + } + let body = without_start[..end_start].trim(); + serde_json::from_str::(body).ok() } fn is_standard_tool_call(parsed_cmd: &[ParsedCommand]) -> bool { @@ -3996,13 +4058,39 @@ impl ChatWidget { self.request_redraw(); } + fn apply_subagent_notification_text(&mut self, text: &str) -> bool { + let Some(payload) = parse_subagent_notification(text) else { + return false; + }; + let Some(agent_path) = payload.agent_path else { + return true; + }; + let Ok(thread_id) = ThreadId::from_string(&agent_path) else { + return true; + }; + self.subagent_panel_registry + .update_status(thread_id, payload.status); + self.refresh_subagent_panel(); + true + } + fn on_raw_response_item(&mut self, item: ResponseItem, from_replay: bool) { - let Some((sender, message)) = inter_agent_message_from_item(&item) else { + let direct_message_text = match &item { + ResponseItem::Message { content, .. } => text_from_message_content(content), + _ => None, + } + .map(str::to_owned); + if let Some(text) = direct_message_text.as_deref() { + self.apply_subagent_notification_text(text); + } + + let Some((sender, raw_message, message)) = inter_agent_message_from_item(&item) else { if from_replay { self.last_replayed_inter_agent_message = None; } return; }; + self.apply_subagent_notification_text(&raw_message); let replay_key = (sender.clone(), message.clone()); if from_replay { diff --git a/codex-rs/tui/src/chatwidget/snapshots/codex_tui__chatwidget__tests__live_app_server_subagent_notification_renders_status_message_cell.snap b/codex-rs/tui/src/chatwidget/snapshots/codex_tui__chatwidget__tests__live_app_server_subagent_notification_renders_status_message_cell.snap new file mode 100644 index 000000000000..914fb87fec9c --- /dev/null +++ b/codex-rs/tui/src/chatwidget/snapshots/codex_tui__chatwidget__tests__live_app_server_subagent_notification_renders_status_message_cell.snap @@ -0,0 +1,6 @@ +--- +source: tui/src/chatwidget/tests/app_server.rs +assertion_line: 122 +expression: rendered +--- +• Agent message: The watchdog closed itself. I did not close it. from /root/factorial_sum_agent diff --git a/codex-rs/tui/src/chatwidget/snapshots/codex_tui__chatwidget__tests__resume_replay_does_not_resurrect_closed_watchdog_panel_row.snap b/codex-rs/tui/src/chatwidget/snapshots/codex_tui__chatwidget__tests__resume_replay_does_not_resurrect_closed_watchdog_panel_row.snap index e5b1414b7bf9..41aa4df4f35c 100644 --- a/codex-rs/tui/src/chatwidget/snapshots/codex_tui__chatwidget__tests__resume_replay_does_not_resurrect_closed_watchdog_panel_row.snap +++ b/codex-rs/tui/src/chatwidget/snapshots/codex_tui__chatwidget__tests__resume_replay_does_not_resurrect_closed_watchdog_panel_row.snap @@ -1,5 +1,6 @@ --- source: tui/src/chatwidget/tests/app_server.rs +assertion_line: 885 expression: screen --- diff --git a/codex-rs/tui/src/chatwidget/snapshots/codex_tui__chatwidget__tests__subagent_notification_completion_hides_subagent_panel_row.snap b/codex-rs/tui/src/chatwidget/snapshots/codex_tui__chatwidget__tests__subagent_notification_completion_hides_subagent_panel_row.snap new file mode 100644 index 000000000000..9b04dccd3626 --- /dev/null +++ b/codex-rs/tui/src/chatwidget/snapshots/codex_tui__chatwidget__tests__subagent_notification_completion_hides_subagent_panel_row.snap @@ -0,0 +1,10 @@ +--- +source: tui/src/chatwidget/tests/app_server.rs +assertion_line: 819 +expression: screen +--- + + +› Ask Codex to do anything + + gpt-5.5 default · /tmp/project diff --git a/codex-rs/tui/src/chatwidget/snapshots/codex_tui__chatwidget__tests__subagent_panel_mounts_watchdog_spawn.snap b/codex-rs/tui/src/chatwidget/snapshots/codex_tui__chatwidget__tests__subagent_panel_mounts_watchdog_spawn.snap index 4c23d7b3d096..2d477f9f894c 100644 --- a/codex-rs/tui/src/chatwidget/snapshots/codex_tui__chatwidget__tests__subagent_panel_mounts_watchdog_spawn.snap +++ b/codex-rs/tui/src/chatwidget/snapshots/codex_tui__chatwidget__tests__subagent_panel_mounts_watchdog_spawn.snap @@ -1,6 +1,6 @@ --- source: tui/src/chatwidget/tests/app_server.rs -assertion_line: 615 +assertion_line: 632 expression: screen --- diff --git a/codex-rs/tui/src/chatwidget/snapshots/codex_tui__chatwidget__tests__subagent_panel_renders_subagent_and_watchdog_rows.snap b/codex-rs/tui/src/chatwidget/snapshots/codex_tui__chatwidget__tests__subagent_panel_renders_subagent_and_watchdog_rows.snap index a1ab95fb6a20..b90cac523543 100644 --- a/codex-rs/tui/src/chatwidget/snapshots/codex_tui__chatwidget__tests__subagent_panel_renders_subagent_and_watchdog_rows.snap +++ b/codex-rs/tui/src/chatwidget/snapshots/codex_tui__chatwidget__tests__subagent_panel_renders_subagent_and_watchdog_rows.snap @@ -1,5 +1,6 @@ --- source: tui/src/chatwidget/tests/app_server.rs +assertion_line: 718 expression: screen --- diff --git a/codex-rs/tui/src/chatwidget/snapshots/codex_tui__chatwidget__tests__watchdog_goodbye_message_closes_subagent_panel_row.snap b/codex-rs/tui/src/chatwidget/snapshots/codex_tui__chatwidget__tests__watchdog_goodbye_message_closes_subagent_panel_row.snap index 82e9bd864114..b6d4703da747 100644 --- a/codex-rs/tui/src/chatwidget/snapshots/codex_tui__chatwidget__tests__watchdog_goodbye_message_closes_subagent_panel_row.snap +++ b/codex-rs/tui/src/chatwidget/snapshots/codex_tui__chatwidget__tests__watchdog_goodbye_message_closes_subagent_panel_row.snap @@ -1,6 +1,6 @@ --- source: tui/src/chatwidget/tests/app_server.rs -assertion_line: 801 +assertion_line: 818 expression: screen --- diff --git a/codex-rs/tui/src/chatwidget/snapshots/codex_tui__chatwidget__tests__watchdog_goodbye_message_inserts_close_history.snap b/codex-rs/tui/src/chatwidget/snapshots/codex_tui__chatwidget__tests__watchdog_goodbye_message_inserts_close_history.snap index b1b1d8fb240d..d29f64e3de3c 100644 --- a/codex-rs/tui/src/chatwidget/snapshots/codex_tui__chatwidget__tests__watchdog_goodbye_message_inserts_close_history.snap +++ b/codex-rs/tui/src/chatwidget/snapshots/codex_tui__chatwidget__tests__watchdog_goodbye_message_inserts_close_history.snap @@ -1,6 +1,6 @@ --- source: tui/src/chatwidget/tests/app_server.rs -assertion_line: 789 +assertion_line: 806 expression: inserted --- • Spawned Boyle [watchdog] (arcanine 1m low) diff --git a/codex-rs/tui/src/chatwidget/tests/app_server.rs b/codex-rs/tui/src/chatwidget/tests/app_server.rs index c55e4ff2a61e..d6e3b271e226 100644 --- a/codex-rs/tui/src/chatwidget/tests/app_server.rs +++ b/codex-rs/tui/src/chatwidget/tests/app_server.rs @@ -108,6 +108,48 @@ async fn live_app_server_raw_inter_agent_message_renders_agent_message_cell() { ); } +#[tokio::test] +async fn live_app_server_subagent_notification_renders_status_message_cell() { + let (mut chat, mut rx, _ops) = make_chatwidget_manual(/*model_override*/ None).await; + let status = AgentStatus::Completed(Some( + "The watchdog closed itself. I did not close it.".to_string(), + )); + let notification = format!( + "\n{}\n", + serde_json::json!({ + "agent_path": "/root/factorial_sum_agent", + "status": status, + }) + ); + let communication = InterAgentCommunication::new( + AgentPath::try_from("/root/factorial_sum_agent").expect("valid agent path"), + AgentPath::root(), + Vec::new(), + notification, + /*trigger_turn*/ false, + ); + + chat.handle_server_notification( + ServerNotification::RawResponseItemCompleted(RawResponseItemCompletedNotification { + thread_id: "thread-1".to_string(), + turn_id: "turn-1".to_string(), + item: communication.to_response_input_item().into(), + }), + /*replay_kind*/ None, + ); + + let rendered = drain_insert_history(&mut rx) + .into_iter() + .map(|lines| lines_to_single_string(&lines)) + .collect::>() + .join("\n"); + + assert_chatwidget_snapshot!( + "live_app_server_subagent_notification_renders_status_message_cell", + rendered + ); +} + #[tokio::test] async fn live_app_server_user_message_item_completed_does_not_duplicate_rendered_prompt() { let (mut chat, mut rx, mut op_rx) = make_chatwidget_manual(/*model_override*/ None).await; @@ -701,6 +743,93 @@ async fn subagent_panel_renders_subagent_and_watchdog_rows() { assert_chatwidget_snapshot!("subagent_panel_renders_subagent_and_watchdog_rows", screen); } +#[tokio::test] +async fn subagent_notification_completion_hides_subagent_panel_row() { + let (mut chat, mut rx, _op_rx) = make_chatwidget_manual(/*model_override*/ None).await; + let sender_thread_id = + ThreadId::from_string("019cff70-2599-75e2-af72-b90000001002").expect("valid thread id"); + let worker_thread_id = + ThreadId::from_string("019cff70-2599-75e2-af72-b90000001003").expect("valid thread id"); + + chat.set_collab_agent_metadata( + worker_thread_id, + Some("Calculator".to_string()), + Some("worker".to_string()), + ); + chat.handle_server_notification( + ServerNotification::ItemCompleted(ItemCompletedNotification { + thread_id: "thread-1".to_string(), + turn_id: "turn-1".to_string(), + item: AppServerThreadItem::CollabAgentToolCall { + id: "spawn-worker".to_string(), + tool: AppServerCollabAgentTool::SpawnAgent, + status: AppServerCollabAgentToolCallStatus::Completed, + sender_thread_id: sender_thread_id.to_string(), + receiver_thread_ids: vec![worker_thread_id.to_string()], + prompt: Some("Compute the answer.".to_string()), + model: Some("gpt-5.4".to_string()), + reasoning_effort: Some(ReasoningEffortConfig::Low), + agents_states: HashMap::from([( + worker_thread_id.to_string(), + AppServerCollabAgentState { + status: AppServerCollabAgentStatus::Running, + message: None, + }, + )]), + }, + }), + /*replay_kind*/ None, + ); + + let notification = format!( + "\n{}\n", + serde_json::json!({ + "agent_path": worker_thread_id.to_string(), + "status": AgentStatus::Completed(Some("4037913".to_string())), + }) + ); + let communication = InterAgentCommunication::new( + AgentPath::try_from("/root/calculator").expect("valid agent path"), + AgentPath::root(), + Vec::new(), + notification, + /*trigger_turn*/ false, + ); + chat.handle_server_notification( + ServerNotification::RawResponseItemCompleted(RawResponseItemCompletedNotification { + thread_id: "thread-1".to_string(), + turn_id: "turn-1".to_string(), + item: communication.to_response_input_item().into(), + }), + /*replay_kind*/ None, + ); + + let width = 140; + let height = chat.desired_height(width); + let mut terminal = + ratatui::Terminal::new(VT100Backend::new(width, height)).expect("create terminal"); + terminal.set_viewport_area(ratatui::prelude::Rect::new(0, 0, width, height)); + terminal + .draw(|f| chat.render(f.area(), f.buffer_mut())) + .expect("render chat widget"); + let screen = normalized_backend_snapshot(terminal.backend()); + + assert!(!screen.contains("Subagents")); + assert!(!screen.contains("Calculator")); + assert!(!screen.contains("")); + let inserted = drain_insert_history(&mut rx) + .into_iter() + .map(|lines| lines_to_single_string(&lines)) + .collect::>() + .join("\n"); + assert!(inserted.contains("Agent message: 4037913")); + assert!(!inserted.contains("")); + assert_chatwidget_snapshot!( + "subagent_notification_completion_hides_subagent_panel_row", + screen + ); +} + #[tokio::test] async fn watchdog_goodbye_message_closes_subagent_panel_row() { let (mut chat, mut rx, _op_rx) = make_chatwidget_manual(/*model_override*/ None).await;