From 8ed4c789116098561c808a22f63d3021ceca2c1f Mon Sep 17 00:00:00 2001 From: charley-openai Date: Fri, 24 Apr 2026 13:12:31 -0700 Subject: [PATCH] Add reasoning effort to turn tracing spans --- codex-rs/core/src/session/turn.rs | 2 + codex-rs/core/src/session/turn_context.rs | 14 ++++++ codex-rs/core/src/tasks/mod.rs | 2 + codex-rs/core/tests/suite/otel.rs | 56 +++++++++++++++-------- 4 files changed, 54 insertions(+), 20 deletions(-) diff --git a/codex-rs/core/src/session/turn.rs b/codex-rs/core/src/session/turn.rs index 823dcbfc3dad..25a82b5127a5 100644 --- a/codex-rs/core/src/session/turn.rs +++ b/codex-rs/core/src/session/turn.rs @@ -1853,6 +1853,7 @@ async fn try_run_sampling_request( Box, )> = None; let mut should_emit_turn_diff = false; + let reasoning_effort = turn_context.effective_reasoning_effort_for_tracing(); let plan_mode = turn_context.collaboration_mode.mode == ModeKind::Plan; let mut assistant_message_stream_parsers = AssistantMessageStreamParsers::new(plan_mode); let mut plan_mode_state = plan_mode.then(|| PlanModeStreamState::new(&turn_context.sub_id)); @@ -1864,6 +1865,7 @@ async fn try_run_sampling_request( otel.name = field::Empty, tool_name = field::Empty, from = field::Empty, + codex.request.reasoning_effort = %reasoning_effort, gen_ai.usage.input_tokens = field::Empty, gen_ai.usage.cache_read.input_tokens = field::Empty, gen_ai.usage.output_tokens = field::Empty, diff --git a/codex-rs/core/src/session/turn_context.rs b/codex-rs/core/src/session/turn_context.rs index 35b517b720d1..3f93728fd0cc 100644 --- a/codex-rs/core/src/session/turn_context.rs +++ b/codex-rs/core/src/session/turn_context.rs @@ -117,6 +117,20 @@ impl TurnContext { ) } + pub(crate) fn effective_reasoning_effort_for_tracing(&self) -> String { + if self.model_info.supports_reasoning_summaries { + match self + .reasoning_effort + .or(self.model_info.default_reasoning_level) + { + Some(effort) => effort.to_string(), + None => "default".to_string(), + } + } else { + "default".to_string() + } + } + pub(crate) fn model_context_window(&self) -> Option { let effective_context_window_percent = self.model_info.effective_context_window_percent; self.model_info diff --git a/codex-rs/core/src/tasks/mod.rs b/codex-rs/core/src/tasks/mod.rs index 91078c50ceb0..0a2279cfc78f 100644 --- a/codex-rs/core/src/tasks/mod.rs +++ b/codex-rs/core/src/tasks/mod.rs @@ -368,12 +368,14 @@ impl Session { let task_cancellation_token = cancellation_token.child_token(); // Task-owned turn spans keep a core-owned span open for the // full task lifecycle after the submission dispatch span ends. + let reasoning_effort = turn_context.effective_reasoning_effort_for_tracing(); let task_span = info_span!( "turn", otel.name = span_name, thread.id = %self.conversation_id, turn.id = %turn_context.sub_id, model = %turn_context.model_info.slug, + codex.turn.reasoning_effort = %reasoning_effort, codex.turn.token_usage.input_tokens = field::Empty, codex.turn.token_usage.cached_input_tokens = field::Empty, codex.turn.token_usage.non_cached_input_tokens = field::Empty, diff --git a/codex-rs/core/tests/suite/otel.rs b/codex-rs/core/tests/suite/otel.rs index deeffcd855d4..5539599b2491 100644 --- a/codex-rs/core/tests/suite/otel.rs +++ b/codex-rs/core/tests/suite/otel.rs @@ -1,6 +1,7 @@ use codex_core::config::Constrained; use codex_features::Feature; use codex_protocol::models::PermissionProfile; +use codex_protocol::openai_models::ReasoningEffort; use codex_protocol::protocol::AskForApproval; use codex_protocol::protocol::EventMsg; use codex_protocol::protocol::Op; @@ -595,8 +596,9 @@ async fn turn_and_completed_response_spans_record_token_usage() { ) .await; - let TestCodex { codex, .. } = test_codex() + let test = test_codex() .with_config(|config| { + config.model_reasoning_effort = Some(ReasoningEffort::High); config .features .disable(Feature::GhostCommit) @@ -606,6 +608,8 @@ async fn turn_and_completed_response_spans_record_token_usage() { .await .unwrap(); + let TestCodex { codex, .. } = test; + codex .submit(Op::UserInput { environments: None, @@ -625,7 +629,9 @@ async fn turn_and_completed_response_spans_record_token_usage() { assert!( logs.lines().any(|line| { - line.contains("handle_responses{otel.name=\"completed\"") + line.contains("handle_responses{") + && line.contains("otel.name=\"completed\"") + && line.contains("codex.request.reasoning_effort=high") && line.contains("gen_ai.usage.input_tokens=3") && line.contains("gen_ai.usage.cache_read.input_tokens=1") && line.contains("gen_ai.usage.output_tokens=5") @@ -637,6 +643,7 @@ async fn turn_and_completed_response_spans_record_token_usage() { assert!( logs.lines().any(|line| { line.contains("turn{otel.name=\"session_task.turn\"") + && line.contains("codex.turn.reasoning_effort=high") && line.contains("codex.turn.token_usage.input_tokens=3") && line.contains("codex.turn.token_usage.cached_input_tokens=1") && line.contains("codex.turn.token_usage.non_cached_input_tokens=2") @@ -708,13 +715,18 @@ async fn handle_responses_span_records_response_kind_and_tool_name() { let logs = String::from_utf8(buffer.lock().unwrap().clone()).unwrap(); assert!( - logs.contains("handle_responses{otel.name=\"function_call\"") - && logs.contains("tool_name=\"nonexistent\"") - && logs.contains("from=\"output_item_done\""), + logs.lines().any(|line| { + line.contains("handle_responses{") + && line.contains("otel.name=\"function_call\"") + && line.contains("tool_name=\"nonexistent\"") + && line.contains("from=\"output_item_done\"") + }), "missing handle_responses span with function call metadata\nlogs:\n{logs}" ); assert!( - logs.contains("handle_responses{otel.name=\"completed\""), + logs.lines().any(|line| { + line.contains("handle_responses{") && line.contains("otel.name=\"completed\"") + }), "missing handle_responses span for completion\nlogs:\n{logs}" ); } @@ -766,7 +778,9 @@ async fn record_responses_sets_span_fields_for_response_events() { .await; let TestCodex { codex, .. } = test_codex() + .with_model("gpt-5.4") .with_config(|config| { + config.model_reasoning_effort = Some(ReasoningEffort::High); config .features .disable(Feature::GhostCommit) @@ -806,22 +820,24 @@ async fn record_responses_sets_span_fields_for_response_events() { ]; for (name, from, tool_name) in expected { + let otel_name = format!("otel.name=\"{name}\""); + let from_field = from.map(|from| format!("from=\"{from}\"")); + let tool_name_field = tool_name.map(|tool_name| format!("tool_name=\"{tool_name}\"")); + assert!( - logs.contains(&format!("handle_responses{{otel.name=\"{name}\"")), - "missing otel.name={name}\nlogs:\n{logs}" + logs.lines().any(|line| { + line.contains("handle_responses{") + && line.contains(&otel_name) + && line.contains("codex.request.reasoning_effort=high") + && from_field + .as_ref() + .is_none_or(|from_field| line.contains(from_field)) + && tool_name_field + .as_ref() + .is_none_or(|tool_name_field| line.contains(tool_name_field)) + }), + "missing span fields for {name}\nlogs:\n{logs}" ); - if let Some(from) = from { - assert!( - logs.contains(&format!("from=\"{from}\"")), - "missing from={from} for {name}\nlogs:\n{logs}" - ); - } - if let Some(tool_name) = tool_name { - assert!( - logs.contains(&format!("tool_name=\"{tool_name}\"")), - "missing tool_name={tool_name} for {name}\nlogs:\n{logs}" - ); - } } }