From 1611a601559dcbe1c16a492a4a6f8b783ddcaeff Mon Sep 17 00:00:00 2001 From: Harrrryz Date: Thu, 4 Dec 2025 22:44:18 -0500 Subject: [PATCH 1/2] Fix DeepSeek reasoning content handling for LiteLLM --- src/agents/extensions/models/litellm_model.py | 23 +++++++++- src/agents/models/chatcmpl_converter.py | 46 +++++++++++++++++++ tests/test_anthropic_thinking_blocks.py | 34 +++++++++++++- 3 files changed, 101 insertions(+), 2 deletions(-) diff --git a/src/agents/extensions/models/litellm_model.py b/src/agents/extensions/models/litellm_model.py index 439146c6c..1b792b2d6 100644 --- a/src/agents/extensions/models/litellm_model.py +++ b/src/agents/extensions/models/litellm_model.py @@ -280,7 +280,9 @@ async def _fetch_response( ) converted_messages = Converter.items_to_messages( - input, preserve_thinking_blocks=preserve_thinking_blocks + input, + preserve_thinking_blocks=preserve_thinking_blocks, + include_reasoning_content=self._should_include_reasoning_content(model_settings), ) # Fix for interleaved thinking bug: reorder messages to ensure tool_use comes before tool_result # noqa: E501 @@ -436,6 +438,25 @@ async def _fetch_response( ) return response, ret + def _should_include_reasoning_content(self, model_settings: ModelSettings) -> bool: + """Determine whether to forward reasoning_content on assistant messages. + + DeepSeek thinking mode requires reasoning_content to be present on messages with tool + calls, otherwise the API returns a 400. + """ + model_name = str(self.model).lower() + base_url = (self.base_url or "").lower() + + if "deepseek" in model_name or "deepseek.com" in base_url: + return True + + if isinstance(model_settings.extra_body, dict) and "thinking" in model_settings.extra_body: + return True + if model_settings.extra_args and "thinking" in model_settings.extra_args: + return True + + return False + def _fix_tool_message_ordering( self, messages: list[ChatCompletionMessageParam] ) -> list[ChatCompletionMessageParam]: diff --git a/src/agents/models/chatcmpl_converter.py b/src/agents/models/chatcmpl_converter.py index bc0304be0..024e59ac6 100644 --- a/src/agents/models/chatcmpl_converter.py +++ b/src/agents/models/chatcmpl_converter.py @@ -340,6 +340,7 @@ def items_to_messages( cls, items: str | Iterable[TResponseInputItem], preserve_thinking_blocks: bool = False, + include_reasoning_content: bool = False, ) -> list[ChatCompletionMessageParam]: """ Convert a sequence of 'Item' objects into a list of ChatCompletionMessageParam. @@ -372,6 +373,21 @@ def items_to_messages( result: list[ChatCompletionMessageParam] = [] current_assistant_msg: ChatCompletionAssistantMessageParam | None = None pending_thinking_blocks: list[dict[str, str]] | None = None + pending_reasoning_content: str | None = None + + def apply_pending_reasoning_content( + message: ChatCompletionAssistantMessageParam, + ) -> None: + nonlocal pending_reasoning_content + if ( + not include_reasoning_content + or pending_reasoning_content is None + or "reasoning_content" in message + ): + return + + cast(dict[str, Any], message)["reasoning_content"] = pending_reasoning_content + pending_reasoning_content = None def flush_assistant_message() -> None: nonlocal current_assistant_msg @@ -387,6 +403,9 @@ def ensure_assistant_message() -> ChatCompletionAssistantMessageParam: if current_assistant_msg is None: current_assistant_msg = ChatCompletionAssistantMessageParam(role="assistant") current_assistant_msg["tool_calls"] = [] + apply_pending_reasoning_content(current_assistant_msg) + else: + apply_pending_reasoning_content(current_assistant_msg) return current_assistant_msg @@ -479,6 +498,7 @@ def ensure_assistant_message() -> ChatCompletionAssistantMessageParam: new_asst["content"] = combined new_asst["tool_calls"] = [] + apply_pending_reasoning_content(new_asst) current_assistant_msg = new_asst # 4) function/file-search calls => attach to assistant @@ -556,6 +576,32 @@ def ensure_assistant_message() -> ChatCompletionAssistantMessageParam: # 7) reasoning message => extract thinking blocks if present elif reasoning_item := cls.maybe_reasoning_message(item): + # Capture reasoning content if present so we can attach it to the next assistant + # message (required by some providers for tool calls). + summary_items = reasoning_item.get("summary") + if ( + include_reasoning_content + and isinstance(summary_items, list) + and len(summary_items) > 0 + ): + reasoning_text = summary_items[0].get("text") + if reasoning_text is not None: + pending_reasoning_content = reasoning_text + if ( + include_reasoning_content + and pending_reasoning_content is None + and isinstance(reasoning_item.get("content"), list) + ): + reasoning_texts = [ + content_item.get("text") + for content_item in cast(list[dict[str, Any]], reasoning_item["content"]) + if isinstance(content_item, dict) + and content_item.get("type") == "reasoning_text" + and content_item.get("text") is not None + ] + if reasoning_texts: + pending_reasoning_content = "".join(cast(list[str], reasoning_texts)) + # Reconstruct thinking blocks from content (text) and encrypted_content (signature) content_items = reasoning_item.get("content", []) encrypted_content = reasoning_item.get("encrypted_content") diff --git a/tests/test_anthropic_thinking_blocks.py b/tests/test_anthropic_thinking_blocks.py index 8fbc59833..6971e442b 100644 --- a/tests/test_anthropic_thinking_blocks.py +++ b/tests/test_anthropic_thinking_blocks.py @@ -16,6 +16,7 @@ from openai.types.chat.chat_completion_message_tool_call import Function from agents.extensions.models.litellm_model import InternalChatCompletionMessage +from agents.items import TResponseInputItem from agents.models.chatcmpl_converter import Converter @@ -58,7 +59,7 @@ def test_converter_skips_reasoning_items(): ] # Convert to messages - messages = Converter.items_to_messages(test_items) # type: ignore[arg-type] + messages = Converter.items_to_messages(cast(list[TResponseInputItem], test_items)) # Should have user message and assistant message, but no reasoning content assert len(messages) == 2 @@ -242,3 +243,34 @@ def test_anthropic_thinking_blocks_with_tool_calls(): tool_calls = assistant_msg.get("tool_calls", []) assert len(cast(list[Any], tool_calls)) == 1, "Tool calls should be preserved" assert cast(list[Any], tool_calls)[0]["function"]["name"] == "get_weather" + + +def test_reasoning_content_added_when_enabled(): + """ + Verify reasoning content is attached to the assistant tool-call message when requested. + """ + test_items: list[dict[str, Any]] = [ + {"role": "user", "content": "Hello"}, + { + "id": "reasoning_123", + "type": "reasoning", + "summary": [{"text": "Thinking about the weather", "type": "summary_text"}], + }, + { + "id": "call_123", + "type": "function_call", + "name": "get_weather", + "arguments": '{"city": "Tokyo"}', + "call_id": "call_123", + }, + ] + + messages = Converter.items_to_messages( + cast(list[TResponseInputItem], test_items), + include_reasoning_content=True, + ) + + assistant_msg = next(msg for msg in messages if msg.get("role") == "assistant") + assert assistant_msg.get("reasoning_content") == "Thinking about the weather" + tool_calls = assistant_msg.get("tool_calls") + assert tool_calls and len(cast(list[Any], tool_calls)) == 1 From 2de7412aefbcd376e2751cde478427cee0af1069 Mon Sep 17 00:00:00 2001 From: Harrrryz Date: Sun, 7 Dec 2025 22:33:32 -0500 Subject: [PATCH 2/2] Handle DeepSeek reasoning content on OpenAI client path --- src/agents/models/openai_chatcompletions.py | 20 +++- tests/test_reasoning_content.py | 121 ++++++++++++++++++++ 2 files changed, 140 insertions(+), 1 deletion(-) diff --git a/src/agents/models/openai_chatcompletions.py b/src/agents/models/openai_chatcompletions.py index 76f36d86b..27f7bc0ac 100644 --- a/src/agents/models/openai_chatcompletions.py +++ b/src/agents/models/openai_chatcompletions.py @@ -218,7 +218,10 @@ async def _fetch_response( stream: bool = False, prompt: ResponsePromptParam | None = None, ) -> ChatCompletion | tuple[Response, AsyncStream[ChatCompletionChunk]]: - converted_messages = Converter.items_to_messages(input) + include_reasoning_content = self._should_include_reasoning_content(model_settings) + converted_messages = Converter.items_to_messages( + input, include_reasoning_content=include_reasoning_content + ) if system_instructions: converted_messages.insert( @@ -337,6 +340,21 @@ async def _fetch_response( ) return response, ret + def _should_include_reasoning_content(self, model_settings: ModelSettings) -> bool: + """Determine whether to forward reasoning_content on assistant messages.""" + model_name = str(self.model).lower() + base_url = str(getattr(self._client, "base_url", "") or "").lower() + + if "deepseek" in model_name or "deepseek.com" in base_url: + return True + + if isinstance(model_settings.extra_body, dict) and "thinking" in model_settings.extra_body: + return True + if model_settings.extra_args and "thinking" in model_settings.extra_args: + return True + + return False + def _get_client(self) -> AsyncOpenAI: if self._client is None: self._client = AsyncOpenAI() diff --git a/tests/test_reasoning_content.py b/tests/test_reasoning_content.py index b9d7fa91f..9d387ab51 100644 --- a/tests/test_reasoning_content.py +++ b/tests/test_reasoning_content.py @@ -4,7 +4,9 @@ from typing import Any, cast import pytest +from openai import AsyncOpenAI from openai.types.chat import ChatCompletion, ChatCompletionChunk, ChatCompletionMessage +from openai.types.chat.chat_completion import Choice as CompletionChoice from openai.types.chat.chat_completion_chunk import Choice, ChoiceDelta from openai.types.completion_usage import ( CompletionTokensDetails, @@ -19,6 +21,7 @@ ) from agents.model_settings import ModelSettings +from agents.models.chatcmpl_converter import Converter from agents.models.interface import ModelTracing from agents.models.openai_chatcompletions import OpenAIChatCompletionsModel from agents.models.openai_provider import OpenAIProvider @@ -340,3 +343,121 @@ async def patched_fetch_response(self, *args, **kwargs): assert isinstance(response_event.response.output[0], ResponseOutputMessage) assert isinstance(response_event.response.output[0].content[0], ResponseOutputText) assert response_event.response.output[0].content[0].text == "The answer is 42" + + +@pytest.mark.allow_call_model_methods +@pytest.mark.asyncio +async def test_openai_chatcompletions_includes_reasoning_for_deepseek(monkeypatch) -> None: + """ + Ensure reasoning_content is forwarded when calling DeepSeek via OpenAI-compatible client. + """ + + def spy_items_to_messages( + items: Any, preserve_thinking_blocks: bool = False, include_reasoning_content: bool = False + ): + spy_items_to_messages.include_reasoning_content = include_reasoning_content # type: ignore[attr-defined] # noqa: E501 + return [] + + monkeypatch.setattr(Converter, "items_to_messages", staticmethod(spy_items_to_messages)) + + class DummyCompletions: + async def create(self, **kwargs): + return ChatCompletion( + id="resp-id", + created=0, + model="deepseek-reasoner", + object="chat.completion", + choices=[ + CompletionChoice( + index=0, + finish_reason="stop", + message=ChatCompletionMessage(role="assistant", content="Hi"), + ) + ], + usage=CompletionUsage(completion_tokens=1, prompt_tokens=1, total_tokens=2), + ) + + class DummyChat: + def __init__(self): + self.completions = DummyCompletions() + + class DummyClient: + def __init__(self): + self.chat = DummyChat() + self.base_url = "https://api.deepseek.com" + + model = OpenAIChatCompletionsModel("deepseek-reasoner", cast(AsyncOpenAI, DummyClient())) + + await model.get_response( + system_instructions=None, + input="", + model_settings=ModelSettings(), + tools=[], + output_schema=None, + handoffs=[], + tracing=ModelTracing.DISABLED, + previous_response_id=None, + conversation_id=None, + prompt=None, + ) + + assert getattr(spy_items_to_messages, "include_reasoning_content", False) is True + + +@pytest.mark.allow_call_model_methods +@pytest.mark.asyncio +async def test_openai_chatcompletions_reasoning_disabled_for_non_deepseek(monkeypatch) -> None: + """ + Verify reasoning_content is not added for non-DeepSeek calls by default. + """ + + def spy_items_to_messages( + items: Any, preserve_thinking_blocks: bool = False, include_reasoning_content: bool = False + ): + spy_items_to_messages.include_reasoning_content = include_reasoning_content # type: ignore[attr-defined] # noqa: E501 + return [] + + monkeypatch.setattr(Converter, "items_to_messages", staticmethod(spy_items_to_messages)) + + class DummyCompletions: + async def create(self, **kwargs): + return ChatCompletion( + id="resp-id", + created=0, + model="gpt-4o", + object="chat.completion", + choices=[ + CompletionChoice( + index=0, + finish_reason="stop", + message=ChatCompletionMessage(role="assistant", content="Hi"), + ) + ], + usage=CompletionUsage(completion_tokens=1, prompt_tokens=1, total_tokens=2), + ) + + class DummyChat: + def __init__(self): + self.completions = DummyCompletions() + + class DummyClient: + def __init__(self): + self.chat = DummyChat() + self.base_url = "https://api.openai.com" + + model = OpenAIChatCompletionsModel("gpt-4o", cast(AsyncOpenAI, DummyClient())) + + await model.get_response( + system_instructions=None, + input="", + model_settings=ModelSettings(), + tools=[], + output_schema=None, + handoffs=[], + tracing=ModelTracing.DISABLED, + previous_response_id=None, + conversation_id=None, + prompt=None, + ) + + assert getattr(spy_items_to_messages, "include_reasoning_content", False) is False