From 5afa38f1d601e9e0f2e36bcd92afe2dc16bd5ed0 Mon Sep 17 00:00:00 2001 From: Bhaskar Date: Fri, 24 Oct 2025 07:52:01 -0400 Subject: [PATCH 1/3] fix: handle dict format in response processing for web_search tools - Modified _process_response to handle both object and dict formats - web_search tools return response.output items as dicts instead of objects - Added isinstance checks throughout the method for type safety - Tested with real GPT-5 API and web_search - confirms fix works - Maintains backward compatibility with normal responses - Fixes AttributeError: 'dict' object has no attribute 'type' - Resolves #8958 Signed-off-by: Bhaskar --- dspy/clients/base_lm.py | 52 ++++++++++++++++++++++++++++++++--------- 1 file changed, 41 insertions(+), 11 deletions(-) diff --git a/dspy/clients/base_lm.py b/dspy/clients/base_lm.py index 8d3e6bb65f..9828012879 100644 --- a/dspy/clients/base_lm.py +++ b/dspy/clients/base_lm.py @@ -230,19 +230,50 @@ def _process_response(self, response): reasoning_contents = [] for output_item in response.output: - output_item_type = output_item.type + # Handle both object and dict formats (web_search returns dicts) + if isinstance(output_item, dict): + output_item_type = output_item.get("type") + else: + output_item_type = output_item.type + if output_item_type == "message": - for content_item in output_item.content: - text_outputs.append(content_item.text) + if isinstance(output_item, dict): + content = output_item.get("content", []) + else: + content = output_item.content + + for content_item in content: + if isinstance(content_item, dict): + text_outputs.append(content_item.get("text", "")) + else: + text_outputs.append(content_item.text) + elif output_item_type == "function_call": - tool_calls.append(output_item.model_dump()) + if isinstance(output_item, dict): + tool_calls.append(output_item) + else: + tool_calls.append(output_item.model_dump()) + elif output_item_type == "reasoning": - if getattr(output_item, "content", None) and len(output_item.content) > 0: - for content_item in output_item.content: - reasoning_contents.append(content_item.text) - elif getattr(output_item, "summary", None) and len(output_item.summary) > 0: - for summary_item in output_item.summary: - reasoning_contents.append(summary_item.text) + if isinstance(output_item, dict): + content = output_item.get("content", []) + summary = output_item.get("summary", []) + else: + content = getattr(output_item, "content", None) + summary = getattr(output_item, "summary", None) + + if content and len(content) > 0: + for content_item in content: + if isinstance(content_item, dict): + reasoning_contents.append(content_item.get("text", "")) + else: + reasoning_contents.append(content_item.text) + elif summary and len(summary) > 0: + for summary_item in summary: + if isinstance(summary_item, dict): + reasoning_contents.append(summary_item.get("text", "")) + else: + reasoning_contents.append(summary_item.text) result = {} if len(text_outputs) > 0: @@ -254,7 +285,6 @@ def _process_response(self, response): # All `response.output` items map to one answer, so we return a list of size 1. 
return [result] - def inspect_history(n: int = 1): """The global history shared across all LMs.""" return pretty_print_history(GLOBAL_HISTORY, n) From 2dd198f00b400220a81f7c9aa41bf95b648162d9 Mon Sep 17 00:00:00 2001 From: Bhaskar Date: Sat, 25 Oct 2025 22:37:15 -0400 Subject: [PATCH 2/3] fix: remove trailing whitespace for ruff compliance Signed-off-by: Bhaskar --- dspy/clients/base_lm.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/dspy/clients/base_lm.py b/dspy/clients/base_lm.py index 9828012879..3417831c99 100644 --- a/dspy/clients/base_lm.py +++ b/dspy/clients/base_lm.py @@ -241,7 +241,6 @@ def _process_response(self, response): content = output_item.get("content", []) else: content = output_item.content - for content_item in content: if isinstance(content_item, dict): text_outputs.append(content_item.get("text", "")) @@ -261,7 +260,6 @@ def _process_response(self, response): else: content = getattr(output_item, "content", None) summary = getattr(output_item, "summary", None) - if content and len(content) > 0: for content_item in content: if isinstance(content_item, dict): From 61ca8246661d46af7331de5a864b5b961eb7ebd8 Mon Sep 17 00:00:00 2001 From: Bhaskar Date: Sat, 25 Oct 2025 22:53:20 -0400 Subject: [PATCH 3/3] refactor: normalize response format early + add comprehensive tests - Refactor _process_response to convert all outputs to dict format first - Simplifies parsing logic (no isinstance checks throughout) - Add comprehensive unit tests for both dict and object formats - Tests cover: message, function_call, reasoning types - Tests cover: object format (normal) and dict format (web_search) - Addresses @TomeHirata feedback for cleaner implementation - Fixes #8958 --- dspy/clients/base_lm.py | 99 ++++++--- dspy/clients/test_base_lm_response_formats.py | 204 ++++++++++++++++++ 2 files changed, 269 insertions(+), 34 deletions(-) create mode 100644 dspy/clients/test_base_lm_response_formats.py diff --git a/dspy/clients/base_lm.py b/dspy/clients/base_lm.py index 3417831c99..a50442b1cc 100644 --- a/dspy/clients/base_lm.py +++ b/dspy/clients/base_lm.py @@ -225,53 +225,83 @@ def _process_response(self, response): Returns: List of processed outputs, which is always of size 1 because the Response API only supports one output. """ + + def _normalize_output_item(item): + """Convert response output item to dict format regardless of input type. + + OpenAI's Responses API returns different formats based on tool usage: + - Without tools: Returns objects with attributes (.type, .content, etc.) + - With tools (e.g., web_search): Returns dicts with keys ('type', 'content', etc.) + + This function normalizes both formats to dict for consistent processing. 
+ """ + if isinstance(item, dict): + # Already a dict, return as-is + return item + + # Convert object to dict + normalized = {"type": item.type} + + # Handle content + if hasattr(item, "content") and item.content: + normalized["content"] = [] + for content_item in item.content: + if isinstance(content_item, dict): + normalized["content"].append(content_item) + else: + normalized["content"].append({"text": content_item.text}) + + # Handle function calls (store original for model_dump if needed) + if hasattr(item, "name"): + normalized["name"] = item.name + if hasattr(item, "arguments"): + normalized["arguments"] = item.arguments + if hasattr(item, "model_dump"): + # Store the original object for model_dump + normalized["_original"] = item + + # Handle reasoning content + if hasattr(item, "summary") and item.summary: + normalized["summary"] = [] + for summary_item in item.summary: + if isinstance(summary_item, dict): + normalized["summary"].append(summary_item) + else: + normalized["summary"].append({"text": summary_item.text}) + + return normalized + + # Normalize all output items to dict format first + normalized_outputs = [_normalize_output_item(item) for item in response.output] + text_outputs = [] tool_calls = [] reasoning_contents = [] - for output_item in response.output: - # Handle both object and dict formats (web_search returns dicts) - if isinstance(output_item, dict): - output_item_type = output_item.get("type") - else: - output_item_type = output_item.type + for output_item in normalized_outputs: + output_item_type = output_item.get("type") if output_item_type == "message": - if isinstance(output_item, dict): - content = output_item.get("content", []) - else: - content = output_item.content - for content_item in content: - if isinstance(content_item, dict): - text_outputs.append(content_item.get("text", "")) - else: - text_outputs.append(content_item.text) + for content_item in output_item.get("content", []): + text_outputs.append(content_item.get("text", "")) elif output_item_type == "function_call": - if isinstance(output_item, dict): - tool_calls.append(output_item) + # Use original object for model_dump if available, otherwise use dict + if "_original" in output_item: + tool_calls.append(output_item["_original"].model_dump()) else: - tool_calls.append(output_item.model_dump()) + tool_calls.append(output_item) elif output_item_type == "reasoning": - if isinstance(output_item, dict): - content = output_item.get("content", []) - summary = output_item.get("summary", []) - else: - content = getattr(output_item, "content", None) - summary = getattr(output_item, "summary", None) - if content and len(content) > 0: + content = output_item.get("content", []) + summary = output_item.get("summary", []) + + if content: for content_item in content: - if isinstance(content_item, dict): - reasoning_contents.append(content_item.get("text", "")) - else: - reasoning_contents.append(content_item.text) - elif summary and len(summary) > 0: + reasoning_contents.append(content_item.get("text", "")) + elif summary: for summary_item in summary: - if isinstance(summary_item, dict): - reasoning_contents.append(summary_item.get("text", "")) - else: - reasoning_contents.append(summary_item.text) + reasoning_contents.append(summary_item.get("text", "")) result = {} if len(text_outputs) > 0: @@ -280,6 +310,7 @@ def _process_response(self, response): result["tool_calls"] = tool_calls if len(reasoning_contents) > 0: result["reasoning_content"] = "".join(reasoning_contents) + # All `response.output` items 
map to one answer, so we return a list of size 1. return [result] diff --git a/dspy/clients/test_base_lm_response_formats.py b/dspy/clients/test_base_lm_response_formats.py new file mode 100644 index 0000000000..361f35a8ea --- /dev/null +++ b/dspy/clients/test_base_lm_response_formats.py @@ -0,0 +1,204 @@ +""" +Unit tests for _process_response method handling both dict and object formats. +Tests the fix for issue #8958 - web_search tools return dict format. +""" + +import pytest + +from dspy.clients.base_lm import BaseLM + + +class MockContent: + """Mock content object (object format)""" + def __init__(self, text): + self.text = text + + +class MockOutputItem: + """Mock output item (object format - without web_search)""" + def __init__(self, item_type, content=None, summary=None): + self.type = item_type + if content: + self.content = content + if summary: + self.summary = summary + + def model_dump(self): + return {"type": self.type, "name": "test_function", "arguments": "{}"} + + +class MockResponse: + """Mock response object""" + def __init__(self, output): + self.output = output + self.usage = type("obj", (object,), { + "completion_tokens": 10, + "prompt_tokens": 5, + "total_tokens": 15 + })() + self.model = "gpt-4" + + +class TestProcessResponseFormats: + """Test _process_response handles both dict and object formats""" + + @pytest.fixture + def base_lm(self): + """Create a BaseLM instance for testing""" + return BaseLM(model="test-model", model_type="responses") + + def test_object_format_message(self, base_lm): + """Test processing object format (normal responses without web_search)""" + # Create mock response with object format + mock_response = MockResponse( + output=[ + MockOutputItem("message", content=[MockContent("Hello world")]) + ] + ) + + result = base_lm._process_response(mock_response) + + assert len(result) == 1 + assert result[0]["text"] == "Hello world" + + def test_dict_format_message(self, base_lm): + """Test processing dict format (responses with web_search tools)""" + # Create mock response with dict format (as returned by web_search) + mock_response = MockResponse( + output=[ + { + "type": "message", + "content": [{"text": "Hello from web search"}] + } + ] + ) + + result = base_lm._process_response(mock_response) + + assert len(result) == 1 + assert result[0]["text"] == "Hello from web search" + + def test_dict_format_with_multiple_content(self, base_lm): + """Test dict format with multiple content items""" + mock_response = MockResponse( + output=[ + { + "type": "message", + "content": [ + {"text": "Part 1"}, + {"text": " Part 2"}, + {"text": " Part 3"} + ] + } + ] + ) + + result = base_lm._process_response(mock_response) + + assert len(result) == 1 + assert result[0]["text"] == "Part 1 Part 2 Part 3" + + def test_object_format_function_call(self, base_lm): + """Test function call in object format""" + mock_item = MockOutputItem("function_call") + mock_response = MockResponse(output=[mock_item]) + + result = base_lm._process_response(mock_response) + + assert len(result) == 1 + assert "tool_calls" in result[0] + assert len(result[0]["tool_calls"]) == 1 + + def test_dict_format_function_call(self, base_lm): + """Test function call in dict format""" + mock_response = MockResponse( + output=[ + { + "type": "function_call", + "name": "web_search", + "arguments": '{"query": "test"}' + } + ] + ) + + result = base_lm._process_response(mock_response) + + assert len(result) == 1 + assert "tool_calls" in result[0] + assert result[0]["tool_calls"][0]["name"] == "web_search" 
+ + def test_object_format_reasoning(self, base_lm): + """Test reasoning content in object format""" + mock_response = MockResponse( + output=[ + MockOutputItem("reasoning", content=[MockContent("Thinking step 1")]) + ] + ) + + result = base_lm._process_response(mock_response) + + assert len(result) == 1 + assert "reasoning_content" in result[0] + assert result[0]["reasoning_content"] == "Thinking step 1" + + def test_dict_format_reasoning(self, base_lm): + """Test reasoning content in dict format""" + mock_response = MockResponse( + output=[ + { + "type": "reasoning", + "content": [{"text": "Reasoning step 1"}] + } + ] + ) + + result = base_lm._process_response(mock_response) + + assert len(result) == 1 + assert "reasoning_content" in result[0] + assert result[0]["reasoning_content"] == "Reasoning step 1" + + def test_dict_format_reasoning_with_summary(self, base_lm): + """Test reasoning with summary (fallback when no content)""" + mock_response = MockResponse( + output=[ + { + "type": "reasoning", + "summary": [{"text": "Summary text"}] + } + ] + ) + + result = base_lm._process_response(mock_response) + + assert len(result) == 1 + assert "reasoning_content" in result[0] + assert result[0]["reasoning_content"] == "Summary text" + + def test_mixed_format_backwards_compatibility(self, base_lm): + """Test that both formats can coexist (edge case)""" + # Mix of object and dict formats in same response + mock_response = MockResponse( + output=[ + MockOutputItem("message", content=[MockContent("Object format")]), + {"type": "message", "content": [{"text": " Dict format"}]} + ] + ) + + result = base_lm._process_response(mock_response) + + assert len(result) == 1 + assert result[0]["text"] == "Object format Dict format" + + def test_empty_content(self, base_lm): + """Test handling of empty content""" + mock_response = MockResponse( + output=[ + {"type": "message", "content": []} + ] + ) + + result = base_lm._process_response(mock_response) + + assert len(result) == 1 + assert "text" not in result[0] # No text key when no content
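
For reviewers who want to try the fix outside pytest, below is a minimal standalone sketch (not part of the patch) that mirrors test_mixed_format_backwards_compatibility: it builds a fake Responses-API payload containing one object-style output item and one dict-style item (as web_search returns) and feeds it to _process_response. The SimpleNamespace stand-ins and the expected printed result are assumptions based on the code in this series, not output from a real API call.

# Hypothetical demo script, not part of the patch; assumes a dspy checkout
# with this series applied is importable.
from types import SimpleNamespace

from dspy.clients.base_lm import BaseLM

# Object-style output item, as the Responses API returns when no tools are used.
object_item = SimpleNamespace(
    type="message",
    content=[SimpleNamespace(text="Object format")],
)

# Dict-style output item, as returned when web_search (or another tool) runs.
dict_item = {"type": "message", "content": [{"text": " Dict format"}]}

# Fake response object mirroring the MockResponse helper in the new tests.
fake_response = SimpleNamespace(
    output=[object_item, dict_item],
    usage=SimpleNamespace(completion_tokens=10, prompt_tokens=5, total_tokens=15),
    model="gpt-4",
)

lm = BaseLM(model="test-model", model_type="responses")
print(lm._process_response(fake_response))
# Expected with this patch applied: [{'text': 'Object format Dict format'}]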