83 changes: 71 additions & 12 deletions dspy/clients/base_lm.py
@@ -225,24 +225,83 @@ def _process_response(self, response):
         Returns:
             List of processed outputs, which is always of size 1 because the Response API only supports one output.
         """

+        def _normalize_output_item(item):
Collaborator: Doesn't OpenAI provide this conversion out of the box?
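For reference, a minimal sketch of the out-of-the-box conversion this comment alludes to, assuming the SDK's output items are Pydantic models exposing model_dump() (the function_call branch below already relies on that method):

    # Hypothetical simplification, not part of the PR: lean on the SDK's
    # own serialization instead of normalizing attribute by attribute.
    def _normalize_output_item(item):
        if isinstance(item, dict):
            return item  # already the dict format produced with hosted tools
        return item.model_dump()  # Pydantic models serialize to plain dicts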

"""Convert response output item to dict format regardless of input type.

OpenAI's Responses API returns different formats based on tool usage:
- Without tools: Returns objects with attributes (.type, .content, etc.)
- With tools (e.g., web_search): Returns dicts with keys ('type', 'content', etc.)

This function normalizes both formats to dict for consistent processing.
"""
+            if isinstance(item, dict):
+                # Already a dict, return as-is
+                return item

+            # Convert object to dict
+            normalized = {"type": item.type}

+            # Handle content
+            if hasattr(item, "content") and item.content:
+                normalized["content"] = []
+                for content_item in item.content:
+                    if isinstance(content_item, dict):
+                        normalized["content"].append(content_item)
+                    else:
+                        normalized["content"].append({"text": content_item.text})

+            # Handle function calls (store original for model_dump if needed)
+            if hasattr(item, "name"):
+                normalized["name"] = item.name
+            if hasattr(item, "arguments"):
+                normalized["arguments"] = item.arguments
+            if hasattr(item, "model_dump"):
+                # Store the original object for model_dump
+                normalized["_original"] = item

+            # Handle reasoning content
+            if hasattr(item, "summary") and item.summary:
+                normalized["summary"] = []
+                for summary_item in item.summary:
+                    if isinstance(summary_item, dict):
+                        normalized["summary"].append(summary_item)
+                    else:
+                        normalized["summary"].append({"text": summary_item.text})

+            return normalized

+        # Normalize all output items to dict format first
+        normalized_outputs = [_normalize_output_item(item) for item in response.output]

         text_outputs = []
         tool_calls = []
         reasoning_contents = []

-        for output_item in response.output:
-            output_item_type = output_item.type
+        for output_item in normalized_outputs:
+            output_item_type = output_item.get("type")

if output_item_type == "message":
for content_item in output_item.content:
text_outputs.append(content_item.text)
for content_item in output_item.get("content", []):
text_outputs.append(content_item.get("text", ""))

elif output_item_type == "function_call":
tool_calls.append(output_item.model_dump())
# Use original object for model_dump if available, otherwise use dict
if "_original" in output_item:
tool_calls.append(output_item["_original"].model_dump())
else:
tool_calls.append(output_item)

elif output_item_type == "reasoning":
if getattr(output_item, "content", None) and len(output_item.content) > 0:
for content_item in output_item.content:
reasoning_contents.append(content_item.text)
elif getattr(output_item, "summary", None) and len(output_item.summary) > 0:
for summary_item in output_item.summary:
reasoning_contents.append(summary_item.text)
content = output_item.get("content", [])
summary = output_item.get("summary", [])

if content:
for content_item in content:
reasoning_contents.append(content_item.get("text", ""))
elif summary:
for summary_item in summary:
reasoning_contents.append(summary_item.get("text", ""))

         result = {}
         if len(text_outputs) > 0:
@@ -251,10 +310,10 @@ def _process_response(self, response):
result["tool_calls"] = tool_calls
if len(reasoning_contents) > 0:
result["reasoning_content"] = "".join(reasoning_contents)

+        # All `response.output` items map to one answer, so we return a list of size 1.
         return [result]


 def inspect_history(n: int = 1):
     """The global history shared across all LMs."""
     return pretty_print_history(GLOBAL_HISTORY, n)
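To illustrate the comment above `return [result]`: every item in `response.output` is folded into a single result dict, so callers always receive a one-element list. A minimal sketch with illustrative values, assuming the dict format described in the docstring above:

    # One reasoning item plus one message item still yield a single answer.
    output = [
        {"type": "reasoning", "summary": [{"text": "thinking..."}]},
        {"type": "message", "content": [{"text": "final answer"}]},
    ]
    # Processing a response with this output would return:
    #     [{"text": "final answer", "reasoning_content": "thinking..."}]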
204 changes: 204 additions & 0 deletions dspy/clients/test_base_lm_response_formats.py
@@ -0,0 +1,204 @@
"""
Unit tests for _process_response method handling both dict and object formats.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please don't create a new test file and use the existing test_base_lm.py

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also I don't think we don't need to add such a large number of tests.

Tests the fix for issue #8958 - web_search tools return dict format.
"""

import pytest

from dspy.clients.base_lm import BaseLM


class MockContent:
    """Mock content object (object format)"""
    def __init__(self, text):
        self.text = text


class MockOutputItem:
    """Mock output item (object format - without web_search)"""
    def __init__(self, item_type, content=None, summary=None):
        self.type = item_type
        if content:
            self.content = content
        if summary:
            self.summary = summary

    def model_dump(self):
        return {"type": self.type, "name": "test_function", "arguments": "{}"}


class MockResponse:
    """Mock response object"""
    def __init__(self, output):
        self.output = output
        self.usage = type("obj", (object,), {
            "completion_tokens": 10,
            "prompt_tokens": 5,
            "total_tokens": 15
        })()
        self.model = "gpt-4"


class TestProcessResponseFormats:
    """Test _process_response handles both dict and object formats"""

    @pytest.fixture
    def base_lm(self):
        """Create a BaseLM instance for testing"""
        return BaseLM(model="test-model", model_type="responses")

    def test_object_format_message(self, base_lm):
        """Test processing object format (normal responses without web_search)"""
        # Create mock response with object format
        mock_response = MockResponse(
            output=[
                MockOutputItem("message", content=[MockContent("Hello world")])
            ]
        )

        result = base_lm._process_response(mock_response)

        assert len(result) == 1
        assert result[0]["text"] == "Hello world"

    def test_dict_format_message(self, base_lm):
        """Test processing dict format (responses with web_search tools)"""
        # Create mock response with dict format (as returned by web_search)
        mock_response = MockResponse(
            output=[
                {
                    "type": "message",
                    "content": [{"text": "Hello from web search"}]
                }
            ]
        )

        result = base_lm._process_response(mock_response)

        assert len(result) == 1
        assert result[0]["text"] == "Hello from web search"

    def test_dict_format_with_multiple_content(self, base_lm):
        """Test dict format with multiple content items"""
        mock_response = MockResponse(
            output=[
                {
                    "type": "message",
                    "content": [
                        {"text": "Part 1"},
                        {"text": " Part 2"},
                        {"text": " Part 3"}
                    ]
                }
            ]
        )

        result = base_lm._process_response(mock_response)

        assert len(result) == 1
        assert result[0]["text"] == "Part 1 Part 2 Part 3"

    def test_object_format_function_call(self, base_lm):
        """Test function call in object format"""
        mock_item = MockOutputItem("function_call")
        mock_response = MockResponse(output=[mock_item])

        result = base_lm._process_response(mock_response)

        assert len(result) == 1
        assert "tool_calls" in result[0]
        assert len(result[0]["tool_calls"]) == 1

    def test_dict_format_function_call(self, base_lm):
        """Test function call in dict format"""
        mock_response = MockResponse(
            output=[
                {
                    "type": "function_call",
                    "name": "web_search",
                    "arguments": '{"query": "test"}'
                }
            ]
        )

        result = base_lm._process_response(mock_response)

        assert len(result) == 1
        assert "tool_calls" in result[0]
        assert result[0]["tool_calls"][0]["name"] == "web_search"

    def test_object_format_reasoning(self, base_lm):
        """Test reasoning content in object format"""
        mock_response = MockResponse(
            output=[
                MockOutputItem("reasoning", content=[MockContent("Thinking step 1")])
            ]
        )

        result = base_lm._process_response(mock_response)

        assert len(result) == 1
        assert "reasoning_content" in result[0]
        assert result[0]["reasoning_content"] == "Thinking step 1"

    def test_dict_format_reasoning(self, base_lm):
        """Test reasoning content in dict format"""
        mock_response = MockResponse(
            output=[
                {
                    "type": "reasoning",
                    "content": [{"text": "Reasoning step 1"}]
                }
            ]
        )

        result = base_lm._process_response(mock_response)

        assert len(result) == 1
        assert "reasoning_content" in result[0]
        assert result[0]["reasoning_content"] == "Reasoning step 1"

    def test_dict_format_reasoning_with_summary(self, base_lm):
        """Test reasoning with summary (fallback when no content)"""
        mock_response = MockResponse(
            output=[
                {
                    "type": "reasoning",
                    "summary": [{"text": "Summary text"}]
                }
            ]
        )

        result = base_lm._process_response(mock_response)

        assert len(result) == 1
        assert "reasoning_content" in result[0]
        assert result[0]["reasoning_content"] == "Summary text"

    def test_mixed_format_backwards_compatibility(self, base_lm):
        """Test that both formats can coexist (edge case)"""
        # Mix of object and dict formats in same response
        mock_response = MockResponse(
            output=[
                MockOutputItem("message", content=[MockContent("Object format")]),
                {"type": "message", "content": [{"text": " Dict format"}]}
            ]
        )

        result = base_lm._process_response(mock_response)

        assert len(result) == 1
        assert result[0]["text"] == "Object format Dict format"

    def test_empty_content(self, base_lm):
        """Test handling of empty content"""
        mock_response = MockResponse(
            output=[
                {"type": "message", "content": []}
            ]
        )

        result = base_lm._process_response(mock_response)

        assert len(result) == 1
        assert "text" not in result[0]  # No text key when no content