From 5afa38f1d601e9e0f2e36bcd92afe2dc16bd5ed0 Mon Sep 17 00:00:00 2001 From: Bhaskar Date: Fri, 24 Oct 2025 07:52:01 -0400 Subject: [PATCH 1/3] fix: handle dict format in response processing for web_search tools - Modified _process_response to handle both object and dict formats - web_search tools return response.output items as dicts instead of objects - Added isinstance checks throughout the method for type safety - Tested with real GPT-5 API and web_search - confirms fix works - Maintains backward compatibility with normal responses - Fixes AttributeError: 'dict' object has no attribute 'type' - Resolves #8958 Signed-off-by: Bhaskar --- dspy/clients/base_lm.py | 52 ++++++++++++++++++++++++++++++++--------- 1 file changed, 41 insertions(+), 11 deletions(-) diff --git a/dspy/clients/base_lm.py b/dspy/clients/base_lm.py index 8d3e6bb65f..9828012879 100644 --- a/dspy/clients/base_lm.py +++ b/dspy/clients/base_lm.py @@ -230,19 +230,50 @@ def _process_response(self, response): reasoning_contents = [] for output_item in response.output: - output_item_type = output_item.type + # Handle both object and dict formats (web_search returns dicts) + if isinstance(output_item, dict): + output_item_type = output_item.get("type") + else: + output_item_type = output_item.type + if output_item_type == "message": - for content_item in output_item.content: - text_outputs.append(content_item.text) + if isinstance(output_item, dict): + content = output_item.get("content", []) + else: + content = output_item.content + + for content_item in content: + if isinstance(content_item, dict): + text_outputs.append(content_item.get("text", "")) + else: + text_outputs.append(content_item.text) + elif output_item_type == "function_call": - tool_calls.append(output_item.model_dump()) + if isinstance(output_item, dict): + tool_calls.append(output_item) + else: + tool_calls.append(output_item.model_dump()) + elif output_item_type == "reasoning": - if getattr(output_item, "content", None) and len(output_item.content) > 0: - for content_item in output_item.content: - reasoning_contents.append(content_item.text) - elif getattr(output_item, "summary", None) and len(output_item.summary) > 0: - for summary_item in output_item.summary: - reasoning_contents.append(summary_item.text) + if isinstance(output_item, dict): + content = output_item.get("content", []) + summary = output_item.get("summary", []) + else: + content = getattr(output_item, "content", None) + summary = getattr(output_item, "summary", None) + + if content and len(content) > 0: + for content_item in content: + if isinstance(content_item, dict): + reasoning_contents.append(content_item.get("text", "")) + else: + reasoning_contents.append(content_item.text) + elif summary and len(summary) > 0: + for summary_item in summary: + if isinstance(summary_item, dict): + reasoning_contents.append(summary_item.get("text", "")) + else: + reasoning_contents.append(summary_item.text) result = {} if len(text_outputs) > 0: @@ -254,7 +285,6 @@ def _process_response(self, response): # All `response.output` items map to one answer, so we return a list of size 1. 
return [result] - def inspect_history(n: int = 1): """The global history shared across all LMs.""" return pretty_print_history(GLOBAL_HISTORY, n) From 2dd198f00b400220a81f7c9aa41bf95b648162d9 Mon Sep 17 00:00:00 2001 From: Bhaskar Date: Sat, 25 Oct 2025 22:37:15 -0400 Subject: [PATCH 2/3] fix: remove trailing whitespace for ruff compliance Signed-off-by: Bhaskar --- dspy/clients/base_lm.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/dspy/clients/base_lm.py b/dspy/clients/base_lm.py index 9828012879..3417831c99 100644 --- a/dspy/clients/base_lm.py +++ b/dspy/clients/base_lm.py @@ -241,7 +241,6 @@ def _process_response(self, response): content = output_item.get("content", []) else: content = output_item.content - for content_item in content: if isinstance(content_item, dict): text_outputs.append(content_item.get("text", "")) @@ -261,7 +260,6 @@ def _process_response(self, response): else: content = getattr(output_item, "content", None) summary = getattr(output_item, "summary", None) - if content and len(content) > 0: for content_item in content: if isinstance(content_item, dict): From 61ca8246661d46af7331de5a864b5b961eb7ebd8 Mon Sep 17 00:00:00 2001 From: Bhaskar Date: Sat, 25 Oct 2025 22:53:20 -0400 Subject: [PATCH 3/3] refactor: normalize response format early + add comprehensive tests - Refactor _process_response to convert all outputs to dict format first - Simplifies parsing logic (no isinstance checks throughout) - Add comprehensive unit tests for both dict and object formats - Tests cover: message, function_call, reasoning types - Tests cover: object format (normal) and dict format (web_search) - Addresses @TomeHirata feedback for cleaner implementation - Fixes #8958 --- dspy/clients/base_lm.py | 99 ++++++--- dspy/clients/test_base_lm_response_formats.py | 204 ++++++++++++++++++ 2 files changed, 269 insertions(+), 34 deletions(-) create mode 100644 dspy/clients/test_base_lm_response_formats.py diff --git a/dspy/clients/base_lm.py b/dspy/clients/base_lm.py index 3417831c99..a50442b1cc 100644 --- a/dspy/clients/base_lm.py +++ b/dspy/clients/base_lm.py @@ -225,53 +225,83 @@ def _process_response(self, response): Returns: List of processed outputs, which is always of size 1 because the Response API only supports one output. """ + + def _normalize_output_item(item): + """Convert response output item to dict format regardless of input type. + + OpenAI's Responses API returns different formats based on tool usage: + - Without tools: Returns objects with attributes (.type, .content, etc.) + - With tools (e.g., web_search): Returns dicts with keys ('type', 'content', etc.) + + This function normalizes both formats to dict for consistent processing. 
+ """ + if isinstance(item, dict): + # Already a dict, return as-is + return item + + # Convert object to dict + normalized = {"type": item.type} + + # Handle content + if hasattr(item, "content") and item.content: + normalized["content"] = [] + for content_item in item.content: + if isinstance(content_item, dict): + normalized["content"].append(content_item) + else: + normalized["content"].append({"text": content_item.text}) + + # Handle function calls (store original for model_dump if needed) + if hasattr(item, "name"): + normalized["name"] = item.name + if hasattr(item, "arguments"): + normalized["arguments"] = item.arguments + if hasattr(item, "model_dump"): + # Store the original object for model_dump + normalized["_original"] = item + + # Handle reasoning content + if hasattr(item, "summary") and item.summary: + normalized["summary"] = [] + for summary_item in item.summary: + if isinstance(summary_item, dict): + normalized["summary"].append(summary_item) + else: + normalized["summary"].append({"text": summary_item.text}) + + return normalized + + # Normalize all output items to dict format first + normalized_outputs = [_normalize_output_item(item) for item in response.output] + text_outputs = [] tool_calls = [] reasoning_contents = [] - for output_item in response.output: - # Handle both object and dict formats (web_search returns dicts) - if isinstance(output_item, dict): - output_item_type = output_item.get("type") - else: - output_item_type = output_item.type + for output_item in normalized_outputs: + output_item_type = output_item.get("type") if output_item_type == "message": - if isinstance(output_item, dict): - content = output_item.get("content", []) - else: - content = output_item.content - for content_item in content: - if isinstance(content_item, dict): - text_outputs.append(content_item.get("text", "")) - else: - text_outputs.append(content_item.text) + for content_item in output_item.get("content", []): + text_outputs.append(content_item.get("text", "")) elif output_item_type == "function_call": - if isinstance(output_item, dict): - tool_calls.append(output_item) + # Use original object for model_dump if available, otherwise use dict + if "_original" in output_item: + tool_calls.append(output_item["_original"].model_dump()) else: - tool_calls.append(output_item.model_dump()) + tool_calls.append(output_item) elif output_item_type == "reasoning": - if isinstance(output_item, dict): - content = output_item.get("content", []) - summary = output_item.get("summary", []) - else: - content = getattr(output_item, "content", None) - summary = getattr(output_item, "summary", None) - if content and len(content) > 0: + content = output_item.get("content", []) + summary = output_item.get("summary", []) + + if content: for content_item in content: - if isinstance(content_item, dict): - reasoning_contents.append(content_item.get("text", "")) - else: - reasoning_contents.append(content_item.text) - elif summary and len(summary) > 0: + reasoning_contents.append(content_item.get("text", "")) + elif summary: for summary_item in summary: - if isinstance(summary_item, dict): - reasoning_contents.append(summary_item.get("text", "")) - else: - reasoning_contents.append(summary_item.text) + reasoning_contents.append(summary_item.get("text", "")) result = {} if len(text_outputs) > 0: @@ -280,6 +310,7 @@ def _process_response(self, response): result["tool_calls"] = tool_calls if len(reasoning_contents) > 0: result["reasoning_content"] = "".join(reasoning_contents) + # All `response.output` items 
map to one answer, so we return a list of size 1. return [result] diff --git a/dspy/clients/test_base_lm_response_formats.py b/dspy/clients/test_base_lm_response_formats.py new file mode 100644 index 0000000000..361f35a8ea --- /dev/null +++ b/dspy/clients/test_base_lm_response_formats.py @@ -0,0 +1,204 @@ +""" +Unit tests for _process_response method handling both dict and object formats. +Tests the fix for issue #8958 - web_search tools return dict format. +""" + +import pytest + +from dspy.clients.base_lm import BaseLM + + +class MockContent: + """Mock content object (object format)""" + def __init__(self, text): + self.text = text + + +class MockOutputItem: + """Mock output item (object format - without web_search)""" + def __init__(self, item_type, content=None, summary=None): + self.type = item_type + if content: + self.content = content + if summary: + self.summary = summary + + def model_dump(self): + return {"type": self.type, "name": "test_function", "arguments": "{}"} + + +class MockResponse: + """Mock response object""" + def __init__(self, output): + self.output = output + self.usage = type("obj", (object,), { + "completion_tokens": 10, + "prompt_tokens": 5, + "total_tokens": 15 + })() + self.model = "gpt-4" + + +class TestProcessResponseFormats: + """Test _process_response handles both dict and object formats""" + + @pytest.fixture + def base_lm(self): + """Create a BaseLM instance for testing""" + return BaseLM(model="test-model", model_type="responses") + + def test_object_format_message(self, base_lm): + """Test processing object format (normal responses without web_search)""" + # Create mock response with object format + mock_response = MockResponse( + output=[ + MockOutputItem("message", content=[MockContent("Hello world")]) + ] + ) + + result = base_lm._process_response(mock_response) + + assert len(result) == 1 + assert result[0]["text"] == "Hello world" + + def test_dict_format_message(self, base_lm): + """Test processing dict format (responses with web_search tools)""" + # Create mock response with dict format (as returned by web_search) + mock_response = MockResponse( + output=[ + { + "type": "message", + "content": [{"text": "Hello from web search"}] + } + ] + ) + + result = base_lm._process_response(mock_response) + + assert len(result) == 1 + assert result[0]["text"] == "Hello from web search" + + def test_dict_format_with_multiple_content(self, base_lm): + """Test dict format with multiple content items""" + mock_response = MockResponse( + output=[ + { + "type": "message", + "content": [ + {"text": "Part 1"}, + {"text": " Part 2"}, + {"text": " Part 3"} + ] + } + ] + ) + + result = base_lm._process_response(mock_response) + + assert len(result) == 1 + assert result[0]["text"] == "Part 1 Part 2 Part 3" + + def test_object_format_function_call(self, base_lm): + """Test function call in object format""" + mock_item = MockOutputItem("function_call") + mock_response = MockResponse(output=[mock_item]) + + result = base_lm._process_response(mock_response) + + assert len(result) == 1 + assert "tool_calls" in result[0] + assert len(result[0]["tool_calls"]) == 1 + + def test_dict_format_function_call(self, base_lm): + """Test function call in dict format""" + mock_response = MockResponse( + output=[ + { + "type": "function_call", + "name": "web_search", + "arguments": '{"query": "test"}' + } + ] + ) + + result = base_lm._process_response(mock_response) + + assert len(result) == 1 + assert "tool_calls" in result[0] + assert result[0]["tool_calls"][0]["name"] == "web_search" 
+ + def test_object_format_reasoning(self, base_lm): + """Test reasoning content in object format""" + mock_response = MockResponse( + output=[ + MockOutputItem("reasoning", content=[MockContent("Thinking step 1")]) + ] + ) + + result = base_lm._process_response(mock_response) + + assert len(result) == 1 + assert "reasoning_content" in result[0] + assert result[0]["reasoning_content"] == "Thinking step 1" + + def test_dict_format_reasoning(self, base_lm): + """Test reasoning content in dict format""" + mock_response = MockResponse( + output=[ + { + "type": "reasoning", + "content": [{"text": "Reasoning step 1"}] + } + ] + ) + + result = base_lm._process_response(mock_response) + + assert len(result) == 1 + assert "reasoning_content" in result[0] + assert result[0]["reasoning_content"] == "Reasoning step 1" + + def test_dict_format_reasoning_with_summary(self, base_lm): + """Test reasoning with summary (fallback when no content)""" + mock_response = MockResponse( + output=[ + { + "type": "reasoning", + "summary": [{"text": "Summary text"}] + } + ] + ) + + result = base_lm._process_response(mock_response) + + assert len(result) == 1 + assert "reasoning_content" in result[0] + assert result[0]["reasoning_content"] == "Summary text" + + def test_mixed_format_backwards_compatibility(self, base_lm): + """Test that both formats can coexist (edge case)""" + # Mix of object and dict formats in same response + mock_response = MockResponse( + output=[ + MockOutputItem("message", content=[MockContent("Object format")]), + {"type": "message", "content": [{"text": " Dict format"}]} + ] + ) + + result = base_lm._process_response(mock_response) + + assert len(result) == 1 + assert result[0]["text"] == "Object format Dict format" + + def test_empty_content(self, base_lm): + """Test handling of empty content""" + mock_response = MockResponse( + output=[ + {"type": "message", "content": []} + ] + ) + + result = base_lm._process_response(mock_response) + + assert len(result) == 1 + assert "text" not in result[0] # No text key when no content
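
For reviewers who want to try the fix outside pytest, below is a minimal standalone sketch (not part of the patch) that mirrors test_mixed_format_backwards_compatibility: it builds a fake Responses-API payload containing one object-style output item and one dict-style item (as web_search returns) and feeds it to _process_response. The SimpleNamespace stand-ins and the expected printed result are assumptions based on the code in this series, not output from a real API call.

# Hypothetical demo script, not part of the patch; assumes a dspy checkout
# with this series applied is importable.
from types import SimpleNamespace

from dspy.clients.base_lm import BaseLM

# Object-style output item, as the Responses API returns when no tools are used.
object_item = SimpleNamespace(
    type="message",
    content=[SimpleNamespace(text="Object format")],
)

# Dict-style output item, as returned when web_search (or another tool) runs.
dict_item = {"type": "message", "content": [{"text": " Dict format"}]}

# Fake response object mirroring the MockResponse helper in the new tests.
fake_response = SimpleNamespace(
    output=[object_item, dict_item],
    usage=SimpleNamespace(completion_tokens=10, prompt_tokens=5, total_tokens=15),
    model="gpt-4",
)

lm = BaseLM(model="test-model", model_type="responses")
print(lm._process_response(fake_response))
# Expected with this patch applied: [{'text': 'Object format Dict format'}]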