83 changes: 71 additions & 12 deletions dspy/clients/base_lm.py
@@ -225,24 +225,83 @@ def _process_response(self, response):
         Returns:
             List of processed outputs, which is always of size 1 because the Response API only supports one output.
         """

+        def _normalize_output_item(item):
Collaborator: Doesn't OpenAI provide this conversion out of the box?
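For reference, a minimal sketch of the out-of-the-box conversion this comment alludes to, assuming the SDK's output items are Pydantic models exposing model_dump() (the function_call branch below already relies on that method):

    # Hypothetical simplification, not part of the PR: lean on the SDK's
    # own serialization instead of normalizing attribute by attribute.
    def _normalize_output_item(item):
        if isinstance(item, dict):
            return item  # already the dict format produced with hosted tools
        return item.model_dump()  # Pydantic models serialize to plain dicts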

"""Convert response output item to dict format regardless of input type.

OpenAI's Responses API returns different formats based on tool usage:
- Without tools: Returns objects with attributes (.type, .content, etc.)
- With tools (e.g., web_search): Returns dicts with keys ('type', 'content', etc.)

This function normalizes both formats to dict for consistent processing.
"""
+            if isinstance(item, dict):
+                # Already a dict, return as-is
+                return item

+            # Convert object to dict
+            normalized = {"type": item.type}

+            # Handle content
+            if hasattr(item, "content") and item.content:
+                normalized["content"] = []
+                for content_item in item.content:
+                    if isinstance(content_item, dict):
+                        normalized["content"].append(content_item)
+                    else:
+                        normalized["content"].append({"text": content_item.text})

+            # Handle function calls (store original for model_dump if needed)
+            if hasattr(item, "name"):
+                normalized["name"] = item.name
+            if hasattr(item, "arguments"):
+                normalized["arguments"] = item.arguments
+            if hasattr(item, "model_dump"):
+                # Store the original object for model_dump
+                normalized["_original"] = item

+            # Handle reasoning content
+            if hasattr(item, "summary") and item.summary:
+                normalized["summary"] = []
+                for summary_item in item.summary:
+                    if isinstance(summary_item, dict):
+                        normalized["summary"].append(summary_item)
+                    else:
+                        normalized["summary"].append({"text": summary_item.text})

+            return normalized

+        # Normalize all output items to dict format first
+        normalized_outputs = [_normalize_output_item(item) for item in response.output]

         text_outputs = []
         tool_calls = []
         reasoning_contents = []

-        for output_item in response.output:
-            output_item_type = output_item.type
+        for output_item in normalized_outputs:
+            output_item_type = output_item.get("type")

if output_item_type == "message":
for content_item in output_item.content:
text_outputs.append(content_item.text)
for content_item in output_item.get("content", []):
text_outputs.append(content_item.get("text", ""))

elif output_item_type == "function_call":
tool_calls.append(output_item.model_dump())
# Use original object for model_dump if available, otherwise use dict
if "_original" in output_item:
tool_calls.append(output_item["_original"].model_dump())
else:
tool_calls.append(output_item)

elif output_item_type == "reasoning":
if getattr(output_item, "content", None) and len(output_item.content) > 0:
for content_item in output_item.content:
reasoning_contents.append(content_item.text)
elif getattr(output_item, "summary", None) and len(output_item.summary) > 0:
for summary_item in output_item.summary:
reasoning_contents.append(summary_item.text)
content = output_item.get("content", [])
summary = output_item.get("summary", [])

if content:
for content_item in content:
reasoning_contents.append(content_item.get("text", ""))
elif summary:
for summary_item in summary:
reasoning_contents.append(summary_item.get("text", ""))

         result = {}
         if len(text_outputs) > 0:
@@ -251,10 +310,10 @@ def _process_response(self, response):
result["tool_calls"] = tool_calls
if len(reasoning_contents) > 0:
result["reasoning_content"] = "".join(reasoning_contents)

+        # All `response.output` items map to one answer, so we return a list of size 1.
         return [result]


 def inspect_history(n: int = 1):
     """The global history shared across all LMs."""
     return pretty_print_history(GLOBAL_HISTORY, n)
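To illustrate the comment above `return [result]`: every item in `response.output` is folded into a single result dict, so callers always receive a one-element list. A minimal sketch with illustrative values, assuming the dict format described in the docstring above:

    # One reasoning item plus one message item still yield a single answer.
    output = [
        {"type": "reasoning", "summary": [{"text": "thinking..."}]},
        {"type": "message", "content": [{"text": "final answer"}]},
    ]
    # Processing a response with this output would return:
    #     [{"text": "final answer", "reasoning_content": "thinking..."}]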
204 changes: 204 additions & 0 deletions dspy/clients/test_base_lm_response_formats.py
@@ -0,0 +1,204 @@
"""
Unit tests for _process_response method handling both dict and object formats.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please don't create a new test file and use the existing test_base_lm.py

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also I don't think we don't need to add such a large number of tests.

Tests the fix for issue #8958 - web_search tools return dict format.
"""

import pytest

from dspy.clients.base_lm import BaseLM


class MockContent:
    """Mock content object (object format)"""
    def __init__(self, text):
        self.text = text


class MockOutputItem:
    """Mock output item (object format - without web_search)"""
    def __init__(self, item_type, content=None, summary=None):
        self.type = item_type
        if content:
            self.content = content
        if summary:
            self.summary = summary

    def model_dump(self):
        return {"type": self.type, "name": "test_function", "arguments": "{}"}


class MockResponse:
    """Mock response object"""
    def __init__(self, output):
        self.output = output
        self.usage = type("obj", (object,), {
            "completion_tokens": 10,
            "prompt_tokens": 5,
            "total_tokens": 15
        })()
        self.model = "gpt-4"


class TestProcessResponseFormats:
    """Test _process_response handles both dict and object formats"""

    @pytest.fixture
    def base_lm(self):
        """Create a BaseLM instance for testing"""
        return BaseLM(model="test-model", model_type="responses")

    def test_object_format_message(self, base_lm):
        """Test processing object format (normal responses without web_search)"""
        # Create mock response with object format
        mock_response = MockResponse(
            output=[
                MockOutputItem("message", content=[MockContent("Hello world")])
            ]
        )

        result = base_lm._process_response(mock_response)

        assert len(result) == 1
        assert result[0]["text"] == "Hello world"

    def test_dict_format_message(self, base_lm):
        """Test processing dict format (responses with web_search tools)"""
        # Create mock response with dict format (as returned by web_search)
        mock_response = MockResponse(
            output=[
                {
                    "type": "message",
                    "content": [{"text": "Hello from web search"}]
                }
            ]
        )

        result = base_lm._process_response(mock_response)

        assert len(result) == 1
        assert result[0]["text"] == "Hello from web search"

    def test_dict_format_with_multiple_content(self, base_lm):
        """Test dict format with multiple content items"""
        mock_response = MockResponse(
            output=[
                {
                    "type": "message",
                    "content": [
                        {"text": "Part 1"},
                        {"text": " Part 2"},
                        {"text": " Part 3"}
                    ]
                }
            ]
        )

        result = base_lm._process_response(mock_response)

        assert len(result) == 1
        assert result[0]["text"] == "Part 1 Part 2 Part 3"

    def test_object_format_function_call(self, base_lm):
        """Test function call in object format"""
        mock_item = MockOutputItem("function_call")
        mock_response = MockResponse(output=[mock_item])

        result = base_lm._process_response(mock_response)

        assert len(result) == 1
        assert "tool_calls" in result[0]
        assert len(result[0]["tool_calls"]) == 1

    def test_dict_format_function_call(self, base_lm):
        """Test function call in dict format"""
        mock_response = MockResponse(
            output=[
                {
                    "type": "function_call",
                    "name": "web_search",
                    "arguments": '{"query": "test"}'
                }
            ]
        )

        result = base_lm._process_response(mock_response)

        assert len(result) == 1
        assert "tool_calls" in result[0]
        assert result[0]["tool_calls"][0]["name"] == "web_search"

    def test_object_format_reasoning(self, base_lm):
        """Test reasoning content in object format"""
        mock_response = MockResponse(
            output=[
                MockOutputItem("reasoning", content=[MockContent("Thinking step 1")])
            ]
        )

        result = base_lm._process_response(mock_response)

        assert len(result) == 1
        assert "reasoning_content" in result[0]
        assert result[0]["reasoning_content"] == "Thinking step 1"

    def test_dict_format_reasoning(self, base_lm):
        """Test reasoning content in dict format"""
        mock_response = MockResponse(
            output=[
                {
                    "type": "reasoning",
                    "content": [{"text": "Reasoning step 1"}]
                }
            ]
        )

        result = base_lm._process_response(mock_response)

        assert len(result) == 1
        assert "reasoning_content" in result[0]
        assert result[0]["reasoning_content"] == "Reasoning step 1"

    def test_dict_format_reasoning_with_summary(self, base_lm):
        """Test reasoning with summary (fallback when no content)"""
        mock_response = MockResponse(
            output=[
                {
                    "type": "reasoning",
                    "summary": [{"text": "Summary text"}]
                }
            ]
        )

        result = base_lm._process_response(mock_response)

        assert len(result) == 1
        assert "reasoning_content" in result[0]
        assert result[0]["reasoning_content"] == "Summary text"

    def test_mixed_format_backwards_compatibility(self, base_lm):
        """Test that both formats can coexist (edge case)"""
        # Mix of object and dict formats in same response
        mock_response = MockResponse(
            output=[
                MockOutputItem("message", content=[MockContent("Object format")]),
                {"type": "message", "content": [{"text": " Dict format"}]}
            ]
        )

        result = base_lm._process_response(mock_response)

        assert len(result) == 1
        assert result[0]["text"] == "Object format Dict format"

    def test_empty_content(self, base_lm):
        """Test handling of empty content"""
        mock_response = MockResponse(
            output=[
                {"type": "message", "content": []}
            ]
        )

        result = base_lm._process_response(mock_response)

        assert len(result) == 1
        assert "text" not in result[0]  # No text key when no content