src/strands/models/litellm.py (11 additions, 2 deletions)

@@ -205,12 +205,21 @@ async def structured_output(
         Yields:
             Model events with the last being the structured output.
         """
-        if not supports_response_schema(self.get_config()["model_id"]):
+        # Allow LiteLLM proxy usage to bypass supports_response_schema check.
+        # If a proxy is in use, we cannot reliably detect support by model_id alone,
+        # so prefer to let litellm/proxy handle response_format negotiation.
+        client_args = getattr(self, "client_args", {}) or {}
+        use_proxy = bool(client_args.get("use_litellm_proxy") or client_args.get("use_proxy"))
+        model_id = self.get_config().get("model_id")
+
+        if not use_proxy and (not model_id or not supports_response_schema(model_id)):
             raise ValueError("Model does not support response_format")
 
+        logger.debug("calling litellm.acompletion model=%s use_proxy=%s", model_id, use_proxy)
+
         response = await litellm.acompletion(
             **self.client_args,
-            model=self.get_config()["model_id"],
+            model=model_id,
             messages=self.format_request(prompt, system_prompt=system_prompt)["messages"],
             response_format=output_model,
         )
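For context, a minimal usage sketch of the new bypass path, assuming the proxy flag is passed through client_args exactly as the test below does. The proxy URL, API key, model id, and the Weather output model are placeholders, not values taken from this PR.

import asyncio

import pydantic

from strands.models.litellm import LiteLLMModel


class Weather(pydantic.BaseModel):
    condition: str
    temperature_c: float


async def main():
    # Hypothetical proxy configuration: use_litellm_proxy is the flag the new
    # check inspects; api_base/api_key point at a LiteLLM proxy deployment.
    model = LiteLLMModel(
        client_args={
            "use_litellm_proxy": True,
            "api_base": "http://localhost:4000",  # placeholder proxy URL
            "api_key": "sk-proxy-placeholder",    # placeholder key
        },
        model_id="gpt-4o",  # resolved by the proxy, not by litellm locally
    )

    # With the proxy flag set, structured_output no longer raises up front when
    # supports_response_schema cannot confirm the model's capabilities.
    events = [
        event
        async for event in model.structured_output(
            Weather, [{"role": "user", "content": [{"text": "Weather in Paris?"}]}]
        )
    ]
    print(events[-1]["output"])


asyncio.run(main())

With use_litellm_proxy absent or falsy, the original guard still applies and a ValueError is raised for models whose response_format support cannot be confirmed.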
tests/strands/models/test_litellm.py (30 additions, 0 deletions)

@@ -332,3 +332,33 @@ def test_tool_choice_none_no_warning(model, messages, captured_warnings):
     model.format_request(messages, tool_choice=None)
 
     assert len(captured_warnings) == 0
+
+
+class TestModel(pydantic.BaseModel):
+    name: str
+
+
+@pytest.mark.asyncio
+async def test_structured_output_with_proxy_bypasses_support_check(monkeypatch):
+    # Model configured with proxy, supports_response_schema returns False
+    model = LiteLLMModel(client_args={"api_key": "X", "use_litellm_proxy": True}, model_id="some/model")
+
+    # Force supports_response_schema to False to simulate "unknown" capability
+    monkeypatch.setattr(strands.models.litellm, "supports_response_schema", lambda _mid: False)
+
+    # Mock litellm.acompletion to return a response with a choice finishing with tool_calls
+    mock_choice = unittest.mock.Mock()
+    mock_choice.finish_reason = "tool_calls"
+    mock_choice.message = unittest.mock.Mock()
+    mock_choice.message.content = '{"name":"proxy-result"}'
+    mock_response = unittest.mock.Mock()
+    mock_response.choices = [mock_choice]
+
+    async def fake_acompletion(**kwargs):
+        return mock_response
+
+    monkeypatch.setattr(strands.models.litellm.litellm, "acompletion", fake_acompletion)
+
+    stream = model.structured_output(TestModel, [{"role": "user", "content": [{"text": "x"}]}])
+    events = [e async for e in stream]
+    assert events[-1] == {"output": TestModel(name="proxy-result")}
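Not part of this PR, but for symmetry a companion test could pin down the negative path: with no proxy flag, the original guard must still raise. A sketch, reusing TestModel and the imports from the test module above.

@pytest.mark.asyncio
async def test_structured_output_without_proxy_still_raises(monkeypatch):
    # No proxy flag in client_args, so the capability check should still apply.
    model = LiteLLMModel(client_args={"api_key": "X"}, model_id="some/model")

    # Simulate a model whose response_format support cannot be confirmed.
    monkeypatch.setattr(strands.models.litellm, "supports_response_schema", lambda _mid: False)

    # structured_output is an async generator, so the error surfaces on iteration.
    with pytest.raises(ValueError, match="Model does not support response_format"):
        stream = model.structured_output(TestModel, [{"role": "user", "content": [{"text": "x"}]}])
        _ = [event async for event in stream]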