diff --git a/src/strands/models/litellm.py b/src/strands/models/litellm.py
index 005eed3df..9b16c9ff7 100644
--- a/src/strands/models/litellm.py
+++ b/src/strands/models/litellm.py
@@ -205,12 +205,21 @@ async def structured_output(
         Yields:
             Model events with the last being the structured output.
         """
-        if not supports_response_schema(self.get_config()["model_id"]):
+        # Allow LiteLLM proxy usage to bypass the supports_response_schema check.
+        # If a proxy is in use, we cannot reliably detect support by model_id alone,
+        # so prefer to let litellm/proxy handle response_format negotiation.
+        client_args = getattr(self, "client_args", {}) or {}
+        use_proxy = bool(client_args.get("use_litellm_proxy") or client_args.get("use_proxy"))
+        model_id = self.get_config().get("model_id")
+
+        if not use_proxy and (not model_id or not supports_response_schema(model_id)):
             raise ValueError("Model does not support response_format")
 
+        logger.debug("calling litellm.acompletion model=%s use_proxy=%s", model_id, use_proxy)
+
         response = await litellm.acompletion(
             **self.client_args,
-            model=self.get_config()["model_id"],
+            model=model_id,
             messages=self.format_request(prompt, system_prompt=system_prompt)["messages"],
             response_format=output_model,
         )
diff --git a/tests/strands/models/test_litellm.py b/tests/strands/models/test_litellm.py
index bc81fc819..e1b4cf15f 100644
--- a/tests/strands/models/test_litellm.py
+++ b/tests/strands/models/test_litellm.py
@@ -332,3 +332,33 @@ def test_tool_choice_none_no_warning(model, messages, captured_warnings):
         model.format_request(messages, tool_choice=None)
 
     assert len(captured_warnings) == 0
+
+
+class TestModel(pydantic.BaseModel):
+    name: str
+
+
+@pytest.mark.asyncio
+async def test_structured_output_with_proxy_bypasses_support_check(monkeypatch):
+    # Model is configured for proxy use; supports_response_schema is forced to False below
+    model = LiteLLMModel(client_args={"api_key": "X", "use_litellm_proxy": True}, model_id="some/model")
+
+    # Force supports_response_schema to False to simulate an "unknown" capability
+    monkeypatch.setattr(strands.models.litellm, "supports_response_schema", lambda _mid: False)
+
+    # Mock litellm.acompletion to return a response with a single choice finishing with tool_calls
+    mock_choice = unittest.mock.Mock()
+    mock_choice.finish_reason = "tool_calls"
+    mock_choice.message = unittest.mock.Mock()
+    mock_choice.message.content = '{"name":"proxy-result"}'
+    mock_response = unittest.mock.Mock()
+    mock_response.choices = [mock_choice]
+
+    async def fake_acompletion(**kwargs):
+        return mock_response
+
+    monkeypatch.setattr(strands.models.litellm.litellm, "acompletion", fake_acompletion)
+
+    stream = model.structured_output(TestModel, [{"role": "user", "content": [{"text": "x"}]}])
+    events = [e async for e in stream]
+    assert events[-1] == {"output": TestModel(name="proxy-result")}
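
Usage sketch (not part of the patch): a minimal example of the path this change opens up, assuming the LiteLLMModel constructor and message shape shown in the test above. The model_id, API key, prompt text, and Person model are illustrative placeholders, and actually running this requires a reachable LiteLLM proxy.

    import asyncio

    import pydantic

    from strands.models.litellm import LiteLLMModel


    class Person(pydantic.BaseModel):
        name: str


    async def main():
        # With use_litellm_proxy set, structured_output no longer consults
        # supports_response_schema(model_id); response_format negotiation is
        # deferred to the proxy.
        model = LiteLLMModel(
            client_args={"api_key": "...", "use_litellm_proxy": True},
            model_id="some/model",  # placeholder; any proxy-routed model id
        )
        events = [
            event
            async for event in model.structured_output(
                Person, [{"role": "user", "content": [{"text": "Extract the name: Ada"}]}]
            )
        ]
        print(events[-1]["output"])  # the last event carries the parsed Person


    asyncio.run(main())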