From ad7b9ff06bcde8c30043cc87ca12278f83f08402 Mon Sep 17 00:00:00 2001 From: Emilien Mottet Date: Fri, 22 May 2026 13:08:13 +0200 Subject: [PATCH] Python: read headers defensively to support stream wrappers without `.headers` (#6028) `OpenAIChatClient._inner_get_response()` reads `.headers` on the raw streaming response returned by `client.responses.with_raw_response.create(stream=True)` (and its three sibling call sites - retrieve-streaming, non-streaming create and background retrieve) to surface the `x-ms-served-model` Azure header, introduced in #5910. When `azure-ai-projects>=2.1.0` experimental GenAI tracing is enabled (`AZURE_EXPERIMENTAL_ENABLE_GENAI_TRACING=true`), the instrumentor wraps the raw streaming response in an inline `AsyncStreamWrapper` that exposes `.response` but not `.headers`. Reading `raw_create_response.headers` then raises `AttributeError: 'AsyncStreamWrapper' object has no attribute 'headers'`, which `FoundryChatClient` rethrows as a `ChatClientException` and breaks every streaming call (workflows and free chat). Fix: read the header dict via `getattr(raw_response, "headers", None)` at all four call sites. `_extract_served_model()` already short-circuits on `None`, so the served-model surfacing degrades gracefully (model stays the deployment alias) instead of crashing when the response is wrapped by an instrumentor that does not proxy `.headers`. Regression test added: `test_streaming_response_without_headers_attribute_does_not_crash` simulates a stream wrapper that raises `AttributeError` on `.headers` and asserts the stream still completes with the deployment alias as `update.model`. Fixes #6028 --- .../agent_framework_openai/_chat_client.py | 15 +++- .../tests/openai/test_openai_chat_client.py | 82 +++++++++++++++++++ 2 files changed, 93 insertions(+), 4 deletions(-) diff --git a/python/packages/openai/agent_framework_openai/_chat_client.py b/python/packages/openai/agent_framework_openai/_chat_client.py index 8257678584..8ed4d4163a 100644 --- a/python/packages/openai/agent_framework_openai/_chat_client.py +++ b/python/packages/openai/agent_framework_openai/_chat_client.py @@ -636,7 +636,11 @@ async def _stream() -> AsyncIterable[ChatResponseUpdate]: continuation_token["response_id"], stream=True, ) - served_model = self._extract_served_model(raw_stream_response.headers) + # Read headers defensively: telemetry instrumentors (e.g. azure-ai-projects + # experimental tracing) wrap the streaming response in objects that do not + # proxy ``.headers``. Degrade gracefully so the served-model surfacing is + # best-effort instead of crashing the whole call. + served_model = self._extract_served_model(getattr(raw_stream_response, "headers", None)) async with raw_stream_response.parse() as stream_response: async for chunk in stream_response: update = self._parse_chunk_from_openai( @@ -677,7 +681,8 @@ async def _stream() -> AsyncIterable[ChatResponseUpdate]: raw_create_response = await client.responses.with_raw_response.create( stream=True, **run_options ) - served_model = self._extract_served_model(raw_create_response.headers) + # See note above on ``raw_stream_response.headers``. + served_model = self._extract_served_model(getattr(raw_create_response, "headers", None)) async with raw_create_response.parse() as stream_response: async for chunk in stream_response: update = self._parse_chunk_from_openai( @@ -706,7 +711,8 @@ async def _get_response() -> ChatResponse: except Exception as ex: self._handle_request_error(ex) chat_response = self._parse_response_from_openai(response, options=validated_options) - served_model = self._extract_served_model(raw_response.headers) + # See note above on ``raw_stream_response.headers``. + served_model = self._extract_served_model(getattr(raw_response, "headers", None)) if served_model is not None: chat_response.model = served_model # Once the background response completes, drop the continuation_token from @@ -728,7 +734,8 @@ async def _get_response() -> ChatResponse: except Exception as ex: self._handle_request_error(ex) chat_response = self._parse_response_from_openai(response, options=validated_options) - served_model = self._extract_served_model(raw_response.headers) + # See note above on ``raw_stream_response.headers``. + served_model = self._extract_served_model(getattr(raw_response, "headers", None)) if served_model is not None: chat_response.model = served_model return chat_response diff --git a/python/packages/openai/tests/openai/test_openai_chat_client.py b/python/packages/openai/tests/openai/test_openai_chat_client.py index 31c3c26fe0..0e87ebd206 100644 --- a/python/packages/openai/tests/openai/test_openai_chat_client.py +++ b/python/packages/openai/tests/openai/test_openai_chat_client.py @@ -841,6 +841,88 @@ async def test_served_model_header_not_captured_for_streaming_text_format() -> N assert update.model == "test-model" +async def test_streaming_response_without_headers_attribute_does_not_crash() -> None: + """Regression for #6028. + + Some telemetry instrumentors (e.g. ``azure-ai-projects`` experimental GenAI tracing, + activated by ``AZURE_EXPERIMENTAL_ENABLE_GENAI_TRACING=true``) monkey-patch + ``openai.resources.responses.AsyncResponses.create`` at the class level and return + an ``AsyncStreamWrapper`` whose class genuinely has no ``headers`` attribute. The + ``with_raw_response.create`` wrapper does not re-wrap the return value + (``async_to_raw_response_wrapper`` only injects an extra header into the request), + so ``raw_create_response`` in ``_inner_get_response`` ends up being the wrapper + itself. Reading ``raw_create_response.headers`` used to raise ``AttributeError`` + and bubble up as ``ChatClientException``, breaking every streaming call. The + defensive ``getattr(..., "headers", None)`` should now degrade gracefully: + no served-model surfacing, but the stream still completes. + """ + client = OpenAIChatClient(model="test-model", api_key="test-key") + + events = [ + ResponseTextDeltaEvent( + type="response.output_text.delta", + content_index=0, + item_id="text_item", + output_index=0, + sequence_number=1, + logprobs=[], + delta="Hello", + ), + ] + + class _StreamWrapperWithoutHeaders: + """Mimics ``azure.ai.projects.telemetry._responses_instrumentor.AsyncStreamWrapper``: + an async iterator that proxies the stream contents but does not expose ``.headers``. + ``hasattr(wrapper, "headers")`` returns ``False`` so ``getattr(..., "headers", None)`` + falls through to the default — matching the real instrumentor's class layout. + """ + + def __init__(self, events: list[object]) -> None: + self._events = events + self._iterator = iter(()) + + def __aiter__(self) -> "_StreamWrapperWithoutHeaders": + self._iterator = iter(self._events) + return self + + async def __anext__(self) -> object: + try: + return next(self._iterator) + except StopIteration as exc: + raise StopAsyncIteration from exc + + def parse(self) -> "_StreamWrapperWithoutHeaders": + return self + + async def __aenter__(self) -> "_StreamWrapperWithoutHeaders": + return self + + async def __aexit__( + self, + exc_type: type[BaseException] | None, + exc: BaseException | None, + traceback: object | None, + ) -> None: + return None + + headerless_stream = _StreamWrapperWithoutHeaders(events) + # Sanity-check the simulation: the real instrumentor's wrapper genuinely lacks ``.headers``. + assert not hasattr(headerless_stream, "headers") + + with ( + patch.object(client, "_prepare_request", new=AsyncMock(return_value=(client.client, {}, {}))), + patch.object(client.client.responses, "create", new=AsyncMock(return_value=headerless_stream)), + patch.object(client, "_get_metadata_from_response", return_value={}), + ): + stream = client._inner_get_response(messages=[Message(role="user", contents=["Hi"])], options={}, stream=True) + updates = [update async for update in stream] + + assert updates, "Expected the stream to complete even when the wrapper lacks .headers" + for update in updates: + # No header => no override => model stays the deployment alias. + assert update.model == "test-model" + + async def test_streaming_text_format_preserves_final_structured_output() -> None: """Streaming structured output should still parse into the final ChatResponse value.""" client = OpenAIChatClient(model="test-model", api_key="test-key")