From ad7b9ff06bcde8c30043cc87ca12278f83f08402 Mon Sep 17 00:00:00 2001
From: Emilien Mottet <emilien.mottet@michelin.com>
Date: Fri, 22 May 2026 13:08:13 +0200
Subject: [PATCH] Python: read headers defensively to support stream wrappers
 without `.headers` (#6028)

`OpenAIChatClient._inner_get_response()` reads `.headers` on the raw streaming
response returned by `client.responses.with_raw_response.create(stream=True)`
(and its three sibling call sites - retrieve-streaming, non-streaming create
and background retrieve) to surface the `x-ms-served-model` Azure header,
introduced in #5910.

When `azure-ai-projects>=2.1.0` experimental GenAI tracing is enabled
(`AZURE_EXPERIMENTAL_ENABLE_GENAI_TRACING=true`), the instrumentor wraps the
raw streaming response in an inline `AsyncStreamWrapper` that exposes
`.response` but not `.headers`. Reading `raw_create_response.headers` then
raises `AttributeError: 'AsyncStreamWrapper' object has no attribute 'headers'`,
which `FoundryChatClient` rethrows as a `ChatClientException` and breaks every
streaming call (workflows and free chat).

Fix: read the header dict via `getattr(raw_response, "headers", None)` at all
four call sites. `_extract_served_model()` already short-circuits on `None`,
so the served-model surfacing degrades gracefully (model stays the deployment
alias) instead of crashing when the response is wrapped by an instrumentor
that does not proxy `.headers`.

Regression test added:
`test_streaming_response_without_headers_attribute_does_not_crash`
simulates a stream wrapper whose class has no `.headers` attribute and asserts
the stream still completes with the deployment alias as `update.model`.

Fixes #6028
---
 .../agent_framework_openai/_chat_client.py    | 15 +++-
 .../tests/openai/test_openai_chat_client.py   | 82 +++++++++++++++++++
 2 files changed, 93 insertions(+), 4 deletions(-)

diff --git a/python/packages/openai/agent_framework_openai/_chat_client.py b/python/packages/openai/agent_framework_openai/_chat_client.py
index 8257678584..8ed4d4163a 100644
--- a/python/packages/openai/agent_framework_openai/_chat_client.py
+++ b/python/packages/openai/agent_framework_openai/_chat_client.py
@@ -636,7 +636,11 @@ async def _stream() -> AsyncIterable[ChatResponseUpdate]:
                             continuation_token["response_id"],
                             stream=True,
                         )
-                        served_model = self._extract_served_model(raw_stream_response.headers)
+                        # Read headers defensively: telemetry instrumentors (e.g. azure-ai-projects
+                        # experimental tracing) wrap the streaming response in objects that do not
+                        # proxy ``.headers``. Degrade gracefully so the served-model surfacing is
+                        # best-effort instead of crashing the whole call.
+                        served_model = self._extract_served_model(getattr(raw_stream_response, "headers", None))
                         async with raw_stream_response.parse() as stream_response:
                             async for chunk in stream_response:
                                 update = self._parse_chunk_from_openai(
@@ -677,7 +681,8 @@ async def _stream() -> AsyncIterable[ChatResponseUpdate]:
                             raw_create_response = await client.responses.with_raw_response.create(
                                 stream=True, **run_options
                             )
-                            served_model = self._extract_served_model(raw_create_response.headers)
+                            # Best-effort read: instrumentor-wrapped responses may not expose ``.headers``.
+                            served_model = self._extract_served_model(getattr(raw_create_response, "headers", None))
                             async with raw_create_response.parse() as stream_response:
                                 async for chunk in stream_response:
                                     update = self._parse_chunk_from_openai(
@@ -706,7 +711,8 @@ async def _get_response() -> ChatResponse:
                 except Exception as ex:
                     self._handle_request_error(ex)
                 chat_response = self._parse_response_from_openai(response, options=validated_options)
-                served_model = self._extract_served_model(raw_response.headers)
+                # Best-effort read: instrumentor-wrapped responses may not expose ``.headers``.
+                served_model = self._extract_served_model(getattr(raw_response, "headers", None))
                 if served_model is not None:
                     chat_response.model = served_model
                 # Once the background response completes, drop the continuation_token from
@@ -728,7 +734,8 @@ async def _get_response() -> ChatResponse:
             except Exception as ex:
                 self._handle_request_error(ex)
             chat_response = self._parse_response_from_openai(response, options=validated_options)
-            served_model = self._extract_served_model(raw_response.headers)
+            # Best-effort read: instrumentor-wrapped responses may not expose ``.headers``.
+            served_model = self._extract_served_model(getattr(raw_response, "headers", None))
             if served_model is not None:
                 chat_response.model = served_model
             return chat_response
diff --git a/python/packages/openai/tests/openai/test_openai_chat_client.py b/python/packages/openai/tests/openai/test_openai_chat_client.py
index 31c3c26fe0..0e87ebd206 100644
--- a/python/packages/openai/tests/openai/test_openai_chat_client.py
+++ b/python/packages/openai/tests/openai/test_openai_chat_client.py
@@ -841,6 +841,88 @@ async def test_served_model_header_not_captured_for_streaming_text_format() -> N
         assert update.model == "test-model"
 
 
+async def test_streaming_response_without_headers_attribute_does_not_crash() -> None:
+    """Regression for #6028.
+
+    Some telemetry instrumentors (e.g. ``azure-ai-projects`` experimental GenAI tracing,
+    activated by ``AZURE_EXPERIMENTAL_ENABLE_GENAI_TRACING=true``) monkey-patch
+    ``openai.resources.responses.AsyncResponses.create`` at the class level and return
+    an ``AsyncStreamWrapper`` whose class genuinely has no ``headers`` attribute. The
+    ``with_raw_response.create`` wrapper does not re-wrap the return value
+    (``async_to_raw_response_wrapper`` only injects an extra header into the request),
+    so ``raw_create_response`` in ``_inner_get_response`` ends up being the wrapper
+    itself. Reading ``raw_create_response.headers`` used to raise ``AttributeError``
+    and bubble up as ``ChatClientException``, breaking every streaming call. The
+    defensive ``getattr(..., "headers", None)`` should now degrade gracefully:
+    no served-model surfacing, but the stream still completes.
+    """
+    client = OpenAIChatClient(model="test-model", api_key="test-key")
+
+    events = [
+        ResponseTextDeltaEvent(
+            type="response.output_text.delta",
+            content_index=0,
+            item_id="text_item",
+            output_index=0,
+            sequence_number=1,
+            logprobs=[],
+            delta="Hello",
+        ),
+    ]
+
+    class _StreamWrapperWithoutHeaders:
+        """Mimics ``azure.ai.projects.telemetry._responses_instrumentor.AsyncStreamWrapper``:
+        an async iterator that proxies the stream contents but does not expose ``.headers``.
+        ``hasattr(wrapper, "headers")`` returns ``False`` so ``getattr(..., "headers", None)``
+        falls through to the default — matching the real instrumentor's class layout.
+        """
+
+        def __init__(self, events: list[object]) -> None:
+            self._events = events
+            self._iterator = iter(())
+
+        def __aiter__(self) -> "_StreamWrapperWithoutHeaders":
+            self._iterator = iter(self._events)
+            return self
+
+        async def __anext__(self) -> object:
+            try:
+                return next(self._iterator)
+            except StopIteration as exc:
+                raise StopAsyncIteration from exc
+
+        def parse(self) -> "_StreamWrapperWithoutHeaders":
+            return self
+
+        async def __aenter__(self) -> "_StreamWrapperWithoutHeaders":
+            return self
+
+        async def __aexit__(
+            self,
+            exc_type: type[BaseException] | None,
+            exc: BaseException | None,
+            traceback: object | None,
+        ) -> None:
+            return None
+
+    headerless_stream = _StreamWrapperWithoutHeaders(events)
+    # Sanity-check the simulation: the real instrumentor's wrapper genuinely lacks ``.headers``.
+    assert not hasattr(headerless_stream, "headers")
+
+    with (
+        patch.object(client, "_prepare_request", new=AsyncMock(return_value=(client.client, {}, {}))),
+        patch.object(client.client.responses, "create", new=AsyncMock(return_value=headerless_stream)),
+        patch.object(client, "_get_metadata_from_response", return_value={}),
+    ):
+        stream = client._inner_get_response(messages=[Message(role="user", contents=["Hi"])], options={}, stream=True)
+        updates = [update async for update in stream]
+
+    assert updates, "Expected the stream to complete even when the wrapper lacks .headers"
+    for update in updates:
+        # No header => no override => model stays the deployment alias.
+        assert update.model == "test-model"
+
+
 async def test_streaming_text_format_preserves_final_structured_output() -> None:
     """Streaming structured output should still parse into the final ChatResponse value."""
     client = OpenAIChatClient(model="test-model", api_key="test-key")