Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 11 additions & 4 deletions python/packages/openai/agent_framework_openai/_chat_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -636,7 +636,11 @@ async def _stream() -> AsyncIterable[ChatResponseUpdate]:
continuation_token["response_id"],
stream=True,
)
served_model = self._extract_served_model(raw_stream_response.headers)
# Read headers defensively: telemetry instrumentors (e.g. azure-ai-projects
# experimental tracing) wrap the streaming response in objects that do not
# proxy ``.headers``. Degrade gracefully so the served-model surfacing is
# best-effort instead of crashing the whole call.
served_model = self._extract_served_model(getattr(raw_stream_response, "headers", None))
async with raw_stream_response.parse() as stream_response:
async for chunk in stream_response:
update = self._parse_chunk_from_openai(
Expand Down Expand Up @@ -677,7 +681,8 @@ async def _stream() -> AsyncIterable[ChatResponseUpdate]:
raw_create_response = await client.responses.with_raw_response.create(
stream=True, **run_options
)
served_model = self._extract_served_model(raw_create_response.headers)
# See note above on ``raw_stream_response.headers``.
served_model = self._extract_served_model(getattr(raw_create_response, "headers", None))
async with raw_create_response.parse() as stream_response:
async for chunk in stream_response:
update = self._parse_chunk_from_openai(
Expand Down Expand Up @@ -706,7 +711,8 @@ async def _get_response() -> ChatResponse:
except Exception as ex:
self._handle_request_error(ex)
chat_response = self._parse_response_from_openai(response, options=validated_options)
served_model = self._extract_served_model(raw_response.headers)
# See note above on ``raw_stream_response.headers``.
served_model = self._extract_served_model(getattr(raw_response, "headers", None))
if served_model is not None:
chat_response.model = served_model
# Once the background response completes, drop the continuation_token from
Expand All @@ -728,7 +734,8 @@ async def _get_response() -> ChatResponse:
except Exception as ex:
self._handle_request_error(ex)
chat_response = self._parse_response_from_openai(response, options=validated_options)
served_model = self._extract_served_model(raw_response.headers)
# See note above on ``raw_stream_response.headers``.
served_model = self._extract_served_model(getattr(raw_response, "headers", None))
if served_model is not None:
chat_response.model = served_model
return chat_response
Expand Down
82 changes: 82 additions & 0 deletions python/packages/openai/tests/openai/test_openai_chat_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -841,6 +841,88 @@ async def test_served_model_header_not_captured_for_streaming_text_format() -> N
assert update.model == "test-model"


async def test_streaming_response_without_headers_attribute_does_not_crash() -> None:
"""Regression for #6028.

Some telemetry instrumentors (e.g. ``azure-ai-projects`` experimental GenAI tracing,
activated by ``AZURE_EXPERIMENTAL_ENABLE_GENAI_TRACING=true``) monkey-patch
``openai.resources.responses.AsyncResponses.create`` at the class level and return
an ``AsyncStreamWrapper`` whose class genuinely has no ``headers`` attribute. The
``with_raw_response.create`` wrapper does not re-wrap the return value
(``async_to_raw_response_wrapper`` only injects an extra header into the request),
so ``raw_create_response`` in ``_inner_get_response`` ends up being the wrapper
itself. Reading ``raw_create_response.headers`` used to raise ``AttributeError``
and bubble up as ``ChatClientException``, breaking every streaming call. The
defensive ``getattr(..., "headers", None)`` should now degrade gracefully:
no served-model surfacing, but the stream still completes.
"""
client = OpenAIChatClient(model="test-model", api_key="test-key")

events = [
ResponseTextDeltaEvent(
type="response.output_text.delta",
content_index=0,
item_id="text_item",
output_index=0,
sequence_number=1,
logprobs=[],
delta="Hello",
),
]

class _StreamWrapperWithoutHeaders:
"""Mimics ``azure.ai.projects.telemetry._responses_instrumentor.AsyncStreamWrapper``:
an async iterator that proxies the stream contents but does not expose ``.headers``.
``hasattr(wrapper, "headers")`` returns ``False`` so ``getattr(..., "headers", None)``
falls through to the default — matching the real instrumentor's class layout.
"""

def __init__(self, events: list[object]) -> None:
self._events = events
self._iterator = iter(())

def __aiter__(self) -> "_StreamWrapperWithoutHeaders":
self._iterator = iter(self._events)
return self

async def __anext__(self) -> object:
try:
return next(self._iterator)
except StopIteration as exc:
raise StopAsyncIteration from exc

def parse(self) -> "_StreamWrapperWithoutHeaders":
return self

async def __aenter__(self) -> "_StreamWrapperWithoutHeaders":
return self

async def __aexit__(
self,
exc_type: type[BaseException] | None,
exc: BaseException | None,
traceback: object | None,
) -> None:
return None

headerless_stream = _StreamWrapperWithoutHeaders(events)
# Sanity-check the simulation: the real instrumentor's wrapper genuinely lacks ``.headers``.
assert not hasattr(headerless_stream, "headers")

with (
patch.object(client, "_prepare_request", new=AsyncMock(return_value=(client.client, {}, {}))),
patch.object(client.client.responses, "create", new=AsyncMock(return_value=headerless_stream)),
patch.object(client, "_get_metadata_from_response", return_value={}),
):
stream = client._inner_get_response(messages=[Message(role="user", contents=["Hi"])], options={}, stream=True)
updates = [update async for update in stream]

assert updates, "Expected the stream to complete even when the wrapper lacks .headers"
for update in updates:
# No header => no override => model stays the deployment alias.
assert update.model == "test-model"


async def test_streaming_text_format_preserves_final_structured_output() -> None:
"""Streaming structured output should still parse into the final ChatResponse value."""
client = OpenAIChatClient(model="test-model", api_key="test-key")
Expand Down