From 40eb4300a6ddccfd3f645abbbf3941088ac55a7c Mon Sep 17 00:00:00 2001
From: Douwe Maan
Date: Wed, 27 Aug 2025 17:21:43 +0000
Subject: [PATCH 1/2] Add MockOpenAIResponses for use in tests

---
 ...esponses_usage_without_tokens_details.yaml |  96 -----------
 tests/models/mock_openai.py                   | 161 ++++++++++++++++++
 tests/models/test_openai.py                   |  81 +--------
 tests/models/test_openai_responses.py         |  48 +++++-
 4 files changed, 208 insertions(+), 178 deletions(-)
 delete mode 100644 tests/models/cassettes/test_openai_responses/test_openai_responses_usage_without_tokens_details.yaml
 create mode 100644 tests/models/mock_openai.py

diff --git a/tests/models/cassettes/test_openai_responses/test_openai_responses_usage_without_tokens_details.yaml b/tests/models/cassettes/test_openai_responses/test_openai_responses_usage_without_tokens_details.yaml
deleted file mode 100644
index 4953e21b02..0000000000
--- a/tests/models/cassettes/test_openai_responses/test_openai_responses_usage_without_tokens_details.yaml
+++ /dev/null
@@ -1,96 +0,0 @@
-interactions:
-- request:
-    headers:
-      accept:
-      - application/json
-      accept-encoding:
-      - gzip, deflate
-      connection:
-      - keep-alive
-      content-length:
-      - '84'
-      content-type:
-      - application/json
-      host:
-      - api.openai.com
-    method: POST
-    parsed_body:
-      input:
-      - content: What is 2+2?
-        role: user
-      model: gpt-4o
-      stream: false
-    uri: https://api.openai.com/v1/responses
-  response:
-    headers:
-      alt-svc:
-      - h3=":443"; ma=86400
-      connection:
-      - keep-alive
-      content-length:
-      - '1369'
-      content-type:
-      - application/json
-      openai-organization:
-      - pydantic-28gund
-      openai-processing-ms:
-      - '899'
-      openai-project:
-      - proj_dKobscVY9YJxeEaDJen54e3d
-      openai-version:
-      - '2020-10-01'
-      strict-transport-security:
-      - max-age=31536000; includeSubDomains; preload
-      transfer-encoding:
-      - chunked
-    parsed_body:
-      background: false
-      created_at: 1756246493
-      error: null
-      id: resp_68ae31dd0edc819da9e77ad3cdb128770306cb62070aed80
-      incomplete_details: null
-      instructions: null
-      max_output_tokens: null
-      max_tool_calls: null
-      metadata: {}
-      model: gpt-4o-2024-08-06
-      object: response
-      output:
-      - content:
-        - annotations: []
-          logprobs: []
-          text: 2 + 2 equals 4.
-          type: output_text
-        id: msg_68ae31ddcef0819da13eb00d2b393eb60306cb62070aed80
-        role: assistant
-        status: completed
-        type: message
-      parallel_tool_calls: true
-      previous_response_id: null
-      prompt_cache_key: null
-      reasoning:
-        effort: null
-        summary: null
-      safety_identifier: null
-      service_tier: default
-      status: completed
-      store: true
-      temperature: 1.0
-      text:
-        format:
-          type: text
-        verbosity: medium
-      tool_choice: auto
-      tools: []
-      top_logprobs: 0
-      top_p: 1.0
-      truncation: disabled
-      usage:
-        input_tokens: 14
-        output_tokens: 9
-        total_tokens: 23
-      user: null
-  status:
-    code: 200
-    message: OK
-version: 1
diff --git a/tests/models/mock_openai.py b/tests/models/mock_openai.py
new file mode 100644
index 0000000000..4f0a7fbf5a
--- /dev/null
+++ b/tests/models/mock_openai.py
@@ -0,0 +1,161 @@
+from __future__ import annotations as _annotations
+
+from collections.abc import Sequence
+from dataclasses import dataclass, field
+from functools import cached_property
+from typing import Any, Union, cast
+
+from ..conftest import raise_if_exception, try_import
+from .mock_async_stream import MockAsyncStream
+
+with try_import() as imports_successful:
+    from openai import NOT_GIVEN, AsyncOpenAI
+    from openai.types import chat, responses
+    from openai.types.chat.chat_completion import Choice, ChoiceLogprobs
+    from openai.types.chat.chat_completion_message import ChatCompletionMessage
+    from openai.types.completion_usage import CompletionUsage
+    from openai.types.responses.response import ResponseUsage
+    from openai.types.responses.response_output_item import ResponseOutputItem
+
+    # note: we use Union here so that casting works with Python 3.9
+    MockChatCompletion = Union[chat.ChatCompletion, Exception]
+    MockChatCompletionChunk = Union[chat.ChatCompletionChunk, Exception]
+    MockResponse = Union[responses.Response, Exception]
+    MockResponseStreamEvent = Union[responses.ResponseStreamEvent, Exception]
+
+
+@dataclass
+class MockOpenAI:
+    completions: MockChatCompletion | Sequence[MockChatCompletion] | None = None
+    stream: Sequence[MockChatCompletionChunk] | Sequence[Sequence[MockChatCompletionChunk]] | None = None
+    index: int = 0
+    chat_completion_kwargs: list[dict[str, Any]] = field(default_factory=list)
+
+    @cached_property
+    def chat(self) -> Any:
+        chat_completions = type('Completions', (), {'create': self.chat_completions_create})
+        return type('Chat', (), {'completions': chat_completions})
+
+    @classmethod
+    def create_mock(cls, completions: MockChatCompletion | Sequence[MockChatCompletion]) -> AsyncOpenAI:
+        return cast(AsyncOpenAI, cls(completions=completions))
+
+    @classmethod
+    def create_mock_stream(
+        cls,
+        stream: Sequence[MockChatCompletionChunk] | Sequence[Sequence[MockChatCompletionChunk]],
+    ) -> AsyncOpenAI:
+        return cast(AsyncOpenAI, cls(stream=stream))
+
+    async def chat_completions_create(  # pragma: lax no cover
+        self, *_args: Any, stream: bool = False, **kwargs: Any
+    ) -> chat.ChatCompletion | MockAsyncStream[MockChatCompletionChunk]:
+        self.chat_completion_kwargs.append({k: v for k, v in kwargs.items() if v is not NOT_GIVEN})
+
+        if stream:
+            assert self.stream is not None, 'you can only use `stream=True` if `stream` is provided'
+            if isinstance(self.stream[0], Sequence):
+                response = MockAsyncStream(iter(cast(list[MockChatCompletionChunk], self.stream[self.index])))
+            else:
+                response = MockAsyncStream(iter(cast(list[MockChatCompletionChunk], self.stream)))
+        else:
+            assert self.completions is not None, 'you can only use `stream=False` if `completions` are provided'
+            if isinstance(self.completions, Sequence):
+                raise_if_exception(self.completions[self.index])
+                response = cast(chat.ChatCompletion, self.completions[self.index])
+            else:
+                raise_if_exception(self.completions)
+                response = cast(chat.ChatCompletion, self.completions)
+        self.index += 1
+        return response
+
+
+def get_mock_chat_completion_kwargs(async_open_ai: AsyncOpenAI) -> list[dict[str, Any]]:
+    if isinstance(async_open_ai, MockOpenAI):
+        return async_open_ai.chat_completion_kwargs
+    else:  # pragma: no cover
+        raise RuntimeError('Not a MockOpenAI instance')
+
+
+def completion_message(
+    message: ChatCompletionMessage, *, usage: CompletionUsage | None = None, logprobs: ChoiceLogprobs | None = None
+) -> chat.ChatCompletion:
+    choices = [Choice(finish_reason='stop', index=0, message=message)]
+    if logprobs:
+        choices = [Choice(finish_reason='stop', index=0, message=message, logprobs=logprobs)]
+    return chat.ChatCompletion(
+        id='123',
+        choices=choices,
+        created=1704067200,  # 2024-01-01
+        model='gpt-4o-123',
+        object='chat.completion',
+        usage=usage,
+    )
+
+
+@dataclass
+class MockOpenAIResponses:
+    response: MockResponse | Sequence[MockResponse] | None = None
+    stream: Sequence[MockResponseStreamEvent] | Sequence[Sequence[MockResponseStreamEvent]] | None = None
+    index: int = 0
+    response_kwargs: list[dict[str, Any]] = field(default_factory=list)
+
+    @cached_property
+    def responses(self) -> Any:
+        return type('Responses', (), {'create': self.responses_create})
+
+    @classmethod
+    def create_mock(cls, responses: MockResponse | Sequence[MockResponse]) -> AsyncOpenAI:
+        return cast(AsyncOpenAI, cls(response=responses))
+
+    @classmethod
+    def create_mock_stream(
+        cls,
+        stream: Sequence[MockResponseStreamEvent] | Sequence[Sequence[MockResponseStreamEvent]],
+    ) -> AsyncOpenAI:
+        return cast(AsyncOpenAI, cls(stream=stream))
+
+    async def responses_create(  # pragma: lax no cover
+        self, *_args: Any, stream: bool = False, **kwargs: Any
+    ) -> responses.Response | MockAsyncStream[MockResponseStreamEvent]:
+        self.response_kwargs.append({k: v for k, v in kwargs.items() if v is not NOT_GIVEN})
+
+        if stream:
+            assert self.stream is not None, 'you can only use `stream=True` if `stream` is provided'
+            if isinstance(self.stream[0], Sequence):
+                response = MockAsyncStream(iter(cast(list[MockResponseStreamEvent], self.stream[self.index])))
+            else:
+                response = MockAsyncStream(iter(cast(list[MockResponseStreamEvent], self.stream)))
+        else:
+            assert self.response is not None, 'you can only use `stream=False` if `response` is provided'
+            if isinstance(self.response, Sequence):
+                raise_if_exception(self.response[self.index])
+                response = cast(responses.Response, self.response[self.index])
+            else:
+                raise_if_exception(self.response)
+                response = cast(responses.Response, self.response)
+        self.index += 1
+        return response
+
+
+def get_mock_responses_kwargs(async_open_ai: AsyncOpenAI) -> list[dict[str, Any]]:
+    if isinstance(async_open_ai, MockOpenAIResponses):
+        return async_open_ai.response_kwargs
+    else:  # pragma: no cover
+        raise RuntimeError('Not a MockOpenAIResponses instance')
+
+
+def response_message(
+    output_items: Sequence[ResponseOutputItem], *, usage: ResponseUsage | None = None
+) -> responses.Response:
+    return responses.Response(
+        id='123',
+        model='gpt-4o-123',
+        object='response',
+        created_at=1704067200,  # 2024-01-01
+        output=list(output_items),
+        parallel_tool_calls=True,
+        tool_choice='auto',
+        tools=[],
+        usage=usage,
+    )
diff --git a/tests/models/test_openai.py b/tests/models/test_openai.py
index f25218a5c1..31126b61ff 100644
--- a/tests/models/test_openai.py
+++ b/tests/models/test_openai.py
@@ -1,11 +1,9 @@
 from __future__ import annotations as _annotations

 import json
-from collections.abc import Sequence
-from dataclasses import dataclass, field
+from dataclasses import dataclass
 from datetime import datetime, timezone
 from enum import Enum
-from functools import cached_property
 from typing import Annotated, Any, Callable, Literal, Union, cast

 import httpx
@@ -47,13 +45,13 @@
 from pydantic_ai.tools import ToolDefinition
 from pydantic_ai.usage import RequestUsage

-from ..conftest import IsDatetime, IsInstance, IsNow, IsStr, TestEnv, raise_if_exception, try_import
-from .mock_async_stream import MockAsyncStream
+from ..conftest import IsDatetime, IsInstance, IsNow, IsStr, TestEnv, try_import
+from .mock_openai import MockOpenAI, completion_message, get_mock_chat_completion_kwargs

 with try_import() as imports_successful:
-    from openai import NOT_GIVEN, APIStatusError, AsyncOpenAI
+    from openai import APIStatusError, AsyncOpenAI
     from openai.types import chat
-    from openai.types.chat.chat_completion import Choice, ChoiceLogprobs
+    from openai.types.chat.chat_completion import ChoiceLogprobs
     from openai.types.chat.chat_completion_chunk import (
         Choice as ChunkChoice,
         ChoiceDelta,
@@ -98,75 +96,6 @@ def test_init():
     assert m.model_name == 'gpt-4o'


-@dataclass
-class MockOpenAI:
-    completions: MockChatCompletion | Sequence[MockChatCompletion] | None = None
-    stream: Sequence[MockChatCompletionChunk] | Sequence[Sequence[MockChatCompletionChunk]] | None = None
-    index: int = 0
-    chat_completion_kwargs: list[dict[str, Any]] = field(default_factory=list)
-
-    @cached_property
-    def chat(self) -> Any:
-        chat_completions = type('Completions', (), {'create': self.chat_completions_create})
-        return type('Chat', (), {'completions': chat_completions})
-
-    @classmethod
-    def create_mock(cls, completions: MockChatCompletion | Sequence[MockChatCompletion]) -> AsyncOpenAI:
-        return cast(AsyncOpenAI, cls(completions=completions))
-
-    @classmethod
-    def create_mock_stream(
-        cls,
-        stream: Sequence[MockChatCompletionChunk] | Sequence[Sequence[MockChatCompletionChunk]],
-    ) -> AsyncOpenAI:
-        return cast(AsyncOpenAI, cls(stream=stream))
-
-    async def chat_completions_create(  # pragma: lax no cover
-        self, *_args: Any, stream: bool = False, **kwargs: Any
-    ) -> chat.ChatCompletion | MockAsyncStream[MockChatCompletionChunk]:
-        self.chat_completion_kwargs.append({k: v for k, v in kwargs.items() if v is not NOT_GIVEN})
-
-        if stream:
-            assert self.stream is not None, 'you can only used `stream=True` if `stream` is provided'
-            if isinstance(self.stream[0], Sequence):
-                response = MockAsyncStream(iter(cast(list[MockChatCompletionChunk], self.stream[self.index])))
-            else:
-                response = MockAsyncStream(iter(cast(list[MockChatCompletionChunk], self.stream)))
-        else:
-            assert self.completions is not None, 'you can only used `stream=False` if `completions` are provided'
-            if isinstance(self.completions, Sequence):
-                raise_if_exception(self.completions[self.index])
-                response = cast(chat.ChatCompletion, self.completions[self.index])
-            else:
-                raise_if_exception(self.completions)
-                response = cast(chat.ChatCompletion, self.completions)
-        self.index += 1
-        return response
-
-
-def get_mock_chat_completion_kwargs(async_open_ai: AsyncOpenAI) -> list[dict[str, Any]]:
-    if isinstance(async_open_ai, MockOpenAI):
-        return async_open_ai.chat_completion_kwargs
-    else:  # pragma: no cover
-        raise RuntimeError('Not a MockOpenAI instance')
-
-
-def completion_message(
-    message: ChatCompletionMessage, *, usage: CompletionUsage | None = None, logprobs: ChoiceLogprobs | None = None
-) -> chat.ChatCompletion:
-    choices = [Choice(finish_reason='stop', index=0, message=message)]
-    if logprobs:
-        choices = [Choice(finish_reason='stop', index=0, message=message, logprobs=logprobs)]
-    return chat.ChatCompletion(
-        id='123',
-        choices=choices,
-        created=1704067200,  # 2024-01-01
-        model='gpt-4o-123',
-        object='chat.completion',
-        usage=usage,
-    )
-
-
 async def test_request_simple_success(allow_model_requests: None):
     c = completion_message(
         ChatCompletionMessage(content='world', role='assistant'),
diff --git a/tests/models/test_openai_responses.py b/tests/models/test_openai_responses.py
index 6c838f4e09..6ba535163a 100644
--- a/tests/models/test_openai_responses.py
+++ b/tests/models/test_openai_responses.py
@@ -1,6 +1,6 @@
 import json
 from dataclasses import replace
-from typing import Any
+from typing import Any, cast

 import pytest
 from inline_snapshot import snapshot
@@ -33,8 +33,12 @@
 from ..conftest import IsDatetime, IsStr, TestEnv, try_import
 from ..parts_from_messages import part_types_from_messages
+from .mock_openai import MockOpenAIResponses, response_message

 with try_import() as imports_successful:
+    from openai.types.responses.response_output_message import Content, ResponseOutputMessage, ResponseOutputText
+    from openai.types.responses.response_usage import ResponseUsage
+
     from pydantic_ai.models.openai import OpenAIResponsesModel, OpenAIResponsesModelSettings
     from pydantic_ai.providers.openai import OpenAIProvider

@@ -1078,14 +1082,46 @@ async def test_openai_responses_verbosity(allow_model_requests: None, openai_api
     assert result.output == snapshot('4')


-async def test_openai_responses_usage_without_tokens_details(allow_model_requests: None, openai_api_key: str):
-    # The VCR cassette was manually modified to remove the input_tokens_details and output_tokens_details fields.
-    provider = OpenAIProvider(api_key=openai_api_key)
-    model = OpenAIResponsesModel('gpt-4o', provider=provider)
+async def test_openai_responses_usage_without_tokens_details(allow_model_requests: None):
+    c = response_message(
+        [
+            ResponseOutputMessage(
+                id='123',
+                content=cast(list[Content], [ResponseOutputText(text='4', type='output_text', annotations=[])]),
+                role='assistant',
+                status='completed',
+                type='message',
+            )
+        ],
+        # Intentionally use model_construct so that input_tokens_details and output_tokens_details will not be set.
+        usage=ResponseUsage.model_construct(input_tokens=14, output_tokens=1, total_tokens=15),
+    )
+    mock_client = MockOpenAIResponses.create_mock(c)
+    model = OpenAIResponsesModel('gpt-4o', provider=OpenAIProvider(openai_client=mock_client))
     agent = Agent(model=model)

     result = await agent.run('What is 2+2?')
+    assert result.all_messages() == snapshot(
+        [
+            ModelRequest(
+                parts=[
+                    UserPromptPart(
+                        content='What is 2+2?',
+                        timestamp=IsDatetime(),
+                    )
+                ]
+            ),
+            ModelResponse(
+                parts=[TextPart(content='4')],
+                usage=RequestUsage(input_tokens=14, output_tokens=1, details={'reasoning_tokens': 0}),
+                model_name='gpt-4o-123',
+                timestamp=IsDatetime(),
+                provider_name='openai',
+                provider_request_id='123',
+            ),
+        ]
+    )
     assert result.usage() == snapshot(
-        RunUsage(input_tokens=14, output_tokens=9, details={'reasoning_tokens': 0}, requests=1)
+        RunUsage(input_tokens=14, output_tokens=1, details={'reasoning_tokens': 0}, requests=1)
     )

From 66c01019b646af40f574a0f2336c957d63fdaa54 Mon Sep 17 00:00:00 2001
From: Douwe Maan
Date: Wed, 27 Aug 2025 17:38:45 +0000
Subject: [PATCH 2/2] pragma: no cover

---
 tests/models/mock_openai.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/models/mock_openai.py b/tests/models/mock_openai.py
index 4f0a7fbf5a..d6aebe116f 100644
--- a/tests/models/mock_openai.py
+++ b/tests/models/mock_openai.py
@@ -113,7 +113,7 @@ def create_mock_stream(
         cls,
         stream: Sequence[MockResponseStreamEvent] | Sequence[Sequence[MockResponseStreamEvent]],
     ) -> AsyncOpenAI:
-        return cast(AsyncOpenAI, cls(stream=stream))
+        return cast(AsyncOpenAI, cls(stream=stream))  # pragma: lax no cover

     async def responses_create(  # pragma: lax no cover
         self, *_args: Any, stream: bool = False, **kwargs: Any
@@ -139,7 +139,7 @@ async def responses_create(  # pragma: lax no cover


 def get_mock_responses_kwargs(async_open_ai: AsyncOpenAI) -> list[dict[str, Any]]:
-    if isinstance(async_open_ai, MockOpenAIResponses):
+    if isinstance(async_open_ai, MockOpenAIResponses):  # pragma: lax no cover
         return async_open_ai.response_kwargs
     else:  # pragma: no cover
         raise RuntimeError('Not a MockOpenAIResponses instance')
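
---
Usage sketch (illustrative, not part of the patch): the snippet below shows how the new
helpers are meant to be combined in a test, following the pattern of the updated
test_openai_responses_usage_without_tokens_details above. The test name
test_mocked_responses_roundtrip and the final kwargs assertion are hypothetical additions;
everything else uses names introduced by this patch.

    from typing import cast

    from openai.types.responses.response_output_message import Content, ResponseOutputMessage, ResponseOutputText

    from pydantic_ai import Agent
    from pydantic_ai.models.openai import OpenAIResponsesModel
    from pydantic_ai.providers.openai import OpenAIProvider

    from .mock_openai import MockOpenAIResponses, get_mock_responses_kwargs, response_message


    async def test_mocked_responses_roundtrip(allow_model_requests: None):
        # Build one canned output item; response_message wraps it in a Response,
        # filling in the boilerplate fields (id, model, created_at, tool settings).
        output = ResponseOutputMessage(
            id='123',
            content=cast(list[Content], [ResponseOutputText(text='4', type='output_text', annotations=[])]),
            role='assistant',
            status='completed',
            type='message',
        )
        mock_client = MockOpenAIResponses.create_mock(response_message([output]))

        # The mock stands in for AsyncOpenAI, so no network access or API key is needed.
        model = OpenAIResponsesModel('gpt-4o', provider=OpenAIProvider(openai_client=mock_client))
        agent = Agent(model=model)

        result = await agent.run('What is 2+2?')
        assert result.output == '4'
        # The mock records the kwargs of every responses.create call for later assertions.
        assert get_mock_responses_kwargs(mock_client)[0]['model'] == 'gpt-4o'

Because the mock only needs to satisfy the client surface the model actually touches
(responses.create), the dynamically built type(...) attributes keep it far smaller than a
full AsyncOpenAI fake, and create_mock_stream supports the streaming path the same way.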