From 2cc86301295c67899bad1444d0f0d9467596a35d Mon Sep 17 00:00:00 2001
From: Tim Esler
Date: Sat, 19 Apr 2025 01:21:48 -0700
Subject: [PATCH 1/5] Handle additional usage details in Anthropic responses

---
 .../pydantic_ai/models/anthropic.py | 14 ++++++++-
 tests/models/test_anthropic.py      | 30 +++++++++++++++++++
 2 files changed, 43 insertions(+), 1 deletion(-)

diff --git a/pydantic_ai_slim/pydantic_ai/models/anthropic.py b/pydantic_ai_slim/pydantic_ai/models/anthropic.py
index 61156849e7..25f1e73e2b 100644
--- a/pydantic_ai_slim/pydantic_ai/models/anthropic.py
+++ b/pydantic_ai_slim/pydantic_ai/models/anthropic.py
@@ -409,13 +409,25 @@ def _map_usage(message: AnthropicMessage | RawMessageStreamEvent) -> usage.Usage
     if response_usage is None:
         return usage.Usage()
 
+    # Usage coming from the RawMessageDeltaEvent doesn't have input token data, hence these getattr calls
     request_tokens = getattr(response_usage, 'input_tokens', None)
+    cache_creation_request_tokens = getattr(response_usage, 'cache_creation_input_tokens', None)
+    cache_read_request_tokens = getattr(response_usage, 'cache_read_input_tokens', None)
+    details: dict[str, int] = {}
 
+    # Tokens are only counted once between input_tokens, cache_creation_input_tokens, and cache_read_input_tokens
+    # This approach maintains request_tokens as the count of all input tokens, with cached counts as details
+    if isinstance(cache_creation_request_tokens, int):
+        request_tokens = (request_tokens or 0) + cache_creation_request_tokens
+        details['cache_creation_input_tokens'] = cache_creation_request_tokens
+    if isinstance(cache_read_request_tokens, int):
+        request_tokens = (request_tokens or 0) + cache_read_request_tokens
+        details['cache_read_input_tokens'] = cache_read_request_tokens
 
     return usage.Usage(
-        # Usage coming from the RawMessageDeltaEvent doesn't have input token data, hence this getattr
         request_tokens=request_tokens,
         response_tokens=response_usage.output_tokens,
         total_tokens=(request_tokens or 0) + response_usage.output_tokens,
+        details=details,
     )
 
diff --git a/tests/models/test_anthropic.py b/tests/models/test_anthropic.py
index 2e7d660711..aa2c1a3964 100644
--- a/tests/models/test_anthropic.py
+++ b/tests/models/test_anthropic.py
@@ -167,6 +167,36 @@ async def test_sync_request_text_response(allow_model_requests: None):
     )
 
 
+async def test_async_request_prompt_caching(allow_model_requests: None):
+    c = completion_message(
+        [TextBlock(text='world', type='text')],
+        usage=AnthropicUsage(
+            input_tokens=3,
+            output_tokens=5,
+            cache_creation_input_tokens=4,
+            cache_read_input_tokens=6,
+        ),
+    )
+    mock_client = MockAnthropic.create_mock(c)
+    m = AnthropicModel('claude-3-5-haiku-latest', provider=AnthropicProvider(anthropic_client=mock_client))
+    agent = Agent(m)
+
+    result = await agent.run('hello')
+    assert result.output == 'world'
+    assert result.usage() == snapshot(
+        Usage(
+            requests=1,
+            request_tokens=13,
+            response_tokens=5,
+            total_tokens=18,
+            details={
+                'cache_creation_input_tokens': 4,
+                'cache_read_input_tokens': 6,
+            },
+        )
+    )
+
+
 async def test_async_request_text_response(allow_model_requests: None):
     c = completion_message(
         [TextBlock(text='world', type='text')],

From 34c040a5d5f67f176be1b90a561d7550a636ab5b Mon Sep 17 00:00:00 2001
From: Tim Esler
Date: Sat, 26 Apr 2025 01:03:58 -0700
Subject: [PATCH 2/5] Populate Anthropic Usage details with any vendor-returned ints

---
 .../pydantic_ai/models/anthropic.py | 12 ++++++----
 tests/models/test_anthropic.py      | 23 +++++++++++--------
 2 files changed, 22 insertions(+), 13 deletions(-)
diff --git a/pydantic_ai_slim/pydantic_ai/models/anthropic.py b/pydantic_ai_slim/pydantic_ai/models/anthropic.py
index 25f1e73e2b..7868005b0a 100644
--- a/pydantic_ai_slim/pydantic_ai/models/anthropic.py
+++ b/pydantic_ai_slim/pydantic_ai/models/anthropic.py
@@ -409,25 +409,29 @@ def _map_usage(message: AnthropicMessage | RawMessageStreamEvent) -> usage.Usage
     if response_usage is None:
         return usage.Usage()
 
+    # Store all integer-typed usage values in the details dict
+    response_usage_dict = response_usage.model_dump()
+    details: dict[str, int] = {}
+    for key, value in response_usage_dict.items():
+        if isinstance(value, int):
+            details[key] = value
+
     # Usage coming from the RawMessageDeltaEvent doesn't have input token data, hence these getattr calls
     request_tokens = getattr(response_usage, 'input_tokens', None)
     cache_creation_request_tokens = getattr(response_usage, 'cache_creation_input_tokens', None)
     cache_read_request_tokens = getattr(response_usage, 'cache_read_input_tokens', None)
-    details: dict[str, int] = {}
 
     # Tokens are only counted once between input_tokens, cache_creation_input_tokens, and cache_read_input_tokens
     # This approach maintains request_tokens as the count of all input tokens, with cached counts as details
     if isinstance(cache_creation_request_tokens, int):
         request_tokens = (request_tokens or 0) + cache_creation_request_tokens
-        details['cache_creation_input_tokens'] = cache_creation_request_tokens
     if isinstance(cache_read_request_tokens, int):
         request_tokens = (request_tokens or 0) + cache_read_request_tokens
-        details['cache_read_input_tokens'] = cache_read_request_tokens
 
     return usage.Usage(
         request_tokens=request_tokens,
         response_tokens=response_usage.output_tokens,
         total_tokens=(request_tokens or 0) + response_usage.output_tokens,
-        details=details,
+        details=details or None,
     )
 
diff --git a/tests/models/test_anthropic.py b/tests/models/test_anthropic.py
index aa2c1a3964..57677f1aa5 100644
--- a/tests/models/test_anthropic.py
+++ b/tests/models/test_anthropic.py
@@ -7,6 +7,7 @@
 from datetime import timezone
 from functools import cached_property
 from typing import Any, TypeVar, Union, cast
+from unittest.mock import ANY
 
 import httpx
 import pytest
@@ -141,14 +142,17 @@ async def test_sync_request_text_response(allow_model_requests: None):
 
     result = await agent.run('hello')
     assert result.output == 'world'
-    assert result.usage() == snapshot(Usage(requests=1, request_tokens=5, response_tokens=10, total_tokens=15))
-
+    assert result.usage() == snapshot(
+        Usage(requests=1, request_tokens=5, response_tokens=10, total_tokens=15, details=ANY)
+    )
     # reset the index so we get the same response again
     mock_client.index = 0  # type: ignore
 
     result = await agent.run('hello', message_history=result.new_messages())
     assert result.output == 'world'
-    assert result.usage() == snapshot(Usage(requests=1, request_tokens=5, response_tokens=10, total_tokens=15))
+    assert result.usage() == snapshot(
+        Usage(requests=1, request_tokens=5, response_tokens=10, total_tokens=15, details=ANY)
+    )
     assert result.all_messages() == snapshot(
         [
             ModelRequest(parts=[UserPromptPart(content='hello', timestamp=IsNow(tz=timezone.utc))]),
@@ -189,10 +193,7 @@ async def test_async_request_prompt_caching(allow_model_requests: None):
             request_tokens=13,
             response_tokens=5,
             total_tokens=18,
-            details={
-                'cache_creation_input_tokens': 4,
-                'cache_read_input_tokens': 6,
-            },
+            details=ANY,
         )
     )
 
@@ -208,7 +209,9 @@ async def test_async_request_text_response(allow_model_requests: None):
 
     result = await agent.run('hello')
     assert result.output == 'world'
-    assert result.usage() == snapshot(Usage(requests=1, request_tokens=3, response_tokens=5, total_tokens=8))
+    assert result.usage() == snapshot(
+        Usage(requests=1, request_tokens=3, response_tokens=5, total_tokens=8, details=ANY)
+    )
 
 
 async def test_request_structured_response(allow_model_requests: None):
@@ -581,7 +584,9 @@ async def my_tool(first: str, second: str) -> int:
         ]
     )
     assert result.is_complete
-    assert result.usage() == snapshot(Usage(requests=2, request_tokens=20, response_tokens=5, total_tokens=25))
+    assert result.usage() == snapshot(
+        Usage(requests=2, request_tokens=20, response_tokens=5, total_tokens=25, details=ANY)
+    )
     assert tool_called

From 31e7c8e17d36a8e68cf147a04fb343429abef0d4 Mon Sep 17 00:00:00 2001
From: Tim Esler
Date: Tue, 29 Apr 2025 10:05:17 -0700
Subject: [PATCH 3/5] Verify that Anthropic usage details are correct in unit tests

---
 tests/models/test_anthropic.py | 40 +++++++++++++++++++++++++++++-----
 1 file changed, 34 insertions(+), 6 deletions(-)

diff --git a/tests/models/test_anthropic.py b/tests/models/test_anthropic.py
index 57677f1aa5..b5d970f2a7 100644
--- a/tests/models/test_anthropic.py
+++ b/tests/models/test_anthropic.py
@@ -7,7 +7,6 @@
 from datetime import timezone
 from functools import cached_property
 from typing import Any, TypeVar, Union, cast
-from unittest.mock import ANY
 
 import httpx
 import pytest
@@ -143,7 +142,13 @@ async def test_sync_request_text_response(allow_model_requests: None):
     result = await agent.run('hello')
     assert result.output == 'world'
     assert result.usage() == snapshot(
-        Usage(requests=1, request_tokens=5, response_tokens=10, total_tokens=15, details=ANY)
+        Usage(
+            requests=1,
+            request_tokens=5,
+            response_tokens=10,
+            total_tokens=15,
+            details={'input_tokens': 5, 'output_tokens': 10},
+        )
     )
     # reset the index so we get the same response again
     mock_client.index = 0  # type: ignore
@@ -151,7 +156,13 @@ async def test_sync_request_text_response(allow_model_requests: None):
     result = await agent.run('hello', message_history=result.new_messages())
     assert result.output == 'world'
     assert result.usage() == snapshot(
-        Usage(requests=1, request_tokens=5, response_tokens=10, total_tokens=15, details=ANY)
+        Usage(
+            requests=1,
+            request_tokens=5,
+            response_tokens=10,
+            total_tokens=15,
+            details={'input_tokens': 5, 'output_tokens': 10},
+        )
     )
     assert result.all_messages() == snapshot(
         [
@@ -193,7 +204,12 @@ async def test_async_request_prompt_caching(allow_model_requests: None):
             request_tokens=13,
             response_tokens=5,
             total_tokens=18,
-            details=ANY,
+            details={
+                'input_tokens': 3,
+                'output_tokens': 5,
+                'cache_creation_input_tokens': 4,
+                'cache_read_input_tokens': 6,
+            },
         )
     )
 
@@ -210,7 +226,13 @@ async def test_async_request_text_response(allow_model_requests: None):
     result = await agent.run('hello')
     assert result.output == 'world'
     assert result.usage() == snapshot(
-        Usage(requests=1, request_tokens=3, response_tokens=5, total_tokens=8, details=ANY)
+        Usage(
+            requests=1,
+            request_tokens=3,
+            response_tokens=5,
+            total_tokens=8,
+            details={'input_tokens': 3, 'output_tokens': 5},
+        )
     )
 
 
@@ -585,7 +607,13 @@ async def my_tool(first: str, second: str) -> int:
     )
     assert result.is_complete
     assert result.usage() == snapshot(
-        Usage(requests=2, request_tokens=20, response_tokens=5, total_tokens=25, details=ANY)
+        Usage(
+            requests=2,
+            request_tokens=20,
+            response_tokens=5,
+            total_tokens=25,
+            details={'input_tokens': 20, 'output_tokens': 5},
+        )
     )
     assert tool_called
From 0c9de2d28611e48399b803fbd4db2c088ebeb5b1 Mon Sep 17 00:00:00 2001
From: Tim Esler
Date: Wed, 30 Apr 2025 11:06:51 -0700
Subject: [PATCH 4/5] Simplify anthropic token parsing logic

---
 pydantic_ai_slim/pydantic_ai/models/anthropic.py | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/pydantic_ai_slim/pydantic_ai/models/anthropic.py b/pydantic_ai_slim/pydantic_ai/models/anthropic.py
index 7868005b0a..508429804b 100644
--- a/pydantic_ai_slim/pydantic_ai/models/anthropic.py
+++ b/pydantic_ai_slim/pydantic_ai/models/anthropic.py
@@ -416,21 +416,19 @@ def _map_usage(message: AnthropicMessage | RawMessageStreamEvent) -> usage.Usage
         if isinstance(value, int):
             details[key] = value
 
-    # Usage coming from the RawMessageDeltaEvent doesn't have input token data, hence these getattr calls
-    request_tokens = getattr(response_usage, 'input_tokens', None)
-    cache_creation_request_tokens = getattr(response_usage, 'cache_creation_input_tokens', None)
-    cache_read_request_tokens = getattr(response_usage, 'cache_read_input_tokens', None)
+    # Usage coming from the RawMessageDeltaEvent doesn't have input token data, hence the getattr calls
 
     # Tokens are only counted once between input_tokens, cache_creation_input_tokens, and cache_read_input_tokens
     # This approach maintains request_tokens as the count of all input tokens, with cached counts as details
-    if isinstance(cache_creation_request_tokens, int):
-        request_tokens = (request_tokens or 0) + cache_creation_request_tokens
-    if isinstance(cache_read_request_tokens, int):
-        request_tokens = (request_tokens or 0) + cache_read_request_tokens
+    request_tokens = (
+        getattr(response_usage, 'input_tokens', 0)
+        + (getattr(response_usage, 'cache_creation_input_tokens', 0) or 0)  # These can be missing, None, or int
+        + (getattr(response_usage, 'cache_read_input_tokens', 0) or 0)
+    )
 
     return usage.Usage(
         request_tokens=request_tokens,
         response_tokens=response_usage.output_tokens,
-        total_tokens=(request_tokens or 0) + response_usage.output_tokens,
+        total_tokens=request_tokens + response_usage.output_tokens,
         details=details or None,
     )

From 9b8c0ea608e879544ef6d3e41463ec70efd3ab93 Mon Sep 17 00:00:00 2001
From: Tim Esler
Date: Wed, 30 Apr 2025 17:22:11 -0700
Subject: [PATCH 5/5] Set 0 request tokens to None

---
 pydantic_ai_slim/pydantic_ai/models/anthropic.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pydantic_ai_slim/pydantic_ai/models/anthropic.py b/pydantic_ai_slim/pydantic_ai/models/anthropic.py
index 508429804b..9aa96ce217 100644
--- a/pydantic_ai_slim/pydantic_ai/models/anthropic.py
+++ b/pydantic_ai_slim/pydantic_ai/models/anthropic.py
@@ -426,7 +426,7 @@ def _map_usage(message: AnthropicMessage | RawMessageStreamEvent) -> usage.Usage
     )
 
     return usage.Usage(
-        request_tokens=request_tokens,
+        request_tokens=request_tokens or None,
         response_tokens=response_usage.output_tokens,
         total_tokens=request_tokens + response_usage.output_tokens,
         details=details or None,