From 72bf79a7b06a0904da192f3a3ce0aa9423ad2b8d Mon Sep 17 00:00:00 2001 From: Nathan Gage Date: Mon, 17 Nov 2025 16:06:05 -0500 Subject: [PATCH 1/2] [anthropic] add `count_tokens` --- .../pydantic_ai/models/anthropic.py | 108 +++++++++++++++--- 1 file changed, 89 insertions(+), 19 deletions(-) diff --git a/pydantic_ai_slim/pydantic_ai/models/anthropic.py b/pydantic_ai_slim/pydantic_ai/models/anthropic.py index c636ba9cfc..6b5b3dfb2c 100644 --- a/pydantic_ai_slim/pydantic_ai/models/anthropic.py +++ b/pydantic_ai_slim/pydantic_ai/models/anthropic.py @@ -55,7 +55,7 @@ try: - from anthropic import NOT_GIVEN, APIStatusError, AsyncStream, omit as OMIT + from anthropic import NOT_GIVEN, APIStatusError, AsyncAnthropicBedrock, AsyncStream, omit as OMIT from anthropic.types.beta import ( BetaBase64PDFBlockParam, BetaBase64PDFSourceParam, @@ -76,6 +76,7 @@ BetaMemoryTool20250818Param, BetaMessage, BetaMessageParam, + BetaMessageTokensCount, BetaMetadataParam, BetaPlainTextSourceParam, BetaRawContentBlockDeltaEvent, @@ -239,6 +240,23 @@ async def request( model_response = self._process_response(response) return model_response + async def count_tokens( + self, + messages: list[ModelMessage], + model_settings: ModelSettings | None, + model_request_parameters: ModelRequestParameters, + ) -> usage.RequestUsage: + model_settings, model_request_parameters = self.prepare_request( + model_settings, + model_request_parameters, + ) + + response = await self._messages_count_tokens( + messages, cast(AnthropicModelSettings, model_settings or {}), model_request_parameters + ) + + return usage.RequestUsage(input_tokens=response.input_tokens) + @asynccontextmanager async def request_stream( self, @@ -310,28 +328,12 @@ async def _messages_create( tools = self._get_tools(model_request_parameters, model_settings) tools, mcp_servers, beta_features = self._add_builtin_tools(tools, model_request_parameters) - tool_choice: BetaToolChoiceParam | None - - if not tools: - tool_choice = None - else: - if not model_request_parameters.allow_text_output: - tool_choice = {'type': 'any'} - else: - tool_choice = {'type': 'auto'} - - if (allow_parallel_tool_calls := model_settings.get('parallel_tool_calls')) is not None: - tool_choice['disable_parallel_tool_use'] = not allow_parallel_tool_calls + tool_choice = self._infer_tool_choice(tools, model_settings, model_request_parameters) system_prompt, anthropic_messages = await self._map_message(messages, model_request_parameters, model_settings) try: - extra_headers = model_settings.get('extra_headers', {}) - extra_headers.setdefault('User-Agent', get_user_agent()) - if beta_features: - if 'anthropic-beta' in extra_headers: - beta_features.insert(0, extra_headers['anthropic-beta']) - extra_headers['anthropic-beta'] = ','.join(beta_features) + extra_headers = self._map_extra_headers(beta_features, model_settings) return await self.client.beta.messages.create( max_tokens=model_settings.get('max_tokens', 4096), @@ -356,6 +358,43 @@ async def _messages_create( raise ModelHTTPError(status_code=status_code, model_name=self.model_name, body=e.body) from e raise # pragma: lax no cover + async def _messages_count_tokens( + self, + messages: list[ModelMessage], + model_settings: AnthropicModelSettings, + model_request_parameters: ModelRequestParameters, + ) -> BetaMessageTokensCount: + if isinstance(self.client, AsyncAnthropicBedrock): + raise UserError('AsyncAnthropicBedrock client does not support `count_tokens` api.') + + # standalone function to make it easier to override + tools = self._get_tools(model_request_parameters, model_settings) + tools, mcp_servers, beta_features = self._add_builtin_tools(tools, model_request_parameters) + + tool_choice = self._infer_tool_choice(tools, model_settings, model_request_parameters) + + system_prompt, anthropic_messages = await self._map_message(messages, model_request_parameters, model_settings) + + try: + extra_headers = self._map_extra_headers(beta_features, model_settings) + + return await self.client.beta.messages.count_tokens( + system=system_prompt or OMIT, + messages=anthropic_messages, + model=self._model_name, + tools=tools or OMIT, + tool_choice=tool_choice or OMIT, + mcp_servers=mcp_servers or OMIT, + thinking=model_settings.get('anthropic_thinking', OMIT), + timeout=model_settings.get('timeout', NOT_GIVEN), + extra_headers=extra_headers, + extra_body=model_settings.get('extra_body'), + ) + except APIStatusError as e: + if (status_code := e.status_code) >= 400: + raise ModelHTTPError(status_code=status_code, model_name=self.model_name, body=e.body) from e + raise # pragma: lax no cover + def _process_response(self, response: BetaMessage) -> ModelResponse: """Process a non-streamed response, and prepare a message to return.""" items: list[ModelResponsePart] = [] @@ -492,6 +531,37 @@ def _add_builtin_tools( ) return tools, mcp_servers, beta_features + def _infer_tool_choice( + self, + tools: list[BetaToolUnionParam], + model_settings: AnthropicModelSettings, + model_request_parameters: ModelRequestParameters, + ) -> BetaToolChoiceParam | None: + if not tools: + return None + else: + tool_choice: BetaToolChoiceParam + + if not model_request_parameters.allow_text_output: + tool_choice = {'type': 'any'} + else: + tool_choice = {'type': 'auto'} + + if 'parallel_tool_calls' in model_settings: + tool_choice['disable_parallel_tool_use'] = not model_settings['parallel_tool_calls'] + + return tool_choice + + def _map_extra_headers(self, beta_features: list[str], model_settings: AnthropicModelSettings) -> dict[str, str]: + """Apply beta_features to extra_headers in model_settings.""" + extra_headers = model_settings.get('extra_headers', {}) + extra_headers.setdefault('User-Agent', get_user_agent()) + if beta_features: + if 'anthropic-beta' in extra_headers: + beta_features.insert(0, extra_headers['anthropic-beta']) + extra_headers['anthropic-beta'] = ','.join(beta_features) + return extra_headers + async def _map_message( # noqa: C901 self, messages: list[ModelMessage], From 8f47956fe40505c4b8036db234b9ce1ab68bf859 Mon Sep 17 00:00:00 2001 From: Nathan Gage Date: Mon, 17 Nov 2025 16:20:49 -0500 Subject: [PATCH 2/2] [anthropic] add tests for `count_tokens` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add comprehensive test coverage for the Anthropic `count_tokens` implementation: - test_anthropic_model_usage_limit_exceeded: Verifies UsageLimitExceeded is raised when input_tokens_limit would be exceeded (18 < 19 tokens) - test_anthropic_model_usage_limit_not_exceeded: Tests successful execution when within token limits (25 > 19 tokens) - test_anthropic_count_tokens_error: Tests ModelHTTPError handling for invalid model names (404 response) Tests follow the same pattern as Bedrock's count_tokens tests, using the standard "quick brown fox" prompt which tokenizes to 19 input tokens with Anthropic's tokenizer. All tests include VCR cassettes for the `/v1/messages/count_tokens?beta=true` API endpoint. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- .../test_anthropic_count_tokens_error.yaml | 46 ++++++++ ..._anthropic_model_usage_limit_exceeded.yaml | 40 +++++++ ...hropic_model_usage_limit_not_exceeded.yaml | 108 ++++++++++++++++++ tests/models/test_anthropic.py | 75 +++++++++++- 4 files changed, 268 insertions(+), 1 deletion(-) create mode 100644 tests/models/cassettes/test_anthropic/test_anthropic_count_tokens_error.yaml create mode 100644 tests/models/cassettes/test_anthropic/test_anthropic_model_usage_limit_exceeded.yaml create mode 100644 tests/models/cassettes/test_anthropic/test_anthropic_model_usage_limit_not_exceeded.yaml diff --git a/tests/models/cassettes/test_anthropic/test_anthropic_count_tokens_error.yaml b/tests/models/cassettes/test_anthropic/test_anthropic_count_tokens_error.yaml new file mode 100644 index 0000000000..fd042891b9 --- /dev/null +++ b/tests/models/cassettes/test_anthropic/test_anthropic_count_tokens_error.yaml @@ -0,0 +1,46 @@ +interactions: +- request: + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '105' + content-type: + - application/json + host: + - api.anthropic.com + method: POST + parsed_body: + messages: + - content: + - text: hello + type: text + role: user + model: claude-does-not-exist + uri: https://api.anthropic.com/v1/messages/count_tokens?beta=true + response: + headers: + connection: + - keep-alive + content-length: + - '136' + content-type: + - application/json + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + transfer-encoding: + - chunked + parsed_body: + error: + message: 'model: claude-does-not-exist' + type: not_found_error + request_id: req_011CVEA3SF7rnb3DuBZytqQa + type: error + status: + code: 404 + message: Not Found +version: 1 diff --git a/tests/models/cassettes/test_anthropic/test_anthropic_model_usage_limit_exceeded.yaml b/tests/models/cassettes/test_anthropic/test_anthropic_model_usage_limit_exceeded.yaml new file mode 100644 index 0000000000..5e93ed1a70 --- /dev/null +++ b/tests/models/cassettes/test_anthropic/test_anthropic_model_usage_limit_exceeded.yaml @@ -0,0 +1,40 @@ +interactions: +- request: + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '139' + content-type: + - application/json + host: + - api.anthropic.com + method: POST + parsed_body: + messages: + - content: + - text: The quick brown fox jumps over the lazydog. + type: text + role: user + model: claude-sonnet-4-5 + uri: https://api.anthropic.com/v1/messages/count_tokens?beta=true + response: + headers: + connection: + - keep-alive + content-length: + - '19' + content-type: + - application/json + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + parsed_body: + input_tokens: 19 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/models/cassettes/test_anthropic/test_anthropic_model_usage_limit_not_exceeded.yaml b/tests/models/cassettes/test_anthropic/test_anthropic_model_usage_limit_not_exceeded.yaml new file mode 100644 index 0000000000..3462dad4ef --- /dev/null +++ b/tests/models/cassettes/test_anthropic/test_anthropic_model_usage_limit_not_exceeded.yaml @@ -0,0 +1,108 @@ +interactions: +- request: + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '139' + content-type: + - application/json + host: + - api.anthropic.com + method: POST + parsed_body: + messages: + - content: + - text: The quick brown fox jumps over the lazydog. + type: text + role: user + model: claude-sonnet-4-5 + uri: https://api.anthropic.com/v1/messages/count_tokens?beta=true + response: + headers: + connection: + - keep-alive + content-length: + - '19' + content-type: + - application/json + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + parsed_body: + input_tokens: 19 + status: + code: 200 + message: OK +- request: + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '172' + content-type: + - application/json + host: + - api.anthropic.com + method: POST + parsed_body: + max_tokens: 4096 + messages: + - content: + - text: The quick brown fox jumps over the lazydog. + type: text + role: user + model: claude-sonnet-4-5 + stream: false + uri: https://api.anthropic.com/v1/messages?beta=true + response: + headers: + connection: + - keep-alive + content-length: + - '729' + content-type: + - application/json + retry-after: + - '19' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + transfer-encoding: + - chunked + parsed_body: + content: + - text: |- + I noticed a small typo in that famous pangram! It should be: + + "The quick brown fox jumps over the **lazy dog**." + + (There should be a space between "lazy" and "dog") + + This sentence is often used for testing typewriters, fonts, and keyboards because it contains every letter of the English alphabet at least once. + type: text + id: msg_01QHpSAhCiB6L5pL23LjdRAy + model: claude-sonnet-4-5-20250929 + role: assistant + stop_reason: end_turn + stop_sequence: null + type: message + usage: + cache_creation: + ephemeral_1h_input_tokens: 0 + ephemeral_5m_input_tokens: 0 + cache_creation_input_tokens: 0 + cache_read_input_tokens: 0 + input_tokens: 19 + output_tokens: 77 + service_tier: standard + status: + code: 200 + message: OK +version: 1 diff --git a/tests/models/test_anthropic.py b/tests/models/test_anthropic.py index 1170483879..f30c3272a2 100644 --- a/tests/models/test_anthropic.py +++ b/tests/models/test_anthropic.py @@ -41,6 +41,7 @@ ToolCallPart, ToolCallPartDelta, ToolReturnPart, + UsageLimitExceeded, UserPromptPart, ) from pydantic_ai.builtin_tools import CodeExecutionTool, MCPServerTool, MemoryTool, WebSearchTool @@ -53,7 +54,7 @@ from pydantic_ai.output import NativeOutput, PromptedOutput, TextOutput, ToolOutput from pydantic_ai.result import RunUsage from pydantic_ai.settings import ModelSettings -from pydantic_ai.usage import RequestUsage +from pydantic_ai.usage import RequestUsage, UsageLimits from ..conftest import IsDatetime, IsInstance, IsNow, IsStr, TestEnv, raise_if_exception, try_import from ..parts_from_messages import part_types_from_messages @@ -6405,3 +6406,75 @@ def memory(**command: Any) -> Any: According to my memory, you live in **Mexico City**.\ """) + + +async def test_anthropic_model_usage_limit_exceeded( + allow_model_requests: None, + anthropic_api_key: str, +): + model = AnthropicModel('claude-sonnet-4-5', provider=AnthropicProvider(api_key=anthropic_api_key)) + agent = Agent(model=model) + + with pytest.raises( + UsageLimitExceeded, + match='The next request would exceed the input_tokens_limit of 18 \\(input_tokens=19\\)', + ): + await agent.run( + 'The quick brown fox jumps over the lazydog.', + usage_limits=UsageLimits(input_tokens_limit=18, count_tokens_before_request=True), + ) + + +async def test_anthropic_model_usage_limit_not_exceeded( + allow_model_requests: None, + anthropic_api_key: str, +): + model = AnthropicModel('claude-sonnet-4-5', provider=AnthropicProvider(api_key=anthropic_api_key)) + agent = Agent(model=model) + + result = await agent.run( + 'The quick brown fox jumps over the lazydog.', + usage_limits=UsageLimits(input_tokens_limit=25, count_tokens_before_request=True), + ) + assert result.output == snapshot( + """\ +I noticed a small typo in that famous pangram! It should be: + +"The quick brown fox jumps over the **lazy dog**." + +(There should be a space between "lazy" and "dog") + +This sentence is often used for testing typewriters, fonts, and keyboards because it contains every letter of the English alphabet at least once.\ +""" + ) + + +@pytest.mark.vcr() +async def test_anthropic_count_tokens_error(allow_model_requests: None, anthropic_api_key: str): + """Test that errors convert to ModelHTTPError.""" + model_id = 'claude-does-not-exist' + model = AnthropicModel(model_id, provider=AnthropicProvider(api_key=anthropic_api_key)) + agent = Agent(model) + + with pytest.raises(ModelHTTPError) as exc_info: + await agent.run('hello', usage_limits=UsageLimits(input_tokens_limit=20, count_tokens_before_request=True)) + + assert exc_info.value.status_code == 404 + assert exc_info.value.model_name == model_id + + +async def test_anthropic_bedrock_count_tokens_not_supported(env: TestEnv): + """Test that AsyncAnthropicBedrock raises UserError for count_tokens.""" + from anthropic import AsyncAnthropicBedrock + + bedrock_client = AsyncAnthropicBedrock( + aws_access_key='test-access-key', + aws_secret_key='test-secret-key', + aws_region='us-east-1', + ) + provider = AnthropicProvider(anthropic_client=bedrock_client) + model = AnthropicModel('anthropic.claude-3-5-sonnet-20241022-v2:0', provider=provider) + agent = Agent(model) + + with pytest.raises(UserError, match='AsyncAnthropicBedrock client does not support `count_tokens` api.'): + await agent.run('hello', usage_limits=UsageLimits(input_tokens_limit=20, count_tokens_before_request=True))