# Support native JSON output and strict tool calls for Anthropic #3457
base: main
Changes from all commits
```diff
@@ -77,6 +77,7 @@
     BetaContentBlockParam,
     BetaImageBlockParam,
     BetaInputJSONDelta,
+    BetaJSONOutputFormatParam,
     BetaMCPToolResultBlock,
     BetaMCPToolUseBlock,
     BetaMCPToolUseBlockParam,
```
```diff
@@ -205,8 +206,9 @@ def __init__(
             model_name: The name of the Anthropic model to use. List of model names available
                 [here](https://docs.anthropic.com/en/docs/about-claude/models).
             provider: The provider to use for the Anthropic API. Can be either the string 'anthropic' or an
-                instance of `Provider[AsyncAnthropicClient]`. If not provided, the other parameters will be used.
+                instance of `Provider[AsyncAnthropicClient]`. Defaults to 'anthropic'.
             profile: The model profile to use. Defaults to a profile picked by the provider based on the model name.
+                The default 'anthropic' provider will use the default `..profiles.anthropic_model_profile`.
             settings: Default model settings for this model instance.
         """
         self._model_name = model_name
```
```diff
@@ -296,14 +298,29 @@ def prepare_request(
             and thinking.get('type') == 'enabled'
         ):
             if model_request_parameters.output_mode == 'auto':
-                model_request_parameters = replace(model_request_parameters, output_mode='prompted')
+                output_mode = 'native' if self.profile.supports_json_schema_output else 'prompted'
+                model_request_parameters = replace(model_request_parameters, output_mode=output_mode)
             elif (
                 model_request_parameters.output_mode == 'tool' and not model_request_parameters.allow_text_output
             ):  # pragma: no branch
                 # This would result in `tool_choice=required`, which Anthropic does not support with thinking.
+                output_mode = 'NativeOutput' if self.profile.supports_json_schema_output else 'PromptedOutput'
                 raise UserError(
-                    'Anthropic does not support thinking and output tools at the same time. Use `output_type=PromptedOutput(...)` instead.'
+                    f'Anthropic does not support thinking and output tools at the same time. Use `output_type={output_mode}(...)` instead.'
                 )

+        # NOTE: forcing `strict=True` here is a bit eager, because the transformer may still determine that the
+        # transformation is lossy, so we're relying on Anthropic's strict mode being better than prompting the
+        # model with Pydantic's schema.
+        if model_request_parameters.output_mode == 'native' and model_request_parameters.output_object is not None:
+            # Force strict=True for native output.
+            # This needs to be done here because `super().prepare_request` calls
+            # -> Model.customize_request_parameters(model_request_parameters), which calls
+            # -> -> _customize_output_object(transformer: type[JsonSchemaTransformer], output_object: OutputObjectDefinition),
+            # which finally instantiates the transformer (default AnthropicJsonSchemaTransformer):
+            # `schema_transformer = transformer(output_object.json_schema, strict=output_object.strict)`
+            model_request_parameters = replace(
+                model_request_parameters, output_object=replace(model_request_parameters.output_object, strict=True)
+            )
         return super().prepare_request(model_settings, model_request_parameters)

     @overload
```
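To make the effect of the `prepare_request` change concrete, here is a minimal usage sketch. The model name, and `NativeOutput` coming from `pydantic_ai.output`, are assumptions based on Pydantic AI's public API rather than part of this diff:

```python
from pydantic import BaseModel

from pydantic_ai import Agent
from pydantic_ai.output import NativeOutput


class CityInfo(BaseModel):
    city: str
    country: str


# Assumed model name. With this PR, an Anthropic model whose profile sets
# `supports_json_schema_output` sends a native JSON schema `output_format`
# (with strict=True forced) instead of using an output tool or prompted JSON.
agent = Agent('anthropic:claude-sonnet-4-5', output_type=NativeOutput(CityInfo))

result = agent.run_sync('Tell me about Paris.')
print(result.output)  # e.g. CityInfo(city='Paris', country='France')
```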
```diff
@@ -333,16 +350,24 @@ async def _messages_create(
         model_settings: AnthropicModelSettings,
         model_request_parameters: ModelRequestParameters,
     ) -> BetaMessage | AsyncStream[BetaRawMessageStreamEvent]:
-        # standalone function to make it easier to override
+        """Calls the Anthropic API to create a message.
+
+        This is the last step before sending the request to the API.
+        Most preprocessing has happened in `prepare_request()`.
+        """
         tools = self._get_tools(model_request_parameters, model_settings)
-        tools, mcp_servers, beta_features = self._add_builtin_tools(tools, model_request_parameters)
+        tools, mcp_servers, builtin_tool_betas = self._add_builtin_tools(tools, model_request_parameters)
+        output_format = self._native_output_format(model_request_parameters)

         tool_choice = self._infer_tool_choice(tools, model_settings, model_request_parameters)

         system_prompt, anthropic_messages = await self._map_message(messages, model_request_parameters, model_settings)

+        betas_set = self._get_required_betas(tools, model_request_parameters)
+        betas_set.update(builtin_tool_betas)

         try:
-            extra_headers = self._map_extra_headers(beta_features, model_settings)
+            betas, extra_headers = self._prepare_betas_and_headers(betas_set, model_settings)

             return await self.client.beta.messages.create(
                 max_tokens=model_settings.get('max_tokens', 4096),
@@ -352,6 +377,8 @@ async def _messages_create(
                 tools=tools or OMIT,
                 tool_choice=tool_choice or OMIT,
                 mcp_servers=mcp_servers or OMIT,
+                output_format=output_format or OMIT,
+                betas=betas or OMIT,
                 stream=stream,
                 thinking=model_settings.get('anthropic_thinking', OMIT),
                 stop_sequences=model_settings.get('stop_sequences', OMIT),
```
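For reference, a hedged sketch of the equivalent raw SDK call this now produces, assuming a recent `anthropic` SDK that exposes the structured-outputs beta (as the `BetaJSONOutputFormatParam` import suggests); the model name and schema are illustrative, not from this diff:

```python
import anthropic

client = anthropic.AsyncAnthropic()  # reads ANTHROPIC_API_KEY from the environment

# Illustrative strict JSON schema, standing in for the transformer's output.
schema = {
    'type': 'object',
    'properties': {'answer': {'type': 'string'}},
    'required': ['answer'],
    'additionalProperties': False,
}


async def demo() -> None:
    # Mirrors what `_messages_create` sends when output_mode == 'native';
    # `output_format` and `betas` are the two fields added by this PR.
    message = await client.beta.messages.create(
        model='claude-sonnet-4-5',  # assumed model name
        max_tokens=4096,
        messages=[{'role': 'user', 'content': 'Answer briefly: what is 2 + 2?'}],
        output_format={'type': 'json_schema', 'schema': schema},
        betas=['structured-outputs-2025-11-13'],
    )
    print(message.content)


if __name__ == '__main__':
    import asyncio

    asyncio.run(demo())
```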
```diff
@@ -380,14 +407,18 @@ async def _messages_count_tokens(

         # standalone function to make it easier to override
         tools = self._get_tools(model_request_parameters, model_settings)
-        tools, mcp_servers, beta_features = self._add_builtin_tools(tools, model_request_parameters)
+        tools, mcp_servers, builtin_tool_betas = self._add_builtin_tools(tools, model_request_parameters)
+        output_format = self._native_output_format(model_request_parameters)

         tool_choice = self._infer_tool_choice(tools, model_settings, model_request_parameters)

         system_prompt, anthropic_messages = await self._map_message(messages, model_request_parameters, model_settings)

+        betas = self._get_required_betas(tools, model_request_parameters)
+        betas.update(builtin_tool_betas)

         try:
-            extra_headers = self._map_extra_headers(beta_features, model_settings)
+            betas_list, extra_headers = self._prepare_betas_and_headers(betas, model_settings)

             return await self.client.beta.messages.count_tokens(
                 system=system_prompt or OMIT,
@@ -396,6 +427,8 @@ async def _messages_count_tokens(
                 tools=tools or OMIT,
                 tool_choice=tool_choice or OMIT,
                 mcp_servers=mcp_servers or OMIT,
+                betas=betas_list or OMIT,
+                output_format=output_format or OMIT,
                 thinking=model_settings.get('anthropic_thinking', OMIT),
                 timeout=model_settings.get('timeout', NOT_GIVEN),
                 extra_headers=extra_headers,
```
```diff
@@ -497,10 +530,31 @@ def _get_tools(

         return tools

+    def _get_required_betas(
+        self, tools: list[BetaToolUnionParam], model_request_parameters: ModelRequestParameters
+    ) -> set[str]:
+        """Determine which beta features are needed based on tools and output format.
+
+        Args:
+            tools: The transformed tool dictionaries that will be sent to the API.
+            model_request_parameters: Model request parameters containing output settings.
+
+        Returns:
+            Set of beta feature strings (naturally deduplicated).
+        """
+        betas: set[str] = set()
+
+        has_strict_tools = any(tool.get('strict') for tool in tools)
+
+        if has_strict_tools or model_request_parameters.output_mode == 'native':
+            betas.add('structured-outputs-2025-11-13')
+
+        return betas
```

> **Collaborator** (on the `has_strict_tools` check): I think there's a scenario where we can send a tool def with …, so we should really add this beta depending on the result of ….
>
> **Collaborator:** That means we also don't need to check ….
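A standalone sketch of the resulting beta selection, for illustration only (the inputs are simplified stand-ins for the real parameter objects; the beta string comes from the diff above):

```python
# Which requests opt into the structured-outputs beta: any strict tool,
# or native output mode, is enough to require it.
def required_betas(tools: list[dict], output_mode: str) -> set[str]:
    betas: set[str] = set()
    if any(tool.get('strict') for tool in tools) or output_mode == 'native':
        betas.add('structured-outputs-2025-11-13')
    return betas


assert required_betas([], 'native') == {'structured-outputs-2025-11-13'}
assert required_betas([{'name': 'get_weather', 'strict': True}], 'tool') == {'structured-outputs-2025-11-13'}
assert required_betas([{'name': 'get_weather'}], 'tool') == set()
```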
```diff
     def _add_builtin_tools(
         self, tools: list[BetaToolUnionParam], model_request_parameters: ModelRequestParameters
-    ) -> tuple[list[BetaToolUnionParam], list[BetaRequestMCPServerURLDefinitionParam], list[str]]:
-        beta_features: list[str] = []
+    ) -> tuple[list[BetaToolUnionParam], list[BetaRequestMCPServerURLDefinitionParam], set[str]]:
+        beta_features: set[str] = set()
         mcp_servers: list[BetaRequestMCPServerURLDefinitionParam] = []
         for tool in model_request_parameters.builtin_tools:
             if isinstance(tool, WebSearchTool):
@@ -517,14 +571,14 @@ def _add_builtin_tools(
                 )
             elif isinstance(tool, CodeExecutionTool):  # pragma: no branch
                 tools.append(BetaCodeExecutionTool20250522Param(name='code_execution', type='code_execution_20250522'))
-                beta_features.append('code-execution-2025-05-22')
+                beta_features.add('code-execution-2025-05-22')
             elif isinstance(tool, MemoryTool):  # pragma: no branch
                 if 'memory' not in model_request_parameters.tool_defs:
                     raise UserError("Built-in `MemoryTool` requires a 'memory' tool to be defined.")
                 # Replace the memory tool definition with the built-in memory tool
                 tools = [tool for tool in tools if tool['name'] != 'memory']
                 tools.append(BetaMemoryTool20250818Param(name='memory', type='memory_20250818'))
-                beta_features.append('context-management-2025-06-27')
+                beta_features.add('context-management-2025-06-27')
             elif isinstance(tool, MCPServerTool) and tool.url:
                 mcp_server_url_definition_param = BetaRequestMCPServerURLDefinitionParam(
                     type='url',
@@ -539,7 +593,7 @@ def _add_builtin_tools(
                 if tool.authorization_token:  # pragma: no cover
                     mcp_server_url_definition_param['authorization_token'] = tool.authorization_token
                 mcp_servers.append(mcp_server_url_definition_param)
-                beta_features.append('mcp-client-2025-04-04')
+                beta_features.add('mcp-client-2025-04-04')
             else:  # pragma: no cover
                 raise UserError(
                     f'`{tool.__class__.__name__}` is not supported by `AnthropicModel`. If it should be, please file an issue.'
```
```diff
@@ -567,15 +621,28 @@ def _infer_tool_choice(

         return tool_choice

-    def _map_extra_headers(self, beta_features: list[str], model_settings: AnthropicModelSettings) -> dict[str, str]:
-        """Apply beta_features to extra_headers in model_settings."""
+    def _prepare_betas_and_headers(
+        self, betas: set[str], model_settings: AnthropicModelSettings
+    ) -> tuple[list[str], dict[str, str]]:
+        """Prepare the beta features list and extra headers for an API request.
+
+        Merges a custom `anthropic-beta` header from extra_headers into the betas set
+        and ensures User-Agent is set.
+
+        Args:
+            betas: Set of beta feature strings (naturally deduplicated).
+            model_settings: Model settings containing extra_headers.
+
+        Returns:
+            Tuple of (betas list, extra_headers dict).
+        """
         extra_headers = model_settings.get('extra_headers', {})
         extra_headers.setdefault('User-Agent', get_user_agent())
-        if beta_features:
-            if 'anthropic-beta' in extra_headers:
-                beta_features.insert(0, extra_headers['anthropic-beta'])
-            extra_headers['anthropic-beta'] = ','.join(beta_features)
-        return extra_headers

+        if beta_header := extra_headers.pop('anthropic-beta', None):
+            betas.update({stripped_beta for beta in beta_header.split(',') if (stripped_beta := beta.strip())})

+        return sorted(betas), extra_headers

     async def _map_message(  # noqa: C901
         self,
```
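A standalone sketch of the merge semantics, for illustration (the real method injects `get_user_agent()`; a placeholder string is used here):

```python
# A user-supplied `anthropic-beta` header is folded into the deduplicated set,
# and the result is returned as a sorted list plus the remaining headers.
def prepare_betas_and_headers(betas: set[str], extra_headers: dict[str, str]) -> tuple[list[str], dict[str, str]]:
    extra_headers = dict(extra_headers)
    extra_headers.setdefault('User-Agent', 'pydantic-ai/...')  # placeholder user agent
    if beta_header := extra_headers.pop('anthropic-beta', None):
        betas.update({stripped for beta in beta_header.split(',') if (stripped := beta.strip())})
    return sorted(betas), extra_headers


betas, headers = prepare_betas_and_headers(
    {'structured-outputs-2025-11-13'},
    {'anthropic-beta': 'mcp-client-2025-04-04, structured-outputs-2025-11-13'},
)
assert betas == ['mcp-client-2025-04-04', 'structured-outputs-2025-11-13']  # merged, deduplicated, sorted
assert 'anthropic-beta' not in headers  # moved into the `betas` request field
```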
```diff
@@ -846,13 +913,23 @@ async def _map_user_prompt(
             else:
                 raise RuntimeError(f'Unsupported content type: {type(item)}')  # pragma: no cover

-    @staticmethod
-    def _map_tool_definition(f: ToolDefinition) -> BetaToolParam:
-        return {
+    def _map_tool_definition(self, f: ToolDefinition) -> BetaToolParam:
+        """Maps a `ToolDefinition` dataclass to an Anthropic `BetaToolParam` dictionary."""
+        tool_param: BetaToolParam = {
             'name': f.name,
             'description': f.description or '',
             'input_schema': f.parameters_json_schema,
         }
+        if f.strict and self.profile.supports_json_schema_output:  # pragma: no branch
+            tool_param['strict'] = f.strict
+        return tool_param
+
+    @staticmethod
+    def _native_output_format(model_request_parameters: ModelRequestParameters) -> BetaJSONOutputFormatParam | None:
+        if model_request_parameters.output_mode != 'native':
+            return None
+        assert model_request_parameters.output_object is not None
+        return {'type': 'json_schema', 'schema': model_request_parameters.output_object.json_schema}

     def _map_usage(
```
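For illustration, the value `_native_output_format` returns for a simple made-up schema (the real schema is whatever the strict transformer produced):

```python
# Hypothetical example of the `BetaJSONOutputFormatParam` built for native output.
output_format = {
    'type': 'json_schema',
    'schema': {
        'type': 'object',
        'properties': {'city': {'type': 'string'}, 'country': {'type': 'string'}},
        'required': ['city', 'country'],
        'additionalProperties': False,
    },
}
```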
> **Collaborator** (on `_map_usage`): Weird that we don't have to pass `output_format` here, as it does contribute to token usage. Can you make an explicit comment about that, so it doesn't look like an oversight?