feat: Add support for Bedrock system tools.

JGoutin · JGoutin · commit 07fb6bc266a0 · 2025-11-28T19:19:10.000+01:00
diff --git a/docs/api_openai_chat_completions.md b/docs/api_openai_chat_completions.md
@@ -46,6 +46,7 @@ This OpenAI-compatible endpoint provides access to AWS Bedrock foundation models
 | Parallel tool calls                      |       :material-cog:{ .model-dep }       | Multiple tools in one turn                                      |
 | Disable Parallel tool calls              | :material-close-circle:{ .unsupported }  | Parallel tool calls are always on                               |
 | Non-function tool types                  | :material-close-circle:{ .unsupported }  | Only function tools supported                                   |
+| System tools (`systemTool_*`)            | :material-plus-circle:{ .extra-feature } | AWS Bedrock system tools (e.g., web grounding with citations)   |
 | **Generation Control**                   |                                          |                                                                 |
 | `max_tokens` / `max_completion_tokens`   |   :material-check-circle:{ .success }    | Output length limits                                            |
 | `temperature`                            |       :material-cog:{ .model-dep }       | Mapped to Bedrock inference params                              |
@@ -74,6 +75,7 @@ This OpenAI-compatible endpoint provides access to AWS Bedrock foundation models
 | Audio                                    |   :material-check-circle:{ .success }    | Synthesis from text output                                      |
 | `response_format` (JSON mode)            |       :material-cog:{ .model-dep }       | Model-specific JSON support                                     |
 | `reasoning_content` (From Deepseek API)  |       :material-cog:{ .model-dep }       | Text reasoning messages                                         |
+| `annotations` (URL citations)            |   :material-check-circle:{ .success }    | URL citations from system tools (non-streaming only)            |
 | **Usage tracking**                       |                                          |                                                                 |
 | Input text tokens                        |   :material-check-circle:{ .success }    | Billing unit                                                    |
 | Output tokens                            |   :material-check-circle:{ .success }    | Billing unit                                                    |
@@ -265,6 +267,95 @@ curl -X POST "$BASE/v1/chat/completions" \
 !!! note "Unsupported Parameter"
     The `tagSuffix` parameter is not supported in this implementation.
 
+### AWS Bedrock System Tools
+
+AWS Bedrock system tools are built-in capabilities that foundation models can use directly without requiring you to implement backend integrations. Access any AWS Bedrock system tool by adding the `systemTool_` prefix to its name—this works for current tools and any future system tools AWS releases.
+
+**How to Use:**
+
+Add system tools to your `tools` array using the `systemTool_` prefix followed by the tool name. System tools don't take parameter definitions—specify only the tool name and omit `parameters`. A `systemTool_`-prefixed tool that defines `parameters` is treated as a regular function tool, not as a system tool.
+
+As AWS releases new system tools, simply use the same `systemTool_` prefix pattern to access them.
+
+#### ![Amazon Nova](styles/logo_amazon_nova.svg){ style="height: 1.2em; vertical-align: text-bottom;" } Amazon Nova Web Grounding
+
+Amazon Nova Web Grounding enables models to search the web for current information, helping answer questions requiring real-time data like news, weather, product availability, or recent events. The model automatically determines when to use web grounding based on the user's query.
+
+!!! info "Learn More"
+    - [Amazon Nova Web Grounding - User Guide](https://docs.aws.amazon.com/nova/latest/userguide/grounding.html)
+    - [Build More Accurate AI Applications with Amazon Nova Web Grounding - Blog Post](https://aws.amazon.com/blogs/aws/build-more-accurate-ai-applications-with-amazon-nova-web-grounding/)
+
+**Usage:**
+
+```bash
+curl -X POST "$BASE/v1/chat/completions" \
+  -H "Authorization: Bearer $OPENAI_API_KEY" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "amazon.nova-premier-v1:0",
+    "messages": [
+      {
+        "role": "user",
+        "content": "What are the current AWS Regions and their locations?"
+      }
+    ],
+    "tools": [
+      {
+        "type": "function",
+        "function": {
+          "name": "systemTool_nova_grounding"
+        }
+      }
+    ]
+  }'
+```
+
+**Response Format:**
+
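+When using web grounding, the API response includes `annotations` with URL citations in non-streaming mode. Citations are not anchored to a text span: `start_index` and `end_index` are always `0` (the example below is abbreviated):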
+
+```json
+{
+  "choices": [{
+    "message": {
+      "role": "assistant",
+      "content": "The AWS Regions include...",
+      "annotations": [
+        {
+          "type": "url_citation",
+          "url_citation": {
+            "url": "https://aws.amazon.com/about-aws/global-infrastructure/",
+            "title": "AWS Global Infrastructure"
+          }
+        }
+      ]
+    }
+  }]
+}
+```
+
+!!! note "Streaming Mode"
+    Citations are only available in non-streaming responses. The OpenAI API does not support annotations in streaming mode.
+
+**Use Cases:**
+
+- **Current Events**: Get up-to-date information about news, weather, stock prices, or sports scores
+- **Dynamic Data**: Query information that changes frequently like AWS service availability or product prices
+- **Verification**: Cross-reference facts with current web sources for improved accuracy
+- **Knowledge Extension**: Supplement model training data with real-time information
+
+**Benefits:**
+
+- **Zero Integration**: No need to implement or maintain web search APIs
+- **Automatic Invocation**: Models intelligently decide when to use web grounding
+- **Enhanced Accuracy**: Reduce hallucinations with real-time information retrieval
+- **OpenAI-Compatible**: Works seamlessly with standard tool calling patterns
+
+!!! warning "Model and Region Compatibility"
+    **Model**: Only Amazon Nova Premier (`amazon.nova-premier-v1:0`) supports the `systemTool_nova_grounding` tool.
+
+    **Region**: Web Grounding is only available in US regions.
+
 ### Provider-Specific Parameters
 
 Unlock advanced model capabilities by passing provider-specific parameters directly in your requests. These parameters are forwarded to AWS Bedrock and allow you to access features unique to each foundation model provider.
diff --git a/docs/roadmap.md b/docs/roadmap.md
@@ -126,16 +126,17 @@ The following features may be implemented in future releases based on community
 
 ## ✨ Release History
 
-### v1.1.0 – Embeddings Enhancement, Prompt Caching & Advanced Routing
+### v1.1.0 – Embeddings Enhancement, Prompt Caching, Advanced Routing & System Tools
 
-Expands multimodal embedding capabilities, adds prompt caching support, and introduces advanced routing with application inference profiles and prompt routers.
+Expands multimodal embedding capabilities, adds prompt caching support, introduces advanced routing with application inference profiles and prompt routers, and enables AWS Bedrock system tools (like web grounding) with automatic URL citation extraction.
 
 ### 💬 Chat Completions
 
-| Provider                                                                       | Endpoint/Feature                                                    | AWS Backend                                                                                                         |
-|--------------------------------------------------------------------------------|---------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------|
-| ![OpenAI](styles/logo_openai.svg){: style="height:20px;width:20px"} **OpenAI** | Prompt caching `/v1/chat/completions` `prompt_cache_key`            | ![Amazon Bedrock](styles/logo_amazon_bedrock.svg){: style="height:20px;width:20px"} Amazon Bedrock - prompt caching |
-| ![OpenAI](styles/logo_openai.svg){: style="height:20px;width:20px"} **OpenAI** | `/v1/chat/completions` GPT5.1 API update  (`reasoning_effort=none`) |                                                                                                                     |
+| Provider                                                                                 | Endpoint/Feature                                                    | AWS Backend                                                                                                         |
+|------------------------------------------------------------------------------------------|---------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------|
+| ![OpenAI](styles/logo_openai.svg){: style="height:20px;width:20px"} **OpenAI**           | Prompt caching `/v1/chat/completions` `prompt_cache_key`            | ![Amazon Bedrock](styles/logo_amazon_bedrock.svg){: style="height:20px;width:20px"} Amazon Bedrock - prompt caching |
+| ![OpenAI](styles/logo_openai.svg){: style="height:20px;width:20px"} **OpenAI**           | `/v1/chat/completions` GPT5.1 API update  (`reasoning_effort=none`) |                                                                                                                     |
+| ![AWS Bedrock](styles/logo_amazon_bedrock.svg){: style="height:20px;width:20px"} **AWS** | System tools support with URL citations (`systemTool_*`)            | ![Amazon Bedrock](styles/logo_amazon_bedrock.svg){: style="height:20px;width:20px"} Amazon Bedrock - system tools   |
 
 ### 🧠 Embeddings
 
diff --git a/pyproject.toml b/pyproject.toml
@@ -7,7 +7,7 @@ requires-python = ">=3.13"
 dependencies = [
     "fastapi",
     "aioboto3>=15.1.0",
-    "aiobotocore>=2.24.0",
+    "botocore>=1.40.62",
     "pydantic>=2",
     "pydantic-settings>=2",
     "sse-starlette",
diff --git a/stdapi/routes/openai_chat_completions.py b/stdapi/routes/openai_chat_completions.py
@@ -57,10 +57,13 @@
     log_request_stream_event,
     log_response_params,
 )
+from stdapi.openai_exceptions import OpenaiError
 from stdapi.routes.openai_audio_speech import generate_audio
 from stdapi.tokenizer import estimate_token_count
 from stdapi.types.openai import FunctionDefinition
 from stdapi.types.openai_chat_completions import (
+    Annotation,
+    AnnotationURLCitation,
     ChatCompletion,
     ChatCompletionAssistantMessageParam,
     ChatCompletionAudio,
@@ -107,6 +110,7 @@
         VideoFormatType,
     )
     from types_aiobotocore_bedrock_runtime.type_defs import (
+        CitationOutputTypeDef,
         ContentBlockDeltaEventTypeDef,
         ContentBlockOutputTypeDef,
         ContentBlockStartEventTypeDef,
@@ -119,10 +123,12 @@
         PerformanceConfigurationTypeDef,
         ReasoningContentBlockUnionTypeDef,
         SystemContentBlockTypeDef,
+        SystemToolTypeDef,
         ToolChoiceTypeDef,
         ToolConfigurationTypeDef,
         ToolResultContentBlockUnionTypeDef,
         ToolSpecificationTypeDef,
+        ToolTypeDef,
         VideoBlockTypeDef,
     )
 
@@ -155,6 +161,9 @@
 #: Minimal tool JSON input schema for Bedrock
 _EMPTY_TOOL = {"type": "object"}
 
+#: Bedrock system tools prefix
+_SYSTEM_TOOL_PREFIX = "systemTool_"
+
 
 def _req_extract_system_content_blocks(
     content: str | Iterable[ChatCompletionContentPartTextParam],
@@ -728,39 +737,55 @@ def _req_map_tool_or_function(
 
 
 def _req_map_tool_spec(
-    tool: ChatCompletionToolUnionParam,
-) -> "ToolSpecificationTypeDef":
-    """Convert an OpenAI tool dict to a Bedrock ToolSpecification, if possible."""
+    tool: ChatCompletionToolUnionParam, tools: "list[ToolTypeDef]"
+) -> None:
+    """Maps a tool's specification to the provided tools list based on its type.
+
+    Args:
+        tool: The tool to be processed and mapped.
+        tools: The list where processed tool specifications will be appended.
+    """
     tool_type = tool.type
     if tool_type == "function":
         function_tool: ChatCompletionFunctionToolParam = tool  # type: ignore[assignment]
         function_spec = function_tool.function
-        return {
-            "name": function_spec.name,
-            "description": function_spec.description or tool_type,
-            "inputSchema": {"json": function_spec.parameters or _EMPTY_TOOL},
-        }
-    raise HTTPException(  # pragma: no cover
-        status_code=400,
-        detail=f"Unsupported tool type '{tool_type}': {to_json(tool).decode()}",
-    )
+        name = function_spec.name
+        if name.startswith(_SYSTEM_TOOL_PREFIX) and not function_spec.parameters:
+            system_tool: SystemToolTypeDef = {
+                "name": name.removeprefix(_SYSTEM_TOOL_PREFIX)
+            }
+            tools.append({"systemTool": system_tool})
+        else:
+            tool_spec: ToolSpecificationTypeDef = {
+                "name": function_spec.name,
+                "description": function_spec.description or tool_type,
+                "inputSchema": {"json": function_spec.parameters or _EMPTY_TOOL},
+            }
+            tools.append({"toolSpec": tool_spec})
+    else:  # pragma: no cover
+        msg = f"Unsupported tool type '{tool_type}': {to_json(tool).decode()}"
+        raise OpenaiError(msg)
 
 
 def _req_build_tool_config(
     request: "CompletionCreateParams",
 ) -> "ToolConfigurationTypeDef | None":
-    """Build Bedrock tool configuration from OpenAI tools/function fields.
+    """Builds a configuration for tools based on the provided request.
+
+    Args:
+        request: The request object containing the data
+            to map and configure tools.
 
-    Returns None when no usable function tools are provided.
+    Returns:
+        The mapped tool configuration object if tools are present, otherwise None.
     """
-    tools_specs: list[ToolSpecificationTypeDef] = []
-    tools_specs.extend(_req_map_tool_spec(tool) for tool in _req_map_tools(request))
-    if not tools_specs:
+    tools: list[ToolTypeDef] = []
+    for tool in _req_map_tools(request):
+        _req_map_tool_spec(tool, tools)
+    if not tools:
         return None
 
-    tool_config: ToolConfigurationTypeDef = {
-        "tools": [{"toolSpec": spec} for spec in tools_specs]
-    }
+    tool_config: ToolConfigurationTypeDef = {"tools": tools}
     tool_choice_bedrock = _req_map_tool_or_function(request)
     if tool_choice_bedrock:
         tool_config["toolChoice"] = tool_choice_bedrock
@@ -946,6 +971,59 @@ def _resp_extract_output_text_from_converse(
     ) if reasoning_text else None
 
 
+def _resp_extract_citation_from_bedrock(
+    citation: "CitationOutputTypeDef",
+) -> "Annotation | None":
+    """Extract a single URL citation from Bedrock citation data.
+
+    Args:
+        citation: A Bedrock CitationOutputTypeDef from non-streaming response.
+
+    Returns:
+        An Annotation object with URL citation, or None if no web URL found.
+    """
+    try:
+        web_location = citation["location"]["web"]
+        url = web_location["url"]
+    except KeyError:
+        return None
+    return Annotation(
+        type="url_citation",
+        url_citation=AnnotationURLCitation(
+            url=url,
+            title=citation.get("title") or web_location.get("domain", ""),
+            start_index=0,
+            end_index=0,
+        ),
+    )
+
+
+def _resp_extract_citations_from_output_blocks(
+    contents: "list[ContentBlockOutputTypeDef]",
+) -> "list[Annotation] | None":
+    """Extract URL citations from Bedrock non-streaming content blocks.
+
+    Args:
+        contents: content blocks from Bedrock Converse response (non-streaming).
+
+    Returns:
+        List of Annotation objects with URL citations, or None if no citations found.
+    """
+    annotations: list[Annotation] = []
+    for block in contents:
+        try:
+            citations = block["citationsContent"]["citations"]
+        except KeyError:
+            continue
+
+        for citation in citations:
+            annotation = _resp_extract_citation_from_bedrock(citation)
+            if annotation:
+                annotations.append(annotation)
+
+    return annotations if annotations else None
+
+
 def _resp_extract_tool_calls_from_converse(
     contents: "list[ContentBlockOutputTypeDef]",
 ) -> tuple[list[ChatCompletionMessageToolCallUnion] | None, FunctionCall | None]:
@@ -1115,6 +1193,7 @@ async def _non_streaming_completion(
         message = response["output"]["message"]["content"]
         tool_calls, function_call = _resp_extract_tool_calls_from_converse(message)
         content, reasoning_content = _resp_extract_output_text_from_converse(message)
+        annotations = _resp_extract_citations_from_output_blocks(message)
         if reasoning_content:
             reasoning_contents.append(reasoning_content)
         if audio_params and content:
@@ -1133,6 +1212,7 @@ async def _non_streaming_completion(
                     reasoning_content=reasoning_content,
                     tool_calls=tool_calls,
                     function_call=function_call,
+                    annotations=annotations,
                 ),
             )
         )
diff --git a/tests/test_openai_chat_completions.py b/tests/test_openai_chat_completions.py