In [15]:
%load_ext autoreload
%autoreload 2
%pip install --quiet --upgrade pip
%pip install --quiet boto3 matplotlib aws_sdk_bedrock_runtime

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


---

## Configurations

In [16]:
# AWS region passed to the boto3 session that creates the STS and Bedrock runtime clients.
REGION = "us-west-2"
# Named AWS profile passed to the boto3 session.
PROFILE = "default"
# Short display name (shown in the model dropdown) -> Bedrock model identifier sent as `modelId`.
# invoke_output() routes on substrings of these keys ('nova', 'llama', 'sonnet', 'haiku',
# 'deepseek', 'palmyra'), so keep those substrings intact when renaming or adding entries.
MODELS = {
    'nova lite':    "us.amazon.nova-lite-v1:0",
    'nova pro':     "us.amazon.nova-pro-v1:0",
    'sonnet 3.7':   "us.anthropic.claude-3-7-sonnet-20250219-v1:0",
    'haiku 3.5':    "us.anthropic.claude-3-5-haiku-20241022-v1:0",
    'llama 70b':    "us.meta.llama3-3-70b-instruct-v1:0",
    'palmyra X5':   "us.writer.palmyra-x5-v1:0",
    'deepseek R1':  "us.deepseek.r1-v1:0"
}

In [17]:
import boto3, logging
import time

# Build one boto3 session from the configured profile/region and derive both clients from it.
session = boto3.Session(profile_name=PROFILE, region_name=REGION)
sts = session.client('sts')
bedrock = session.client('bedrock-runtime')
# ARN of the identity behind the current credentials (this performs a call to STS).
role = sts.get_caller_identity()['Arn']
# Check if this is an assumed role
if ':assumed-role/' in role:
    # Split the ARN on ':' — field 4 is used as the account id, and the segment after the
    # first '/' in field 5 is used as the role name; an IAM role ARN is rebuilt from the two.
    parts = role.split(':')
    account_id = parts[4]
    role_name = parts[5].split('/')[1]
    role = f"arn:aws:iam::{account_id}:role/{role_name}"

# Informational output only; nothing below in this cell depends on these prints.
print(f'The notebook will use aws services hosted in {session.region_name} region')
print(f'Notebook role is {role}')

class Tracker:
    """Console tracker that timestamps log lines and totals token usage for one run.

    Typical use: ``start(label)`` to begin a run, ``log`` / ``add_input_tokens`` /
    ``add_output_tokens`` while it executes, then ``stop()`` to print the totals.
    Every method is also safe to call before the first ``start``: the clock then
    runs from construction time and the token counters start at zero.
    """

    def __init__(self):
        self.short = ""
        # Initialise everything log()/stop()/add_*_tokens() read, so none of them
        # raises AttributeError when used before start().
        self.start_time = time.time()
        self.input_tokens = 0
        self.output_tokens = 0

    def start(self, short):
        """Begin a new run labelled ``short``: reset the clock and both token counters."""
        self.short = short
        self.start_time = time.time()
        self.input_tokens = self.output_tokens = 0
        self.log('starting')

    def stop(self):
        """Print the accumulated input/output token counts followed by a 'stopped' line."""
        self.log(f" input tokens: {self.input_tokens}")
        self.log(f"output tokens: {self.output_tokens}")
        self.log('stopped')

    def log(self, message):
        """Print ``message`` prefixed with the seconds elapsed since the run started and the run label."""
        elapsed_time = time.time() - self.start_time
        print(f"{elapsed_time:5.2f}s - [{self.short}] {message}")

    def add_input_tokens(self, count):
        """Add ``count`` to the running input-token total."""
        self.input_tokens += count

    def add_output_tokens(self, count):
        """Add ``count`` to the running output-token total."""
        self.output_tokens += count


# Single shared tracker used by every request helper in the notebook.
tracker = Tracker()

The notebook will use aws services hosted in us-west-2 region
Notebook role is arn:aws:iam::736296836507:role/Admin


----

## Prompts & Tools

In [4]:
# System prompt shared by every request helper: asks for a ~500-word narrative weather
# news flash and includes one worked example.
# NOTE(review): the prompt tells the model to use a tool, but only
# converse_with_tool_use_output passes a toolConfig — confirm that is acceptable for the
# plain Invoke / Converse paths.
SYSTEM_PROMPT = """
You are writing a news flash on the weather for the city asked by the user. Use the tool to obtain current
weather information. Use narrative format, around 500 words, no bulletpoints. Only provide the output, no comments or questions.

Example: 
Record heat continues to grip the metro area as temperatures soar past 95°F. Residents are urged to stay indoors between 11 AM and 3 PM. Local authorities have activated emergency cooling stations across the city.

Meanwhile, weather radar shows an advancing storm system from the northwest. Meteorologists predict severe thunderstorms will hit the region by early evening, bringing much-needed relief but possible flooding risks.

Coastal communities should prepare for rough seas tonight, with the National Weather Service warning of dangerous rip currents and waves up to 10 feet. All beach activities are suspended until further notice.

Looking ahead to the weekend, we'll see a pleasant drop in temperatures. Saturday brings sunny skies perfect for outdoor activities, while Sunday may require umbrellas as scattered showers move through the area.

Stay tuned for further updates as conditions develop.
"""

# Conversation sent to every model. The list is passed unchanged to bedrock.converse();
# the Invoke helpers flatten each entry to its first text block.
MESSAGES = [
    {
        "role": "user", "content": [{"text": "What's the weather like in Zurich?"}]
    }
]
# Shared inference parameters. Each helper maps these onto the field names its model
# expects; "topK" is only read by claude_output.
PARAMETERS = {
    "maxTokens": 1000,
    "temperature": 0.5,
    "topP": 0.9,
    "topK": 30,
}

In [5]:
def get_weather(city):
    """Return a canned (hard-coded) weather report with ``city`` inserted into the text.

    This is the demo tool exposed to the model; it performs no lookup of any kind.
    """
    forecast_template = """Heavy thunder storm expected in the afternoon in {city}. Later in the night, temperature will drop 
              drastically and thunderstorm turns into a snow blizzard. Tomorrow morning, weather improves and
              the weather will be mostly sunny and warm.

              The highest temperature will be 25 degrees and the lowest will be -5 degrees.
              The wind will be 20 km/h and the humidity will be 80%.
              The pressure will be 1000 hPa.
              The visibility will be 10 m.
              The cloudiness will be 100%.
              The sunrise will be at 06:00 and the sunset will be at 22:00.
              
              Warnings: thunderstorm can disturb your pets
    """
    return forecast_template.format(city=city)

In [6]:
# Tool configuration passed as `toolConfig` to bedrock.converse() by
# converse_with_tool_use_output. The tool "name" is resolved against the notebook globals
# when the model requests it, so it must match the get_weather function name, and the
# schema's "city" property matches that function's only parameter.
TOOLS = {
    "tools": [
        {
            "toolSpec": {
                "name": "get_weather",
                "description": "Gets information about the local weather given the name of a nearby city.",
                "inputSchema": {
                    "json": {
                        "type": "object",
                        "properties": {
                            "city": {
                                "type": "string",
                                "description": "The city to retrieve the weather forecast for."
                            }
                        },
                        "required": [
                            "city"
                        ]
                    }
                }
            }
        }
    ]
}

---

## Invoke API

### Nova

The request schema is nearly identical between the Invoke API (streaming and non-streaming) and the Converse API. There are subtle differences related to image and video payload encoding. Because Amazon Nova Micro does not support images or videos as input, those parts of the request schema do not apply to Amazon Nova Micro. Otherwise, the request schema is the same for all Amazon Nova understanding models.

[Documentation](https://docs.aws.amazon.com/nova/latest/userguide/complete-request-schema.html)

In [7]:
import json, time

def nova_output(model):
    """Call an Amazon Nova model through InvokeModel and return the generated text.

    ``model`` is a key of MODELS. The request uses the Nova "messages-v1" schema with
    the shared SYSTEM_PROMPT, MESSAGES and PARAMETERS; token usage reported in the
    response is added to the global tracker.
    """
    inference_config = {
        name: PARAMETERS[name] for name in ("maxTokens", "temperature", "topP")
    }
    request = {
        "schemaVersion": "messages-v1",
        "system": [{"text": SYSTEM_PROMPT}],
        "messages": MESSAGES,
        "inferenceConfig": inference_config,
    }
    model_id = MODELS[model]
    tracker.log(f"invoke with model {model_id}")
    raw = bedrock.invoke_model(modelId=model_id, body=json.dumps(request))
    # The response body is a stream holding a JSON document.
    payload = json.loads(raw.get("body").read())
    usage = payload['usage']
    tracker.add_input_tokens(usage['inputTokens'])
    tracker.add_output_tokens(usage['outputTokens'])
    first_block = payload['output']['message']['content'][0]
    return first_block['text']

### Claude

Anthropic trains Claude models to operate on alternating user and assistant conversational turns. When creating a new message, you specify the prior conversational turns with the messages parameter. The model then generates the next Message in the conversation.

[Documentation](https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html#model-parameters-anthropic-claude-messages-request-response)

In [8]:
def claude_output(model):
    """Call an Anthropic Claude model through InvokeModel and return the generated text.

    ``model`` is a key of MODELS. Each entry of MESSAGES is flattened to its first
    text block, because this request sends message content as a plain string. Token
    usage reported in the response is added to the global tracker.
    """
    turns = []
    for message in MESSAGES:
        turns.append({"role": message['role'], "content": message['content'][0]['text']})
    request = {
        "anthropic_version": "bedrock-2023-05-31",
        "max_tokens": PARAMETERS["maxTokens"],
        "system": SYSTEM_PROMPT,
        "messages": turns,
        "temperature": PARAMETERS["temperature"],
        "top_k": PARAMETERS["topK"],
    }
    model_id = MODELS[model]
    tracker.log(f"invoke with model {model_id}")
    raw = bedrock.invoke_model(modelId=model_id, body=json.dumps(request))
    # The response body is a stream holding a JSON document.
    payload = json.loads(raw.get("body").read())
    usage = payload['usage']
    tracker.add_input_tokens(usage['input_tokens'])
    tracker.add_output_tokens(usage['output_tokens'])
    return payload['content'][0]['text']

### Llama
There are 4 different roles that are supported by Llama text models:
- system: Sets the context in which to interact with the AI model. It typically includes rules, guidelines, or necessary information that help the model respond effectively.
- user: Represents the human interacting with the model. It includes the inputs, commands, and questions to the model.
- ipython: A new role introduced in Llama 3.1. Semantically, this role means "tool". This role is used to mark messages with the output of a tool call when sent back to the model from the executor.
- assistant: Represents the response generated by the AI model based on the context provided in the system, ipython and user prompts.


[Documentation](https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-meta.html)
[Meta Documentation](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/text_prompt_format.md)

In [9]:
# Raw prompt skeleton for Llama instruct models; llama_output() fills the {system} and
# {user} placeholders with str.format.
# NOTE(review): the template begins with a newline before <|begin_of_text|> and appends
# "You are a helpful assistant" after the system prompt — confirm both are intended.
LLAMA_PROMPT_TEMPLATE="""
<|begin_of_text|><|start_header_id|>system<|end_header_id|>

{system}

You are a helpful assistant<|eot_id|><|start_header_id|>user<|end_header_id|>

{user}

<|eot_id|><|start_header_id|>assistant<|end_header_id|>
"""

def llama_output(model):
    """Call a Meta Llama model through InvokeModel and return the generated text.

    ``model`` is a key of MODELS. Only the first text block of the first entry of
    MESSAGES is sent, rendered into LLAMA_PROMPT_TEMPLATE together with SYSTEM_PROMPT.
    Token counts reported in the response are added to the global tracker.
    """
    user_text = MESSAGES[0]['content'][0]['text']
    request = {
        "prompt": LLAMA_PROMPT_TEMPLATE.format(system=SYSTEM_PROMPT, user=user_text),
        "temperature": PARAMETERS["temperature"],
        "top_p": PARAMETERS["topP"],
        "max_gen_len": PARAMETERS["maxTokens"],
    }
    model_id = MODELS[model]
    tracker.log(f"invoke with model {model_id}")
    raw = bedrock.invoke_model(modelId=model_id, body=json.dumps(request))
    # The response body is a stream holding a JSON document.
    payload = json.loads(raw.get("body").read())
    tracker.add_input_tokens(payload['prompt_token_count'])
    tracker.add_output_tokens(payload['generation_token_count'])
    return payload['generation']

### Writer / Palmyra

[Documentation of chat format](https://dev.writer.com/api-guides/chat-completion)

In [17]:
def palmyra_output(model):
    """Call a Writer Palmyra model through InvokeModel and return the generated text.

    ``model`` is a key of MODELS. The request is a chat-style message list: the
    system prompt first, then every entry of MESSAGES flattened to its first text
    block. Token usage reported in the response is added to the global tracker.
    """
    chat = [{"role": "system", "content": SYSTEM_PROMPT}]
    chat.extend(
        {"role": message['role'], "content": message['content'][0]['text']}
        for message in MESSAGES
    )
    request = {
        "messages": chat,
        "max_tokens": PARAMETERS["maxTokens"],
        "temperature": PARAMETERS["temperature"],
        "top_p": PARAMETERS["topP"],
    }
    model_id = MODELS[model]
    tracker.log(f"invoke with model {model_id}")
    raw = bedrock.invoke_model(modelId=model_id, body=json.dumps(request))
    # The response body is a stream holding a JSON document.
    payload = json.loads(raw.get("body").read())
    usage = payload['usage']
    tracker.add_input_tokens(usage['prompt_tokens'])
    tracker.add_output_tokens(usage['completion_tokens'])
    first_choice = payload['choices'][0]
    return first_choice['message']['content']

### Routing of models

In [11]:
def invoke_output(model):
    """Route ``model`` (a key of MODELS) to the matching Invoke API helper.

    Routing is by substring of the display name. Returns the helper's generated
    text, or an explanatory string when no helper is available, so the caller can
    always render the result.
    """
    if 'nova' in model:
        return nova_output(model)
    elif 'llama' in model:
        return llama_output(model)
    elif 'sonnet' in model or 'haiku' in model:
        return claude_output(model)
    elif 'deepseek' in model:
        # No deepseek_output helper is defined alongside the other helpers, so a direct
        # call would raise NameError. Look it up lazily: use it if a cell defines it,
        # otherwise report the gap as text, like the "Unknown model" fallback below.
        deepseek_handler = globals().get('deepseek_output')
        if deepseek_handler is None:
            return (
                "DeepSeek is not available through the Invoke API in this notebook "
                "(deepseek_output is not defined); use the Converse API instead."
            )
        return deepseek_handler(model)
    elif 'palmyra' in model:
        return palmyra_output(model)
    else:
        return "Unknown model"
    

    

----

## Converse API

When you make a Converse request with an Amazon Bedrock runtime endpoint, you can include the following fields:
- modelId – A required parameter in the header that lets you specify the resource to use for inference.

- The following fields let you customize the prompt:
    - messages – Use to specify the content and role of the prompts.
    - system – Use to specify system prompts, which define instructions or context for the model.
    - inferenceConfig – Use to specify inference parameters that are common to all models. Inference parameters influence the generation of the response.
    - additionalModelRequestFields – Use to specify inference parameters that are specific to the model that you run inference with.
    - promptVariables – (If you use a prompt from Prompt management) Use this field to define the variables in the prompt to fill in and the values with which to fill them.

- The following fields let you customize how the response is returned:
    - guardrailConfig – Use this field to include a guardrail to apply to the entire prompt.
    - toolConfig – Use this field to include a tool to help a model generate responses.
    - additionalModelResponseFieldPaths – Use this field to specify fields to return as a JSON pointer object.
    - requestMetadata – Use this field to include metadata that can be filtered on when using invocation logs.


[Documentation](https://docs.aws.amazon.com/bedrock/latest/userguide/conversation-inference-call.html)

In [12]:
import json
def converse_output(model):
    """Call any configured model through the Converse API and return the generated text.

    ``model`` is a key of MODELS. The shared MESSAGES, SYSTEM_PROMPT and PARAMETERS
    are sent unchanged; token usage reported in the response is added to the global
    tracker. Only the first content block of the reply is returned.
    """
    tracker.log(f'converse with model {model}')
    inference_config = {
        "maxTokens": PARAMETERS['maxTokens'],
        "temperature": PARAMETERS['temperature'],
        "topP": PARAMETERS['topP'],
    }
    # Kept as an empty mapping: a slot for model-specific parameters (none are sent).
    model_specific_fields = {}
    reply = bedrock.converse(
        modelId=MODELS[model],
        messages=MESSAGES,
        system=[{"text": SYSTEM_PROMPT}],
        inferenceConfig=inference_config,
        additionalModelRequestFields=model_specific_fields,
    )
    usage = reply['usage']
    tracker.add_input_tokens(usage['inputTokens'])
    tracker.add_output_tokens(usage['outputTokens'])
    first_block = reply['output']['message']['content'][0]
    return first_block['text']

---

## Inline agent: Converse API with tool use

In [None]:

def converse_with_tool_use_output(model, max_steps=5):
    """Run a small tool-use loop on the Converse API and return the model's final text.

    The model is called with the shared prompt and TOOLS. While it stops with
    ``tool_use``, every requested tool is executed and all results are sent back in
    a single user turn; the loop ends on any other stop reason or after
    ``max_steps`` model calls.

    Args:
        model: key of MODELS selecting the Bedrock model.
        max_steps: maximum number of model calls (default 5).

    Returns:
        The last text block of the model's final message, or an explanatory string
        when the loop ended without a text answer.
    """
    # Shallow copy: turns appended below must not leak into the shared MESSAGES list.
    messages = MESSAGES.copy()
    # Tool names come back from the model, so only names declared in TOOLS may run.
    declared_tools = {
        spec['toolSpec']['name'] for spec in TOOLS.get('tools', []) if 'toolSpec' in spec
    }

    def call_tool(tool_name, tool_args):
        """Call a declared tool, looked up by name among the notebook globals."""
        if tool_name not in declared_tools:
            raise ValueError(f'tool {tool_name!r} is not declared in TOOLS')
        tool_func = globals()[tool_name]
        return tool_func(**tool_args)

    def converse_with_llm():
        """Send the conversation so far to the model and record its token usage."""
        tracker.log(f'converse with tools and model {model}')
        response = bedrock.converse(
            modelId=MODELS[model],
            messages=messages,
            system=[{"text": SYSTEM_PROMPT}],
            inferenceConfig={
                "maxTokens": PARAMETERS['maxTokens'],
                "temperature": PARAMETERS['temperature'],
                "topP": PARAMETERS['topP']
            },
            # Slot for model-specific parameters; none are sent.
            additionalModelRequestFields={},
            toolConfig=TOOLS
        )
        tracker.add_input_tokens(response['usage']['inputTokens'])
        tracker.add_output_tokens(response['usage']['outputTokens'])
        return response

    def handle_tool_use(tool_use):
        """Execute one toolUse request and return the matching toolResult content block."""
        tool_name = tool_use['name']
        tool_args = tool_use.get('input') or {}
        tool_result = {'toolUseId': tool_use['toolUseId']}
        try:
            shown_args = ", ".join(f"{k}={v}" for k, v in tool_args.items())
            tracker.log(f'calling tool {tool_name}({shown_args})')
            tool_response = call_tool(tool_name, tool_args)
            tracker.log(f'got tool response: {tool_response}')
            tool_result['content'] = [{"text": str(tool_response)}]
        except Exception as e:
            # Report the failure back to the model instead of aborting the conversation.
            tracker.log(f'error calling tool: {e}')
            tool_result['content'] = [{"text": f'error calling tool: {e}'}]
            tool_result['status'] = "error"
        return {'toolResult': tool_result}

    for _ in range(max_steps):
        response = converse_with_llm()
        output_message = response['output']['message']
        messages.append(output_message)
        if response['stopReason'] != 'tool_use':
            break
        tool_results = [
            handle_tool_use(block['toolUse'])
            for block in output_message['content'] if 'toolUse' in block
        ]
        if not tool_results:
            break
        # All results for one assistant turn go into a single user message, so that
        # user and assistant turns keep alternating.
        messages.append({"role": "user", "content": tool_results})

    last_message = messages[-1]
    if last_message.get('role') == 'assistant':
        texts = [block['text'] for block in last_message['content'] if 'text' in block]
        if texts:
            return texts[-1]
    # Either the step budget ran out while tools were still being requested, or the
    # final message carried no text block.
    return f"No final text answer from the model after {max_steps} step(s)."

In [None]:
import ipywidgets as widgets
from IPython.display import display, clear_output, Markdown

# Create output widget to display results
# (on_change clears it and renders the tracker log and the model answer inside it)
output_widget = widgets.Output()

def on_change(change):
    """Dropdown observer: run the selected model through the selected API and show the answer.

    The ``change`` payload is ignored; the current values are read from the two
    dropdowns. Nothing happens until both a model and an API are selected.
    """
    selected_model = model_dropdown.value
    selected_api = api_dropdown.value
    if not (selected_model and selected_api):
        return

    with output_widget:
        # Replace the previous run's output instead of appending to it.
        clear_output(wait=True)
        tracker.start(f'{selected_model}/{selected_api}')
        if selected_api == 'invoke':
            result = invoke_output(selected_model)
        elif selected_api == 'converse':
            result = converse_output(selected_model)
        elif selected_api == 'converse-tools':
            result = converse_with_tool_use_output(selected_model)
        tracker.stop()
        display(Markdown(f"\n---\n\n{result}"))

# Model picker: options are the display names (keys) of MODELS; starts with no selection.
model_dropdown = widgets.Dropdown(
    options=MODELS.keys(),
    value = None,
    description='Model:',
    layout=widgets.Layout(margin="10px 10px 20px 00px")
)

# API picker: each label maps to the api key that on_change dispatches on.
api_dropdown = widgets.Dropdown(
    options={'Invoke API': 'invoke', 'Converse API': 'converse', 'Converse API with Tools': 'converse-tools'},
    value = None,
    description='API:',
    layout=widgets.Layout(margin="10px 10px 20px 00px")
)

# Call on_change whenever either dropdown's value changes.
model_dropdown.observe(on_change, names='value')
api_dropdown.observe(on_change, names='value')

# Create vertical box layout to stack widgets with increased spacing
vbox = widgets.VBox([widgets.HBox([api_dropdown, model_dropdown]), output_widget])
display(vbox)


VBox(children=(HBox(children=(Dropdown(description='API:', layout=Layout(margin='10px 10px 20px 00px'), option…

---

## Experimental Bedrock Client SDK

In [28]:
from aws_sdk_bedrock_runtime.client import BedrockRuntimeClient, ConverseInput
from aws_sdk_bedrock_runtime.models import Message, ContentBlockText, StopReason
from aws_sdk_bedrock_runtime.config import Config
from smithy_aws_core.credentials_resolvers.static import StaticCredentialsResolver
from smithy_aws_core.identity import AWSCredentialsIdentity
import asyncio

In [30]:
# Resolve the boto3 session credentials once and hand them to the experimental SDK.
# A frozen snapshot keeps access key, secret key and token consistent with each other
# even if the underlying credentials refresh between attribute reads.
_session_credentials = session.get_credentials()
if _session_credentials is None:
    raise RuntimeError(
        "No AWS credentials found for the configured boto3 session; check PROFILE."
    )
_frozen_credentials = _session_credentials.get_frozen_credentials()
credentials = AWSCredentialsIdentity(
    access_key_id=_frozen_credentials.access_key,
    secret_access_key=_frozen_credentials.secret_key,
    session_token=_frozen_credentials.token,
)

In [31]:
# Experimental SDK client that authenticates with the static `credentials` identity.
# NOTE(review): the region is hard-coded to "us-east-1" while the boto3 session uses
# REGION ("us-west-2") — confirm this difference is intentional.
client = BedrockRuntimeClient(
        config=Config(
            aws_credentials_identity_resolver=StaticCredentialsResolver(credentials=credentials),
            region = "us-east-1",
        )
    )

In [32]:
# Single user turn for the experimental SDK, built from its typed Message and
# ContentBlockText models rather than the plain dicts used with boto3.
messages = [
    Message(
        role="user",
        content=[
            ContentBlockText(
                value="Create a list of 3 of the best songs from the 1980s."
            )
        ]
    )
]

In [33]:
# Top-level await of the SDK's converse coroutine (valid in a notebook cell, not in a
# plain script). The model id is hard-coded here rather than taken from MODELS.
response =  await client.converse(
    ConverseInput(
        model_id='amazon.titan-text-express-v1', messages=messages
    )
)

  response =  await client.converse(


In [43]:
# Text of the first content block of the reply, shown as the cell output.
response.output.value.content[0].value

'\nHere are three of the best songs from the 1980s:\n\n1. "Billie Jean" by Michael Jackson (1982)\n2. "Sweet Child o\' Mine" by Guns N\' Roses (1988)\n3. "Like a Virgin" by Madonna (1984)'

[inline-agent with KB](https://github.com/aws-samples/amazon-bedrock-samples/tree/main/agents-and-function-calling/bedrock-agents/features-examples/15-invoke-inline-agents)

[agent web tools](https://github.com/build-on-aws/bedrock-agents-webscraper)