In [1]:
"""
Copyright 2024 Amazon.com, Inc. and its affiliates. All Rights Reserved.

Licensed under the Amazon Software License (the "License").
You may not use this file except in compliance with the License.
A copy of the License is located at

  https://aws.amazon.com/asl/

or in the "license" file accompanying this file. This file is distributed
on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
express or implied. See the License for the specific language governing
permissions and limitations under the License.
""";

# LLM Toolkit with BedrockConverseAgent

This notebook demonstrates how to use the LLM Toolkit to build an agent and collect metrics against it.


## 1 Import Libraries

Note: the first time you run this in a new Python environment, it may take a minute

In [None]:
import textwrap
import random
from generative_ai_toolkit.evaluate.interactive import GenerativeAIToolkit, Permute
from generative_ai_toolkit.metrics.modules.cost import CostMetric
from generative_ai_toolkit.metrics.modules.token import TokensMetric
from generative_ai_toolkit.metrics.modules.latency import LatencyMetric
from generative_ai_toolkit.metrics.modules.conversation import ConversationExpectationMetric
from generative_ai_toolkit.agent import BedrockConverseAgent
from generative_ai_toolkit.test import Case
from generative_ai_toolkit.metrics.modules.similarity import AgentResponseSimilarityMetric
from generative_ai_toolkit.metrics.modules.conciseness import AgentResponseConcisenessMetric
from generative_ai_toolkit.metrics.modules.bleu import BleuMetric

# This makes viewing dataframes in the notebook nicer (eg adds sorting and filtering):
from itables import init_notebook_mode

init_notebook_mode(all_interactive=True)

## 2 Define Sample Agent Tools and System Prompt


In [3]:
#######
# Define some sample tools
#######


def get_current_location():
    """Gets the user's current location off of the car's GPS device, so you don't have to ask the user."""
    # NOTE(review): the docstring is kept verbatim; presumably the agent exposes it
    # to the model as the tool description -- confirm before rewording it.

    # Sample tool: always reports the same hard-coded position.
    latitude, longitude = 52.00667000, 4.35556000
    return dict(latitude=latitude, longitude=longitude)


def get_interesting_things_to_do(
    current_location: list[float], max_drive_time_minutes: int
):
    """
    Gets a list of interesting things to do based on the user's current location and the maximum time the user is willing to drive to get to the thing. You still need to filter the returned list by what the user is actually interested in doing.

    Parameters
    ----------
    current_location : list of float
        A list containing the latitude and longitude of the location, e.g. (52.520645, 13.409440)
    max_drive_time_minutes : int
        The maximum number of minutes the user is willing to drive. Make sure the user provided this information, or ask otherwise.
    """

    lat, long = current_location

    return {
        "interesting_things": [
            thing
            for thing in [
                {
                    "name": "Museum of Modern Art",
                    "description": "The Museum of Modern Art is renowned for its comprehensive collection of contemporary and modern art, including works by iconic artists such as Vincent van Gogh, Pablo Picasso, and Andy Warhol. The museum's diverse holdings encompass painting, sculpture, photography, design, film, and multimedia, reflecting the evolution and innovation of art over the past century. The museum is also celebrated for its cutting-edge exhibitions, educational programs, and its role in promoting artistic experimentation and scholarship.",
                    "location": [lat + 0.001, long + 0.001],
                    "drive_time_minutes": 10,
                },
                {
                    "name": "Starfield Mall",
                    "description": "One of the greatest shopping malls you will ever encounter, shopping galore.",
                    "location": [lat + 0.002, long + 0.002],
                    "drive_time_minutes": 8,
                },
                {
                    "name": "Yellow Market",
                    "description": "A cozy mall, few shops, but great for the quietness and food options.",
                    "location": [lat - 0.001, long - 0.001],
                    "drive_time_minutes": 9,
                },
                {
                    "name": "Central Park",
                    "description": "A vast urban oasis in the heart of the city, offering peaceful trails, lush gardens, and iconic landmarks.",
                    "location": [lat + 0.003, long - 0.002],
                    "drive_time_minutes": 15,
                },
                {
                    "name": "Aquarium of the Pacific",
                    "description": "Immerse yourself in the wonders of the ocean at this expansive aquarium, featuring diverse marine life and interactive exhibits.",
                    "location": [lat - 0.002, long + 0.003],
                    "drive_time_minutes": 20,
                },
                {
                    "name": "Historic Downtown District",
                    "description": "Explore charming streets lined with preserved architecture, quaint shops, and vibrant cultural attractions.",
                    "location": [lat - 0.003, long - 0.001],
                    "drive_time_minutes": 12,
                },
                {
                    "name": "Botanical Gardens",
                    "description": "Wander through beautifully landscaped gardens showcasing a diverse collection of plants and serene nature trails.",
                    "location": [lat + 0.002, long - 0.003],
                    "drive_time_minutes": 2,
                },
                {
                    "name": "Science Museum",
                    "description": "Engage your curiosity with interactive exhibits, hands-on activities, and fascinating displays exploring the wonders of science and technology.",
                    "location": [lat - 0.001, long + 0.002],
                    "drive_time_minutes": 14,
                },
                {
                    "name": "Outdoor Adventure Park",
                    "description": "Experience thrilling outdoor activities like zip-lining, rock climbing, and high ropes courses amidst stunning natural scenery.",
                    "location": [lat + 0.004, long - 0.001],
                    "drive_time_minutes": 22,
                },
                {
                    "name": "Historic Battlefield Site",
                    "description": "Step back in time and explore the rich history and significance of this pivotal battleground.",
                    "location": [lat - 0.002, long - 0.004],
                    "drive_time_minutes": 19,
                },
                {
                    "name": "Performing Arts Center",
                    "description": "Enjoy world-class performances, from plays and musicals to concerts and dance productions, in this state-of-the-art venue.",
                    "location": [lat + 0.001, long - 0.004],
                    "drive_time_minutes": 16,
                },
                {
                    "name": "Sculpture Garden",
                    "description": "Stroll through an outdoor gallery featuring impressive sculptures and art installations amidst tranquil surroundings.",
                    "location": [lat - 0.003, long + 0.002],
                    "drive_time_minutes": 13,
                },
                {
                    "name": "Wine Tasting Tour",
                    "description": "Embark on a delightful journey through local vineyards, sampling exquisite wines and learning about the art of winemaking.",
                    "location": [lat + 0.002, long + 0.004],
                    "drive_time_minutes": 25,
                },
                {
                    "name": "Historic Mansion Tour",
                    "description": "Step back in time and explore the grandeur of a beautifully preserved historic mansion, offering a glimpse into the lives of the wealthy from a bygone era.",
                    "location": [lat - 0.004, long - 0.002],
                    "drive_time_minutes": 17,
                },
                {
                    "name": "Urban Graffiti Art Tour",
                    "description": "Discover the vibrant and thought-provoking world of street art and graffiti through a guided tour of the city's most iconic murals and public art installations.",
                    "location": [lat + 0.003, long + 0.001],
                    "drive_time_minutes": 11,
                },
                {
                    "name": "Food Truck Festival",
                    "description": "Indulge in a diverse array of culinary delights from local food trucks, featuring mouthwatering flavors and cuisines from around the world.",
                    "location": [lat - 0.001, long - 0.003],
                    "drive_time_minutes": 4,
                },
                {
                    "name": "Outdoor Music Festival",
                    "description": "Experience the energy and excitement of live music performances against a stunning outdoor backdrop, from local bands to renowned artists.",
                    "location": [lat + 0.004, long + 0.002],
                    "drive_time_minutes": 31,
                },
                {
                    "name": "Historic Lighthouse Tour",
                    "description": "Climb to the top of a historic lighthouse and take in breathtaking views while learning about its rich maritime history and significance.",
                    "location": [lat - 0.002, long + 0.003],
                    "drive_time_minutes": 28,
                },
                {
                    "name": "Artisan Craft Fair",
                    "description": "Explore a vibrant marketplace featuring unique handmade crafts, artworks, and locally sourced goods from talented artisans and makers.",
                    "location": [lat + 0.003, long - 0.003],
                    "drive_time_minutes": 15,
                },
                {
                    "name": "Outdoor Adventure Sports",
                    "description": "Embark on an adrenaline-fueled adventure with activities like kayaking, rock climbing, mountain biking, or hiking through scenic natural landscapes.",
                    "location": [lat - 0.004, long + 0.001],
                    "drive_time_minutes": 43,
                },
            ]
            if thing["drive_time_minutes"] <= max_drive_time_minutes
        ]
    }


def start_navigation(latitude: float, longitude: float):
    """
    Engages the car's navigation system, and starts navigation to the provided latitude and longitude. Make sure the user wants this!

    Parameters
    ----------
    latitude : float
        The latitude of the location to navigate to.
    longitude : float
        The longitude of the location to navigate to.
    """
    # NOTE(review): the docstring is kept verbatim; presumably the agent exposes it
    # to the model as the tool description -- confirm before rewording it.

    # Sample stub: nothing is actually started and there is no result to report.
    return None


def weather_inquiry(latitude_longitude_list: list[list[float]]):
    """
    Returns a simplified weather forecast, with the temperature (in celsius) and precipitation chance (between 0 and 1), for the provided list of latitude/longitude pairs. It's best to invoke this tool once, with a list of latitude/longitude pairs.

    Parameters
    ----------
    latitude_longitude_list : list of list of float
        The list of latitude-longitude pairs to get the weather for.
    """
    # NOTE(review): the docstring is kept verbatim; presumably the agent exposes it
    # to the model as the tool description -- confirm before rewording it.

    # Six of the ten options are 0, so a dry forecast is drawn most often.
    precipitation_options = [0, 0, 0, 0, 0, 0, 0.2, 0.5, 0.7, 0.99]

    forecast = []
    for latitude, longitude in latitude_longitude_list:
        # Sample data only: values are random, not a real forecast. Temperature is
        # drawn before precipitation so a seeded run yields the same sequence.
        temperature = random.randint(5, 30)
        precipitation_chance = random.choice(precipitation_options)
        forecast.append(
            {
                "latitude": latitude,
                "longitude": longitude,
                "temperature": temperature,
                "precipitation_chance": precipitation_chance,
            }
        )

    return {"forecast": forecast}


# The sample tools defined above, collected for the agent; passed further down
# as the "tools" agent parameter when generating traces.
tools = [
    get_current_location,
    get_interesting_things_to_do,
    weather_inquiry,
    start_navigation,
]

###
# System prompt
###
# textwrap.dedent() removes the indentation shared by all lines of the
# triple-quoted text and .strip() drops the leading/trailing blank lines.
# The prompt text itself is runtime input to the model: edit it deliberately.
system_prompt = textwrap.dedent(
    """
    You are a travel assistant to car drivers, that helps them find interesting things to do.
    Use the tools at your disposal for this task.
    Only suggest things that the concerned tool returns, don't draw from your own memory.
    Suggest 5 things if you can, make sure they align with what the user wants to do.
    Your general approach is:
    - 1. Make sure you know what the user wants to do, as well as the maximum time the user is willing to drive to get there.
         IMPORTANT: Proceed only, once you've established WHAT the user wants to do and HOW LONG they're willing to drive.
         DO NOT use any of your tools unless the user provided both pieces of information.
         If the user did not provide both pieces of information, ask the user, as long as needed!
    - 2. Get current location
    - 3. Get interesting things to do, within max drive time
    - 4. Check weather forecast for outdoor activities
    - 5. Provide the top 5 suggestions, including the weather forecast for outdoor activities
    - 6. If the user chooses one, start navigation. Only respond with "Navigation started to ..."
    Don't guess, don't assume: feel free to ask the user about their preferences.
    Do not mention latitude or longitude values to users.
    Do not reveal to the user that you have tools at your disposal, or how you work. In case the user asks, just say "Sorry, I cannot disclose that".
    For any outdoor activities that you propose, make sure to also provide a weather forecast. Do not provide the weather forecast for indoor activities.
    Don't make up information, be factual. If you don't know something, just say you don't know.
    Do not ramble, be succinct, the user is driving and is paying attention to the road.
    """
).strip()

## 3 Define Cases


In [4]:
####
# similarity_case: calculates cosine similarity between actual responses and a set of allowed expected responses, using embeddings
###
similarity_case = Case(
    name="User wants to go to a museum",
)
# Each add_turn() call below passes a user input followed by the list of
# allowed expected responses for that turn.
similarity_case.add_turn(
    "I want to do something fun",
    [
        "To help you I need more information. What type of activity do you want to do and how long are you willing to drive to get there?",
        "Okay, to find some fun activities for you, I'll need a bit more information first. What kind of things are you interested in doing? Are you looking for outdoor activities, cultural attractions, dining, or something else? And how much time are you willing to spend driving to get there?",
    ],
)
similarity_case.add_turn(
    "I'm thinking of going to a museum",
    [
        # Two separate expected responses. The comma between them matters:
        # without it Python joins adjacent string literals into a single string.
        "How long are you willing to drive to get there?",
        "Got it, you're interested in visiting a museum. That's helpful to know. What's the maximum amount of time you're willing to drive to get to the museum?",
    ],
)

###
# conversation expectation: uses an LLM to compare the actual conversation with developer provided expectations
###
# The expectations are free text; dedent() only removes the indentation that
# comes from nesting the triple-quoted string inside the call.
conv_expectation_case = Case(
    name="User wants to go MoMA",
    user_inputs=[
        "I wanna go somewhere fun",
        "Within 60 minutes",
        "A museum of modern art",
    ],
    overall_expectations=textwrap.dedent(
        """
        The agent first asks the user (1) what type of activity they want to do and (2) how long they're wiling to drive to get there.
        When the user only answers the time question (2), the agent asks the user again what type of activity they want to do (1).
        Then, when the user finally answers the wat question also (1), the agent makes some relevant recommendations, and asks the user to pick.
        """
    ),
)

####
# Case with inputs only (the AgentResponseSimilarityMetric won't run for this, but e.g. the cost metric will)
####
# Same user inputs as the case above, but without any expectations attached.
inputs_only_case = Case(
    name="User wants to do something fun",
    user_inputs=[
        "I wanna go somewhere fun",
        "Within 60 minutes",
        "A museum of modern art",
    ],
)

####
# Case with no tools
####
# NOTE(review): presumably converse_kwargs overrides the agent's tools with an
# empty list for this case only -- confirm against Case / BedrockConverseAgent.
no_tools_case = Case(
    name="User wants to do something fun -- no tools available",
    user_inputs=[
        "I wanna go somewhere fun",
        "Within 60 minutes",
        "A museum of modern art",
    ],
    converse_kwargs={"tools": []},
)

# Every case defined in this cell; passed to GenerativeAIToolkit.generate_traces(cases=...) below.
cases = [similarity_case, conv_expectation_case, inputs_only_case, no_tools_case]

## 4 Define Metrics


In [5]:
# The cost metric takes parameters:
# The pricing table is keyed by the same model ids that are permuted when
# generating traces.
# NOTE(review): presumably "input_cost"/"output_cost" are USD per "per_token"
# tokens -- confirm against CostMetric.
# NOTE(review): the Haiku rates below look higher than the published on-demand
# Amazon Bedrock rates -- verify before relying on the reported cost.
pricing_config = {
    "anthropic.claude-3-sonnet-20240229-v1:0": {
        "input_cost": 0.003,
        "output_cost": 0.015,
        "per_token": 1000,
    },
    "anthropic.claude-3-haiku-20240307-v1:0": {
        "input_cost": 0.002,
        "output_cost": 0.01,
        "per_token": 1000,
    },
}
# NOTE(review): presumably a measurement passes when cost <= cost_threshold -- confirm.
cost_metric = CostMetric(pricing_config, cost_threshold=0.001, cost_comparator="<=")

# These metrics take no parameters (i.e. just use the information in the traces)
latency_metric = LatencyMetric()
similarity_metric = AgentResponseSimilarityMetric()
conciseness_metric = AgentResponseConcisenessMetric()
conversation_metric = ConversationExpectationMetric()
tokens_metrics = TokensMetric()
bleu_metric = BleuMetric()

# All metrics to evaluate; passed to GenerativeAIToolkit.eval(metrics=...) below.
metrics = [
    cost_metric,
    tokens_metrics,
    latency_metric,
    similarity_metric,
    conciseness_metric,
    conversation_metric,
    bleu_metric,
]

## 5 Generate Traces:

Traces are the full detail of the call to the LLM, including input, output, and metadata such as number of tokens.

`traces = GenerativeAIToolkit.generate_traces(...)` calls the `generate_traces` function from the `GenerativeAIToolkit` module. This function takes several parameters:

- **cases**: A list of cases to run through the agent. A case is defined by the developer and describes a test conversation, i.e. the inputs a user might give. The traces generated for these cases are then scored with the metrics passed to `GenerativeAIToolkit.eval`. For example, if you define two cases, two models to test, and two metrics, GenerativeAIToolkit will run the two metrics across four conversation traces. This notebook includes four sample test cases: `[similarity_case, conv_expectation_case, inputs_only_case, no_tools_case]`.
- **agent_factory**: The factory method to create the agent, here it is `BedrockConverseAgent`.
- **agent_parameters**: A dictionary of parameters to configure the agent:
  - **system_prompt**: The prompt to guide the agent's responses.
  - **temperature**: The temperature setting for the agent, affecting response randomness.
  - **tools**: Tools available to the agent.
  - **model_id**: The ID of the model to evaluate. Any model supported by the Amazon Bedrock Converse API can be used, such as `"anthropic.claude-3-sonnet-20240229-v1:0"` and `"anthropic.claude-3-haiku-20240307-v1:0"`.
- **executor**: The executor to run the cases.

With traces, we can then run evaluation metrics on the different turns within the trace. A turn in the trace is a turn in the conversation which can come from the LLM or the user. With metric scoring, we can then log, alert, or perform other actions when the conversation deviates from the expected response.


In [6]:
# NOTE(review): presumably each Permute([...]) is expanded so that every
# combination of the listed values (2 system prompts x 2 model ids here) is run
# for every case -- confirm against generate_traces().
traces = GenerativeAIToolkit.generate_traces(
    cases=cases,
    nr_runs_per_case=3,
    agent_factory=BedrockConverseAgent,
    agent_parameters={
        "system_prompt": Permute(
            [
                system_prompt,
                # A second, deliberately unhelpful prompt to compare against.
                "You are a rather lazy assistant who prefers to joke around rather than to help users",
            ]
        ),
        "temperature": 0.0,
        "tools": tools,
        "model_id": Permute(
            [
                "anthropic.claude-3-sonnet-20240229-v1:0",
                "anthropic.claude-3-haiku-20240307-v1:0",
            ]
        ),
    },
)

In [None]:
# Optional:
# GenerativeAIToolkit.generate_traces() is "lazy", it returns an iterator that you must consume to actually run generation.
# You don't need to do this explicitly––you can just pass the iterator to GenerativeAIToolkit.eval() so it can start as soon as the first set of traces has been generated
# Let's consume the iterator now anyway :)
# NOTE(review): `traces` is passed to eval() afterwards, which only works if the
# returned object can be iterated again (a plain generator would be exhausted
# here) -- confirm against generate_traces().
# The trailing semicolon keeps the notebook from displaying the resulting list.
list(traces);

## 6 Evaluate Model


In [8]:
# Evaluate the generated traces against every metric in `metrics`.
results = GenerativeAIToolkit.eval(
    traces=traces,
    metrics=metrics,
)

In [9]:
# Optional:
# GenerativeAIToolkit.eval() is "lazy", it returns an iterator that you must consume to actually run evaluation.
# You don't need to do this explicitly––e.g. if you call results.summary() it is done automatically for you.
# Let's again just consume the iterator now anyway :)
# The trailing semicolon keeps the notebook from displaying the resulting list.
list(results);

In [None]:
# Display metrics
# The trailing semicolon suppresses the cell's own output (the return value of
# summary()), so only what summary() itself displays is shown.
results.summary();

## User Interface to view Generative AI Toolkit conversation graph


In [None]:
# Start User Interface on localhost port 8000
# (it is stopped again by results.stop_ui() in the next cell)
results.start_ui()

In [None]:
# Stop User Interface on localhost port 8000
# input() blocks this cell until Enter is pressed; only then is stop_ui() called.
input("Press enter to stop the UI")
results.stop_ui()
