### Install requirements
`pip install pgvector==0.3.6 psycopg2-binary==2.9.10 "psycopg[binary]"`

### Start the postgres db using:

`docker compose up`

In [1]:
from bedrock_agent.agent import BedrockAgent
from bedrock_agent.utils import get_boto_session, get_embedding
from bedrock_agent.tool import tool
from bedrock_agent.types import ModelConfig
from pgvector.psycopg import register_vector
import psycopg
import json
import boto3
from ast import literal_eval

In [2]:
# Vector-store settings shared by the ingestion and query helpers below.
table_name = "rag_example"  # name of the embedding table
dimensions = 256  # embedding size; also used for the vector(...) column width

# Keyword arguments forwarded to psycopg.connect().
conn_params = dict(
    dbname="postgres",   # default db name
    host="172.18.0.2",   # IP address of the Postgres container
    user="postgres",     # default user name
    password="example",  # some password
)

# Boto3 session handed to get_embedding() and to the agents created further down.
session = get_boto_session()

In [3]:
def generate_facts(agent: BedrockAgent, topic: str = "animals"):
    """
    Ask the agent for a few facts about a topic and parse the reply into a list.

    The prompt asks for nothing but a Python list literal, but chat models
    commonly wrap the list in a markdown code fence or add a sentence around
    it. Only the text between the first "[" and the last "]" of the reply is
    parsed, so such replies are still accepted.

    :param agent: BedrockAgent: Agent whose ``completion`` method is called with the prompt
    :param topic: str: Subject the facts should be about (default is "animals")
    :return: list: The facts parsed from the model reply
    :raises ValueError: If the reply contains no bracketed list, or the bracketed text does not evaluate to a list
    :raises SyntaxError: If the bracketed text is not valid Python literal syntax
    """
    query = f"""
    Can you please create a few different facts about {topic} and return them as a list?

    The output should look like this for example:
    [
        'fact 1',
        'fact 2',
        'fact 3'
    ]

    PLEASE NOTE: ONLY OUTPUT THE LIST OF FACTS, DO NOT RETURN ANYTHING ELSE
    """
    res = agent.completion(query)
    text_str = res.output.message.content[0].text

    # Keep only the outermost bracketed span so fences/prose around the list are ignored.
    start = text_str.find("[")
    end = text_str.rfind("]")
    if start == -1 or end < start:
        raise ValueError("The output should be a list of facts")

    # literal_eval only accepts Python literals, so the reply is never executed as code.
    texts = literal_eval(text_str[start:end + 1])

    # Explicit check instead of `assert`, which is removed when Python runs with -O.
    if not isinstance(texts, list):
        raise ValueError("The output should be a list of facts")

    return texts

def ingest_kb(contents: list, session: boto3.Session, conn_params: dict, table_name: str = "rag_example", dimensions: int = 256, recreate_index: bool = False):
    """
    Ingest content into a vector database knowledge base

    Each text is embedded with ``get_embedding`` and stored together with its
    embedding as one row of ``table_name``. The table is created when it does
    not exist yet, so the first ingestion works without ``recreate_index``.

    :param contents: (list): List of text content to embed
    :param session: boto3.Session: Boto3 session for Bedrock
    :param conn_params: dict: Database connection parameters
    :param table_name: str: Name of the table to store embeddings (default is "rag_example")
    :param dimensions: int: Embedding dimensions (default is 256)
    :param recreate_index: bool: Whether to drop and recreate the table, discarding existing rows (default is False)
    """
    # Embed everything first so no database connection is held open during the Bedrock calls.
    embeddings = [get_embedding(content, session, dimensions=dimensions) for content in contents]

    # table_name and dimensions are interpolated into the statement text (identifiers
    # cannot be bound as parameters), so they must come from trusted configuration.
    create_table_query = (
        f'CREATE TABLE IF NOT EXISTS {table_name} '
        f'(id bigserial PRIMARY KEY, content text, embedding vector({dimensions}))'
    )
    insert_query = f'INSERT INTO {table_name} (content, embedding) VALUES (%s, %s)'

    # The context manager closes the connection on exit, also when a statement fails.
    with psycopg.connect(**conn_params, autocommit=True) as conn:
        register_vector(conn)

        if recreate_index:
            conn.execute(f'DROP TABLE IF EXISTS {table_name}')
        # Runs on every call; it is a no-op when the table already exists.
        conn.execute(create_table_query)

        for content, embedding in zip(contents, embeddings):
            conn.execute(insert_query, (content, embedding))

In [8]:
@tool
def ingest_facts_to_kb(topic: str) -> list:
    """
    Generate and ingest facts about a user defined topic to a vector knowledge base. 
    This knowledge base can be used for similarity search i.e. retrieving similar documents/facts about a given topic depending on the user request.
    
    IMPORTANT:
    Only use this tool when the user explicitly asks for ingesting new facts to the knowledge base.
    Do not use this tool if you feel like you need to ingest facts to the knowledge base.
    For retrieving existing facts, use the handoff_rag_agent tool to query the knowledge base for similar results.
    If the user does not provide a topic, you can use a default topic like "animals" for generating facts.
    
    :param topic: The topic for which facts need to be ingested. Should not be more than 2-3 words.
    """
    # A separate agent writes the facts; it is independent of the agent that called this tool.
    _agent = BedrockAgent()
    facts = generate_facts(_agent, topic)

    # session, conn_params, table_name and dimensions are the notebook-level settings.
    ingest_kb(facts, session, conn_params, table_name=table_name, dimensions=dimensions)

    # The generated facts (a list) are handed back as the tool result.
    return facts

@tool
def query_kb(query: str) -> str:
    """
    Query the knowledge base for similar facts to the input query.
    This might be useful for retrieving additional information to answer a query.
    If the user asks for more information about a certain topic, this function can provide additional facts about that topic.
    
    IMPORTANT:
    Always use this tool first. If the results are not satisfactory, you can use the ingest_facts_to_kb tool to ingest new facts to the knowledge base.
    
    :param query: The query to search for similar facts
    """
    # Embed the query before connecting so no connection is held during the Bedrock call.
    embedding = get_embedding(query, session, dimensions=dimensions)

    # `<=>` is pgvector's cosine-distance operator, so ascending order returns the
    # closest facts first. Only `content` is selected: the result no longer depends
    # on the table's column order and the stored vectors are not sent back.
    # table_name comes from notebook configuration; identifiers cannot be bound.
    sql_query = f"SELECT content FROM {table_name} ORDER BY embedding <=> %s::vector LIMIT 5"

    # The vector is bound as a parameter in its text form instead of being pasted
    # into the SQL string. The context manager ends the transaction and closes
    # the connection on exit.
    with psycopg.connect(**conn_params) as conn:
        rows = conn.execute(sql_query, (str(embedding),)).fetchall()

    # JSON array of up to five fact strings, most similar first.
    return json.dumps([row[0] for row in rows], indent=4)

### Optional, truncate the table

In [9]:
# Remove every row from the embeddings table while keeping the table itself.
# The connection is used as a context manager so it is closed once the
# statement has run instead of staying open for the rest of the session.
sql_query = f"Truncate table {table_name}"
with psycopg.connect(**conn_params, autocommit=True) as conn:
    res = conn.execute(sql_query)

### Create ingestion agent. This agent takes care of ingesting some randomly generated facts into the knowledge base. This can obviously also be done manually; the point is just to show that an agent can be used for it as well :).

In [10]:
# Model and tool set for the ingestion agent: its only tool generates facts
# about a topic and writes them to the knowledge base.
model_cfg = ModelConfig(model_id="amazon.nova-micro-v1:0")
ingest_tools = [ingest_facts_to_kb]

fact_ingestor_agent = BedrockAgent(
    "IngestionAgent",
    session,
    tools=ingest_tools,
    model_cfg=model_cfg,
    verbose=False,
    system_message="You are an expert in generating new and interesting facts about a topic.",
)

In [11]:
# Ask the ingestion agent for facts on several animal groups. The recorded trace
# below shows it calling ingest_facts_to_kb with individual topics.
fact_ingestor_agent.chat("Can you please generate some facts about different types of animals? Please use the following: Mammals, Birds, Reptiles, Amphibians, Fish, Insects, and Arachnids.")

[32m2024-12-22 16:48:30.114[0m | [1mINFO    [0m | [36mbedrock_agent.agent[0m:[36mchat[0m:[36m167[0m - [1mAgent: IngestionAgent | End of conversation. Returning response.[0m


ConverseResponse(ResponseMetadata=ResponseMetadata(RequestId='a59ae6d1-f50b-453d-82bc-7314d74d0539', HTTPStatusCode=200, HTTPHeaders={'date': 'Sun, 22 Dec 2024 15:48:30 GMT', 'content-type': 'application/json', 'content-length': '3795', 'connection': 'keep-alive', 'x-amzn-requestid': 'a59ae6d1-f50b-453d-82bc-7314d74d0539'}, RetryAttempts=0), output=Output(message=Message(role='assistant', content=[Content(text='Here are some interesting facts about different types of animals:\n\n**Mammals**\n- Mammals are warm-blooded animals that have hair or fur.\n- Mammals are the only animals that have live births and nurse their young with milk.\n- There are over 5,400 different species of mammals on Earth.\n- The blue whale, the largest mammal, can weigh as much as 200 tons.\n- Some mammals, like bats and whales, have the ability to echolocate to navigate and hunt.\n\n**Birds**\n- Birds are the only known animals that possess feathers.\n- Ostriches are the largest living birds and can run at spee

In [12]:
# Show the ingestion agent's conversation message by message, including its tool calls.
fact_ingestor_agent.draw_trace()


--------------------------------------------------
Message # 1
--------------------------------------------------
ROLE: USER
TEXT: Can you please generate some facts about different types of animals? Please use the following: Mammals, Birds, Reptiles, Amphibians, Fish, Insects, and Arachnids.

--------------------------------------------------
Message # 2
--------------------------------------------------
ROLE: ASSISTANT
TEXT: <thinking> The User has asked for facts about different types of animals, specifying the categories: Mammals, Birds, Reptiles, Amphibians, Fish, Insects, and Arachnids. I need to generate and ingest these facts into the vector knowledge base to later perform similarity searches if needed. To do this, I will use the tool "ingest_facts_to_kb" with the appropriate topic for each animal type.</thinking>

TOOL USE: ingest_facts_to_kb
INPUT: {'topic': 'Mammals'}
TOOL USE: ingest_facts_to_kb
INPUT: {'topic': 'Birds'}
TOOL USE: ingest_facts_to_kb
INPUT: {'topic': 'Repti

In [13]:
# Report the token count and cost (USD) the ingestion agent has accumulated.
print(f"Total tokens used: {fact_ingestor_agent.total_tokens_used} | total costs: {fact_ingestor_agent.total_costs} USD")

Total tokens used: 4565 | total costs: 0.0004180750 USD


In [14]:
# Peek at the first ingested rows. The connection is used as a context manager
# so its transaction is ended and the connection closed once the rows have been
# fetched, instead of staying open for the rest of the session.
sql_query = f"SELECT content FROM {table_name} LIMIT 10"
with psycopg.connect(**conn_params) as conn:
    res = conn.execute(sql_query)
    rows = res.fetchall()
# Bare expression on the last line so the notebook displays the fetched rows.
rows

[('Mammals are warm-blooded animals that have hair or fur.',),
 ('Mammals are the only animals that have live births and nurse their young with milk.',),
 ('There are over 5,400 different species of mammals on Earth.',),
 ('The blue whale, the largest mammal, can weigh as much as 200 tons.',),
 ('Some mammals, like bats and whales, have the ability to echolocate to navigate and hunt.',),
 ('Birds are the only known animals that possess feathers.',),
 ('Ostriches are the largest living birds and can run at speeds of up to 43 mph.',),
 ('Hummingbirds are the only birds that can fly backwards.',),
 ('The smallest bird in the world is the bee hummingbird, which weighs less than a penny.',),
 ('Some birds, like parrots, have the ability to mimic human speech.',)]

In [15]:
# Model and tool set for the user-facing agent: it can only read from the
# knowledge base through query_kb.
model_cfg = ModelConfig(model_id="amazon.nova-micro-v1:0")
tools = [query_kb]

agent = BedrockAgent(
    "UserAgent",
    session,
    tools=tools,
    model_cfg=model_cfg,
    verbose=False,
    system_message="You are an user agent that can help users with a wide range of topics.",
)

In [16]:
# Ask the user agent a question; the recorded trace below shows it calling
# query_kb before answering.
agent.chat("Can you please tell me some interesting facts about birds?")

[32m2024-12-22 16:49:03.654[0m | [1mINFO    [0m | [36mbedrock_agent.agent[0m:[36mchat[0m:[36m167[0m - [1mAgent: UserAgent | End of conversation. Returning response.[0m


ConverseResponse(ResponseMetadata=ResponseMetadata(RequestId='de7c2475-2e4e-4508-9644-07e46040090e', HTTPStatusCode=200, HTTPHeaders={'date': 'Sun, 22 Dec 2024 15:49:03 GMT', 'content-type': 'application/json', 'content-length': '625', 'connection': 'keep-alive', 'x-amzn-requestid': 'de7c2475-2e4e-4508-9644-07e46040090e'}, RetryAttempts=0), output=Output(message=Message(role='assistant', content=[Content(text='Here are some interesting facts about birds that I found:\n\n1. Birds are the only known animals that possess feathers.\n2. The smallest bird in the world is the bee hummingbird, which weighs less than a penny.\n3. Some birds, like parrots, have the ability to mimic human speech.\n4. Ostriches are the largest living birds and can run at speeds of up to 43 mph.\n\nIf you need more information or have any other questions, feel free to ask!', toolUse=None)])), stopReason='end_turn', usage=Usage(inputTokens=641, outputTokens=100, totalTokens=741), metrics=Metrics(latencyMs=743))

In [17]:
# Show the user agent's conversation message by message, including the tool call and its result.
agent.draw_trace()


--------------------------------------------------
Message # 1
--------------------------------------------------
ROLE: USER
TEXT: Can you please tell me some interesting facts about birds?

--------------------------------------------------
Message # 2
--------------------------------------------------
ROLE: ASSISTANT
TEXT: <thinking> The User has asked for interesting facts about birds. To provide accurate information, I should first use the query_kb tool to retrieve any relevant facts from the knowledge base. </thinking>

TOOL USE: query_kb
INPUT: {'query': 'interesting facts about birds'}

--------------------------------------------------
Message # 3
--------------------------------------------------
ROLE: USER
TOOL RESULT: [
    "Birds are the only known animals that possess feathers.",
    "The smallest bird in the world is the bee hummingbird, which weighs less than a penny.",
    "Some birds, like parrots, have the ability to mimic human speech.",
    "Insects are the most di

In [18]:
# Report the token count and cost (USD) the user agent has accumulated.
print(f"Total tokens used: {agent.total_tokens_used} | total costs: {agent.total_costs} USD")

Total tokens used: 1295 | total costs: 0.0000660100 USD
