# Import Required Libraries

In [30]:
import os
import time
import boto3
import json
import pprint
import textwrap


# Initialize AWS clients
s3_client = boto3.client('s3')
sts_client = boto3.client('sts')
session = boto3.session.Session()
# NOTE(review): region_name is None when no default region is configured;
# region.split('-') below would then fail — confirm a region is set before running.
region = session.region_name
account_id = sts_client.get_caller_identity()["Account"]
bedrock_agent_client = boto3.client('bedrock-agent')
bedrock_agent_runtime_client = boto3.client('bedrock-agent-runtime')
bedrock_runtime_client = boto3.client('bedrock-runtime', region_name=region)


# Build a short suffix that is passed to the knowledge base helpers and used in the agent name.
# Despite its name, account_id_suffix holds the FIRST three characters of the account ID.
account_id_suffix = account_id[:3]
# First character of each dash-separated part of the region, e.g. "us-east-1" -> "ue1"
region_suffix = ''.join([word[0] for word in region.split('-')])
suffix = f"{region_suffix}-{account_id_suffix}"

# Define Knowledge Base parameters
# NOTE: both names are reassigned in the "Setup Knowledge Base" cell further down.
knowledge_base_name= 'fixed-size-chunk-kb'
knowledge_base_description = "Knowledge Base containing sample synthetic Octank financial data"

# Echo the resolved environment, one value per line
print(f"AWS Region: {region}", f"Account ID: {account_id}", f"Suffix: {suffix}", sep='\n')

AWS Region: us-east-1
Account ID: 533267284022
Suffix: ue1-533


# Import utility classes and functions from the utils module

In [2]:
from utils.knowledgebase import BedrockKnowledgeBases
from utils.agents import BedrockAgents
from utils.agents import AWSResourceManager
from utils.chat import BedrockChat, SyntheticDataGenerator
from IPython.display import Markdown, display

def print_markdown(text):
    """Render ``text`` as Markdown via IPython's ``display``."""
    rendered = Markdown(text)
    display(rendered)

    
# Instantiate the helper classes imported from the utils package.
kb = BedrockKnowledgeBases()      # used below to create, sync, query and delete the knowledge base
agents = BedrockAgents()          # used below to create, prepare, invoke and delete the agent
chat = BedrockChat()
data = SyntheticDataGenerator()   # used below to read the evaluation dataset from S3

# Configure Common Settings
Define common settings, such as the foundation model and `num_results`, that are used by both the Knowledge Base and the Agent.

In [3]:
# Settings shared by the Knowledge Base and Agent cells below
foundation_model = 'anthropic.claude-3-sonnet-20240229-v1:0'  # identifier for the model that you want to be used for text generation
embedding_model = 'amazon.titan-embed-text-v2:0'  # embedding model passed to kb.create_knowledge_base
num_results = 5 # number of results to retrieve from knowledge base

# Setup Knowledge Base

Configure the Knowledge Base with a chunking strategy, vector database, and embedding model.


In [7]:
# NOTE: these two names overwrite the values assigned in the first code cell;
# from here on (including the Cleanup section) they refer to the Amazon 10-K knowledge base.
knowledge_base_name = 'amzn10k'
knowledge_base_description = 'Knowledge Base containing Amazon 10K documents'
vector_store = 'OPENSEARCH_SERVERLESS'  # vector store type passed to kb.create_knowledge_base

def upload_documents_to_s3(bucket_name, folder_path, s3_client=None):
    """Upload every file found under ``folder_path`` to an S3 bucket.

    The folder is walked recursively. Files named ``LICENSE``, ``NOTICE`` or
    ``README.md`` are skipped. Each uploaded object's key is the file's path
    relative to ``folder_path`` (with ``/`` separators), so files that share a
    name but live in different sub-folders no longer overwrite one another.
    Files directly inside ``folder_path`` keep the bare file name as their key.

    Args:
        bucket_name: Name of the destination S3 bucket.
        folder_path: Local directory whose files are uploaded.
        s3_client: Optional object exposing ``upload_file(path, bucket, key)``.
            When omitted, a boto3 S3 client is created for the notebook's
            module-level ``region``.

    Raises:
        FileNotFoundError: If ``folder_path`` is not an existing directory
            (previously this case silently uploaded nothing).
    """
    if not os.path.isdir(folder_path):
        raise FileNotFoundError(f"Document folder not found: {folder_path}")

    if s3_client is None:
        s3_client = boto3.client('s3', region_name=region)

    skipped_names = ("LICENSE", "NOTICE", "README.md")
    for root, _dirs, files in os.walk(folder_path):
        for file in files:
            file_path = os.path.join(root, file)
            if file in skipped_names:
                print(f"Skipping {file_path}")
                continue
            # Key on the path relative to the walked folder to avoid collisions
            # between same-named files in different sub-folders.
            object_key = os.path.relpath(file_path, folder_path).replace(os.sep, "/")
            print(f"Uploading {file_path} to {bucket_name}")
            s3_client.upload_file(file_path, bucket_name, object_key)

                
# Create the knowledge base with the parameters defined above. The result is unpacked
# into the knowledge base ID, data source ID and S3 bucket name, which later cells reuse.
kb_id, ds_id, bucket_name = kb.create_knowledge_base(
    kb_name=knowledge_base_name,
    kb_description=knowledge_base_description,
    vector_store=vector_store,
    embedding_model=embedding_model,
    suffix=suffix
)

print(f"Knowledge base ID: {kb_id}, Data source ID: {ds_id}, Bucket name: {bucket_name}")

2025-01-08 22:08:28,026 [INFO] Step 1: Create S3 bucket
2025-01-08 22:08:28,027 [INFO] Creating S3 bucket: amazon-10k-kb-ue1-533-bucket
2025-01-08 22:08:28,442 [INFO] Created S3 bucket: amazon-10k-kb-ue1-533-bucket
2025-01-08 22:08:28,444 [INFO] Step 2: Create KB execution role
2025-01-08 22:08:28,444 [INFO] Creating Bedrock KB execution role: amazon-10k-kb-ue1-533-execution-role
2025-01-08 22:08:29,215 [INFO] Step 3: Create AOSS encryption/network/access policies
2025-01-08 22:08:29,216 [INFO] Creating encryption, network, and access policies for azon-10k-kb-ue1-533-vector-store
2025-01-08 22:08:29,702 [INFO] Created AOSS policies: b-ue1-533-vector-store-en-policy, b-ue1-533-vector-store-ne-policy, b-ue1-533-vector-store-ac-policy
2025-01-08 22:08:29,703 [INFO] Step 4: Create AOSS collection
2025-01-08 22:08:29,704 [INFO] Creating AOSS collection: azon-10k-kb-ue1-533-vector-store
2025-01-08 22:08:30,094 [INFO] Collection is still creating, waiting 60 seconds...
.......................

#### Upload documents to S3 and initiate Bedrock knowledge base data ingestion

In [9]:
# Upload all documents in the local "kb_documents" folder to the bucket returned by
# kb.create_knowledge_base
upload_documents_to_s3(bucket_name, "kb_documents")

# Synchronize the data from the S3 bucket to the Knowledge Base
# (the recorded log below shows this starting an ingestion job and polling its status)
kb.synchronize_data(kb_id=kb_id, ds_id=ds_id)

Uploading kb_documents/AMAZON_2022_10K.pdf to amazon-10k-kb-ue1-533-bucket
2025-01-08 22:22:26,880 [INFO] Starting data synchronization for KB ID: CZ6SKPBM2X and DS ID: W54IF5P3BM
2025-01-08 22:22:28,381 [INFO] Started ingestion job: {'dataSourceId': 'W54IF5P3BM', 'ingestionJobId': 'P4HX7HWKV2', 'knowledgeBaseId': 'CZ6SKPBM2X', 'startedAt': datetime.datetime(2025, 1, 9, 6, 22, 28, 340591, tzinfo=tzutc()), 'statistics': {'numberOfDocumentsDeleted': 0, 'numberOfDocumentsFailed': 0, 'numberOfDocumentsScanned': 0, 'numberOfMetadataDocumentsModified': 0, 'numberOfMetadataDocumentsScanned': 0, 'numberOfModifiedDocumentsIndexed': 0, 'numberOfNewDocumentsIndexed': 0}, 'status': 'STARTING', 'updatedAt': datetime.datetime(2025, 1, 9, 6, 22, 28, 340591, tzinfo=tzutc())}
2025-01-08 22:22:28,522 [INFO] Ingestion job status: STARTING
.....
2025-01-08 22:22:33,691 [INFO] Ingestion job status: IN_PROGRESS
.....
2025-01-08 22:22:38,872 [INFO] Ingestion job status: IN_PROGRESS
.....
2025-01-08 22:22:44,

# Setup Agent

Configure the Agent and associate it with the previously created Knowledge Base, so that the agent queries the knowledge base for information to augment its responses.



In [11]:
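# Optional: uncomment the two lines below to delete an existing agent with the same name
# (and its role) before re-creating it in the next cell.
# agent_name=f'rag-agent-{suffix}'
# agents.delete_bedrock_agent(agent_name=agent_name, delete_role=True, verbose=True)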

2025-01-08 22:23:32,386 [INFO] Found agent 'rag-agent-ue1-533' with ID: KPJOGYOMCS
2025-01-08 22:23:32,387 [INFO] Deleting aliases for agent KPJOGYOMCS...
2025-01-08 22:23:32,527 [INFO] Deleting alias TSTALIASID from agent KPJOGYOMCS
2025-01-08 22:23:32,744 [INFO] Deleting action groups for agent KPJOGYOMCS...
2025-01-08 22:23:32,875 [INFO] Deleting action group 'CodeInterpreterAction' (ID: SXAMLZM8ZL) from agent KPJOGYOMCS
2025-01-08 22:23:33,014 [INFO] Action group details: {'ResponseMetadata': {'RequestId': '0163ad1c-ae2b-4697-8e3c-9a19a0ede867', 'HTTPStatusCode': 200, 'HTTPHeaders': {'date': 'Thu, 09 Jan 2025 06:23:33 GMT', 'content-type': 'application/json', 'content-length': '360', 'connection': 'keep-alive', 'x-amzn-requestid': '0163ad1c-ae2b-4697-8e3c-9a19a0ede867', 'x-amz-apigw-id': 'EG231F20oAMEa_A=', 'x-amzn-trace-id': 'Root=1-677f6b64-7319856e3ba34c06459a285d'}, 'RetryAttempts': 0}, 'agentActionGroup': {'actionGroupId': 'SXAMLZM8ZL', 'actionGroupName': 'CodeInterpreterActio

In [12]:
# Name of the agent; the suffix matches the one used for the knowledge base resources
agent_name = f'rag-agent-{suffix}'

# Description of the agent
agent_description = """You are a versatile AI assistant that combines provided context with general knowledge to deliver accurate, 
well-sourced information and solutions to questions across various domains."""

# Instructions that tell the agent what it should do and how it should interact with users.
agent_instruction = """You are a General AI Assistant helping users with diverse tasks and questions across multiple domains. Your role is to 
leverage both your general expertise and context-based knowledge retrieved in real-time to provide accurate, relevant, and up-to-date information. You are capable of 
answering questions with clear explanations on a broad range of topics, including but not limited to technology, science, health, finance, and general knowledge.

Response style:
1. Maintain a professional yet friendly tone
2. Use clear, straightforward language
3. Focus on practical, actionable solutions
4. Structure responses logically and concisely
5. Break down complex concepts simply
6. Stay relevant to user queries
7. Avoid unnecessary information
8. Ensure a natural conversation flow
9. Use available information before asking questions
10. Do not directly quote or mention that the information is from the context."""


# Description of what the agent should use the knowledge base for
kb_usage_description = "Use this knowledge base to provide responses to user queries on Amazon's financial performance and business activities"

# Create the RAG agent and associate it with the knowledge base created earlier
rag_agent = agents.create_bedrock_agent(
    agent_name=agent_name,
    agent_description=agent_description,
    agent_instructions=agent_instruction,
    model_id=foundation_model, # TODO: Should this be a list?
    kb_id=kb_id,
    associate_kb=True,
    code_interpreter=True,  # Enabling code interpreter to make the agent use instructions
    kb_usage_description=kb_usage_description,  # reuse the variable instead of retyping the literal
    verbose=True
)

# rag_agent[0] and rag_agent[1] are printed as the agent ID and alias ID respectively
# (the agent name was previously printed twice; it is now printed once)
print(f"Agent name: {agent_name}", f"Agent ID: {rag_agent[0]}", f"Agent Alias ID: {rag_agent[1]}", sep='\n')

# Prepare the agent
agents.prepare_bedrock_agent(agent_name=agent_name, verbose=True)

2025-01-08 22:23:52,479 [INFO] Creating IAM role: rag-agent-ue1-533-exec-role
2025-01-08 22:23:52,622 [INFO] Created IAM role: arn:aws:iam::533267284022:role/rag-agent-ue1-533-exec-role
2025-01-08 22:23:57,879 [INFO] Created managed policy: arn:aws:iam::533267284022:policy/rag-agent-ue1-533-AmazonBedrockAgentModelAccessPolicy
2025-01-08 22:23:58,038 [INFO] Attached policy 'rag-agent-ue1-533-AmazonBedrockAgentModelAccessPolicy' to role 'rag-agent-ue1-533-exec-role'
2025-01-08 22:23:58,238 [INFO] Created agent: SOUUGYIXPE
2025-01-08 22:24:13,244 [INFO] Updating policy 'rag-agent-ue1-533-AmazonBedrockAgentKBAccessPolicy' for role 'arn:aws:iam::533267284022:role/rag-agent-ue1-533-exec-role'
2025-01-08 22:24:13,480 [INFO] Created new policy: rag-agent-ue1-533-AmazonBedrockAgentKBAccessPolicy
2025-01-08 22:24:13,660 [INFO] Attached policy 'rag-agent-ue1-533-AmazonBedrockAgentKBAccessPolicy' to role 'rag-agent-ue1-533-exec-role'
2025-01-08 22:24:13,660 [INFO] Associating agent 'SOUUGYIXPE' wi

# Query Knowledge Base
Execute a query against the knowledge base to test responses and citations.


In [83]:
# Question sent to the knowledge base here and to the agent in the next section
user_query = "What is Amazon's primary focus?"

# Retrieve passages from the knowledge base and generate an answer with the foundation model.
# num_results comes from the shared settings cell (instead of a hard-coded 5) so that the
# knowledge base and agent queries stay in sync.
kb_response = kb.retrieve_and_generate_from_kb(
    query=user_query,
    kb_id=kb_id,
    num_results=num_results,
    model_id=foundation_model,
    region=region,
    verbose=False
)

In [85]:
# Print the question and the generated answer, wrapped at 150 columns
print(f"User query:", textwrap.fill(user_query, width=150), sep='\n')
print("\nKnowledge Base's response:", textwrap.fill(kb_response.generated_text, width=150), sep='\n')
# Print each citation, numbered from 1. Only the first 200 characters of the citation text
# are shown, and "......" is appended even when the text is shorter than that.
for i, citation in enumerate(kb_response.citations, start=1):
    print(f"\nCitation {i}:", textwrap.fill(citation.text[:200] + "......", width=150), sep='\n')

User query:
What is Amazon's primary focus?

Knowledge Base's response:
Amazon's primary focus is to be Earth's most customer-centric company. They are guided by four principles: customer obsession rather than competitor
focus, passion for invention, commitment to operational excellence, and long-term thinking.

Citation 1:
Certain Relationships and Related Transactions, and Director Independence 71 Item 14. Principal Accountant Fees and Services 71     PART IV Item 15.
Exhibits, Financial Statement Schedules 72 Item 16.......


# Query Agent
Run the same query through the agent and inspect its response and citations.

In [58]:
# Send the same user_query to the agent. The result is indexed by 'response' and
# 'citations' in the following cell.
agents_response = agents.invoke(
    agent_name=agent_name, 
    input_text=user_query, 
    verbose=True, 
    enable_trace=True,      # Enable trace to get citations from agent's response
    kb_id=kb_id,
    num_results=num_results,
    trace_level='core',

)

2025-01-08 23:33:08,648 [INFO] Invoking agent 'rag-agent-ue1-533' with input: What is Amazon's primary focus?
[32m---- Step 1 ----[0m
[33mTook 3.0s, using 1775 tokens (in: 1670, out: 105) to complete action.[0m
[34mTo answer this question about Amazon's primary focus, I will search the provided knowledge base for relevant information.[0m
[32m---- Step 2 ----[0m
[33mTook 8.8s, using 2272 tokens (in: 2081, out: 191) to complete action.[0m
[36mFinal response:


Amazon's primary focus is to be the most customer-centric company in the world. The company is guided by four key principles:

1) Obsession over customer satisfaction rather than focusing on competitors
2) Passion for innovation and invention 
3) Commitment to operational excellence
4) Thinking long-term rather than being shortsighted

Amazon aims to offer consumers low prices, fast and free delivery, easy-to-use functionality, and excellent customer service through its online and physical stores. The company serves a di

In [86]:
# Print the question and the agent's answer, wrapped at 150 columns
# (textwrap.fill collapses the answer's line breaks into a single paragraph)
print(f"User query:", textwrap.fill(user_query, width=150), sep='\n')
print("\nAgent's response:", textwrap.fill(agents_response['response'], width=150), sep='\n')
# Print each citation, numbered from 1. Only the first 200 characters of the citation text
# are shown, and "......" is appended even when the text is shorter than that.
for i, citation in enumerate(agents_response['citations'], start=1):
    print(f"\nCitation {i}:", textwrap.fill(citation.text[:200] + "......", width=150), sep='\n')

User query:
What is Amazon's primary focus?

Agent's response:
  Amazon's primary focus is to be the most customer-centric company in the world. The company is guided by four key principles:  1) Obsession over
customer satisfaction rather than focusing on competitors 2) Passion for innovation and invention  3) Commitment to operational excellence 4) Thinking
long-term rather than being shortsighted  Amazon aims to offer consumers low prices, fast and free delivery, easy-to-use functionality, and excellent
customer service through its online and physical stores. The company serves a diverse set of customers including consumers, sellers, developers,
enterprises, content creators, advertisers, and employees.

Citation 1:
Certain Relationships and Related Transactions, and Director Independence 71 Item 14. Principal Accountant Fees and Services 71     PART IV Item 15.
Exhibits, Financial Statement Schedules 72 Item 16.......

Citation 2:
In each of our segments, we serve our primary custom

# Compare Responses

Compare the responses obtained from the Knowledge Base and Agent for an evaluation dataset.

In [None]:

# Generate responses for a list of QA records
def generate_kb_and_agent_responses(qa_records):
    """Answer every prompt in ``qa_records`` with both the knowledge base and the agent.

    Relies on the notebook-level ``kb``, ``agents``, ``kb_id``, ``agent_name``,
    ``region``, ``foundation_model`` and ``num_results``. The model and result
    count now come from the shared settings cell instead of being hard-coded,
    so both paths use the same configuration as the single-query cells.

    Args:
        qa_records: Iterable of records. Each record has a ``'conversationTurns'``
            list, and each turn's prompt text is read from
            ``turn['prompt']['content'][0]['text']``.

    Returns:
        A list with one entry per record. Each entry is a list with one dict per
        conversation turn, holding ``'user_query'``, ``'kb_response'`` and
        ``'agent_response'``.
    """
    results = []
    for record in qa_records:
        record_results = []
        for turn in record['conversationTurns']:
            user_query = turn['prompt']['content'][0]['text']

            # Generate KB response
            kb_response = kb.retrieve_and_generate_from_kb(
                query=user_query,
                kb_id=kb_id,
                num_results=num_results,
                model_id=foundation_model,
                region=region,
                verbose=False
            )

            # Generate Agent response
            agent_response = agents.invoke(
                agent_name=agent_name,
                input_text=user_query,
                num_results=num_results,
                verbose=False,
                trace_level=None
            )

            # Store both answers side by side for later comparison
            record_results.append({
                'user_query': user_query,
                'kb_response': kb_response.generated_text,
                'agent_response': agent_response['response'],
            })

        # Append record results to overall results
        results.append(record_results)

    # The former "compare responses" loop only assigned unused locals (its prints were
    # commented out), so it was removed; callers can inspect the returned structure.
    return results



# Load the QA records from the JSONL file in S3
qa_records = data.read_jsonl_from_s3(
    bucket="synthetic-qa-dataset-amzn-10k",
    key="evaluation_data/rag_dataset_prompt_only.jsonl",
    verbose=True
)

# Keep only the first 10 records to limit the run while testing.
# NOTE: this rebinds qa_records, so the full dataset must be reloaded to use more records.
qa_records = qa_records[:10]  # Limit the number of records for testing

# Generate responses for the QA records
kb_and_agents_results = generate_kb_and_agent_responses(qa_records)

# Cleanup

In [88]:
# Delete the Knowledge Base and all associated resources based on kb name and suffix.
# NOTE(review): the recorded output below shows kb_name='fixed-size-chunk-kb' (the value from
# the first cell), whereas a top-to-bottom run reaches this cell with the name assigned in the
# "Setup Knowledge Base" cell — confirm knowledge_base_name matches the KB that was created.
kb.delete_knowledge_base_resources_by_name(kb_name=knowledge_base_name, suffix=suffix)

2025-01-09 08:03:45,348 [INFO] Deleting resources for kb_name='fixed-size-chunk-kb' and suffix='ue1-533'
2025-01-09 08:03:45,348 [INFO] Listing all knowledge bases via pagination...
2025-01-09 08:03:45,479 [INFO] Deleting S3 bucket: fixed-size-chunk-kb-ue1-533-bucket
2025-01-09 08:03:45,811 [INFO] Deleting Bedrock KB execution role: fixed-size-chunk-kb-ue1-533-execution-role
2025-01-09 08:03:45,904 [INFO] Finding AOSS collection for name: ze-chunk-kb-ue1-533-vector-store


UnboundLocalError: local variable 'aoss' referenced before assignment

In [89]:
# Delete the agent created above; the recorded log shows its aliases and action groups being
# removed first. delete_role=True presumably also removes the agent's IAM role — confirm in utils.agents.
agents.delete_bedrock_agent(agent_name=agent_name, delete_role=True, verbose=True)

2025-01-09 08:03:58,713 [INFO] Found agent 'rag-agent-ue1-533' with ID: SOUUGYIXPE
2025-01-09 08:03:58,713 [INFO] Deleting aliases for agent SOUUGYIXPE...
2025-01-09 08:03:58,852 [INFO] Deleting alias TSTALIASID from agent SOUUGYIXPE
2025-01-09 08:03:59,057 [INFO] Deleting action groups for agent SOUUGYIXPE...
2025-01-09 08:03:59,182 [INFO] Deleting action group 'CodeInterpreterAction' (ID: 0SXJTKLIMJ) from agent SOUUGYIXPE
2025-01-09 08:03:59,342 [INFO] Action group details: {'ResponseMetadata': {'RequestId': '235b0854-271e-4384-ba78-2ee2e9a0c03e', 'HTTPStatusCode': 200, 'HTTPHeaders': {'date': 'Thu, 09 Jan 2025 16:03:59 GMT', 'content-type': 'application/json', 'content-length': '360', 'connection': 'keep-alive', 'x-amzn-requestid': '235b0854-271e-4384-ba78-2ee2e9a0c03e', 'x-amz-apigw-id': 'EIL5cHY3oAMEL_Q=', 'x-amzn-trace-id': 'Root=1-677ff36f-02b9f8dd12a0fd210f3153b4'}, 'RetryAttempts': 0}, 'agentActionGroup': {'actionGroupId': '0SXJTKLIMJ', 'actionGroupName': 'CodeInterpreterActio