In [1]:
import boto3
print(f"Current boto3 version: {boto3.__version__}")

Current boto3 version: 1.35.94


In [2]:
import os
import time
import boto3
import json

# Resolve the AWS region first: every client below and the resource-name suffix
# depend on it. `region_name` is None when no default region is configured,
# which would otherwise surface later as an opaque AttributeError on
# `region.split('-')`.
session = boto3.session.Session()
region = session.region_name
if not region:
    raise RuntimeError(
        "No default AWS region is configured. Set AWS_DEFAULT_REGION or run "
        "`aws configure` before executing this notebook."
    )

# Initialize AWS clients, all explicitly pinned to the same region
s3_client = boto3.client('s3', region_name=region)
sts_client = boto3.client('sts', region_name=region)
bedrock_agent_client = boto3.client('bedrock-agent', region_name=region)
bedrock_agent_runtime_client = boto3.client('bedrock-agent-runtime', region_name=region)
bedrock_runtime_client = boto3.client('bedrock-runtime', region_name=region)

account_id = sts_client.get_caller_identity()["Account"]

# Build a short, deterministic suffix for resource names:
# first character of each region component (e.g. "us-east-1" -> "ue1")
# plus the first three characters of the account ID.
account_id_suffix = account_id[:3]
region_suffix = ''.join([word[0] for word in region.split('-')])
suffix = f"{region_suffix}-{account_id_suffix}"

# Define Knowledge Base parameters
knowledge_base_name_standard = 'fixed-size-chunk-kb'
knowledge_base_name_hierarchical = 'hierarchical-chunk-kb'
knowledge_base_description = "Knowledge Base containing sample synthetic Octank financial data"

print(f"AWS Region: {region}", f"Account ID: {account_id}", f"Suffix: {suffix}", sep='\n')

AWS Region: us-east-1
Account ID: 533267284022
Suffix: ue1-533


In [3]:
from utils.knowledgebase import BedrockKnowledgeBases
from utils.agents import BedrockAgents
from utils.agents import AWSResourceManager
from utils.chat import BedrockChat, SyntheticDataGenerator
from IPython.display import Markdown, display

def print_markdown(text):
    """Render ``text`` as Markdown in the notebook output area."""
    rendered = Markdown(text)
    display(rendered)

    
# Helper objects from the local `utils` package; instantiated once here and
# reused by the knowledge-base and agent cells below.
kb = BedrockKnowledgeBases()
agents = BedrockAgents()

## Create Knowledge Base

In [4]:
# Settings for the knowledge base created in this cell.
kb_name = 'octank-financial-kb'
vector_store = 'OPENSEARCH_SERVERLESS'
embedding_model = 'amazon.titan-embed-text-v2:0'

# The returned IDs and bucket name are used by the upload, ingestion and agent
# cells below. Per the log output, this call creates an S3 bucket, a KB
# execution role, AOSS policies and an AOSS collection, and waits while the
# collection is being created.
kb_id, ds_id, bucket_name = kb.create_knowledge_base(
    kb_name=kb_name, 
    kb_description='Knowledge Base containing sample synthetic Octank financial data',
    vector_store=vector_store,
    embedding_model=embedding_model,
    suffix=suffix
)

2025-01-07 15:31:24,998 [INFO] Step 1: Create S3 bucket
2025-01-07 15:31:24,999 [INFO] Creating S3 bucket: octank-financial-kb-ue1-533-bucket
2025-01-07 15:31:25,496 [INFO] Created S3 bucket: octank-financial-kb-ue1-533-bucket
2025-01-07 15:31:25,496 [INFO] Step 2: Create KB execution role
2025-01-07 15:31:25,497 [INFO] Creating Bedrock KB execution role: octank-financial-kb-ue1-533-execution-role
2025-01-07 15:31:26,518 [INFO] Step 3: Create AOSS encryption/network/access policies
2025-01-07 15:31:26,520 [INFO] Creating encryption, network, and access policies for inancial-kb-ue1-533-vector-store
2025-01-07 15:31:27,382 [INFO] Created AOSS policies: b-ue1-533-vector-store-en-policy, b-ue1-533-vector-store-ne-policy, b-ue1-533-vector-store-ac-policy
2025-01-07 15:31:27,383 [INFO] Step 4: Create AOSS collection
2025-01-07 15:31:27,384 [INFO] Creating AOSS collection: inancial-kb-ue1-533-vector-store
2025-01-07 15:31:27,983 [INFO] Collection is still creating, waiting 60 seconds...
.....

In [20]:
# Echo the identifiers returned by the knowledge-base creation cell.
print(", ".join((
    f"Knowledge base ID: {kb_id}",
    f"Data source ID: {ds_id}",
    f"Bucket name: {bucket_name}",
)))

Knowledge base ID: 892DWWRLXW, Data source ID: NE9VZUTTUX, Bucket name: octank-financial-kb-ue1-533-bucket


In [21]:
# Delete the Knowledge Base and all associated resources based on kb name and suffix
# kb.delete_knowledge_base_resources_by_name(kb_name=kb_name, suffix=suffix)

### Upload KB documents to S3  

In [22]:
def upload_documents_to_s3(bucket_name, folder_path, s3_client=None):
    """Upload every file under ``folder_path`` to an S3 bucket.

    The object key is the file's path relative to ``folder_path`` (with ``/``
    separators), so files that share a name but live in different
    sub-folders no longer overwrite each other. Files directly inside
    ``folder_path`` keep their bare file name as the key.

    Args:
        bucket_name (str): Destination S3 bucket.
        folder_path (str): Local folder that is walked recursively.
        s3_client (optional): Object exposing ``upload_file(path, bucket, key)``.
            Defaults to a new boto3 S3 client in the notebook's ``region``.

    Files named LICENSE, NOTICE or README.md are skipped.
    """
    skipped_names = {"LICENSE", "NOTICE", "README.md"}
    if s3_client is None:
        s3_client = boto3.client('s3', region_name=region)

    for root, _dirs, files in os.walk(folder_path):
        for file in files:
            file_path = os.path.join(root, file)
            if file in skipped_names:
                print(f"Skipping {file_path}")
                continue
            # Keep the sub-folder structure in the key; S3 keys always use '/'.
            object_key = os.path.relpath(file_path, folder_path).replace(os.sep, "/")
            print(f"Uploading {file_path} to {bucket_name}")
            s3_client.upload_file(file_path, bucket_name, object_key)

# Upload the sample documents from the local "kb_documents" folder to the
# bucket returned by kb.create_knowledge_base(...).
upload_documents_to_s3(bucket_name, "kb_documents")

Uploading kb_documents/octank_financial_10K.pdf to octank-financial-kb-ue1-533-bucket


### Initiate Knowledge Base Data Ingestion 

In [23]:
# Start ingestion of the uploaded documents into the knowledge base. Per the
# log output below, the helper starts an ingestion job and reports its status
# until a final status is logged.
kb.synchronize_data(kb_id=kb_id, ds_id=ds_id)

2025-01-07 15:42:28,828 [INFO] Starting data synchronization for KB ID: 892DWWRLXW and DS ID: NE9VZUTTUX
2025-01-07 15:42:31,662 [INFO] Started ingestion job: {'dataSourceId': 'NE9VZUTTUX', 'ingestionJobId': 'A9LOUWC7N9', 'knowledgeBaseId': '892DWWRLXW', 'startedAt': datetime.datetime(2025, 1, 7, 23, 42, 31, 703354, tzinfo=tzutc()), 'statistics': {'numberOfDocumentsDeleted': 0, 'numberOfDocumentsFailed': 0, 'numberOfDocumentsScanned': 0, 'numberOfMetadataDocumentsModified': 0, 'numberOfMetadataDocumentsScanned': 0, 'numberOfModifiedDocumentsIndexed': 0, 'numberOfNewDocumentsIndexed': 0}, 'status': 'STARTING', 'updatedAt': datetime.datetime(2025, 1, 7, 23, 42, 31, 703354, tzinfo=tzutc())}
2025-01-07 15:42:31,800 [INFO] Ingestion job status: STARTING
.....
2025-01-07 15:42:36,961 [INFO] Ingestion job status: COMPLETE
.....
2025-01-07 15:42:41,982 [INFO] Final ingestion job status: COMPLETE
2025-01-07 15:42:41,986 [INFO] Here are the job details:{
  "dataSourceId": "NE9VZUTTUX",
  "ingest

## Generate response using KB's `RetrieveAndGenerate` API

In [9]:
# Sample question used to exercise the knowledge base in the cells below.
user_query = "What is the total cost of Octank Financial's property and equipment as of December 31, 2022?"

In [10]:
def retrieve_from_knowledge_base(knowledge_base_id, query, number_of_results=3):
    """Return the retrieval results for ``query`` from a knowledge base.

    Args:
        knowledge_base_id (str): ID of the knowledge base to search.
        query (str): Query text.
        number_of_results (int, optional): Number of results requested from
            the vector search. Defaults to 3.

    Returns:
        The ``retrievalResults`` entry of the ``retrieve`` response.
    """
    vector_search = {"numberOfResults": number_of_results}
    kb_response = bedrock_agent_runtime_client.retrieve(
        knowledgeBaseId=knowledge_base_id,
        retrievalQuery={"text": query},
        retrievalConfiguration={"vectorSearchConfiguration": vector_search},
    )
    return kb_response['retrievalResults']

In [11]:
system_prompt = """Human: You are a versatile AI assistant. Your task is to combine the provided context with your general knowledge to deliver accurate, well-sourced information and solutions to questions across various domains. Use the context and your general expertise to answer user queries effectively.

Here is the context and user's question:
<context>
$search_results$
</context>

<question>
$query$
</question>

Your role is to leverage both your general expertise and context-based knowledge to provide accurate, relevant, and up-to-date information. Follow these guidelines in your responses:
1. Maintain a professional yet friendly tone.
2. Use clear, straightforward language.
3. Focus on practical, actionable solutions.
4. Structure responses logically and concisely.
5. Break down complex concepts simply.
6. Stay relevant to user queries.
7. Avoid unnecessary information.
8. Ensure a natural conversation flow.
9. Use available information before asking questions.
10. Do not directly quote or mention that the information is from the context.

Assistant:
"""


In [12]:
def retrieve_and_generate_from_kb(query, kb_id, num_results=5, prompt_template=None, model_id="anthropic.claude-v2", region="us-east-1"):
    """
    Answer a query with Bedrock's ``retrieve_and_generate`` call against a knowledge base.

    Args:
        query (str): Question text sent as the request input.
        kb_id (str): ID of the knowledge base to search.
        num_results (int, optional): Number of search results to retrieve. Defaults to 5.
        prompt_template (str, optional): Text prompt template for generation; only
            added to the request when truthy. Defaults to None.
        model_id (str, optional): Foundation model ID used to build the model ARN.
            Defaults to "anthropic.claude-v2".
        region (str, optional): Region used to build the model ARN. Defaults to "us-east-1".

    Returns:
        str: The ``output.text`` field of the response.

    Raises:
        Exception: Any error raised while building or sending the request is
            printed and then re-raised unchanged.
    """
    try:
        # Retrieval always uses semantic search with the requested result count.
        vector_search = {
            'numberOfResults': num_results,
            'overrideSearchType': 'SEMANTIC',
        }
        kb_config = {
            'knowledgeBaseId': kb_id,
            'modelArn': f'arn:aws:bedrock:{region}::foundation-model/{model_id}',
            'retrievalConfiguration': {'vectorSearchConfiguration': vector_search},
        }

        # A custom template is optional; omit the section entirely when absent.
        if prompt_template:
            kb_config['generationConfiguration'] = {
                'promptTemplate': {'textPromptTemplate': prompt_template}
            }

        rag_request = {
            'type': 'KNOWLEDGE_BASE',
            'knowledgeBaseConfiguration': kb_config,
        }
        result = bedrock_agent_runtime_client.retrieve_and_generate(
            input={'text': query},
            retrieveAndGenerateConfiguration=rag_request,
        )
        return result['output']['text']

    except Exception as err:
        print(f"Error during retrieve and generate operation: {str(err)}")
        raise

In [13]:
# Use the session region resolved in the setup cell instead of a hard-coded
# "us-east-1", so the model ARN matches the region the Bedrock clients and the
# knowledge base were created in.
response_text = retrieve_and_generate_from_kb(
    kb_id=kb_id,
    query=user_query,
    region=region,
    num_results=5,
    prompt_template=system_prompt,
    model_id="anthropic.claude-3-sonnet-20240229-v1:0"
)

print_markdown(response_text)


According to the table provided in the notes to Octank Financial's consolidated financial statements, the total cost of the company's property and equipment as of December 31, 2022 is $440,000,000. This amount represents the sum of the costs across various asset classes, including land ($50,000,000), buildings ($200,000,000), machinery and equipment ($150,000,000), furniture and fixtures ($20,000,000), and vehicles ($15,000,000).

It's important to note that the total cost does not reflect the accumulated depreciation or the net book value of these assets. The net book value, which considers the impact of depreciation, is reported as $307,500,000 in the table. The cost figure represents the original acquisition cost or construction cost of these long-lived assets before any depreciation has been applied.

In [14]:
user_query = "How is the net book value of each asset class calculated?"

# Use the session region resolved in the setup cell instead of a hard-coded
# "us-east-1", so the model ARN matches the region the Bedrock clients and the
# knowledge base were created in.
response_text = retrieve_and_generate_from_kb(
    kb_id=kb_id,
    query=user_query,
    region=region,
    num_results=5,
    prompt_template=system_prompt,
    model_id="anthropic.claude-3-sonnet-20240229-v1:0"
)

print_markdown(response_text)


The net book value of each asset class is calculated by taking the original cost of the asset and subtracting the accumulated depreciation from it.

To break it down:

Cost - This is the original purchase price or cost to acquire the asset.

Accumulated Depreciation - This is the total amount of depreciation expense that has been recorded for the asset since it was put into service. Depreciation is the systematic allocation of the cost of an asset over its estimated useful life.

Net Book Value = Cost - Accumulated Depreciation

For example, let's look at the Buildings asset class from the table:

Cost: $200,000,000
Accumulated Depreciation: $40,000,000

Net Book Value = $200,000,000 - $40,000,000 = $160,000,000

So the net book value of the Buildings is $160,000,000. This represents the remaining cost of the buildings that has not yet been depreciated.

The net book value calculation is done for each asset class to show the remaining undepreciated cost that can provide future economic benefits to the company over the assets' useful lives.

## Create RAG Agent

In [6]:
# Short description of the agent; passed as `agent_description` to
# agents.create_bedrock_agent(...) below.
agent_description = """You are a versatile AI assistant that combines provided context with general knowledge to deliver accurate, 
well-sourced information and solutions to questions across various domains."""


# Instructions for the agent; passed as `agent_instructions` to
# agents.create_bedrock_agent(...) below. The response-style list mirrors the
# guidelines in the `system_prompt` template used for RetrieveAndGenerate.
agent_instruction = """You are a General AI Assistant helping users with diverse tasks and questions across multiple domains. Your role is to 
leverage both your general expertise and context-based knowledge retrieved in real-time to provide accurate, relevant, and up-to-date information. You are capable of 
answering questions with clear explanations on a broad range of topics, including but not limited to technology, science, health, finance, and general knowledge.

Response style:
1. Maintain a professional yet friendly tone
2. Use clear, straightforward language
3. Focus on practical, actionable solutions
4. Structure responses logically and concisely
5. Break down complex concepts simply
6. Stay relevant to user queries
7. Avoid unnecessary information
8. Ensure a natural conversation flow
9. Use available information before asking questions
10. Do not directly quote or mention that the information is from the context."""


In [9]:
agent_name = f'rag-agent-{suffix}'
print('Agent name:', agent_name)

# `kb_id` comes from kb.create_knowledge_base(...) in the "Create Knowledge Base"
# section. It is deliberately not hard-coded here: a literal ID is tied to a
# single account/region and silently overrides the knowledge base created above.
# To reuse an existing knowledge base after a kernel restart, set it explicitly:
#   kb_id = "<your-knowledge-base-id>"

Agent name: rag-agent-ue1-533


In [10]:
# Create the agent and associate it with the knowledge base (`associate_kb=True`).
# Per the log output, the helper also creates the agent's IAM execution role
# and attaches model-access and KB-access policies to it.
rag_agent = agents.create_bedrock_agent(
    agent_name=agent_name, 
    agent_description=agent_description,
    agent_instructions=agent_instruction,
    model_id="anthropic.claude-3-sonnet-20240229-v1:0",
    kb_id=kb_id,
    associate_kb=True,
    kb_usage_description="Use this knowledge base to provide responses to user queries on any topic",
    verbose=True
    )

2025-01-07 15:55:10,277 [INFO] Creating IAM role: rag-agent-ue1-533-exec-role
2025-01-07 15:55:10,431 [INFO] Created IAM role: arn:aws:iam::533267284022:role/rag-agent-ue1-533-exec-role
2025-01-07 15:55:15,696 [INFO] Created managed policy: arn:aws:iam::533267284022:policy/rag-agent-ue1-533-AmazonBedrockAgentModelAccessPolicy
2025-01-07 15:55:15,883 [INFO] Attached policy 'rag-agent-ue1-533-AmazonBedrockAgentModelAccessPolicy' to role 'rag-agent-ue1-533-exec-role'
2025-01-07 15:55:16,059 [INFO] Created agent: XNSPVCAHBV
2025-01-07 15:55:31,073 [INFO] Updating policy 'rag-agent-ue1-533-AmazonBedrockAgentKBAccessPolicy' for role 'arn:aws:iam::533267284022:role/rag-agent-ue1-533-exec-role'
2025-01-07 15:55:31,356 [INFO] Created new policy: rag-agent-ue1-533-AmazonBedrockAgentKBAccessPolicy
2025-01-07 15:55:31,534 [INFO] Attached policy 'rag-agent-ue1-533-AmazonBedrockAgentKBAccessPolicy' to role 'rag-agent-ue1-533-exec-role'
2025-01-07 15:55:31,536 [INFO] Associating agent 'XNSPVCAHBV' wi

In [7]:
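# Cleanup (destructive): uncomment to delete the agent together with its aliases,
# action groups and IAM execution role.
# agents.delete_bedrock_agent(agent_name=agent_name, delete_role=True, verbose=True)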

2025-01-07 15:54:29,161 [INFO] Found agent 'rag-agent-ue1-533' with ID: 9PCQTVLUIV
2025-01-07 15:54:29,162 [INFO] Deleting aliases for agent 9PCQTVLUIV...
2025-01-07 15:54:29,294 [INFO] Deleting alias TSTALIASID from agent 9PCQTVLUIV
2025-01-07 15:54:29,437 [INFO] Deleting action groups for agent 9PCQTVLUIV...
2025-01-07 15:54:29,561 [INFO] Deleting agent: 9PCQTVLUIV
.....
.....
2025-01-07 15:54:39,823 [INFO] Deleted agent: 9PCQTVLUIV
2025-01-07 15:54:39,825 [INFO] Deleting IAM role: rag-agent-ue1-533-exec-role
2025-01-07 15:54:41,231 [INFO] Deleted role: rag-agent-ue1-533-exec-role


In [11]:
# Prepare the agent before invoking it. Per the log output, the helper
# re-checks the agent status (attempts are counted out of 12) until it is PREPARED.
agents.prepare_bedrock_agent(agent_name=agent_name, verbose=True)

2025-01-07 15:56:11,094 [INFO] Agent 'rag-agent-ue1-533' status: {'agentId': 'XNSPVCAHBV', 'agentName': 'rag-agent-ue1-533', 'agentStatus': 'PREPARING', 'foundationModel': 'anthropic.claude-3-sonnet-20240229-v1:0', 'agentCollaboration': 'DISABLED', 'orchestrationType': 'DEFAULT', 'createdAt': datetime.datetime(2025, 1, 7, 23, 55, 16, 121319, tzinfo=tzutc()), 'lastUpdatedAt': datetime.datetime(2025, 1, 7, 23, 55, 16, 952293, tzinfo=tzutc())}
2025-01-07 15:56:11,095 [INFO] Agent preparing... (attempt 1/12)
.....
2025-01-07 15:56:16,375 [INFO] Agent 'rag-agent-ue1-533' status: {'agentId': 'XNSPVCAHBV', 'agentName': 'rag-agent-ue1-533', 'agentStatus': 'PREPARED', 'foundationModel': 'anthropic.claude-3-sonnet-20240229-v1:0', 'agentCollaboration': 'DISABLED', 'orchestrationType': 'DEFAULT', 'createdAt': datetime.datetime(2025, 1, 7, 23, 55, 16, 121319, tzinfo=tzutc()), 'lastUpdatedAt': datetime.datetime(2025, 1, 7, 23, 55, 16, 952293, tzinfo=tzutc())}
2025-01-07 15:56:16,376 [INFO] Agent rag

In [12]:
# Ask the agent the same question used in the RetrieveAndGenerate section.
user_query = "What is the total cost of Octank Financial's property and equipment as of December 31, 2022?"

# `result` is indexed with 'response' and 'trace' in the cells below.
# NOTE(review): `save_trace_json_file` presumably writes the trace to this
# path; confirm in utils.agents.
result = agents.invoke(
    agent_name=agent_name, 
    input_text=user_query, 
    verbose=True, 
    trace_level='core',
    save_trace_json_file='agent_full_trace.json'
)

2025-01-07 15:56:24,395 [INFO] Invoking agent 'rag-agent-ue1-533' with input: What is the total cost of Octank Financial's property and equipment as of December 31, 2022?
[32m---- Step 1 ----[0m
[33mTook 3.0s, using 3251 tokens (in: 3145, out: 106) to complete action.[0m
[36mFinal response:
According to the table for Property and Equipment, Net under NOTES TO CONSOLIDATED FINANCIAL STATEMENTS, the total cost of Octank Financial's property and equipment as of December 31, 2022 is $440,000,000....[0m
[33mAgent made a total of 1 LLM calls, using 3251 tokens (in: 3145, out: 106), and took 3.5 total seconds.[0m


In [14]:
# Render the agent's final answer as Markdown.
print_markdown(result['response'])

According to the table for Property and Equipment, Net under NOTES TO CONSOLIDATED FINANCIAL STATEMENTS, the total cost of Octank Financial's property and equipment as of December 31, 2022 is $440,000,000.

In [15]:
# NOTE(review): per the recorded output, this displays only the repr of a
# botocore EventStream object, not the trace contents.
result['trace']

<botocore.eventstream.EventStream at 0x115535a80>