In [1]:
import os
import time
import boto3
import logging
import pprint
import json

from utils.knowledge_base import BedrockKnowledgeBase

In [2]:
# boto3.setup_default_session(region_name='us-west-2')

In [3]:

# ReRanking models are available in US West 2
# region = 'us-west-2'

# Initialize clients with the specified region
s3_client = boto3.client('s3', region_name='us-west-2')
sts_client = boto3.client('sts', region_name='us-west-2')
session = boto3.session.Session(region_name='us-west-2')
region =  session.region_name
account_id = sts_client.get_caller_identity()["Account"]

bedrock_agent_client = boto3.client('bedrock-agent', region_name=region)
bedrock_agent_runtime_client = boto3.client('bedrock-agent-runtime', region_name=region)

# Logging setup
logging.basicConfig(
    format='[%(asctime)s] p%(process)s {%(filename)s:%(lineno)d} %(levelname)s - %(message)s',
    level=logging.INFO
)
logger = logging.getLogger(__name__)

# Print to verify
print(region, account_id)


us-west-2 533267284022


In [4]:
import time

# Get the current timestamp
current_time = time.time()

# Format the timestamp as a string
timestamp_str = time.strftime("%Y%m%d%H%M%S", time.localtime(current_time))[-7:]
# Create the suffix using the timestamp
suffix = f"{timestamp_str}"
knowledge_base_name = 'reranker-kb'
knowledge_base_description = "Knowledge Base containing files to demonstrate the reranker feature"
bucket_name = f'{knowledge_base_name}-{suffix}'


In [5]:
TEXT_GENERATION_MODEL_ID = "anthropic.claude-3-sonnet-20240229-v1:0"

# Reranker model: there are two reranker models available at launch
AMAZON_RERANKER_MODEL = "arn:aws:bedrock:us-west-2::foundation-model/amazon.rerank-v1:0"
COHERE_RERANKER_MODEL = "arn:aws:bedrock:us-west-2::foundation-model/cohere.rerank-v3-5:0"

In [6]:
knowledge_base_reranker = BedrockKnowledgeBase(
    kb_name=f'{knowledge_base_name}-{suffix}',
    kb_description=knowledge_base_description,
    data_bucket_name=bucket_name, 
    chunking_strategy = "FIXED_SIZE", 
    suffix = suffix,
    reranking_model="cohere.rerank-v3-5:0",
    generation_model=TEXT_GENERATION_MODEL_ID
)

[2024-12-12 08:26:46,126] p21922 {credentials.py:1278} INFO - Found credentials in shared credentials file: ~/.aws/credentials
[2024-12-12 08:26:47,850] p21922 {credentials.py:1278} INFO - Found credentials in shared credentials file: ~/.aws/credentials



 Region:  us-west-2
Step 1 - Creating or retrieving S3 bucket(s) for Knowledge Base documents
['reranker-kb-2082646']
Creating bucket reranker-kb-2082646
Step 2 - Creating Knowledge Base Execution Role (AmazonBedrockExecutionRoleForKnowledgeBase_2082646) and Policies
Step 3 - Creating OSS encryption, network and data access policies
Step 4 - Creating OSS Collection (this step takes a couple of minutes to complete)
{ 'ResponseMetadata': { 'HTTPHeaders': { 'connection': 'keep-alive',
                                         'content-length': '318',
                                         'content-type': 'application/x-amz-json-1.0',
                                         'date': 'Thu, 12 Dec 2024 16:26:51 '
                                                 'GMT',
                                         'x-amzn-requestid': '26cbbb4c-5f65-437e-bb98-f466a8e7df3b'},
                        'HTTPStatusCode': 200,
                        'RequestId': '26cbbb4c-5f65-437e-bb98-f466a8e7df3b',

[2024-12-12 08:28:23,053] p21922 {base.py:258} INFO - PUT https://k6xg4kvihug2x207f5q0.us-west-2.aoss.amazonaws.com:443/bedrock-sample-rag-index-2082646 [status:200 request:0.527s]



Creating index:
{ 'acknowledged': True,
  'index': 'bedrock-sample-rag-index-2082646',
  'shards_acknowledged': True}
Step 6 - Will create Lambda Function if chunking strategy selected as CUSTOM
Not creating lambda function as chunking strategy is FIXED_SIZE
Step 7 - Creating Knowledge Base
Creating KB with chunking strategy - FIXED_SIZE
 {'chunkingConfiguration': {'chunkingStrategy': 'FIXED_SIZE', 'fixedSizeChunkingConfiguration': {'maxTokens': 300, 'overlapPercentage': 20}}}
{ 'createdAt': datetime.datetime(2024, 12, 12, 16, 29, 23, 533706, tzinfo=tzutc()),
  'description': 'Knowledge Base containing files to demonstrate the reranker '
                 'feature',
  'knowledgeBaseArn': 'arn:aws:bedrock:us-west-2:533267284022:knowledge-base/WE74FTZS4H',
  'knowledgeBaseConfiguration': { 'type': 'VECTOR',
                                  'vectorKnowledgeBaseConfiguration': { 'embeddingModelArn': 'arn:aws:bedrock:us-west-2::foundation-model/amazon.titan-embed-text-v2:0'}},
  'knowledge

In [7]:
import os

def create_directory(directory_name):    
    if not os.path.exists(directory_name):
        os.makedirs(directory_name)
        print(f"Directory '{directory_name}' created successfully.")
    else:
        print(f"Directory '{directory_name}' already exists.")

# Call the function to create the directory
create_directory("sec-10-k")

Directory 'sec-10-k' already exists.


In [8]:
import requests

def download_file(url, filename):
    # Send a GET request to the URL
    response = requests.get(url)
    
    # Check if the request was successful
    if response.status_code == 200:
        # Open the file in write-binary mode
        with open(filename, 'wb') as file:
            # Write the content of the response to the file
            file.write(response.content)
        print(f"File downloaded successfully: {filename}")
    else:
        print(f"Failed to download file. Status code: {response.status_code}")

# URL of the files to download
urls = ["https://s2.q4cdn.com/299287126/files/doc_financials/2024/ar/Amazon-com-Inc-2023-Annual-Report.pdf"]


for url in urls:
    # Name for the downloaded file
    filename = url.split('/')[-1]

    # Path to save the downloaded file
    filepath = f"./sec-10-k/{filename}"

    # Call the function to download the file
    download_file(url, filepath)

File downloaded successfully: ./sec-10-k/Amazon-com-Inc-2023-Annual-Report.pdf


In [9]:
def upload_directory(path, bucket_name):
        for root,dirs,files in os.walk(path):
            for file in files:
                if not file.startswith('.DS_Store'):
                    file_to_upload = os.path.join(root,file)
                    print(f"uploading file {file_to_upload} to {bucket_name}")
                    s3_client.upload_file(file_to_upload,bucket_name,file)

upload_directory("sec-10-k", bucket_name)

uploading file sec-10-k/Amazon-com-Inc-2023-Annual-Report.pdf to reranker-kb-2082646


In [10]:
# ensure that the kb is available
time.sleep(30)
# sync knowledge base
knowledge_base_reranker.start_ingestion_job()

{ 'dataSourceId': 'DOWGZTCS6M',
  'ingestionJobId': '6ZCA6KR1S6',
  'knowledgeBaseId': 'WE74FTZS4H',
  'startedAt': datetime.datetime(2024, 12, 12, 16, 29, 56, 997789, tzinfo=tzutc()),
  'statistics': { 'numberOfDocumentsDeleted': 0,
                  'numberOfDocumentsFailed': 0,
                  'numberOfDocumentsScanned': 0,
                  'numberOfMetadataDocumentsModified': 0,
                  'numberOfMetadataDocumentsScanned': 0,
                  'numberOfModifiedDocumentsIndexed': 0,
                  'numberOfNewDocumentsIndexed': 0},
  'status': 'STARTING',
  'updatedAt': datetime.datetime(2024, 12, 12, 16, 29, 56, 997789, tzinfo=tzutc())}
{ 'dataSourceId': 'DOWGZTCS6M',
  'ingestionJobId': '6ZCA6KR1S6',
  'knowledgeBaseId': 'WE74FTZS4H',
  'startedAt': datetime.datetime(2024, 12, 12, 16, 29, 56, 997789, tzinfo=tzutc()),
  'statistics': { 'numberOfDocumentsDeleted': 0,
                  'numberOfDocumentsFailed': 0,
                  'numberOfDocumentsScanned': 1,
     

In [11]:
kb_id = knowledge_base_reranker.get_knowledge_base_id()

'WE74FTZS4H'


In [12]:
# knowledge_base_reranker.delete_kb(delete_s3_bucket=True,delete_iam_roles_and_policies=True, delete_lambda_function = True)

No intermediate bucket found
Found role AmazonBedrockExecutionRoleForKnowledgeBase_2082646
 [{'PolicyName': 'AmazonBedrockCloudWatchPolicyForKnowledgeBase_2082646', 'PolicyArn': 'arn:aws:iam::533267284022:policy/AmazonBedrockCloudWatchPolicyForKnowledgeBase_2082646'}, {'PolicyName': 'AmazonBedrockFoundationModelPolicyForKnowledgeBase_2082646', 'PolicyArn': 'arn:aws:iam::533267284022:policy/AmazonBedrockFoundationModelPolicyForKnowledgeBase_2082646'}, {'PolicyName': 'AmazonBedrockS3PolicyForKnowledgeBase_2082646', 'PolicyArn': 'arn:aws:iam::533267284022:policy/AmazonBedrockS3PolicyForKnowledgeBase_2082646'}, {'PolicyName': 'AmazonBedrockOSSPolicyForKnowledgeBase_2082646', 'PolicyArn': 'arn:aws:iam::533267284022:policy/AmazonBedrockOSSPolicyForKnowledgeBase_2082646'}]
Detached policy AmazonBedrockCloudWatchPolicyForKnowledgeBase_2082646 from role AmazonBedrockExecutionRoleForKnowledgeBase_2082646
Deleted policy AmazonBedrockCloudWatchPolicyForKnowledgeBase_2082646 from role AmazonBedrock

In [14]:
query = "What is Amazon's core business philosophy?"


In [25]:
def retrieve_from_knowledge_base(knowledge_base_id, query, number_of_results=5, reranker_model=None):
    if reranker_model:
        retrieval_config = {
            "vectorSearchConfiguration": {
                "numberOfResults": number_of_results,
                "rerankingConfiguration": {
                    "type": "BEDROCK_RERANKING_MODEL",
                    "bedrockRerankingConfiguration": {
                        "modelConfiguration": {
                            "modelArn": reranker_model
                        },
                        "numberOfRerankedResults": 2
                    }
                }
            }
        }
    else:
        retrieval_config = {
            "vectorSearchConfiguration": {
                "numberOfResults": number_of_results
            }
        }

    response = bedrock_agent_runtime_client.retrieve(
        knowledgeBaseId=knowledge_base_id,
        retrievalQuery={"text": query},
        retrievalConfiguration=retrieval_config
    )
    return response['retrievalResults']

In [26]:
AMAZON_RERANKER_MODEL

'arn:aws:bedrock:us-west-2::foundation-model/amazon.rerank-v1:0'

In [27]:
retrieved_contexts_with_amazon_reranker = retrieve_from_knowledge_base(kb_id, query, reranker_model=COHERE_RERANKER_MODEL)

In [28]:

retrieved_contexts_combined = "\n\n".join(
    [f"=== Context {i+1} ===\n{result['content']['text']}" for i, result in enumerate(retrieved_contexts_with_amazon_reranker)]
)

In [29]:
from IPython.display import Markdown, display

def print_markdown(text):
    display(Markdown(text))


print_markdown(retrieved_contexts_combined)

=== Context 1 ===
Form 10-K Summary 77 Signatures 78     2AMAZON.COM, INC.     PART I     Item 1. Business     This Annual Report on Form 10-K and the documents incorporated herein by reference contain forward-looking statements based on expectations, estimates, and projections as of the date of this filing. Actual results and outcomes may differ materially from those expressed in forward-looking statements. See Item 1A of Part I — “Risk Factors.” As used herein, “Amazon.com,” “we,” “our,” and similar terms include Amazon.com, Inc. and its subsidiaries, unless the context indicates otherwise.     General     We seek to be Earth’s most customer-centric company. We are guided by four principles: customer obsession rather than competitor focus, passion for invention, commitment to operational excellence, and long-term thinking. In each of our segments, we serve our primary customer sets, consisting of consumers, sellers, developers, enterprises, content creators, advertisers, and employees.     We have organized our operations into three segments: North America, International, and Amazon Web Services (“AWS”). These segments reflect the way the Company evaluates its business performance and manages its operations. Information on our net sales is contained in Item 8 of Part II, “Financial Statements and Supplementary Data — Note 10 — Segment Information.”     Consumers     We serve consumers through our online and physical stores and focus on selection, price, and convenience.

=== Context 2 ===
notes to consolidated financial statements.     41AMAZON.COM, INC. NOTES TO CONSOLIDATED FINANCIAL STATEMENTS     Note 1 — DESCRIPTION OF BUSINESS, ACCOUNTING POLICIES, AND SUPPLEMENTAL DISCLOSURES     Description of Business     We seek to be Earth’s most customer-centric company. In each of our segments, we serve our primary customer sets, consisting of consumers, sellers, developers, enterprises, content creators, advertisers, and employees. We serve consumers through our online and physical stores and focus on selection, price, and convenience. We offer programs that enable sellers to grow their businesses, sell their products in our stores, and fulfill orders using our services, and programs that allow authors, independent publishers, musicians, filmmakers, Twitch streamers, skill and app developers, and others to publish and sell content. We serve developers and enterprises of all sizes through AWS, which offers a broad set of on-demand technology services, including compute, storage, database, analytics, and machine learning, and other services. We also manufacture and sell electronic devices. In addition, we provide advertising services to sellers, vendors, publishers, authors, and others, through programs such as sponsored ads, display, and video advertising.     We have organized our operations into three segments: North America, International, and AWS. See “Note 10 — Segment Information.”

In [None]:
def retrieve_from_knowledge_base(knowledge_base_id, query, number_of_results=5):
    response = bedrock_agent_runtime_client.retrieve(
        knowledgeBaseId=knowledge_base_id,
        retrievalQuery={"text": query},
        retrievalConfiguration={
            "vectorSearchConfiguration": {
                "numberOfResults": number_of_results
            }
        }
    )
    return response['retrievalResults']

In [11]:
def get_retrieval_config(reranker_model=None):
    retrieval_config = None
    if reranker_model:
        retrieval_config = {
            "vectorSearchConfiguration": {
                "numberOfResults": 30,
                "rerankingConfiguration": {
                    "type": "BEDROCK_RERANKING_MODEL",
                    "bedrockRerankingConfiguration": {
                        "modelConfiguration": {
                            "modelArn": reranker_model
                        },
                        "numberOfRerankedResults": 3
                    }
                }
            }
        }
    else:
        retrieval_config = {
            "vectorSearchConfiguration": {
                "numberOfResults": 3
            }
        }
    return retrieval_config

In [19]:
def retrieve_and_generate(query, retrieval_config):
    start = time.time()
    response = bedrock_agent_runtime_client.retrieve_and_generate(
        input={
                'text': query
            },
            retrieveAndGenerateConfiguration={
                'type': 'KNOWLEDGE_BASE',
                'knowledgeBaseConfiguration': {
                    'knowledgeBaseId': kb_id,
                    'modelArn': TEXT_GENERATION_MODEL_ID,
                    'retrievalConfiguration': retrieval_config,
                },
            }
        )
    time_spent = time.time() - start
    print(f"[Response]\n{response['output']['text']}\n")
    print(f"[Invocation time]\n{time_spent}\n")

    return response

In [23]:
def retrieve_docs(query, retrieval_config):
    start = time.time()
    response = bedrock_agent_runtime_client.retrieve(
        retrievalQuery={
            'text': query
        },
        knowledgeBaseId=kb_id,
        retrievalConfiguration=retrieval_config
    )
    time_spent = time.time() - start
    # print(f"[Retrieved docs]\n{json.dumps(response['retrievalResults'], indent=2)}\n")
    # print(f"[Invocation time]\n{time_spent}\n")

    return response

In [20]:
query = "What is Amazon's core business philosophy?"

# without reranker
retrieval_config = get_retrieval_config(reranker_model=None)

In [24]:
retrieved_contexts = retrieve_docs(query, retrieval_config)


In [25]:
retrieved_contexts

{'ResponseMetadata': {'RequestId': '08fb6d27-dd0b-496c-abe8-63fb0979305a',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'date': 'Wed, 11 Dec 2024 23:49:16 GMT',
   'content-type': 'application/json',
   'content-length': '5605',
   'connection': 'keep-alive',
   'x-amzn-requestid': '08fb6d27-dd0b-496c-abe8-63fb0979305a'},
  'RetryAttempts': 0},
 'retrievalResults': [{'content': {'text': 'Form 10-K Summary 77 Signatures 78     2AMAZON.COM, INC.     PART I     Item 1. Business     This Annual Report on Form 10-K and the documents incorporated herein by reference contain forward-looking statements based on expectations, estimates, and projections as of the date of this filing. Actual results and outcomes may differ materially from those expressed in forward-looking statements. See Item 1A of Part I — “Risk Factors.” As used herein, “Amazon.com,” “we,” “our,” and similar terms include Amazon.com, Inc. and its subsidiaries, unless the context indicates otherwise.     General     We seek to be

In [26]:
retrieve_and_generate(query, retrieval_config)

[Response]
Amazon's core business philosophy is centered around being the most customer-centric company. They are guided by four key principles:

1) Customer obsession rather than competitor focus
2) Passion for invention
3) Commitment to operational excellence
4) Long-term thinking Amazon focuses on continually improving and expanding what's possible for customers across all their business segments, whether it's consumers, sellers, developers, enterprises, content creators, advertisers, or employees. They aim to solve real customer challenges and embrace new technologies that enable better customer experiences.

[Invocation time]
8.823477029800415



{'ResponseMetadata': {'RequestId': '3b4c4021-0915-421b-a2d5-e2d273083cf6',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'date': 'Wed, 11 Dec 2024 23:50:15 GMT',
   'content-type': 'application/json',
   'content-length': '7068',
   'connection': 'keep-alive',
   'x-amzn-requestid': '3b4c4021-0915-421b-a2d5-e2d273083cf6'},
  'RetryAttempts': 0},
 'citations': [{'generatedResponsePart': {'textResponsePart': {'span': {'end': 271,
      'start': 0},
     'text': "Amazon's core business philosophy is centered around being the most customer-centric company. They are guided by four key principles:\n\n1) Customer obsession rather than competitor focus\n2) Passion for invention\n3) Commitment to operational excellence\n4) Long-term thinking"}},
   'retrievedReferences': [{'content': {'text': 'Form 10-K Summary 77 Signatures 78     2AMAZON.COM, INC.     PART I     Item 1. Business     This Annual Report on Form 10-K and the documents incorporated herein by reference contain forward-looking stateme

In [None]:
def retrieve_from_knowledge_base(knowledge_base_id, query, number_of_results=5, reranker_model=None):
    if reranker_model:
        retrieval_config = {
            "vectorSearchConfiguration": {
                "numberOfResults": number_of_results,
                "rerankingConfiguration": {
                    "type": "BEDROCK_RERANKING_MODEL",
                    "bedrockRerankingConfiguration": {
                        "modelConfiguration": {
                            "modelArn": reranker_model
                        },
                        "numberOfRerankedResults": 3
                    }
                }
            }
        }
    else:
        retrieval_config = {
            "vectorSearchConfiguration": {
                "numberOfResults": number_of_results
            }
        }

    response = bedrock_agent_runtime_client.retrieve(
        knowledgeBaseId=knowledge_base_id,
        retrievalQuery={"text": query},
        retrievalConfiguration=retrieval_config
    )
    return response['retrievalResults']

In [38]:
def retrieve_from_knowledge_base(knowledge_base_id, query, number_of_results=5, reranker_model=None):
    # Construct the base retrieval configuration
    retrieval_config = {
        "vectorSearchConfiguration": {
            "numberOfResults": number_of_results
        }
    }

    # Add reranking configuration if a reranker model is provided
    if reranker_model:
        retrieval_config["vectorSearchConfiguration"]["rerankingConfiguration"] = {
            "type": "BEDROCK_RERANKING_MODEL",
            "bedrockRerankingConfiguration": {
                "modelConfiguration": {
                    "modelArn": reranker_model
                },
                "numberOfRerankedResults": 3
            }
        }

    # Make the API call with the constructed configuration
    response = bedrock_agent_runtime_client.retrieve(
        knowledgeBaseId=knowledge_base_id,
        retrievalQuery={"text": query},
        retrievalConfiguration=retrieval_config
    )

    return response['retrievalResults']

# Example usage
retrieved_contexts_with_reranker = retrieve_from_knowledge_base(kb_id, query, number_of_results=10, reranker_model=AMAZON_RERANKER_MODEL)


ParamValidationError: Parameter validation failed:
Unknown parameter in retrievalConfiguration.vectorSearchConfiguration: "rerankingConfiguration", must be one of: filter, numberOfResults, overrideSearchType

In [28]:
retrieved_contexts = retrieve_from_knowledge_base(kb_id, query, number_of_results=5, reranker_model=None)

In [29]:
retrieved_contexts = "\n\n".join(
    [f"=== Context {i+1} ===\n{result['content']['text']}" for i, result in enumerate(retrieved_contexts)]
)

In [31]:
from IPython.display import Markdown, display

def print_markdown(text):
    display(Markdown(text))

In [32]:
print_markdown(retrieved_contexts)

=== Context 1 ===
Form 10-K Summary 77 Signatures 78     2AMAZON.COM, INC.     PART I     Item 1. Business     This Annual Report on Form 10-K and the documents incorporated herein by reference contain forward-looking statements based on expectations, estimates, and projections as of the date of this filing. Actual results and outcomes may differ materially from those expressed in forward-looking statements. See Item 1A of Part I — “Risk Factors.” As used herein, “Amazon.com,” “we,” “our,” and similar terms include Amazon.com, Inc. and its subsidiaries, unless the context indicates otherwise.     General     We seek to be Earth’s most customer-centric company. We are guided by four principles: customer obsession rather than competitor focus, passion for invention, commitment to operational excellence, and long-term thinking. In each of our segments, we serve our primary customer sets, consisting of consumers, sellers, developers, enterprises, content creators, advertisers, and employees.     We have organized our operations into three segments: North America, International, and Amazon Web Services (“AWS”). These segments reflect the way the Company evaluates its business performance and manages its operations. Information on our net sales is contained in Item 8 of Part II, “Financial Statements and Supplementary Data — Note 10 — Segment Information.”     Consumers     We serve consumers through our online and physical stores and focus on selection, price, and convenience.

=== Context 2 ===
Customers’ AI models contain some of their most sensitive data. AWS and its partners offer the strongest security capabilities and track record in the world; and as a result, more and more customers want to run their GenAI on AWS.)     ===     Recently, I was asked a provocative question—how does Amazon remain resilient? While simple in its wording, it’s profound because it gets to the heart of our success to date as well as for the future. The answer lies in our discipline around deeply held principles: 1/ hiring builders who are motivated to continually improve and expand what’s possible; 2/ solving real customer challenges, rather than what we think may be interesting technology; 3/ building in primitives so that we can innovate and experiment at the highest rate; 4/ not wasting time trying to fight gravity (spoiler alert: you always lose)—when we discover technology that enables better customer experiences, we embrace it; 5/ accepting and learning from failed experiments— actually becoming more energized to try again, with new knowledge to employ.     Today, we continue to operate in times of unprecedented change that come with unusual opportunities for growth across the areas in which we operate. For instance, while we have a nearly $500B consumer business, about 80% of the worldwide retail market segment still resides in physical stores.

=== Context 3 ===
These results represent a lot of invention, collaboration, discipline, execution, and reimaginationacross Amazon. Yet, I think every one of us at Amazon believes that we have a long way to go, in every one of our businesses, before we exhaust how we can make customers’ lives better and easier, and there is considerable upside in each of the businesses in which we’re investing.     ===     In my annual letter over the last three years, I’ve tried to give shareholders more insight into how we’re thinking about the company, the businesses we’re pursuing, our future opportunities, and what makes us tick. We operate in a diverse number of market segments, but what ties Amazon together is our joint mission to make customers’ lives better and easier every day. This is true across every customer segment we serve (consumers, sellers, brands, developers, enterprises, and creators). At our best, we’re not just customer obsessed, but also inventive, thinking several years out, learning like crazy, scrappy, delivering quickly, and operating like the world’s biggest start-up.     We spend enormous energy thinking about how to empower builders, inside and outside of our company. We characterize builders as people who like to invent. They like to dissect a customer experience, assess what’s wrong with it, and reinvent it. Builders tend not to be satisfied until the customer experience is perfect.

=== Context 4 ===
notes to consolidated financial statements.     41AMAZON.COM, INC. NOTES TO CONSOLIDATED FINANCIAL STATEMENTS     Note 1 — DESCRIPTION OF BUSINESS, ACCOUNTING POLICIES, AND SUPPLEMENTAL DISCLOSURES     Description of Business     We seek to be Earth’s most customer-centric company. In each of our segments, we serve our primary customer sets, consisting of consumers, sellers, developers, enterprises, content creators, advertisers, and employees. We serve consumers through our online and physical stores and focus on selection, price, and convenience. We offer programs that enable sellers to grow their businesses, sell their products in our stores, and fulfill orders using our services, and programs that allow authors, independent publishers, musicians, filmmakers, Twitch streamers, skill and app developers, and others to publish and sell content. We serve developers and enterprises of all sizes through AWS, which offers a broad set of on-demand technology services, including compute, storage, database, analytics, and machine learning, and other services. We also manufacture and sell electronic devices. In addition, we provide advertising services to sellers, vendors, publishers, authors, and others, through programs such as sponsored ads, display, and video advertising.     We have organized our operations into three segments: North America, International, and AWS. See “Note 10 — Segment Information.”

=== Context 5 ===
Setting the bar high in our approach to hiring has been, and will continue to be, the single most important element of Amazon.com’s success.     It’s not easy to work here (when I interview people I tell them, “You can work long, hard, or smart, but at Amazon.com you can’t choose two out of three”), but we are working to build something important, something that matters to our customers, something that we can all tell our grandchildren about. Such things aren’t meant to be easy. We are incredibly fortunate to have this group of dedicated employees whose sacrifices and passion build Amazon.com.     Goals for 1998 We are still in the early stages of learning how to bring new value to our customers through Internet     commerce and merchandising. Our goal remains to continue to solidify and extend our brand and customer base. This requires sustained investment in systems and infrastructure to support outstanding customer convenience, selection, and service while we grow. We are planning to add music to our product offering, and over time we believe that other products may be prudent investments. We also believe there are significant opportunities to better serve our customers overseas, such as reducing delivery times and better tailoring the customer experience. To be certain, a big part of the challenge for us will lie not in finding new ways to expand our business, but in prioritizing our investments.

In [37]:
retrieved_contexts_with_reranker = retrieve_from_knowledge_base(kb_id, query, number_of_results=10, reranker_model=AMAZON_RERANKER_MODEL)

ParamValidationError: Parameter validation failed:
Unknown parameter in retrievalConfiguration.vectorSearchConfiguration: "rerankingConfiguration", must be one of: filter, numberOfResults, overrideSearchType

In [40]:
reranker_retrieval_config = get_retrieval_config(reranker_model=AMAZON_RERANKER_MODEL)


In [41]:
reranker_retrieval_config

{'vectorSearchConfiguration': {'numberOfResults': 30,
  'rerankingConfiguration': {'type': 'BEDROCK_RERANKING_MODEL',
   'bedrockRerankingConfiguration': {'modelConfiguration': {'modelArn': 'arn:aws:bedrock:us-east-1::foundation-model/amazon.rerank-v1:0'},
    'numberOfRerankedResults': 3}}}}

In [42]:
retrieve_and_generate(query, reranker_retrieval_config)

ParamValidationError: Parameter validation failed:
Unknown parameter in retrieveAndGenerateConfiguration.knowledgeBaseConfiguration.retrievalConfiguration.vectorSearchConfiguration: "rerankingConfiguration", must be one of: filter, numberOfResults, overrideSearchType

In [43]:
def retrieve_from_knowledge_base(knowledge_base_id, query, number_of_results=5, reranker_model=None):
    if reranker_model:
        retrieval_config = {
            "vectorSearchConfiguration": {
                "numberOfResults": number_of_results,
                "rerankingConfiguration": {
                    "type": "BEDROCK_RERANKING_MODEL",
                    "bedrockRerankingConfiguration": {
                        "modelConfiguration": {
                            "modelArn": reranker_model,
                            "additionalModelRequestFields": {}  # Add if specific fields are required
                        },
                        "numberOfRerankedResults": min(number_of_results, 3),  # Ensure reranked results <= total results
                        "metadataConfiguration": {
                            "selectionMode": "ALL"  # Adjust to 'SELECTIVE' if needed
                        }
                    }
                }
            }
        }
    else:
        retrieval_config = {
            "vectorSearchConfiguration": {
                "numberOfResults": number_of_results
            }
        }

    try:
        response = bedrock_agent_runtime_client.retrieve(
            knowledgeBaseId=knowledge_base_id,
            retrievalQuery={"text": query},
            retrievalConfiguration=retrieval_config
        )
        return response.get('retrievalResults', [])
    except Exception as e:
        raise RuntimeError(f"Failed to retrieve results: {str(e)}")


In [44]:
retrieved_contexts_with_reranker = retrieve_from_knowledge_base(kb_id, query, number_of_results=10, reranker_model=AMAZON_RERANKER_MODEL)

RuntimeError: Failed to retrieve results: Parameter validation failed:
Unknown parameter in retrievalConfiguration.vectorSearchConfiguration: "rerankingConfiguration", must be one of: filter, numberOfResults, overrideSearchType

In [45]:
def retrieve_from_knowledge_base(knowledge_base_id, query, number_of_results=5, reranker_model=None):
    # Basic vector search configuration
    retrieval_config = {
        "vectorSearchConfiguration": {
            "numberOfResults": number_of_results
        }
    }

    # Add reranking configuration if a reranker model is provided
    if reranker_model:
        retrieval_config["rerankingConfiguration"] = {
            "type": "BEDROCK_RERANKING_MODEL",
            "bedrockRerankingConfiguration": {
                "modelConfiguration": {
                    "modelArn": reranker_model
                },
                "numberOfRerankedResults": min(number_of_results, 3),  # Ensure this number is valid
                "metadataConfiguration": {
                    "selectionMode": "ALL"  # Adjust based on use case
                }
            }
        }

    try:
        response = bedrock_agent_runtime_client.retrieve(
            knowledgeBaseId=knowledge_base_id,
            retrievalQuery={"text": query},
            retrievalConfiguration=retrieval_config
        )
        return response.get("retrievalResults", [])
    except Exception as e:
        raise RuntimeError(f"Failed to retrieve results: {str(e)}")


In [46]:
retrieved_contexts_with_reranker = retrieve_from_knowledge_base(kb_id, query, number_of_results=10, reranker_model=AMAZON_RERANKER_MODEL)

RuntimeError: Failed to retrieve results: Parameter validation failed:
Unknown parameter in retrievalConfiguration: "rerankingConfiguration", must be one of: vectorSearchConfiguration

In [51]:
def retrieve_from_knowledge_base(knowledge_base_id, query, number_of_results=5, reranker_model=None):
    # Basic retrieval configuration
    retrieval_config = {
        "vectorSearchConfiguration": {
            "numberOfResults": number_of_results
        }
    }

    # Add reranking configuration if a reranker model is provided
    if reranker_model:
        retrieval_config["vectorSearchConfiguration"].update({
            "overrideSearchType": "SEMANTIC",  # Optional: Use SEMANTIC or HYBRID as per your needs
            "bedrockRerankingConfiguration": {
                "modelConfiguration": {
                    "modelArn": reranker_model
                },
                "numberOfRerankedResults": min(number_of_results, 3),  # Ensure reranked results <= total results
                "metadataConfiguration": {
                    "selectionMode": "ALL"  # Adjust to 'SELECTIVE' if specific fields need to be used
                }
            }
        })

    try:
        response = bedrock_agent_runtime_client.retrieve(
            knowledgeBaseId=knowledge_base_id,
            retrievalQuery={"text": query},
            retrievalConfiguration=retrieval_config
        )
        return response.get("retrievalResults", [])
    except Exception as e:
        raise RuntimeError(f"Failed to retrieve results: {str(e)}")


In [52]:
retrieved_contexts_with_reranker = retrieve_from_knowledge_base(kb_id, query, number_of_results=10, reranker_model=AMAZON_RERANKER_MODEL)


RuntimeError: Failed to retrieve results: Parameter validation failed:
Unknown parameter in retrievalConfiguration.vectorSearchConfiguration: "bedrockRerankingConfiguration", must be one of: filter, numberOfResults, overrideSearchType