# Bedrock Knowledge Base Retrieval and Generation with Metadata Filtering

### Description:
This notebook demonstrates how to query an Amazon Bedrock knowledge base with the RetrieveAndGenerate API, first without and then with a metadata filter, and how to extract the citations behind each answer. The steps are: load the configuration, define a query, generate a response with and without the filter, and print the citations (retrieved chunks) used to generate each response.


## 1. Load Configuration Variables

In [None]:
# Load configuration variables from a JSON file to access knowledge base ID, account number, and guardrail info.
import json

# The path is relative to the notebook's working directory. JSON is UTF-8 by
# specification, so decode explicitly instead of relying on the platform's
# locale encoding (which is not UTF-8 on every system).
with open("../Lab 1/variables.json", "r", encoding="utf-8") as f:
    variables = json.load(f)

# Display the loaded variables for confirmation.
# NOTE: later cells read an account number from this dict, so clear this cell's
# output before sharing or committing the notebook.
variables

## 2. Set Up Required IDs and Model ARNs

In [None]:
# Region hosting the Bedrock resources used in this notebook. It must match the
# region of the bedrock-agent-runtime client created later ("us-west-2"), so every
# ARN below is built from this single value.
aws_region = 'us-west-2'

# Identifiers read from the configuration loaded in the previous step.
knowledge_base_id = variables['kbFixedChunk']
model_id = 'us.amazon.nova-pro-v1:0'
accountNumber = variables['accountNumber']
guardrail_version = variables['guardrail_version']
guardrail_id = variables['guardrail_id']

# Inference profile used for answer generation.
model_arn = f"arn:aws:bedrock:{aws_region}:{accountNumber}:inference-profile/{model_id}"

# NOTE(review): AWS Rerank examples use the foundation-model ARN form
# (arn:aws:bedrock:<region>::foundation-model/amazon.rerank-v1:0) — confirm that the
# inference-profile form below is accepted before using it for reranking.
rerank_model_arn = f"arn:aws:bedrock:{aws_region}:{accountNumber}:inference-profile/us.amazon.rerank-v1:0"

# Guardrails are regional resources; previously this ARN pointed at us-east-1 while
# the client and all other ARNs use us-west-2.
guardrail_arn = f"arn:aws:bedrock:{aws_region}:{accountNumber}:guardrail/{guardrail_id}"  # Replace with your guardrail ARN

# Kept as an alias for backward compatibility. It was previously built with the
# "guardrail/" resource type, which is not a model ARN and disagreed with rerank_model_arn.
reranker_model_arn = rerank_model_arn


## 3. Configure Bedrock Client

In [None]:
import boto3
import json
from typing import *

# Runtime client used for all knowledge-base queries in this notebook (us-west-2)
bedrock_agent_runtime = boto3.client(
    service_name='bedrock-agent-runtime',
    region_name="us-west-2",
)


## 4. Define Function to Retrieve and Generate Without Filters

In [None]:
def retrieve_and_generate_without_filter(query, knowledge_base_id, model_arn, number_of_results=5):
    """
    Retrieves and generates a response for the query, with no metadata filter applied.

    Parameters:
    - query (str): The input query.
    - knowledge_base_id (str): The ID of the knowledge base.
    - model_arn (str): The ARN of the model used for generation.
    - number_of_results (int, optional): Number of results requested from the
      vector search. Defaults to 5, the value that was previously hard-coded.

    Returns:
    - response: The response from the retrieve_and_generate method.
    """
    # Vector search settings; no "filter" key, so the search is not restricted by metadata.
    vector_search_configuration = {"numberOfResults": number_of_results}

    # Uses the module-level bedrock_agent_runtime client.
    return bedrock_agent_runtime.retrieve_and_generate(
        input={"text": query},
        retrieveAndGenerateConfiguration={
            "type": "KNOWLEDGE_BASE",
            "knowledgeBaseConfiguration": {
                "knowledgeBaseId": knowledge_base_id,
                "modelArn": model_arn,
                "retrievalConfiguration": {
                    "vectorSearchConfiguration": vector_search_configuration
                },
            },
        },
    )


## 5. Define Function to Retrieve and Generate With Filters

In [None]:
def retrieve_and_generate_with_filter(query, knowledge_base_id, model_arn, metadata_filter, number_of_results=5):
    """
    Retrieves and generates a response based on the given query, with metadata filters.

    Parameters:
    - query (str): The input query.
    - knowledge_base_id (str): The ID of the knowledge base.
    - model_arn (str): The ARN of the model used for generation.
    - metadata_filter (dict): The filter for the vector search configuration.
      If it is None or empty, no "filter" key is sent and the search is unfiltered.
    - number_of_results (int, optional): Number of results requested from the
      vector search. Defaults to 5, the value that was previously hard-coded.

    Returns:
    - response: The response from the retrieve_and_generate method.
    """
    vector_search_configuration = {"numberOfResults": number_of_results}

    # Only attach the filter when one was supplied: sending "filter": None (or an
    # empty dict) is rejected by the request validation rather than meaning "no filter".
    if metadata_filter:
        vector_search_configuration["filter"] = metadata_filter

    # Uses the module-level bedrock_agent_runtime client.
    return bedrock_agent_runtime.retrieve_and_generate(
        input={"text": query},
        retrieveAndGenerateConfiguration={
            "type": "KNOWLEDGE_BASE",
            "knowledgeBaseConfiguration": {
                "knowledgeBaseId": knowledge_base_id,
                "modelArn": model_arn,
                "retrievalConfiguration": {
                    "vectorSearchConfiguration": vector_search_configuration
                },
            },
        },
    )


## 6. Define Query

In [None]:
# Question sent to the knowledge base. The same text is passed to both the
# unfiltered and the metadata-filtered retrieve-and-generate calls below, so the
# two answers can be compared directly.
query = "what was the % increase in sales?"


## 7. Retrieve Response Without Metadata Filter

In [None]:
# Ask the knowledge base without restricting retrieval by metadata
response_withoutMetadata = retrieve_and_generate_without_filter(
    query=query,
    knowledge_base_id=knowledge_base_id,
    model_arn=model_arn,
)

# Show the generated answer
print(response_withoutMetadata["output"]["text"])


## 8. Retrieve and Print Citations Without Metadata Filter

In [None]:
# Extract the references (chunks) used to generate the response.
# A response can carry zero, one, or several citations, each with its own list of
# retrieved references. Indexing citations[0] raised IndexError on an empty list and
# ignored references attached to later citations, so collect them across all citations.
response_without_MD = [
    reference
    for citation in response_withoutMetadata.get('citations', [])
    for reference in citation.get('retrievedReferences', [])
]
print("# of citations or chunks used to generate the response: ", len(response_without_MD))

# Function to print citations or chunks of text retrieved
def citations_rag_print(response_ret):
    """
    Print the text, location, and metadata of each retrieved reference.

    Parameters:
    - response_ret (list[dict] | None): Retrieved references. Each item is read for
      its 'content' -> 'text', 'location', and 'metadata' entries. None or an empty
      list prints nothing.

    Returns:
    - None. Output goes to stdout, numbered from 1, with a blank line after each entry.
    """
    for num, chunk in enumerate(response_ret or [], 1):
        # Tolerant lookups: a reference that lacks one of these fields prints None
        # for it instead of aborting the whole listing with a KeyError.
        chunk_text = (chunk.get('content') or {}).get('text')
        print(f'Chunk {num}: ', chunk_text, end='\n'*2)
        print(f'Chunk {num} Location: ', chunk.get('location'), end='\n'*2)
        print(f'Chunk {num} Metadata: ', chunk.get('metadata'), end='\n'*2)

# Print the text, location, and metadata of each reference retrieved for the
# unfiltered response
citations_rag_print(response_without_MD)


## 9. Define Metadata Filter

In [None]:
# Metadata filter combining two conditions under "andAll", so both must hold:
#   - docType equals '10K Report'
#   - year is greater than or equal to 2025
one_group_filter = dict(
    andAll=[
        dict(equals=dict(key="docType", value='10K Report')),
        dict(greaterThanOrEquals=dict(key="year", value=2025)),
    ]
)


## 10. Retrieve Response With Metadata Filter

In [None]:
# Ask the same question, this time restricting retrieval with the metadata filter
response_with_Metadata = retrieve_and_generate_with_filter(
    query=query,
    knowledge_base_id=knowledge_base_id,
    model_arn=model_arn,
    metadata_filter=one_group_filter,
)

# Show the generated answer
print(response_with_Metadata["output"]["text"])


## 11. Retrieve and Print Citations With Metadata Filter

In [None]:
# Extract the references (chunks) used to generate the filtered response.
# The metadata filter can exclude every document, so the citations list may be empty.
# Indexing citations[0] raised IndexError in that case and ignored references attached
# to later citations, so collect them across all citations instead.
response_with_MD = [
    reference
    for citation in response_with_Metadata.get('citations', [])
    for reference in citation.get('retrievedReferences', [])
]
print("# of citations or chunks used to generate the response: ", len(response_with_MD))

# Print citations for the filtered response
citations_rag_print(response_with_MD)
