# Retrieval Augmented Generation (RAG) using Amazon Bedrock and Amazon OpenSearch
In this notebook, we demonstrate a RAG solution that uses Amazon OpenSearch as a vector database (knowledge base) and Amazon Bedrock for generation.

### Prerequisites
Install the required packages and libraries

In [None]:
!pip3 install opensearch-py --quiet
!pip3 install requests_aws4auth --quiet

### Imports
Import the relevant packages and modules.

In [None]:
import requests
import logging 
import boto3
import yaml
import json
import random
import string
from langchain.embeddings import BedrockEmbeddings
import ipywidgets as ipw
from IPython.display import display, clear_output
from langchain.document_loaders import CSVLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.llms import Bedrock

from requests_aws4auth import AWS4Auth
from opensearchpy import OpenSearch, RequestsHttpConnection, AWSV4SignerAuth, helpers

### Define variables
- Define the Bedrock embedding model and the generation model.
- Set the region name
- Create boto3 client for bedrock
- Create Langchain modules for Bedrock embeddings and LLM.

In [None]:
# Bedrock model identifiers: one model for embeddings, one for text generation.
BEDROCK_EMBEDDING_MODEL = "amazon.titan-embed-text-v1"
BEDROCK_GENERATION_MODEL = 'anthropic.claude-v2'

# Region name as resolved by a boto3 session; used later to sign OpenSearch requests.
REGION_NAME = boto3.session.Session().region_name

# Session credentials; within this notebook they are only referenced by a
# commented-out `session_token` argument in the OpenSearch auth setup.
credentials = boto3.Session().get_credentials()

# A single Bedrock runtime client is shared by both LangChain wrappers below.
boto3_bedrock = boto3.client("bedrock-runtime")

generation = Bedrock(client=boto3_bedrock, model_id=BEDROCK_GENERATION_MODEL)
embeddings = BedrockEmbeddings(client=boto3_bedrock, model_id=BEDROCK_EMBEDDING_MODEL)


Retrieve the following values from AWS Systems Manager Parameter Store:
- Access Key
- Secret Access Key
- OpenSearch Host

In [None]:
ssm_client = boto3.client('ssm')


def get_ssm_parameter(name: str) -> str:
    """Return the value of the Parameter Store entry called `name`.

    `WithDecryption=True` is passed so that a SecureString parameter is
    returned as plaintext rather than ciphertext; the flag is ignored for
    String and StringList parameters, so it is safe for all three lookups.
    """
    result = ssm_client.get_parameter(Name=name, WithDecryption=True)
    return result['Parameter']['Value']


access_key = get_ssm_parameter('AccessKey')
secret_key = get_ssm_parameter('SecretAccessKey')
host = get_ssm_parameter('OpenSearchHost')

### Create an OpenSearch Index

Here, we use OpenSearch as a Vector Store. The first step is to create an Index.

In [None]:
# Service name used when signing requests (OpenSearch Serverless).
service = 'aoss'

# Random suffix so that each run of the notebook works on its own index.
INDEX_NAME = 'sm_docs_' + ''.join(random.choices(string.ascii_lowercase, k=8))
# Name of the document field holding the embedding; used by the mapping below
# and by the indexing step.
VECTOR_FIELD = 'vectors'
# Dimension of the embedding vector; the knn_vector mapping must declare it.
EMBEDDING_DIMENSION = 1536

# SigV4 signing with the keys read from Parameter Store.
# (A `session_token=credentials.token` argument was left disabled here.)
awsauth = AWS4Auth(access_key, secret_key, REGION_NAME, service)

# Create the OpenSearch client
aoss_client = OpenSearch(
    hosts=[{'host': host, 'port': 443}],
    http_auth=awsauth,
    use_ssl=True,
    verify_certs=True,
    # NOTE(review): this turns off hostname verification even though
    # certificates are verified -- confirm that this is intended.
    ssl_assert_hostname=False,
    ssl_show_warn=False,
    connection_class=RequestsHttpConnection,
    timeout=300,
)

# To remove the index again: aoss_client.indices.delete(index=INDEX_NAME)

# Create the index with k-NN enabled. The vector field is keyed by
# VECTOR_FIELD so the mapping always matches the field written at index time.
aoss_client.indices.create(
    index=INDEX_NAME,
    body={
        "settings": {
            "index.knn": True
        },
        "mappings": {
            "properties": {
                VECTOR_FIELD: {
                    "type": "knn_vector",
                    "dimension": EMBEDDING_DIMENSION
                },
            }
        }
    }
)

### Load the documents for Indexing

This step performs the following actions:
1. Splits the document into chunks
2. Creates a numerical vector representation of each chunk using Amazon Bedrock Titan Embeddings model
3. Indexes each chunk together with its embedding into the OpenSearch index created above


In [None]:
# Source data: SageMaker FAQ CSV. Per the original note, this yields about 219
# documents of roughly 400 characters, each row holding a question column and
# an answer column.
loader = CSVLoader("../data/Amazon_SageMaker_FAQs.csv")
documents_aws = loader.load()
print(f"Number of documents={len(documents_aws)}")

# Split on commas into chunks of at most 2000 characters with 400 characters of overlap.
splitter = CharacterTextSplitter(separator=",", chunk_size=2000, chunk_overlap=400)
docs = splitter.split_documents(documents_aws)

In [None]:
for doc in docs:
    # The text data of each chunk
    text = doc.page_content

    # Embed the raw chunk text. The LangChain embeddings wrapper builds the
    # model request itself, so the text must not be pre-wrapped in a JSON
    # payload; otherwise the stored vector describes the JSON string instead
    # of the text and no longer matches how query_docs embeds queries.
    vectors = embeddings.embed_query(text)

    # One OpenSearch document per chunk: the embedding plus the original text.
    indexDocument = {VECTOR_FIELD: vectors, 'text': text}

    response = aoss_client.index(
        index=INDEX_NAME,
        body=indexDocument,
        refresh=False
    )

### Query OpenSearch

Define a function that queries OpenSearch.

In [None]:
def query_docs(query: str, k: int = 3):
    """
    Embed the query and return the k most similar documents from AOSS.

    Args:
        query: Natural-language query text.
        k: Number of nearest neighbours to request; also used as the
            result size.

    Returns:
        The raw response of the OpenSearch search call; matching documents
        are found under ['hits']['hits'].
    """

    # Embed the query with the same embeddings wrapper used for the documents.
    query_embedding = embeddings.embed_query(query)

    # kNN lookup on the vector field. VECTOR_FIELD is used (rather than a
    # hard-coded name) so the query always targets the field written at
    # index time. Metadata-based filtering could be added to this query.
    query_qna = {
        "size": k,
        "query": {
            "knn": {
                VECTOR_FIELD: {
                    "vector": query_embedding,
                    "k": k
                }
            }
        }
    }

    # OpenSearch API call
    relevant_documents = aoss_client.search(
        body=query_qna,
        index=INDEX_NAME
    )
    return relevant_documents

In [None]:
def create_context_for_query(q: str) -> str:
    """
    Build the prompt context for a question.

    Looks up the documents most similar to `q` in the vector database and
    returns their 'text' fields joined together, each followed by a newline.
    """
    print(f"query -> {q}")
    hits = query_docs(q)['hits']['hits']
    return "".join(f"{hit['_source']['text']}\n" for hit in hits)

Retrieve the context for a sample query and fill in a prompt template

In [None]:
# Prompt in Human/Assistant form. The two positional placeholders receive the
# retrieved context and the question, in that order.
PROMPT_TEMPLATE = """

H: Answer the question asked in the <question> tag based only on the context provided in <context> tags. Do not include any preamble in your answer.
<context>
{}
</context>

<question>
{}
</question>


A:"""

query = 'How can I check for imbalances in my model?'

# Fetch the most similar chunks for the question and splice both into the template.
context = create_context_for_query(query)
prompt = PROMPT_TEMPLATE.format(context, query)

print(prompt)

### Prompt the LLM (Bedrock - Claude v2) to generate a response

In [None]:
# Send the assembled prompt (context + question) to the Bedrock generation
# model through the LangChain wrapper created earlier.
response = generation(prompt)

# Show the model's answer.
print(response)