## RAG Demo:
 A demo that loads a list of HTML URLs into a vector store (Amazon OpenSearch) using Amazon Bedrock embeddings, then answers questions with an LLM through the RetrievalQA chain provided by LangChain. Inference uses Llama-2-13b from SageMaker JumpStart.

### 1. RAG
- Prepare HTML links
- Store into Vector store 
- QnA using LLM with RetrievalQA chain provided by LangChain

Ref: https://python.langchain.com/docs/use_cases/question_answering/

In [None]:
# Force-reinstall the AWS SDK packages (boto3, awscli, botocore) at or above the
# listed minimum versions, plus the SageMaker SDK.
# NOTE(review): presumably these minimums are required for Bedrock support — confirm.
%pip -qqq install --no-build-isolation --force-reinstall \
    "boto3>=1.28.57" \
    "awscli>=1.29.57" \
    "botocore>=1.31.57" \
    "sagemaker"

In [None]:
# Pin LangChain to one fixed release so the `langchain.*` imports used in the
# cells below resolve consistently.
%pip install --quiet langchain==0.0.309

In [None]:
# Global LangChain debug switch: set to True to enable debug mode.
import langchain
langchain.debug=False

In [None]:
import os

# Region used for the SageMaker endpoint and the Bedrock client in later cells.
AWS_REGION = 'us-east-1'

# TODO: access as an IAM user. Ensure the user has the appropriate permissions
# and store the access keys here.
# The values are exported as environment variables; later cells read the
# access keys back from os.environ.
os.environ.update({
    'AWS_DEFAULT_REGION': AWS_REGION,
    'AWS_ACCESS_KEY_ID': '',
    'AWS_SECRET_ACCESS_KEY': '',
})

## 1. Init LLM model (Llama-2 13b chat on Amazon SageMaker JumpStart)
In this demo, we use Llama-2 13b chat model as LLM Foundation model, hosted by Amazon Sagemaker JumpStart Endpoint.

In [None]:
# TODO: Deploy the SageMaker endpoint model from the console, then define the
# SageMaker endpoint name here. init_llm_sm_endpoint() uses this value as its
# endpoint_name.
SAGEMAKER_LLM_MODEL_NAME = 'jumpstart-dft-meta-textgeneration-llama-2-13b-f'

In [None]:
import json
from langchain.llms.sagemaker_endpoint import LLMContentHandler, SagemakerEndpoint

def init_llm_sm_endpoint():
    """Build a LangChain LLM wrapper around the SageMaker Llama-2 chat endpoint.

    Reads the notebook-level ``SAGEMAKER_LLM_MODEL_NAME`` (endpoint name) and
    ``AWS_REGION`` (endpoint region).

    Returns:
        SagemakerEndpoint: LLM configured with ``max_new_tokens=1000`` and
        ``temperature=0.1``, whose requests carry a fixed system prompt in
        addition to the caller's prompt.
    """
    endpoint_name = SAGEMAKER_LLM_MODEL_NAME
    aws_region = AWS_REGION
    parameters = {"max_new_tokens": 1000, "temperature": 0.1}

    # Fixed system message sent with every request. Built from plain string
    # pieces so that no stray leading quote or trailing newline/indentation
    # ends up in the text sent to the model.
    system_prompt = (
        'You are QnA bot to answer the questions based on the context. '
        'If it is not in the context, start your reply with '
        '"Based on context, I cannot answer"'
    )

    class ContentHandler(LLMContentHandler):
        """Translate between LangChain prompts and the endpoint's JSON payloads."""

        content_type = "application/json"
        accepts = "application/json"

        # LLAMA-2 chat
        # note: the input format requirement differs from LLM to LLM
        def transform_input(self, prompt: str, model_kwargs: dict) -> bytes:
            """Serialize the system + user messages and parameters to JSON bytes."""
            payload = {
                "inputs": [[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": prompt},
                ]],
                "parameters": {**model_kwargs},
            }
            return json.dumps(payload).encode('utf-8')

        def transform_output(self, output) -> str:
            """Extract the generated text from the endpoint response.

            ``output`` is read with ``.read()``, i.e. it is a file-like
            response body rather than raw bytes.
            """
            response_json = json.loads(output.read().decode("utf-8"))
            return response_json[0]["generation"]["content"]

    sm_llm = SagemakerEndpoint(
        endpoint_name=endpoint_name,
        region_name=aws_region,
        model_kwargs=parameters,
        content_handler=ContentHandler(),
        # Sent as a custom attribute on each invocation to accept the model EULA.
        endpoint_kwargs={"CustomAttributes": "accept_eula=true"},
    )
    return sm_llm


In [None]:
# Smoke test: build the endpoint wrapper and send one question straight to the
# LLM (no retrieval involved), then print the reply.
llm = init_llm_sm_endpoint()
text = "What is Amazon Bedrock?"
print(llm(text))

## 2. Embedding (Amazon Bedrock Embeddings)
We use Amazon Bedrock Titan Embedding as Embedding Foundation model


In [None]:
from langchain.embeddings import BedrockEmbeddings
import boto3

def init_eb_bedrock():
    """Create the Amazon Bedrock embedding model used for indexing and search.

    Reads the notebook-level ``AWS_REGION`` and, when present, the
    ``AWS_ACCESS_KEY_ID`` / ``AWS_SECRET_ACCESS_KEY`` environment variables.

    Returns:
        BedrockEmbeddings: embeddings wrapper bound to a ``bedrock-runtime``
        client and the ``amazon.titan-embed-text-v1`` model.
    """
    # Unset or empty credentials are passed as None so that boto3 falls back
    # to its default credential chain (shared config, instance/role
    # credentials, ...) instead of raising KeyError or signing requests with
    # empty keys.
    access_key = os.environ.get('AWS_ACCESS_KEY_ID') or None
    secret_key = os.environ.get('AWS_SECRET_ACCESS_KEY') or None

    bedrock_client = boto3.client(
        'bedrock-runtime',
        region_name=AWS_REGION,
        aws_access_key_id=access_key,
        aws_secret_access_key=secret_key,
    )
    # TODO: allow Bedrock Titan access on the AWS console, and store model ID here
    modelId = "amazon.titan-embed-text-v1"
    bedrock_embeddings = BedrockEmbeddings(
        client=bedrock_client,
        region_name=AWS_REGION,
        model_id=modelId,
    )

    return bedrock_embeddings

In [None]:
# Smoke test for the embedding model: embed two short strings, then print the
# model id, the number of vectors returned and the first 10 components of the
# first vector.
bedrock_embeddings = init_eb_bedrock()
print(bedrock_embeddings.model_id)
embedding_vectors = bedrock_embeddings.embed_documents(['hello', 'world'])
print("len(embedding_vectors): ", len(embedding_vectors))
print("sample vector:\n",embedding_vectors[0][0:10])


## 3. Vector Store (Amazon Opensearch service)
We use Amazon OpenSearch Service as the vector store.

In [None]:
# TODO: create an Amazon OpenSearch cluster and store the URL here
OPENSEARCH_URL = ""
# Name of the index that holds the embedded HTML chunks.
OPENSEARCH_VECTOR_INDEX_HTML = "rag-demo-aws-genai-html-asia"
# Region of the OpenSearch cluster; in this notebook it is only copied into
# `region` in the indexing cell.
AWS_REGION_AOS = "ap-southeast-1"

# TODO: store credentials for Amazon OpenSearch cluster ("login", "password") here
OPENSEARCH_http_auth=("", "") 

### 3.1 Split HTML into chunks

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Splitter used to cut each transformed page into overlapping chunks, with
# sizes measured by `len`.
# TODO: feel free to change chunk_size to fit context window size of LLM
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    length_function=len,
    is_separator_regex=False,
)

In [None]:
from langchain.document_loaders import AsyncHtmlLoader
from langchain.document_transformers import Html2TextTransformer

# TODO: store the list of URLs you want to turn into embeddings into a txt file and store the name here
with open('', encoding='utf-8') as f:
    # One URL per line; blank lines are skipped so that empty strings are
    # never handed to the loader.
    urls = [line.strip() for line in f if line.strip()]

# Download every page, then convert the HTML to plain text.
loader = AsyncHtmlLoader(urls)
docs = loader.load()
html2text = Html2TextTransformer()
docs_transformed = html2text.transform_documents(docs)

# Split each page into chunks; every chunk keeps the metadata of the page it
# came from, so the two lists stay index-aligned.
contents_transformed, metadatas = [], []
for doc_transformed in docs_transformed:
    content_transformed = text_splitter.split_text(doc_transformed.page_content)
    contents_transformed.extend(content_transformed)
    metadatas.extend([doc_transformed.metadata] * len(content_transformed))

# Print only the first few entries: the full lists can hold thousands of
# chunks and would flood the notebook output.
print("sample split:\n", contents_transformed[:3])
print("sample metadatas:\n", metadatas[:3])
print("length of sample split:\n", len(contents_transformed))
print("length of sample metadatas:\n", len(metadatas))

### 3.2 Embedding and Store into Vector Store

In [None]:
from langchain.vectorstores import OpenSearchVectorSearch
from opensearchpy import RequestsHttpConnection

# NOTE: `service` and `region` are assigned here but not passed to the call below.
service = 'es'  # must set the service as 'aoss' for Amazon OpenSearch Serverless
region = AWS_REGION_AOS

# Connection and index settings forwarded to the vector store.
_aos_index_options = {
    "opensearch_url": OPENSEARCH_URL,
    "http_auth": OPENSEARCH_http_auth,
    "timeout": 600,
    "use_ssl": True,
    "verify_certs": True,
    "connection_class": RequestsHttpConnection,
    "index_name": OPENSEARCH_VECTOR_INDEX_HTML,
    "engine": "faiss",
    "bulk_size": 10000,
}

# Embed every chunk with the Bedrock embedding model and store the chunks,
# together with their metadata, in the OpenSearch index.
vectorstore = OpenSearchVectorSearch.from_texts(
    texts=contents_transformed,
    embedding=init_eb_bedrock(),
    metadatas=metadatas,
    **_aos_index_options,
)



In [None]:
from langchain.vectorstores import OpenSearchVectorSearch
from opensearchpy import RequestsHttpConnection
# Connect to the existing index (no re-indexing), so this cell can be run on
# its own once the data has been loaded.
vectorstore = OpenSearchVectorSearch(
    opensearch_url=OPENSEARCH_URL,
    index_name=OPENSEARCH_VECTOR_INDEX_HTML,
    embedding_function=init_eb_bedrock(),
    http_auth=OPENSEARCH_http_auth,
    timeout = 600,
    use_ssl = True,
    verify_certs = True,
    connection_class = RequestsHttpConnection,
    engine="faiss"
)


question = "What is a GenAI use case to improve customer experience?"
docs = vectorstore.similarity_search(question, k=20)
print(f"Vector search: Number of document related to the question = {len(docs)}")
# Show the hits at positions 1 and 3 when they exist; a small index can return
# fewer than four hits, and indexing blindly would raise IndexError.
for sample_index in (1, 3):
    if sample_index < len(docs):
        print('sample result:\n', docs[sample_index])

## 4. QnA the content using RetrievalQA chain
QA using a Retriever

In [None]:
from langchain.chains import RetrievalQA

def find_metadata_sources_from_documents(documents):
    """Return the distinct ``metadata['source']`` values of *documents*.

    Documents whose metadata has no ``'source'`` key are ignored. Duplicates
    are dropped while the order of first appearance is kept.
    """
    sources = (
        doc.metadata['source']
        for doc in documents
        if 'source' in doc.metadata
    )
    # dict.fromkeys de-duplicates while preserving insertion order.
    return list(dict.fromkeys(sources))

# Wire the SageMaker-hosted LLM and the OpenSearch retriever into a
# RetrievalQA chain that also returns the retrieved source documents.
llm = init_llm_sm_endpoint()
# NOTE(review): with search_type="similarity" the 'score_threshold' entry may
# not be applied by the retriever — confirm against the LangChain version in use.
retreiver = vectorstore.as_retriever(search_type="similarity", search_kwargs={'k': 6, 'score_threshold': 0.8})
qa_chain = RetrievalQA.from_chain_type(llm=llm, 
                                       retriever=retreiver,
                                       return_source_documents=True)

# Test: ask one question, then print the answer and the de-duplicated source
# URLs of the retrieved documents.
question = "What is Amazon Bedrock?"
full_result = qa_chain({"query": question})
result = full_result['result']
print(result)
print("\nsource:\n", find_metadata_sources_from_documents(full_result['source_documents']))

In [None]:
# A question that is not related to the indexed documents.
question = "Why Siu mei is the best Dim sum in Hong Kong?"

# Here `result` is the full chain output (a dict with 'result' and
# 'source_documents'), not just the answer string.
result = qa_chain({"query": question})
print("answer:\n", result['result'])
print("\nsource:\n", find_metadata_sources_from_documents(result['source_documents']))