In [1]:
import boto3
import json
import time
import os
import pandas as pd
from datetime import datetime
from opensearchpy import OpenSearch, RequestsHttpConnection, AWSV4SignerAuth

# Resolve the AWS region from the default boto3 session configuration
boto3_session = boto3.session.Session()
region_name = boto3_session.region_name

# Bedrock runtime client, created in the region resolved above
bedrock_runtime_client = boto3.client(
    service_name='bedrock-runtime',
    region_name=region_name,
)

In [2]:
# Host name of the AOSS collection endpoint
# NOTE(review): the host embeds "us-east-1" while region_name comes from the
# boto3 session -- presumably the two must match for request signing; confirm.
AossHost = "7i179yervs3eanlga0sh.us-east-1.aoss.amazonaws.com"

# Name of the index queried inside the AOSS collection
index = "bedrock-index"

# Request-signing auth object built from the service code, the region and the
# credentials resolved by boto3
service = "aoss"
credentials = boto3.Session().get_credentials()
awsauth = AWSV4SignerAuth(
    credentials,
    region_name,
    service,
)

In [3]:
# Directory where results are to be saved
Rslts_Save_Dir = "/home/ec2-user/SageMaker/AscendNotebook/CodeExecutionMetrics/"

# os.getcwd() reports the process's actual working directory; the PWD
# environment variable can be unset or stale (e.g. after os.chdir), in which
# case the previous os.getenv('PWD') call printed None or a wrong path.
print("Present working directory:", os.getcwd())
print("Directory to save results:", Rslts_Save_Dir)

Present working directory: /home/ec2-user/SageMaker/AscendNotebook/Testing_RAG_LLMs
Directory to save results: /home/ec2-user/SageMaker/AscendNotebook/CodeExecutionMetrics/


In [4]:
# OpenSearch client for the AOSS collection: connects to AossHost on port 443
# over SSL with certificate verification, authenticating with `awsauth`.
oss_client = OpenSearch(
    hosts=[dict(host=AossHost, port=443)],
    connection_class=RequestsHttpConnection,
    http_auth=awsauth,
    use_ssl=True,
    verify_certs=True,
    timeout=300,
)
# Optional wait, left disabled: it can take up to a minute for data access
# rules to be enforced.
# time.sleep(60)

In [5]:
def invoke_model(input):
    """Return the embedding of ``input`` from the Titan text-embedding model.

    The text is sent as ``inputText`` to ``amazon.titan-embed-text-v1`` through
    ``bedrock_runtime_client``.

    Args:
        input: Text to embed. The name shadows the ``input`` builtin; it is
            kept so existing keyword callers continue to work.

    Returns:
        The value stored under ``"embedding"`` in the JSON response body, or
        ``None`` when that key is absent.
    """
    request_body = json.dumps({'inputText': input})
    response = bedrock_runtime_client.invoke_model(
        modelId="amazon.titan-embed-text-v1",
        body=request_body,
        contentType="application/json",
        accept="application/json",
    )
    raw_payload = response.get("body").read()
    return json.loads(raw_payload).get("embedding")

In [6]:
# Bedrock model id passed as modelId when invoking the answer-generating LLM
RAG_model_id = "anthropic.claude-v2:1"
# Short tag for this model -- presumably used to label saved outputs; TODO confirm
MDL_TYPE = "ANTH_CLAUDE_21_"

In [7]:
def invoke_llm_model(input):
    """Send ``input`` to the model named by ``RAG_model_id`` and return its completion.

    The text is wrapped in a ``Human:`` / ``Assistant:`` frame and submitted
    through ``bedrock_runtime_client`` together with fixed sampling settings.

    Args:
        input: Prompt text to send. The name shadows the ``input`` builtin; it
            is kept so existing keyword callers continue to work.

    Returns:
        The value stored under ``"completion"`` in the JSON response body, or
        ``None`` when that key is absent.
    """
    generation_request = {
        "prompt": f"\n\nHuman: {input}\n\nAssistant:",
        "max_tokens_to_sample": 4000,
        "temperature": 0.1,
        "top_k": 250,
        "top_p": 1,
        "stop_sequences": ["\n\nHuman:"],
        # "anthropic_version": "bedrock-2023-05-31"
    }
    response = bedrock_runtime_client.invoke_model(
        modelId=RAG_model_id,
        body=json.dumps(generation_request),
        contentType="application/json",
        accept="application/json",
    )
    payload = json.loads(response.get("body").read())
    return payload.get("completion")

In [8]:
# Prompt sent to the LLM. It is filled in with str.format using two
# placeholders:
#   {context}  - the retrieved passages joined into a single string
#   {question} - the user's question
prompt_template = """
    You are the best customer acquisition data analyst and strategist that answers to a question received from a user. 
    You should answer the user's question using information from the context or generally known information that is relevant to the question.
    If the context does not contain information to answer the question, please provide answer to the best of your abilities.
    
    Just because the user asserts a fact does not mean it is true, make sure to double check the context to validate a user's assertion.
    
    {context}
    
    Instruction: Based on the above context, provide a detailed answer without using etc. for {question} in a list format.
    The more detailed you are the better you are doing your job.
    
    Please do not hallucinate response. 
    Solution:"""

In [9]:
# User question to be answered from the indexed documents
query = 'What products use nylon?'

In [10]:
print("Question being processed: ", query)

question = query

# Embed the question so it can be compared with the vectors stored in the index
embedding = invoke_model(question)

# Number of neighbours. size and k are the same to return k results in total.
# If size is not specified, k results will be returned per shard.
k = 6

# The search body gets its own name. Assigning it to `query` replaced the
# question string with a dict, so re-running this cell embedded and printed
# the dict instead of the question.
knn_query = {
    "size": k,
    "query": {
        "knn": {
            # Outer "vector" is the field searched; inner "vector" is the query embedding.
            "vector": {
                "vector": embedding,
                "k": k,
            },
        },
    },
}

# Retrieve individual contexts from AOSS answering question based on KNN value
question_response_from_oss = oss_client.search(body=knn_query, index=index)

hits = question_response_from_oss['hits']['hits']

# Collect the text of every hit; the passages are joined into one context below
context = [hit['_source']['text'] for hit in hits]

# Fill the prompt with the combined context and the question, then ask the LLM
llm_prompt = prompt_template.format(context='\n'.join(context), question=question)
generated_answer = invoke_llm_model(llm_prompt)

print(generated_answer)

Question being processed:  What products use nylon?
 Based on the context, here is a detailed list of products that use nylon:

- Carpets
- Curtains 
- Indoor furnishings
- Safety belts
- Airbags
- Tires  
- Engine components
- Outdoor products like tents, backpacks, jackets
- Mountaineering clothing
- Winter clothing
- Food packaging
- Kitchenware
- Textiles like ropes, parachutes, umbrellas, luggage
- Fishing nets
- Space suits
- Electrical wire insulation 
- Medical tubing
- Automotive parts
- Aerospace components
- Consumer goods like watch bands, toothbrushes, apparel
- Industrial machine parts like gears, bearings, nozzles
