In [1]:
import boto3
import json
import time
import os
import pandas as pd
from datetime import datetime
from opensearchpy import OpenSearch, RequestsHttpConnection, AWSV4SignerAuth

# Resolve the AWS region from the default boto3 session configuration
boto3_session = boto3.session.Session()
region_name = boto3_session.region_name

# Bedrock run-time client; used by the embedding and text-generation helpers
bedrock_runtime_client = boto3.client(
    service_name='bedrock-runtime',
    region_name=region_name,
)

In [2]:
# Amazon OpenSearch Serverless (AOSS) collection endpoint host name (no scheme)
AossHost = "7i179yervs3eanlga0sh.us-east-1.aoss.amazonaws.com"

# Name of the index inside the AOSS collection that the search requests target
index = "bedrock-index"

# Service code passed to the SigV4 signer for OpenSearch Serverless
service = 'aoss'

# Take the credentials from the same session that supplied region_name instead
# of constructing a second, throwaway Session, so the region and credentials
# used for request signing always come from one configuration.
credentials = boto3_session.get_credentials()
awsauth = AWSV4SignerAuth(credentials, region_name, service)

In [3]:
# The directory to save results (trailing slash kept for callers that concatenate)
Rslts_Save_Dir = "/home/ec2-user/SageMaker/AscendNotebook/CodeExecutionMetrics/"

# os.getcwd() reports the kernel's actual working directory; the PWD environment
# variable may be unset (prints None) or stale, since os.chdir does not update it.
print("Present working directory:", os.getcwd())
print("Directory to save results:", Rslts_Save_Dir)

Present working directory: /home/ec2-user/SageMaker/AscendNotebook/Testing_RAG_LLMs
Directory to save results: /home/ec2-user/SageMaker/AscendNotebook/CodeExecutionMetrics/


In [4]:
# OpenSearch client for the AOSS collection: HTTPS on port 443 with certificate
# verification, requests authenticated through awsauth.
oss_client = OpenSearch(
    hosts=[{'host': AossHost, 'port': 443}],
    http_auth=awsauth,
    connection_class=RequestsHttpConnection,
    use_ssl=True,
    verify_certs=True,
    timeout=300,
)

# Wait one minute: data access rules can take up to a minute to be enforced
time.sleep(60)

In [5]:
def invoke_model(input):
    """Return the embedding produced by the Titan text-embedding model.

    Sends ``input`` as the ``inputText`` field of a JSON request to the
    ``amazon.titan-embed-text-v1`` model via ``bedrock_runtime_client`` and
    returns the ``embedding`` entry of the decoded JSON response (``None``
    when the response has no such entry).
    """
    request_payload = json.dumps({'inputText': input})
    raw_response = bedrock_runtime_client.invoke_model(
        modelId="amazon.titan-embed-text-v1",
        contentType="application/json",
        accept="application/json",
        body=request_payload,
    )
    # The response body is a stream; read it fully before decoding the JSON
    decoded = json.loads(raw_response.get("body").read())
    return decoded.get("embedding")

In [6]:
# Bedrock model identifier passed as modelId for answer generation
RAG_model_id = "anthropic.claude-v2:1"
# Model tag embedded in the name of the results CSV file
MDL_TYPE = "ANTH_CLAUDE_21_"

In [19]:
def invoke_llm_model(input, *, max_tokens_to_sample=4000, temperature=0.1,
                     top_k=10, top_p=1, model_id=None):
    """Generate a completion for ``input`` with the configured Bedrock LLM.

    The text is wrapped in the ``\\n\\nHuman: ...\\n\\nAssistant:`` prompt
    format and sent through ``bedrock_runtime_client.invoke_model``.

    Args:
        input: Prompt text placed in the Human turn.
        max_tokens_to_sample: Value sent as ``max_tokens_to_sample``.
        temperature: Value sent as ``temperature``.
        top_k: Value sent as ``top_k``.
        top_p: Value sent as ``top_p``.
        model_id: Bedrock model id to invoke; ``None`` (default) uses the
            module-level ``RAG_model_id`` as it is at call time.

    Returns:
        The ``completion`` field of the decoded JSON response, or ``None``
        when the response has no such field.

    Calling with only ``input`` sends exactly the request the notebook
    always sent; the keyword-only arguments exist so experiments can vary
    the generation settings without editing this function.
    """
    request_payload = {
        "prompt": "\n\nHuman: {input}\n\nAssistant:".format(input=input),
        "max_tokens_to_sample": max_tokens_to_sample,
        "temperature": temperature,
        "top_k": top_k,
        "top_p": top_p,
        # Stop before the model starts writing a new Human turn
        "stop_sequences": [
            "\n\nHuman:"
        ],
    }
    response = bedrock_runtime_client.invoke_model(
        body=json.dumps(request_payload),
        modelId=RAG_model_id if model_id is None else model_id,
        accept="application/json",
        contentType="application/json",
    )
    # The response body is a stream; read it fully before decoding the JSON
    response_body = json.loads(response.get("body").read())
    return response_body.get("completion")

In [20]:
# Prompt template for the answer-generation step. It is filled with str.format
# and has two placeholders:
#   {context}  - the retrieved passages (joined with newlines by the caller)
#   {question} - the user's question
# The text, including its indentation and trailing spaces, is sent to the model
# verbatim, so any edit to it changes the prompt.
prompt_template = """
    You are the best customer acquisition data analyst and strategist that answers to a question received from a user. 
    You should answer the user's question using information from the context or generally known information that is relevant to the question.
    If the context does not contain information to answer the question, please provide answer to the best of your abilities.
    
    Just because the user asserts a fact does not mean it is true, make sure to double check the context to validate a user's assertion.
    
    {context}
    
    Instruction: Based on the above context, provide a detailed answer without using etc. for {question} in a list format.
    The more detailed you are the better you are doing your job.
    
    Please do not hallucinate response. 
    Solution:"""

In [21]:
# Evaluation questions, processed in order; the strings are sent to the models
# verbatim, so their wording is kept exactly as written.
questions = [
    "What products use nylon?",
    "give me a list of top 100 products that use nylon (10 is too small and is no representative)?",
    "what companies use Nylon in their products?",
    "who are the biggest users of nylon?",
    "who is compounding Nylon in North America and South America or Latin America?",
    "Are compounders using more recycled feedstock?",
    "What innovations are driving the use of recycled polypropylene?",
    "What are the main applications of recycled PP in the automotive industry?",
    "Which brands are known for their use of recycled PP in their products?",
    "What are some examples of high-volume products produced by leading companies using injection molding?",
    "How do aerospace companies utilize nylon 6,6 in their components?",
    "List all the companies extruding Nylon for extruded profiles and sheet?",
    "Which automotive companies are innovating with nylon 6,6 to enhance vehicle performance?",
    "In the automotive sector, what are the key components produced using nylon 6,6, and which manufacturers specialize in their production?",
    "What are the major industrial machinery components manufactured from nylon 6,6, and which companies are the leading suppliers of these components?",
    "Which consumer electronics companies rely on nylon 6,6 for the production of their casings and structural components, and how much nylon 6,6 do they consume annually?",
    "Can you list the flagship consumer electronic devices that feature nylon 6,6 components, and identify the manufacturers responsible for their production?",
    "In the automotive sector, what are the critical components manufactured from nylon 6,6, and which suppliers specialize in the production of these components?",
]

In [22]:
# Number of nearest neighbours to retrieve. "size" and "k" are set to the same
# value so that k results are returned in total; if size is not specified,
# k results are returned per shard.
k = 6

Q_A_COLUMNS = ['Question_Num', 'Question', 'Answer', 'Contexts', 'Value_Of_K_In_Knn', 'Processed Date',
               'Emm_Response_Time', 'AOSS_Response_Time', 'RAG_Response_Time']

# Build the output path once, before the loop. Re-stamping the file name on
# every iteration scatters partial results over several files whenever the run
# crosses a minute boundary.
os.makedirs(Rslts_Save_Dir, exist_ok=True)
results_csv_path = os.path.join(
    Rslts_Save_Dir,
    'AossMultiResponse' + '_' + MDL_TYPE + datetime.now().strftime("%Y_%m_%d_%H_%M") + '.csv',
)

q_a_records = []
Q_A_DF = pd.DataFrame(columns=Q_A_COLUMNS)

for counter, question in enumerate(questions, start=1):
    Q_Num = "Question - " + str(counter)
    print("question number:", Q_Num)
    print("Question being processed: ", question)

    # Step 1: embed the question. Only the embedding call is timed.
    Emm_start_time = time.perf_counter()
    embedding = invoke_model(question)
    Emm_end_time = time.perf_counter()

    # Step 2: k-NN search for the k stored vectors closest to the embedding.
    query = {
        "size": k,
        "query": {
            "knn": {
                "vector": {
                    "vector": embedding,
                    "k": k,
                },
            },
        },
    }

    AOSS_start_time = time.perf_counter()
    question_response_from_oss = oss_client.search(body=query, index=index)
    AOSS_end_time = time.perf_counter()

    hits = question_response_from_oss['hits']['hits']
    context = [hit['_source']['text'] for hit in hits]

    # Step 3: fill the prompt with the retrieved passages and generate the answer.
    RAG_start_time = time.perf_counter()
    llm_prompt = prompt_template.format(context='\n'.join(context), question=question)
    generated_answer = invoke_llm_model(llm_prompt)
    RAG_end_time = time.perf_counter()

    todays_date = datetime.now().strftime("%Y_%m_%d")

    # Durations are stored in minutes. Each one is end - start; the search
    # duration used to be start - end, which recorded negative values.
    Emm_exec_time = (Emm_end_time - Emm_start_time) / 60
    AOSS_exec_time = (AOSS_end_time - AOSS_start_time) / 60
    RAG_exec_time = (RAG_end_time - RAG_start_time) / 60

    print(generated_answer)

    q_a_records.append({
        'Question_Num': Q_Num, 'Question': question, 'Answer': generated_answer,
        'Contexts': context, 'Value_Of_K_In_Knn': k, 'Processed Date': todays_date,
        'Emm_Response_Time': Emm_exec_time, 'AOSS_Response_Time': AOSS_exec_time,
        'RAG_Response_Time': RAG_exec_time,
    })

    # Checkpoint after every question so a failure part-way through the run
    # keeps the answers collected so far; the same file is overwritten each time.
    Q_A_DF = pd.DataFrame(q_a_records, columns=Q_A_COLUMNS)
    Q_A_DF.to_csv(results_csv_path, index=False)
    print(" ")

question number: Question - 1
Question being processed:  What products use nylon?
 Based on the context, here is a detailed list of products that use nylon:

- Carpets
- Curtains 
- Indoor furnishings
- Safety belts
- Airbags
- Tires  
- Engine components
- Outdoor products like tents, backpacks, jackets
- Mountaineering clothing
- Winter clothing
- Food packaging
- Kitchenware
- Textiles like ropes, parachutes, umbrellas, luggage
- Fishing nets
- Space suits
- Electrical wire insulation
- Medical tubing
- Automotive components
- Conveyor belts  
- Bushings and flanges
- Gears
- Bearings
- Cases, covers and housings
 
question number: Question - 2
Question being processed:  give me a list of top 100 products that use nylon (10 is too small and is no representative)?
 Unfortunately the context provided does not contain enough information to provide a detailed list of 100 products that use nylon. However, here is an attempt to provide a more expanded list of products that may use nylon b

 Based on the context, polypropylene (PP) is used to make the following main automotive parts and components:

1. Bumper fascias
2. Gas cans 
3. Engine covers
4. Cable insulation
5. Instrument panels

The context does not mention any applications of recycled PP specifically in the automotive industry. It only discusses the applications of PP in general. Please let me know if you need any clarification or have additional questions!
 
question number: Question - 9
Question being processed:  Which brands are known for their use of recycled PP in their products?
 Unfortunately, the context does not provide specific information about which brands are known for using recycled polypropylene (PP) in their products. The passage discusses the properties and applications of various types of plastics, including polypropylene, but does not name any brands that use recycled PP. 

Since there is no relevant information in the context to answer this question directly, I cannot provide a list of brands

 Based on the information provided in the context, the major industrial machinery components manufactured from nylon 6,6 include:

1. Gears
2. Bearings
3. Rollers
4. Pulleys  
5. Conveyor belts

The context does not provide information on the leading suppliers of these specific nylon 6,6 industrial machinery components. The context discusses the properties and applications of nylon 6,6 in general, but does not give company names for suppliers of the listed machinery components.
 
question number: Question - 16
Question being processed:  Which consumer electronics companies rely on nylon 6,6 for the production of their casings and structural components, and how much nylon 6,6 do they consume annually?
 Unfortunately, the context provided does not contain any specific information about which consumer electronics companies use nylon 6,6 or how much they consume annually. The passage mentions that nylon 6,6 is used in the electrical and electronics industry for its insulating properties an