In [1]:
import boto3
import json
import pandas as pd
import yaml

from langchain.document_loaders.csv_loader import CSVLoader 
from langchain.document_loaders import DataFrameLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings

from langchain_community.llms import Bedrock
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.prompts.base import BasePromptTemplate
from langchain_openai import ChatOpenAI

from langchain_pinecone import PineconeVectorStore
from pinecone import Pinecone, ServerlessSpec

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load API keys and other settings from the local config file.
# safe_load only builds plain Python types (dict, list, str, numbers), so a
# tampered config.yaml cannot trigger arbitrary object construction the way
# yaml.load with the full Loader can.
with open('config.yaml') as info:
    env = yaml.safe_load(info)

In [3]:
# Create the Amazon Bedrock runtime client in us-east-1.
bedrock = boto3.client(
    service_name="bedrock-runtime",
    region_name="us-east-1",
)

In [4]:
# Location of the CSV that holds the product information.
DATASET_URL='https://github.com/GoogleCloudPlatform/python-docs-samples/raw/main/cloud-sql/postgres/pgvector/data/retail_toy_dataset.csv'

# Download the dataset, keep only the columns used below, and drop the rows
# that have no description.
df = (
    pd.read_csv(DATASET_URL)
    .loc[:, ['product_id', 'product_name', 'description', 'list_price']]
    .dropna(subset=['description'])
)
df.head(5)

Unnamed: 0,product_id,product_name,description,list_price
0,7e8697b5b7cdb5a40daf54caf1435cd5,"Koplow Games Set of 2 D12 12-Sided Rock, Paper...","Rock, paper, scissors is a great way to resolv...",3.56
1,7de8b315b3cb91f3680eb5b88a20dcee,"12""-20"" Schwinn Training Wheels",Turn any small bicycle into an instrument for ...,28.17
2,fb9535c103d7d717f0414b2b111cfaaa,Bicycle Pinochle Jumbo Index Playing Cards - 1...,Purchase includes 1 blue deck and 1 red deck. ...,6.49
3,c73ea622b3be6a3ffa3b0b5490e4929e,Step2 Woodland Adventure Playhouse & Slide,The Step2 Woodland Climber Adventure Playhouse...,499.99
4,dec7bd1f983887650715c6fafaa5b593,Step2 Naturally Playful Welcome Home Playhouse...,Children can play and explore in the Step2 Nat...,600.0


In [5]:
# Use the dataframe loader to create one document per row, with the
# description column as the page content.
loader = DataFrameLoader(df, page_content_column='description')
data = loader.load()

# Use the text splitter to cut the documents into chunks of 1500 characters
# with a 150 character overlap, passing each document's metadata alongside its text.
splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=150)
texts = [document.page_content for document in data]
metadatas = [document.metadata for document in data]
splits = splitter.create_documents(texts=texts, metadatas=metadatas)


In [6]:
# Set up the OpenAI embeddings client with the API key from the config file.
openai_api_key = env['open_ai_api_key']
embeddings = OpenAIEmbeddings(api_key=openai_api_key)

  embeddings = OpenAIEmbeddings(api_key=env['open_ai_api_key'])


In [7]:
# Connect to Pinecone with the API key from the config file.
pc = Pinecone(api_key=env['pinecone_api_key'])

# Delete the index only if it exists. An unconditional delete_index call fails
# when the index is absent, which breaks a first run (or a re-run after a
# manual cleanup) before the index is ever created.
stale_index_name = 'product-descriptions'
if stale_index_name in pc.list_indexes().names():
    pc.delete_index(name=stale_index_name)

In [8]:
# Create a serverless Pinecone index whose dimension matches the OpenAI embeddings.
index_name = 'product-descriptions'
serverless_spec = ServerlessSpec(cloud="aws", region="us-east-1")
pc.create_index(name=index_name, dimension=1536, spec=serverless_spec)

# Open the index and wrap it in a vector store that embeds documents with the
# OpenAI embeddings object created above.
index = pc.Index(index_name)
vector_store = PineconeVectorStore(index=index, embedding=embeddings)

In [9]:
# Give every chunk a sequential string id ("0", "1", ...) and add the chunks
# to the vector store under those ids.
uuids = list(map(str, range(len(splits))))
vector_store.add_documents(documents=splits, ids=uuids)

['0',
 '1',
 '2',
 '3',
 '4',
 '5',
 '6',
 '7',
 '8',
 '9',
 '10',
 '11',
 '12',
 '13',
 '14',
 '15',
 '16',
 '17',
 '18',
 '19',
 '20',
 '21',
 '22',
 '23',
 '24',
 '25',
 '26',
 '27',
 '28',
 '29',
 '30',
 '31',
 '32',
 '33',
 '34',
 '35',
 '36',
 '37',
 '38',
 '39',
 '40',
 '41',
 '42',
 '43',
 '44',
 '45',
 '46',
 '47',
 '48',
 '49',
 '50',
 '51',
 '52',
 '53',
 '54',
 '55',
 '56',
 '57',
 '58',
 '59',
 '60',
 '61',
 '62',
 '63',
 '64',
 '65',
 '66',
 '67',
 '68',
 '69',
 '70',
 '71',
 '72',
 '73',
 '74',
 '75',
 '76',
 '77',
 '78',
 '79',
 '80',
 '81',
 '82',
 '83',
 '84',
 '85',
 '86',
 '87',
 '88',
 '89',
 '90',
 '91',
 '92',
 '93',
 '94',
 '95',
 '96',
 '97',
 '98',
 '99',
 '100',
 '101',
 '102',
 '103',
 '104',
 '105',
 '106',
 '107',
 '108',
 '109',
 '110',
 '111',
 '112',
 '113',
 '114',
 '115',
 '116',
 '117',
 '118',
 '119',
 '120',
 '121',
 '122',
 '123',
 '124',
 '125',
 '126',
 '127',
 '128',
 '129',
 '130',
 '131',
 '132',
 '133',
 '134',
 '135',
 '136',
 '137',
 '138'

In [10]:
# Create a retriever that returns the 5 closest documents with a score of at least 0.7.
retriever_search_kwargs = {"k": 5, "score_threshold": 0.7}
retriever = vector_store.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs=retriever_search_kwargs,
)

In [11]:
# Amazon Bedrock model used to generate the answer.
model_id = "amazon.titan-text-lite-v1"

# Bedrock runtime client handed to the LangChain wrapper.
bedrock = boto3.client(
    service_name="bedrock-runtime",
    region_name="us-east-1",
)

# LangChain LLM object built from the model id and the client, with maxTokenCount set to 512.
llm = Bedrock(
    model_id=model_id,
    client=bedrock,
    model_kwargs={'maxTokenCount': 512},
)

# System instructions for the model; {context} is the placeholder for the retrieved documents.
system_prompt = (
    """You are a friendly chatbot capable of answering questions related to products. User's can ask questions about its description,
            and prices. Be polite and redirect conversation specifically to product information in the Context. If you dont know the answer say I don't know
            Context: {context} 
            """
)

# Chat prompt: the system instructions followed by the user's question.
prompt_messages = [
    ("system", system_prompt),
    ("human", "{input}"),
]
prompt = ChatPromptTemplate(prompt_messages)

# Question-answering chain built from the model and the prompt.
question_answer_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)

# Retrieval chain that combines the retriever with the question-answering chain.
chain = create_retrieval_chain(retriever, question_answer_chain)

# Ask a sample question and print the full response.
question = "I am looking for Bicycle Playing cards, what is the product_name and list_price"
response = chain.invoke({"input": question})
print(response)

  llm = Bedrock(model_id=model_id,


{'input': 'I am looking for Bicycle Playing cards, what is the product_name and list_price', 'context': [Document(id='1172', metadata={'list_price': 23.99, 'product_id': '3d24e5d3036c334d9b0e16fef5f6d2dc', 'product_name': 'Bicycle Poker JUMBO FACES Playing Cards, 12 Decks'}, page_content='Bicycle delivers classic playing cards and memorable experiences that players can trust based on its longstanding tradition of producing quality, fun and family-friendly products that bring people together. JUMBO UNITED STATES PLAYI NG CARDS ND'), Document(id='260', metadata={'list_price': 9.29, 'product_id': '9daeb93e3b7b39c62c61c71bd494e1d8', 'product_name': '2 Decks Bicycle Fire Standard Poker Playing Cards'}, page_content='Bicycle Fire playing cards are a showstopper with hues of red, orange, and black smoldering chars. The first of the element series, Bicycle Fire is a new take on the very collectible Hobgoblin card back design printed on the renowned Vanity Fair deck produced by The United State

In [14]:
# create function to add metadata to get price information in the response
def get_products(question: str, k: int = 5, model_id: str = "amazon.titan-text-lite-v1") -> str:
    """Answer a product question using documents retrieved from the vector store.

    The function searches the module-level ``vector_store`` for the ``k``
    documents closest to ``question``, appends each document's list price and
    metadata to its text, fills the prompt with that context, and sends it to
    the module-level ``bedrock`` client.

    Args:
        question: The user's question about products.
        k: Number of documents to retrieve as context. Defaults to 5.
        model_id: Bedrock model id to invoke. Defaults to the Titan Text Lite model.

    Returns:
        The model's output text followed by ", metadata: " and the string form
        of the list of metadata objects of the retrieved documents.

    Raises:
        KeyError: If a retrieved document's metadata has no 'list_price' entry
            (assumes metadata is a dict, as the lookup below implies).
    """

    # create initial prompt template for llm
    prompt_template = """You are a friendly chatbot capable of answering questions related to products. User's can ask questions about its description,
       and prices. Be polite and redirect conversation specifically to product information in the Context. If you dont know the answer say I don't know

        Question: {question}
       
       Context: {context} 
    """

    # using the question asked get the k closest documents
    results = vector_store.similarity_search(question, k=k)

    # Collect one context line per document plus its metadata. The f-string
    # avoids re-using the enclosing quote character inside a replacement field,
    # which is a SyntaxError on Python versions before 3.12.
    information = []
    metadata = []
    for doc in results:
        price = doc.metadata['list_price']
        information.append(
            f"* {doc.page_content} The price of the product is {price} [{doc.metadata}]"
        )
        metadata.append(doc.metadata)

    # fill in returned information from pinecone vector store into prompt
    prompt = prompt_template.format(context=" ".join(information), question=question)

    # create payload for model
    payload = {
        "inputText": prompt,
        "textGenerationConfig": {
            "maxTokenCount": 3000,
            "stopSequences": ["User:"],
            "temperature": 0,
            "topP": 1,
        },
    }

    # convert payload to json
    body = json.dumps(payload)

    # invoke the model to get a response
    response = bedrock.invoke_model(
        body=body,
        modelId=model_id,
        accept="application/json",
        contentType="application/json",
    )

    # get response text from returned json
    response_body = json.loads(response.get("body").read())
    response_text = response_body['results'][0]['outputText']

    # add metadata to response for reference
    return f"{response_text}, metadata: {metadata!s}"

In [15]:
# Ask for playing-card options together with their list prices.
product_question = """I am looking for some playing cards please list me some options as well as the list price"""
response = get_products(product_question)

In [16]:
# Display the text returned by get_products.
response

"\nBot: I can help you with that. Here is a list of some playing cards and their prices:\n- Cartas Españolas Yoruba Naipes Barajas Espanolas Futuro Senteria Future Tarot $5.25\n- Gold Foil Plated Poker Playing Cards Deck $100\n- PLAYING CARDS HIGH QUALITY 24K GOLD FOIL NEWEST 100.00 BILL BENJAMIN FRANKLIN $8.06\n- don manolo spanish suite playing cards - 5 red and 5 blue decks $5.94\n- All of our productsWalmartply with international quality standards and are greatly appreciated in a variety of different markets throughout the world. This deck consists of playing cards that have been used at Hard Rock, which were then repackaged and sent to us. This deck is great for parties, home games, bar leagues, and many other uses. These cardsWalmarte directly from the casino pit. They do not have a hole drilled in them like many otherWalmartpetitors offer. Instead they have 2 slightly rounded corners to prevent them from being used at the casino again. Now you can use the actual cards used in Ne