# Build Your Own RAG Bot Using a Vector DB, an LLM, OpenAI and LangChain

In [30]:
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings
from langchain.document_loaders import CSVLoader

In [31]:
import os
import openai
import panel as pn  # GUI
import pandas as pd
from dotenv import load_dotenv, find_dotenv

# Read the .env file located by find_dotenv(); the result is bound to `_`
# because it is not needed afterwards.
_ = load_dotenv(find_dotenv())

# Pass the key to the openai module (None when the variable is not set).
openai.api_key = os.environ.get('OPENAI_API_KEY')

In [33]:
# Silence every warning category for the rest of the session so library
# deprecation notices do not clutter the cell outputs.
import warnings
warnings.simplefilter('ignore')

## Set the Embeddings and Load the Data (I am using a CSV file, but any file format or combination of file formats can be used, e.g. PDF, images, etc. You need to use the corresponding loader library for each format.)

In [34]:
# OpenAI embedding client, constructed with the library defaults (no model
# or key is passed explicitly here).
embeddings = OpenAIEmbeddings()

In [35]:
# Parse the catalog CSV (read as UTF-8) into LangChain documents.
file = 'ClothingCatalog.csv'
loader = CSVLoader(
    file_path=file,
    encoding='utf-8',
)
documents = loader.load()

## Load the Documents into the Vector DB

In [36]:
# Embed the loaded documents and index them in a Chroma vector store.
vectorstore = Chroma.from_documents(documents=documents, embedding=embeddings)

In [37]:
# Expose the store as a similarity-search retriever configured with k=6.
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs=dict(k=6))

## Import the Required Libraries from LangChain

In [38]:
from langchain.prompts.prompt import PromptTemplate
from langchain_openai import OpenAI
from langchain.chains import ConversationalRetrievalChain, LLMChain
from langchain.chains.qa_with_sources import load_qa_with_sources_chain

## Define the prompt

In [39]:
# Answering prompt for the shirt assistant. The template has exactly two
# placeholders, {question} and {context}, which are declared as the prompt's
# input variables below.
template = """You are an AI assistant for answering questions about the shirts with sun protection.
You are given the following extracted parts of a long document and a question. Provide a conversational answer.
If you don't know the answer, just say "Hmm, I'm not sure." Don't try to make up an answer.
If the question is not about shirts, politely inform them that you are tuned to only answer questions about shirts.
Question: {question}
=========
{context}
=========
Answer in Markdown:"""
prompt = PromptTemplate(template=template, input_variables=["question", "context"])

## Set LLM and Doc Chain

In [40]:
# Imported here so this cell is self-contained: the stock prompt that rewrites
# a follow-up question plus the chat history into a standalone question.
from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT

# temperature=0 keeps the completions as deterministic as the API allows.
llm = OpenAI(temperature=0)

# Answering chain: each retrieved document is processed on its own and the
# partial results are then merged ("map_reduce"), with sources reported.
doc_chain = load_qa_with_sources_chain(llm, chain_type="map_reduce")

# The question generator is invoked with `question` and `chat_history` when
# the history is non-empty, so its prompt must accept exactly those variables.
# The answering `prompt` (which needs `context` and has no `chat_history`)
# cannot be used here: it only appeared to work because an empty history
# skips this chain entirely.
# NOTE(review): the custom `prompt` is not applied by this cell (nor was it
# effectively used before, since the generator was skipped); to shape the
# answers with it, pass it to the document chain — confirm the desired wiring.
question_generator_chain = LLMChain(llm=llm, prompt=CONDENSE_QUESTION_PROMPT)

# Full conversational RAG pipeline: condense question -> retrieve -> answer.
qa_chain = ConversationalRetrievalChain(
    retriever=retriever,
    question_generator=question_generator_chain,
    combine_docs_chain=doc_chain,
)

## You can provide a chat history as well, which will make the bot context sensitive

In [41]:
# First turn of the conversation: the history list is empty.
response = qa_chain.invoke(dict(
    question="Please list all your shirts with sun protection in a table in markdown and summarize each one?",
    chat_history=[],
))

In [42]:
# Show the raw answer string (as the cell's last expression it is echoed).
response['answer']

" | Shirt Name | Summary | \n| --- | --- | \n| Men's Tropical Plaid Short-Sleeve Shirt | Rated UPF 50+, traditional fit, 100% polyester, wrinkle-resistant, front and back cape venting, two front bellows pockets, imported | \n| Men's Plaid Tropic Shirt, Short-Sleeve | Rated UPF 50+, Ultracomfortable, originally designed for fishing, 52% polyester and 48% nylon, machine washable and dryable, front and back cape venting, two front bellows pockets, imported | \nSOURCES: OutdoorClothingCatalog_1000.csv"

In [43]:
from IPython.display import display, Markdown

In [44]:
# Render the same answer as Markdown so the table is displayed formatted.
display(Markdown(response['answer']))

 | Shirt Name | Summary | 
| --- | --- | 
| Men's Tropical Plaid Short-Sleeve Shirt | Rated UPF 50+, traditional fit, 100% polyester, wrinkle-resistant, front and back cape venting, two front bellows pockets, imported | 
| Men's Plaid Tropic Shirt, Short-Sleeve | Rated UPF 50+, Ultracomfortable, originally designed for fishing, 52% polyester and 48% nylon, machine washable and dryable, front and back cape venting, two front bellows pockets, imported | 
SOURCES: OutdoorClothingCatalog_1000.csv