## This is an AI chatbot that answers your questions based on Nestle's HR policies.

Step 1: Import necessary libraries.

Step 2: Download Nestle's HR PDF file.

Step 3: Create vector representations for text chunks using ChromaDB and an embedding model.

Step 4: Build a system to retrieve answers from text chunks.

Step 5: Create a prompt and feed it to any LLM to build a QA chatbot.

Step 6: Use Gradio to build a UI for the chatbot.

In [None]:
# Step 1
import requests
import gradio as gr

from langchain_ollama import OllamaEmbeddings # download ollama model first using 'ollama pull llama3'
from langchain_ollama.llms import OllamaLLM
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain.chains import RetrievalQA
from langchain_core.prompts import ChatPromptTemplate

In [18]:
# Step 2
# Download Nestle's HR policy PDF and save it locally as `filename`.
nestle_url = 'https://www.nestle.com/sites/default/files/asset-library/documents/jobs/humanresourcespolicy.pdf'
filename = 'policy.pdf'
chunk_size = 2000  # number of bytes requested per streamed chunk

# stream=True keeps the whole PDF out of memory; the context manager releases
# the connection even if writing to disk fails. The timeout stops the cell
# from hanging forever when the server does not answer.
with requests.get(nestle_url, stream=True, timeout=30) as r:
    # Fail loudly on 4xx/5xx instead of saving an error page as policy.pdf.
    r.raise_for_status()
    with open(filename, 'wb') as fd:
        for chunk in r.iter_content(chunk_size):
            fd.write(chunk)

In [None]:
# Step 3
# Load the PDF(s) and split them into chunks that will be embedded in Step 4.

# One loader per PDF; append more PyPDFLoader entries to index additional files.
loaders = [PyPDFLoader(filename)]

print(loaders)

# The splitter configuration is the same for every file, so build it once
# instead of once per loader.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000)

alldocument = []
for loader in loaders:
    print("Loading raw document...", loader.file_path)
    raw_documents = loader.load()
    print("Splitting text...")
    doc = text_splitter.split_documents(raw_documents)
    alldocument.extend(doc)

# Without this guard an empty result surfaces as an opaque IndexError on the
# next line (and there would be nothing to embed in the following step).
if not alldocument:
    raise ValueError("No text chunks were produced from {!r}".format(filename))

print("The first document is:", alldocument[0].page_content)
print("Length of all documents is {}".format(len(alldocument)))

In [43]:
# Step 4
# Setup database and LLM
# The same Ollama model name is used for embeddings and for generation; it
# must be available locally beforehand (e.g. `ollama pull llama3.1`).
embeddings = OllamaEmbeddings(model="llama3.1")

# Embed every chunk and store the vectors in a Chroma collection under ./chromadb.
# NOTE(review): presumably re-running this cell adds the same chunks to the
# persisted collection again -- confirm, and clear ./chromadb if so.
vectorstore = Chroma.from_documents(
    documents=alldocument,
    embedding=embeddings,
    persist_directory='./chromadb',
)

# Similarity search returning the 30 closest chunks per query.
# NOTE(review): 30 chunks of up to 1000 characters may exceed the model's
# context window -- confirm against the Ollama configuration.
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 30},
)

# temperature=0 is passed to the LLM used for answering.
llm = OllamaLLM(model="llama3.1", temperature=0)

In [None]:
# Step 5
# System instructions for the assistant; the retrieved chunks are substituted
# for {context}.
system_prompt = (
    "You are an virtual assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)

# RetrievalQA's "stuff" chain hands the user's text to the prompt under the
# name `question` (and the documents under `context`), so the human slot must
# be {question} for this prompt to be usable by the chain.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{question}"),
    ]
)

# RAG Pipeline
# The prompt has to be passed explicitly via chain_type_kwargs; otherwise the
# chain falls back to its built-in default prompt and the instructions above
# are never sent to the model. The chain's own input key is still 'query'.
rag_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type='stuff',
    retriever=retriever,
    chain_type_kwargs={"prompt": prompt},
    verbose=False,
)

# Callback used by the chat UI: takes the user's message plus the chat history.
def ask(query, history):
    """Run `query` through the RAG chain and return the answer text.

    `history` is accepted so the signature fits the chat interface, but it is
    not used: every question is answered independently of earlier turns.
    """
    answer = rag_chain.invoke({'query': query})['result']
    return answer

# Test
print(ask("Tell me about the HR policy in one sentence", None))

In [None]:
# Step 6
# Build the chat UI around `ask` and launch it.

chat_window = gr.Chatbot(height=300)
question_box = gr.Textbox(
    placeholder="Ask me a question regarding Nestle's HR policy.",
    container=False,
    scale=7,
)
example_questions = [
    "Tell me about Nestle",
    "Summarize the most important HR policies",
    "What's the work life balance in Nestle?",
]

demo = gr.ChatInterface(
    ask,
    chatbot=chat_window,
    textbox=question_box,
    title="HR Chatbot",
    description="Ask HR Chatbot any question regarding HR policy",
    theme="soft",
    examples=example_questions,
    cache_examples=True,
)
demo.launch()