# Nestlé HR Chatbot Assistant

In [1]:
# !pip install dotenv langchain langchain-community langchain-huggingface gradio chromadb pypdf sentence-transformers

In [2]:
# Import essential tools and libraries
import os
import sys

import gradio as gr

from langchain.chains import RetrievalQA
from langchain.llms import HuggingFaceHub
from langchain.prompts import PromptTemplate
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFaceEndpoint
from langchain.vectorstores import Chroma
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from dotenv import load_dotenv

# Create a '.env' file in the root folder of the project with this info:
# HUGGINGFACEHUB_API_TOKEN="my-huggingface-token"

# Load .env file
load_dotenv()

#  Set up the LangChain API environment
HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")

# Verify environment variables
# print(f'HuggingFace Access Token: {HUGGINGFACEHUB_API_TOKEN}')

In [3]:
# Load the HR policy PDFs from a directory and preprocess them
PDF_DIR = "./pdf_docs/"
loader = PyPDFDirectoryLoader(PDF_DIR)
documents = loader.load()
# documents

In [4]:
# Split text into manageable chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=2024, chunk_overlap=48)
texts = text_splitter.split_documents(documents)
print(f'Number of chunks: {len(texts)}')

Number of chunks: 12


In [5]:
# Create vector representations for text chunks
MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
embedding_model = HuggingFaceEmbeddings(model_name=MODEL_NAME)
vector_db = Chroma.from_documents(documents=texts, embedding=embedding_model)
retriever = vector_db.as_retriever()

In [6]:
# Build a question-answering system
llm = HuggingFaceHub(
    repo_id='google/flan-t5-base',
)

  llm = HuggingFaceHub(


In [7]:

# Create a prompt template for the chatbot
prompt_template = PromptTemplate(
    input_variables=["context", "question"],
    template="""You are a helpful assistant. Answer the following question in detail based on the HR policy documents. 
    Please follow the following rules:
    1. If you don't know the answer, don't try to make up an answer. Just say **I can't find the answer to your question based on the HR documents**.
    2. If you find the answer, write the answer in a concise way and add the name of the document and the page that are **directly** used to derive the answer. Exclude the sources that are irrelevant to the final answer.
    {question}\n\nContext:\n{context}
"""
)

# Create the chain using the provided pattern
retrieval_chain = (
    {
        "context": retriever,                   # The retriever fetches documents and passes them through
        "question": RunnablePassthrough(),      # The question will be passed through unchanged
    }
    | prompt_template                           # Apply the prompt template
    | llm                                       # Run the prompt through the language model
    | StrOutputParser()                         # Parse the output as a string
)

In [8]:
# Response without Gradio for testing
# response = retrieval_chain.invoke("How do I promote in the company?")
# print(response.capitalize())

In [9]:
# Build a Gradio interface for the chatbot
def chatbot(query):
    response = retrieval_chain.invoke(query)
    return response.capitalize()

iface = gr.Interface(
    fn=chatbot,
    inputs=gr.Textbox(lines=2, placeholder="Enter your question about the HR policy here..."),
    outputs="text",
    title="Nestlé HR Assistant",
    description="Ask any question related to the HR policies located in the / pdf_docs folder."
)

# Launch the interface
if __name__ == "__main__":
    iface.launch(share=True)

Running on local URL:  http://127.0.0.1:7860
Running on public URL: https://c440b6eaab030d16d7.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)
