In [None]:
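# 1. Load the PDF
# 2. Split the document into chunks
# 3. Convert the chunks into embeddings
# 4. Store the embeddings in a vector database (ChromaDB)
# 5. Build a retriever over the vector store
# 6. Send the retrieved context + query to the LLM
# 7. Generate the response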

In [2]:
#import libraries
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

In [3]:
# Replace the standard sqlite3 module with pysqlite3 so that ChromaDB runs
# against a newer SQLite build. This has to happen before chromadb is imported.
# NOTE(review): the Chroma docs cite SQLite >= 3.35 as the minimum — confirm.
import sys
import pysqlite3  # provided by pysqlite3-binary

# Register the module under both names: "pysqlite3" so chromadb can find it,
# and "sqlite3" so any later `import sqlite3` resolves to the newer build.
for module_name in ("pysqlite3", "sqlite3"):
    sys.modules[module_name] = pysqlite3


In [4]:
import chromadb
import sqlite3  # resolves to pysqlite3 once sys.modules["sqlite3"] has been replaced

# Sanity check: report the SQLite library version the kernel is actually using.
print(f"SQLite version: {sqlite3.sqlite_version}")
from langchain_chroma import Chroma

SQLite version: 3.51.1


In [5]:
# Input document, plus the chunk size / overlap handed to the text splitter
# in a later cell.
pdf_path = "Nestle HR policy.pdf"
chunkSize, chunkOverlap = 1000, 200

# Load the file; the page-count cell below reports len(documents).
loader = PyPDFLoader(pdf_path)
documents = loader.load()

In [6]:
# Check how many entries the loader produced (reported as the page count).
page_count = len(documents)
print(page_count)

8


In [7]:
# Split the loaded documents into overlapping chunks using the sizes
# configured alongside pdf_path.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=chunkSize,
    chunk_overlap=chunkOverlap,
)
chunks = text_splitter.split_documents(documents)


In [19]:
# Preview the first few chunks to sanity-check the split.
# Slicing (instead of indexing chunks[0] .. chunks[3]) avoids an IndexError
# when a document yields fewer than four chunks, and a single separator
# string keeps the output uniform.
PREVIEW_COUNT = 4
separator = "---------------------------------------"
for position, chunk in enumerate(chunks[:PREVIEW_COUNT]):
    if position:
        # Print the divider only between chunks, never before the first one.
        print(separator)
    print(chunk)

page_content='Policy
Mandatory
September  2012
The Nestlé  
Human Resources Policy' metadata={'producer': 'Adobe PDF Library 10.0.1', 'creator': 'Adobe InDesign CS6 (Macintosh)', 'creationdate': '2013-02-12T08:06:14+01:00', 'moddate': '2013-10-31T10:20:17+01:00', 'trapped': '/False', 'source': 'Nestle HR policy.pdf', 'total_pages': 8, 'page': 0, 'page_label': '1'}
---------------------------------------
page_content='Policy
Mandatory
September 
 20
12
Issuing departement
Hum
an Resources
Target audience 
All
 employees
Approver
Executive Board, Nestlé S.A.
Repository
All Nestlé Principles and Policies, Standards and  
Guidelines can be found in the Centre online repository at:  
http://intranet.nestle.com/nestledocs
Copyright
 and confidentiality
Al
l rights belong to Nestec Ltd., Vevey, Switzerland.
© 2012, Nestec Ltd.
Design
Nestec Ltd., Corporate Identity & Design,  
Vevey, Switzerland
Production
brain’print GmbH, Switzerland
Paper
This report is printed on BVS, a paper produced  
f

In [8]:
# Create the embedding model that the vector store uses to embed the chunks.
# NOTE(review): no model name or API key is passed, so library defaults apply;
# presumably the key is read from the environment (OPENAI_API_KEY) — confirm.
embedding_model = OpenAIEmbeddings()

In [14]:
# Embed the chunks and store them in a Chroma vector store.
# NOTE(review): no persist directory is given, so the store is presumably
# in-memory only and is rebuilt on every run — confirm.
vectorStore = Chroma.from_documents(
    documents=chunks,
    embedding=embedding_model,
)

In [10]:
# Environment diagnostics, added while debugging a chromadb version mismatch:
# show which chromadb installation and which Python interpreter the kernel uses.
import chromadb, sys
print("chromadb module:", chromadb.__file__)
print("python:", sys.executable)

# List the installed versions of the vector-store / LangChain packages.
%pip show chromadb langchain-chroma langchain-openai


chromadb module: /voc/work/myenv/lib/python3.10/site-packages/chromadb/__init__.py
python: /voc/work/myenv/bin/python
Name: chromadb
Version: 1.4.1
Summary: Chroma.
Home-page: 
Author: 
Author-email: Jeff Huber <jeff@trychroma.com>, Anton Troynikov <anton@trychroma.com>
License: 
Location: /voc/work/myenv/lib/python3.10/site-packages
Requires: grpcio, numpy, onnxruntime, typer, tqdm, pyyaml, pybase64, uvicorn, opentelemetry-exporter-otlp-proto-grpc, tenacity, httpx, jsonschema, tokenizers, mmh3, opentelemetry-api, kubernetes, bcrypt, rich, build, pypika, pydantic, overrides, posthog, importlib-resources, typing-extensions, orjson, opentelemetry-sdk
Required-by: langchain-chroma
---
Name: langchain-chroma
Version: 1.1.0
Summary: An integration package connecting Chroma and LangChain.
Home-page: 
Author: 
Author-email: 
License: MIT
Location: /voc/work/myenv/lib/python3.10/site-packages
Requires: chromadb, numpy, langchain-core
Required-by: 
---
Name: langchain-openai
Version: 1.1.7
Summ

In [15]:
# Define a retriever: similarity search over the vector store with k=5.
doc_retriever = vectorStore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 5},
)

In [16]:
# LLM set up: chat model used to answer from the retrieved context.
# temperature=0 is presumably chosen to keep answers as repeatable as possible.
doc_llm = ChatOpenAI(
    model="gpt-4.1-mini",
    temperature=0,
)

In [19]:
# Define prompt template
from langchain_core.prompts import ChatPromptTemplate

# System turn: restrict the model to the supplied context and give it an
# explicit fallback answer.
system_message = (
    "You are a very helpful assistant. Use ONLY the context below to answer the question. "
    "If you don't know the answer, say \"I don't know.\""
)

# Human turn: {context} and {query} are the template variables supplied by
# the RAG chain.
human_message = "Context:\n{context}\n\nQuestion:\n{query}\n\nAnswer:"

doc_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_message),
        ("human", human_message),
    ]
)


In [20]:
# RAG Chain
def _format_docs(docs):
    """Join the text of the retrieved documents into one context string.

    Without this step the prompt's {context} slot receives the raw list of
    Document objects, so their repr (including PDF metadata such as producer
    and creation date) is what gets sent to the model.

    Args:
        docs: iterable of retrieved documents exposing ``page_content``.

    Returns:
        The documents' text, separated by blank lines.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# The incoming question is fanned out: the retriever (plus formatter) turns it
# into context text, while RunnablePassthrough forwards it unchanged as {query}.
rag_chain = (
    {"context": doc_retriever | _format_docs, "query": RunnablePassthrough()}
    | doc_prompt
    | doc_llm
)

In [21]:
# Smoke-test the chain with a question about working conditions.
question = "What is the working condition in Nestle?"
response = rag_chain.invoke(question)
print(response.content)

Nestlé is committed to providing its employees all over the world with good working conditions, a safe and healthy work environment, and flexible employment possibilities that support a better balance of private and professional life. They provide flexible working conditions whenever possible and encourage employees to have outside interests, especially community involvement. Additionally, Nestlé cares about all people working inside or outside their premises under contractual obligations with service providers and insists that adequate working conditions are made available to them as well.


In [22]:
# Smoke-test the chain with a question about the rewards system.
question = "What is the rewards system in Nestle?"
response = rag_chain.invoke(question)
print(response.content)

The rewards system in Nestlé, referred to as Total Rewards, encompasses more than just remuneration and benefits based on solid performance. It includes the value and trust associated with the Nestlé name, relationships with line managers and fellow workers, recognition, experiences working for a diverse global company, and opportunities for learning and growth. The key elements that define Total Rewards at Nestlé are Fixed Pay, Variable Pay, Benefits, Personal Growth and Development, and Work Life Environment. These elements are designed to correspond to what is valued by employees in each market and demonstrate Nestlé's commitment to giving each employee the opportunity to grow, evolve, and contribute. Additionally, Nestlé Total Rewards programs are established within the social and legal framework of each country and respect applicable collective agreements. Managers are responsible for proposing remuneration within company policy and ensuring clear and transparent communication wit

In [None]:
def respond(message, history):
    """Answer one chat message by running it through the RAG chain.

    Args:
        message: the user's text; None, empty, or whitespace-only input is
            answered with a fixed hint instead of calling the chain.
        history: prior conversation turns; accepted but not used here.

    Returns:
        The ``content`` attribute of the chain's result when it has one,
        otherwise the result converted with ``str``.
    """
    question = (message or "").strip()
    if not question:
        return "Ask me a question about the Nestlé HR policy."

    reply = rag_chain.invoke(question)
    if hasattr(reply, "content"):
        return reply.content
    return str(reply)


In [None]:
if __name__ == "__main__":
    # `gr` is not imported in any visible cell of this notebook, so import it
    # here; without it this cell raises NameError on a fresh kernel.
    import gradio as gr

    # Serve `respond` as a chat UI with two starter questions.
    gr.ChatInterface(
        fn=respond,
        title="Nestlé HR Chatbot",
        type="messages",
        examples=["What is the leave policy?", "What is the rewards system?"],
    ).launch()
