In [None]:
# LangChain Simple Agent

# Import required libraries

# For calling the OpenAI API
from langchain_openai import ChatOpenAI # LLM BRAIN - ENGINE
# # For calling the Anthropic API
# from langchain_anthropic import ChatAnthropic
# # For Azure OpenAI API (shipped in the same package as ChatOpenAI)
# from langchain_openai import AzureChatOpenAI
# # For calling Google models through Vertex AI
# from langchain_google_vertexai import ChatVertexAI
# # For calling the Ollama API
# from langchain_ollama import ChatOllama
# # For calling the Cohere API
# from langchain_cohere import ChatCohere
# # For calling the HuggingFace API
# from langchain_huggingface import ChatHuggingFace
# # For calling the Google Gemini API
# from langchain_google_genai import ChatGoogleGenerativeAI

from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder # MESSAGES - CHASSIS
# ChatPromptTemplate: It is used to create a prompt template for chat models. This helps in structuring the input for the model.
# MessagesPlaceholder: It is used to create a placeholder for messages in the prompt template.

from langchain.tools import tool # HANDS - DASHBOARD
# tool: It is used to define a tool that can be called by the agent. This allows the agent to perform specific actions or retrieve information.

from langchain.agents import create_tool_calling_agent, AgentExecutor
# create_tool_calling_agent: It is used to create an agent that can call tools.
# AgentExecutor: It is used to execute the agent with the provided tools and model.

In [None]:
import os
from dotenv import load_dotenv
from langchain.document_loaders import PyPDFLoader # a class used to load text from PDF files

# Imports a powerful text-splitting tool that breaks large documents into manageable chunks for embedding.
# LLMs have context length limits, so splitting documents into chunks is essential.
# Why recursive?
# It tries to split at logical boundaries (like paragraphs, sentences, etc.) before falling back to raw character limits.
from langchain.text_splitter import RecursiveCharacterTextSplitter

# OpenAIEmbeddings - to convert text into vector format using OpenAI’s embedding API
from langchain_openai import OpenAIEmbeddings, ChatOpenAI

from langchain.vectorstores import FAISS # a fast in-memory vector search engine used for semantic search.

"""Imports RetrievalQA, a chain that connects:
-A retriever (like FAISS)
-A language model (like GPT): To answer questions using retrieved context.

This is the heart of a RAG system — combining search + generation"""
from langchain.chains import RetrievalQA

In [None]:
# Step 1: Load environment variables from a .env file and read the OpenAI API key.
load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# Fail fast with an actionable message: without this check a missing key is
# stored as None and only surfaces later, when the first OpenAI client is built.
if not OPENAI_API_KEY:
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Add it to your .env file or export it "
        "in the environment before running this notebook."
    )

In [None]:
# Step 2: Load the source PDF; the loader returns one entry per page.
pdf_path = "docs/the_nestle_hr_policy_pdf_2012.pdf"

try:
    loader = PyPDFLoader(pdf_path)
    documents = loader.load()
except Exception as e:
    # Report the problem, then re-raise: every later cell needs `documents`,
    # so swallowing the error here would only defer the failure to a
    # confusing NameError in the next cell.
    print(f"Error loading document: {e}")
    raise
else:
    print(f"Loaded {len(documents)} pages from the document.")

In [None]:
# Step 3: Split the document into smaller chunks for processing.
# Chunking matters because LLMs have context-length limits.
# RecursiveCharacterTextSplitter tries to split at logical boundaries
# (paragraphs, sentences, ...) before falling back to raw character limits.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,  # Maximum size of each chunk
    chunk_overlap=100,  # Overlap between consecutive chunks to maintain context
)

# Each chunk carries its text and metadata; vectors are only computed later,
# when the chunks are embedded.
chunks = text_splitter.split_documents(documents)
print(f"Split document into {len(chunks)} chunks.")

# An empty result (e.g. a PDF with no extractable text) would otherwise fail
# with a bare IndexError on chunks[0]; raise a descriptive error instead.
if not chunks:
    raise ValueError(
        f"No text chunks were produced from {pdf_path!r}; "
        "the document may contain no extractable text."
    )
print(chunks[0])  # Print the first chunk to verify

In [None]:
# Step 4: Build the OpenAI embedding client used to vectorise the chunks.
# No model is passed, so the library default applies (the original note says
# text-embedding-ada-002); the print below shows the model actually in use.
embeddings = OpenAIEmbeddings(
    openai_api_key=OPENAI_API_KEY,
)

print(embeddings.model)

In [None]:
# Step 5: Embed every chunk and index the resulting vectors in a FAISS store
# so they can be retrieved by similarity later.
from langchain.vectorstores import FAISS

vector_store = FAISS.from_documents(documents=chunks, embedding=embeddings)
print("Vector store created with FAISS.")
# Optional: persist the index to disk by uncommenting the next line.
# vector_store.save_local("faiss_index")


In [None]:
# Step 6: Create a RetrievalQA chain that uses the vector store to answer questions.
# temperature=0 keeps the answers as repeatable as the API allows, which matters
# for fact-lookup questions against a policy document and for re-running the notebook.
llm = ChatOpenAI(
    model_name="gpt-4-turbo",
    temperature=0,
    openai_api_key=OPENAI_API_KEY,
)

# Retrieve the 3 chunks most similar to the question.
# Search types accepted by as_retriever: "similarity", "mmr",
# "similarity_score_threshold".
retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 3})

rag_chain = RetrievalQA.from_chain_type(
    llm=llm,  # LLM that writes the final answer
    retriever=retriever,  # Retriever that fetches the relevant chunks
    return_source_documents=False,  # Only the answer is returned, not the retrieved chunks
)
print("RAG chain created.")

In [None]:
def answer_question(question: str) -> None:
    """Answer a question with the RAG chain and print the exchange.

    Args:
        question: Natural-language question about the indexed document.

    Returns:
        None. The question and the generated answer are printed. Nothing is
        returned, so a notebook cell ending in this call does not echo the
        answer a second time.

    Raises:
        ValueError: If ``question`` is empty or contains only whitespace.
    """
    # Reject blank input up front rather than spending an API call on a
    # question that cannot produce a meaningful answer.
    if not question or not question.strip():
        raise ValueError("question must be a non-empty string")

    result = rag_chain.invoke({"query": question})
    answer = result["result"]

    print(f"Question: {question}\n")
    print(f"Answer: {answer}\n")
    # To inspect the retrieved chunks as well, build the chain with
    # return_source_documents=True and read result["source_documents"].

In [None]:
# Step 7: Smoke-test the RAG pipeline with a first question about the document.
sample_question = (
    "What are the key responsibilities of the HR department as mentioned in the document?"
)
answer_question(question=sample_question)


In [None]:
# Follow-up: ask the chain to expand an acronym.
another_question = "What does SPIL stand for?"
answer_question(question=another_question)

In [None]:
# Ask about the working hours described in the document.
question = "what are the working hours mentioned in the document?"
answer_question(question=question)

In [None]:
# Ask a threshold-style question about rewards for a child's exam result.
question = "My child secured 79.9% in 12th, what rewards can he get as per the document?"
answer_question(question=question)

In [None]:
# Ask where international training courses can be taken.
question = "where can i take the international training courses?"
answer_question(question=question)