In [1]:
# Import all required libraries
from langchain_classic.chains import create_history_aware_retriever, create_retrieval_chain
from langchain_classic.chains.combine_documents import create_stuff_documents_chain
from langchain_chroma import Chroma
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_groq import ChatGroq
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
import os
from dotenv import load_dotenv

In [2]:
# Load environment variables from the .env file into the process environment
load_dotenv()

# Re-export the Hugging Face token only when it is actually defined.
# os.environ accepts str values only, so assigning the None that os.getenv()
# returns for a missing variable would abort this cell with a TypeError.
hf_token = os.getenv("HUGGINGFACE_HUB_TOKEN")
if hf_token:
    os.environ["HUGGINGFACE_HUB_TOKEN"] = hf_token
else:
    print("Warning: HUGGINGFACE_HUB_TOKEN is not set; continuing without it")

print("Imports completed successfully")

Imports completed successfully


In [3]:
# Embedding model: converts text into vectors for similarity search
embedding_model_name = 'all-MiniLM-L6-v2'
embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)
print("Embeddings model loaded")

Embeddings model loaded


In [4]:
# Chat model (Large Language Model) that generates the responses
groq_model_name = "llama-3.1-8b-instant"
llm = ChatGroq(model_name=groq_model_name)
print("LLM initialized")

LLM initialized


In [5]:
# Location of the PDF document to index.
# Set the RAG_PDF_PATH environment variable (for example in the .env file)
# to point at your own PDF; the original local path is kept as the fallback
# so existing setups keep working unchanged.
pdf_path = os.getenv("RAG_PDF_PATH", r"D:\Projects\RAG QA Chatbot\data\pdf2.pdf")

In [6]:
documents = []  # collects the pages read from the PDF
loader = PyPDFLoader(pdf_path)
docs = loader.load()
documents += docs
page_count = len(documents)
print(f"‚úì Loaded {page_count} pages from PDF")

‚úì Loaded 448 pages from PDF


In [7]:
# Spot-check one loaded entry (index 10): displays its metadata and page_content
documents[10]

Document(metadata={'producer': 'Adobe PDF Library 17.0', 'creator': 'Adobe InDesign 20.2 (Macintosh)', 'creationdate': '2025-05-14T15:09:41+02:00', 'moddate': '2025-06-10T11:11:17+02:00', 'title': 'UN Human Rights Report 2024', 'trapped': '/False', 'source': 'D:\\Projects\\RAG QA Chatbot\\data\\pdf2.pdf', 'total_pages': 448, 'page': 10, 'page_label': '11'}, page_content='11\nOverviewMandated by General Assembly resolution 48/141, \nUN Human Rights‚Äô responsibilities include \nthe promotion and protection of the effective \nenjoyment by all, of all civil, cultural, economic, \npolitical and social rights, taking an active role in \naddressing challenges to the full realization of all \nhuman rights and preventing the continuation of \nhuman rights violations throughout the world. To \nthis end, UN Human Rights engages all govern -\nments in dialogue regarding the implementation \nof its mandate, with a view to securing respect for \nall human rights. It provides advisory services, \nte

In [8]:
# Break the loaded pages into smaller chunks for better retrieval
splitter_options = {
    "chunk_size": 1000,    # maximum size of each chunk (in characters)
    "chunk_overlap": 200,  # overlap between chunks to maintain context
}
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)
splits = text_splitter.split_documents(documents)
chunk_count = len(splits)
print(f"‚úì Split into {chunk_count} chunks")

‚úì Split into 1683 chunks


In [9]:
# Spot-check one chunk (index 10): displays its metadata and page_content
splits[10]

Document(metadata={'producer': 'Adobe PDF Library 17.0', 'creator': 'Adobe InDesign 20.2 (Macintosh)', 'creationdate': '2025-05-14T15:09:41+02:00', 'moddate': '2025-06-10T11:11:17+02:00', 'title': 'UN Human Rights Report 2024', 'trapped': '/False', 'source': 'D:\\Projects\\RAG QA Chatbot\\data\\pdf2.pdf', 'total_pages': 448, 'page': 6, 'page_label': '6'}, page_content='in national censuses and surveys and land resti -\ntution policies. In Zambia, following our study \non the rights to food and education, the Govern-\nment significantly expanded its School Feeding \nProgramme. In South Africa, we made recommen-\ndations to update environmental regulations.\nIn Liberia, we provided technical and financial \nbacking to the Government‚Äôs National Action Plan \non Business and Human Rights ‚Äì the fourth on the \nAfrican continent. In Haiti, we provided advice \nto the Multinational Security Support Mission \non implementing the Mission‚Äôs human rights \ncompliance mechanism and on preven

In [10]:
# Embed the document chunks and index them in a Chroma vector database,
# which enables semantic search (by meaning, not just by keywords)
vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings)
print("Vector store created")

Vector store created


In [11]:
# Create a retriever that will fetch relevant chunks for questions.
# No arguments are passed, so the vector store's default retrieval settings apply.
retriever = vectorstore.as_retriever()
print("Retriever ready")

Retriever ready


In [12]:
# In-memory registry mapping each session id to its chat history.
# It lives in the notebook kernel, so it lasts for the entire notebook session.
store = {}

def get_session_history(session_id: str) -> BaseChatMessageHistory:
    """
    Look up the chat history for a session, creating an empty one on first use.

    Args:
        session_id: Unique identifier for the conversation session

    Returns:
        The ChatMessageHistory object registered in `store` for that session
    """
    try:
        return store[session_id]
    except KeyError:
        # First request for this session: register a fresh, empty history
        history = ChatMessageHistory()
        store[session_id] = history
        return history

print("Session history management setup complete")

Session history management setup complete


In [13]:
# Contextualization step: rewrite the latest question using the chat history.
# Example: after a question about solar panels, "What about its benefits?"
# is reformulated to "What are the benefits of solar panels?"

context_q_sys_prompt = """Given a chat history and the latest user question 
    which might reference context in the chat history, formulate a standalone 
    question which can be understood without the chat history. Do NOT answer the 
    question, just reformulate if required and otherwise return as is."""

# Message layout: instructions, then the prior conversation, then the new question
contextualize_messages = [
    ("system", context_q_sys_prompt),
    MessagesPlaceholder("chat_history"),
    ("human", "{input}"),
]
context_prompt = ChatPromptTemplate.from_messages(contextualize_messages)

In [14]:
# Build the history-aware retriever from the LLM, the base retriever and
# the contextualization prompt defined above
history_aware_retriever = create_history_aware_retriever(llm, retriever, context_prompt)
print("History-aware retriever created")

History-aware retriever created


In [15]:
# Answering step: this prompt receives the retrieved context (in {context}),
# the chat history and the user's question, and asks for a concise answer

# The text below is sent to the model verbatim, so its spelling matters
sys_prompt = """You are an assistant for question-answering tasks.
    Use the following pieces of retrieved context to answer the question.
    If you don't know the answer, say that you don't know. Use three sentences
    at maximum and keep the answer concise.\n\n {context}"""

# Message layout: instructions + context, then the prior conversation,
# then the new question
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", sys_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)

In [16]:
# Create the question-answer chain from the LLM and the QA prompt
# (qa_prompt expects the {context}, chat_history and {input} variables)
question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)
print("QA chain created")

QA chain created


In [17]:
# Join the two stages into one chain: history-aware retrieval, then answering
rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)

In [18]:
# Add per-session message history management around the RAG chain.
# The key names must match the variables used by the prompts ("input",
# "chat_history") and the key under which the chain returns its answer.
history_options = {
    "input_messages_key": "input",
    "history_messages_key": "chat_history",
    "output_messages_key": "answer",
}
conv_rag_chain = RunnableWithMessageHistory(rag_chain, get_session_history, **history_options)
print("Complete conversational RAG chain ready")

banner = "=" * 60
print("\n" + banner)
print("Setup complete! You can now ask questions.")
print(banner + "\n")

Complete conversational RAG chain ready

Setup complete! You can now ask questions.



In [19]:
def ask_question(question, session_id="default_session"):
    """
    Send one question through the conversational RAG chain and print the reply.

    Args:
        question: The question text, as a string
        session_id: Session identifier (reuse the same ID to keep the conversation context)

    Returns:
        The response dictionary returned by the chain; the answer is under "answer"
    """
    # Route the call to the chat history that belongs to this session
    run_config = {"configurable": {"session_id": session_id}}
    response = conv_rag_chain.invoke({"input": question}, config=run_config)

    # Echo the exchange
    print(f"\nüìù Question: {question}")
    print(f"üí° Answer: {response['answer']}\n")

    # Report how many messages this session's history now holds
    message_count = len(get_session_history(session_id).messages)
    print(f"üìö Total messages in history: {message_count}")

    return response

In [26]:
# Ask questions here. Re-run this cell with a different question each time;
# every call below uses the default session, so the chat history carries over.

# Example 1: First question
#ask_question("What is this document about?")

# Example 2: Follow-up question (uses context from previous question)
#ask_question("Can you elaborate on that?")


# Example 3: Check that the conversation context is maintained
#ask_question("What was the question i just asked?")

# Example 4: Another question
#ask_question("Tell me about the various kinds of turmoils that can hamper human right?")

# Example 5: Another question
#ask_question("What are the various funds allocated for these purposes?")

# Example 6: Another question
ask_question("Funds Administered by UN HUMAN RIGHTS")



üìù Question: Funds Administered by UN HUMAN RIGHTS
üí° Answer: According to the document, the UN Human Rights Office administers nine trust funds and three special funds, including:

1. The United Nations Trust Fund for the Support of the Activities of the High Commissioner for Human Rights
2. The United Nations Voluntary Trust Fund on Contemporary Forms of Slavery
3. The United Nations Voluntary Fund for Victims of Torture
4. The Human Rights Up Front Programme and other funds.

üìö Total messages in history: 14


{'input': 'Funds Administered by UN HUMAN RIGHTS',
 'chat_history': [HumanMessage(content='What is this document about?', additional_kwargs={}, response_metadata={}),
  AIMessage(content='This document appears to be a report from the UN Human Rights Office (OHCHR) detailing their activities and achievements in a specific region, including providing technical assistance, supporting the preparation of human rights reports, and engaging with governments and civil society organizations.', additional_kwargs={}, response_metadata={}),
  HumanMessage(content='What is this document about?', additional_kwargs={}, response_metadata={}),
  AIMessage(content='This document is about the activities and accomplishments of the United Nations Human Rights Office (OHCHR) in a particular region, highlighting their efforts to promote and protect human rights.', additional_kwargs={}, response_metadata={}),
  HumanMessage(content='Can you elaborate on that?', additional_kwargs={}, response_metadata={}),
  A

In [27]:
# Run this cell at any point to print the full conversation so far
def show_history(session_id="default_session"):
    """Print every message stored in the chat history of a session."""
    messages = get_session_history(session_id).messages
    rule = '=' * 60

    print(f"\n{rule}")
    print(f"CHAT HISTORY (Session: {session_id})")
    print(f"{rule}\n")

    for message in messages:
        # Anything that is not a human message is shown as the assistant
        if message.type == "human":
            role = "üßë Human"
        else:
            role = "ü§ñ Assistant"
        print(f"{role}: {message.content}\n")

    if not messages:
        print("No messages yet.")

    print(f"{rule}\n")


show_history()



CHAT HISTORY (Session: default_session)

üßë Human: What is this document about?

ü§ñ Assistant: This document appears to be a report from the UN Human Rights Office (OHCHR) detailing their activities and achievements in a specific region, including providing technical assistance, supporting the preparation of human rights reports, and engaging with governments and civil society organizations.

üßë Human: What is this document about?

ü§ñ Assistant: This document is about the activities and accomplishments of the United Nations Human Rights Office (OHCHR) in a particular region, highlighting their efforts to promote and protect human rights.

üßë Human: Can you elaborate on that?

ü§ñ Assistant: The document describes the work of the UN Human Rights Office (OHCHR) in various areas, including strengthening human rights work, providing training and support, and engaging with governments and civil society organizations to promote and protect human rights.

üßë Human: What was the 

In [28]:
# Run this cell to wipe the stored history and begin a fresh conversation
def reset_conversation(session_id="default_session"):
    """Remove the stored chat history of a session, if there is one."""
    if session_id not in store:
        print(f"No conversation found for session: {session_id}")
        return

    # Drop the entry; a later get_session_history() call starts from empty
    store.pop(session_id)
    print(f"‚úì Conversation reset for session: {session_id}")


reset_conversation()

‚úì Conversation reset for session: default_session
