In [1]:
import os
from dotenv import load_dotenv
# Load variables from a .env file into the process environment so the
# os.getenv() lookups in the configuration cell can see them.
load_dotenv()

True

In [2]:
def _require_env(name):
    """Return the value of environment variable `name`, failing fast if it is unset or empty.

    Raises:
        RuntimeError: when the variable is missing, with a message naming it.
    """
    value = os.getenv(name)
    if not value:
        raise RuntimeError(
            f"Missing required environment variable {name!r}; "
            "define it in your shell or .env file before running this notebook."
        )
    return value


# Assigning None into os.environ raises an opaque TypeError, so validate first.
# The key is re-exported into os.environ because no API key is passed explicitly
# to the Google clients created later in this notebook.
os.environ["GOOGLE_API_KEY"] = _require_env("GOOGLE_API_KEY")

# Astra DB connection settings (consumed when the vector store is created).
ASTRA_DB_API_ENDPOINT = _require_env("ASTRA_DB_API_ENDPOINT")
ASTRA_DB_APPLICATION_TOKEN = _require_env("ASTRA_DB_APPLICATION_TOKEN")
ASTRA_DB_KEYSPACE = "default_keyspace"
COLLECTION_NAME = "pdf_chunks_v2"

In [3]:
from langchain_community.document_loaders import UnstructuredPDFLoader

In [4]:
# The two source PDFs, given relative to the notebook's working directory.
pdf_path1 = "../Resources/Arihant GK 2025 - Himexam.pdf"
pdf_path2 = "../Resources/modern_indian_history_tutorial.pdf"

# Both files use identical loader settings, so the loaders are built pairwise
# rather than spelling each constructor call out.
loader1, loader2 = (
    UnstructuredPDFLoader(file_path=path, strategy="hi_res", mode="elements")
    for path in (pdf_path1, pdf_path2)
)

# Parse each PDF into a list of element-level documents.
docs1, docs2 = loader1.load(), loader2.load()

  from .autonotebook import tqdm as notebook_tqdm




In [5]:
# Inspect the first parsed element of the second PDF to see the metadata layout.
docs2[0]

Document(metadata={'source': '../Resources/modern_indian_history_tutorial.pdf', 'coordinates': {'points': ((-5.0, -21.00055555555558), (-5.0, 2341.999444444444), (1668.3333333333333, 2341.999444444444), (1668.3333333333333, -21.00055555555558)), 'system': 'PixelSpace', 'layout_width': 1654, 'layout_height': 2339}, 'last_modified': '2025-08-13T19:56:33', 'filetype': 'application/pdf', 'languages': ['eng'], 'page_number': 1, 'file_directory': '../Resources', 'filename': 'modern_indian_history_tutorial.pdf', 'category': 'Image', 'element_id': 'a108aab2e4e2b3da1474c2dfb43cfb5e'}, page_content='        ')

In [6]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
# Splitter for oversized elements: chunk size 1024 with an overlap of 100
# between consecutive chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=100,
    chunk_size=1024,
)

In [19]:
# Elements shorter than the splitter's chunk size are kept untouched;
# anything at or above it is handed to the splitter and replaced by its chunks.
final_docs1 = []

for element in docs1:
    is_short = len(element.page_content) < text_splitter._chunk_size
    final_docs1.extend([element] if is_short else text_splitter.split_documents([element]))

In [20]:
# Second PDF: split only the elements whose text reaches the splitter's
# chunk size; shorter elements are carried over as they are.
final_docs2 = []

for element in docs2:
    if len(element.page_content) >= text_splitter._chunk_size:
        final_docs2 += text_splitter.split_documents([element])
        continue
    final_docs2.append(element)

In [25]:
# Number of documents for the second PDF after splitting oversized elements.
len(final_docs2)

3694

In [7]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings
# Embedding model handed to the vector store.
embeddings_model = GoogleGenerativeAIEmbeddings(
    model="models/gemini-embedding-001",
)

In [8]:
from langchain_astradb import AstraDBVectorStore

# Connection settings come from the configuration cell; they are gathered in one
# mapping so the constructor call stays short.
_astra_settings = {
    "collection_name": COLLECTION_NAME,
    "api_endpoint": ASTRA_DB_API_ENDPOINT,
    "token": ASTRA_DB_APPLICATION_TOKEN,
    "namespace": ASTRA_DB_KEYSPACE,
}

# Vector store backed by Astra DB, using the embedding model defined earlier.
vstore = AstraDBVectorStore(embedding=embeddings_model, **_astra_settings)

In [51]:
# Drop documents whose text is empty or whitespace-only before indexing.
cleaned_chunks1 = [chunk for chunk in final_docs1 if chunk.page_content.strip() != ""]

In [54]:
# Number of non-blank documents kept for the first PDF.
len(cleaned_chunks1)

3947

In [52]:
# Keep only the documents of the second PDF that still have text after stripping whitespace.
cleaned_chunks2 = list(filter(lambda chunk: chunk.page_content.strip(), final_docs2))

In [55]:
# Number of non-blank documents kept for the second PDF.
len(cleaned_chunks2)

3539

In [56]:
# Add the cleaned documents of the first PDF to the vector store; the call's
# return value is kept in `ids`.
# NOTE(review): re-running this cell presumably inserts the same documents again — confirm before re-executing.
ids = vstore.add_documents(cleaned_chunks1)

In [58]:
# Add the cleaned documents of the second PDF to the same vector store; the
# call's return value is kept in `ids2`.
# NOTE(review): re-running this cell presumably inserts the same documents again — confirm before re-executing.
ids2 = vstore.add_documents(cleaned_chunks2)

In [9]:
# Retriever over the vector store. The same retriever later feeds the RAG and
# quiz chains, so `k` also controls how much context those chains receive.
retriever = vstore.as_retriever(
    search_kwargs={"k": 100}  # return the top 100 matches per query
)

# Quick sanity check of retrieval quality with a sample query.
query = "Who was Mahatma Gandhi"
retrieved_docs = retriever.invoke(query)

# Print only the text of each hit.
for doc in retrieved_docs:
    print(doc.page_content)

Gandhiji
Mohandas Karamchand Gandhi was born on 2 October 1869 at Porbandar in Gujarat.
Mahatma Gandhi Assumes Leadership
The following issues were very close to Gandhi's heart:
Gandhiji was the first Indian nationalist leader who identified his life and his manner of living with the life of the common people.
Gandhi Irwin Pact (1931)
During this agitation, Mohandas Karamchand Gandhi, took command of the nationalist movement.
Lal Bahadur Shastri and Mahatma Gandhi's Birthday
In 1916, Gandhi founded the Sabarmati Ashram at Ahmedabad where his friends and followers were to learn and, practice the ideals of truth and non- violence.
Gandhi returned to India (1915) and founded the Sabarmati Ashram (1916), Champaran Satyagraha (1917), Satyagraha at Ahmedabad (1918), Kheda Satyagraha (1918).
The Indian National Congress
Non-Cooperation Movement (1920)
Gandhiji returned to India in 1915 at the age of 46. He was keen to serve his country and his people.
Dandi March (1930)
In 1918, Mahatma Gandh

In [120]:
from langchain_google_genai import ChatGoogleGenerativeAI
# Chat model that produces the final answer in the RAG chain.
llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash")

from langchain import hub
# Fetch the "rlm/rag-prompt" prompt from the LangChain hub
# (presumably requires network access — confirm for offline runs).
prompt = hub.pull("rlm/rag-prompt")

from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

def format_docs(docs):
    """Concatenate the `page_content` of each document, separated by a blank line."""
    texts = [item.page_content for item in docs]
    return "\n\n".join(texts)

In [121]:
# Inputs for the prompt: "context" is the retriever output flattened to text by
# format_docs, and "question" is the chain input passed through unchanged.
_rag_inputs = {"context": retriever | format_docs, "question": RunnablePassthrough()}

# inputs -> prompt -> chat model -> plain string
rag_chain = _rag_inputs | prompt | llm | StrOutputParser()

In [122]:
# Smoke-test the RAG chain with a factual question.
rag_chain.invoke("When was UDAN scheme launched?")

'The UDAN scheme was launched in 2017.'

In [10]:
from langchain_google_genai import ChatGoogleGenerativeAI
# Chat model used by the quiz chains (`rag_chain1` / `rag_chain2`) built later in this notebook.
llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash")

from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

def format_docs(docs):
    """Merge documents into a single context string.

    Args:
        docs: iterable of objects exposing a `page_content` string.

    Returns:
        The `page_content` values joined with one blank line between them.
    """
    separator = "\n\n"
    return separator.join(map(lambda d: d.page_content, docs))

In [20]:
import asyncio

# Prompt used to generate a quiz question; `{context}` is the only placeholder.
template1 = """
You are a quiz master. Your task is to ask one single question related to Indian History and General Knowledge.

Context:
{context}

"""

# Prompt used to grade an answer; placeholders are `{context}` and `{details}`
# (the question/answer pair).
# NOTE(review): "aswer" in the prompt text below looks like a typo for "answer";
# left untouched here because the string is sent to the model as-is.
template2 = """
You are a quiz master. You are provided a question and an aswer and you have to check if the answer to that question is correct or not.
If correct then tell 'Yes! you are correct.'. Else tell 'You are incorrect.' and print correct answer to that question.

Context:
{context}

Question and answer:
{details}

"""

# Turn the two template strings into chat prompt objects.
prompt1, prompt2 = (
    ChatPromptTemplate.from_template(text) for text in (template1, template2)
)

# Question generator: retrieved context -> prompt1 -> chat model -> plain string.
_question_inputs = {"context": retriever | format_docs}
rag_chain1 = _question_inputs | prompt1 | llm | StrOutputParser()

# Answer checker: retrieved context plus the passed-through question/answer
# text -> prompt2 -> chat model -> plain string.
_check_inputs = {"context": retriever | format_docs, "details": RunnablePassthrough()}
rag_chain2 = _check_inputs | prompt2 | llm | StrOutputParser()

async def question(topic="Start"):
    """Generate one quiz question without blocking the event loop.

    Args:
        topic: Text passed as the input of `rag_chain1`. Defaults to "Start",
            the value that used to be hard-coded, so existing callers are
            unaffected; pass a different text to vary the chain input.

    Returns:
        The result of `rag_chain1.invoke(topic)`.
    """
    # The chain call is synchronous, so it runs in a worker thread.
    return await asyncio.to_thread(rag_chain1.invoke, topic)

async def check(ques, answer):
    """Ask `rag_chain2` to judge `answer` against `ques`.

    The question and answer are combined into one text, which is the chain's
    only input. The synchronous chain call runs in a worker thread so the
    event loop is not blocked.

    Returns:
        The result of `rag_chain2.invoke` for the combined text.
    """
    payload = f"Question: {ques}\n Answer: {answer}"
    verdict = await asyncio.to_thread(rag_chain2.invoke, payload)
    return verdict

In [15]:
ques = question()

In [16]:
# Echo `ques` to inspect the generated question.
ques

"Often referred to as the 'Father of the Indian Constitution,' which prominent figure played a pivotal role in drafting India's Constitution after independence?"

In [17]:
print(check(ques, "Mahatma Gandhi"))

You are incorrect.
The correct answer is Dr. B.R. Ambedkar.


In [21]:
def _wants_to_continue(reply):
    """Interpret the user's reply to the "continue?" prompt without raising."""
    text = reply.strip().lower()
    if text in {"y", "yes"}:
        return True
    try:
        # Numeric replies keep their original meaning: only 1 continues.
        return int(text) == 1
    except ValueError:
        # Anything unparseable used to crash int(); treat it as "stop".
        return False


async def main():
    """Run the interactive quiz loop until the user declines to continue.

    Each round generates a question, prints it, reads the user's answer,
    prints the verdict returned by `check`, and then asks whether to play
    another round. Blocking `input` calls run in a worker thread so the
    event loop stays free.
    """
    while True:
        ques = await question()
        print(ques)
        answer = await asyncio.to_thread(input, "Your answer: ")
        print(await check(ques, answer))
        reply = await asyncio.to_thread(
            input, "Do you want to continue? (1/y = yes, anything else = no): "
        )
        if not _wants_to_continue(reply):
            break

# Start the quiz loop. Top-level `await` is used because this runs in a
# notebook cell; a plain Python script would need an explicit event loop instead.
await main()

Welcome, contestants! Here is your single question:

Often referred to as the 'Father of the Indian Constitution,' who was the principal architect of India's foundational legal document?
Yes! you are correct.
Welcome, contestant! Here is your question:

The famous 'Dandi March', a pivotal event in India's independence movement, was led by which iconic Indian leader?
Yes! you are correct.
