In [1]:
print("reranking")

reranking


In [3]:
from langchain_community.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.document_loaders import PyMuPDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.retrievers import EnsembleRetriever
from langchain.schema import Document
from langchain_community.retrievers import BM25Retriever

In [4]:
from langchain_community.document_loaders import (
    PyPDFLoader,
    PyMuPDFLoader
)

In [5]:
# Step1: Loaders
print("\n PyMuPDFLoader")
try:
    loader = PyMuPDFLoader("bengal.pdf")
    docs = loader.load()

    print(f"  Loaded {len(docs)} pages")
    print(f"  Includes detailed metadata")
    print(docs)
except Exception as e:
    print(f"  Error: {e}")


 PyMuPDFLoader
  Loaded 5 pages
  Includes detailed metadata
[Document(metadata={'producer': 'Microsoft® Word for Microsoft 365', 'creator': 'Microsoft® Word for Microsoft 365', 'creationdate': '2025-09-14T11:37:04+05:30', 'source': 'bengal.pdf', 'file_path': 'bengal.pdf', 'total_pages': 5, 'format': 'PDF 1.7', 'title': '', 'author': 'Saikat Santra', 'subject': '', 'keywords': '', 'moddate': '2025-09-14T11:37:04+05:30', 'trapped': '', 'modDate': "D:20250914113704+05'30'", 'creationDate': "D:20250914113704+05'30'", 'page': 0}, page_content='Bengal: A Historical, Cultural, and Socio-Economic Study \nIntroduction \nBengal, one of the most historically significant regions of South Asia, is today divided \ninto the Indian state of West Bengal and the sovereign nation of Bangladesh. With a \ncombined population of over 250 million people, Bengal represents one of the most \ndensely populated and culturally vibrant areas of the world. For centuries, the region \nhas been known as a land of a

In [6]:
#step 2 : Text Splitter
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Create text splitter
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=400,    # size of each chunk
    chunk_overlap=80   # overlap to maintain context
)

# Split documents into chunks
docs = text_splitter.split_documents(docs)
print(f"Total chunks: {len(docs)}")

Total chunks: 31


In [8]:
### FAISS and Huggingface model Embeddings

from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings

embedding_model=HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
vectorstore=FAISS.from_documents(docs,embedding_model)
retriever=vectorstore.as_retriever(search_kwargs={"k":8})

In [9]:
retriever

VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x00000226E68F9D50>, search_kwargs={'k': 8})

In [12]:
## prompt and use the llm
from dotenv import load_dotenv
import os
load_dotenv()

from langchain.chat_models import init_chat_model
os.environ["GROQ_API_KEY"]=os.getenv("GROQ_API_KEY")
llm=init_chat_model("groq:gemma2-9b-it")
llm

ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x00000226BF3EC850>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x00000226BF4C3D90>, model_name='gemma2-9b-it', model_kwargs={}, groq_api_key=SecretStr('**********'))

In [13]:
# Prompt Template
prompt = PromptTemplate.from_template("""
You are a helpful assistant. Your task is to rank the following documents from most to least relevant to the user's question.

User Question: "{question}"

Documents:
{documents}

Instructions:
- Think about the relevance of each document to the user's question.
- Return a list of document indices in ranked order, starting from the most relevant.

Output format: comma-separated document indices (e.g., 2,1,3,0,...)
""")

In [14]:
## user query
query="tell me about the history of Bengal"

In [15]:
retrieved_docs=retriever.invoke(query)
retrieved_docs

[Document(id='6100712d-47f7-4300-ac34-0981d060cea8', metadata={'producer': 'Microsoft® Word for Microsoft 365', 'creator': 'Microsoft® Word for Microsoft 365', 'creationdate': '2025-09-14T11:37:04+05:30', 'source': 'bengal.pdf', 'file_path': 'bengal.pdf', 'total_pages': 5, 'format': 'PDF 1.7', 'title': '', 'author': 'Saikat Santra', 'subject': '', 'keywords': '', 'moddate': '2025-09-14T11:37:04+05:30', 'trapped': '', 'modDate': "D:20250914113704+05'30'", 'creationDate': "D:20250914113704+05'30'", 'page': 0}, page_content='Bengal. It explores Bengal’s history, geography, culture, economy, political \ndevelopments, and the challenges faced in the modern era. The discussion emphasizes \nBengal’s global significance as a land that has nurtured poets, philosophers, \nrevolutionaries, and reformers while also enduring famines, partitions, and struggles for \nidentity. \n \nHistorical Background \nAncient Bengal'),
 Document(id='c8f917db-cee8-486d-b0cc-db6ff1c42a6f', metadata={'producer': 'Mi

In [16]:
chain=prompt| llm | StrOutputParser()
chain

PromptTemplate(input_variables=['documents', 'question'], input_types={}, partial_variables={}, template='\nYou are a helpful assistant. Your task is to rank the following documents from most to least relevant to the user\'s question.\n\nUser Question: "{question}"\n\nDocuments:\n{documents}\n\nInstructions:\n- Think about the relevance of each document to the user\'s question.\n- Return a list of document indices in ranked order, starting from the most relevant.\n\nOutput format: comma-separated document indices (e.g., 2,1,3,0,...)\n')
| ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x00000226BF3EC850>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x00000226BF4C3D90>, model_name='gemma2-9b-it', model_kwargs={}, groq_api_key=SecretStr('**********'))
| StrOutputParser()

In [17]:
doc_lines = [f"{i+1}. {doc.page_content}" for i, doc in enumerate(retrieved_docs)]
formatted_docs = "\n".join(doc_lines)

In [18]:
doc_lines

['1. Bengal. It explores Bengal’s history, geography, culture, economy, political \ndevelopments, and the challenges faced in the modern era. The discussion emphasizes \nBengal’s global significance as a land that has nurtured poets, philosophers, \nrevolutionaries, and reformers while also enduring famines, partitions, and struggles for \nidentity. \n \nHistorical Background \nAncient Bengal',
 '2. identity. \n \nHistorical Background \nAncient Bengal \nThe earliest historical references to Bengal appear in ancient Indian texts and \ninscriptions. The region was known as Vanga or Banga, and was inhabited by people \nskilled in navigation and trade. Archaeological evidence from sites such as Wari-\nBateshwar suggests early urban settlements dating back to the 4th century BCE. Bengal',
 '3. history reflects cycles of prosperity and exploitation, creativity and struggle, unity and \ndivision. Whether in the streets of Kolkata or the villages of Bangladesh, Bengal remains \nalive with vib

In [19]:
formatted_docs

'1. Bengal. It explores Bengal’s history, geography, culture, economy, political \ndevelopments, and the challenges faced in the modern era. The discussion emphasizes \nBengal’s global significance as a land that has nurtured poets, philosophers, \nrevolutionaries, and reformers while also enduring famines, partitions, and struggles for \nidentity. \n \nHistorical Background \nAncient Bengal\n2. identity. \n \nHistorical Background \nAncient Bengal \nThe earliest historical references to Bengal appear in ancient Indian texts and \ninscriptions. The region was known as Vanga or Banga, and was inhabited by people \nskilled in navigation and trade. Archaeological evidence from sites such as Wari-\nBateshwar suggests early urban settlements dating back to the 4th century BCE. Bengal\n3. history reflects cycles of prosperity and exploitation, creativity and struggle, unity and \ndivision. Whether in the streets of Kolkata or the villages of Bangladesh, Bengal remains \nalive with vibrant tr

In [20]:
response=chain.invoke({"question":query,"documents":formatted_docs})
response

'Here\'s a ranking of the documents based on relevance to the user\'s question "tell me about the history of Bengal":\n\n1, 2, 3, 5, 6, 7, 8, 4 \n\n**Explanation:**\n\n* **Document 1** provides a comprehensive overview of Bengal\'s history, geography, culture, and economy, making it the most relevant.\n* **Document 2** focuses specifically on the ancient history of Bengal, offering valuable historical context.\n* **Document 3** highlights key historical themes and dynamics in Bengal, emphasizing its rich and complex past.\n* **Document 5** delves into early historical references to Bengal and its interactions with major empires.\n* **Document 6** outlines Bengal\'s history under the Mughal Empire, a significant period in its development.\n* **Document 7** is a study of Bengal\'s history, culture, and socio-economics, providing a broad historical perspective.\n* **Document 8** discusses Bengal\'s geography and its influence on history, including its vulnerability to natural disasters. \

In [21]:
# Step 5: Parse and rerank
indices = [int(x.strip()) - 1 for x in response.split(",") if x.strip().isdigit()]
indices

[1, 2, 4, 5, 6, 7]

In [22]:
retrieved_docs

[Document(id='6100712d-47f7-4300-ac34-0981d060cea8', metadata={'producer': 'Microsoft® Word for Microsoft 365', 'creator': 'Microsoft® Word for Microsoft 365', 'creationdate': '2025-09-14T11:37:04+05:30', 'source': 'bengal.pdf', 'file_path': 'bengal.pdf', 'total_pages': 5, 'format': 'PDF 1.7', 'title': '', 'author': 'Saikat Santra', 'subject': '', 'keywords': '', 'moddate': '2025-09-14T11:37:04+05:30', 'trapped': '', 'modDate': "D:20250914113704+05'30'", 'creationDate': "D:20250914113704+05'30'", 'page': 0}, page_content='Bengal. It explores Bengal’s history, geography, culture, economy, political \ndevelopments, and the challenges faced in the modern era. The discussion emphasizes \nBengal’s global significance as a land that has nurtured poets, philosophers, \nrevolutionaries, and reformers while also enduring famines, partitions, and struggles for \nidentity. \n \nHistorical Background \nAncient Bengal'),
 Document(id='c8f917db-cee8-486d-b0cc-db6ff1c42a6f', metadata={'producer': 'Mi

In [23]:
reranked_docs = [retrieved_docs[i] for i in indices if 0 <= i < len(retrieved_docs)]
reranked_docs

[Document(id='c8f917db-cee8-486d-b0cc-db6ff1c42a6f', metadata={'producer': 'Microsoft® Word for Microsoft 365', 'creator': 'Microsoft® Word for Microsoft 365', 'creationdate': '2025-09-14T11:37:04+05:30', 'source': 'bengal.pdf', 'file_path': 'bengal.pdf', 'total_pages': 5, 'format': 'PDF 1.7', 'title': '', 'author': 'Saikat Santra', 'subject': '', 'keywords': '', 'moddate': '2025-09-14T11:37:04+05:30', 'trapped': '', 'modDate': "D:20250914113704+05'30'", 'creationDate': "D:20250914113704+05'30'", 'page': 0}, page_content='identity. \n \nHistorical Background \nAncient Bengal \nThe earliest historical references to Bengal appear in ancient Indian texts and \ninscriptions. The region was known as Vanga or Banga, and was inhabited by people \nskilled in navigation and trade. Archaeological evidence from sites such as Wari-\nBateshwar suggests early urban settlements dating back to the 4th century BCE. Bengal'),
 Document(id='f9b3f196-8708-4d03-a7d9-afd934495b43', metadata={'producer': 'Mi

In [24]:
# Step 6: Show results
print("\n📊 Final Reranked Results:")
for i, doc in enumerate(reranked_docs, 1):
    print(f"\nRank {i}:\n{doc.page_content}")


📊 Final Reranked Results:

Rank 1:
identity. 
 
Historical Background 
Ancient Bengal 
The earliest historical references to Bengal appear in ancient Indian texts and 
inscriptions. The region was known as Vanga or Banga, and was inhabited by people 
skilled in navigation and trade. Archaeological evidence from sites such as Wari-
Bateshwar suggests early urban settlements dating back to the 4th century BCE. Bengal

Rank 2:
history reflects cycles of prosperity and exploitation, creativity and struggle, unity and 
division. Whether in the streets of Kolkata or the villages of Bangladesh, Bengal remains 
alive with vibrant traditions, linguistic pride, and social dynamism. 
The story of Bengal is not merely regional; it is global. From Tagore’s poetry to the

Rank 3:
was mentioned in Greek accounts, including those of Megasthenes, as a rich land on 
the eastern frontier of India. 
Bengal came under the influence of major dynasties such as the Mauryas (321–185 
BCE) and the Guptas (4th–