In [None]:
from langchain.document_loaders import PyMuPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Load the PDF
loader = PyMuPDFLoader("data/RAMAYANA.pdf")
docs = loader.load()

# Split into chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
chunks = text_splitter.split_documents(docs)


In [6]:
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings

# Initialize embedding model
embedding = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2",
                                        model_kwargs={"device": "cpu"})

# Create FAISS vector store
vectorstore = FAISS.from_documents(chunks, embedding)


In [12]:
query = "What is the story behind Sita's swayamvar?"
top_k = 8
retrieved_docs = vectorstore.similarity_search(query, k=top_k)
for i, doc in enumerate(retrieved_docs):
    print(f"Document {i+1}:\n{doc.page_content[0:50]}\n")
    print("-" * 80)


Document 1:
1.3 SITA'S SWAYAMVAR 
 
Vishwamitra, Rama and Laks

--------------------------------------------------------------------------------
Document 2:
1.3 SITA'S SWAYAMVAR .............................

--------------------------------------------------------------------------------
Document 3:
to look exactly like Sita. He then took this fake 

--------------------------------------------------------------------------------
Document 4:
extremely beautiful Sita made an ideal couple. 
 


--------------------------------------------------------------------------------
Document 5:
response. So, she made a fervent plea to the trees

--------------------------------------------------------------------------------
Document 6:
Rama and Lakshmana at once knew that Sita was in d

--------------------------------------------------------------------------------
Document 7:
This city was ruled by Janaka, loved and respected

------------------------------------------------------------------

In [24]:
from langchain.chat_models import ChatOpenAI
from operator import itemgetter
from langchain.schema import Document
from typing import List, Tuple

llm = ChatOpenAI(model_name="gpt-4", temperature=0)

def get_scoring_prompt(doc: Document, query: str) -> str:
    """
    Create a prompt for the LLM to score the relevance of a document to a query.
    """
    return f"""
            You are a helpful assistant. A user asked a question:
            "{query}"

            Below is a candidate context:
            \"\"\"{doc.page_content}\"\"\"

            Rate how well this context answers the question on a scale of 1 to 10, where 10 means very relevant and 1 means not relevant.
            Just return the number.
            """

def score_documents_with_llm(query: str, docs: List[Document]) -> List[Tuple[Document, int]]:
    """
    Score and rerank documents for a query using an LLM.
    Returns a list of (Document, score) tuples, sorted by score descending.
    """
    scored_docs = []
    for doc in docs:
        prompt = get_scoring_prompt(doc, query)
        score_str = llm.invoke(prompt)
        # print(f"Score: {score_str}, Type: {type(score_str)}")
        try:
            score_value = score_str.content
            # print(f"Score value: {score_value}")
            score = int(score_value)
        except Exception:
            score = 0
        scored_docs.append((doc, score))
    return sorted(scored_docs, key=itemgetter(1), reverse=True)

# Usage:
reranked_docs = score_documents_with_llm(query, retrieved_docs)


In [25]:
for i, (doc, score) in enumerate(reranked_docs):
    print(f"Document {i+1} (Score: {score}):\n{doc.page_content[0:50]}\n")
    print("-" * 80)

Document 1 (Score: 1):
1.3 SITA'S SWAYAMVAR .............................

--------------------------------------------------------------------------------
Document 2 (Score: 1):
to look exactly like Sita. He then took this fake 

--------------------------------------------------------------------------------
Document 3 (Score: 1):
extremely beautiful Sita made an ideal couple. 
 


--------------------------------------------------------------------------------
Document 4 (Score: 1):
response. So, she made a fervent plea to the trees

--------------------------------------------------------------------------------
Document 5 (Score: 1):
Rama and Lakshmana at once knew that Sita was in d

--------------------------------------------------------------------------------
Document 6 (Score: 1):
been built around the city and it is difficult to 

--------------------------------------------------------------------------------
Document 7 (Score: 0):
1.3 SITA'S SWAYAMVAR 
 
Vishwamitra, Rama

In [26]:
from langchain.chains.qa_with_sources import load_qa_with_sources_chain

# Select top reranked docs
top_docs = [doc for doc, score in reranked_docs[:3]]

# QA generation
qa_chain = load_qa_with_sources_chain(llm, chain_type="stuff")
response = qa_chain({"input_documents": top_docs, "question": query}, return_only_outputs=True)

print("📖 Answer:\n", response["output_text"])


📖 Answer:
 The document does not provide information on the story behind Sita's swayamvar.
SOURCES: data/RAMAYANA.pdf
