In [1]:
from typing import Optional

from langchain.document_loaders import PyMuPDFLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS

In [2]:

# === Step 1: Load the PDF ===
# Path is relative to the notebook's working directory.
pdf_path = "data/RAMAYANA.pdf"
loader = PyMuPDFLoader(pdf_path)
# `pages` is iterated page-by-page further down; each item exposes `.page_content`.
pages = loader.load()


In [3]:
# === Step 2: Tag each page with facet metadata ===
# Each entry is (chapter title, first page index, stop page index); the stop
# index is exclusive, exactly as in range(first, stop).
#
# NOTE(review): consecutive chapters share a boundary index (6, 8, 21, 24, 29
# and 37 each appear in two ranges) and index 27 appears in none. Entry order
# matters for any lookup that takes the first matching chapter. Confirm the
# boundaries against the PDF before relying on them.
_chapter_bounds = [
    ("THE BIRTH OF RAMA", 3, 4),
    ("The Valiant Princes", 4, 7),
    ("SITA'S SWAYAMVAR", 6, 9),
    ("KAIKEYI AND HER WISHES", 8, 22),
    ("The demons in the forests", 21, 25),
    ("The Kidnapping of Sita", 24, 27),
    ("Rama searches for Sita", 28, 30),
    ("The land of the monkeys", 29, 34),
    ("Hanuman meets Sita - Lanka is destroyed", 34, 38),
    # Last chapter: extend the stop index to the end of the document once known.
    ("The War", 37, 44),
]

# Chapter title -> range of page indices, in the order listed above.
chapter_map = {title: range(first, stop) for title, first, stop in _chapter_bounds}


In [4]:
# Create documents with facets
# Characters looked for on every page, listed in the order they are reported.
known_characters = ("Sita", "Rama")

tagged_documents = []
for page_index, page in enumerate(pages):
    text = page.page_content

    # The first chapter whose range contains this index wins; pages outside
    # every range fall back to "Unknown Chapter".
    chapter = next(
        (title for title, page_range in chapter_map.items() if page_index in page_range),
        "Unknown Chapter",
    )

    # Simulated facet tagging (plain substring checks).
    # Record EVERY known character mentioned on the page. An if/elif chain
    # would tag a page naming both Sita and Rama as ["Sita"] only, and a later
    # filter on "Rama" would then silently drop it.
    characters = [name for name in known_characters if name in text]

    # "battle" is matched case-insensitively, "Ravana" case-sensitively.
    theme = "War" if "battle" in text.lower() or "Ravana" in text else "Journey"

    tagged_documents.append(
        Document(
            page_content=text,
            metadata={
                "chapter": chapter,
                # Position of the page in `pages` (enumerate index, starts at 0).
                "page_number": page_index,
                "theme": theme,
                "characters": characters,
            },
        )
    )




In [6]:
# === Step 3: Chunk the documents ===
splitter_options = {"chunk_size": 300, "chunk_overlap": 30}
splitter = RecursiveCharacterTextSplitter(**splitter_options)
# Later cells read chapter/theme/characters from the metadata of retrieved
# chunks, so the split is expected to carry each page's metadata along.
chunks = splitter.split_documents(tagged_documents)

# === Step 4: Create vector store with facets ===
embedding_options = {
    "model_name": "all-MiniLM-L6-v2",
    "model_kwargs": {"device": "cpu"},  # run the embedding model on CPU
}
embedding_model = HuggingFaceEmbeddings(**embedding_options)
vectorstore = FAISS.from_documents(chunks, embedding_model)

  embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2",
  from .autonotebook import tqdm as notebook_tqdm


In [7]:
query = "Tell me about Rama's early life."
# Over-fetch (k=50) because the facet filter below runs after retrieval.
retriever = vectorstore.as_retriever(search_kwargs={"k": 50})
# `invoke` is the supported retriever entry point; `get_relevant_documents`
# is deprecated in recent LangChain releases and emits a warning.
results = retriever.invoke(query)

# Apply facet filter manually (e.g., filter by chapter or character)
filtered = [
    doc
    for doc in results
    if "Rama" in doc.metadata.get("characters", [])
    and doc.metadata.get("chapter") == "THE BIRTH OF RAMA"
]

# Display results
for i, doc in enumerate(filtered):
    print(f"\n--- Result {i+1} ---")
    print("Chapter:", doc.metadata.get("chapter"))
    print("Theme:", doc.metadata.get("theme"))
    print("Characters:", doc.metadata.get("characters"))
    print("Content:", doc.page_content[:300], "\n...")



--- Result 1 ---
Chapter: THE BIRTH OF RAMA
Theme: Journey
Characters: ['Rama']
Content: elders and gained the love and affection of people of Ayodhya. 
The brothers loved each other very much too. Lakshmana was a very 
close companion to Rama.Bharatha and Shatrugna were the other 
two inseparables. 
 
King Dasharatha felt very proud of his sons. One day he was 
...


  results = retriever.get_relevant_documents(query)


In [8]:
def query_with_constraints(query_text: str, chapter_filter: Optional[str] = None, k: int = 5,
                           fetch_k: int = 50, store=None) -> list:
    """Retrieve the top-k chunks for a query, optionally restricted to one chapter.

    The chapter restriction is applied AFTER similarity search, so more
    candidates than `k` are fetched first and then filtered down.

    Args:
        query_text: Natural-language query passed to the retriever.
        chapter_filter: If truthy, keep only documents whose ``chapter``
            metadata equals this value exactly. ``None`` or ``""`` disables
            filtering.
        k: Maximum number of documents to return. Values <= 0 yield ``[]``.
        fetch_k: Number of candidates requested before filtering. Raised to
            `k` when smaller, so a large `k` is never capped by the pool size.
        store: Vector store to query; must provide
            ``as_retriever(search_kwargs=...)``. Defaults to the
            notebook-level ``vectorstore``.

    Returns:
        Up to `k` documents in retriever order; empty if nothing matches.
    """
    if k <= 0:
        # A negative slice bound would drop items from the tail instead of limiting.
        return []

    if store is None:
        store = vectorstore  # notebook-level FAISS store

    # Never fetch fewer candidates than the caller asked to get back.
    candidate_count = max(fetch_k, k)
    retriever = store.as_retriever(search_kwargs={"k": candidate_count})
    # `invoke` replaces the deprecated `get_relevant_documents`.
    results = retriever.invoke(query_text)

    # Apply user-defined filter
    if chapter_filter:
        results = [doc for doc in results if doc.metadata.get("chapter") == chapter_filter]

    return results[:k]  # top-k after filtering

In [10]:
user_query = "What did Hanuman do in Lanka?"
user_chapter_constraint = "Hanuman meets Sita - Lanka is destroyed"

filtered_results = query_with_constraints(user_query, user_chapter_constraint)

# Print each hit: a numbered header, its chapter and page index, then the
# first 300 characters of the chunk text.
for position, hit in enumerate(filtered_results, start=1):
    hit_meta = hit.metadata
    print(f"\n--- Result {position} ---")
    print(f"Chapter: {hit_meta.get('chapter')}")
    print(f"Page: {hit_meta.get('page_number')}")
    print(hit.page_content[:300], "...")


--- Result 1 ---
Chapter: Hanuman meets Sita - Lanka is destroyed
Page: 37
Hanuman had burnt down Lanka; Ravana had the city rebuilt 
immediately. Anjaneya who was aware of this told Rama “Lord! Once 
we cross the ocean, Lanka will be ours. Have no doubt about it. I 
shall summon Angada, Neela, Jabavanta and many more valiant 
Vanaras and bears and be ready to leave.” ...

--- Result 2 ---
Chapter: Hanuman meets Sita - Lanka is destroyed
Page: 35
defeated Devendra. He used one of his powerful weapons on 
Hanuman, imprisoned him and dragged him to Ravenna’s court. 
When Hanuman was brought before the king, he was very impressed 
with the ten-headed Ravana. His court was very grand and dazzling. ...

--- Result 3 ---
Chapter: Hanuman meets Sita - Lanka is destroyed
Page: 35
garden” asked Ravana. 
“Sir” replied Hanuman “I am Rama’s messenger. I came to Lanka in 
search of Sita. You have committed a great mistake by abducting 
Sita. If you want to live, you return Sita to Rama gracefully 