In [1]:
# =========================
# Query Expansion & Answer Generation
# =========================

import json
import subprocess

import numpy as np
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma

In [2]:
# Embedding model handed to Chroma as its embedding function.
embeddings = HuggingFaceEmbeddings(model_name="intfloat/multilingual-e5-large")

# Chroma collection backed by the on-disk directory below; every retrieval
# helper in this notebook reads from this module-level `vectorstore`.
vectorstore = Chroma(
    persist_directory="./chroma_ncert_db",
    embedding_function=embeddings,
    collection_name="ncert_multilingual",
)

print("✅ Chroma DB loaded")

  embeddings = HuggingFaceEmbeddings(
  from .autonotebook import tqdm as notebook_tqdm
  vectorstore = Chroma(


✅ Chroma DB loaded


In [13]:
def retrieve_chunks(query, k=5):
    """Return the text of the chunks most similar to ``query``.

    Args:
        query: Search string. It is passed to the vector store unchanged;
            no "query:" prefix is added.
        k: Number of results requested from the vector store.

    Returns:
        list[str]: ``page_content`` of each hit, in the order the store
        returned them.
    """
    # NOTE(review): the E5 model card recommends "query: " / "passage: "
    # prefixes. Confirm how the index was built before relying on the
    # unprefixed query used here.
    hits = vectorstore.similarity_search(query, k=k)

    texts = []
    for hit in hits:
        texts.append(hit.page_content)
    return texts

In [40]:
from langdetect import DetectorFactory, detect

# langdetect is non-deterministic by default, which shows up mostly on short
# or ambiguous text such as one-line student questions. Fixing the seed makes
# repeated runs of the notebook pick the same language for the same input.
DetectorFactory.seed = 0


def detect_language(text):
    """Best-effort language detection for a piece of text.

    Args:
        text: The text to classify.

    Returns:
        str: The language code reported by ``langdetect.detect``, or ``"en"``
        when detection raises (for example on empty text).
    """
    try:
        return detect(text)
    except Exception:
        # Deliberate fallback to English. Catching Exception rather than using
        # a bare `except:` lets KeyboardInterrupt / SystemExit propagate, so
        # the kernel can still be interrupted.
        return "en"

In [45]:
def mistral_query_expansion_ncert_multilingual(query, n_variants=4):
    """Ask the local Mistral model (via ``ollama``) for alternative search queries.

    The call is best-effort: whenever the model call fails or its output
    cannot be turned into a usable list, the original query is returned as
    the only element so that retrieval can still proceed.

    Args:
        query: The user's question.
        n_variants: Number of variants requested in the prompt; also the
            maximum number of variants returned.

    Returns:
        list[str]: Up to ``n_variants`` distinct, non-empty query strings,
        or ``[query]`` as the fallback.
    """
    lang = detect_language(query)

    prompt = f"""
You are assisting an NCERT textbook–based retrieval system.

The user question is written in language code: {lang}

TASK:
Generate {n_variants} alternative search queries in THE SAME LANGUAGE as the question.

STRICT RULES:
- Do NOT change the language
- Use ONLY NCERT textbook terminology
- Do NOT translate to another language
- Do NOT answer the question
- Do NOT add examples or applications

QUESTION:
{query}

OUTPUT:
Return ONLY a JSON array of strings written in the SAME LANGUAGE.
"""

    result = subprocess.run(
        ["ollama", "run", "mistral"],
        input=prompt.encode("utf-8"),
        capture_output=True
    )

    # A failed model call has no trustworthy stdout; fall back immediately.
    if result.returncode != 0:
        return [query]

    raw = result.stdout.decode("utf-8", errors="replace")

    # The model frequently wraps the array in prose or a ```json fence, which
    # makes json.loads fail on the raw text. Parse only the outermost [...]
    # span instead.
    start = raw.find("[")
    end = raw.rfind("]")
    if start == -1 or end <= start:
        return [query]

    try:
        # The slice starts with "[", so a successful parse is always a list.
        parsed = json.loads(raw[start:end + 1])
    except ValueError:
        return [query]

    # Keep only distinct, non-blank strings: each item is later passed to the
    # vector store as a search query.
    variants = []
    for item in parsed:
        if not isinstance(item, str):
            continue
        item = item.strip()
        if item and item not in variants:
            variants.append(item)

    return variants[:max(n_variants, 0)] or [query]

In [46]:
# Quick check of the expansion step on a Hindi query ("Explain Newton's first
# law"). The returned list of variants is shown as the cell output.
mistral_query_expansion_ncert_multilingual(
    "न्यूटन का पहला नियम समझाइए"
)

['NYAY 1: अर्थात, किसी सदिश पर हल होने के समय, तापको दूर होना चाहिए',
 'First Law of Motion: This means that in order for a body to move, the force acting on it must be non-zero',
 "Rule 1 of Newton's Laws: When an object is in motion, its speed should not decrease unless a net external force acts upon it",
 'Prathama Nyay: Jab tak ek objekt ko teerth rakhta hai, toh use pichane ki dikhi dekhiya jaa sakta hai']

In [47]:
# Simple in-memory conversation store shared by the query handlers below.
# It is a plain module-level dict that the handlers mutate in place.
conversation_memory = {
    "history": [],   # full answers, appended by mistral_answer_with_memory
    "summaries": []  # short summaries; the last 3 are fed into answer prompts
}

In [48]:
def mistral_answer_with_memory(question, context_chunks, memory):
    """Answer a question with the local Mistral model, using prior summaries.

    Args:
        question: The student's question.
        context_chunks: Retrieved context. Each item may be a plain string or
            a ``{"text": ..., "metadata": ...}`` dict as produced by
            ``retrieve_with_expansion``. Only the first 5 items are used.
        memory: Dict with ``"summaries"`` and ``"history"`` lists. The last 3
            summaries are put into the prompt and the new answer is appended
            to ``memory["history"]``.

    Returns:
        str: The model's answer with surrounding whitespace stripped.

    Raises:
        RuntimeError: If the ``ollama`` process exits with a non-zero status.
            Nothing is appended to the history in that case.
    """
    lang = detect_language(question)

    # Accept both plain-text chunks and retrieval dicts; joining dicts
    # directly would raise TypeError.
    texts = [
        chunk["text"] if isinstance(chunk, dict) else chunk
        for chunk in context_chunks[:5]
    ]
    context = "\n\n".join(texts)

    previous_summary = "\n".join(memory["summaries"][-3:])  # last 3 summaries

    prompt = f"""
You are an NCERT textbook-based teaching assistant.

The question is written in language code: {lang}

PREVIOUS UNDERSTANDING (summary so far):
{previous_summary if previous_summary else "None"}

CURRENT CONTEXT (NCERT only):
{context}

QUESTION:
{question}

RULES:
- Answer in the SAME LANGUAGE as the question
- Use ONLY NCERT content
- Be student-friendly
- Assume the student may ask follow-up doubts

ANSWER:
"""

    result = subprocess.run(
        ["ollama", "run", "mistral"],
        input=prompt.encode("utf-8"),
        capture_output=True
    )

    # Fail loudly instead of storing an empty "answer" in the conversation
    # memory when the model call did not succeed.
    if result.returncode != 0:
        stderr = result.stderr.decode("utf-8", errors="replace").strip()
        raise RuntimeError(
            f"ollama exited with status {result.returncode}: {stderr}"
        )

    answer = result.stdout.decode("utf-8").strip()
    memory["history"].append(answer)

    return answer

In [49]:
def mistral_summarise_answer(answer, lang):
    """Summarise an answer with the local Mistral model.

    Args:
        answer: The full answer text to compress.
        lang: Language code of the conversation (as returned by
            ``detect_language``). It is included in the prompt so the model
            is told which language to keep.

    Returns:
        str: The model's summary with surrounding whitespace stripped.

    Raises:
        RuntimeError: If the ``ollama`` process exits with a non-zero status.
    """
    # `lang` was previously accepted but never used, leaving "SAME LANGUAGE"
    # with nothing to anchor it; the language code is now stated explicitly.
    prompt = f"""
Summarise the following answer for a student.

The answer is written in language code: {lang}

RULES:
- Use the SAME LANGUAGE
- Keep it short (3–5 bullet points or lines)
- Focus on key NCERT concepts only

ANSWER:
{answer}

SUMMARY:
"""

    result = subprocess.run(
        ["ollama", "run", "mistral"],
        input=prompt.encode("utf-8"),
        capture_output=True
    )

    # Do not hand back an empty summary for a failed model call; callers
    # store the summary in the conversation memory.
    if result.returncode != 0:
        stderr = result.stderr.decode("utf-8", errors="replace").strip()
        raise RuntimeError(
            f"ollama exited with status {result.returncode}: {stderr}"
        )

    return result.stdout.decode("utf-8").strip()

In [50]:
def handle_student_query(question):
    """Retrieve context, answer the question and store a summary of the answer.

    NOTE: a later cell defines ``handle_student_query`` again (with source
    reporting); whichever definition was executed last is the one in effect.

    Args:
        question: The student's question.

    Returns:
        dict: ``{"answer": <full answer>, "summary": <short summary>}``.
        The summary is also appended to ``conversation_memory["summaries"]``.
    """
    # Step 1: retrieve
    retrieved = retrieve_with_expansion(question)
    # retrieve_with_expansion returns {"text", "metadata"} dicts, while
    # mistral_answer_with_memory joins plain strings, so pass the text only.
    chunks = [
        doc["text"] if isinstance(doc, dict) else doc
        for doc in retrieved
    ]

    # Detect the language once, before the (slow) model calls.
    lang = detect_language(question)

    # Step 2: answer with memory
    answer = mistral_answer_with_memory(
        question,
        chunks,
        conversation_memory
    )

    # Step 3: summarise
    summary = mistral_summarise_answer(answer, lang)

    # Step 4: store summary so follow-up questions can build on it
    conversation_memory["summaries"].append(summary)

    return {
        "answer": answer,
        "summary": summary
    }

In [51]:
# Turn 1 (English): ask about Newton's first law and print the answer and
# its summary.
# NOTE: handle_student_query calls retrieve_with_expansion, which is only
# defined in a later cell — on a fresh kernel that cell must be run first.
q1 = "Explain Newton's First Law of Motion"

result1 = handle_student_query(q1)

print("ANSWER 1:\n", result1["answer"])
print("\nSUMMARY 1:\n", result1["summary"])

ANSWER 1:
 The First Law of Motion, also known as the Law of Inertia, states that an object at rest tends to stay at rest, and an object in motion tends to stay in motion with the same speed and direction, unless acted upon by an external unbalanced force.

Imagine a ball resting on a table (NCERT Chapter 1, Lesson 2.2). If we don't push or pull it, it stays still because there is no external force to make it move. Now, if we throw the ball, it moves with a certain speed and direction. As long as no other forces act on it, it will continue moving in that same direction (NCERT Chapter 1, Lesson 2.3).

Now let's consider two balloons rubbed with a woollen cloth and brought near each other (NCERT Chapter 1, Lesson 2.6). They attract each other due to static electricity (NCERT Chapter 1, Lesson 2.5). This attraction is an external force acting on the balloons. Initially, they stay apart due to the Law of Inertia, but when we bring them closer, the force overcomes their tendency to stay sti

In [52]:
# Turn 2: follow-up question. handle_student_query stores a summary after
# each turn and feeds recent summaries into the next prompt, so this answer
# can build on turn 1.
q2 = "What exactly is meant by inertia?"

result2 = handle_student_query(q2)

print("ANSWER 2:\n", result2["answer"])
print("\nSUMMARY 2:\n", result2["summary"])

ANSWER 2:
 Inertia refers to an object's tendency to resist changes in its state of motion or rest. This means that if an object is at rest, it will remain at rest unless acted upon by an external unbalanced force; and if an object is moving with a certain speed and direction, it will continue to do so unless acted upon by an external unbalanced force. In other words, inertia is the property of an object that enables it to maintain its state of motion or rest unless acted upon by an external force. This concept is explained in detail under the First Law of Motion (Law of Inertia).

SUMMARY 2:
 1. Inertia: The tendency of an object to remain at rest or keep moving in a straight line with constant speed, unless acted upon by an external unbalanced force.
2. Objects at rest tend to stay at rest (stationary), while objects in motion tend to continue moving in a straight line.
3. This is the property that helps objects maintain their state of motion or rest.
4. The First Law of Motion, also

In [53]:
# Turn 3: another follow-up in the same conversation (same shared memory).
q3 = "Why does an object need an external force to change its state?"

result3 = handle_student_query(q3)

print("ANSWER 3:\n", result3["answer"])
print("\nSUMMARY 3:\n", result3["summary"])

ANSWER 3:
 According to the First Law of Motion, also known as the Law of Inertia, an object at rest tends to stay at rest, and an object in motion tends to continue moving in a straight line with constant speed. This property is called inertia. For an object to change its state (either from rest to motion or vice versa), it needs an external unbalanced force to overcome the tendency of inertia. In other words, an external force is needed to accelerate or decelerate an object and change its state of motion or rest. If you have any follow-up doubts, feel free to ask!

SUMMARY 3:
 - The First Law of Motion (Law of Inertia) states that an object at rest stays at rest, and an object in motion continues moving in a straight line with constant speed unless acted upon by an external unbalanced force.
- This property is called inertia.
- For an object to change its state (rest to motion or vice versa), an external force is required to overcome the tendency of inertia.
- An external force cause

In [54]:
# Turn 4: switch to Hindi ("What is the meaning of inertia?") to check that
# the pipeline follows the question's language.
# NOTE(review): the recorded output below is in English and misdefines the
# term, so language handling and retrieval for Hindi need checking.
q4 = "जड़त्व का अर्थ क्या है?"

result4 = handle_student_query(q4)

print("ANSWER 4:\n", result4["answer"])
print("\nSUMMARY 4:\n", result4["summary"])

ANSWER 4:
 The term "जड़त्व" (jadhutva) refers to connection or bonding. In the context of physics, it can be used to describe the attraction between two objects due to an external force, such as static electricity between balloons. This connection overcomes the tendency of inertia, causing the objects to stick together or move towards each other.

SUMMARY 4:
 1. "जड़त्व" (jadhutva) means bonding or connection.
2. In physics, it can describe attraction between objects due to external forces like static electricity (e.g., balloons).
3. This force overcomes inertia, causing stuck together or moving objects towards each other.
4. Key NCERT concept: Understanding forces acting on objects and their effects.


In [70]:
def retrieve_with_expansion(query, k=5):
    """Retrieve chunks for the query and for its model-generated variants.

    Args:
        query: The user's question.
        k: Number of results requested from the vector store per query.

    Returns:
        list[dict]: One ``{"text": ..., "metadata": ...}`` dict per distinct
        chunk (deduplicated by text). Hits for the original query come first,
        followed by hits for each variant in order.
    """
    # Always search the user's own wording first. The expansion variants are
    # model-generated and can drift in language or meaning, so they should
    # add to the original query's results rather than replace them.
    queries = [query]
    for variant in mistral_query_expansion_ncert_multilingual(query):
        # Skip anything that is not a usable, new search string.
        if isinstance(variant, str) and variant.strip() and variant not in queries:
            queries.append(variant)

    # Deduplicate by content. The first occurrence wins, so a chunk found by
    # the original query keeps that hit's metadata.
    unique = {}
    for q in queries:
        for doc in vectorstore.similarity_search(q, k=k):
            unique.setdefault(doc.page_content, doc)

    return [
        {"text": d.page_content, "metadata": d.metadata}
        for d in unique.values()
    ]

In [71]:
def build_context_with_citations(retrieved_docs):
    """Number the retrieved chunks and build the matching citation list.

    Args:
        retrieved_docs: Iterable of dicts with ``"text"`` and ``"metadata"``
            keys.

    Returns:
        tuple[str, list[dict]]: The context string, in which each chunk is
        prefixed with ``[n]`` (numbering starts at 1) and chunks are separated
        by a blank line, and a list of ``{"id": n, "metadata": ...}`` entries
        in the same order.
    """
    numbered = list(enumerate(retrieved_docs, start=1))

    context = "\n\n".join(f"[{n}] {doc['text']}" for n, doc in numbered)
    citations = [{"id": n, "metadata": doc["metadata"]} for n, doc in numbered]

    return context, citations

In [72]:
def mistral_answer_with_citations(question, retrieved_docs, memory):
    """Answer a question with the local Mistral model, asking for [n] citations.

    Args:
        question: The student's question.
        retrieved_docs: List of ``{"text": ..., "metadata": ...}`` dicts; all
            of them are numbered and placed in the prompt.
        memory: Dict with a ``"summaries"`` list; the last 3 entries are put
            into the prompt as previous understanding.

    Returns:
        tuple[str, list[dict]]: The model's answer (whitespace stripped) and
        the citation list produced by ``build_context_with_citations``.

    Raises:
        RuntimeError: If the ``ollama`` process exits with a non-zero status.
    """
    lang = detect_language(question)

    context, citations = build_context_with_citations(retrieved_docs)

    # Spell the language out for the model; unknown codes fall back to a
    # generic instruction.
    language_name = {
        "hi": "Hindi (हिंदी)",
        "en": "English",
        "ml": "Malayalam (മലയാളം)",
        "mr": "Marathi (मराठी)",
        "ur": "Urdu (اردو)"
    }.get(lang, "the same language")

    previous_summary = "\n".join(memory["summaries"][-3:])

    prompt = f"""
You are an NCERT textbook-based teaching assistant.

STRICT RULES:
- Answer ONLY in {language_name}
- Use ONLY the context below
- EVERY factual statement MUST reference a citation number [1], [2], etc.
- Do NOT invent citations
- If information is missing, say so

PREVIOUS UNDERSTANDING:
{previous_summary if previous_summary else "None"}

CONTEXT (NCERT):
{context}

QUESTION:
{question}

ANSWER (with citations like [1], [2]):
"""

    result = subprocess.run(
        ["ollama", "run", "mistral"],
        input=prompt.encode("utf-8"),
        capture_output=True
    )

    # A failed model call would otherwise come back as an empty "answer"
    # that callers then summarise and store in memory.
    if result.returncode != 0:
        stderr = result.stderr.decode("utf-8", errors="replace").strip()
        raise RuntimeError(
            f"ollama exited with status {result.returncode}: {stderr}"
        )

    return result.stdout.decode("utf-8").strip(), citations

In [73]:
def extract_pdf_sources(retrieved_docs):
    """Collect the distinct ``source`` values from the retrieved documents.

    Args:
        retrieved_docs: Iterable of dicts carrying a ``"metadata"`` mapping.

    Returns:
        list: Sorted, de-duplicated ``metadata["source"]`` values. Documents
        whose metadata has no ``"source"`` key are skipped.
    """
    sources = {
        doc["metadata"]["source"]
        for doc in retrieved_docs
        if "source" in doc["metadata"]
    }
    return sorted(sources)

In [74]:
def handle_student_query(question):
    """Run the full pipeline: retrieve, answer with citations, summarise.

    Args:
        question: The student's question.

    Returns:
        dict: ``"answer"`` (model answer), ``"summary"`` (short summary, also
        appended to ``conversation_memory["summaries"]``) and ``"sources"``
        (sorted ``source`` values of all retrieved documents).
    """
    docs = retrieve_with_expansion(question)

    # The citation list returned alongside the answer is not used here.
    answer, _unused_citations = mistral_answer_with_citations(
        question, docs, conversation_memory
    )

    summary = mistral_summarise_answer(answer, detect_language(question))
    conversation_memory["summaries"].append(summary)

    response = {"answer": answer, "summary": summary}
    response["sources"] = extract_pdf_sources(docs)
    return response

In [75]:
# End-to-end check of the citation-aware handler with a Hindi question
# ("What is the meaning of inertia?"); prints the answer, the summary and
# the source of each retrieved document.
# NOTE(review): the recorded output below answers in English and cites
# unrelated passages, so retrieval quality for this query needs checking.
q = "जड़त्व का अर्थ क्या है?"
result = handle_student_query(q)

print("ANSWER:\n", result["answer"])
print("\nSUMMARY:\n", result["summary"])
print("\nSOURCES:")
for s in result["sources"]:
    print("•", s)

ANSWER:
 The term "जड़त्व" (Jađhṭv) in the given passages doesn't have a direct English translation, but it generally refers to adding or joining something with another. Here are some examples from the provided passages:

[16] उद्े‍चर हट्रत आइरण को बढ़ाना - To increase the size of a bird's nest
[17] (क) "जबज्टया" शबद का प्योग भी जलाना - Using the word 'jabjatya' (meaning 'betty') affectionately or to express love
[18] (ख) आपका नाम जबतिसे रखा? - What is your name recorded as?

So, "जड़त्व" can mean adding or joining something in general terms.

SUMMARY:
 1. The term "जड़त्व" (Jađhṭv) is used to denote adding, joining, or connecting things together.
2. In the context of the passages, it can be seen as increasing a bird's nest size [Example: उदे‍चर हट्रत आइरण को बढ़ाना], using affectionate or loving words like 'jabjatya' [Example: (क) "जबज्टया" शबद का प्योग भी जलाना], and asking about something being recorded [Example: (ख) आपका नाम जबतिसे रखा?].
3. This term is an important concept to und