## Prototype: answering questions from a textbook using retrieval and a knowledge graph
### Source text: the NCERT Class 6 Science textbook.

### Required libraries 

In [1]:
!pip install -q \
    langchain langchain-community langchain-core \
    langchain-cohere cohere \
    faiss-cpu sentence-transformers \
    tiktoken python-dotenv


In [2]:
!pip install pyvis networkx




In [3]:
import os
from getpass import getpass

from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_cohere import ChatCohere
from langchain_community.document_loaders import TextLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough


### API key for the LLM

In [4]:
# Never store a real key in the notebook. Reuse a key that is already present
# in the environment; otherwise prompt for it without echoing it to the output.
# NOTE(review): getpass needs an interactive session; for unattended runs set
# the environment variable before starting the kernel.
if not os.environ.get('cohere_api_key'):
    os.environ['cohere_api_key'] = getpass("Cohere API key: ")

### Creating the vector store

In [5]:
# Build the retriever used by the question-answering cells below:
# load the PDF, split it into chunks, embed the chunks and index them in FAISS.

# Location of the source PDF (an absolute Kaggle input path).
file_path = "/kaggle/input/scinceclass6/science class 6.pdf"

# Load the PDF into LangChain documents.
loader = PyPDFLoader(file_path)
documents = loader.load()

# Splitter settings.
# NOTE(review): assuming the splitter measures length in characters (its
# default), 50/5 produces fragments shorter than a sentence — confirm this is
# intended; larger chunks usually give the LLM more coherent context.
chunk_size = 50  
chunk_overlap = 5

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=chunk_size,
    chunk_overlap=chunk_overlap,
)
all_splits = text_splitter.split_documents(documents)

# Embed every chunk with a sentence-transformers model and index them in FAISS.
model_name = 'sentence-transformers/all-MiniLM-L6-v2'
embeddings = HuggingFaceEmbeddings(model_name=model_name)
vectorstore = FAISS.from_documents(all_splits, embedding = embeddings) 

# Similarity search returning the 25 closest chunks per query.
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 25})

  embeddings = HuggingFaceEmbeddings(model_name=model_name)
2026-01-11 10:28:58.843672: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1768127338.864964     368 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1768127338.871449     368 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1768127338.888297     368 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1768127338.888321     368 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1768127338.8883

### prompts

In [13]:
# Answer-generation prompt. Placeholders: {context} (retrieved text),
# {kg} (knowledge-graph text) and {question} (the user's question).
prompt_template = """
You are a helpful assistant answering questions from the Class 6 NCERT Science textbook.

Instructions:
1. Use the Context and the Knowledge Graph to answer the Question.
2. Do NOT use outside knowledge.
3. If the Context is completely unrelated, say:
   "The answer is not present in the provided text."
4. Write the answer in simple language suitable for a Class 6 student.

Context:
{context}

Knowledge Graph:
{kg}

Question:
{question}

Answer:
"""

# Knowledge-graph extraction prompt. Placeholder: {text}.
# The "Entities:" / "Relations:" layout and the "A -> relation -> B" line
# shape requested here are what parse_kg() reads back.
kg_prompt_template = """
You are given text from a Class 6 NCERT Science textbook.

Task:
Extract important entities and relationships from the text.

Rules:
- Use only the given text.
- Keep entities simple and relevant.
- Relations should be short and clear.
- Do not add outside knowledge.

Output format (strict):
Entities:
- Entity 1
- Entity 2

Relations:
- Entity 1 -> relation -> Entity 2

Text:
{text}
"""



# Prompt object built from the answer template.
# NOTE(review): the visible cells build their own PromptTemplate inside
# generate_answer() and do not reference this variable.
prompt = PromptTemplate.from_template(template=prompt_template)

In [7]:
def format_docs(docs):
    """Join the ``page_content`` of each document, separated by a blank line.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        A single string; empty when ``docs`` is empty.
    """
    page_texts = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(page_texts)

In [8]:
def extract_kg(docs):
    """Ask the Cohere chat model for the entities and relations found in ``docs``.

    The page contents of ``docs`` are joined with newlines, substituted into
    ``kg_prompt_template`` and sent to the model with temperature 0.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The model's reply as a string (the prompt requests an ``Entities:`` /
        ``Relations:`` layout), or ``""`` when ``docs`` carries no text.
    """
    context_text = "\n".join(doc.page_content for doc in docs)

    # Nothing was retrieved: skip the API call instead of asking the model to
    # extract a graph from an empty passage.
    if not context_text.strip():
        return ""

    cohere_llm = ChatCohere(
        model="command-xlarge-nightly",
        temperature=0.0,
        cohere_api_key=os.getenv("cohere_api_key")
    )

    kg_prompt = PromptTemplate(
        input_variables=["text"],
        template=kg_prompt_template
    )

    # prompt -> model -> plain string
    kg_chain = kg_prompt | cohere_llm | StrOutputParser()

    return kg_chain.invoke({"text": context_text})


### Knowledge graph (KG) visualisation

In [9]:
import re

def parse_kg(kg_text):
    """Parse ``Entities:`` / ``Relations:`` text into nodes and labelled edges.

    Expected layout (as requested by the KG extraction prompt)::

        Entities:
        - Cotton
        Relations:
        - Cotton -> spun into -> Yarn

    Only bullet lines (starting with ``-``) are read. Relation lines that do
    not have exactly the ``source -> relation -> target`` shape, or whose
    source or target is empty, are skipped rather than raising, because the
    text comes from an LLM and is not guaranteed to follow the format.

    Args:
        kg_text: Text to parse; ``None`` or ``""`` is treated as no graph.

    Returns:
        ``(entities, edges)`` where ``entities`` is a list of unique entity
        names in first-seen order and ``edges`` is a list of
        ``(source, relation, target)`` tuples.
    """
    if not kg_text:
        return [], []

    # dict keys keep insertion order, giving a deterministic, duplicate-free list
    entities = {}
    edges = []
    section = None  # "entities", "relations", or None before any header

    for raw_line in kg_text.split("\n"):
        line = raw_line.strip()
        lowered = line.lower()

        if lowered.startswith("entities"):
            section = "entities"
            continue

        if lowered.startswith("relations"):
            section = "relations"
            continue

        if not line.startswith("-"):
            continue

        content = line[1:].strip()
        if not content:
            # a bare "-" bullet carries no entity or relation
            continue

        if section == "entities":
            entities[content] = None

        elif section == "relations" and "->" in content:
            parts = [part.strip() for part in content.split("->")]
            if len(parts) != 3:
                continue  # e.g. "A -> B" or "A -> r -> B -> C"
            src, rel, tgt = parts
            if not src or not tgt:
                continue  # an edge needs both endpoints
            entities[src] = None
            entities[tgt] = None
            edges.append((src, rel, tgt))

    return list(entities), edges




In [10]:
from pyvis.network import Network

def visualize_kg(kg_text, output_file="kg.html"):
    """Draw the knowledge graph described by ``kg_text`` and save it as HTML.

    The text is parsed with ``parse_kg``; every entity becomes a node and
    every relation becomes a directed, labelled edge in a pyvis ``Network``.

    Args:
        kg_text: Knowledge-graph text in the ``Entities:`` / ``Relations:`` layout.
        output_file: Path of the HTML file to write.
    """
    nodes, links = parse_kg(kg_text)

    graph = Network(
        height="600px", width="100%", directed=True,
        bgcolor="#ffffff", font_color="black",
    )

    # Physics settings that spread the nodes apart
    graph.barnes_hut(
        gravity=-2000, central_gravity=0.3,
        spring_length=200, spring_strength=0.05, damping=0.09,
    )

    # One ellipse per entity, all sharing the same look
    node_style = {"shape": "ellipse", "size": 25, "color": "#97c2fc"}
    for name in nodes:
        graph.add_node(name, label=name, **node_style)

    # One arrow per relation, labelled with the relation text
    for source, relation, target in links:
        graph.add_edge(
            source, target,
            label=relation,
            arrows="to",
            font={"size": 14, "align": "middle"},
            color="#000000",
        )

    graph.write_html(output_file)
    print(f"Knowledge graph saved to {output_file}")


### Query Retrieval chain 

In [11]:
def generate_answer(question):
    """Answer ``question`` from retrieved textbook text plus a local knowledge graph.

    Steps: retrieve documents with the notebook-level ``retriever``, extract a
    knowledge graph from them with ``extract_kg``, then prompt the Cohere chat
    model with ``prompt_template`` filled with the context, the graph and the
    question.

    Args:
        question: The question to answer.

    Returns:
        A dict with keys ``"answer"`` (the model's reply as a string) and
        ``"knowledge_graph"`` (the text returned by ``extract_kg``).
    """
    cohere_llm = ChatCohere(
        model="command-xlarge-nightly",
        temperature=0.05,
        cohere_api_key=os.getenv("cohere_api_key")
    )

    # Step 1: Retrieve documents.
    # `invoke` is the Runnable entry point for retrievers; the older
    # `get_relevant_documents` method is deprecated.
    docs = retriever.invoke(question)
    context = format_docs(docs)

    # Step 2: Extract local KG
    kg = extract_kg(docs)

    # Step 3: Generate answer using context + KG
    answer_prompt = PromptTemplate(
        input_variables=["context", "kg", "question"],
        template=prompt_template
    )

    answer_chain = answer_prompt | cohere_llm | StrOutputParser()

    return {
        "answer": answer_chain.invoke({
            "context": context,
            "kg": kg,
            "question": question
        }),
        "knowledge_graph": kg
    }


In [14]:
# Run the full pipeline (retrieve -> extract KG -> answer) for one sample question.
result = generate_answer("Summarise the full chapter: FIBRE TO FABRIC")

# Show the generated answer, then the knowledge-graph text it was given.
print("Answer:\n", result["answer"])
print("\nKnowledge Graph:\n", result["knowledge_graph"])

# Write the graph to the default output file (kg.html).
visualize_kg(result["knowledge_graph"])

Answer:
 The chapter "Fibre to Fabric" explains how fabrics are made from fibres. Here’s a simple summary:

1. **Fibres**: These are the basic units used to make fabrics. Examples of fibres include cotton, silk, wool, and jute. Some fibres come from plants (like cotton and jute), while others come from animals (like silk and wool).

2. **Yarn**: Fibres are twisted together to form yarns. This process is called spinning. Yarns are like long threads made from fibres.

3. **Fabric**: Yarns are then woven together to make fabrics. Weaving is the process of arranging yarns in a crisscross pattern to create a strong and flexible material.

4. **Types of Fabrics**: Different fibres and weaving methods result in different types of fabrics. For example, cotton fibres make soft and breathable fabrics, while wool fibres make warm fabrics.

In short, the journey from fibre to fabric involves turning fibres into yarns through spinning and then weaving those yarns into fabrics. This process helps 