In [1]:
from sentence_transformers import SentenceTransformer
from pinecone import Pinecone, ServerlessSpec
from groq import Groq
import json


  from .autonotebook import tqdm as notebook_tqdm


In [None]:

# Initialize Pinecone
pc = Pinecone(api_key=PINECONE_API_KEY)
if INDEX_NAME not in pc.list_indexes().names():
    pc.create_index(
        name=INDEX_NAME,
        dimension=384,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1")
    )
index = pc.Index(INDEX_NAME)


In [3]:
model = SentenceTransformer("all-MiniLM-L6-v2")

with open("D:/Awais/Notebooks/cs_chapter.json", "r", encoding="utf-8") as f:
    data = json.load(f)

# Embedding and pushing
batch = []
for i, chunk in enumerate(data):
    vector = model.encode(chunk["content"]).tolist()
    meta = chunk["metadata"]
    meta["text"] = chunk["content"]
    batch.append((str(i), vector, meta))

index.upsert(batch)


{'upserted_count': 136}

In [4]:
def preprocess_query(query: str):
    return query.strip().lower()


In [5]:
import re

def extract_metadata_from_query(query: str):
    query = query.lower()
    metadata = {}

    # Detect chapters
    if "chapter 1" in query or "unit 1" in query:
        metadata["chapter"] = "Unit 1: Problem Solving"

    # Detect topic numbers like 1.2, 1.3, etc.
    topic_match = re.search(r"\b1\.\d+\b", query)
    if topic_match:
        topic_num = topic_match.group()
        metadata["topic"] = f"{topic_num} Flowcharts" if "flowchart" in query else f"{topic_num}"

    # Detect subtopics (customize for more)
    if "importance" in query:
        metadata["subtopic"] = "1.2.2 Importance of Flowcharts in Problem Solving"
    elif "requirements" in query:
        metadata["subtopic"] = "1.2.3 Determining Requirements for a Flowchart"
    elif "symbols" in query:
        metadata["subtopic"] = "1.2.4 Using Flowchart Symbols"

    # Detect types of content
    if "activity" in query:
        metadata["type"] = "activity"
    elif "mcq" in query or "multiple choice" in query:
        metadata["type"] = "mcq"
    elif "short question" in query:
        metadata["type"] = "short_question"
    elif "long question" in query:
        metadata["type"] = "long_question"

    return metadata


def retrieve_relevant_chunks(query, top_k=30, filter_meta=None):
    query_vector = model.encode(query).tolist()

    # 🔍 Auto-extract metadata from query if no external filter provided
    if filter_meta is None:
        filter_meta = extract_metadata_from_query(query)

    print("🧠 Metadata Filter Applied:", filter_meta)  # DEBUG

    response = index.query(
        vector=query_vector,
        top_k=top_k,
        include_metadata=True,
        include_values=False,
        filter=filter_meta
    )

    return [
        {
            "content": match["metadata"].get("text", ""),
            "metadata": match.get("metadata", {})
        }
        for match in response.get("matches", [])
    ]


In [6]:
def build_prompt(context_chunks, user_query):
    # Create a readable structured context
    context = "\n---\n".join([
        f"[{chunk['metadata'].get('chapter', '')} > {chunk['metadata'].get('topic', '')} > {chunk['metadata'].get('subtopic', '')} | {chunk['metadata'].get('type', '')}]\n{chunk['content']}"
        for chunk in context_chunks
        if chunk.get("content")
    ])

    prompt = f"""You are an educational AI assistant for 9th grade Computer Science students in Pakistan. 
Use only the context provided below to answer the user's question. Be specific, use chapter/topic/subtopic hierarchy when available, and **do not guess**. 
If the answer isn't in the context, say "I don't know based on the provided material."

Context:
{context}

User Question:
{user_query}

Answer:"""
    
    return prompt


In [7]:
groq_client = Groq(api_key=GROQ_API_KEY)

def run_llm(prompt, model_name="llama3-8b-8192"):
    response = groq_client.chat.completions.create(
        model=model_name,
        messages=[
            {"role": "system", "content": "You are a helpful AI tutor for 9th grade computer science."},
            {"role": "user", "content": prompt}
        ],
        temperature=0.3,
        max_tokens=300
    )
    return response.choices[0].message.content.strip()


In [8]:
def chat():
    print("💬 AI Tutor: Hi! Ask me anything from your Computer Science book. Type 'exit' to stop.")
    while True:
        user_query = input("👦 You: ")
        if user_query.lower() in ['exit', 'quit']:
            print("👋 AI Tutor: Goodbye! Stay curious.")
            break

        processed_query = preprocess_query(user_query)
        relevant_chunks = retrieve_relevant_chunks(processed_query)
        prompt = build_prompt(relevant_chunks, processed_query)
        answer = run_llm(prompt)
        print("🤖 AI Tutor:", answer)


In [10]:
chat()


💬 AI Tutor: Hi! Ask me anything from your Computer Science book. Type 'exit' to stop.
🧠 Metadata Filter Applied: {'type': 'mcq'}
🤖 AI Tutor: Based on the provided context, there are 7 MCQs in the exercise of Chapter 2: Binary Systems.


KeyboardInterrupt: Interrupted by user