In [1]:
import os
from PyPDF2 import PdfReader
from langchain_groq import ChatGroq
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
import yake
from dotenv import load_dotenv

# Step 1: Load notes from .txt or .pdf
def load_notes(file_path):
    ext = os.path.splitext(file_path)[1].lower()
    if ext == ".txt":
        with open(file_path, "r", encoding="utf-8") as f:
            text = f.read()
    elif ext == ".pdf":
        reader = PdfReader(file_path)
        text = "\n".join(page.extract_text() or "" for page in reader.pages)
    else:
        raise ValueError("Unsupported file type. Use .txt or .pdf")
    return text

# Step 2: Extract keywords using YAKE (no torch required)
def extract_keywords(text, top_n=20):
    kw_extractor = yake.KeywordExtractor(n=2, top=top_n, stopwords=None)
    keywords = kw_extractor.extract_keywords(text)
    print(keywords)
    return [kw for kw, score in keywords]

# Step 3: Prompt Groq LLM with keyword guardrails
def generate_response(query, keywords, notes_text):
    keyword_str = ", ".join(keywords)
    prompt = PromptTemplate.from_template("""
You are a helpful doubt-solving assistant for students. Only answer using the provided notes and the following syllabus keywords:
{keywords}

If the answer is not related to these keywords, say: "We appreaciate your enthusiasm to learn new topics, but This topic is beyond the current syllabus given by your Faculty."

Notes:
{notes}

Question:
{question}
""")
    llm = ChatGroq(model_name="openai/gpt-oss-20b")
    chain = LLMChain(llm=llm, prompt=prompt)
    return chain.run({"keywords": keyword_str, "notes": notes_text, "question": query})

# Step 4: Full pipeline
def handle_student_query(query, notes_path):
    notes_text = load_notes(notes_path)
    keywords = extract_keywords(notes_text)
    return generate_response(query, keywords, notes_text)

# üîç Run a test query
if __name__ == "__main__":
    os.environ["LANGCHAIN_API_KEY"] = os.getenv("LANGCHAIN_API_KEY")
    os.environ["LANGCHAIN_TRACING_V2"] = 'true'
    # query = "Explain how AI is used in healthcare"
    query = input()
    notes_path = "../media/notes_file.txt"  # or "./your_notes.pdf"
    response = handle_student_query(query, notes_path)
    print("\nüìò Response:\n", response)


[('Management System', np.float64(0.005422007122230571)), ('DBMS', np.float64(0.03871911259675711)), ('Database Management', np.float64(0.04351109313584756)), ('data', np.float64(0.055257348490875)), ('end users', np.float64(0.0554765938330285)), ('System', np.float64(0.0697810567736433)), ('Management', np.float64(0.07728125293284961)), ('DBMS Hierarchical', np.float64(0.08775336927325725)), ('Hierarchical DBMS', np.float64(0.08775336927325725)), ('Database', np.float64(0.10790927263752395)), ('analyze data', np.float64(0.1566946252433182)), ('Definition', np.float64(0.17312140634439385)), ('applications', np.float64(0.1840341910636074)), ('SQL', np.float64(0.1924227163281904)), ('Network DBMS', np.float64(0.20534800815880316)), ('update', np.float64(0.21177969772947786)), ('structure', np.float64(0.21289726102949888)), ('SQL Basics', np.float64(0.21294199764330435)), ('users', np.float64(0.2143955863438802)), ('Object-oriented DBMS', np.float64(0.21646485514991587))]


  chain = LLMChain(llm=llm, prompt=prompt)
  return chain.run({"keywords": keyword_str, "notes": notes_text, "question": query})



üìò Response:
 We appreciate your enthusiasm to learn new topics, but this topic is beyond the current syllabus given by your Faculty.
