In [1112]:
from langgraph.graph import StateGraph, END, START
from typing import TypedDict,Annotated,List,Dict
from langchain_core.messages import BaseMessage, HumanMessage
from langgraph.graph.message import add_messages
from langgraph.checkpoint.memory import MemorySaver
from typing import Optional
from langchain_core.prompts import PromptTemplate
# File parsing
import PyPDF2 
import docx
import json
import re

In [None]:
import os

from langchain_groq import ChatGroq

# The Groq API key is read from the environment so that the secret never lives
# in the notebook (or in its saved outputs / version control history).
# Set it before starting the kernel, e.g. `export GROQ_API_KEY=...`.
# NOTE(review): any key that was previously committed in this cell must be
# treated as leaked and revoked in the Groq console.
_groq_api_key = os.environ.get("GROQ_API_KEY")
if not _groq_api_key:
    raise RuntimeError(
        "GROQ_API_KEY is not set; export it before running this notebook."
    )

# Shared chat model used by every LLM-backed node in this notebook.
llm = ChatGroq(
    model="openai/gpt-oss-20b",
    temperature=0,       # deterministic-leaning output for JSON generation
    max_tokens=None,
    timeout=None,
    max_retries=2,
    api_key=_groq_api_key,
    # other params...
)
llm

ChatGroq(profile={'max_input_tokens': 131072, 'max_output_tokens': 32768, 'image_inputs': False, 'audio_inputs': False, 'video_inputs': False, 'image_outputs': False, 'audio_outputs': False, 'video_outputs': False, 'reasoning_output': True, 'tool_calling': True}, client=<groq.resources.chat.completions.Completions object at 0x000001491FAF5950>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x000001491FAF5450>, model_name='openai/gpt-oss-20b', temperature=1e-08, model_kwargs={}, groq_api_key=SecretStr('**********'))

In [1114]:
# Smoke test: send a single prompt to the configured model and display the
# text content of its reply.
llm.invoke("Hello, tell me a quote from Sam Altman about AI.").content

'**Sam\u202fAltman on AI**\n\n> ‚ÄúAI is the most important technology of our time.‚Äù  \n> ‚Äî Sam\u202fAltman, CEO of OpenAI (tweet, 2023)\n\nThis statement captures Altman‚Äôs view that artificial intelligence is not just another tool but a transformative force that will shape the future of society, science, and industry.'

In [1115]:
class StudyBuddyState(TypedDict):
    """Shared state dictionary passed between the nodes of the study-buddy graph.

    TypedDict fields cannot carry default values, so every key a node reads
    must be supplied by the caller or written by an earlier node.
    """
    # user_id: str

    # ---- Input source ----
    # Path of the uploaded file; opened when file_type is 'pdf', 'docx' or 'txt'.
    file_path: Optional[str]
    # Selects the extraction branch: 'text', 'pdf', 'docx' or 'txt'.
    file_type: Optional[str]
    # Pasted text; used as the source when file_type is 'text'.
    raw_text: Optional[str]

    # ---- User preferences ----
    # user_preference: str
    # Number of topics the user wants the material split into.
    topics_no: int
    # Daily study budget; interpolated into the plan prompt as minutes.
    # NOTE(review): annotated as str, but the sample invocation in this
    # notebook passes an int — confirm the intended type.
    study_time_per_day: str
    # Exam date; the plan generator accepts a 'YYYY-MM-DD' string or a datetime.
    exam_date: str
    # Owner of the stored topics and study plan documents.
    user_email: str

    # ---- Processing data ----
    # Whitespace-normalised text produced by the file-processing node.
    extracted_text: Optional[str]
    # Topic dicts produced by the chunker (keys used downstream: id, title, content).
    topics: List[Dict]
    # Schedule entries produced by the plan generator.
    study_plan: List[Dict]

    # progress_data: Dict
    # Human-readable progress log appended to by each node.
    messages: List[str]
    # Debug/preview payload of the most recent node.
    # NOTE(review): the plan generator stores a list here, not a str.
    print: str
    

In [1116]:
from pymongo import MongoClient
def get_database(uri: str = "mongodb://localhost:27017/", db_name: str = "study_buddy"):
    """Connect to MongoDB and return a database handle.

    Args:
        uri: MongoDB connection string. Defaults to the local development
            server so existing zero-argument calls keep working.
        db_name: Name of the database to return. Defaults to 'study_buddy'.

    Returns:
        The database object obtained by indexing a new ``MongoClient`` with
        ``db_name``.
    """
    client = MongoClient(uri)
    return client[db_name]

In [1117]:
# ============ NODE 1: FILE PROCESSING ============
def file_processing(state: StudyBuddyState) -> StudyBuddyState:
    """Step 2: Extract text from uploaded file or pasted text.

    The source is selected by ``state['file_type']``:
      - 'text': uses ``state['raw_text']`` (a missing/None paste counts as empty)
      - 'pdf' : reads every page of ``state['file_path']`` with PyPDF2
      - 'docx': reads every paragraph of ``state['file_path']`` with python-docx
      - 'txt' : reads ``state['file_path']`` as UTF-8
    Any other value leaves the extracted text empty.

    Runs of whitespace are collapsed to single spaces before the result is stored.

    Args:
        state: Graph state; reads 'file_type' plus 'raw_text' or 'file_path'.

    Returns:
        The same state dict with 'extracted_text' and 'print' set and
        'messages' replaced by a one-element list describing the result.
    """
    print("üìÑ Processing file...")

    file_type = state['file_type']
    extracted_text = ""

    # If user pasted text directly
    if file_type == 'text':
        # raw_text is Optional; None would otherwise crash the regex below.
        extracted_text = state['raw_text'] or ""

    # If user uploaded PDF file
    elif file_type == 'pdf':
        with open(state['file_path'], 'rb') as file:
            pdf_reader = PyPDF2.PdfReader(file)
            # Join pages with a newline so the last word of one page does not
            # fuse with the first word of the next; tolerate pages for which
            # extract_text() yields nothing.
            extracted_text = "\n".join(
                page.extract_text() or "" for page in pdf_reader.pages
            )

    # If user uploaded Word document
    elif file_type == 'docx':
        doc = docx.Document(state['file_path'])
        extracted_text = "".join(para.text + "\n" for para in doc.paragraphs)

    # If user uploaded text file
    elif file_type == 'txt':
        with open(state['file_path'], 'r', encoding='utf-8') as file:
            extracted_text = file.read()

    # Clean up extra spaces
    extracted_text = re.sub(r'\s+', ' ', extracted_text).strip()

    # Save to state
    state['extracted_text'] = extracted_text
    state['messages'] = [f"‚úÖ Extracted {len(extracted_text)} characters from file"]
    state['print'] = f"Extracted text: {extracted_text}"

    return state

In [1118]:
# def clean_text_node(state: StudyBuddyState) -> StudyBuddyState:
#     prompt = f"""Clean and normalize the following study material in following way: 
#     - Remove the Book name and authur name (if any)
#     - 
#     \n{state["extracted_text"][:31000]}."""
#     clean_text = llm.invoke(prompt).content
#     state['extracted_text'] = clean_text
#     state['messages'].append("‚úÖ Cleaned and normalized text")
#     state['print'] = clean_text
#     return state

In [1119]:
# 3. If the user query is NOT about dividing or structuring the given document,
#    you MUST return an empty JSON array: [].

In [1120]:
def semantic_chunker(state: StudyBuddyState) -> StudyBuddyState:
    """Ask the LLM for topic titles, then split the extracted text across them.

    The first 8000 characters of ``state['extracted_text']`` are sent to the
    LLM together with the requested topic count. The first JSON array found in
    the reply is taken as the topic list. Whatever "content" the LLM returned
    is then overwritten: the full extracted text is split into equal-sized
    word chunks (the last topic takes the remainder), one chunk per topic, in
    order.

    Args:
        state: Graph state; reads 'extracted_text' and 'topics_no' and appends
            to the existing 'messages' list.

    Returns:
        The same state dict with 'topics' set, a progress message appended and
        'print' set to the topics rendered as indented JSON.

    Raises:
        ValueError: if the reply contains no parseable JSON array, or the array
            is empty or contains items that are not JSON objects.
    """
    text=state["extracted_text"][:8000]
    # NOTE(review): the rules say "Do NOT include content" while the output
    # format asks for a "content" field; the returned content is discarded
    # below either way.
    semantic_prompt = f"""Read this study material {text} and split the whole text semantic study topics.

    RULES:
    - No of topics specify by the user:{state['topics_no']}
    - Titles must come from the text itself
    - Do NOT rewrite content
    - Do NOT include explanations
    - Do NOT include content

    OUTPUT FORMAT (JSON ONLY):
    [
    {{ "id": 1, "title": "Topic or chapter name from the text", "content": "exact content of the title from text" }},
    {{ "id": 2, "title": "Topic or chapter name from the text","content": "exact content of the title from text" }}
    ]
    """

    response = llm.invoke(semantic_prompt)

    try:
        content = response.content
        # Greedy match from the first '[' to the last ']' so surrounding prose
        # or code fences in the reply are ignored.
        json_match = re.search(r'\[.*\]', content, re.DOTALL)
        if json_match is None:
            raise ValueError("no JSON array found in the response")
        topics = json.loads(json_match.group())
    except Exception as e:
        raise ValueError(f"LLM failed to return valid topic titles: {e}") from e

    # An empty array would divide by zero below, and non-object items cannot
    # receive a "content" key; report both as the same kind of failure.
    if not topics or not all(isinstance(topic, dict) for topic in topics):
        raise ValueError(
            "LLM failed to return valid topic titles: "
            "expected a non-empty JSON array of objects"
        )

    # Deterministic content splitting
    words = state["extracted_text"].split()
    chunk_size = len(words) // len(topics)

    for i, topic in enumerate(topics):
        start = i * chunk_size
        # The last topic absorbs the remainder left by the integer division.
        end = start + chunk_size if i < len(topics) - 1 else len(words)
        topic["content"] = " ".join(words[start:end])

    state["topics"] = topics
    state["messages"].append(f"‚úÖ Created {len(topics)} topics")
    state["print"] = json.dumps(topics, indent=2)

    return state


In [1121]:
from pymongo import MongoClient
from datetime import datetime
def store_topics(state: StudyBuddyState) -> StudyBuddyState:
    """Step 3: Save all topics to MongoDB database.

    Inserts one document into the 'topics' collection containing the user's
    email, the full topic list and a creation timestamp.

    Args:
        state: Graph state; reads 'user_email' and 'topics' and appends to the
            existing 'messages' list.

    Returns:
        The same state dict with a progress message appended.
    """
    print("üíæ Storing topics in database...")

    # Use the shared connection helper so the connection settings live in one
    # place, the same way save_plan obtains its database handle.
    db = get_database()
    topics_collection = db['topics']

    # Prepare data to save
    topic_data = {
        "user_email": state['user_email'],
        "topics": state['topics'],
        "created_at": datetime.now()
    }

    # Save to database
    topics_collection.insert_one(topic_data)

    # Update state
    state['messages'].append(f"‚úÖ Saved {len(state['topics'])} topics to database")

    return state

In [1122]:
from datetime import datetime, timedelta


def generate_study_plan(state: StudyBuddyState) -> StudyBuddyState:
    """Step 4: AI creates personalized study schedule.

    Builds a prompt from the topic titles, the number of days left until the
    exam and the daily study time, and asks the LLM for a JSON array of
    schedule entries. If the reply cannot be parsed into a non-empty list of
    objects, a fallback plan is generated locally that assigns topics
    round-robin to the available days, starting tomorrow.

    Args:
        state: Graph state; reads 'exam_date' (a 'YYYY-MM-DD' string or a
            datetime), 'topics' and 'study_time_per_day', and appends to the
            existing 'messages' list.

    Returns:
        The same state dict with 'study_plan' set, a progress message appended
        and 'print' set to the plan list itself.

    Raises:
        ValueError: if 'exam_date' is neither a str nor a datetime, or is a
            string that does not match YYYY-MM-DD.
    """
    print("üìÖ Generating study plan...")

    # ---- Fix exam_date parsing ----
    exam_date_raw = state["exam_date"]

    if isinstance(exam_date_raw, datetime):
        exam_date = exam_date_raw
    elif isinstance(exam_date_raw, str):
        exam_date = datetime.strptime(exam_date_raw, "%Y-%m-%d")
    else:
        raise ValueError("exam_date must be a string (YYYY-MM-DD) or datetime")

    today = datetime.now()

    days_available = (exam_date.date() - today.date()).days
    if days_available <= 0:
        # Exam is today or already past: plan as if one day remained so the
        # modulo in the fallback below never divides by zero.
        days_available = 1

    # ---- Prepare topics text ----
    topics_text = "".join(
        f"{topic['id']}. {topic['title']}.\n" for topic in state["topics"]
    )

    # ---- Prompt ----
    # The example entries must themselves be valid JSON, otherwise the model is
    # shown a malformed template while being told to return valid JSON only.
    prompt = f"""
Create a study schedule for these topics:

{topics_text}

Study Details:
- Days until exam: {days_available}
- Study time per day: {state['study_time_per_day']} minutes
- Exam date: {exam_date.strftime('%Y-%m-%d')}

Return ONLY a JSON array like:
[
  {{"date": "2025-01-15", "topic_id": 1, "topic_title": "topic_name","content":"content of the topic from text","completed": false}},
  {{"date": "2025-01-16", "topic_id": 2, "topic_title": "topic_name","content":"content of the topic from text","completed": false}}
]

Rules:
- Start from tomorrow
- Use valid JSON only
"""

    # ---- Call LLM ----
    response = llm.invoke(prompt)
    content = response.content if hasattr(response, "content") else str(response)

    # ---- Parse JSON safely ----
    study_plan = None
    try:
        match = re.search(r"\[.*\]", content, re.DOTALL)
        parsed = json.loads(match.group() if match else content)
        # Accept only a non-empty list of objects; anything else (a bare dict,
        # a list of strings, an empty array) is not a usable schedule.
        if isinstance(parsed, list) and parsed and all(
            isinstance(entry, dict) for entry in parsed
        ):
            study_plan = parsed
    except Exception:
        # Deliberate best-effort: any parsing problem falls through to the
        # locally generated plan below.
        study_plan = None

    if study_plan is None:
        # ---- Fallback plan ----
        # Topics wrap around the available days; day 0 maps to tomorrow.
        study_plan = []
        for i, topic in enumerate(state["topics"]):
            day_number = i % days_available
            study_date = (today + timedelta(days=day_number + 1)).strftime("%Y-%m-%d")
            study_plan.append({
                "date": study_date,
                "topic_id": topic["id"],
                "topic_title": topic["title"],
                "completed": False
            })

    # ---- Save to state ----
    state["study_plan"] = study_plan
    state["messages"].append(f"‚úÖ Created {len(study_plan)} -day study plan")
    state["print"] = study_plan

    return state



In [1123]:
def save_plan(state: StudyBuddyState) -> StudyBuddyState:
    """Step 5: Persist the generated study plan.

    Inserts a single document (user email, plan, creation timestamp) into the
    'study_plans' collection of the database returned by ``get_database()``,
    then appends a success message to ``state['messages']``.

    Returns:
        The same state dict that was passed in.
    """
    print("üíæ Saving study plan...")

    # Obtain the collection and write the plan document in one step.
    get_database()['study_plans'].insert_one({
        "user_email": state['user_email'],
        "plan": state['study_plan'],
        "created_at": datetime.now(),
    })

    # Record the outcome for the caller.
    state['messages'].append("‚úÖ Study plan saved successfully")
    return state

In [1124]:
# def generate_quiz(state: QuizState) -> QuizState:
#     """
#     Example:
#         quiz = generate_quiz("hello@gmail.com", "2025-12-20")
#     """
    
#     print(f"\nüéØ Generating quiz for {state['study_date']}...")
    
#     # Step 1: Connect to database
#     db = get_database()
    
#     # Step 2: Get user's study plan
#     plan = db.study_plans.find_one({"user_email": state['user_email']})
#     if not plan:
#         print("‚ùå No study plan found!")
#         return None
    
#     # Step 3: Find topic for this date
#     topic_for_today = None
#     for task in plan['plan']:
#         if task['date'] == state["study_date"]:
#             topic_for_today = task
#             break
    
#     if not topic_for_today:
#         print(f"‚ùå No topic scheduled for {state["study_date"]}")
#         return None
    
#     print(f"‚úÖ Found topic: {topic_for_today.get('topic_title', 'Topic ' + str(topic_for_today['topic_id']))}")
    
#     # Step 4: Mark this topic as completed
#     for task in plan['plan']:
#         if task['date'] == state["study_date"]:
#             task['completed'] = True  # Mark as done!
#             break
    
#     # Save updated plan back to database
#     db.study_plans.update_one(
#         {"user_email": state["user_email"]},
#         {"$set": {"plan": plan['plan']}}
#     )
#     print("‚úÖ Marked as completed in database")
    
#     # Step 5: Get full topic details
#     topics = db.topics.find_one({"user_email": state["user_email"]})
#     if not topics:
#         print("‚ùå No topics found!")
#         return None
    
#     # Find the specific topic
#     full_topic = None
#     for topic in topics['topics']:
#         if topic['id'] == topic_for_today['topic_id']:
#             full_topic = topic
#             break
    
#     if not full_topic:
#         print("‚ùå Topic details not found!")
#         return None
    
#     # Step 6: Use AI to create quiz questions
#     print("ü§ñ Asking AI to create quiz questions...")
    
#     #  Content: {full_topic.get('content', full_topic.get('summary', ''))[:2000]}
#     # Simple prompt for AI
#     prompt = f"""
#     Create 5 multiple choice quiz questions about this topic:
    
#     Topic: {full_topic['title']}
   
    
#     Return ONLY a JSON array like this:
#     [
#         {{
#             "question": "What is ICT?",
#             "options": ["A) Information and Communication Technology", "B) Internet Computer Tech", "C) International Coding Team", "D) Intelligent Computing Tool"],
#             "correct": "A"
#         }}
#     ]
#     """
    
#     response = llm.invoke(prompt)
    
#     # Get questions from AI
#     try:
#         # Extract JSON from AI response
#         text = response.content
#         json_part = re.search(r'\[.*\]', text, re.DOTALL)
#         if json_part:
#             questions = json.loads(json_part.group())
#         else:
#             questions = json.loads(text)
#     except:
#         # If AI fails, create a simple question
#         questions = [{
#             "question": f"What is {full_topic['title']}?",
#             "options": ["A) Option 1", "B) Option 2", "C) Option 3", "D) Option 4"],
#             "correct": "A"
#         }]
    
#     print(f"‚úÖ Created {len(questions)} questions")
    
#     # Step 7: Save quiz to database
#     quiz_data = {
#         "user_email": state["user_email"],
#         "date": state["study_date"],
#         "topic_id": topic_for_today['topic_id'],
#         "topic_title": full_topic['title'],
#         "questions": questions,
#         "created_at": datetime.now()
#     }
    
#     result = db.quizzes.insert_one(quiz_data)
#     quiz_id = str(result.inserted_id)
    
#     print(f"‚úÖ Quiz saved! ID: {quiz_id}")
    
#     # Return quiz data
#     return {
#         state["quiz_id"]: quiz_id,
#         state["topic"]: full_topic['title'],
#         state["date"]: study_date,
#         state["questions"]: questions
#     }

In [1125]:
# generate_quiz(
#    # "hello@gmail.com",
#       "2025-12-20"
# )

In [1126]:
# 13. LANGGRAPH DEFINITION
# -------------------------------
workflow = StateGraph(StudyBuddyState)

# Nodes of the pipeline, listed in the order they run.
# The optional "clean_text" node (clean_text_node) is currently not registered.
_pipeline_nodes = [
    ("file_processing", file_processing),
    ("semantic_chunker", semantic_chunker),
    ("store_topics", store_topics),
    ("generate_study_plan", generate_study_plan),
    ("save_plan", save_plan),
]
for _node_name, _node_fn in _pipeline_nodes:
    workflow.add_node(_node_name, _node_fn)

# The graph is a straight line: START -> each node in order -> END.
_stages = [START, *(_name for _name, _ in _pipeline_nodes), END]
for _source, _target in zip(_stages, _stages[1:]):
    workflow.add_edge(_source, _target)

study_graph = workflow.compile()

In [1127]:
# study_graph

In [1128]:
# Initial state for one end-to-end run of the pipeline on a local PDF.
initial_state = dict(
    file_path="./Resources/ICT_notes.pdf",
    file_type="pdf",
    raw_text=None,
    study_time_per_day=240,
    exam_date="2025-12-23",
    user_email="mhello@gmail.com",
    topics_no=3,
    topics=[],
    # messages=[],
)

# Last expression of the cell, so the final state is displayed as the output.
study_graph.invoke(initial_state)

üìÑ Processing file...
üíæ Storing topics in database...
üìÖ Generating study plan...
üíæ Saving study plan...


{'file_path': './Resources/ICT_notes.pdf',
 'file_type': 'pdf',
 'raw_text': None,
 'topics_no': 3,
 'study_time_per_day': 240,
 'exam_date': '2025-12-23',
 'user_email': 'mhello@gmail.com',
 'extracted_text': '1 | ¬©S O N A M S 2 0 2 3 MUKONO DIOCESE SCHOOL OF NURSING AND MIDWIFERY SCIENCES INFORMATION COMMUNICATION TECHNOLOGY NOTES By 2 | ¬©S O N A M S 2 0 2 3 INFORMATION COMMUNICATION TECHNOLOGY Definition: ICT (information and communications technology - or technologies) is a term that includes any communication device or application, encompassing: radio, television, cellular phones, computer and network hardware and software, satellite systems and so on, as wel l as the various services and applications associated with them, such as videoconferencing and distance learning. ICTs are often spoken of in a particular context, such as ICTs in education, health care, or libraries. Or ICT stands for Information and Comm unication Technology and are defined as a set of technological tools