<a href="https://colab.research.google.com/github/kamsalideepika/Edutuor-AI/blob/main/Edu.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:

# -*- coding: utf-8 -*-
"""EduTutor AI: Personalized Learning Platform - Final Version"""

# --- 1. Install Necessary Libraries ---
# These commands are specific to Google Colab to install packages
# not available by default or to upgrade them.
!pip install PyPDF2
!pip install -U bitsandbytes

# --- 2. Import Libraries ---
import gradio as gr
import torch
import PyPDF2
import re
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM

# --- 3. Global Configurations and Initializations ---

# Demo-only credential store mapping username -> plaintext password.
# The membership guard means an existing `users_db` (e.g. from an earlier run
# of this cell) is left untouched instead of being reset.
# A production system would use a real database with hashed passwords
# (e.g., bcrypt).
if "users_db" not in globals():
    users_db = {"student1": "pass123", "student2": "abc456"}
    print("Initialized dummy user database.")

# Placeholder for per-user session data (e.g., quiz attempts, progress).
user_sessions = {}

# Pick the inference device: GPU when CUDA is available, otherwise CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Device set to use: {device}")

# Build the text-generation pipeline once, at start-up.
# `generator` starts as None and ends up as either the real pipeline or,
# if anything in the loading sequence raises, a stand-in callable that
# returns a fixed error message in the same list-of-dicts shape.
generator = None
try:
    model_name = "ibm-granite/granite-3.3-2b-instruct"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name)
    generator = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        # Sampling settings: a generous output budget plus nucleus sampling
        # with a moderate temperature.
        max_new_tokens=700,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
    )
    print("✅ Model and tokenizer loaded successfully.")
except Exception as e:
    print(f"❌ Error loading model/tokenizer: {e}")
    print("Please ensure you have sufficient RAM/GPU in your Colab session.")

    def dummy_generator(prompt):
        """Stand-in used when the model failed to load; ignores `prompt`."""
        fallback_text = "Error: AI model not available. Please try again later or check server logs."
        return [{"generated_text": fallback_text}]

    generator = dummy_generator


# --- 4. Core AI Utility Functions ---

def generate_response(prompt: str) -> str:
    """
    Run the module-level ``generator`` on ``prompt`` and return the reply text.

    If the generated text begins with the prompt itself, that echoed prefix
    is removed. The result is whitespace-stripped. This function does not
    raise: a missing generator, an unexpected result shape, or any exception
    during generation is reported as a "❌ Error..." string instead.
    """
    if generator is None:
        return "❌ Error: AI model is not loaded. Cannot generate response."

    try:
        outputs = generator(prompt)

        # Expected shape: a non-empty list whose first item has "generated_text".
        looks_valid = (
            outputs
            and isinstance(outputs, list)
            and len(outputs) > 0
            and "generated_text" in outputs[0]
        )
        if not looks_valid:
            print(f"❌ Unexpected response format from generator: {outputs}")
            return "❌ Error: Unexpected response format from AI model. Please try again."

        generated = outputs[0]["generated_text"]
        # Drop the echoed prompt so only the newly generated part remains.
        reply = generated[len(prompt):] if generated.startswith(prompt) else generated
        return reply.strip()
    except Exception as err:
        print(f"❌ Error during text generation for prompt '{prompt[:50]}...': {err}")
        return f"❌ Error during text generation: {err}. Please simplify your request or try again."

# --- 5. Specific Learning Functionalities ---

def get_concept_explanation(concept: str) -> str:
    """
    Ask the model for a student-friendly explanation of ``concept``.

    Returns a short instruction message instead when ``concept`` is empty
    or only whitespace; otherwise returns whatever ``generate_response``
    produces for the explanation prompt.
    """
    if concept.strip() == "":
        return "Please enter a concept to get an explanation."

    explanation_prompt = f"""
Explain the concept of '{concept}' in a simple and clear way that a 15-year-old student can easily understand. Include examples and real-world applications if possible.
"""
    return generate_response(explanation_prompt)

def get_language_basics(language: str) -> str:
    """
    Ask the model for an introduction to ``language``.

    A falsy ``language`` (empty string or None) yields a prompt-the-user
    message; otherwise the model is asked for grammar rules, vocabulary,
    parts of speech and a handful of key phrases.
    """
    if language:
        basics_prompt = f"""
Teach me the basics of {language} language. Include grammar rules, common vocabulary, and parts of speech. Provide at least 5-7 key phrases or words.
"""
        return generate_response(basics_prompt)

    return "Please select a language to learn."

def extract_text_from_pdf(pdf_file) -> tuple[str, str | None]:
    """
    Extracts text from a PDF file.
    Returns extracted text and an error message (if any).
    """
    if not pdf_file:
        return None, "❌ Please upload a PDF file."
    try:
        reader = PyPDF2.PdfReader(pdf_file)
        # Join text from all pages, ensuring only pages with text are included
        text = " ".join([page.extract_text() for page in reader.pages if page.extract_text()])
        if not text.strip(): # Check if extracted text is empty or just whitespace
            return None, "❌ Could not extract readable text from PDF. It might be an image-based PDF or empty."
        return text, None
    except PyPDF2.errors.PdfReadError:
        return None, "❌ Invalid PDF file. Please upload a valid PDF."
    except Exception as e:
        return None, f"❌ Failed to process PDF: {e}. Please ensure it's a valid PDF."

def generate_test_from_content(text_content: str) -> str:
    """
    Ask the model to write multiple-choice questions about ``text_content``.

    Blank content returns an error message without calling the model.
    Content longer than 3000 characters is cut to that length (with "..."
    appended) before being embedded in the prompt, and the truncation is
    logged to stdout.
    """
    if text_content.strip() == "":
        return "❌ No content provided for test generation."

    # Rough guard against overflowing the model's context window.
    # The limit is in characters, not tokens.
    char_limit = 3000
    too_long = len(text_content) > char_limit
    if too_long:
        text_content = text_content[:char_limit] + "..."
        print(f"Truncated PDF content to {char_limit} characters for test generation.")

    test_prompt = f"""
Generate 3-5 multiple-choice questions from the following content. Focus on key information.

Content:
{text_content}

Format each question strictly like this:
Qn: <question text>
A. <option A text>
B. <option B text>
C. <option C text>
D. <option D text>
Correct Answer: <correct letter A, B, C, or D>

Example:
Q1: What is the capital of France?
A. Berlin
B. Paris
C. Rome
D. Madrid
Correct Answer: B
"""
    return generate_response(test_prompt)

# One quiz item in the format requested by the prompt below.
# The answer letter is terminated with a word boundary rather than a
# mandatory ".": the prompt asks for "Correct Answer: B" (no period), so a
# required period rejected exactly the output the model was told to produce.
# "Correct Answer: B." is still accepted.
_QUIZ_QUESTION_PATTERN = re.compile(
    r'Q(\d+):\s*(.*?)\n'  # Qn: <question>
    r'\s*A\.\s*(.*?)\n'  # A. <option A>
    r'\s*B\.\s*(.*?)\n'  # B. <option B>
    r'\s*C\.\s*(.*?)\n'  # C. <option C>
    r'\s*D\.\s*(.*?)\n'  # D. <option D>
    r'\s*Correct Answer:\s*([A-D])\b',  # Correct Answer: <letter>
    re.DOTALL  # Lets a question or option span several lines
)


def _parse_quiz_text(quiz_text: str) -> list[dict]:
    """Turn raw model output into a list of question/options/correct_answer dicts."""
    questions = []
    # Every match is a 7-tuple: number, question, options A-D, answer letter.
    for _number, question, opt_a, opt_b, opt_c, opt_d, answer in _QUIZ_QUESTION_PATTERN.findall(quiz_text):
        questions.append({
            "question": question.strip(),
            "options": {
                'A': opt_a.strip(),
                'B': opt_b.strip(),
                'C': opt_c.strip(),
                'D': opt_d.strip()
            },
            "correct_answer": answer.strip().upper()
        })
    return questions


def generate_quiz_content(topic: str) -> list[dict]:
    """
    Generate a 5-question multiple-choice quiz on ``topic``.

    Args:
        topic: Subject of the quiz. Blank or whitespace-only input short-circuits.

    Returns:
        A list of dicts, one per question the parser recognised, each with
        the keys ``"question"`` (str), ``"options"`` (dict mapping
        ``'A'``-``'D'`` to option text) and ``"correct_answer"`` (letter).
        The list is empty when ``topic`` is blank or when nothing in the
        model output matches the expected format.
    """
    if not topic.strip():
        return []  # Nothing to ask the model about.

    prompt = f"""
Generate a 5-question multiple choice quiz on the topic: {topic}.
Each question should have options A, B, C, D.
Indicate the Correct Answer for each question.

Format each question strictly like this:
Qn: <question text>
A. <option A text>
B. <option B text>
C. <option C text>
D. <option D text>
Correct Answer: <correct letter A, B, C, or D>

Example:
Q1: What is the largest planet in our solar system?
A. Mars
B. Jupiter
C. Earth
D. Venus
Correct Answer: B
"""
    quiz_text = generate_response(prompt)
    print(f"Raw Quiz Generator Output: {quiz_text}")

    questions = _parse_quiz_text(quiz_text)
    print(f"Final Parsed Questions list: {questions}")
    return questions

# --- 6. Authentication Logic ---

def authenticate_user(username: str, password: str) -> bool:
    """
    Check ``username``/``password`` against the dummy ``users_db``.

    Returns True only when the username exists and the supplied password
    equals the stored one. Non-string arguments and unknown usernames are
    always rejected. A plain ``users_db.get(username) == password`` would
    accept an unknown username paired with a ``None`` password, because
    ``None == None``.
    """
    import hmac

    if not isinstance(username, str) or not isinstance(password, str):
        return False

    stored_password = users_db.get(username)
    if not isinstance(stored_password, str):
        # Unknown user (or a malformed entry): never authenticate.
        return False

    # Constant-time comparison; encode first because compare_digest only
    # accepts ASCII-only str, while bytes may hold any content.
    return hmac.compare_digest(
        stored_password.encode("utf-8", "surrogatepass"),
        password.encode("utf-8", "surrogatepass"),
    )

def register_new_user(new_username: str, new_password: str) -> str:
    """
    Add a username/password pair to the dummy ``users_db``.

    Returns a status message: an error when either field is blank
    (whitespace-only counts as blank) or the username is already present,
    otherwise a success message after the entry has been stored.
    """
    both_filled = new_username.strip() and new_password.strip()
    if not both_filled:
        return "❌ Username and password cannot be empty!"

    if new_username in users_db:
        return "❌ Username already exists! Please choose another."

    users_db[new_username] = new_password
    return "✅ User registered successfully. You can now login."

# --- 7. Gradio Interface Logic Functions ---

def login_handler(user: str, pwd: str):
    """
    Gradio callback for the Login button.

    Returns a 3-tuple: a ``gr.update`` toggling visibility of the main app
    column, the text for the login-status box, and the value for the
    username state (the username on success, an empty string on failure).
    """
    if not authenticate_user(user, pwd):
        # Rejected: keep the app hidden and clear the stored username.
        return gr.update(visible=False), "❌ Invalid credentials! Please try again.", ""

    # Accepted: reveal the app and remember who logged in.
    return gr.update(visible=True), "✅ Login successful. Welcome!", user

def process_learning_requests(username: str, concept: str, language: str, pdf_file):
    """
    Handle the concept-explanation, language-basics and PDF-test requests.

    Each of the three features is independent; a feature whose input is
    missing gets a hint message instead of a model call.

    Args:
        username: Logged-in username supplied by the UI; not used here.
        concept: Concept to explain. Blank or None skips the explanation.
        language: Language to teach. Falsy skips the lesson.
        pdf_file: Uploaded PDF, or a falsy value when nothing was uploaded.

    Returns:
        A 3-tuple ``(concept_output, language_output, test_pdf_output)``.
    """
    # Concept understanding. `concept` may be None as well as "".
    if concept and concept.strip():
        concept_output = get_concept_explanation(concept)
    else:
        concept_output = "Enter a concept above to get an explanation."

    # Language learning.
    if language:
        language_output = get_language_basics(language)
    else:
        language_output = "Select a language above to learn its basics."

    # PDF test generation.
    if not pdf_file:
        test_pdf_output = "Upload a PDF file to generate a test from its content."
    else:
        pdf_text, pdf_error = extract_text_from_pdf(pdf_file)
        if pdf_error:
            test_pdf_output = pdf_error
        elif pdf_text:
            test_pdf_output = generate_test_from_content(pdf_text)
        else:
            # Defensive fallback: neither text nor an error came back.
            test_pdf_output = "Failed to extract text or generate test from PDF. Check file format."

    return concept_output, language_output, test_pdf_output

def process_quiz_request(quiz_topic: str) -> str:
    """
    Build a Markdown-formatted quiz for ``quiz_topic``.

    Returns a hint message for a blank topic, a fallback message when no
    questions could be generated, and otherwise a heading followed by each
    question, its options and its correct answer. The final text is also
    printed to stdout.
    """
    if quiz_topic.strip() == "":
        return "Please enter a topic to generate a quiz."

    parsed_questions = generate_quiz_content(quiz_topic)

    if parsed_questions:
        pieces = [f"### Quiz on: {quiz_topic}\n\n"]
        for number, item in enumerate(parsed_questions, start=1):
            pieces.append(f"**Q{number}:** {item['question']}\n")
            pieces.extend(
                f"  **{letter}.** {text}\n" for letter, text in item['options'].items()
            )
            pieces.append(f"  **Correct Answer:** {item['correct_answer']}\n\n")
        formatted_quiz_output = "".join(pieces)
    else:
        formatted_quiz_output = "Could not generate quiz for the given topic. The AI might not have sufficient knowledge or the request was too vague."

    print(f"Formatted Quiz Output for Gradio: {formatted_quiz_output}")
    return formatted_quiz_output

# --- 8. Gradio User Interface Definition ---

# Layout overview:
#   1. Title and welcome text.
#   2. Login / Register tabs (always visible).
#   3. `app_ui` column holding the learning tools; created hidden and shown
#      by `login_handler` once authentication succeeds.
# Components are rendered in creation order, so statement order here is the
# page layout.
with gr.Blocks(theme=gr.themes.Soft(), title="EduTutor AI") as interface:
    gr.Markdown("# 👩‍🏫 EduTutor AI: Personalized Learning Platform")
    gr.Markdown("Welcome to EduTutor AI! Your personalized learning assistant. Login or Register to get started.")

    # State variable to hold the logged-in username.
    # Written by the login click handler (third output) and passed as the
    # first input to `process_learning_requests`.
    username_state = gr.State("")

    # Top-level Tabs for Login/Register
    with gr.Tabs():
        with gr.TabItem("Login"):
            with gr.Row():
                # NOTE(review): these placeholders display one of the demo
                # accounts seeded in `users_db` ("student1" / "pass123").
                login_user = gr.Textbox(label="Username", placeholder="student1", scale=1)
                login_pwd = gr.Textbox(label="Password", type="password", placeholder="pass123", scale=1)
            login_button = gr.Button("Login", variant="primary", scale=0) # Scale=0 for auto-sizing
            login_status = gr.Textbox(label="Login Status", interactive=False, placeholder="Enter credentials and click Login.")

        with gr.TabItem("Register"):
            with gr.Row():
                new_user = gr.Textbox(label="New Username", placeholder="Enter desired username", scale=1)
                new_pwd = gr.Textbox(label="New Password", type="password", placeholder="Create a password", scale=1)
            register_button = gr.Button("Register", variant="secondary", scale=0)
            registration_status = gr.Textbox(label="Registration Status", interactive=False, placeholder="Click Register to create account.")
            # Registration writes straight into `users_db` and reports the
            # outcome in the status box.
            register_button.click(fn=register_new_user, inputs=[new_user, new_pwd], outputs=registration_status)

    # Main Application UI - Hidden until login
    with gr.Column(visible=False, elem_id="app_ui_column") as app_ui:
        gr.Markdown("---")
        gr.Markdown("## Your Learning Hub")
        gr.Markdown("Use the tabs below to explore concepts, learn languages, generate tests from PDFs, or create quizzes.")

        # Tabs for different functionalities within the main app
        with gr.Tabs():
            with gr.TabItem("Concepts & Language"):
                gr.Markdown("### Explore Concepts and Learn Languages")
                with gr.Row():
                    concept_input = gr.Textbox(
                        label="Enter Concept to Explain (e.g., Photosynthesis, Machine Learning)",
                        placeholder="Generative AI",
                        lines=2, # More lines for concept input
                        scale=2
                    )
                    # "English" is preselected, so the language value is
                    # non-empty unless the user changes it, and
                    # `process_learning_requests` requests a language lesson
                    # for any truthy language.
                    language_radio = gr.Radio(
                        choices=["English", "Hindi", "French", "Spanish"], # Added more language options
                        label="Choose Language for Basics",
                        value="English", # Default selection
                        scale=1
                    )
                pdf_upload = gr.File(
                    label="Upload PDF for Test Generation (Optional)",
                    file_types=[".pdf"],
                    file_count="single" # Allow only one file
                )

                # One button drives all three features (concept, language, PDF test).
                run_learning_btn = gr.Button("Get Explanations & Generate PDF Test", variant="primary")

                gr.Markdown("---")
                gr.Markdown("### Outputs:")
                # Using Markdown components for outputs for better text rendering and copy button
                concept_output_box = gr.Markdown(label="Concept Explanation", show_copy_button=True)
                language_output_box = gr.Markdown(label="Language Learning Basics", show_copy_button=True)
                pdf_test_output_box = gr.Markdown(label="Generated Test from PDF Content", show_copy_button=True)

                # Link the button click to the processing function.
                # The three outputs line up, in order, with the 3-tuple
                # returned by `process_learning_requests`.
                run_learning_btn.click(
                    fn=process_learning_requests,
                    inputs=[username_state, concept_input, language_radio, pdf_upload],
                    outputs=[concept_output_box, language_output_box, pdf_test_output_box]
                )

            with gr.TabItem("Quiz Generator"):
                gr.Markdown("### Create a Quick Quiz on Any Topic")
                quiz_topic_input = gr.Textbox(
                    label="Enter Topic for Quiz (e.g., World War II, Python Programming)",
                    placeholder="Artificial Intelligence",
                    lines=2
                )
                generate_quiz_btn = gr.Button("Generate Quiz", variant="primary")

                gr.Markdown("---")
                gr.Markdown("### Generated Quiz:")
                # Markdown output so the quiz text returned by
                # `process_quiz_request` (which uses "###" and "**" markup)
                # is rendered; starts with a placeholder message.
                quiz_output_box = gr.Markdown(
                    value="Your generated quiz will appear here. It may take a moment...",
                    label="Generated Quiz",
                    show_copy_button=True,
                    # We don't use 'lines' with gr.Markdown directly, but the content will expand.
                    # For a fixed box, you'd use gr.Textbox. If the content is long, Markdown works well.
                )

                # Link the quiz generation button
                generate_quiz_btn.click(
                    fn=process_quiz_request,
                    inputs=[quiz_topic_input],
                    outputs=[quiz_output_box]
                )

    # Link the login button to its handler.
    # Wired up last because it targets `app_ui`, which is only defined above.
    # Output order matches `login_handler`'s return tuple:
    # (visibility update, status text, username).
    login_button.click(
        fn=login_handler,
        inputs=[login_user, login_pwd],
        outputs=[app_ui, login_status, username_state]
    )

# --- 9. Launch the Gradio Interface ---
# debug=True provides more console output for debugging; in Colab it also
# keeps the cell running until interrupted so errors and logs stay visible.
# share=True generates a temporary public URL, useful for Colab.
# NOTE(review): the login step only toggles visibility of the app column in
# the UI — confirm whether that is sufficient protection before sharing the
# public link.
interface.launch(debug=True, share=True)

Initialized dummy user database.
Device set to use: cuda


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/132 [00:00<?, ?B/s]

Device set to use cuda:0


✅ Model and tokenizer loaded successfully.
Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://ad6904477c5730b5b4.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
