<a href="https://colab.research.google.com/github/Shivanihmm/AI_genesis/blob/main/Miniprojectfinal.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install every runtime dependency in one quiet step.
# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install -q gradio langchain langchain-community langchain-groq faiss-cpu sentence-transformers pypdf python-docx PyPDF2 python-pptx

Collecting gradio
  Downloading gradio-5.30.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<25.0,>=22.0 (from gradio)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.10.1 (from gradio)
  Downloading gradio_client-1.10.1-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.10-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.

In [None]:

import os
import json
import tempfile
import shutil
import requests
import re
from datetime import datetime
from typing import List, Dict, Any, Optional
import gradio as gr

# LangChain components for loading PDFs, splitting text, embedding, vector search and retrieval QA
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.chains import RetrievalQA

# Add Groq integration
from langchain_groq import ChatGroq

# Import document processing libraries
from docx import Document
from pptx import Presentation
from PyPDF2 import PdfReader

In [None]:



class SmartStudyAI:
    """
    A study assistant that processes PDFs, answers questions, and generates
    study materials like flashcards and practice exams.
    """

    def __init__(
        self,
        pdf_directory: str = "pdfs",
        db_path: str = "studybuddy_vectordb",
        llm_type: str = "groq",  # "ollama" or "groq"
        model_name: str = "llama3-70b-8192",
        api_key: str = None
    ):
        # Setup directories
        self.pdf_directory = pdf_directory
        self.db_path = db_path
        self.api_key = api_key
        self.model_name = model_name

        # Create directories if they don't exist
        for directory in [pdf_directory, "flashcards", "exams"]:
            if not os.path.exists(directory):
                os.makedirs(directory)

        # Initialize components
        from langchain_community.embeddings import HuggingFaceEmbeddings
        self.embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2"
        )
        self.vectorstore = None

        # Initialize LLM based on type
        if llm_type == "groq" and api_key:
            from langchain_groq import ChatGroq
            self.llm = ChatGroq(
                api_key=api_key,
                model_name=model_name,
                temperature=0.2
            )
        else:
            # Default to Ollama
            from langchain_community.llms import Ollama
            self.llm = Ollama(model=model_name, temperature=0.2)

        self.qa_chain = None

        # Try to load existing vectorstore
        if os.path.exists(self.db_path):
            self.load_vectorstore()
            self.setup_qa_chain()

    def load_and_process_pdfs(self, force_reload: bool = False) -> None:
        """
        Load all PDFs from the directory, process them, and create a vector database.

        Args:
            force_reload: If True, reprocess PDFs even if database exists
        """
        if os.path.exists(self.db_path) and not force_reload:
            print(f"Vector database already exists at {self.db_path}. Loading...")
            self.load_vectorstore()
            return

        # Check if directory exists and has PDFs
        if not os.path.exists(self.pdf_directory):
            print(f"Directory {self.pdf_directory} does not exist. Creating it...")
            os.makedirs(self.pdf_directory)
            print(f"Please add PDF files to {self.pdf_directory} and run again.")
            return

        pdf_files = [f for f in os.listdir(self.pdf_directory) if f.endswith('.pdf')]
        if not pdf_files:
            print(f"No PDF files found in {self.pdf_directory}. Please add some PDFs.")
            return

        print(f"Processing {len(pdf_files)} PDF files...")
        documents = []

        # Process each PDF
        for filename in pdf_files:
            file_path = os.path.join(self.pdf_directory, filename)
            print(f"Loading {filename}...")
            try:
                loader = PyPDFLoader(file_path)
                docs = loader.load()

                # Add source metadata to each document
                for doc in docs:
                    doc.metadata['source'] = filename
                    # Ensure page is an integer
                    if 'page' not in doc.metadata:
                        doc.metadata['page'] = 1

                documents.extend(docs)
                print(f"  Added {len(docs)} pages from {filename}")
            except Exception as e:
                print(f"  Error processing {filename}: {str(e)}")

        if not documents:
            print("No documents were successfully processed.")
            return

        # Split documents into chunks
        print("Splitting documents into chunks...")
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=500,
            chunk_overlap=50
        )
        chunks = text_splitter.split_documents(documents)
        print(f"Created {len(chunks)} chunks from {len(documents)} pages.")

        # Create vector store
        print("Creating vector embeddings (this may take a while)...")
        self.vectorstore = FAISS.from_documents(chunks, self.embeddings)
        self.vectorstore.save_local(self.db_path)
        print(f"Vector database created and saved to {self.db_path}")

        # Setup QA chain
        self.setup_qa_chain()

    def load_vectorstore(self) -> None:
        """Load the vector database from disk"""
        try:
            self.vectorstore = FAISS.load_local(
                self.db_path,
                self.embeddings,
                allow_dangerous_deserialization=True
            )
            print("Vector database loaded successfully.")
        except Exception as e:
            print(f"Error loading vector database: {str(e)}")
            print("You may need to reprocess your PDFs.")

    def setup_qa_chain(self) -> None:
        """Initialize the question-answering chain"""
        if self.vectorstore is None:
            print("Vector database not loaded. Please load or process PDFs first.")
            return

        retriever = self.vectorstore.as_retriever(search_kwargs={"k": 4})
        self.qa_chain = RetrievalQA.from_chain_type(
            llm=self.llm,
            chain_type="stuff",
            retriever=retriever,
            return_source_documents=True
        )
        print("QA chain initialized and ready for queries.")

    def query(self, question: str) -> Dict[str, Any]:
        """
        Ask a question about the content in the processed PDFs

        Args:
            question: The question to ask

        Returns:
            Dictionary with answer and source information
        """
        if self.qa_chain is None:
            return {
                "answer": "QA system not initialized. Please process PDFs first.",
                "sources": []
            }

        try:
            result = self.qa_chain({"query": question})

            # Extract source information
            sources = []
            for doc in result['source_documents']:
                sources.append({
                    "content": doc.page_content,
                    "source": doc.metadata.get('source', 'Unknown'),
                    "page": doc.metadata.get('page', 1)
                })

            return {
                "answer": result['result'],
                "sources": sources
            }
        except Exception as e:
            return {
                "answer": f"Error: {str(e)}",
                "sources": []
            }

    def get_summary(self, topic: str = "") -> str:
        """Generate a summary of content related to a topic"""
        if self.vectorstore is None:
            return "Vector database not loaded. Please process PDFs first."

        # If topic is provided, search for relevant documents
        if topic:
            docs = self.vectorstore.similarity_search(topic, k=8)
            context = "\n\n".join([doc.page_content for doc in docs])
            prompt = f"Generate a comprehensive summary about '{topic}' based on the following extracted content:\n\n{context}"
        else:
            # For general summary, use more documents to get broader coverage
            docs = self.vectorstore.similarity_search("important concepts and key ideas", k=15)
            context = "\n\n".join([doc.page_content for doc in docs])
            prompt = "Generate a comprehensive summary of the main topics and key concepts in the documents based on the following extracted content:\n\n" + context

        try:
            # Using Groq API directly for summary generation
            headers = {
                "Authorization": f"Bearer {self.api_key}",
                "Content-Type": "application/json",
            }

            payload = {
                "model": self.model_name,
                "messages": [
                    {"role": "system", "content": "You are a helpful assistant who creates concise but comprehensive summaries."},
                    {"role": "user", "content": prompt}
                ],
                "max_tokens": 1500,
                "temperature": 0.5,
            }

            response = requests.post("https://api.groq.com/openai/v1/chat/completions",
                                    headers=headers,
                                    json=payload)

            if response.status_code == 200:
                return response.json()["choices"][0]["message"]["content"]
            else:
                return f"Error generating summary: {response.text}"
        except Exception as e:
            return f"Error generating summary: {str(e)}"

    def extract_text_from_file(self, file_path: str) -> str:
        """Extract text from various file formats"""
        try:
            name = file_path.lower()
            if name.endswith(".pdf"):
                reader = PdfReader(file_path)
                return "\n".join([page.extract_text() for page in reader.pages if page.extract_text()])
            elif name.endswith(".docx"):
                doc = Document(file_path)
                return "\n".join([para.text for para in doc.paragraphs])
            elif name.endswith(".pptx"):
                prs = Presentation(file_path)
                text = []
                for slide in prs.slides:
                    for shape in slide.shapes:
                        if hasattr(shape, "text"):
                            text.append(shape.text)
                return "\n".join(text)
            else:
                return ""
        except Exception as e:
            print(f"Error extracting text from {file_path}: {str(e)}")
            return ""

    def generate_flashcards_from_file(self, file_path: str, num_cards: int = 10) -> List[Dict[str, str]]:
        """
        Generate flashcards from an uploaded file (PDF, DOCX, PPTX)

        Args:
            file_path: Path to the file
            num_cards: Number of flashcards to generate

        Returns:
            List of flashcard dictionaries
        """
        text = self.extract_text_from_file(file_path)
        if not text or not text.strip():
            return []

        return self.generate_flashcards_with_text(text, num_cards=num_cards)

    def generate_flashcards_with_text(self, text: str, topic: str = "", num_cards: int = 10) -> List[Dict[str, str]]:
        """
        Generate flashcards using the Groq API from text

        Args:
            text: Text content to generate flashcards from
            topic: Optional topic to focus on
            num_cards: Number of flashcards to generate

        Returns:
            List of flashcard dictionaries
        """
        try:
            if topic:
                prompt = f"Based on the following content, generate {num_cards} high-quality study flashcards about '{topic}'. For each flashcard, include a question, answer, and difficulty level (easy/medium/hard).\n\nContent:\n{text}"
            else:
                prompt = f"Based on the following content, generate {num_cards} high-quality study flashcards covering the key concepts. For each flashcard, include a question, answer, and difficulty level (easy/medium/hard).\n\nContent:\n{text}"

            headers = {
                "Authorization": f"Bearer {self.api_key}",
                "Content-Type": "application/json",
            }

            payload = {
                "model": self.model_name,
                "messages": [
                    {"role": "system", "content": "You are a helpful assistant who generates educational flashcards."},
                    {"role": "user", "content": prompt}
                ],
                "max_tokens": 2048,
                "temperature": 0.7,
            }

            response = requests.post("https://api.groq.com/openai/v1/chat/completions",
                                    headers=headers,
                                    json=payload)

            if response.status_code == 200:
                content = response.json()["choices"][0]["message"]["content"]
                flashcards = []

                # Try to parse the response into our flashcard format
                # First, try to see if it's already formatted as we expect
                if "Q:" in content or "Question:" in content:
                    lines = content.split("\n")
                    current_card = {}

                    for line in lines:
                        line = line.strip()
                        if not line:
                            continue

                        if line.startswith("Q:") or line.startswith("Question:"):
                            if "question" in current_card:
                                flashcards.append(current_card)
                                current_card = {}
                            current_card["question"] = line.split(":", 1)[1].strip()
                            current_card["difficulty"] = "medium"  # Default difficulty
                        elif line.startswith("A:") or line.startswith("Answer:"):
                            current_card["answer"] = line.split(":", 1)[1].strip()
                        elif "Difficulty:" in line or "Level:" in line:
                            difficulty_part = line.split(":", 1)[1].strip().lower()
                            if "easy" in difficulty_part:
                                current_card["difficulty"] = "easy"
                            elif "hard" in difficulty_part:
                                current_card["difficulty"] = "hard"
                            else:
                                current_card["difficulty"] = "medium"

                    if "question" in current_card and "answer" in current_card:
                        flashcards.append(current_card)
                else:
                    # Try to extract flashcards from paragraphs
                    sections = content.split("---")
                    for section in sections:
                        if not section.strip():
                            continue

                        card = {"difficulty": "medium"}  # Default difficulty

                        # Try to find question and answer
                        q_matches = ["Question:", "Q:"]
                        a_matches = ["Answer:", "A:"]

                        for line in section.split("\n"):
                            line = line.strip()
                            if not line:
                                continue

                            # Check for question
                            for q_match in q_matches:
                                if q_match in line:
                                    card["question"] = line.split(q_match, 1)[1].strip()
                                    break

                            # Check for answer
                            for a_match in a_matches:
                                if a_match in line:
                                    card["answer"] = line.split(a_match, 1)[1].strip()
                                    break

                            # Check for difficulty
                            if "difficulty" in line.lower() or "level" in line.lower():
                                if "easy" in line.lower():
                                    card["difficulty"] = "easy"
                                elif "hard" in line.lower():
                                    card["difficulty"] = "hard"
                                else:
                                    card["difficulty"] = "medium"

                        if "question" in card and "answer" in card:
                            flashcards.append(card)

                # If we couldn't parse any flashcards, create some manually
                if not flashcards:
                    # Make some basic cards from the content
                    parts = content.split("\n\n")
                    for i, part in enumerate(parts):
                        if len(part.strip()) > 10:  # Only use substantial parts
                            # Create a basic flashcard
                            question = f"What is important to understand about {topic if topic else 'this content'} (part {i+1})?"
                            flashcards.append({
                                "question": question,
                                "answer": part.strip(),
                                "difficulty": "medium"
                            })

                            # Limit to requested number
                            if len(flashcards) >= num_cards:
                                break

                # Limit to requested number
                return flashcards[:num_cards]
            else:
                print(f"API Error: {response.text}")
                return []
        except Exception as e:
            print(f"Error generating flashcards: {str(e)}")
            return []

    def generate_flashcards(self, topic: str, num_cards: int = 10) -> List[Dict[str, str]]:
        """
        Generate flashcards about a topic from the processed PDFs

        Args:
            topic: The topic to create flashcards for
            num_cards: Number of flashcards to generate

        Returns:
            List of flashcard dictionaries
        """
        if self.vectorstore is None:
            print("Vector database not loaded. Please process PDFs first.")
            return []

        # Retrieve relevant documents for the topic
        docs = self.vectorstore.similarity_search(topic, k=8)
        content = "\n\n".join([doc.page_content for doc in docs])

        try:
            # Using Groq API directly for flashcard generation
            prompt = f"Based on the following content, generate {num_cards} high-quality study flashcards about '{topic}'. Format each flashcard with a question, answer, and difficulty level (easy/medium/hard).\n\nContent:\n{content}"

            headers = {
                "Authorization": f"Bearer {self.api_key}",
                "Content-Type": "application/json",
            }

            payload = {
                "model": self.model_name,
                "messages": [
                    {"role": "system", "content": "You are a helpful assistant who generates educational flashcards."},
                    {"role": "user", "content": prompt}
                ],
                "max_tokens": 2048,
                "temperature": 0.7,
            }

            response = requests.post("https://api.groq.com/openai/v1/chat/completions",
                                    headers=headers,
                                    json=payload)

            if response.status_code == 200:
                content = response.json()["choices"][0]["message"]["content"]
                flashcards = []

                # Try to parse the response into our flashcard format
                # First, try to see if it's already formatted as we expect
                if "Q:" in content or "Question:" in content:
                    lines = content.split("\n")
                    current_card = {}

                    for line in lines:
                        line = line.strip()
                        if not line:
                            continue

                        if line.startswith("Q:") or line.startswith("Question:"):
                            if "question" in current_card:
                                flashcards.append(current_card)
                                current_card = {}
                            current_card["question"] = line.split(":", 1)[1].strip()
                            current_card["difficulty"] = "medium"  # Default difficulty
                        elif line.startswith("A:") or line.startswith("Answer:"):
                            current_card["answer"] = line.split(":", 1)[1].strip()
                        elif "Difficulty:" in line or "Level:" in line:
                            difficulty_part = line.split(":", 1)[1].strip().lower()
                            if "easy" in difficulty_part:
                                current_card["difficulty"] = "easy"
                            elif "hard" in difficulty_part:
                                current_card["difficulty"] = "hard"
                            else:
                                current_card["difficulty"] = "medium"

                    if "question" in current_card and "answer" in current_card:
                        flashcards.append(current_card)
                else:
                    # Try to extract flashcards from paragraphs
                    sections = content.split("---")
                    for section in sections:
                        if not section.strip():
                            continue

                        card = {"difficulty": "medium"}  # Default difficulty

                        # Try to find question and answer
                        q_matches = ["Question:", "Q:"]
                        a_matches = ["Answer:", "A:"]

                        for line in section.split("\n"):
                            line = line.strip()
                            if not line:
                                continue

                            # Check for question
                            for q_match in q_matches:
                                if q_match in line:
                                    card["question"] = line.split(q_match, 1)[1].strip()
                                    break

                            # Check for answer
                            for a_match in a_matches:
                                if a_match in line:
                                    card["answer"] = line.split(a_match, 1)[1].strip()
                                    break

                            # Check for difficulty
                            if "difficulty" in line.lower() or "level" in line.lower():
                                if "easy" in line.lower():
                                    card["difficulty"] = "easy"
                                elif "hard" in line.lower():
                                    card["difficulty"] = "hard"
                                else:
                                    card["difficulty"] = "medium"

                        if "question" in card and "answer" in card:
                            flashcards.append(card)

                # If we couldn't parse any flashcards, create some manually
                if not flashcards:
                    # Make some basic cards from the content
                    parts = content.split("\n\n")
                    for i, part in enumerate(parts):
                        if len(part.strip()) > 10:  # Only use substantial parts
                            # Create a basic flashcard
                            question = f"What is important to understand about {topic} (part {i+1})?"
                            flashcards.append({
                                "question": question,
                                "answer": part.strip(),
                                "difficulty": "medium"
                            })

                            # Limit to requested number
                            if len(flashcards) >= num_cards:
                                break

                # Limit to requested number
                return flashcards[:num_cards]
            else:
                print(f"API Error: {response.text}")
                return []
        except Exception as e:
            print(f"Error generating flashcards: {str(e)}")
            return []

    def generate_study_links(self, topic: str, num_links: int = 5) -> List[Dict[str, str]]:
        """
        Generate study material links from Google Scholar and other sources

        Args:
            topic: The topic to search for
            num_links: Number of links to generate

        Returns:
            List of dictionaries with link information
        """
        try:
            # Using Groq API to generate search queries and links
            prompt = f"""Generate {num_links} high-quality study resource links about '{topic}'.
            For each link, provide:
            1. The title of the resource
            2. The URL (make sure it's a valid and working URL)
            3. The source (Google Scholar,udacity,stanford , Coursera, etc.)
            4. A brief description of what the resource contains

            Format each link as:
            - Title: [title]
            - URL: [url]
            - Source: [source]
            - Description: [description]"""

            headers = {
                "Authorization": f"Bearer {self.api_key}",
                "Content-Type": "application/json",
            }

            payload = {
                "model": self.model_name,
                "messages": [
                    {"role": "system", "content": "You are a helpful assistant who finds educational resources."},
                    {"role": "user", "content": prompt}
                ],
                "max_tokens": 2048,
                "temperature": 0.7,
            }

            response = requests.post("https://api.groq.com/openai/v1/chat/completions",
                                    headers=headers,
                                    json=payload)

            if response.status_code == 200:
                content = response.json()["choices"][0]["message"]["content"]
                links = []

                # Parse the response into our link format
                current_link = {}
                for line in content.split("\n"):
                    line = line.strip()
                    if not line:
                        continue

                    if line.startswith("- Title:"):
                        if current_link:
                            links.append(current_link)
                            current_link = {}
                        current_link["title"] = line.split(":", 1)[1].strip()
                    elif line.startswith("- URL:"):
                        url = line.split(":", 1)[1].strip()
                        # Ensure URL starts with http
                        if not url.startswith("http"):
                            url = "https://" + url
                        current_link["url"] = url
                    elif line.startswith("- Source:"):
                        current_link["source"] = line.split(":", 1)[1].strip()
                    elif line.startswith("- Description:"):
                        current_link["description"] = line.split(":", 1)[1].strip()

                if current_link and "url" in current_link:
                    links.append(current_link)

                return links[:num_links]
            else:
                print(f"API Error: {response.text}")
                return []
        except Exception as e:
            print(f"Error generating study links: {str(e)}")
            return []

# Set up the Gradio app
def create_gradio_interface():
    """Build the SmartStudyAI Gradio app and return it without launching it.

    Each call creates a fresh temporary working directory (one sub-directory
    for uploaded PDFs, one path handed to ``SmartStudyAI`` as ``db_path``) and
    registers its removal at interpreter exit. The UI callbacks are closures
    over that directory, the list of uploaded PDF names and the
    ``SmartStudyAI`` instance created by the "Initialize Assistant" button.

    Every callback reports failures as a status string instead of raising, so
    an error in the assistant shows up in the UI rather than as a traceback.

    Returns:
        The ``gr.Blocks`` app; call ``.launch()`` on it to serve the UI.
    """
    # Function-scope imports: only this function needs these stdlib modules.
    import atexit
    import html

    default_model = "llama3-70b-8192"  # Default Groq model

    # State shared by the callbacks below
    assistant = None
    temp_dir = tempfile.mkdtemp()
    pdf_dir = os.path.join(temp_dir, "pdfs")
    db_dir = os.path.join(temp_dir, "vectordb")
    os.makedirs(pdf_dir, exist_ok=True)
    # mkdtemp() never cleans up after itself; drop the scratch directory on exit.
    atexit.register(shutil.rmtree, temp_dir, ignore_errors=True)

    # For tracking uploaded PDFs (unique base names, in upload order)
    uploaded_pdfs = []

    def local_path(file):
        """Return the filesystem path for an uploaded file.

        Accepts either a plain path string or an object exposing the path as
        ``.name`` (the attribute the original code relied on).
        """
        return getattr(file, "name", file)

    def pdf_rows():
        """Uploaded PDF names as one-column rows for the status Dataframe."""
        return [[name] for name in uploaded_pdfs]

    def esc(value):
        """HTML-escape a value for use in element text or a quoted attribute."""
        return html.escape(str(value), quote=True)

    def safe_href(url):
        """Return an escaped http(s) URL, or '#' for any other scheme."""
        url = str(url).strip()
        if url.lower().startswith(("http://", "https://")):
            return esc(url)
        return "#"

    def format_flashcards(heading, flashcards):
        """Render flashcards as Markdown; a missing difficulty shows as 'medium'."""
        parts = [f"# {heading}\n\n"]
        for i, card in enumerate(flashcards, 1):
            parts.append(f"## Card {i} ({card.get('difficulty', 'medium')})\n")
            parts.append(f"**Question:** {card['question']}\n\n")
            parts.append(f"**Answer:** {card['answer']}\n\n")
            parts.append("---\n\n")
        return "".join(parts)

    def initialize_assistant(api_key=None, model_name=None):
        """Create the Groq-backed assistant; returns a status message."""
        nonlocal assistant
        llm_type = "groq"
        api_key = (api_key or "").strip()
        model_name = (model_name or "").strip() or default_model

        if not api_key:
            return "Error: API key required for Groq"

        try:
            assistant = SmartStudyAI(
                pdf_directory=pdf_dir,
                db_path=db_dir,
                llm_type=llm_type,
                model_name=model_name,
                api_key=api_key
            )
        except Exception as e:
            return f"Error initializing assistant: {str(e)}"
        return f"Assistant initialized with {llm_type} model: {model_name}"

    def upload_pdf(files):
        """Copy uploaded PDFs into the working directory.

        Returns a status message and the rows for the "Uploaded PDFs" table.
        Non-PDF files are skipped, and a name that was already uploaded is
        re-copied but listed only once, so the table and the count reported
        by ``process_pdfs`` stay accurate across repeated upload events.
        """
        if not files:
            return "No files uploaded", pdf_rows()

        saved = 0
        skipped = []
        for file in files:
            src = local_path(file)
            name = os.path.basename(src)
            if not name.lower().endswith(".pdf"):
                skipped.append(name)
                continue
            # Copy the file to our pdf directory
            shutil.copy(src, os.path.join(pdf_dir, name))
            if name not in uploaded_pdfs:
                uploaded_pdfs.append(name)
            saved += 1

        status = f"Uploaded {saved} PDFs"
        if skipped:
            status += f" (skipped non-PDF files: {', '.join(skipped)})"
        return status, pdf_rows()

    def process_pdfs():
        """Ask the assistant to (re)index the uploaded PDFs."""
        if not assistant:
            return "Please initialize the assistant first!"

        if not uploaded_pdfs:
            return "No PDFs uploaded yet!"

        try:
            assistant.load_and_process_pdfs(force_reload=True)
        except Exception as e:
            return f"Error processing PDFs: {str(e)}"
        return f"Processed {len(uploaded_pdfs)} PDFs and created vector database"

    def ask_question(question):
        """Answer a question and append the numbered list of cited sources."""
        if not assistant:
            return "Please initialize the assistant first!"

        if not question or not question.strip():
            return "Please enter a question!"

        try:
            result = assistant.query(question)
            answer = result['answer']

            # Format sources
            sources = result.get('sources') or []
            if sources:
                source_text = "\n\nSources:\n"
                for i, source in enumerate(sources, 1):
                    source_text += (
                        f"{i}. {source.get('source', 'Unknown')} "
                        f"(Page {source.get('page', 'N/A')})\n"
                    )
                answer += source_text

            return answer
        except Exception as e:
            return f"Error answering question: {str(e)}"

    def generate_summary(topic):
        """Return the assistant's summary for ``topic`` (may be empty)."""
        if not assistant:
            return "Please initialize the assistant first!"

        try:
            return assistant.get_summary(topic)
        except Exception as e:
            return f"Error generating summary: {str(e)}"

    def generate_flashcards(topic, num_cards):
        """Generate flashcards on a topic and render them as Markdown."""
        if not assistant:
            return "Please initialize the assistant first!"

        try:
            flashcards = assistant.generate_flashcards(topic, int(num_cards))
            if not flashcards:
                return "Failed to generate flashcards"

            return format_flashcards(f"{len(flashcards)} Flashcards on {topic}", flashcards)
        except Exception as e:
            return f"Error generating flashcards: {str(e)}"

    def generate_flashcards_from_file(file, num_cards):
        """Generate flashcards from one uploaded file and render them as Markdown."""
        if not assistant:
            return "Please initialize the assistant first!"

        if not file:
            return "No file uploaded for flashcard generation!"

        try:
            path = local_path(file)
            flashcards = assistant.generate_flashcards_from_file(path, int(num_cards))
            if not flashcards:
                return "Failed to generate flashcards from file"

            return format_flashcards(
                f"{len(flashcards)} Flashcards from {os.path.basename(path)}",
                flashcards,
            )
        except Exception as e:
            return f"Error generating flashcards from file: {str(e)}"

    def generate_study_links(topic, num_links):
        """Render suggested study links as HTML cards.

        The topic comes from the user and the link fields from the model, so
        every interpolated value is HTML-escaped and only http(s) URLs are
        placed in ``href``.
        """
        if not assistant:
            return "Please initialize the assistant first!"

        try:
            links = assistant.generate_study_links(topic, int(num_links))
            if not links:
                return "Failed to generate study links"

            # Format links as clickable HTML
            formatted = f"<h1>{len(links)} Study Resources on {esc(topic)}</h1><br>"
            for i, link in enumerate(links, 1):
                title = esc(link.get('title', 'No title'))
                source = esc(link.get('source', 'Unknown'))
                description = esc(link.get('description', 'No description'))
                href = safe_href(link.get('url', '#'))
                formatted += f"""
                <div style='margin-bottom: 20px; border: 1px solid #ddd; padding: 10px; border-radius: 5px;'>
                    <h3>{i}. {title}</h3>
                    <p><strong>Source:</strong> {source}</p>
                    <p><strong>Description:</strong> {description}</p>
                    <p><a href='{href}' target='_blank' rel='noopener noreferrer'>Visit Resource</a></p>
                </div>
                """
            return formatted
        except Exception as e:
            # The message is rendered by an HTML component, so escape it too.
            return f"Error generating study links: {esc(e)}"

    # Create the Gradio Interface
    with gr.Blocks(title="SmartStudyAI") as app:
        gr.Markdown("# AI Genesis - Your Personal Study Assistant")

        with gr.Tab("Setup"):
            gr.Markdown("## Initialize the AI Assistant")

            with gr.Row():
                # Masked input; starts empty so a placeholder string is never
                # submitted as if it were a real key.
                api_key = gr.Textbox(label="API Key (for Groq)", placeholder="Enter your Groq API key", type="password", visible=True)
                # Hidden field carrying the default Groq model to the callback
                model_name = gr.Textbox(label="Model Name", value=default_model, visible=False)

            init_btn = gr.Button("Initialize Assistant")
            init_output = gr.Textbox(label="Initialization Status")

            init_btn.click(initialize_assistant, inputs=[api_key, model_name], outputs=init_output)

            gr.Markdown("## Upload and Process PDFs")

            upload_button = gr.File(label="Upload PDFs", file_count="multiple", file_types=[".pdf"])
            upload_output = gr.Textbox(label="Upload Status")
            pdf_list = gr.Dataframe(headers=["Uploaded PDFs"], label="Uploaded PDFs")

            process_button = gr.Button("Process PDFs")
            process_output = gr.Textbox(label="Processing Status")

            upload_button.upload(upload_pdf, inputs=upload_button, outputs=[upload_output, pdf_list])
            process_button.click(process_pdfs, outputs=process_output)

        with gr.Tab("Ask Questions"):
            gr.Markdown("## Ask Questions About Your Documents")

            question_input = gr.Textbox(label="Your Question", placeholder="What's the main topic of the documents?")
            ask_button = gr.Button("Ask")
            answer_output = gr.Markdown(label="Answer")

            ask_button.click(ask_question, inputs=question_input, outputs=answer_output)

        with gr.Tab("Generate Materials"):
            gr.Markdown("## Generate Study Materials")

            with gr.Row():
                with gr.Column():
                    summary_topic = gr.Textbox(label="Topic for Summary (leave empty for general summary)", placeholder="Machine Learning")
                    summary_button = gr.Button("Generate Summary")
                    summary_output = gr.Markdown(label="Summary")

                    summary_button.click(generate_summary, inputs=summary_topic, outputs=summary_output)

                with gr.Column():
                    flashcard_topic = gr.Textbox(label="Topic for Flashcards", placeholder="Neural Networks")
                    flashcard_num = gr.Number(label="Number of Flashcards", value=5, minimum=1, maximum=20)
                    flashcard_button = gr.Button("Generate Flashcards")
                    flashcard_output = gr.Markdown(label="Flashcards")

                    flashcard_button.click(generate_flashcards, inputs=[flashcard_topic, flashcard_num], outputs=flashcard_output)

        with gr.Tab("File-Based Flashcards"):
            gr.Markdown("## Generate Flashcards from Files (PDF, DOCX, PPTX)")

            with gr.Row():
                file_input = gr.File(label="Upload File", file_types=[".pdf", ".docx", ".pptx"])
                file_flashcard_num = gr.Number(label="Number of Flashcards", value=10, minimum=1, maximum=20)

            file_flashcard_button = gr.Button("Generate Flashcards from File")
            file_flashcard_output = gr.Markdown(label="File-Based Flashcards")

            file_flashcard_button.click(
                generate_flashcards_from_file,
                inputs=[file_input, file_flashcard_num],
                outputs=file_flashcard_output
            )

        with gr.Tab("Study Resources"):
            gr.Markdown("## Find Study Resources Online")

            with gr.Row():
                resource_topic = gr.Textbox(label="Topic for Resources", placeholder="Machine Learning")
                resource_num = gr.Number(label="Number of Resources", value=5, minimum=1, maximum=10)

            resource_button = gr.Button("Find Resources")
            resource_output = gr.HTML(label="Study Resources")

            resource_button.click(
                generate_study_links,
                inputs=[resource_topic, resource_num],
                outputs=resource_output
            )

    return app




In [None]:
# Launch the app when run directly
if __name__ == "__main__":
    # Build the Blocks UI; nothing is served until launch() is called.
    app = create_gradio_interface()
    # share=True requests a public gradio.live URL in addition to the local
    # server; debug=True keeps this cell running so errors and logs stay
    # visible (set debug=False to release the cell).
    # NOTE(review): the share link is public, so anyone holding it can reach
    # this UI — confirm that is intended before sharing the notebook output.
    app.launch(share=True, debug=True)

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://77a7ecc6c9106e4c57.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


  self.embeddings = HuggingFaceEmbeddings(


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Processing 1 PDF files...
Loading Afterpulse_and_dark_count_simulator_for_single_pho.pdf...
  Added 8 pages from Afterpulse_and_dark_count_simulator_for_single_pho.pdf
Splitting documents into chunks...
Created 53 chunks from 8 pages.
Creating vector embeddings (this may take a while)...
Vector database created and saved to /tmp/tmp6uv7f5ry/vectordb
QA chain initialized and ready for queries.
