In [None]:
!pip install gradio
!pip install pymupdf
!pip install python-pptx
!pip install -U bitsandbytes





In [None]:
import os
import re
import zipfile

import fitz  # PyMuPDF for PDF processing
import gradio as gr
import torch
from pptx import Presentation
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Set up model and tokenizer.
# SECURITY: access tokens must never be hard-coded in a notebook. Any token
# that was previously committed here should be revoked. The token is read from
# the HF_TOKEN environment variable instead; when it is unset, `None` is passed
# and the Hugging Face libraries are expected to fall back to locally stored
# login credentials (verify for the installed transformers version).
token = os.environ.get("HF_TOKEN")
model_name = "meta-llama/Llama-2-7b-chat-hf"

# Configure 4-bit quantization for efficiency
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=False
)

# Load tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(model_name, token=token)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    token=token,
    torch_dtype=torch.float16,
    device_map="auto",
    quantization_config=bnb_config
)
# Single shared text-generation pipeline used by every helper below.
llama_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)

# Unpack the lecture-note archive so the PPTX files can be searched.
zip_file_path = '/content/Lecture_notes_zip.zip'  # Example path, replace with actual
dataset_path = '/content/Lecture_notes'
with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
    zip_ref.extractall(dataset_path)

# Directory (inside the extracted archive) that is scanned for .pptx files.
materials_dir = "/content/Lecture_notes/Lecture_notes/"

# Function to extract text from first and last slides
def extract_text_from_first_last_slides(file_path, first_n=30, last_m=3):
    """Return the lower-cased text of the first and last slides of a PPTX file.

    Args:
        file_path: Path to the .pptx file, passed to ``Presentation``.
        first_n: Number of slides to read from the start of the deck.
        last_m: Number of slides to read from the end of the deck.

    Returns:
        The text of every shape exposing a ``text`` attribute on the selected
        slides, each followed by a single space, concatenated and lower-cased.
        Each slide is read at most once, even when the deck has fewer than
        ``first_n + last_m`` slides.
    """
    prs = Presentation(file_path)
    total_slides = len(prs.slides)

    # Clamp so zero/negative counts simply select nothing.
    head_end = max(0, min(first_n, total_slides))
    # Start the tail where the head stopped so short decks are not read twice.
    tail_start = max(head_end, total_slides - last_m)
    slide_indices = [*range(head_end), *range(tail_start, total_slides)]

    parts = []
    for index in slide_indices:
        for shape in prs.slides[index].shapes:
            if hasattr(shape, "text"):
                parts.append(shape.text + " ")

    return "".join(parts).lower()

def fetch_lecture_notes(keyword):
    """Find the first lecture deck whose slide text contains `keyword`.

    The match is a case-insensitive substring test against the text returned
    by ``extract_text_from_first_last_slides`` for each .pptx file in
    ``materials_dir``.

    Args:
        keyword: Search term; surrounding whitespace is ignored.

    Returns:
        ``"Lecture notes found: <path>"`` for the first matching file (files
        are visited in sorted name order so the result is deterministic), or
        ``"No lecture notes found for '<keyword>'."`` when nothing matches or
        the keyword is empty.
    """
    keyword = keyword.strip().lower()
    # An empty string is a substring of everything; without this guard the
    # first file in the directory would always be reported as a match.
    if not keyword:
        return f"No lecture notes found for '{keyword}'."

    # os.listdir order is arbitrary, so sort to make "first match" stable.
    ppt_files = sorted(
        name for name in os.listdir(materials_dir) if name.lower().endswith(".pptx")
    )
    for filename in ppt_files:
        file_path = os.path.join(materials_dir, filename)
        slide_text = extract_text_from_first_last_slides(file_path)

        if keyword in slide_text:
            return f"Lecture notes found: {file_path}"
    return f"No lecture notes found for '{keyword}'."

def extract_text_from_ppt(file_path):
    """Collect the text of every shape on every slide of a PPTX file.

    Each shape that exposes a ``text`` attribute contributes its text followed
    by one space; the concatenated result is returned lower-cased.
    """
    deck = Presentation(file_path)
    pieces = []
    for current_slide in deck.slides:
        for item in current_slide.shapes:
            if not hasattr(item, "text"):
                continue
            pieces.append(item.text + " ")
    combined = "".join(pieces)
    return combined.lower()

def extract_text_from_pdf(file_path):
    """Extract the lower-cased text of every page in a PDF file.

    Args:
        file_path: Path to the PDF, passed to ``fitz.open``.

    Returns:
        The concatenated page text in lower case, or an empty string if the
        file could not be opened or read (the error is printed, not raised,
        so callers can treat "" as "no readable text").
    """
    try:
        # The context manager closes the document even if a page fails to
        # parse; the original left the file handle open.
        with fitz.open(file_path) as doc:
            return "".join(page.get_text() for page in doc).lower()
    except Exception as e:
        print("Error extracting text from PDF:", e)
        return ""

def summarize_text_full(text, max_new_tokens=200, max_input_chars=10000):
    """Generate a short, plain-language summary of `text` with the LLaMA pipeline.

    Args:
        text: The content to summarize.
        max_new_tokens: Upper bound on the number of generated tokens.
        max_input_chars: Maximum number of characters of `text` placed in the
            prompt; pass ``None` to disable truncation. This is a rough
            character-based guard against exceeding the model's context
            window — TODO: replace with tokenizer-based truncation and tune
            the limit for the deployed model.

    Returns:
        Only the generated summary text (the prompt is not echoed back).
    """
    if max_input_chars is not None:
        text = text[:max_input_chars]
    prompt = f"Please provide a brief, easy-to-understand summary of the following content:\n\n{text}\n\nSummary:"
    response = llama_pipeline(
        prompt,
        max_new_tokens=max_new_tokens,
        num_return_sequences=1,
        # By default the pipeline returns prompt + completion, which would
        # make the "summary" start with the entire source document.
        return_full_text=False,
    )
    return response[0]["generated_text"].strip()

def filter_intro_content(text):
    """Drop boilerplate lines from `text` and join the remaining lines with spaces.

    A line is dropped when it contains, case-insensitively, any of the
    title-slide / closing-slide phrases listed below.
    """
    boilerplate = ["assistant professor", "ECE & CS department", "thank you"]
    kept_lines = []
    for line in text.splitlines():
        lowered_line = line.lower()
        if any(marker.lower() in lowered_line for marker in boilerplate):
            continue
        kept_lines.append(line)
    return " ".join(kept_lines)

def summarize_uploaded_file(file_path):
    """Summarize an uploaded PDF or PPTX file.

    Args:
        file_path: Path of the uploaded file; the format is chosen from its
            extension, compared case-insensitively.

    Returns:
        ``"Summary:\\n\\n<summary>"`` on success, or a human-readable message
        when the format is unsupported, the document has no readable text, or
        an error occurs. Exceptions are reported in the return value rather
        than raised.
    """
    try:
        # Compare extensions case-insensitively so "NOTES.PDF" is accepted.
        lowered_path = file_path.lower()
        if lowered_path.endswith(".pdf"):
            text = extract_text_from_pdf(file_path)
        elif lowered_path.endswith(".pptx"):
            text = extract_text_from_ppt(file_path)
        else:
            return "Unsupported file format. Please upload a PDF or PPTX file."

        # Strip so whitespace-only extractions count as "no readable text"
        # instead of being sent to the model.
        text = filter_intro_content(text).strip()
        if not text:
            return "The uploaded document does not contain readable text for summarization."

        summary = summarize_text_full(text, max_new_tokens=100)
        return f"Summary:\n\n{summary}"
    except Exception as e:
        print("Error summarizing uploaded file:", e)
        return f"Error in summarizing the uploaded file: {str(e)}"

def llama_generate_response(prompt):
    """Answer a free-form question with the LLaMA text-generation pipeline.

    Args:
        prompt: The user's question, sent to the model as-is.

    Returns:
        Only the newly generated text (the prompt is not echoed back).
    """
    response = llama_pipeline(
        prompt,
        # max_length counts the prompt tokens too, so a long question left no
        # room for an answer; max_new_tokens budgets the reply independently.
        max_new_tokens=200,
        num_return_sequences=1,
        return_full_text=False,
    )
    return response[0]["generated_text"].strip()

def get_response(user_input, uploaded_file=None):
    """Route a chat request to summarization, lecture-note lookup, or free-form Q&A.

    Args:
        user_input: The text typed by the user.
        uploaded_file: Optional path of an uploaded PDF/PPTX. When given, the
            file is summarized and `user_input` is ignored.

    Returns:
        The response text. Any exception is printed and converted into a
        generic error message so the chat UI never sees a traceback.
    """
    try:
        if uploaded_file is not None:
            summary = summarize_uploaded_file(uploaded_file)
            return f"Here is the summary of the uploaded document:\n\n{summary}"

        lowered_input = user_input.lower()
        if any(trigger in lowered_input for trigger in ("notes", "lecture", "ppt", "pdf")):
            # Take whatever follows the last stand-alone word "for". A plain
            # str.split("for") also splits inside words, turning
            # "notes for information theory" into "mation theory".
            keyword = re.split(r"\bfor\b", user_input, flags=re.IGNORECASE)[-1].strip()
            lecture_notes_path = fetch_lecture_notes(keyword)
            return f"Here's the lecture material for '{keyword}': {lecture_notes_path}"

        return llama_generate_response(user_input)
    except Exception as e:
        print("Error in get_response function:", e)
        return "An error occurred while processing your request."

# Build the chat UI: a heading, the chat history, a text input, a file upload
# widget, and a button that clears all three interactive components.
with gr.Blocks() as demo:
    gr.Markdown("## AI Chatbot with LLaMA Model, Document Summarization, and Lecture Note Retrieval")
    chatbot_ui = gr.Chatbot(label="LLaMA Assistant")
    message = gr.Textbox(label="Enter your question here:")
    # type="filepath" hands the callback a path string rather than a file object.
    upload_file = gr.File(label="Upload PDF or PPTX for Summarization", type="filepath")
    clear_button = gr.ClearButton([message, chatbot_ui, upload_file])

    def respond(message, chat_history, uploaded_file):
        """Handle one submission: get the bot reply and append it to the history.

        Returns the new values for the three output components: an empty
        string for the textbox, the updated chat history, and None for the
        file widget.
        """
        bot_response = get_response(message, uploaded_file)
        chat_history.append((message, bot_response))
        return "", chat_history, None
    # Pressing Enter in the textbox triggers `respond`; its three return values
    # map positionally onto the components listed in `outputs`.
    message.submit(respond, inputs=[message, chatbot_ui, upload_file], outputs=[message, chatbot_ui, upload_file])

# Launch Gradio app
# share=True requests a public share link in addition to the local server.
demo.launch(share=True)


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://c45f8df6d67acb292b.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


