In [None]:
# Install dependencies
!pip install gradio PyPDF2 sentence-transformers faiss-cpu --quiet

In [None]:
# Imports
import gradio as gr
import PyPDF2
import io
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer

In [None]:
# Sentence-embedding model used by process_pdf for both chunks and queries.
# Instantiated once when this cell runs rather than on every request.
EMBEDDING_MODEL_NAME = 'all-MiniLM-L6-v2'
model = SentenceTransformer(EMBEDDING_MODEL_NAME)

# Define the function
# Retrieval settings. Chunk sizes are measured in whitespace-separated words.
CHUNK_SIZE = 300      # words per chunk
CHUNK_OVERLAP = 50    # words shared between consecutive chunks
TOP_K = 3             # maximum number of matches returned


def _extract_pdf_text(pdf_bytes):
    """Return the text of all pages that yield any text, separated by spaces."""
    reader = PyPDF2.PdfReader(io.BytesIO(pdf_bytes))
    page_texts = (page.extract_text() for page in reader.pages)
    # extract_text() may return an empty value for image-only pages; skip those.
    return " ".join(page_text for page_text in page_texts if page_text)


def _chunk_words(words, chunk_size=CHUNK_SIZE, overlap=CHUNK_OVERLAP):
    """Split a word list into overlapping windows, each re-joined into a string."""
    step = chunk_size - overlap
    # A window starting at i only adds new words if i + overlap < len(words);
    # otherwise the previous window already reached the end of the text.
    # max(..., 1) keeps the first window for texts shorter than `overlap`.
    stop = max(len(words) - overlap, 1)
    return [' '.join(words[start:start + chunk_size])
            for start in range(0, stop, step)]


def process_pdf(file, query):
    """Find the PDF passages most similar to a question.

    Args:
        file: Raw bytes of the uploaded PDF (what ``gr.File(type="binary")``
            passes in). Falsy values are treated as "nothing uploaded".
        query: The user's question as a string.

    Returns:
        A string for display: up to ``TOP_K`` matching chunks, each with a
        score of ``1 / (1 + distance)``, separated by ``---`` rules. For
        missing input, a PDF without extractable text, or any exception,
        a human-readable message is returned instead of raising, so the UI
        always has something to show.
    """
    try:
        if not file:
            return "Please upload a PDF file."
        if not query or not query.strip():
            return "Please enter a valid question."

        text = _extract_pdf_text(file)
        if not text.strip():
            return "The PDF has no extractable text."

        chunks = _chunk_words(text.split())

        # FAISS works on float32 matrices; asarray is a no-op when the
        # encoder already returns that dtype.
        embeddings = np.asarray(model.encode(chunks), dtype="float32")
        index = faiss.IndexFlatL2(embeddings.shape[1])
        index.add(embeddings)

        query_embed = np.asarray(model.encode([query]), dtype="float32")
        # Never ask for more neighbours than there are chunks: FAISS pads the
        # result with index -1, which would otherwise be read as chunks[-1]
        # and show the last chunk as a spurious extra match.
        k = min(TOP_K, len(chunks))
        distances, indices = index.search(query_embed, k)

        results = [
            f"Match {rank} (Score: {1 / (1 + dist):.2f}):\n{chunks[idx]}"
            for rank, (dist, idx) in enumerate(zip(distances[0], indices[0]), start=1)
            if idx >= 0  # defensive: skip any "no neighbour" placeholder
        ]
        return "\n\n---\n\n".join(results)

    except Exception as e:
        # Deliberate catch-all: the message is shown in the Gradio output box.
        return f"Error: {str(e)}"

In [None]:
# Build the Gradio UI around process_pdf: a PDF upload plus a question box in,
# one text box out. No theme argument is passed to gr.Interface here.
pdf_input = gr.File(type="binary", label="Upload PDF")
question_input = gr.Textbox(label="Ask a question")
answer_output = gr.Textbox(label="Answer")

demo = gr.Interface(
    fn=process_pdf,
    inputs=[pdf_input, question_input],
    outputs=answer_output,
    title="AI-Based Document Search Assistant",
    description="Upload a text-based PDF and ask any question.",
)

# share=True requests a temporary public URL for the app.
demo.launch(share=True)

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://165ce00cfa7cfc4785.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


