In [None]:
!pip install -q faiss-cpu sentence-transformers transformers pypdf gradio


In [None]:
import gradio as gr
from pypdf import PdfReader

import faiss
import numpy as np

from sentence_transformers import SentenceTransformer
from transformers import pipeline



In [14]:
# Model identifiers, kept in one place so they are easy to swap.
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
QA_MODEL = "google/flan-t5-base"
SUMMARY_MODEL = "facebook/bart-large-cnn"

# Sentence encoder used to embed both the PDF chunks and the user's question.
embedder = SentenceTransformer(EMBEDDING_MODEL)

# Answer generator. `temperature` was previously passed here without
# `do_sample=True`; under greedy decoding it is ignored and only produces a
# warning at generation time, so it has been removed. Output is unchanged.
# To actually sample, pass both `do_sample=True` and `temperature=...`.
qa_pipeline = pipeline(
    "text2text-generation",
    model=QA_MODEL,
    max_length=512,
)

# Summariser used by the note generator; summary length is bounded to
# 120-300 generated tokens.
summary_pipeline = pipeline(
    "summarization",
    model=SUMMARY_MODEL,
    max_length=300,
    min_length=120,
)


Device set to use cpu
Device set to use cpu


In [15]:
def read_pdf(file):
    """Extract the text of every page of a PDF into a single string.

    Args:
        file: Anything ``PdfReader`` accepts (the notebook passes the value
            produced by the Gradio file-upload component).

    Returns:
        The text of all pages in order, each page followed by one space.
        A PDF with no pages yields an empty string.
    """
    reader = PdfReader(file)
    # Defensive `or ""`: a page whose extraction yields nothing (None) must
    # not break the concatenation with a TypeError.
    return "".join((page.extract_text() or "") + " " for page in reader.pages)


In [16]:
def build_faiss(text, chunk_size=500):
    """Split text into fixed-size character chunks and index their embeddings.

    Args:
        text: The document text to index.
        chunk_size: Number of characters per chunk (the last chunk may be
            shorter). Must be positive.

    Returns:
        A ``(chunks, index)`` tuple: the list of chunk strings and a
        ``faiss.IndexFlatL2`` holding one embedding per chunk, in the same
        order, so a search result id is also a position in ``chunks``.

    Raises:
        ValueError: If ``chunk_size`` is not positive or ``text`` is empty
            (there would be nothing to embed and no dimension for the index).
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")

    chunks = [text[start:start + chunk_size] for start in range(0, len(text), chunk_size)]
    if not chunks:
        raise ValueError("Cannot build an index from empty text")

    # FAISS expects a C-contiguous float32 matrix of shape (n_chunks, dim).
    embeddings = np.ascontiguousarray(embedder.encode(chunks), dtype=np.float32)

    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)

    return chunks, index


In [17]:
def ask_question(pdf_file, question, top_k=3):
    """Answer a question about an uploaded PDF using retrieved context.

    The PDF text is chunked and indexed, the chunks nearest to the question
    embedding are retrieved, and they are passed with the question to the
    text-generation pipeline.

    Args:
        pdf_file: The uploaded PDF, or None if nothing was uploaded.
        question: The user's question.
        top_k: Maximum number of chunks to retrieve as context.

    Returns:
        The generated answer, or a warning message when the PDF or question
        is missing or the PDF contains no extractable text.
    """
    if pdf_file is None:
        return "⚠️ Upload a PDF first."
    if not question or not question.strip():
        return "⚠️ Ask a question."

    text = read_pdf(pdf_file)
    if not text.strip():
        return "⚠️ No extractable text found in this PDF."

    chunks, index = build_faiss(text)

    # Never ask for more neighbours than there are chunks: FAISS pads the
    # result with -1, and chunks[-1] would silently repeat the last chunk.
    k = max(1, min(top_k, len(chunks)))

    q_emb = np.ascontiguousarray(embedder.encode([question]), dtype=np.float32)
    _, idx = index.search(q_emb, k=k)

    # Drop any -1 padding that may still be present.
    context = " ".join(chunks[int(i)] for i in idx[0] if i >= 0)

    prompt = f"Answer based on the context:\n{context}\n\nQuestion: {question}"
    return qa_pipeline(prompt)[0]["generated_text"]


In [18]:
def generate_notes(pdf_file, max_chars=3000):
    """Summarise the beginning of an uploaded PDF into study notes.

    Args:
        pdf_file: The uploaded PDF, or None if nothing was uploaded.
        max_chars: Number of leading characters of the PDF text passed to
            the summariser.

    Returns:
        The summary prefixed with a "Generated Notes" heading, or a warning
        message when no PDF was given or it contains no extractable text.
    """
    if pdf_file is None:
        return "⚠️ Upload a PDF first."

    text = read_pdf(pdf_file)[:max_chars]  # limit for summarization
    if not text.strip():
        # Without this guard the summariser would be run on empty input.
        return "⚠️ No extractable text found in this PDF."

    # truncation=True keeps token-dense text within the model's input limit
    # instead of failing inside the model.
    summary = summary_pipeline(text, truncation=True)[0]["summary_text"]
    return "📘 Generated Notes:\n\n" + summary


In [19]:
# Gradio front end: one shared PDF upload feeding two tabs,
# question answering and note generation.
with gr.Blocks() as app:
    gr.Markdown("# 📘 ReadSmart AI")
    pdf = gr.File(label="Upload PDF", file_types=[".pdf"])

    with gr.Tab("Ask Questions"):
        q = gr.Textbox(label="Your Question")
        a = gr.Textbox(label="Answer")
        ask_button = gr.Button("Ask")
        ask_button.click(fn=ask_question, inputs=[pdf, q], outputs=a)

    with gr.Tab("Generate Notes"):
        notes = gr.Textbox(lines=12)
        notes_button = gr.Button("Generate Notes")
        notes_button.click(fn=generate_notes, inputs=pdf, outputs=notes)

app.launch()


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://fc71faedaf10ff4ffe.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


