# 04 – Gradio Web App for Semantic Scientific Paper Recommender

In this notebook, we:
- Load the FAISS index created in Notebook 2
- Wrap the semantic search function from Notebook 3
- Build an interactive Gradio web app where users enter a natural-language query
- Display the top-K most similar scientific papers with metadata

In [None]:
# Optional one-time setup: uncomment the line below (prefix it with % so the
# install targets this kernel's environment) to install the pinned dependencies.
# pip install --upgrade "huggingface_hub<0.25" gradio==4.44.0

In [1]:
import os
import gradio as gr
from dotenv import load_dotenv
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS

# --- Credentials ---
# Pull variables from a local .env file into the environment, then read the
# OpenAI key from it; stop immediately if the key is missing or empty.
load_dotenv()
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
assert OPENAI_API_KEY, "Please set your OPENAI_API_KEY in .env"

# --- Tunable settings ---
FAISS_PATH = "../data/faiss_index"  # folder handed to FAISS.load_local
EMBEDDING_MODEL = "text-embedding-3-small"  # model name given to OpenAIEmbeddings
TOP_K = 5  # default number of papers returned per query

  from .autonotebook import tqdm as notebook_tqdm


Load FAISS Index

In [2]:
# Fail fast with an actionable message when the index folder is missing,
# instead of surfacing a low-level read error from inside the loader.
if not os.path.isdir(FAISS_PATH):
    raise FileNotFoundError(
        f"FAISS index folder not found at {FAISS_PATH!r} "
        f"(resolved to {os.path.abspath(FAISS_PATH)!r}). "
        "Run Notebook 2 to build the index, or update FAISS_PATH."
    )

# Embedding client used to embed incoming queries.
# NOTE(review): presumably this must be the same model the index was built
# with in Notebook 2 -- confirm.
embeddings = OpenAIEmbeddings(model=EMBEDDING_MODEL, api_key=OPENAI_API_KEY)

# SECURITY: allow_dangerous_deserialization=True permits unpickling the stored
# index data, and unpickling can execute arbitrary code. Only load index
# folders that you created yourself or otherwise fully trust.
vectorstore = FAISS.load_local(
    FAISS_PATH, embeddings, allow_dangerous_deserialization=True
)
print("FAISS index loaded and ready for Gradio app.")

FAISS index loaded and ready for Gradio app.


Define Search Function for the UI

In [3]:
# Human-readable category labels (lower-cased) -> arXiv archive prefixes.
# NOTE(review): assumes the "category_code" metadata holds arXiv-style codes
# such as "cs.LG" -- confirm against the notebook that built the index.
_ARXIV_ARCHIVES = {
    "computer science": ("cs",),
    "physics": (
        "physics", "astro-ph", "cond-mat", "gr-qc", "hep-ex", "hep-lat",
        "hep-ph", "hep-th", "math-ph", "nlin", "nucl-ex", "nucl-th", "quant-ph",
    ),
    "mathematics": ("math",),
    "statistics": ("stat",),
    "quantitative biology": ("q-bio",),
    "quantitative finance": ("q-fin",),
    "electrical engineering and systems science": ("eess",),
    "economics": ("econ",),
}

# Smallest candidate pool requested from the vector store, and how many
# candidates to request per wanted result when filters may discard some.
_MIN_CANDIDATES = 50
_OVERSAMPLE = 10


def _parse_year(value):
    """Return ``value`` as an int year, or None when it is empty or unparseable."""
    if not value:
        return None
    try:
        return int(value)
    except (TypeError, ValueError):
        pass
    try:
        # Tolerates strings such as "2019.0".
        return int(float(value))
    except (TypeError, ValueError, OverflowError):
        return None


def _category_matches(category_code, category_filter):
    """Return True when ``category_filter`` selects ``category_code``.

    A plain case-insensitive substring match is tried first; if that fails and
    the filter is a known category label, its archive prefixes are compared
    against each code token.
    """
    if isinstance(category_code, (list, tuple, set)):
        code = " ".join(str(part) for part in category_code).lower()
    else:
        code = str(category_code).lower()
    wanted = category_filter.lower()
    if wanted in code:
        return True
    archives = _ARXIV_ARCHIVES.get(wanted.strip(), ())
    tokens = code.replace(",", " ").replace(";", " ").split()
    return any(
        token == archive or token.startswith(archive + ".")
        for token in tokens
        for archive in archives
    )


def search_papers(query, k=TOP_K, min_year=None, category_filter=None):
    """Semantic search over the FAISS index with optional metadata filters.

    Args:
        query: Natural-language search text. A blank query returns ``[]``
            without contacting the vector store.
        k: Maximum number of documents to return. ``None`` falls back to
            ``TOP_K``; values <= 0 return ``[]``.
        min_year: If truthy, drop documents whose ``year`` metadata parses to
            a value below it. Documents with a missing or unparseable year
            are kept.
        category_filter: If truthy, keep only documents whose
            ``category_code`` metadata matches it (see ``_category_matches``).
            Documents without a ``category_code`` are kept.

    Returns:
        A list of at most ``k`` documents, in the order returned by
        ``vectorstore.similarity_search``.
    """
    if not query or not str(query).strip():
        return []
    k = TOP_K if k is None else int(k)
    if k <= 0:
        return []

    # Over-fetch so that filtering can still leave k hits, and never request
    # fewer candidates than the caller asked for.
    filtering = bool(min_year) or bool(category_filter)
    pool_size = max(_MIN_CANDIDATES, k * _OVERSAMPLE if filtering else k)
    results = vectorstore.similarity_search(query, k=pool_size)

    filtered = []
    for doc in results:
        meta = doc.metadata
        if min_year:
            year = _parse_year(meta.get("year"))
            if year is not None and year < min_year:
                continue
        if category_filter and meta.get("category_code"):
            if not _category_matches(meta["category_code"], category_filter):
                continue
        filtered.append(doc)
        if len(filtered) >= k:
            break
    return filtered


def format_results(results):
    """Render search hits as a Markdown string for the Gradio output panel.

    Args:
        results: Iterable of objects exposing ``metadata`` (a dict) and
            ``page_content`` (a string). May be empty or None.

    Returns:
        ``"No results found."`` when ``results`` is empty/None; otherwise one
        Markdown entry per document (title, year, category, authors and a
        snippet of up to 400 characters), each followed by a horizontal rule.
    """
    if not results:
        return "No results found."

    max_chars = 400
    entries = []
    for doc in results:
        meta = doc.metadata
        title = meta.get("title", "Untitled")
        year = meta.get("year", "Unknown")
        cat = meta.get("category_code", "N/A")
        authors = meta.get("authors", "Unknown authors")

        text = doc.page_content
        # Newlines would end the Markdown blockquote early, so flatten them.
        snippet = text[:max_chars].replace("\n", " ")
        # Only signal truncation when something was actually cut off.
        if len(text) > max_chars:
            snippet += "..."

        entries.append(
            f"**{title}** ({year})  \n_Category:_ `{cat}`  \n_Authors:_ {authors}  \n> {snippet}\n\n---\n"
        )
    return "".join(entries)


def recommend(query, top_k, min_year, category):
    """Gradio callback: validate the form inputs, search, and format the hits.

    Args:
        query: Text from the query box. Blank or None yields a prompt message
            instead of running a search.
        top_k: Requested number of results. A cleared/zero field falls back to
            ``TOP_K``; other values are clamped to at least 1.
        min_year: Earliest publication year to keep; a cleared/zero field
            disables the year filter.
        category: Selected category label, or None/"" for no category filter.

    Returns:
        A Markdown string to show in the results panel.
    """
    query = (query or "").strip()
    if not query:
        return "Please enter a search query."

    # Number fields can arrive as None (cleared) or as floats; normalise them
    # before they reach the comparison logic in search_papers.
    k = max(1, int(top_k)) if top_k else TOP_K
    year_floor = int(min_year) if min_year else None

    results = search_papers(
        query, k=k, min_year=year_floor, category_filter=category or None
    )
    return format_results(results)

Build Gradio Interface

In [4]:
import gradio as gr

# Default for the "From Year" field. Shared by the component and the Reset
# handler so the two cannot drift apart.
DEFAULT_MIN_YEAR = 2018

with gr.Blocks(title="Semantic Scientific Paper Recommender") as demo:
    gr.Markdown("Scientific Paper Recommender by Ronald Kakooza")
    gr.Markdown(
        "Enter a natural-language query to find contextually similar research papers from arXiv."
    )

    # --- Input controls ---
    with gr.Row():
        query = gr.Textbox(
            label="🔍 Query",
            placeholder="e.g. diffusion models for image generation",
            lines=2,
        )

    with gr.Row():
        top_k = gr.Number(
            value=TOP_K,  # same default the search function uses
            precision=0,
            minimum=1,  # zero or negative result counts are rejected by the component
            label="Top K Results",
            interactive=True,
        )
        min_year = gr.Number(
            value=DEFAULT_MIN_YEAR,
            precision=0,
            label="From Year (≥)",
            interactive=True,
        )

    # 🏷️ Human-readable category names, not codes.
    # NOTE(review): the selected label is passed to search_papers as
    # category_filter, which compares it against the "category_code" metadata
    # field -- confirm these labels match what the index actually stores.
    category = gr.Dropdown(
        choices=[
            "Computer Science",
            "Physics",
            "Mathematics",
            "Statistics",
            "Quantitative Biology",
            "Quantitative Finance",
            "Electrical Engineering and Systems Science",
            "Economics",
        ],
        label="Category (optional)",
        value=None,
    )

    output = gr.Markdown(label="Results")

    # --- Buttons ---
    with gr.Row():
        search_btn = gr.Button("🔎 Search", variant="primary")
        reset_btn = gr.Button("🧹 Reset")

    # --- Actions ---
    search_btn.click(fn=recommend, inputs=[query, top_k, min_year, category], outputs=output)

    def reset_fields():
        """Return initial values for (query, top_k, min_year, category, output)."""
        return "", TOP_K, DEFAULT_MIN_YEAR, None, ""

    reset_btn.click(
        fn=reset_fields,
        inputs=[],
        outputs=[query, top_k, min_year, category, output],
    )

    gr.Markdown("Built with LangChain + FAISS + OpenAI Embeddings")

# share=False keeps the app on the local URL only (no public link).
demo.launch(share=False)

Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.




--------
