In [None]:
!pip install pypdf


Collecting pypdf
  Downloading pypdf-5.6.1-py3-none-any.whl.metadata (7.2 kB)
Downloading pypdf-5.6.1-py3-none-any.whl (304 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/304.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m304.6/304.6 kB[0m [31m7.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pypdf
Successfully installed pypdf-5.6.1


In [None]:
!pip install langchain transformers accelerate sentence-transformers faiss-cpu gradio


Collecting faiss-cpu
  Downloading faiss_cpu-1.11.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.8 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1

In [None]:
!pip install -U langchain langchain-community


Collecting langchain
  Downloading langchain-0.3.26-py3-none-any.whl.metadata (7.8 kB)
Collecting langchain-community
  Downloading langchain_community-0.3.26-py3-none-any.whl.metadata (2.9 kB)
Collecting langchain-core<1.0.0,>=0.3.66 (from langchain)
  Downloading langchain_core-0.3.66-py3-none-any.whl.metadata (5.8 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Downloading pydantic_settings-2.10.1-py3-none-any.whl.metadata (3.4 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain-community)
  Downloading httpx_sse-0.4.1-py3-none-any.whl.metadata (9.4 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading marshmallow-3.26.1-py3-none-any.whl.metadata (7.3 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloadin

In [None]:
from langchain_community.document_loaders import PyPDFLoader


In [None]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline


In [None]:
from langchain.document_loaders import PyPDFLoader
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from langchain.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA
import gradio as gr
import tempfile


In [None]:
def load_pdf(filepath):
    """Load the PDF at ``filepath`` and return its content as document chunks.

    Args:
        filepath: Location of the PDF, passed straight to ``PyPDFLoader``.

    Returns:
        Whatever ``PyPDFLoader.load_and_split()`` produces for that file; the
        rest of this notebook reads ``page_content`` from each item.
    """
    pdf_loader = PyPDFLoader(filepath)
    chunks = pdf_loader.load_and_split()
    return chunks


In [None]:
def get_embedder(model_name="sentence-transformers/all-MiniLM-L6-v2"):
    """Create the sentence-embedding model used to vectorise text.

    Args:
        model_name: Identifier handed to ``SentenceTransformer``. Defaults to
            the MiniLM model this notebook has always used, so existing
            zero-argument calls behave exactly as before.

    Returns:
        A ``SentenceTransformer`` instance for ``model_name``.
    """
    return SentenceTransformer(model_name)


In [None]:
class FAISSStore:
    """Small in-memory vector store: an embedder plus a flat L2 FAISS index.

    Documents are kept in a list whose positions line up with the rows added
    to the index, so a row id returned by a search maps directly to a document.
    """

    def __init__(self, embedder):
        """Create an empty store.

        Args:
            embedder: Object exposing ``encode(list_of_str)`` that returns one
                vector per input string.
        """
        self.embedder = embedder
        # Stays None until build_index() has been given at least one document.
        self.index = None
        # Documents in index order: row i of the index <-> self.docs[i].
        self.docs = []

    def _embed(self, texts):
        """Encode ``texts`` as a C-contiguous float32 array.

        The dtype and layout are enforced here rather than left to whatever
        the embedder happens to return, because FAISS expects float32 input.
        """
        return np.ascontiguousarray(self.embedder.encode(texts), dtype=np.float32)

    def build_index(self, docs):
        """(Re)build the index from ``docs``, replacing any previous content.

        Args:
            docs: Iterable of objects with a ``page_content`` string attribute.
                An empty iterable leaves the store empty instead of failing.
        """
        # Materialise once so a generator is not consumed before it is stored.
        docs = list(docs)
        self.docs = docs
        if not docs:
            self.index = None
            return

        vectors = self._embed([doc.page_content for doc in docs])
        self.index = faiss.IndexFlatL2(vectors.shape[1])
        self.index.add(vectors)

    def query(self, question, k=5):
        """Return up to ``k`` stored documents closest to ``question``.

        Args:
            question: Query text to embed and search with.
            k: Maximum number of documents to return.

        Returns:
            A list of documents ordered as returned by the index search. It is
            empty when nothing has been indexed or ``k`` is not positive, and
            shorter than ``k`` when fewer documents are stored.
        """
        if self.index is None or not self.docs:
            return []

        # Never ask for more neighbours than exist: FAISS pads the missing
        # slots with -1, and docs[-1] would silently return the last document.
        k = min(k, len(self.docs))
        if k <= 0:
            return []

        q_vec = self._embed([question])
        _distances, indices = self.index.search(q_vec, k)
        # Second line of defence against -1 padding from the index.
        return [self.docs[i] for i in indices[0] if i >= 0]


In [None]:
def load_llm(model_name="google/flan-t5-large", max_new_tokens=256):
    """Load a seq2seq model and wrap it as a LangChain LLM.

    Args:
        model_name: Hugging Face model identifier of a seq2seq checkpoint.
        max_new_tokens: Upper bound on generated tokens per answer. Set
            explicitly because, without any length setting, generation falls
            back to the library's short default and answers get cut off.

    Returns:
        A ``HuggingFacePipeline`` wrapping a ``text2text-generation`` pipeline
        built from the loaded tokenizer and model.
    """
    # Imported here so this cell works regardless of which of the notebook's
    # earlier import cells (seq2seq vs. causal-LM variants) ran last.
    from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    pipe = pipeline(
        "text2text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=max_new_tokens,
    )
    return HuggingFacePipeline(pipeline=pipe)


In [None]:
from langchain.vectorstores import FAISS as LangChainFAISS
from langchain.embeddings import HuggingFaceEmbeddings

def get_retrieved_context(store, question, k=5):
    """Return the text of the chunks most relevant to ``question``.

    Uses the index already held by ``store`` instead of loading another
    embedding model and building a second FAISS index on every call.

    Args:
        store: Object with a ``docs`` list and a ``query(question, k=...)``
            method returning documents that carry ``page_content``.
        question: The user's question.
        k: Maximum number of chunks to include (default 5, as before).

    Returns:
        The selected chunks' ``page_content`` joined by blank lines, or an
        empty string when the store holds no documents.
    """
    if not store.docs:
        return ""

    # Cap k at the number of stored chunks so the search is never asked for
    # more neighbours than exist.
    top_docs = store.query(question, k=min(k, len(store.docs)))
    return "\n\n".join(doc.page_content for doc in top_docs)


In [None]:
# Load both models once, when this cell runs; handle_query() below reads these
# module-level names on every call instead of reloading the models per question.
embedder = get_embedder()
llm = load_llm()

def handle_query(file, question):
    """Answer ``question`` about an uploaded PDF and return text for the UI.

    Args:
        file: The upload: either a path string or an object whose ``name``
            attribute is the path of the uploaded file. ``None`` means nothing
            was uploaded.
        question: The user's question.

    Returns:
        The model's answer, or a user-facing message when the input is
        missing, the document looks too small to be a paper, the model returns
        nothing, or any step raises.
    """
    # Check the inputs first so the user gets a clear hint rather than the
    # generic error text produced by the handler at the bottom.
    if file is None:
        return "⚠️ Please upload a PDF first."
    if not question or not question.strip():
        return "⚠️ Please type a question about the paper."

    try:
        # Read the upload where it already is. Copying it into a second temp
        # file left both an unclosed file handle and an undeleted copy behind
        # on every request.
        pdf_path = file if isinstance(file, str) else file.name

        docs = load_pdf(pdf_path)

        # Rough sanity check: too few chunks or too few words overall.
        if len(docs) < 5 or sum(len(d.page_content.split()) for d in docs) < 200:
            return "⚠️ That doesn't look like a research paper. Try uploading something more technical?"

        store = FAISSStore(embedder)
        store.build_index(docs)

        # Get context
        context = get_retrieved_context(store, question)

        # Informal prompt
        prompt = f"""Hey! Here's a paper I'm reading. Based on this, could you casually answer the question:

Question: {question}

Paper bits:
{context}

Try to be helpful, clear, and chill. If the answer isn't really in there, just say that politely.
"""

        # invoke() replaces the deprecated direct call llm(prompt).
        response = llm.invoke(prompt)
        return response or "Hmm, couldn't find a good answer. Try rephrasing?"

    except Exception as e:
        # Deliberate catch-all: the UI should show a message, not a traceback.
        return f"🔥 Error: {str(e)}"


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/662 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/3.13G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

Device set to use cuda:0
  return HuggingFacePipeline(pipeline=pipe)


In [None]:
import gradio as gr

# Custom stylesheet for the app, passed to gr.Blocks(css=...) further down.
# It sets a light page background, a centered white card of at most 800px,
# centered headings, bordered inputs, and blue buttons with a darker hover.
professional_css = """
body {
    background-color: #f8f9fa;
    font-family: 'Segoe UI', sans-serif;
}

.gradio-container {
    max-width: 800px;
    margin: auto;
    padding: 20px;
    background-color: white;
    border-radius: 8px;
    box-shadow: 0 0 15px rgba(0, 0, 0, 0.08);
}

h1, h2 {
    text-align: center;
    color: #343a40;
}

textarea, input[type="file"], input[type="text"] {
    border: 1px solid #ced4da;
    border-radius: 4px;
    padding: 10px;
    font-size: 15px;
    background-color: #ffffff;
}

button {
    background-color: #0069d9;
    color: white;
    border: none;
    border-radius: 4px;
    padding: 10px 18px;
    font-size: 15px;
    cursor: pointer;
}

button:hover {
    background-color: #0056b3;
}
"""

# Build the app layout: heading and description, a row holding the PDF upload
# and the question box, then the answer box and the submit button.
with gr.Blocks(css=professional_css) as demo:
    gr.Markdown("## AI Research Co-Pilot")
    gr.Markdown("Upload a research paper and ask questions about it using RAG + Hugging Face.")

    # Inputs sit side by side in one row.
    with gr.Row():
        file_input = gr.File(label="Upload Research Paper (PDF)")
        question_input = gr.Textbox(label="Ask a Question", placeholder="e.g. What is the main contribution?")

    output = gr.Textbox(label="Answer", lines=6)
    submit_button = gr.Button("Generate Answer")

    # Thin callback that forwards the two input values to handle_query().
    def run_query(file, question):
        return handle_query(file, question)

    # Clicking the button runs run_query on the upload and the question and
    # writes the returned text into the answer box.
    submit_button.click(fn=run_query, inputs=[file_input, question_input], outputs=output)

# Start the app.
demo.launch()



It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://03516bcc4d5ea67cf0.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


