In [1]:
# Step 1: Install dependencies (run once)
!pip install -U langchain langchain-community langchain-groq faiss-cpu pypdf sentence-transformers gradio

Collecting langchain
  Downloading langchain-0.3.25-py3-none-any.whl.metadata (7.8 kB)
Collecting langchain-community
  Downloading langchain_community-0.3.25-py3-none-any.whl.metadata (2.9 kB)
Collecting langchain-groq
  Downloading langchain_groq-0.3.2-py3-none-any.whl.metadata (2.6 kB)
Collecting faiss-cpu
  Downloading faiss_cpu-1.11.0-cp312-cp312-macosx_14_0_arm64.whl.metadata (4.8 kB)
Collecting pypdf
  Downloading pypdf-5.6.0-py3-none-any.whl.metadata (7.2 kB)
Collecting sentence-transformers
  Downloading sentence_transformers-4.1.0-py3-none-any.whl.metadata (13 kB)
Collecting gradio
  Downloading gradio-5.34.0-py3-none-any.whl.metadata (16 kB)
Collecting langchain-core<1.0.0,>=0.3.58 (from langchain)
  Downloading langchain_core-0.3.65-py3-none-any.whl.metadata (5.8 kB)
Collecting langchain-text-splitters<1.0.0,>=0.3.8 (from langchain)
  Downloading langchain_text_splitters-0.3.8-py3-none-any.whl.metadata (1.9 kB)
Collecting langsmith<0.4,>=0.1.17 (from langchain)
  Downloadin

In [2]:
import os
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_groq import ChatGroq
from langchain.chains import RetrievalQA
import gradio as gr

# Step 2: Provide the Groq API key without storing it in the notebook.
# The key is taken from the GROQ_API_KEY environment variable when it is
# already set; otherwise the user is prompted and the input is not echoed.
# NOTE: any key that was ever saved in this file in plain text should be
# treated as leaked and rotated in the Groq console.
from getpass import getpass

if not os.environ.get("GROQ_API_KEY"):
    os.environ["GROQ_API_KEY"] = getpass("Enter your Groq API key: ")


ModuleNotFoundError: No module named 'langchain_community'

In [3]:
# Helper function to build vectorstore from uploaded PDF
def build_vectorstore_from_pdf(
    file_path,
    chunk_size=500,
    chunk_overlap=50,
    embedding_model_name="all-MiniLM-L6-v2",
):
    """Load a PDF, split it into overlapping chunks and index them in FAISS.

    Args:
        file_path: Path of the PDF file to index.
        chunk_size: Value passed to the text splitter as ``chunk_size``.
        chunk_overlap: Value passed to the text splitter as ``chunk_overlap``.
        embedding_model_name: Model name handed to ``HuggingFaceEmbeddings``.

    Returns:
        The FAISS vector store built from the document chunks.

    Raises:
        ValueError: If splitting the PDF produced no chunks (for example an
            image-only scan with no extractable text); an index cannot be
            built from an empty document list.
    """
    pages = PyPDFLoader(file_path).load()

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    docs = splitter.split_documents(pages)

    # Fail early with a readable message instead of an opaque error from
    # the vector store when there is nothing to embed.
    if not docs:
        raise ValueError(
            f"No extractable text found in PDF: {file_path!r}"
        )

    embedding_model = HuggingFaceEmbeddings(model_name=embedding_model_name)
    return FAISS.from_documents(docs, embedding_model)


In [4]:
# Shared state for the Gradio callbacks.
# `vectorstore` and `qa_chain` start out empty and are filled in by
# `upload_pdf` once a document has been processed.
vectorstore, qa_chain = None, None

# Groq-hosted chat model used to answer questions; temperature 0 is requested
# and the key is read from the environment.
llm = ChatGroq(
    api_key=os.environ["GROQ_API_KEY"],
    model_name="llama3-8b-8192",
    temperature=0,
)


In [5]:
# Gradio function for file upload and building vectorstore
def upload_pdf(file):
    """Index the uploaded PDF and (re)build the question-answering chain.

    Args:
        file: Value delivered by the Gradio file component. May be ``None``
            when the button is clicked before a file was chosen, an object
            exposing the path as ``.name``, or a plain path string.

    Returns:
        A status message for the UI.
    """
    global vectorstore, qa_chain

    # The button can be pressed with no file selected.
    if file is None:
        return "Please upload a PDF file first."

    # Accept both file-like objects (path in `.name`) and bare path strings.
    pdf_path = getattr(file, "name", file)

    # Build into locals first so the globals are only replaced when both the
    # index and the chain were created successfully.
    new_vectorstore = build_vectorstore_from_pdf(pdf_path)
    new_chain = RetrievalQA.from_chain_type(
        llm=llm,
        retriever=new_vectorstore.as_retriever(),
        return_source_documents=False
    )

    vectorstore, qa_chain = new_vectorstore, new_chain

    return "PDF processed successfully! Now ask your questions."


In [6]:
# Gradio function to answer questions
def answer_question(question):
    """Answer a question about the processed PDF using the global QA chain.

    Args:
        question: Text entered by the user.

    Returns:
        The chain's answer (the ``"result"`` entry of its output), or a short
        instruction when no PDF has been processed yet or the question is
        blank.
    """
    if not qa_chain:
        return "Please upload and process a PDF first."

    # Do not spend an LLM call on an empty submission.
    if not question or not question.strip():
        return "Please enter a question."

    # `invoke` is the supported entry point; calling the chain object
    # directly is deprecated in LangChain.
    result = qa_chain.invoke({"query": question})
    return result["result"]


In [7]:
# Assemble the Gradio interface: an upload section followed by a Q&A section.
with gr.Blocks() as demo:
    gr.Markdown("# PDF Q&A with Groq + RAG")

    # --- Upload section -------------------------------------------------
    pdf_upload = gr.File(
        label="Upload PDF Document",
        file_types=[".pdf"],
    )
    upload_btn = gr.Button("Process PDF")
    upload_status = gr.Textbox(
        label="Status",
        interactive=False,
    )

    # --- Question / answer section --------------------------------------
    question_input = gr.Textbox(label="Ask a question about your PDF")
    answer_output = gr.Textbox(
        label="Answer",
        interactive=False,
    )

    # Wiring: the button indexes the PDF; submitting the question box
    # queries it.
    upload_btn.click(
        fn=upload_pdf,
        inputs=pdf_upload,
        outputs=upload_status,
    )
    question_input.submit(
        fn=answer_question,
        inputs=question_input,
        outputs=answer_output,
    )


In [8]:
# Launch the UI.
# As the captured output below shows, on a hosted notebook (e.g. Colab)
# Gradio switches on a public share link automatically; pass `share=False`
# to `launch()` to turn that off, or `debug=True` to surface errors in Colab.
demo.launch()

It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://53c06e4cd764ff75ae.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


