Simple RAG

In [4]:
!pip install gradio sentence-transformers faiss-cpu PyPDF2 requests




In [5]:
import os
from google.colab import userdata
# Copy the Groq key out of the Colab secret named 'ChatBot_using_GroqAPI' into the
# process environment, where it is exposed under the name GROQ_API_KEY.
os.environ.update({'GROQ_API_KEY': userdata.get('ChatBot_using_GroqAPI')})

In [6]:
import gradio as gr
import faiss
import numpy as np
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer
import requests
import os

# Load sentence transformer model (used for both document chunks and queries)
embed_model = SentenceTransformer("all-MiniLM-L6-v2")

# FAISS index & document store
# The index width is read from the model (384 for 'all-MiniLM-L6-v2') instead of being
# hard-coded, so the index cannot drift out of sync if the model name is changed.
index = faiss.IndexFlatL2(embed_model.get_sentence_embedding_dimension())
documents = []  # Text chunks; documents[i] is the text behind vector i of `index`

# Groq API setup
GROQ_API_KEY = os.getenv("GROQ_API_KEY")  # None when the environment variable is unset
GROQ_MODEL = "llama3-8b-8192"

SYSTEM_PROMPT = "You are a helpful academic supervisor. Answer questions using the provided paper content. If the answer is not in the paper, say so."

# Helper to read PDF
def read_pdf(file):
    """Extract the text of every page of a PDF into one string.

    Args:
        file: A filesystem path, a binary stream, or an upload wrapper that
            exposes the path of the uploaded file as a string ``.name``.

    Returns:
        The text of all pages, each page followed by a newline.
    """
    # Upload wrappers carry the on-disk path in `.name`; plain paths and in-memory
    # streams have no string `.name` and are handed to PdfReader unchanged.
    path = getattr(file, "name", None)
    source = path if isinstance(path, str) else file

    reader = PdfReader(source)
    # Defensive `or ""`: a page that yields no text must not break the concatenation.
    # join() also avoids rebuilding the whole string once per page.
    return "".join((page.extract_text() or "") + "\n" for page in reader.pages)

# Helper to chunk and embed
def process_pdf(file):
    """Split an uploaded PDF into sentence chunks and rebuild the FAISS index from them.

    Replaces the module-level ``documents`` list and the contents of ``index``.

    Args:
        file: The value delivered by the upload component; ``None`` when nothing
            has been uploaded.

    Returns:
        A status message for the UI.
    """
    global documents, index

    if file is None:
        return "Please upload a PDF file first."

    text = read_pdf(file)

    # Sentence chunking: split on ". " and keep fragments longer than 20 characters.
    chunks = [s.strip() for s in text.split(". ") if len(s.strip()) > 20]
    if not chunks:
        # Nothing to embed (for example a PDF without a text layer). Clear the old
        # state so later questions are not answered from a previous document, and
        # avoid handing FAISS an empty array.
        documents = []
        index.reset()
        return "No extractable text found in this PDF."

    # Embed first and only then touch shared state, so `documents` and `index`
    # cannot disagree if encoding raises.
    embeddings = np.asarray(embed_model.encode(chunks), dtype=np.float32)

    index.reset()
    index.add(embeddings)
    documents = chunks

    return "PDF processed and indexed."

# RAG retrieval
def get_relevant_context(query, k=5):
    """Return the stored chunks most similar to ``query``, one per line.

    Args:
        query: The user's question.
        k: Maximum number of chunks to retrieve.

    Returns:
        Up to ``k`` chunks joined by newlines, in the order FAISS returns them;
        an empty string when no chunks are stored.
    """
    # Never request more neighbours than there are chunks: FAISS pads missing
    # results with -1, and documents[-1] would silently repeat the last chunk.
    top_k = min(k, len(documents))
    if top_k <= 0:
        return ""

    query_embedding = np.asarray(embed_model.encode([query]), dtype=np.float32)
    _, neighbour_ids = index.search(query_embedding, top_k)

    # The range check also drops any -1 padding that might still appear.
    return "\n".join(documents[i] for i in neighbour_ids[0] if 0 <= i < len(documents))

# LLaMA3 via Groq
def ask_question(query):
    """Answer a question about the indexed PDF through the Groq chat-completions API.

    Args:
        query: The user's question.

    Returns:
        The model's answer, or a readable message explaining why no answer
        could be produced. This function does not raise on network or API errors.
    """
    if not query or not query.strip():
        return "Please enter a question."

    if len(documents) == 0:
        return "Please upload and process a PDF first."

    if not GROQ_API_KEY:
        # Without this guard the request would be sent with "Bearer None".
        return "❌ Error: GROQ_API_KEY is not set."

    context = get_relevant_context(query)

    prompt = f"Context:\n{context}\n\nUser Question:\n{query}"

    try:
        response = requests.post(
            "https://api.groq.com/openai/v1/chat/completions",
            headers={"Authorization": f"Bearer {GROQ_API_KEY}"},
            json={
                "model": GROQ_MODEL,
                "messages": [
                    {"role": "system", "content": SYSTEM_PROMPT},
                    {"role": "user", "content": prompt}
                ],
                "temperature": 0.3
            },
            # requests waits indefinitely by default; bound it so the UI cannot hang.
            timeout=60,
        )
    except requests.RequestException as exc:
        return f"❌ Error: request to Groq failed - {exc}"

    # Error responses are not guaranteed to carry a JSON body.
    try:
        payload = response.json()
    except ValueError:
        payload = None

    if response.status_code == 200 and payload is not None:
        try:
            return payload["choices"][0]["message"]["content"]
        except (KeyError, IndexError, TypeError):
            return f"❌ Error: unexpected response format - {payload}"

    detail = payload if payload is not None else response.text
    return f"❌ Error: {response.status_code} - {detail}"

# Gradio UI
# Two-column layout: the left column handles upload and indexing, the right
# column handles questions and answers. Components appear in creation order.
with gr.Blocks() as demo:
    gr.Markdown("# 🧠 RAG Academic Supervisor\nUpload a PDF paper and ask questions about it.")
    with gr.Row():
        with gr.Column():
            file_input = gr.File(label="Upload Paper (PDF)")
            process_button = gr.Button("📚 Process Document")
            # Read-only box that shows the string returned by process_pdf.
            status = gr.Textbox(label="Status", interactive=False)
        with gr.Column():
            question_input = gr.Textbox(label="Ask a Question")
            answer_output = gr.Textbox(label="Answer", lines=10)
            ask_button = gr.Button("💬 Ask")

    # Wire the buttons: each click passes the listed input's value to the
    # callback and writes the callback's return value into the listed output.
    process_button.click(fn=process_pdf, inputs=[file_input], outputs=[status])
    ask_button.click(fn=ask_question, inputs=[question_input], outputs=[answer_output])

# Run the app (launched with Gradio's default settings)
demo.launch()


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://ff41bcd4973b879448.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [7]:
import gradio as gr
import faiss
import numpy as np
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer
import requests
import os

# Load Sentence Transformer (used for both document chunks and queries)
embed_model = SentenceTransformer("all-MiniLM-L6-v2")

# Global FAISS index and document store
# The index width is read from the model (384 for 'all-MiniLM-L6-v2') instead of being
# hard-coded, so the index cannot drift out of sync if the model name is changed.
index = faiss.IndexFlatL2(embed_model.get_sentence_embedding_dimension())
documents = []  # Text chunks; documents[i] is the text behind vector i of `index`

# API Key and LLaMA3 setup
GROQ_API_KEY = os.getenv("GROQ_API_KEY")  # None when the environment variable is unset
GROQ_MODEL = "llama3-8b-8192"
SYSTEM_PROMPT = "You are a helpful academic supervisor. Use only the provided document context to answer the student's question. Be honest if the answer is not found."

# Step 1: Read and Extract Text from PDF
def read_pdf(file):
    """Extract the text of every page of a PDF into one string.

    Args:
        file: An upload wrapper that exposes the path of the uploaded file as a
            string ``.name``, or a plain filesystem path / binary stream.

    Returns:
        The text of all pages, each page followed by a newline.
    """
    # Use `.name` when it is a path string; otherwise hand the value to PdfReader
    # unchanged, so a plain path no longer fails with AttributeError.
    path = getattr(file, "name", None)
    source = path if isinstance(path, str) else file

    reader = PdfReader(source)
    # Defensive `or ""`: a page that yields no text must not break the concatenation.
    # join() also avoids rebuilding the whole string once per page.
    return "".join((page.extract_text() or "") + "\n" for page in reader.pages)

# Step 2: Chunk + Embed
def process_pdf(file):
    """Split an uploaded PDF into sentence chunks and rebuild the FAISS index from them.

    Replaces the module-level ``documents`` list and the contents of ``index``.

    Args:
        file: The value delivered by the upload component; ``None`` when nothing
            has been uploaded.

    Returns:
        A status message for the UI.
    """
    global documents, index

    if file is None:
        return "⚠️ Please upload a PDF file first."

    text = read_pdf(file)

    # Sentence chunking: split on ". " and keep fragments longer than 20 characters.
    chunks = [s.strip() for s in text.split(". ") if len(s.strip()) > 20]
    if not chunks:
        # Nothing to embed (for example a PDF without a text layer). Clear the old
        # state so later questions are not answered from a previous document, and
        # avoid handing FAISS an empty array.
        documents = []
        index.reset()
        return "⚠️ No extractable text found in this PDF."

    # Embed first and only then touch shared state, so `documents` and `index`
    # cannot disagree if encoding raises.
    embeddings = np.asarray(embed_model.encode(chunks), dtype=np.float32)

    index.reset()
    index.add(embeddings)
    documents = chunks

    return "✅ PDF processed successfully."

# Step 3: Retrieve Top-K Chunks
def get_relevant_context(query, k=5):
    """Return the stored chunks most similar to ``query``, one per line.

    Args:
        query: The user's question.
        k: Maximum number of chunks to retrieve.

    Returns:
        Up to ``k`` chunks joined by newlines, in the order FAISS returns them;
        an empty string when no chunks are stored.
    """
    # Never request more neighbours than there are chunks: FAISS pads missing
    # results with -1, and documents[-1] would silently repeat the last chunk.
    top_k = min(k, len(documents))
    if top_k <= 0:
        return ""

    query_embedding = np.asarray(embed_model.encode([query]), dtype=np.float32)
    _, neighbour_ids = index.search(query_embedding, top_k)

    # The range check also drops any -1 padding that might still appear.
    return "\n".join(documents[i] for i in neighbour_ids[0] if 0 <= i < len(documents))

# Step 4: Query LLaMA3 via Groq API
def ask_question(query):
    """Answer a question about the indexed PDF through the Groq chat-completions API.

    Args:
        query: The user's question.

    Returns:
        The model's answer, or a readable message explaining why no answer
        could be produced. This function does not raise on network or API errors.
    """
    if not query or not query.strip():
        return "⚠️ Please enter a question."

    if not documents:
        return "⚠️ Please upload and process a PDF first."

    if not GROQ_API_KEY:
        # Without this guard the request would be sent with "Bearer None".
        return "❌ Error: GROQ_API_KEY is not set."

    context = get_relevant_context(query)
    prompt = f"Context:\n{context}\n\nUser Question:\n{query}"

    try:
        response = requests.post(
            "https://api.groq.com/openai/v1/chat/completions",
            headers={"Authorization": f"Bearer {GROQ_API_KEY}"},
            json={
                "model": GROQ_MODEL,
                "messages": [
                    {"role": "system", "content": SYSTEM_PROMPT},
                    {"role": "user", "content": prompt}
                ],
                "temperature": 0.3
            },
            # requests waits indefinitely by default; bound it so the UI cannot hang.
            timeout=60,
        )
    except requests.RequestException as exc:
        return f"❌ Error: request to Groq failed - {exc}"

    # Error responses are not guaranteed to carry a JSON body.
    try:
        payload = response.json()
    except ValueError:
        payload = None

    if response.status_code == 200 and payload is not None:
        try:
            return payload["choices"][0]["message"]["content"]
        except (KeyError, IndexError, TypeError):
            return f"❌ Error: unexpected response format - {payload}"

    detail = payload if payload is not None else response.text
    return f"❌ Error: {response.status_code} - {detail}"

# Step 5: Gradio UI
# Two-column layout: the left column handles upload and indexing, the right
# column handles questions and answers. Components appear in creation order.
with gr.Blocks() as demo:
    gr.Markdown("## 🧠 RAG Academic Supervisor\nUpload a PDF paper and ask questions about it.")

    with gr.Row():
        with gr.Column():
            file_input = gr.File(label="📄 Upload PDF")
            process_btn = gr.Button("📥 Process Document")
            # Read-only box that shows the string returned by process_pdf.
            status_output = gr.Textbox(label="Status", interactive=False)
        with gr.Column():
            question_input = gr.Textbox(label="💬 Ask a Question")
            ask_btn = gr.Button("🎯 Get Answer")
            answer_output = gr.Textbox(label="📖 Answer", lines=10)

    # Wire the buttons: each click passes the listed input's value to the
    # callback and writes the callback's return value into the listed output.
    process_btn.click(fn=process_pdf, inputs=[file_input], outputs=[status_output])
    ask_btn.click(fn=ask_question, inputs=[question_input], outputs=[answer_output])

# share=True asks Gradio for a public share link (the recorded run printed a
# *.gradio.live URL), so anyone holding the link can reach the app.
demo.launch(share=True)

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://36eebec026c6fb0cc5.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


