In [3]:
# Install Dependencies

!pip install -U \
  langchain \
  langchain-community \
  langchain-huggingface \
  langchain-text-splitters \
  sentence-transformers \
  faiss-cpu \
  groq \
  requests \
  gradio

Collecting langchain-community
  Downloading langchain_community-0.4.1-py3-none-any.whl.metadata (3.0 kB)
Collecting langchain-huggingface
  Downloading langchain_huggingface-1.2.0-py3-none-any.whl.metadata (2.8 kB)
Collecting langchain-text-splitters
  Downloading langchain_text_splitters-1.1.0-py3-none-any.whl.metadata (2.7 kB)
Collecting sentence-transformers
  Downloading sentence_transformers-5.2.3-py3-none-any.whl.metadata (16 kB)
Collecting faiss-cpu
  Downloading faiss_cpu-1.13.2-cp310-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (7.6 kB)
Collecting groq
  Downloading groq-1.0.0-py3-none-any.whl.metadata (16 kB)
Collecting requests
  Downloading requests-2.32.5-py3-none-any.whl.metadata (4.9 kB)
Collecting gradio
  Downloading gradio-6.6.0-py3-none-any.whl.metadata (16 kB)
Collecting langchain-classic<2.0.0,>=1.0.0 (from langchain-community)
  Downloading langchain_classic-1.0.1-py3-none-any.whl.metadata (4.2 kB)
Collecting dataclasses-json<0.7.0,>=0.6.7 (from 

In [5]:
# Set Groq API Key
# Reuse the key when it is already present in the environment; otherwise ask
# for it without echoing, so the secret never ends up saved in the notebook.
import os
from getpass import getpass

if not os.environ.get("GROQ_API_KEY"):
    os.environ["GROQ_API_KEY"] = getpass("Enter your Groq API key: ").strip()

In [6]:
# Create Project Folders
import os

BASE = "/content/wikipedia-rag-agent"

# One sub-folder each for raw data, source modules and the saved index.
# exist_ok=True leaves folders that already exist untouched.
for subfolder in ("data", "src", "vectorstore"):
    os.makedirs(f"{BASE}/{subfolder}", exist_ok=True)

print("Folders created!")

Folders created!


In [7]:
# Write load_wikipedia.py
%%writefile /content/wikipedia-rag-agent/src/load_wikipedia.py
import requests

def load_wikipedia_text(topic, timeout=10):
    """Fetch the plain-text extract of an English Wikipedia article.

    Args:
        topic: Title of the page to fetch.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The page extract as a plain-text string.

    Raises:
        ValueError: If ``topic`` is blank, or the API response carries no
            non-empty extract for it.
        requests.HTTPError: If the API answers with an error status
            (raised by ``response.raise_for_status()``).
    """
    if not topic or not str(topic).strip():
        raise ValueError("Wikipedia topic must be a non-empty string.")

    url = "https://en.wikipedia.org/w/api.php"
    params = {
        "action": "query",
        "format": "json",
        "titles": topic,
        "prop": "extracts",
        "explaintext": "",
        # Resolve redirects so alternate titles lead to the actual article.
        "redirects": 1,
    }
    headers = {"User-Agent": "RAG-Colab/1.0"}

    # Without a timeout a stalled connection would block the caller forever.
    response = requests.get(url, params=params, headers=headers, timeout=timeout)
    response.raise_for_status()

    data = response.json()
    # Error responses may lack "query"/"pages"; treat that as "no content"
    # instead of letting a KeyError escape.
    pages = data.get("query", {}).get("pages", {})
    page = next(iter(pages.values()), {})

    # A missing page has no "extract"; an empty extract is equally unusable.
    if not page.get("extract"):
        raise ValueError(f"No Wikipedia content found for: {topic}")

    return page["extract"]


Writing /content/wikipedia-rag-agent/src/load_wikipedia.py


In [8]:
# Write chunking.py
%%writefile /content/wikipedia-rag-agent/src/chunking.py
from langchain_text_splitters import RecursiveCharacterTextSplitter

def chunk_text(text, chunk_size=800, overlap=100):
    """Split ``text`` into chunks with RecursiveCharacterTextSplitter.

    Args:
        text: The text to split.
        chunk_size: Passed to the splitter as ``chunk_size``.
        overlap: Passed to the splitter as ``chunk_overlap``.

    Returns:
        Whatever the splitter's ``split_text`` returns for ``text``.
    """
    splitter_options = {"chunk_size": chunk_size, "chunk_overlap": overlap}
    return RecursiveCharacterTextSplitter(**splitter_options).split_text(text)



Writing /content/wikipedia-rag-agent/src/chunking.py


In [9]:
# Write embeddings.py
%%writefile /content/wikipedia-rag-agent/src/embeddings.py
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
import os

# Folder the FAISS index is saved to by create_vectorstore() and read back
# from by load_vectorstore().
VECTORSTORE_PATH = "/content/wikipedia-rag-agent/vectorstore"

def get_embeddings():
    """Build the HuggingFace embedding object used for indexing and search.

    Returns:
        A new ``HuggingFaceEmbeddings`` configured with the
        ``sentence-transformers/all-MiniLM-L6-v2`` model name.
    """
    model_id = "sentence-transformers/all-MiniLM-L6-v2"
    return HuggingFaceEmbeddings(model_name=model_id)

def create_vectorstore(chunks):
    """Embed text chunks into a FAISS index and save it to VECTORSTORE_PATH.

    Args:
        chunks: Iterable of text strings to index.

    Returns:
        The vector store object returned by ``FAISS.from_texts``.

    Raises:
        ValueError: If ``chunks`` is ``None`` or empty.
    """
    texts = list(chunks) if chunks is not None else []
    # Fail fast with a clear message: there is nothing to index, and no
    # reason to build the embedding object first.
    if not texts:
        raise ValueError("Cannot build a vectorstore from an empty list of chunks.")

    embeddings = get_embeddings()
    vectorstore = FAISS.from_texts(texts, embeddings)
    vectorstore.save_local(VECTORSTORE_PATH)
    print(f"Vectorstore saved to {VECTORSTORE_PATH}")
    return vectorstore

def load_vectorstore():
    """Load the FAISS index stored under VECTORSTORE_PATH.

    Returns:
        The vector store object returned by ``FAISS.load_local``.
    """
    embedding_model = get_embeddings()
    # NOTE(review): allow_dangerous_deserialization=True opts in to a load
    # path the library itself labels dangerous; only point this at index
    # folders written by this project, never at untrusted files.
    store = FAISS.load_local(
        VECTORSTORE_PATH,
        embedding_model,
        allow_dangerous_deserialization=True,
    )
    return store

Writing /content/wikipedia-rag-agent/src/embeddings.py


In [10]:
# Write retrieval.py
%%writefile /content/wikipedia-rag-agent/src/retrieval.py

def retrieve_context(vectorstore, question, k=3):
    """Look up the chunks most similar to ``question``.

    Args:
        vectorstore: Object exposing ``similarity_search(query, k=...)`` whose
            results carry a ``page_content`` attribute.
        question: Query text passed to the similarity search.
        k: Number of results requested from the search.

    Returns:
        A ``(context, sources)`` tuple: ``sources`` is the list of
        ``page_content`` values, ``context`` is those values joined by a
        blank line.
    """
    hits = vectorstore.similarity_search(question, k=k)

    sources = []
    for hit in hits:
        sources.append(hit.page_content)

    return "\n\n".join(sources), sources

Writing /content/wikipedia-rag-agent/src/retrieval.py


In [14]:
# Write prompts.py
%%writefile /content/wikipedia-rag-agent/src/prompts.py

def build_rag_prompt(context, question):
    """Assemble the answer-generation prompt for the LLM.

    The prompt tells the model to answer only from ``context`` and to reply
    with a JSON object holding ``answer``, ``supporting_quotes``,
    ``confidence`` and ``missing_info``.

    Args:
        context: Retrieved text placed under the "Context:" heading.
        question: User question placed under the "Question:" heading.

    Returns:
        The complete prompt string.
    """
    # Fixed instruction block; the two trailing empty entries produce the
    # blank line that separates it from the "Context:" heading.
    instruction_lines = [
        "You are a study assistant.",
        "",
        "Use ONLY the context below to answer the question.",
        "If the answer is not in the context, say you don't know.",
        "",
        "Return ONLY valid JSON in this format:",
        "",
        "{",
        '  "answer": "short answer (2-5 sentences)",',
        '  "supporting_quotes": ["quote from context", "quote from context"],',
        '  "confidence": "low | medium | high",',
        '  "missing_info": "what information was missing if any"',
        "}",
        "",
        "",
    ]
    instructions = "\n".join(instruction_lines)
    return f"{instructions}Context:\n{context}\n\nQuestion:\n{question}\n"


Overwriting /content/wikipedia-rag-agent/src/prompts.py


In [15]:
# Write router.py

%%writefile /content/wikipedia-rag-agent/src/router.py

def route_question(question, llm_call):
    """Ask the LLM whether a question is specific enough to answer.

    Args:
        question: The user's question, embedded into the routing prompt.
        llm_call: Callable taking a prompt string and returning the model's
            reply as a string.

    Returns:
        ``"CLARIFY"`` when the upper-cased reply contains the word CLARIFY,
        otherwise ``"RETRIEVE"``.
    """
    instructions = (
        "You are deciding what to do next for a Wikipedia Q&A bot.\n\n"
        "If the question is unclear or vague, return: CLARIFY\n"
        "If the question is clear and specific, return: RETRIEVE\n\n"
    )
    # Few-shot examples, one per line, rendered as: "<question>" -> <LABEL>
    examples = "".join(
        f'"{sample}" -> {label}\n'
        for sample, label in (
            ("What is it?", "CLARIFY"),
            ("Tell me about it", "CLARIFY"),
            ("What is the atmosphere of Mars?", "RETRIEVE"),
            ("When was diabetes discovered?", "RETRIEVE"),
        )
    )
    router_prompt = (
        f"{instructions}Examples:\n{examples}\n"
        f"Question: {question}\n\n"
        "Return ONLY one word: RETRIEVE or CLARIFY\n"
    )

    verdict = llm_call(router_prompt).strip().upper()
    # Anything that does not mention CLARIFY falls through to retrieval.
    if "CLARIFY" in verdict:
        return "CLARIFY"
    return "RETRIEVE"



Writing /content/wikipedia-rag-agent/src/router.py


In [18]:
# Write rag_pipeline.py

%%writefile /content/wikipedia-rag-agent/src/rag_pipeline.py
import json
from retrieval import retrieve_context
from prompts import build_rag_prompt
from router import route_question

def _parse_llm_json(raw):
    """Turn the raw LLM reply into an answer dict that always has every key."""
    text = "" if raw is None else str(raw)
    cleaned = text.strip().replace("```json", "").replace("```", "").strip()

    # Try the whole reply first, then the outermost {...} span, so JSON that
    # the model wrapped in extra prose is still recovered.
    candidates = [cleaned]
    start, end = cleaned.find("{"), cleaned.rfind("}")
    if 0 <= start < end:
        candidates.append(cleaned[start:end + 1])

    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            continue
        # Valid JSON that is not an object (list, string, number) cannot
        # carry the expected keys, so it is treated like a parse failure.
        if not isinstance(parsed, dict):
            continue

        parsed.setdefault("answer", "")
        parsed.setdefault("confidence", "unknown")
        parsed.setdefault("missing_info", "")
        quotes = parsed.get("supporting_quotes")
        if isinstance(quotes, str):
            quotes = [quotes]
        elif not isinstance(quotes, list):
            quotes = []
        parsed["supporting_quotes"] = quotes
        return parsed

    return {
        "answer": text,
        "supporting_quotes": [],
        "confidence": "unknown",
        "missing_info": "Invalid JSON response"
    }


def answer_question(vectorstore, question, llm_call):
    """Route a question, retrieve context, and return a structured answer.

    Args:
        vectorstore: Vector store passed through to ``retrieve_context``.
        question: The user's question.
        llm_call: Callable taking a prompt string and returning the model's
            reply; used both for routing and for answering.

    Returns:
        A dict with the keys ``answer``, ``supporting_quotes`` (list),
        ``confidence``, ``missing_info`` and ``sources`` (list of retrieved
        chunks; empty when the router asks for clarification). When the model
        reply is not a JSON object, ``answer`` holds the raw reply and
        ``missing_info`` is ``"Invalid JSON response"``.
    """
    decision = route_question(question, llm_call)

    if decision == "CLARIFY":
        return {
            "answer": "Please ask a more specific question.",
            "supporting_quotes": [],
            "confidence": "N/A",
            "missing_info": "Question unclear",
            "sources": []
        }

    context, sources = retrieve_context(vectorstore, question)
    prompt = build_rag_prompt(context, question)
    raw = llm_call(prompt)

    parsed = _parse_llm_json(raw)
    parsed["sources"] = sources
    return parsed

Overwriting /content/wikipedia-rag-agent/src/rag_pipeline.py


In [19]:
# Add /src to Python Path
import sys

SRC_DIR = "/content/wikipedia-rag-agent/src"
# Guard against duplicates so re-running this cell does not keep growing sys.path.
if SRC_DIR not in sys.path:
    sys.path.append(SRC_DIR)

In [20]:
# Setup Groq LLM Client

import os
from groq import Groq

# Groq client built from the GROQ_API_KEY environment variable; a KeyError
# here means that variable has not been set.
client = Groq(api_key=os.environ["GROQ_API_KEY"])

def call_groq(prompt):
    """Send a single-turn prompt to the Groq chat API and return the reply.

    Args:
        prompt: Text sent as the only user message.

    Returns:
        The content of the first choice in the response (an empty string if
        that content is ``None``). If the API call raises, a JSON string with
        the keys ``answer``, ``supporting_quotes``, ``confidence`` and
        ``missing_info`` describing the failure.
    """
    import json  # local import: this cell only imports os and groq

    try:
        response = client.chat.completions.create(
            model="llama-3.1-8b-instant",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.2
        )
        return response.choices[0].message.content or ""
    except Exception as e:
        # Deliberately best-effort: the failure is reported as an answer
        # payload instead of propagating. json.dumps escapes quotes and
        # newlines in the error text, so the payload is always valid JSON.
        return json.dumps({
            "answer": "API error: " + str(e),
            "supporting_quotes": [],
            "confidence": "low",
            "missing_info": "Groq API call failed.",
        })


In [22]:
# Load Wikipedia Article + Build VectorStore

import os
from load_wikipedia import load_wikipedia_text
from chunking import chunk_text
from embeddings import create_vectorstore, load_vectorstore, VECTORSTORE_PATH

TOPIC = input("Enter a Wikipedia topic (e.g. Diabetes, Black hole): ").strip()
if not TOPIC:
    raise ValueError("Please enter a non-empty Wikipedia topic.")

# The saved index holds a single article, so record which topic it was built
# from and only reuse it when the same topic is requested again. Without this
# check an index built for an earlier topic would be loaded for any new one.
TOPIC_MARKER_PATH = os.path.join(VECTORSTORE_PATH, "indexed_topic.txt")
cached_topic = None
if os.path.exists(TOPIC_MARKER_PATH):
    with open(TOPIC_MARKER_PATH, encoding="utf-8") as f:
        cached_topic = f.read().strip()

if os.path.exists(f"{VECTORSTORE_PATH}/index.faiss") and cached_topic == TOPIC:
    print("Found existing vectorstore. Loading from disk...")
    vectorstore = load_vectorstore()
    print("Loaded!")
else:
    print(f"Fetching Wikipedia article for: {TOPIC}")
    text = load_wikipedia_text(TOPIC)

    # Be explicit about the encoding: article text routinely contains
    # non-ASCII characters.
    with open("/content/wikipedia-rag-agent/data/topic.txt", "w", encoding="utf-8") as f:
        f.write(text)

    print(f"Article length: {len(text)} characters")
    chunks = chunk_text(text)
    vectorstore = create_vectorstore(chunks)

    # Written only after the index was saved, so the marker never describes
    # an index that failed to build.
    with open(TOPIC_MARKER_PATH, "w", encoding="utf-8") as f:
        f.write(TOPIC)

    print(f"Done! {len(chunks)} chunks indexed.")

Enter a Wikipedia topic (e.g. Diabetes, Black hole): Diabetes
Fetching Wikipedia article for: Diabetes
Article length: 46237 characters
Vectorstore saved to /content/wikipedia-rag-agent/vectorstore
Done! 98 chunks indexed.


In [23]:
# Quick smoke test of the RAG pipeline

from rag_pipeline import answer_question

result = answer_question(vectorstore, "What is diabetes?", call_groq)

# The answer dict is built from LLM-generated JSON, so any key may be absent;
# read with defaults instead of indexing directly.
print("Answer     :", result.get("answer", ""))
print("Confidence :", result.get("confidence", "unknown"))
print("Quotes     :")
for quote in result.get("supporting_quotes") or []:
    print("  -", quote)

Answer     : Diabetes mellitus is a group of common endocrine diseases characterized by sustained high blood sugar levels.
Confidence : high
Quotes     :
  - Diabetes mellitus, commonly known as diabetes, is a group of common endocrine diseases characterized by sustained high blood sugar levels.


In [28]:
# Launch Gradio Chatbot UI
import gradio as gr
from rag_pipeline import answer_question

def chat_fn(message, history):
    """Answer one chat message and format the result as Markdown.

    Args:
        message: The user's question from the chat box.
        history: Prior chat turns supplied by Gradio; not used here.

    Returns:
        A Markdown string with the answer and confidence, followed by the
        supporting quotes, missing-info note and retrieved source chunks
        when those are present.
    """
    result = answer_question(vectorstore, message, call_groq)

    # The result comes from LLM-generated JSON, so every key is read with a
    # default instead of being indexed directly.
    parts = [
        f"**Answer:** {result.get('answer', '')}\n\n",
        f"**Confidence:** {result.get('confidence', 'unknown')}\n\n",
    ]

    quotes = result.get("supporting_quotes") or []
    if isinstance(quotes, str):
        # A bare string would otherwise be rendered one character per quote.
        quotes = [quotes]
    if quotes:
        parts.append("**Supporting Quotes:**\n")
        parts.extend(f"> {quote}\n\n" for quote in quotes)

    missing_info = result.get("missing_info")
    # Skip placeholder values the model uses for "nothing missing".
    if missing_info and str(missing_info).strip().lower() not in ("", "none", "null"):
        parts.append(f"**Missing Info:** {missing_info}\n\n")

    sources = result.get("sources") or []
    if sources:
        parts.append("---\n**Retrieved Source Chunks:**\n")
        for i, src in enumerate(sources, 1):
            # Show at most the first 300 characters of each chunk.
            preview = src[:300] + "..." if len(src) > 300 else src
            parts.append(f"*[Chunk {i}]* {preview}\n\n")

    return "".join(parts)


# Chat display component: renders the Markdown produced by chat_fn.
chatbot = gr.Chatbot(render_markdown=True, height=500)

gr.ChatInterface(
    fn=chat_fn,
    chatbot=chatbot,
    # Title restored from mis-encoded text: book emoji and an em dash.
    title=f"📚 Wikipedia RAG Chatbot — {TOPIC}",
    description=f"Ask questions about {TOPIC} based on its Wikipedia article.",
    examples=[
        "What is the main definition?",
        "What are the key causes?",
        "What treatments or solutions exist?",
    ],
).launch()

It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://ea28d7761cc7af10cf.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


