# Agentic AI Workshop — Part 3: RAG Chatbot with ChromaDB

This notebook demonstrates:
- Ingesting a small set of documents
- Chunking + embedding with OpenAI Embeddings
- Storing & retrieving with ChromaDB
- Building a simple RAG prompt
- Gradio Chat UI that does retrieval on each turn

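> **Prereqs**
> - `pip install chromadb openai gradio tiktoken` (`tiktoken` is optional; it is only needed for token-aware chunking)
> - An `OPENAI_API_KEY` entry in your `.env` file (the first code cell checks for it)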


# If needed, uncomment to install:
# !pip install --upgrade chromadb openai gradio tiktoken

In [None]:
import os, re, textwrap, uuid
from typing import List, Dict

# Load environment variables from .env file
import sys
sys.path.append('../utils')
from helpers import load_environment, get_env_var

import chromadb
from chromadb.utils import embedding_functions
from openai import OpenAI
import gradio as gr

# Load settings via the project helper (see the import note above: values come from .env)
load_environment()
print("Environment variables loaded successfully!")

# Report whether a real key is present (missing, empty, or the template placeholder all count as unset)
api_key = get_env_var('OPENAI_API_KEY')
if api_key and api_key != 'your_openai_api_key_here':
    print("✅ OpenAI API key found!")
else:
    print("⚠️  Please set your OPENAI_API_KEY in the .env file")

# OpenAI client built with default arguments
client = OpenAI()

# Chroma client: in-memory for the workshop; use chromadb.PersistentClient(path=...) to keep data on disk
chroma_client = chromadb.Client()
collection = chroma_client.get_or_create_collection(name="workshop_docs")

# --- Source documents
# Replace these with real Ready Tensor publication texts or load from files.
# Sample corpus: one dict per publication with "id", "title" and "text" keys.
source_docs = [
    {"id": doc_id, "title": title, "text": text}
    for doc_id, title, text in (
        (
            "rt_pub_1",
            "Benchmarking Forecasting Models",
            "We benchmark classical, ML, and LLM-based forecasting models across many datasets using metrics such as RMSE and sMAPE.",
        ),
        (
            "rt_pub_2",
            "TSP Dataset Generator",
            "An open-source tool to generate large-scale TSP datasets for experimentation with combinatorial optimization and LLMs.",
        ),
        (
            "rt_pub_3",
            "Distance Profile for Time Series",
            "Distance profiling enables tasks like classification, motif discovery, and anomaly detection in time series analysis.",
        ),
        (
            "rt_pub_4",
            "Handling Class Imbalance in Binary Classification",
            "We compare SMOTE, class weights, and decision-threshold adjustment across models and datasets with extensive metrics.",
        ),
        (
            "rt_pub_5",
            "Evaluating Multi-Agent AI Systems",
            "We evaluate a multi-agent authoring assistant using RAGAS and custom metrics focusing on coherence and utility.",
        ),
    )
]

In [None]:
# --- Chunking utility (simple)
def chunk_text(text: str, max_chars: int = 800, overlap: int = 100) -> List[str]:
    """Split ``text`` into character windows that overlap their neighbours.

    Args:
        text: String to split.
        max_chars: Maximum length of each chunk; must be positive.
        overlap: Number of characters shared by consecutive chunks; must
            satisfy ``0 <= overlap < max_chars`` so each window advances.

    Returns:
        The chunks in order. Empty ``text`` yields ``[]``; text no longer
        than ``max_chars`` yields exactly one chunk.

    Raises:
        ValueError: If ``max_chars`` or ``overlap`` is out of range.
    """
    if max_chars <= 0:
        raise ValueError("max_chars must be a positive integer")
    if not 0 <= overlap < max_chars:
        raise ValueError("overlap must satisfy 0 <= overlap < max_chars")

    chunks = []
    n = len(text)
    start = 0
    while start < n:
        end = min(n, start + max_chars)
        chunks.append(text[start:end])
        if end == n:
            # Final window reached. Stepping back by `overlap` from here would
            # re-emit the same tail forever, so stop explicitly.
            break
        start = end - overlap
    return chunks


In [None]:
# --- Embedding & upsert
def embed_texts(texts: List[str], model: str = "text-embedding-3-small") -> List[List[float]]:
    """Embed a batch of strings with the OpenAI embeddings endpoint.

    Args:
        texts: Strings to embed, sent in a single request.
        model: Name of the embedding model to use.

    Returns:
        One embedding vector per item of the response data, in the order the
        response lists them. An empty ``texts`` returns ``[]`` without making
        a request.
    """
    if not texts:
        # Nothing to embed; avoid a request the endpoint would reject.
        return []
    resp = client.embeddings.create(model=model, input=texts)
    return [d.embedding for d in resp.data]

def ingest_documents(docs: List[Dict]) -> int:
    """Chunk, embed, and upsert documents into the Chroma collection.

    Args:
        docs: Dicts with "id", "title" and "text" keys. Each chunk is stored
            under the id ``"<doc id>_<chunk index>"`` with its title, parent
            id and chunk index as metadata.

    Returns:
        The number of chunks upserted; 0 when there is nothing to ingest.
    """
    ids, docs_texts, metadatas = [], [], []
    for d in docs:
        for i, ch in enumerate(chunk_text(d["text"])):
            ids.append(f"{d['id']}_{i}")
            docs_texts.append(ch)
            metadatas.append({"title": d["title"], "parent_id": d["id"], "chunk": i})

    if not ids:
        # No chunks were produced: skip the embedding request and the upsert,
        # neither of which has anything to work on.
        return 0

    embs = embed_texts(docs_texts)
    collection.upsert(ids=ids, documents=docs_texts, metadatas=metadatas, embeddings=embs)
    return len(ids)

# Chunk, embed, and upsert the sample documents, then report how many chunks were stored.
num_chunks = ingest_documents(source_docs)
print(f"Ingested {num_chunks} chunks into Chroma.")


In [None]:
# --- Retrieval
def retrieve(query: str, k: int = 3):
    """Return the collection entries closest to ``query``.

    Args:
        query: Free-text question; it is embedded with ``embed_texts``.
        k: Number of results requested from the collection.

    Returns:
        A list of dicts with "text" (the stored chunk), "meta" (its metadata)
        and "distance" (the distance reported by the collection, as a float).
    """
    query_vector = embed_texts([query])[0]
    result = collection.query(
        query_embeddings=[query_vector],
        n_results=k,
        include=["documents", "metadatas", "distances"],
    )
    # Results are nested per query embedding; only one was sent, so read index 0.
    rows = zip(result["documents"][0], result["metadatas"][0], result["distances"][0])
    return [
        {"text": text, "meta": meta, "distance": float(distance)}
        for text, meta, distance in rows
    ]

# Test retrieval: print the title and distance of the two closest hits for a sample question
for h in retrieve("How do you compare models on forecasting tasks?", k=2):
    print(h["meta"]["title"], "→", h["distance"])


In [None]:
# --- RAG prompt
# System message: restricts answers to the supplied CONTEXT and asks for an explicit "don't know" otherwise.
SYSTEM = "You are a helpful RAG assistant. Answer based only on the provided CONTEXT. If the answer is not in context, say you don't know."
# User-message template; filled with str.format using the `question` and `context` fields.
USER_TEMPLATE = "QUESTION: {question}\n\nCONTEXT:\n{context}"

def build_context(hits, max_hits: int = 3):
    """Format the leading retrieved hits into the CONTEXT part of the prompt.

    Args:
        hits: Iterable of hit dicts, each with a "text" entry and a "meta"
            dict holding "title" and "chunk".
        max_hits: Maximum number of leading hits to include. The default of 3
            matches the limit that used to be hard-coded here.

    Returns:
        One "[title / chunk N]" header plus text per included hit, separated
        by blank lines; an empty string when there are no hits.
    """
    # Trim first so hits that will be discarded are never formatted.
    selected = list(hits)[:max_hits]
    return "\n\n".join(
        f"[{h['meta']['title']} / chunk {h['meta']['chunk']}]\n{h['text']}"
        for h in selected
    )

def rag_answer(question: str):
    """Answer ``question`` from retrieved context using a chat completion.

    Args:
        question: The user's question; also used as the retrieval query.

    Returns:
        A ``(answer, hits)`` tuple: the model's reply as a string and the
        hits returned by ``retrieve`` (four are requested).
    """
    hits = retrieve(question, k=4)
    context = build_context(hits)
    prompt = USER_TEMPLATE.format(question=question, context=context)
    resp = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": SYSTEM},
            {"role": "user", "content": prompt},
        ],
        temperature=0.2,
    )
    # message.content is nullable in the API; fall back to "" so callers that
    # concatenate onto the answer do not hit a TypeError.
    answer = resp.choices[0].message.content or ""
    return answer, hits


In [None]:
# --- Gradio Chat UI with retrieval per turn
with gr.Blocks(title="RAG Chatbot (ChromaDB)") as app:
    gr.Markdown("### RAG Chatbot — ask about the sample Ready Tensor publications")
    chat = gr.Chatbot(height=300)
    q = gr.Textbox(placeholder="Ask a question about the publications...", label="Your question")
    clear = gr.Button("Clear")

    def chat_step(history, user_msg):
        """Handle one submitted question.

        Runs ``rag_answer`` on the message, appends the answer plus a list of
        up to three cited sources to the chat history, and clears the textbox.
        Blank submissions leave the history unchanged.
        """
        history = history or []  # tolerate a missing/None history value
        if not user_msg or not user_msg.strip():
            # Nothing to ask: skip retrieval instead of embedding an empty query.
            return history, ""
        answer, hits = rag_answer(user_msg)
        cited = "\n\n**Sources:**\n" + "\n".join(
            f"- {h['meta']['title']} (chunk {h['meta']['chunk']})" for h in hits[:3]
        )
        history = history + [(user_msg, answer + cited)]
        return history, ""

    def do_clear():
        """Reset the chat window to an empty history."""
        return []

    # Submitting the textbox updates the chat and empties the textbox.
    q.submit(chat_step, [chat, q], [chat, q])
    clear.click(do_clear, [], [chat])

# Uncomment to run locally
# app.launch()


In [None]:
# --- Chunking utility (simple)
def chunk_text(text: str, max_chars: int = 800, overlap: int = 100) -> List[str]:
    """Split ``text`` into character windows that overlap their neighbours.

    Args:
        text: String to split.
        max_chars: Maximum length of each chunk; must be positive.
        overlap: Number of characters shared by consecutive chunks; must
            satisfy ``0 <= overlap < max_chars`` so each window advances.

    Returns:
        The chunks in order. Empty ``text`` yields ``[]``; text no longer
        than ``max_chars`` yields exactly one chunk.

    Raises:
        ValueError: If ``max_chars`` or ``overlap`` is out of range.
    """
    if max_chars <= 0:
        raise ValueError("max_chars must be a positive integer")
    if not 0 <= overlap < max_chars:
        raise ValueError("overlap must satisfy 0 <= overlap < max_chars")

    chunks = []
    n = len(text)
    start = 0
    while start < n:
        end = min(n, start + max_chars)
        chunks.append(text[start:end])
        if end == n:
            # Final window reached. Stepping back by `overlap` from here would
            # re-emit the same tail forever, so stop explicitly.
            break
        start = end - overlap
    return chunks

In [None]:
# --- Embedding & upsert
def embed_texts(texts: List[str], model: str = "text-embedding-3-small") -> List[List[float]]:
    """Embed a batch of strings with the OpenAI embeddings endpoint.

    Args:
        texts: Strings to embed, sent in a single request.
        model: Name of the embedding model to use.

    Returns:
        One embedding vector per item of the response data, in the order the
        response lists them. An empty ``texts`` returns ``[]`` without making
        a request.
    """
    if not texts:
        # Nothing to embed; avoid a request the endpoint would reject.
        return []
    resp = client.embeddings.create(model=model, input=texts)
    return [d.embedding for d in resp.data]

def ingest_documents(docs: List[Dict]) -> int:
    """Chunk, embed, and upsert documents into the Chroma collection.

    Args:
        docs: Dicts with "id", "title" and "text" keys. Each chunk is stored
            under the id ``"<doc id>_<chunk index>"`` with its title, parent
            id and chunk index as metadata.

    Returns:
        The number of chunks upserted; 0 when there is nothing to ingest.
    """
    ids, docs_texts, metadatas = [], [], []
    for d in docs:
        for i, ch in enumerate(chunk_text(d["text"])):
            ids.append(f"{d['id']}_{i}")
            docs_texts.append(ch)
            metadatas.append({"title": d["title"], "parent_id": d["id"], "chunk": i})

    if not ids:
        # No chunks were produced: skip the embedding request and the upsert,
        # neither of which has anything to work on.
        return 0

    embs = embed_texts(docs_texts)
    collection.upsert(ids=ids, documents=docs_texts, metadatas=metadatas, embeddings=embs)
    return len(ids)

# Chunk, embed, and upsert the sample documents, then report how many chunks were stored.
# Chunk ids are derived from the document id and chunk index, so this upserts under the same ids each run.
num_chunks = ingest_documents(source_docs)
print(f"Ingested {num_chunks} chunks into Chroma.")

In [None]:
# --- Retrieval
def retrieve(query: str, k: int = 3):
    """Return the collection entries closest to ``query``.

    Args:
        query: Free-text question; it is embedded with ``embed_texts``.
        k: Number of results requested from the collection.

    Returns:
        A list of dicts with "text" (the stored chunk), "meta" (its metadata)
        and "distance" (the distance reported by the collection, as a float).
    """
    query_vector = embed_texts([query])[0]
    result = collection.query(
        query_embeddings=[query_vector],
        n_results=k,
        include=["documents", "metadatas", "distances"],
    )
    # Results are nested per query embedding; only one was sent, so read index 0.
    rows = zip(result["documents"][0], result["metadatas"][0], result["distances"][0])
    return [
        {"text": text, "meta": meta, "distance": float(distance)}
        for text, meta, distance in rows
    ]

# Test retrieval: print the title and distance of the two closest hits for a sample question
for h in retrieve("How do you compare models on forecasting tasks?", k=2):
    print(h["meta"]["title"], "→", h["distance"])

In [None]:
# --- RAG prompt
# System message: restricts answers to the supplied CONTEXT and asks for an explicit "don't know" otherwise.
SYSTEM = "You are a helpful RAG assistant. Answer based only on the provided CONTEXT. If the answer is not in context, say you don't know."
# User-message template; filled with str.format using the `question` and `context` fields.
USER_TEMPLATE = "QUESTION: {question}\n\nCONTEXT:\n{context}"

def build_context(hits, max_hits: int = 3):
    """Format the leading retrieved hits into the CONTEXT part of the prompt.

    Args:
        hits: Iterable of hit dicts, each with a "text" entry and a "meta"
            dict holding "title" and "chunk".
        max_hits: Maximum number of leading hits to include. The default of 3
            matches the limit that used to be hard-coded here.

    Returns:
        One "[title / chunk N]" header plus text per included hit, separated
        by blank lines; an empty string when there are no hits.
    """
    # Trim first so hits that will be discarded are never formatted.
    selected = list(hits)[:max_hits]
    return "\n\n".join(
        f"[{h['meta']['title']} / chunk {h['meta']['chunk']}]\n{h['text']}"
        for h in selected
    )

def rag_answer(question: str):
    """Answer ``question`` from retrieved context using the Responses API.

    Args:
        question: The user's question; also used as the retrieval query.

    Returns:
        A ``(answer, hits)`` tuple: the response's ``output_text`` and the
        hits returned by ``retrieve`` (four are requested).
    """
    hits = retrieve(question, k=4)
    user_content = USER_TEMPLATE.format(
        question=question,
        context=build_context(hits),
    )
    # System instructions first, then the question with its context.
    conversation = [
        {"role": "system", "content": SYSTEM},
        {"role": "user", "content": user_content},
    ]
    resp = client.responses.create(
        model="gpt-4o-mini",
        input=conversation,
        temperature=0.2,
    )
    return resp.output_text, hits

In [None]:
# --- Gradio Chat UI with retrieval per turn
with gr.Blocks(title="RAG Chatbot (ChromaDB)") as app:
    gr.Markdown("### RAG Chatbot — ask about the sample Ready Tensor publications")
    chat = gr.Chatbot(height=300)
    q = gr.Textbox(placeholder="Ask a question about the publications...", label="Your question")
    clear = gr.Button("Clear")

    def chat_step(history, user_msg):
        """Handle one submitted question.

        Runs ``rag_answer`` on the message, appends the answer plus a list of
        up to three cited sources to the chat history, and clears the textbox.
        Blank submissions leave the history unchanged.
        """
        history = history or []  # tolerate a missing/None history value
        if not user_msg or not user_msg.strip():
            # Nothing to ask: skip retrieval instead of embedding an empty query.
            return history, ""
        answer, hits = rag_answer(user_msg)
        cited = "\n\n**Sources:**\n" + "\n".join(
            f"- {h['meta']['title']} (chunk {h['meta']['chunk']})" for h in hits[:3]
        )
        history = history + [(user_msg, answer + cited)]
        return history, ""

    def do_clear():
        """Reset the chat window to an empty history."""
        return []

    # Submitting the textbox updates the chat and empties the textbox.
    q.submit(chat_step, [chat, q], [chat, q])
    clear.click(do_clear, [], [chat])

# Uncomment to run locally
# app.launch()