# RAG Agent with Qdrant

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/thierrypdamiba/qdrant-etl-cookbook/blob/main/notebooks/agents/rag_agent.ipynb)

A retrieval-augmented generation agent that queries Qdrant for context and generates answers using an LLM.

**Requirements:** Set the `OPENAI_API_KEY` environment variable before running the notebook.

In [None]:
# Install the notebook's dependencies quietly: qdrant-client with the
# `fastembed` extra (pinned to >=1.13,<1.16) and the OpenAI SDK.
# NOTE(review): the upper bound presumably protects the `add`/`query`
# convenience calls used below — confirm before relaxing it.
!pip install -q "qdrant-client[fastembed]>=1.13,<1.16" openai

In [None]:
import os
from openai import OpenAI
from qdrant_client import QdrantClient

# Vector store: Qdrant client in local ":memory:" mode.
qdrant = QdrantClient(location=":memory:")

# LLM client: the API key is read from the environment.
openai_client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

In [None]:
# Seed some knowledge
documents = [
    "HNSW indexing in Qdrant uses m=16 and ef_construct=100 by default. Increase m for better recall at the cost of memory.",
    "Scalar quantization converts float32 vectors to int8, reducing memory by 4x with minimal accuracy loss.",
    "Payload indexes should be created on fields you filter by frequently. Supported types: keyword, integer, float, geo, text.",
    "Multi-tenancy in Qdrant is best achieved with a tenant_id payload field and a keyword index on it.",
    "Snapshots can be created per-collection or as a full storage snapshot for disaster recovery.",
]

# Explicit, deterministic ids make this cell idempotent: re-running it
# overwrites the same points instead of appending duplicates under freshly
# generated ids (which would make retrieval return the same text repeatedly).
qdrant.add(
    collection_name="knowledge",
    documents=documents,
    ids=list(range(len(documents))),
)
print(f"Seeded {len(documents)} knowledge documents")

In [None]:
def rag_agent(query: str, collection: str = "knowledge", top_k: int = 3) -> dict:
    """Answer ``query`` with an LLM grounded in documents retrieved from Qdrant.

    The query text is used twice: as the search text for the Qdrant
    collection, and as the question placed in the chat prompt together with
    the retrieved documents.

    Args:
        query: Natural-language question to answer.
        collection: Name of the Qdrant collection to search.
        top_k: Maximum number of documents to retrieve as context.

    Returns:
        A dict with two keys:
            ``"answer"``: message content of the first completion choice.
            ``"sources"``: retrieved document texts, in the order Qdrant
            returned them.
    """
    hits = qdrant.query(
        collection_name=collection,
        query_text=query,
        limit=top_k,
    )

    # Extract the document texts once; the same list feeds the prompt context
    # and is returned to the caller as the cited sources.
    sources = [hit.document for hit in hits]
    context = "\n\n".join(sources)

    response = openai_client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {
                "role": "system",
                "content": (
                    "Answer the user's question using the provided context. "
                    "If the context doesn't contain the answer, say so."
                ),
            },
            {
                "role": "user",
                "content": f"Context:\n{context}\n\nQuestion: {query}",
            },
        ],
    )

    return {
        "answer": response.choices[0].message.content,
        "sources": sources,
    }

In [None]:
# Run the agent on a sample question and show the answer with its sources.
result = rag_agent("How do I set up HNSW indexing?")
print("Answer:", result["answer"])

# Each source is previewed by its first 80 characters.
print("\nSources:")
for source_text in result["sources"]:
    preview = source_text[:80]
    print(f"  - {preview}...")