# Module 3: Multi-Stage Retrieval with Universal Query API

In [1]:
from qdrant_client import QdrantClient, models

client = QdrantClient("http://localhost:6333")

# Make this cell safe under Restart & Run All: create_collection fails when
# the collection already exists (e.g. from a previous run, or from another
# notebook that used the same name with a different vector schema).
# NOTE: this drops any existing "hybrid-search" collection on this server.
if client.collection_exists(collection_name="hybrid-search"):
    client.delete_collection(collection_name="hybrid-search")

# Create collection with both single-vector and multi-vector representations
client.create_collection(
    collection_name="hybrid-search",
    vectors_config={
        # Fast single-vector for prefetch stage
        "bge-small-en-v1.5": models.VectorParams(
            size=384,
            distance=models.Distance.COSINE,
        ),
        # High-quality multi-vector for reranking stage
        "colbert": models.VectorParams(
            size=128,
            distance=models.Distance.DOT,
            multivector_config=models.MultiVectorConfig(
                comparator=models.MultiVectorComparator.MAX_SIM,
            ),
            # m=0: this vector is only used to rerank prefetched candidates
            # in this notebook, so no HNSW graph is requested for it.
            hnsw_config=models.HnswConfigDiff(m=0),
        ),
    },
    sparse_vectors_config={
        # Sparse vector for keyword-style retrieval; IDF modifier is applied
        "bm25": models.SparseVectorParams(
            modifier=models.Modifier.IDF,
        ),
    },
)

True

In [2]:
# Toy corpus for the retrieval demos. Each entry is a (category, text) pair.
# Order matters: the ingestion cell enumerates this list and uses the list
# position as the point id, so reordering entries changes the ids in results.
documents = [
    # Positions 0-9: texts that mention quantum computing applications
    ("research", "Quantum computing applications are emerging in cryptography, where quantum algorithms threaten classical encryption schemes while also enabling new secure protocols."),
    ("research", "Researchers are exploring quantum computing applications in drug discovery by simulating molecular interactions that are too complex for classical computers."),
    ("research", "One promising quantum computing application is optimization, such as solving complex logistics and supply chain problems more efficiently."),
    ("finance", "Quantum computing applications in finance include portfolio optimization, risk analysis, and faster Monte Carlo simulations."),
    ("research", "Machine learning may benefit from quantum computing applications that accelerate training or improve pattern recognition in high-dimensional spaces."),
    ("research", "In materials science, quantum computing applications help model atomic-level behavior to design new superconductors and advanced materials."),
    ("research", "Quantum computing applications are being tested in climate modeling to improve the accuracy of large-scale environmental simulations."),
    ("research", "Some quantum computing applications focus on improving battery chemistry through precise simulation of electrochemical reactions."),
    ("telecom", "Quantum computing applications could transform telecommunications by optimizing network routing and error correction techniques."),
    ("security", "In cybersecurity research, quantum computing applications drive both the development of post-quantum cryptography and quantum-safe systems."),

    # Positions 10-19: general computing topics that do not mention quantum computing
    ("computing", "Classical high-performance computing is still the dominant approach for most scientific simulations today."),
    ("machine_learning", "Neural networks are widely used for image recognition, natural language processing, and recommendation systems."),
    ("systems", "Distributed systems rely on consensus algorithms to maintain reliability across multiple nodes."),
    ("databases", "Databases often use indexing and caching strategies to improve query performance at scale."),
    ("software", "Modern web applications are frequently built using microservices and container orchestration platforms."),
    ("research", "Reinforcement learning has shown strong results in robotics and game-playing agents."),
    ("edge_computing", "Edge computing brings computation closer to data sources to reduce latency and bandwidth usage."),
    ("blockchain", "Blockchain technology enables decentralized ledgers and smart contracts without a central authority."),
    ("cloud", "Cloud providers offer scalable storage and compute resources on a pay-as-you-go model."),
    ("data", "Data visualization helps analysts understand trends and patterns in large datasets."),

    # Positions 20-24: further technology topics that do not mention quantum computing
    ("biotech", "Biotechnology startups increasingly use AI to accelerate gene sequencing and analysis."),
    ("autonomous_systems", "Autonomous vehicles combine sensors, mapping, and machine learning for navigation and safety."),
    ("energy", "Energy grid optimization uses advanced algorithms to balance supply and demand in real time."),
    ("nlp", "Natural language models are trained on massive text corpora to generate and understand human language."),
    ("robotics", "Robotics research integrates control theory, perception, and hardware design to build adaptive machines.")
]

In [3]:
# Build one point per document. The list position doubles as the point id,
# and the same text is attached to all three named vectors, each tagged with
# the model that should represent it.
points = [
    models.PointStruct(
        id=point_id,
        vector={
            "bge-small-en-v1.5": models.Document(model="BAAI/bge-small-en-v1.5", text=text),
            "colbert": models.Document(model="colbert-ir/colbertv2.0", text=text),
            "bm25": models.Document(model="Qdrant/bm25", text=text),
        },
        # Keep the raw text and its category so results are readable/filterable
        payload={"text": text, "category": label},
    )
    for point_id, (label, text) in enumerate(documents)
]

# Ingest data to the collection
client.upsert(collection_name="hybrid-search", points=points)

UpdateResult(operation_id=0, status=<UpdateStatus.COMPLETED: 'completed'>)

In [4]:
query = "quantum computing applications"

# Stage 1 (prefetch): single-vector search that gathers the candidate pool
dense_prefetch = models.Prefetch(
    using="bge-small-en-v1.5",
    query=models.Document(model="BAAI/bge-small-en-v1.5", text=query),
    limit=500,  # Up to 500 candidates are handed to the reranking stage
)

# Stage 2 (rerank): score the prefetched candidates with the ColBERT
# multi-vector and keep the best ones
results = client.query_points(
    collection_name="hybrid-search",
    prefetch=[dense_prefetch],
    using="colbert",
    query=models.Document(model="colbert-ir/colbertv2.0", text=query),
    limit=10,  # Top 10 after reranking
)

In [5]:
# Display the QueryResponse from the multi-stage query (points sorted by ColBERT score)
results

QueryResponse(points=[ScoredPoint(id=3, version=0, score=29.231024, payload={'text': 'Quantum computing applications in finance include portfolio optimization, risk analysis, and faster Monte Carlo simulations.', 'category': 'finance'}, vector=None, shard_key=None, order_value=None), ScoredPoint(id=5, version=0, score=28.898895, payload={'text': 'In materials science, quantum computing applications help model atomic-level behavior to design new superconductors and advanced materials.', 'category': 'research'}, vector=None, shard_key=None, order_value=None), ScoredPoint(id=0, version=0, score=28.62267, payload={'text': 'Quantum computing applications are emerging in cryptography, where quantum algorithms threaten classical encryption schemes while also enabling new secure protocols.', 'category': 'research'}, vector=None, shard_key=None, order_value=None), ScoredPoint(id=2, version=0, score=28.518673, payload={'text': 'One promising quantum computing application is optimization, such as

## Multi-stage with Hybrid Prefetch

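You can also use multiple retrieval methods in the prefetch stage. Here a dense prefetch and a sparse (BM25) prefetch each contribute candidates; the combined candidate set is then reranked by the ColBERT query, so no separate fusion step is needed.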

In [6]:
# Multi-stage with hybrid prefetch: combine dense and sparse retrieval

# Candidate source 1: dense retrieval using single-vector embeddings
dense_prefetch = models.Prefetch(
    using="bge-small-en-v1.5",
    query=models.Document(model="BAAI/bge-small-en-v1.5", text=query),
    limit=500,
)

# Candidate source 2: sparse retrieval using BM25
sparse_prefetch = models.Prefetch(
    using="bm25",
    query=models.Document(model="Qdrant/bm25", text=query),
    limit=500,
)

# Candidates from both prefetches are combined, then reranked with ColBERT
results = client.query_points(
    collection_name="hybrid-search",
    prefetch=[dense_prefetch, sparse_prefetch],
    using="colbert",
    query=models.Document(model="colbert-ir/colbertv2.0", text=query),
    limit=10,
)

In [7]:
# Display the QueryResponse from the hybrid-prefetch query
results

QueryResponse(points=[ScoredPoint(id=3, version=0, score=29.231024, payload={'text': 'Quantum computing applications in finance include portfolio optimization, risk analysis, and faster Monte Carlo simulations.', 'category': 'finance'}, vector=None, shard_key=None, order_value=None), ScoredPoint(id=5, version=0, score=28.898895, payload={'text': 'In materials science, quantum computing applications help model atomic-level behavior to design new superconductors and advanced materials.', 'category': 'research'}, vector=None, shard_key=None, order_value=None), ScoredPoint(id=0, version=0, score=28.62267, payload={'text': 'Quantum computing applications are emerging in cryptography, where quantum algorithms threaten classical encryption schemes while also enabling new secure protocols.', 'category': 'research'}, vector=None, shard_key=None, order_value=None), ScoredPoint(id=2, version=0, score=28.518673, payload={'text': 'One promising quantum computing application is optimization, such as

## Filtering with Propagation

Filters in the main query are automatically propagated to all prefetch stages.

In [8]:
# Filters in the main query automatically propagate to prefetch stages

# Only keep points whose payload "category" equals "research"
research_only = models.Filter(
    must=[
        models.FieldCondition(
            key="category",
            match=models.MatchValue(value="research"),
        ),
    ],
)

# Prefetch stage carries no filter of its own
dense_prefetch = models.Prefetch(
    using="bge-small-en-v1.5",
    query=models.Document(model="BAAI/bge-small-en-v1.5", text=query),
    limit=500,
)

results = client.query_points(
    collection_name="hybrid-search",
    prefetch=[dense_prefetch],
    using="colbert",
    query=models.Document(model="colbert-ir/colbertv2.0", text=query),
    limit=10,
    # This filter applies to BOTH prefetch and reranking stages
    query_filter=research_only,
)

In [9]:
# Display the QueryResponse from the filtered query (category == "research" only)
results

QueryResponse(points=[ScoredPoint(id=5, version=0, score=28.898895, payload={'text': 'In materials science, quantum computing applications help model atomic-level behavior to design new superconductors and advanced materials.', 'category': 'research'}, vector=None, shard_key=None, order_value=None), ScoredPoint(id=0, version=0, score=28.62267, payload={'text': 'Quantum computing applications are emerging in cryptography, where quantum algorithms threaten classical encryption schemes while also enabling new secure protocols.', 'category': 'research'}, vector=None, shard_key=None, order_value=None), ScoredPoint(id=2, version=0, score=28.518673, payload={'text': 'One promising quantum computing application is optimization, such as solving complex logistics and supply chain problems more efficiently.', 'category': 'research'}, vector=None, shard_key=None, order_value=None), ScoredPoint(id=6, version=0, score=28.337791, payload={'text': 'Quantum computing applications are being tested in cl