# Hybrid Search with Late Interaction


## Environment Setup

In [1]:
# Install necessary libraries
%pip install qdrant-client datasets fastembed tqdm hf_xet polars

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.0.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
# Import dependencies
from qdrant_client import QdrantClient, models
from datasets import load_dataset
from fastembed import TextEmbedding, LateInteractionTextEmbedding, SparseTextEmbedding 
import polars as pl
import pandas as pd
from IPython.display import display
from tqdm import tqdm
from getpass import getpass
import random, os

  from .autonotebook import tqdm as notebook_tqdm


## Create Embeddings

### Embedding Setup

In [3]:

dense_embedding_model = TextEmbedding("sentence-transformers/all-MiniLM-L6-v2")
bm25_embedding_model = SparseTextEmbedding("Qdrant/bm25")
late_interaction_embedding_model = LateInteractionTextEmbedding("colbert-ir/colbertv2.0")

In [4]:
documents = pl.read_parquet('hf://datasets/bluuebunny/arxiv_abstract_embedding_mxbai_large_v1_milvus_binary/**/*.parquet')
documents = documents['abstract'].sample(1000)
print(documents)

shape: (1_000,)
Series: 'abstract' [str]
[
	"  A Differentiable Neural Comp…
	"  We develop a deep reinforcem…
	"  Multi-modal sensor data fusi…
	"  As large language models con…
	"  In currently popular cosmolo…
	…
	"  Multitime system correlation…
	"  We reply to the comment arXi…
	"  In recent times, there has b…
	"  It is shown that in curved s…
	"  The ubiquity of oscillations…
]


### Generate Actual Embeddings

In [5]:
# Generate embeddings
print("Generating embeddings...")

dense_embeddings = []
for doc in tqdm(documents, desc="Dense Embeddings"):
    embedding = next(dense_embedding_model.embed(doc))
    dense_embeddings.append(embedding)

bm25_embeddings = []
for doc in tqdm(documents, desc="BM25 Embeddings"):
    embedding = next(bm25_embedding_model.embed(doc))
    bm25_embeddings.append(embedding)

late_interaction_embeddings = []
for doc in tqdm(documents, desc="Late Interaction Embeddings"):
    embedding = next(late_interaction_embedding_model.embed(doc))
    late_interaction_embeddings.append(embedding)

Generating embeddings...


Dense Embeddings: 100%|██████████| 1000/1000 [00:19<00:00, 50.16it/s]
BM25 Embeddings: 100%|██████████| 1000/1000 [00:00<00:00, 2894.32it/s]
Late Interaction Embeddings: 100%|██████████| 1000/1000 [03:33<00:00,  4.68it/s]


In [6]:
# Check shapes and types
print(f"Dense embedding shape: {dense_embeddings[0].shape}")
print(f"BM25 embedding type: {type(bm25_embeddings[0])}")
print(f"Late interaction embedding shape: {late_interaction_embeddings[0].shape}")

Dense embedding shape: (384,)
BM25 embedding type: <class 'fastembed.sparse.sparse_embedding_base.SparseEmbedding'>
Late interaction embedding shape: (108, 128)


## Using Qdrant Cloud Vector Database

### Setting up Qdrant

In [7]:
# Set up Qdrant endpoint and API key
QDRANT_ENDPOINT = (
    os.environ["QDRANT_ENDPOINT"]
    if "QDRANT_ENDPOINT" in os.environ
    else input("Qdrant endpoint: ")
)
QDRANT_API_KEY = (
    os.environ["QDRANT_API_KEY"]
    if "QDRANT_API_KEY" in os.environ
    else getpass("Qdrant API key: ")
)

COLLECTION_NAME = "hybrid-search"

client = QdrantClient(
    url=QDRANT_ENDPOINT,
    api_key=QDRANT_API_KEY
)

In [8]:
collections = client.get_collections().collections

for col in collections:
    print(f"Deleting collection: {col.name}")
    client.delete_collection(col.name)

client.create_collection(
    "hybrid-search",
    vectors_config={
        "all-MiniLM-L6-v2": models.VectorParams(
            size=dense_embeddings[0].shape[0],
            distance=models.Distance.COSINE,
        ),
        "colbertv2.0": models.VectorParams(
            size=late_interaction_embeddings[0].shape[-1],
            distance=models.Distance.COSINE,
            multivector_config=models.MultiVectorConfig(
                comparator=models.MultiVectorComparator.MAX_SIM,
            )
        ),
    },
    sparse_vectors_config={
        "bm25": models.SparseVectorParams(modifier=models.Modifier.IDF)
    }
)

Deleting collection: hybrid-search


True

In [None]:
from qdrant_client.models import PointStruct

# Point creation

points = []
for idx, (dense_embedding, bm25_embedding, late_interaction_embedding, doc) in enumerate(
    zip(dense_embeddings, bm25_embeddings, late_interaction_embeddings, documents)
):
    # Generate a random user_id for demo
    user_id = f"user_{random.randint(1, 10)}"
    
    point = PointStruct(
        id=idx,
        vector={
            "all-MiniLM-L6-v2": dense_embedding.tolist(),
            "bm25": bm25_embedding.as_object(),
            "colbertv2.0": late_interaction_embedding.tolist(),
        },
        payload={
            "document": doc,
            "user_id": user_id
        }
    )
    points.append(point)

### Ingesting Data with Qdrant

In [12]:
# Batch upsert for better performance
batch_size = 32
for i in tqdm(range(0, len(points), batch_size), desc="Uploading to Qdrant"):
    batch = points[i:i + batch_size]
    client.upsert(collection_name="hybrid-search", points=batch)

Uploading to Qdrant: 100%|██████████| 32/32 [02:03<00:00,  3.87s/it]


### Retrieve Vectors from Qdrant

In [13]:
# Query with user_id filter
query = "What are the largest galaxies in the universe?"
target_user_id = "user_3"


In [14]:
dense_vectors = next(dense_embedding_model.query_embed(query))
sparse_vectors = next(bm25_embedding_model.query_embed(query))
late_vectors = next(late_interaction_embedding_model.query_embed(query))

In [15]:
prefetch = [
        models.Prefetch(
            query=dense_vectors,
            using="all-MiniLM-L6-v2",
            limit=20,
        ),
        models.Prefetch(
            query=models.SparseVector(**sparse_vectors.as_object()),
            using="bm25",
            limit=20,
        ),
    ]

In [16]:
# Final query
results = client.query_points(
    COLLECTION_NAME,
    prefetch=prefetch,
    query=late_vectors,
    using="colbertv2.0",
    with_payload=True,
    limit=50,
)

### Filtering by User ID

In [17]:
# Filter results by user_id after retrieval
filtered_points = []
for point in results.points:
    if point.payload.get('user_id') == target_user_id:
        filtered_points.append(point)
        if len(filtered_points) >= 10:
            break

results.points = filtered_points

In [18]:
results

QueryResponse(points=[ScoredPoint(id=578, version=20, score=16.907976, payload={'document': '  We present preliminary results of a search for galaxies at z>4 through Lyman-limit imaging of the fields of known high-redshift radio-galaxies. Objects were selected by means of their broad-band colours, and spectroscopy of candidate objects in one of the fields has been performed through multi-slit spectroscopy at the 4.2m William Herschel Telescope. These spectra show some of the first z>4 galaxies to be identified using the Lyman break technique. ', 'user_id': 'user_3'}, vector=None, shard_key=None, order_value=None), ScoredPoint(id=378, version=13, score=12.813603, payload={'document': "  When measuring the Baryon Acoustic Oscillations (BAO) scale from galaxy surveys, one typically assumes a fiducial cosmology when converting redshift measurements into comoving distances and also when defining input parameters for the reconstruction algorithm. A parameterised template for the model to be 

In [19]:
output = []
for point in results.points:
    output.append({
        'id': point.id,
        'score': point.score,
        'user_id': point.payload.get('user_id', ''),
        'payload': point.payload.get('document', '')
    })


In [23]:
df = pl.DataFrame(output)
pl.Config.set_fmt_str_lengths(200)
print(df)

shape: (5, 4)
┌─────┬───────────┬─────────┬──────────────────────────────────────────────────────────────────────┐
│ id  ┆ score     ┆ user_id ┆ payload                                                              │
│ --- ┆ ---       ┆ ---     ┆ ---                                                                  │
│ i64 ┆ f64       ┆ str     ┆ str                                                                  │
╞═════╪═══════════╪═════════╪══════════════════════════════════════════════════════════════════════╡
│ 578 ┆ 16.907976 ┆ user_3  ┆ We present preliminary results of a search for galaxies at z>4       │
│     ┆           ┆         ┆ through Lyman-limit imaging of the fields of known high-redshift     │
│     ┆           ┆         ┆ radio-galaxies. Objects were selected by means of their broad-band   │
│     ┆           ┆         ┆ col…                                                                 │
│ 378 ┆ 12.813603 ┆ user_3  ┆ When measuring the Baryon Acoustic Oscillations