# Module 1: Late Interaction Basics

Define a simple query and two documents to compare how different embedding approaches represent them.

In [1]:
# The question that every approach in this notebook is evaluated on.
query = "What is Qdrant?"

# Two candidate passages to compare against the query.
documents = [
    "Qdrant is an AI-native vector database and a semantic search engine",
    "Relational databases are not well-suited for search",
]

## Single-Vector Embeddings (No Interaction)

In [2]:
from fastembed import TextEmbedding

# Dense (single-vector) encoder: BAAI/bge-small-en-v1.5.
dense_model = TextEmbedding(model_name="BAAI/bge-small-en-v1.5")

# .passage_embed is the document-side entry point and should be used
# for documents only. It hands back a generator of embeddings.
dense_generator = dense_model.passage_embed(documents)

# Each next() call on the generator yields one vector at a time;
# this first one represents the first document.
dense_vector = next(dense_generator)
dense_vector.shape

(384,)

In [3]:
# Queries are encoded with .query_embed rather than .passage_embed.
# It returns a generator as well, so take its first item.
dense_query_vector = next(dense_model.query_embed(query=query))
dense_query_vector.shape

(384,)

Compute the dot-product similarity between the query and each document, starting with the first one. Higher scores indicate stronger semantic matches.

In [4]:
import numpy as np

# Similarity of the first document: dot product of the query
# vector with that document's vector.
first_document_score = np.dot(dense_query_vector, dense_vector)
first_document_score

0.6554835

Now compare the same query against the second document.

In [5]:
# Calculate the dot product between the same query
# and the second document vector.
# The second vector is selected explicitly from a fresh .passage_embed
# call instead of calling next() on the shared `dense_generator`:
# advancing that generator here makes the result depend on how many
# times it was advanced before, and running this cell a second time
# would raise StopIteration once the generator is exhausted.
second_dense_vector = list(dense_model.passage_embed(documents))[1]
np.dot(dense_query_vector, second_dense_vector)

0.43436375

## Cross-Encoders (Early Interaction)

In [6]:
from fastembed.rerank.cross_encoder import TextCrossEncoder

# Cross-encoder reranker: Xenova/ms-marco-MiniLM-L-6-v2.
cross_encoder = TextCrossEncoder(model_name="Xenova/ms-marco-MiniLM-L-6-v2")

# .rerank takes the query together with all the documents. No vector
# representations are created here; the result is a score indicating
# how relevant each document is for the provided query.
score_generator = cross_encoder.rerank(query, documents)

# Consume the generator into a list so the scores are displayed.
list(score_generator)

[10.133832931518555, -11.428644180297852]

## Late Interaction: The Core Paradigm

### The ColBERT Approach

In [10]:
from fastembed import LateInteractionTextEmbedding

# Late-interaction encoder: colbert-ir/colbertv2.0.
colbert_model = LateInteractionTextEmbedding(model_name="colbert-ir/colbertv2.0")

# Document side: .passage_embed over all the documents gives a
# generator of multi-vector representations.
colbert_generator = colbert_model.passage_embed(documents)

# Take the first item from the generator and inspect its shape.
colbert_vector = next(colbert_generator)
colbert_vector.shape

(17, 128)

In [11]:
# Query side: .query_embed produces the multi-vector representation
# of the query; take the first item from the returned generator.
colbert_query_vector = next(colbert_model.query_embed(query=query))
colbert_query_vector.shape

(32, 128)