In [1]:
%%capture

%pip install qdrant-client==1.7.0
from qdrant_client import QdrantClient
from qdrant_client.http import models
import numpy as np
import random
import time
import os
import shutil

# Sample Binary Quantization

In [2]:
def binary_quantization(input_data, threshold):
    return np.where(input_data >= threshold, 1, 0)


data = np.array([0.1, -0.5, 0.9, -1.5, -2.1, 2.5])
binary_data = binary_quantization(data, 0)
print("Original Data:", data)
print("Binary Quantized Data:", binary_data)

Original Data: [ 0.1 -0.5  0.9 -1.5 -2.1  2.5]
Binary Quantized Data: [1 0 1 0 0 1]


# With a real vector database

In [19]:
# Generate fake data

n_samples = 10_000
n_features = 2_000

data = np.random.randn(n_samples, n_features).astype(np.float64)
data[0].shape, type(data[0][0])

((2000,), numpy.float64)

In [4]:
# Function to calculate how accurate the semantic search is


def calculate_recall(true_ids, predicted_ids):
    true_positive = len(set(true_ids).intersection(predicted_ids))
    total_relevant = len(true_ids)
    recall = true_positive / total_relevant
    return recall

In [5]:
# Format the data into objects that can be inserted into the DB

data_to_insert = []
for i, d in enumerate(data):
    point = models.PointStruct(
        id=i, vector=d.tolist(), payload={"city": random.choice(["New York", "Berlin"])}
    )
    data_to_insert.append(point)
data_to_insert[0]

PointStruct(id=0, vector=[0.8739856615714474, 1.807544501917782, -0.3867121777810109, 0.7475418615175148, -0.011220441841003254, 0.45257889965144166, -0.9196444213114707, -1.0290491059951938, 2.918030652711712, 1.0421167848695043, 0.9405130118965381, 0.44188452295746605, -0.7324968849739459, -0.4153768193514054, 0.615227713264581, -0.18773011892615266, -1.1561768781843726, -1.24777927355494, 1.5964806315895208, 0.8547161224294311, -0.3038598659231874, -0.6439279820971076, 0.14474043756222754, 0.3090291498030087, 0.5472755250768923, 0.31077892217254394, 1.2022719476274863, -0.5398650761004974, -2.356072668699791, 0.2853584938779716, -0.13018253455324919, 0.3848423701222905, -0.42812877678193306, -0.8459145927895287, -1.213090240744919, 0.22027091205841778, 0.7649809671645519, -0.3088909853391043, 0.19020701290005637, 2.016694675476221, 0.027413739399153304, 0.6275031142213888, -1.7072609245372927, -1.1967124653346288, 0.3951523566787642, 0.19832771617608677, -0.41679453021084445, -0.376

In [6]:
# Remove any existing DB files

if os.path.exists("data/qdrant_db"):
    shutil.rmtree("data/qdrant_db")

client = QdrantClient(path="data/qdrant_db")

# Normal


In [7]:
client.create_collection(
    collection_name="semantic_search",
    vectors_config=models.VectorParams(
        size=n_features, distance=models.Distance.COSINE
    ),
    quantization_config=None,
)

True

In [8]:
client.upsert(
    collection_name="semantic_search",
    points=data_to_insert,
)

UpdateResult(operation_id=0, status=<UpdateStatus.COMPLETED: 'completed'>)

In [9]:
results = client.search(collection_name="semantic_search", query_vector=data[0])
for result in results:
    print(result)
truth_ids = [result.id for result in results]
truth_ids

id=0 version=0 score=0.9999999992620814 payload={'city': 'Berlin'} vector=None shard_key=None
id=367 version=0 score=0.0935900955869998 payload={'city': 'Berlin'} vector=None shard_key=None
id=6777 version=0 score=0.09345209088487938 payload={'city': 'New York'} vector=None shard_key=None
id=9674 version=0 score=0.07236113324158687 payload={'city': 'Berlin'} vector=None shard_key=None
id=6268 version=0 score=0.07159429777598603 payload={'city': 'Berlin'} vector=None shard_key=None
id=2973 version=0 score=0.06972900526967332 payload={'city': 'New York'} vector=None shard_key=None
id=6340 version=0 score=0.06872670865599134 payload={'city': 'Berlin'} vector=None shard_key=None
id=9953 version=0 score=0.06850956476044623 payload={'city': 'New York'} vector=None shard_key=None
id=510 version=0 score=0.06589252393618895 payload={'city': 'New York'} vector=None shard_key=None
id=1519 version=0 score=0.06586400388836113 payload={'city': 'Berlin'} vector=None shard_key=None


[0, 367, 6777, 9674, 6268, 2973, 6340, 9953, 510, 1519]

# Scalar Quantization


In [10]:
client.create_collection(
    collection_name="scalar_semantic_search",
    vectors_config=models.VectorParams(
        size=n_features, distance=models.Distance.COSINE
    ),
    quantization_config=models.ScalarQuantization(
        scalar=models.ScalarQuantizationConfig(
            type=models.ScalarType.INT8,
            quantile=0.99,  # 1% of extreme values will be excluded from quantization
            always_ram=True,
        ),
    ),
)

True

In [11]:
client.upsert(
    collection_name="scalar_semantic_search",
    points=data_to_insert,
)

UpdateResult(operation_id=0, status=<UpdateStatus.COMPLETED: 'completed'>)

In [12]:
results = client.search(collection_name="scalar_semantic_search", query_vector=data[0])
predicted_ids = [result.id for result in results]
print(predicted_ids)
calculate_recall(truth_ids, predicted_ids)

[0, 367, 6777, 9674, 6268, 2973, 6340, 9953, 510, 1519]


1.0

# Binary Quantization


In [13]:
client.create_collection(
    collection_name="binary_semantic_search",
    vectors_config=models.VectorParams(
        size=n_features, distance=models.Distance.COSINE
    ),
    quantization_config=models.BinaryQuantization(
        binary=models.BinaryQuantizationConfig(
            always_ram=True,
        )
    ),
)

True

In [14]:
client.upsert(
    collection_name="binary_semantic_search",
    points=data_to_insert,
)

UpdateResult(operation_id=0, status=<UpdateStatus.COMPLETED: 'completed'>)

In [15]:
results = client.search(collection_name="binary_semantic_search", query_vector=data[0])
predicted_ids = [result.id for result in results]
print(predicted_ids)
calculate_recall(truth_ids, predicted_ids)

[0, 367, 6777, 9674, 6268, 2973, 6340, 9953, 510, 1519]


1.0

# Time Metrics


In [38]:
# With quantized vectors

n_reps = 15
times = []
for _ in range(n_reps):
    query_vector = np.random.randn(1, n_features)[0]
    st_time = time.time()
    client.search(
        collection_name="binary_semantic_search",
        query_vector=query_vector,
        search_params=models.SearchParams(
            quantization=models.QuantizationSearchParams(
                ignore=False, rescore=False
            )  # use quantized
        ),
        limit=100,
        query_filter=models.Filter(
            must=[
                models.FieldCondition(
                    key="city", match=models.MatchValue(value="Berlin")
                )
            ]
        ),
    )
    times.append(time.time() - st_time)
print(np.median(times))

0.2501802444458008


In [36]:
# With normal vectors

times = []
for _ in range(n_reps):
    query_vector = np.random.randn(1, n_features)[0]
    st_time = time.time()
    client.search(
        collection_name="binary_semantic_search",
        query_vector=query_vector,
        search_params=models.SearchParams(
            quantization=models.QuantizationSearchParams(
                ignore=True, rescore=False
            )  # ignore quantized
        ),
        limit=100,
        query_filter=models.Filter(
            must=[
                models.FieldCondition(
                    key="city", match=models.MatchValue(value="Berlin")
                )
            ]
        ),
    )
    times.append(time.time() - st_time)
print(np.median(times))

0.32148098945617676
