# Quantization Settings

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/thierrypdamiba/qdrant-etl-cookbook/blob/main/notebooks/configs/quantization.ipynb)

Reduce memory usage with scalar, binary, and product quantization, and compare the trade-offs each method makes between memory, speed, and accuracy.

In [None]:
!pip install -q qdrant-client

In [None]:
from qdrant_client import QdrantClient
from qdrant_client.models import (
    VectorParams,
    Distance,
    ScalarQuantization,
    ScalarQuantizationConfig,
    ScalarType,
    BinaryQuantization,
    BinaryQuantizationConfig,
    ProductQuantization,
    ProductQuantizationConfig,
    CompressionRatio,
    SearchParams,
    QuantizationSearchParams,
)

In [None]:
# Local in-memory client: nothing is persisted and no Qdrant server is needed.
client = QdrantClient(location=":memory:")

In [None]:
# Scalar quantization: 4x memory reduction, minimal accuracy loss.
# Each float32 component is stored as a single int8 value.
client.create_collection(
    collection_name="scalar_quantized",
    vectors_config=VectorParams(size=768, distance=Distance.COSINE),
    quantization_config=ScalarQuantization(
        scalar=ScalarQuantizationConfig(
            type=ScalarType.INT8,
            # Clip the most extreme 1% of values when choosing the int8 range
            # so outliers do not stretch the quantization bounds.
            quantile=0.99,
            # Keep the quantized vectors in RAM.
            always_ram=True,
        ),
    ),
)

# Read the collection back to show the configuration that is reported for it.
# NOTE(review): the client used here is created with ":memory:"; local mode may
# not apply or echo quantization settings, so this can print None — confirm
# against a real Qdrant server.
info = client.get_collection("scalar_quantized")
print("Scalar quantized collection created")
print(f"  Quantization: {info.config.quantization_config}")

In [None]:
# Binary quantization: 32x memory reduction, works best with high-dim vectors.
# The quantization settings are built first, then handed to create_collection.
binary_quantization = BinaryQuantization(
    binary=BinaryQuantizationConfig(always_ram=True),
)
binary_vectors = VectorParams(size=1536, distance=Distance.COSINE)

client.create_collection(
    collection_name="binary_quantized",
    vectors_config=binary_vectors,
    quantization_config=binary_quantization,
)
print("Binary quantized collection created")

In [None]:
# Product quantization: configurable compression ratio (X16 selected here).
# The quantization settings are built first, then handed to create_collection.
product_quantization = ProductQuantization(
    product=ProductQuantizationConfig(
        compression=CompressionRatio.X16,
        always_ram=True,
    ),
)
product_vectors = VectorParams(size=768, distance=Distance.COSINE)

client.create_collection(
    collection_name="product_quantized",
    vectors_config=product_vectors,
    quantization_config=product_quantization,
)
print("Product quantized collection created")

In [None]:
# Search with quantization oversampling for better accuracy.
# Only the parameter object is built here; an actual search would need
# data in the collection.
quantization_search = QuantizationSearchParams(
    ignore=False,  # do use the quantized vectors during search
    rescore=True,  # re-rank candidates afterwards
    oversampling=2.0,  # fetch 2x candidates, rescore with full vectors
)
search_params = SearchParams(quantization=quantization_search)

print("Quantization search params configured:")
print(f"  rescore={search_params.quantization.rescore}")
print(f"  oversampling={search_params.quantization.oversampling}")