# Scalar Quantization

- Each dimension of the dataset is treated independently.
- The maximum and minimum values are calculated for each dimension across the dataset.
- The range between the maximum and minimum values in each dimension is then divided into equal-sized bins.


In [1]:
import numpy as np

# Synthetic data: 2000 vectors with 256 dimensions each, drawn from a
# standard normal distribution (mean 0, standard deviation 1).
dataset = np.random.normal(loc=0.0, scale=1.0, size=(2000, 256))

In [2]:
# Per-dimension extremes over all vectors: row 0 holds the minima,
# row 1 holds the maxima.
ranges = np.vstack([dataset.min(axis=0), dataset.max(axis=0)])

In [3]:
# Lower edge of every dimension's value range (row 0 of `ranges`).
starts = ranges[0]
# Width of one quantization step: the range is split into 255 equal steps,
# so the codes 0..255 span it from minimum to maximum.
steps = (ranges[1] - starts) / 255

In [4]:
# Express every value as a number of steps above its dimension's minimum,
# then truncate to an 8-bit code.
scalar_quantized_dataset = ((dataset - starts) / steps).astype(np.uint8)

In [5]:
def scalar_quantisation(dataset):
    """Quantise every dimension of ``dataset`` to 8-bit codes.

    Each dimension (axis 0 is reduced, so one column per dimension for 2-D
    input) is scaled linearly so that its observed minimum maps to code 0
    and its observed maximum to code 255. Intermediate values are truncated
    towards zero by the cast to ``uint8``.

    Args:
        dataset: Array-like of numbers; rows are treated as samples.

    Returns:
        ``numpy.ndarray`` of dtype ``uint8`` with the same shape as
        ``dataset``. A dimension whose values are all equal is encoded
        as 0. Empty input yields an empty array.
    """
    data = np.asarray(dataset)
    if data.size == 0:
        # min/max reductions raise on an empty axis; nothing to encode.
        return np.empty(data.shape, dtype=np.uint8)

    # Minimum of each dimension and the width of one of its 255 steps.
    starts = data.min(axis=0)
    steps = (data.max(axis=0) - starts) / 255

    # A constant dimension has zero range; dividing by a zero step would
    # produce NaN, whose cast to uint8 is undefined. Any non-zero step maps
    # such a dimension to code 0, since (value - start) is 0 everywhere.
    steps = np.where(steps == 0, 1.0, steps)

    return ((data - starts) / steps).astype(np.uint8)

# Product Quantization

- Product quantization can preserve more information by dividing each vector into sub-vectors and quantizing each sub-vector independently.

In [7]:
from sklearn.cluster import KMeans
import numpy as np

# Given array: two 9-dimensional vectors
array = np.array([
    [8.2, 10.3, 290.1, 278.1, 310.3, 299.9, 308.7, 289.7, 300.1],
    [0.1, 7.3, 8.9, 9.7, 6.9, 9.55, 8.1, 8.5, 8.99],
])

# Number of sub-vectors per vector (m) and number of centroids (k)
m, k = 3, 2

# Every vector must split evenly into m sub-vectors.
n_vectors, n_dims = array.shape
if n_dims % m != 0:
    raise ValueError(f"vector length {n_dims} is not divisible by m={m}")

# Divide each vector into m disjoint sub-vectors, one per row. The second
# reshape dimension is the sub-vector LENGTH (n_dims // m), not the count m;
# the two only coincide for this data because 9 // 3 == 3.
subvectors = array.reshape(-1, n_dims // m)

# Fit ONE k-means codebook on the sub-vectors of all positions pooled
# together. NOTE: textbook product quantization trains a separate codebook
# per sub-vector position; this demo shares a single codebook.
# n_init is set explicitly so the result does not depend on the library's
# version-specific default.
kmeans = KMeans(n_clusters=k, n_init=10, random_state=0).fit(subvectors)

# Replace each sub-vector with the index of its nearest centroid
labels = kmeans.labels_

# One row of m centroid indices per original vector
quantized_array = labels.reshape(n_vectors, m)

# Output the quantized array
quantized_array

  super()._check_params_vs_input(X, default_n_init=10)


array([[0, 1, 1],
       [0, 0, 0]], dtype=int32)