# Create a simple tensor with random items

In [2]:
import numpy as np 

# Suppress scientific notation when printing arrays
np.set_printoptions(suppress=True)

# Fix the seed so that Restart & Run All reproduces the same tensor (and
# therefore the same quantization errors) on every run
SEED = 42
rng = np.random.default_rng(SEED)

# Generate uniformly distributed parameters in [-50, 150)
params = rng.uniform(low=-50, high=150, size=10000)

# Introduce an outlier
params[-1] = 1000

# Round each number to two decimal places
params = np.round(params, 2)

print(params)

[ -15.8    63.99   57.05 ...   84.02   99.6  1000.  ]


# Define the quantization methods and quantize
1. Min-max
2. Percentile

In [3]:
def clamp(params_q: np.ndarray, lower_bound: int, upper_bound: int) -> np.ndarray:
    """Limit every element of ``params_q`` to ``[lower_bound, upper_bound]``.

    Args:
        params_q: Array of values to clamp.
        lower_bound: Smallest value allowed in the result.
        upper_bound: Largest value allowed in the result.

    Returns:
        A new array with the same shape as ``params_q``. The input array is
        left unmodified, so callers can safely pass an array they still need.
    """
    # np.clip returns a fresh array instead of overwriting the argument
    return np.clip(params_q, lower_bound, upper_bound)

def asymmetric_quantization(params: np.ndarray, bits: int) -> tuple[np.ndarray, float, int]:
    """Quantize ``params`` asymmetrically using their full min-max range.

    The range ``[min(params), max(params)]`` is mapped onto the unsigned
    integer grid ``[0, 2**bits - 1]``.

    Args:
        params: Values to quantize.
        bits: Number of bits of the target integer grid.

    Returns:
        A tuple ``(quantized, scale, zero)`` where ``quantized`` is an
        ``int32`` array of codes in ``[0, 2**bits - 1]``, ``scale`` is the
        step size between two adjacent codes and ``zero`` is the integer
        zero point. The values are recovered as ``scale * (quantized - zero)``.

    Raises:
        ValueError: If ``params`` is empty or contains NaN.
    """
    # Calculate scale and zero point
    alpha = np.max(params)
    beta = np.min(params)
    lower_bound, upper_bound = 0, 2**bits - 1

    scale = (alpha - beta) / upper_bound
    if scale == 0:
        # All values are identical: fall back to a unit step so the divisions
        # below do not produce NaN/inf
        scale = 1.0
    # The zero point is a position on the integer grid, so return a real int
    zero = int(-1 * np.round(beta / scale))

    # Quantize the parameters
    quantized = clamp(np.round(params / scale + zero), lower_bound, upper_bound).astype(np.int32)
    return quantized, scale, zero

def asymmetric_quantization_percentile(params: np.ndarray, bits: int, percentile: float = 99.99) -> tuple[np.ndarray, float, int]:
    """Quantize ``params`` asymmetrically using a percentile-based range.

    Instead of the absolute min and max, the range is taken between the
    ``100 - percentile`` and ``percentile`` percentiles, so a few extreme
    values do not stretch the grid. Values outside that range saturate at
    ``0`` or ``2**bits - 1``.

    Args:
        params: Values to quantize.
        bits: Number of bits of the target integer grid.
        percentile: Upper percentile of the range; the lower one is
            ``100 - percentile``.

    Returns:
        A tuple ``(quantized, scale, zero)`` where ``quantized`` is an
        ``int32`` array of codes in ``[0, 2**bits - 1]``, ``scale`` is the
        step size between two adjacent codes and ``zero`` is the integer
        zero point. The values are recovered as ``scale * (quantized - zero)``.

    Raises:
        ValueError: If ``percentile`` is outside ``[0, 100]`` or ``params``
            contains NaN.
    """
    # Find the percentile values that delimit the quantization range
    alpha = np.percentile(params, percentile)
    beta = np.percentile(params, 100 - percentile)
    lower_bound, upper_bound = 0, 2**bits - 1

    scale = (alpha - beta) / upper_bound
    if scale == 0:
        # Degenerate range (constant data or percentile == 50): fall back to a
        # unit step so the divisions below do not produce NaN/inf
        scale = 1.0
    # The zero point is a position on the integer grid, so return a real int
    zero = int(-1 * np.round(beta / scale))

    # Quantize; values beyond the percentile range are clamped to the grid ends
    quantized = clamp(np.round(params / scale + zero), lower_bound, upper_bound).astype(np.int32)
    return quantized, scale, zero


def asymmetric_dequantize(params_q: np.array, scale: float, zero: int):
    """Map quantized codes back to real values.

    Computes ``scale * (params_q - zero)``: the zero point is removed first,
    then the result is stretched by the step size.
    """
    offset_removed = params_q - zero
    return offset_removed * scale


def quantization_error(params: np.array, params_q: np.array):
    """Return the mean squared error between ``params`` and ``params_q``."""
    residual = params - params_q
    return np.mean(np.square(residual))


# Quantize to 8 bits

In [4]:
# Quantize the same tensor to 8 bits with both range-selection strategies
asymmetric_q, asymmetric_scale, asymmetric_zero = asymmetric_quantization(params, 8)
asymmetric_q_percentile, asymmetric_scale_percentile, asymmetric_zero_percentile = (
    asymmetric_quantization_percentile(params, 8)
)

# Show the original values next to each set of quantized codes
print(f"Original: {np.round(params, 2)}")
print(f"Asymmetric (min-max) scale: {asymmetric_scale}, zero: {asymmetric_zero}")
print(asymmetric_q)
print(f"Asymmetric (percentile) scale:{asymmetric_scale_percentile}, zero: {asymmetric_zero_percentile}")
print(asymmetric_q_percentile)

Original: [ -15.8    63.99   57.05 ...   84.02   99.6  1000.  ]
Asymmetric (min-max) scale: 4.117647058823529, zero: 12.0
[  8  28  26 ...  32  36 255]
Asymmetric (percentile) scale:0.7845294235270543, zero: 64.0
[ 44 146 137 ... 171 191 255]


# Dequantize back to floating point (float64 here)

In [6]:
# Map both sets of codes back to real values, each with its own scale and
# zero point
params_deq_asymmetric = asymmetric_dequantize(
    asymmetric_q, asymmetric_scale, asymmetric_zero
)
params_deq_asymmetric_percentile = asymmetric_dequantize(
    asymmetric_q_percentile,
    asymmetric_scale_percentile,
    asymmetric_zero_percentile,
)

# Compare the reconstructions with the original tensor
print(f"Original: {np.round(params, 2)}")
print(f"Dequantize Asymmetric(min-max): {params_deq_asymmetric}")
print("")
print(f"Dequantize Asymmetric(percentile): {params_deq_asymmetric_percentile}")

Original: [ -15.8    63.99   57.05 ...   84.02   99.6  1000.  ]
Dequantize Asymmetric(min-max): [ -16.47058824   65.88235294   57.64705882 ...   82.35294118   98.82352941
 1000.58823529]

Dequantize Asymmetric(percentile): [-15.69058847  64.33141273  57.27064792 ...  83.94464832  99.63523679
 149.84511989]


# Quantization error (excluding outlier)

In [9]:
# Score each scheme on every value except the injected outlier (last element)
minmax_error = np.round(
    quantization_error(params[:-1], params_deq_asymmetric[:-1]), 2
)
percentile_error = np.round(
    quantization_error(params[:-1], params_deq_asymmetric_percentile[:-1]), 2
)
print(f"Minmax-error: {minmax_error}")
print(f"Percentile error: {percentile_error}")

Minmax-error: 1.4
Percentile error: 0.05
