In [1]:
import numpy as np

# Print floats with two decimals and suppress scientific notation
# (precision alone does not disable scientific notation; suppress=True does)
np.set_printoptions(precision=2, suppress=True)

# Fixed seed so that re-running this cell reproduces the same parameters
rng = np.random.default_rng(42)

# Generate uniformly distributed parameters
params = rng.uniform(low=-50, high=50, size=20)

# Make sure the important values are at the beginning for easier debugging:
# index 0 holds the largest value, index 1 the smallest, index 2 an exact zero
params[0] = params.max() + 1
params[1] = params.min() - 1
params[2] = 0

# Round each number to the second decimal place
params = np.round(params, 2)

print(params)




[ 42.35 -41.27   0.    12.25 -23.1   13.71  34.11  41.35 -40.27  32.93
 -17.77  34.9  -16.92  32.86 -37.66   8.82  29.77 -35.68   5.61  -0.26]


In [10]:
# Fixed seed so that re-running this cell reproduces the same parameters
rng = np.random.default_rng(42)
params = rng.uniform(low=-50, high=150, size=10000)

# Introduce an outlier far outside the sampled [-50, 150) range
params[-1] = 1000

# Round each number to the second decimal place
params = np.round(params, 2)

In [15]:
def clamp(params_q: np.ndarray, lower_bound: int, upper_bound: int) -> np.ndarray:
    """Limit every element of ``params_q`` to ``[lower_bound, upper_bound]``.

    Args:
        params_q: Values to clamp.
        lower_bound: Smallest value allowed in the result.
        upper_bound: Largest value allowed in the result.

    Returns:
        A new array in which elements below ``lower_bound`` are replaced by
        ``lower_bound`` and elements above ``upper_bound`` by ``upper_bound``.
        The array passed in is left unmodified.
    """
    # np.clip returns a fresh array, so the caller's data is not overwritten
    return np.clip(params_q, lower_bound, upper_bound)

def asymmetric_quantization(params: np.ndarray, bits: int) -> tuple[np.ndarray, float, float]:
    """Quantize ``params`` onto the unsigned integer grid ``[0, 2**bits - 1]``.

    The scale is derived from the full observed range (minimum to maximum) of
    ``params``, and the zero point shifts that range onto the integer grid.

    Args:
        params: Values to quantize.
        bits: Bit width of the target integer grid.

    Returns:
        ``(quantized, scale, zero)`` where ``quantized`` is an ``int32`` array
        clamped to ``[0, 2**bits - 1]``, ``scale`` is the step size between two
        adjacent levels, and ``zero`` is the zero point (a whole number,
        returned as a NumPy float).
    """
    # Calculate the scale and zero point
    alpha = np.max(params)
    beta = np.min(params)
    lower_bound, upper_bound = 0, 2**bits - 1

    scale = (alpha - beta) / upper_bound
    if scale == 0:
        # All values are identical, so the range is empty and dividing by the
        # scale would produce NaN/inf. Falling back to |beta| (or 1.0 when the
        # value is 0) keeps the result finite and lets asymmetric_dequantize
        # recover the constant exactly.
        scale = float(abs(beta)) or 1.0
    zero = -1 * np.round(beta / scale)

    # Quantize the parameters
    quantized = clamp(np.round(params / scale + zero), lower_bound, upper_bound).astype(np.int32)
    return quantized, scale, zero

# percentile strategy
def asymmetric_quantization_percentile(params: np.ndarray, bits: int, percentile: float = 99.99) -> tuple[np.ndarray, float, float]:
    """Quantize ``params`` onto ``[0, 2**bits - 1]`` using a percentile-based range.

    Instead of the observed minimum and maximum, the range runs from the
    ``100 - percentile`` percentile to the ``percentile`` percentile of
    ``params``. Values outside that range are clamped to the ends of the grid.

    Args:
        params: Values to quantize.
        bits: Bit width of the target integer grid.
        percentile: Upper percentile of the range; the lower end is
            ``100 - percentile``.

    Returns:
        ``(quantized, scale, zero)`` where ``quantized`` is an ``int32`` array
        clamped to ``[0, 2**bits - 1]``, ``scale`` is the step size between two
        adjacent levels, and ``zero`` is the zero point (a whole number,
        returned as a NumPy float).
    """
    # Find the percentile values that bound the quantization range
    alpha = np.percentile(params, percentile)
    beta = np.percentile(params, 100 - percentile)
    lower_bound, upper_bound = 0, 2**bits - 1

    scale = (alpha - beta) / upper_bound
    if scale == 0:
        # Both percentiles coincide (constant data, or percentile == 50), so
        # dividing by the scale would produce NaN/inf. Fall back to |beta|
        # (or 1.0 when it is 0) to keep the result finite.
        scale = float(abs(beta)) or 1.0
    zero = -1 * np.round(beta / scale)

    # Quantize the parameters; values beyond the percentiles saturate
    quantized = clamp(np.round(params / scale + zero), lower_bound, upper_bound).astype(np.int32)
    return quantized, scale, zero


def asymmetric_dequantize(params_q: np.array, scale: float, zero: int) -> np.array:
    """Map asymmetrically quantized values back to approximate real values.

    Subtracts the zero point ``zero`` from ``params_q`` and multiplies the
    result by ``scale``.
    """
    shifted = params_q - zero
    return shifted * scale

def symmetric_dequantize(params_q: np.array, scale: float) -> np.array:
    """Map symmetrically quantized values back to approximate real values.

    Multiplies ``params_q`` by ``scale``; no zero point is involved.
    """
    restored = params_q * scale
    return restored

def symmetric_quantization(params: np.ndarray, bits: int) -> tuple[np.ndarray, float]:
    """Quantize ``params`` onto the signed grid ``[-2**(bits-1), 2**(bits-1) - 1]``.

    The scale maps the largest absolute value in ``params`` to
    ``2**(bits-1) - 1``; there is no zero point, so 0.0 maps to integer 0.

    Args:
        params: Values to quantize.
        bits: Bit width of the target integer grid. With ``bits == 1`` the
            divisor ``2**(bits-1) - 1`` is 0, so use at least 2.

    Returns:
        ``(quantized, scale)`` where ``quantized`` is an ``int32`` array clamped
        to the signed grid and ``scale`` is the step size between two adjacent
        levels.
    """
    # Calculate the scale from the largest magnitude
    alpha = np.max(np.abs(params))
    lower_bound = -2**(bits - 1)
    upper_bound = 2**(bits - 1) - 1

    scale = alpha / upper_bound
    if scale == 0:
        # Every value is 0, so dividing by the scale would be 0/0. Any positive
        # scale represents all-zero data exactly; use 1.0.
        scale = 1.0

    # Quantize the parameters
    quantized = clamp(np.round(params / scale), lower_bound, upper_bound).astype(np.int32)
    return quantized, scale

def quantization_error(params: np.array, params_q: np.array) -> float:
    """Return the mean squared error between ``params`` and ``params_q``."""
    residual = params - params_q
    return np.mean(np.square(residual))

# Quantize the same parameters to 8 bits with each of the three strategies
asymmetric_q, asymmetric_scale, asymmetric_zero = asymmetric_quantization(params, 8)
symmetric_q, symmetric_scale = symmetric_quantization(params, 8)
asymmetric_q_percentile, asymmetric_scale_percentile, asymmetric_zero_percentile = (
    asymmetric_quantization_percentile(params, 8)
)

# Each print emits its arguments on separate lines; the leading '' produces
# the blank line that separates one section from the next
print("Original parameters:", params, sep='\n')
print('', f'Asymmetric scale: {asymmetric_scale}, zero: {asymmetric_zero}', asymmetric_q, sep='\n')
print(
    '',
    f'Asymmetric scale percentile: {asymmetric_scale_percentile}, zero: {asymmetric_zero_percentile}',
    asymmetric_q_percentile,
    sep='\n',
)
print('', f'Symmetric scale: {symmetric_scale}', symmetric_q, sep='\n')



Original parameters:
[  15.86    8.67  123.44 ...   74.83    5.89 1000.  ]

Asymmetric scale: 4.117490196078432, zero: 12.0
[ 16  14  42 ...  30  13 255]

Asymmetric scale percentile: 0.784058866664309, zero: 64.0
[ 84  75 221 ... 159  72 255]

Symmetric scale: 7.874015748031496
[  2   1  16 ...  10   1 127]


In [16]:
# Reconstruct approximate real values from each quantized representation
params_deq_asymmetric = asymmetric_dequantize(asymmetric_q, asymmetric_scale, asymmetric_zero)
params_deq_asymmetric_percentile = asymmetric_dequantize(
    asymmetric_q_percentile,
    asymmetric_scale_percentile,
    asymmetric_zero_percentile,
)
params_deq_symmetric = symmetric_dequantize(symmetric_q, symmetric_scale)

# Each print emits its arguments on separate lines; the leading '' produces
# the blank line that separates one section from the next
print('', np.round(params, 2), sep='\n')
print('', 'Dequantize Asymmetric: ', np.round(params_deq_asymmetric, 2), sep='\n')
print('', 'Dequantize Asymmetric Percentile: ', np.round(params_deq_asymmetric_percentile, 2), sep='\n')
print('', 'Dequantize Symmetric: ', np.round(params_deq_symmetric, 2), sep='\n')


[  15.86    8.67  123.44 ...   74.83    5.89 1000.  ]

Dequantize Asymmetric: 
[  16.47    8.23  123.52 ...   74.11    4.12 1000.55]

Dequantize Asymmetric Percentile: 
[ 15.68   8.62 123.1  ...  74.49   6.27 149.76]

Dequantize Symmetric: 
[  15.75    7.87  125.98 ...   78.74    7.87 1000.  ]


In [22]:
# Calculate the quantization error (mean squared error against the original parameters)
error_report = {
    "Asymmetric error: ": quantization_error(params, params_deq_asymmetric),
    "Symmetric error: ": quantization_error(params, params_deq_symmetric),
    "Asymmetric error percentile: ": quantization_error(params, params_deq_asymmetric_percentile),
    # Same comparison with the last element of both arrays left out
    "Asymmetric error percentile w/o outlier: ": quantization_error(params[:-1], params_deq_asymmetric_percentile[:-1]),
}

# Right-align every label to the longest one so the values line up in one column
label_width = max(len(label) for label in error_report)
for label, error in error_report.items():
    print(f'{label:>{label_width}}{np.round(error, 2)}')

  Asymmetric error: 1.41
   Symmetric error: 5.11
Asymmetric error percentile: 72.34
Asymmetric error percentile w/o outlier: 0.05
