https://docs.nvidia.com/deeplearning/tensorrt/tensorflow-quantization-toolkit/docs/docs/intro_to_quantization.html

# Implementing affine (assymetric) and scale (symmetric) quantization from scratch

In [1]:
import numpy as np

### Creating a simple tensor with random items

In [2]:
# Generate randomly distributed parameters
params = np.random.uniform(low=-50, high=150, size=20)

# Print the parameters
print(params)

[-10.89676688 111.20132441  28.54646861  64.08626134 -44.95071907
  89.55659292  58.83216999 -42.60038455 129.03490232  -6.83378502
 141.84353818 -42.28612992  98.82770315 138.28224062  17.29530148
 119.47436199   8.19703339  42.28238139  75.6475437    8.80807161]


### Defining the quantization methods

In [4]:
def clamp(params_q: np.array, lower_bound: int, upper_bound: int) -> np.array:
    params_q[params_q < lower_bound] = lower_bound
    params_q[params_q > upper_bound] = upper_bound
    return params_q

def affine_quantize(params: np.array, bit_width: int) -> tuple[np.array, float, int]:
    # Calculate the scale and zero point
    alpha = np.max(params)
    beta = np.min(params)
    scale = (alpha - beta) / (2**bit_width - 1)
    zeroPt = -1 * np.round(beta / scale)
    lower_bound, upper_bound = 0, 2**bit_width - 1
    # Quantize the parameters
    quantized = clamp(np.round(params / scale + zeroPt), lower_bound, upper_bound)
    return quantized, scale, zeroPt

def affine_dequantize(params_q: np.array, scale: float, zeroPt: int) -> np.array:
    params_q = params_q.astype(np.int32)
    params = (params_q - zeroPt) * scale
    params = params.astype(np.float32)
    return params

def symmetric_quantize(params: np.array, bits: int) -> tuple[np.array, float]:
    # Calculate the scale
    alpha = np.max(np.abs(params))
    scale = alpha / (2**(bits-1)-1)
    lower_bound = -2**(bits-1)
    upper_bound = 2**(bits-1)-1
    # Quantize the parameters
    quantized = clamp(np.round(params / scale), lower_bound, upper_bound)
    return quantized, scale

def symmetric_dequantize(params_q: np.array, scale: float) -> np.array:
    params_q = params_q.astype(np.int32)
    params = params_q * scale
    params = params.astype(np.float32)
    return params

def quantization_error(params: np.array, params_q: np.array):
    # calculate the MSE
    return np.mean((params - params_q)**2)

### Quantizing the parameters to 8 bits

In [5]:
(affine_q, affine_scale, affine_zero) = affine_quantize(params, 8)
(symmetric_q, symmetric_scale) = symmetric_quantize(params, 8)

print(f'Original parameters:')
print(np.round(params, 2))
print('')
print(f'Affine scale: {affine_scale}, zero point: {affine_zero}')
print(affine_q)
print('')
print(f'Symmetric scale: {symmetric_scale}')
print(symmetric_q)

Original parameters:
[-10.9  111.2   28.55  64.09 -44.95  89.56  58.83 -42.6  129.03  -6.83
 141.84 -42.29  98.83 138.28  17.3  119.47   8.2   42.28  75.65   8.81]

Affine scale: 0.7325264990067586, zero point: 61.0
[ 46. 213. 100. 148.  -0. 183. 141.   3. 237.  52. 255.   3. 196. 250.
  85. 224.  72. 119. 164.  73.]

Symmetric scale: 1.1168782533835167
[-10. 100.  26.  57. -40.  80.  53. -38. 116.  -6. 127. -38.  88. 124.
  15. 107.   7.  38.  68.   8.]


### Dequantizing the parameters back to 32 bits

In [6]:
# Dequantize the parameters back to 32 bits
params_deq_affine = affine_dequantize(affine_q, affine_scale, affine_zero)
params_deq_symmetric = symmetric_dequantize(symmetric_q, symmetric_scale)

print(f'Original parameters:')
print(np.round(params, 2))
print('')
print(f'Dequantize affine:')
print(np.round(params_deq_affine, 2))
print('')
print(f'Dequantize symmetric:')
print(np.round(params_deq_symmetric, 2))

Original parameters:
[-10.9  111.2   28.55  64.09 -44.95  89.56  58.83 -42.6  129.03  -6.83
 141.84 -42.29  98.83 138.28  17.3  119.47   8.2   42.28  75.65   8.81]

Dequantize affine:
[-10.99 111.34  28.57  63.73 -44.68  89.37  58.6  -42.49 128.92  -6.59
 142.11 -42.49  98.89 138.45  17.58 119.4    8.06  42.49  75.45   8.79]

Dequantize symmetric:
[-11.17 111.69  29.04  63.66 -44.68  89.35  59.19 -42.44 129.56  -6.7
 141.84 -42.44  98.29 138.49  16.75 119.51   7.82  42.44  75.95   8.94]


### Calculating the quantization error

In [7]:
print(f'{"Affine error: ":>20}{np.round(quantization_error(params, params_deq_affine), 2)}')
print(f'{"Symmetric error: ":>20}{np.round(quantization_error(params, params_deq_symmetric), 2)}')

      Affine error: 0.04
   Symmetric error: 0.11
