<a href="https://colab.research.google.com/github/kalyaannnn/TransforMER/blob/main/Quantization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Quantization using MinMax


In [14]:
import numpy as np

np.set_printoptions(suppress = True)

params = np.random.uniform(low = -50, high = 150, size = 20)

params[0] = params.max() + 1
params[1] = params.min() - 1
params[2] = 0

params = np.round(params, 2)

print(params)

[146.93 -47.31   0.   -36.45 144.12 116.43 145.93  86.63 -15.27  26.39
  53.16  24.85 -26.28  97.77 -46.31  64.6  123.86  60.37  58.95  10.8 ]


In [15]:
def clamp(params_q : np.array, lower_bound : int, upper_bound : int) -> np.array:
  params_q[params_q < lower_bound] = lower_bound
  params_q[params_q > upper_bound] = upper_bound
  return params_q

In [16]:
def asymmetric_quantization(params : np.array, bits : int) -> tuple[np.array, float, int]:
  # Calculating the scale and zero point
  alpha = np.max(params)
  beta = np.min(params)
  scale = (alpha - beta) / (2 ** bits - 1)
  zero = -1 * np.round(beta / scale)
  lower_bound, upper_bound = 0, 2 ** bits - 1

  # Quantize the parameters
  quantized = clamp(np.round(params / scale + zero), lower_bound, upper_bound).astype(np.int32)
  return quantized, scale, zero

In [17]:
def asymmetric_quantization(params : np.array, bits : int) -> tuple[np.array, float, int]:
  # Calculating the scale and zero point
  alpha = np.max(params)
  beta = np.min(params)
  scale = (alpha - beta) / (2 ** bits - 1)
  zero = -1 * np.round(beta / scale)
  lower_bound, upper_bound = 0, 2 ** bits - 1

  # Quantize the parameters
  quantized = clamp(np.round(params / scale + zero), lower_bound, upper_bound).astype(np.int32)
  return quantized, scale, zero

In [17]:
def symmetric_quantization(params: np.array, bits: int) -> tuple[np.array, float]:
    # Calculate the scale
    alpha = np.max(np.abs(params))
    scale = alpha / (2**(bits-1)-1)
    lower_bound = -2**(bits-1)
    upper_bound = 2**(bits-1)-1
    # Quantize the parameters
    quantized = clamp(np.round(params / scale), lower_bound, upper_bound).astype(np.int32)
    return quantized, scale

In [18]:
def asymmetric_dequantize(params_q : np.array, scale : float, zero : int) -> np.array:
  return (params_q - zero) * scale

def symmetric_dequantize(params_q : np.array, scale : float) -> np.array:
  return params_q * scale

In [20]:
def quantization_error(params : np.array, params_q : np.array):
  # Calculate the MSE
  return np.mean((params - params_q) ** 2)

In [21]:
(asymmetric_q, asymmetric_scale, asymmetric_zero) = asymmetric_quantization(params, 8)
(symmetric_q, symmetric_scale) = symmetric_quantization(params, 8)

In [22]:
print(f'Original')
print(np.round(params, 2))
print('')
print(f'Asymmetric Scale : {asymmetric_scale}, Zero : {asymmetric_zero}')
print(asymmetric_q)
print('')
print(f'Symmetric Scale : {symmetric_scale}')
print(symmetric_q)

Original
[146.93 -47.31   0.   -36.45 144.12 116.43 145.93  86.63 -15.27  26.39
  53.16  24.85 -26.28  97.77 -46.31  64.6  123.86  60.37  58.95  10.8 ]

Asymmetric Scale : 0.7617254901960785, Zero : 62.0
[255   0  62  14 251 215 254 176  42  97 132  95  27 190   1 147 225 141
 139  76]

Symmetric Scale : 1.1569291338582677
[127 -41   0 -32 125 101 126  75 -13  23  46  21 -23  85 -40  56 107  52
  51   9]


In [23]:
# Dequantizing the parameters
params_deq_asymmetric = asymmetric_dequantize(asymmetric_q, asymmetric_scale, asymmetric_zero)
params_deq_symmetric = symmetric_dequantize(symmetric_q, symmetric_scale)

print(f'Original')
print(np.round(params, 2))
print('')
print(f'Dequantize Asymmetric :')
print(np.round(params_deq_asymmetric, 2))
print('')
print(f'Dequantize Symmetric :')
print(np.round(params_deq_symmetric, 2))

Original
[146.93 -47.31   0.   -36.45 144.12 116.43 145.93  86.63 -15.27  26.39
  53.16  24.85 -26.28  97.77 -46.31  64.6  123.86  60.37  58.95  10.8 ]

Dequantize Asymmetric :
[147.01 -47.23   0.   -36.56 143.97 116.54 146.25  86.84 -15.23  26.66
  53.32  25.14 -26.66  97.5  -46.47  64.75 124.16  60.18  58.65  10.66]

Dequantize Symmetric :
[146.93 -47.43   0.   -37.02 144.62 116.85 145.77  86.77 -15.04  26.61
  53.22  24.3  -26.61  98.34 -46.28  64.79 123.79  60.16  59.    10.41]


In [24]:
# Calculate the quantization error
print(f'{"Asymmetric Error : ":>20}{np.round(quantization_error(params, params_deq_asymmetric), 2)}')
print(f'{"Symmetric Error : ":>20}{np.round(quantization_error(params, params_deq_symmetric), 2)}')

 Asymmetric Error : 0.04
  Symmetric Error : 0.09


Quantization using Percentile

In [27]:
import numpy as np

np.set_printoptions(suppress=True)

params = np.random.uniform(low=-50, high=150, size=10000)

# Introduce an outlier
params[-1] = 1000

params = np.round(params, 2)

print(params)

[ 112.1     3.92  126.   ...  114.76  -46.97 1000.  ]


In [28]:
def clamp(params_q: np.array, lower_bound: int, upper_bound: int) -> np.array:
    params_q[params_q < lower_bound] = lower_bound
    params_q[params_q > upper_bound] = upper_bound
    return params_q

def asymmetric_quantization(params: np.array, bits: int) -> tuple[np.array, float, int]:
    alpha = np.max(params)
    beta = np.min(params)
    scale = (alpha - beta) / (2**bits-1)
    zero = -1*np.round(beta / scale)
    lower_bound, upper_bound = 0, 2**bits-1
    quantized = clamp(np.round(params / scale + zero), lower_bound, upper_bound).astype(np.int32)
    return quantized, scale, zero

def asymmetric_quantization_percentile(params: np.array, bits: int, percentile: float = 99.99) -> tuple[np.array, float, int]:
    # find the percentile value
    alpha = np.percentile(params, percentile)
    beta = np.percentile(params, 100-percentile)
    scale = (alpha - beta) / (2**bits-1)
    zero = -1*np.round(beta / scale)
    lower_bound, upper_bound = 0, 2**bits-1
    quantized = clamp(np.round(params / scale + zero), lower_bound, upper_bound).astype(np.int32)
    return quantized, scale, zero


def asymmetric_dequantize(params_q: np.array, scale: float, zero: int) -> np.array:
    return (params_q - zero) * scale

def quantization_error(params: np.array, params_q: np.array):
    # calculate the MSE
    return np.mean((params - params_q)**2)

(asymmetric_q, asymmetric_scale, asymmetric_zero) = asymmetric_quantization(params, 8)
(asymmetric_q_percentile, asymmetric_scale_percentile, asymmetric_zero_percentile) = asymmetric_quantization_percentile(params, 8)

print(f'Original:')
print(np.round(params, 2))
print('')
print(f'Asymmetric (min-max) scale: {asymmetric_scale}, zero: {asymmetric_zero}')
print(asymmetric_q)
print(f'')
print(f'Asymmetric (percentile) scale: {asymmetric_scale_percentile}, zero: {asymmetric_zero_percentile}')
print(asymmetric_q_percentile)

Original:
[ 112.1     3.92  126.   ...  114.76  -46.97 1000.  ]

Asymmetric (min-max) scale: 4.117490196078432, zero: 12.0
[ 39  13  43 ...  40   1 255]

Asymmetric (percentile) scale: 0.7837843843113675, zero: 64.0
[207  69 225 ... 210   4 255]


In [29]:
params_deq_asymmetric = asymmetric_dequantize(asymmetric_q, asymmetric_scale, asymmetric_zero)
params_deq_asymmetric_percentile = asymmetric_dequantize(asymmetric_q_percentile, asymmetric_scale_percentile, asymmetric_zero_percentile)

print(f'Original:')
print(np.round(params, 2))
print('')
print(f'Dequantized (min-max):')
print(np.round(params_deq_asymmetric,2))
print('')
print(f'Dequantized (percentile):')
print(np.round(params_deq_asymmetric_percentile,2))

Original:
[ 112.1     3.92  126.   ...  114.76  -46.97 1000.  ]

Dequantized (min-max):
[ 111.17    4.12  127.64 ...  115.29  -45.29 1000.55]

Dequantized (percentile):
[112.08   3.92 126.19 ... 114.43 -47.03 149.7 ]


In [None]:
# Calculate the quantization error
print(f'{"Error (min-max) excluding outlier: ":>40}{np.round(quantization_error(params[:-1], params_deq_asymmetric[:-1]),2)}')
print(f'{"Error (percentile) excluding outlier: ":>40}{np.round(quantization_error(params[:-1], params_deq_asymmetric_percentile[:-1]),2)}')