<a href="https://colab.research.google.com/github/mazekehs/Quantization/blob/main/Quantization_from_scratch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Creating a simple tensor with random items


In [5]:
import numpy as np

np.set_printoptions(suppress=True) # Print small floats in fixed-point form instead of scientific notation



In [7]:
# Draw 20 parameters uniformly at random from the half-open interval [-50, 150).
# No seed is set, so the values differ on every run.
params = np.random.uniform(-50, 150, 20)
print(params)

[-25.17930368 127.05329662 -38.62145207  41.05901368  62.82462623
  64.49892952 -27.51743016 114.1785816  126.33020844 -14.02329699
  75.05506091  71.50562203 115.00052807 119.65869906 -30.97827628
 130.69646697  99.06631667 -36.79862249  14.72667585 141.78488412]


In [8]:
# Show the first element before it is overwritten: the following cells put
# the maximum, the minimum and an exact zero at indices 0, 1 and 2
params[0]

-25.179303679237176

In [10]:
# Make index 0 the largest value in the array (current maximum + 1).
# Not idempotent: re-running this cell raises the maximum by 1 again.
params[0]=params.max()+1
params[0]

143.78488412078426

In [11]:
# Make index 1 the smallest value in the array (current minimum - 1).
# Not idempotent: re-running this cell lowers the minimum by 1 again.
params[1]=params.min()-1
params[1]

-39.62145207452432

In [12]:
# Store an exact zero at index 2, next to the maximum (index 0) and minimum (index 1)
params[2]=0

In [14]:
# Keep two decimal places; the rounded array replaces `params` and is what gets quantized below
params=np.round(params,2)
print(params)

[143.78 -39.62   0.    41.06  62.82  64.5  -27.52 114.18 126.33 -14.02
  75.06  71.51 115.   119.66 -30.98 130.7   99.07 -36.8   14.73 141.78]


# Define Quantization Method and Quantize

In [15]:
def clamp(params_q: np.ndarray, lower_bound: int, upper_bound: int) -> np.ndarray:
  """Limit every element of ``params_q`` to ``[lower_bound, upper_bound]``.

  Args:
    params_q: Values to limit.
    lower_bound: Smallest value allowed in the result.
    upper_bound: Largest value allowed in the result.

  Returns:
    A new array in which elements below ``lower_bound`` are replaced by
    ``lower_bound`` and elements above ``upper_bound`` by ``upper_bound``.
    The input array is not modified.
  """
  # np.clip allocates its result, so the caller's array keeps its original
  # values (masked assignment on params_q would overwrite them in place).
  return np.clip(params_q, lower_bound, upper_bound)

In [16]:
def asymmetric_quantization(params: np.ndarray, bits: int) -> tuple[np.ndarray, float, int]:
  """Quantize ``params`` onto the unsigned integer grid ``[0, 2**bits - 1]``.

  The observed range ``[min(params), max(params)]`` is mapped linearly onto
  the grid with ``q = round(x / scale + zero)``; results are then clamped to
  the grid and cast to ``int32``.

  Args:
    params: Values to quantize.
    bits: Width of the integer grid; must be at least 1.

  Returns:
    ``(quantized, scale, zero)`` where ``quantized`` is an ``int32`` array,
    ``scale`` is the real-valued step between adjacent grid levels and
    ``zero`` is the integer zero-point added before rounding.

  Raises:
    ValueError: If ``bits`` is smaller than 1, or if the zero-point is not
      finite (for example when ``params`` contains NaN).
  """
  if bits < 1:
    raise ValueError(f'bits must be >= 1, got {bits}')
  alpha = np.max(params)
  beta = np.min(params)
  lower_bound, upper_bound = 0, 2**bits - 1
  scale = (alpha - beta) / upper_bound
  if scale == 0:
    # Every value is identical, so the range is empty and dividing by the
    # scale would yield NaN. Using |value| as the scale (or 1 for an all-zero
    # input) maps the value to level 0 and dequantizes back to it exactly.
    scale = abs(alpha) if alpha != 0 else 1.0
  # The zero-point is a grid index, so hand it back as a plain int
  zero = int(-1 * np.round(beta / scale))
  #Quantize the parameters
  quantized = clamp(np.round(params / scale + zero), lower_bound, upper_bound).astype(np.int32)
  return quantized, scale, zero

In [17]:
def asymmetric_dequantize(params_q: np.array, scale: float, zero: int) -> np.array:
    """Map asymmetrically quantized values back to the real-valued domain.

    Computes ``(params_q - zero) * scale``: the zero-point offset is removed
    first, then the result is stretched by the quantization step.
    """
    shifted = params_q - zero
    return shifted * scale

In [18]:
def symmetric_quantization(params: np.ndarray, bits: int) -> tuple[np.ndarray, float]:
    """Quantize ``params`` onto a signed integer grid centred on zero.

    The scale is chosen so that the largest absolute value maps to
    ``2**(bits-1) - 1``; values are quantized with ``q = round(x / scale)``,
    clamped to ``[-2**(bits-1), 2**(bits-1) - 1]`` and cast to ``int32``.

    Args:
        params: Values to quantize.
        bits: Width of the integer grid; must be at least 2, because with a
            single bit the largest positive level ``2**(bits-1) - 1`` is 0
            and no scale can be derived from it.

    Returns:
        ``(quantized, scale)`` where ``quantized`` is an ``int32`` array and
        ``scale`` is the real-valued step between adjacent grid levels.

    Raises:
        ValueError: If ``bits`` is smaller than 2.
    """
    if bits < 2:
        raise ValueError(f'bits must be >= 2, got {bits}')
    lower_bound = -2**(bits-1)
    upper_bound = 2**(bits-1)-1
    # Calculate the scale
    alpha = np.max(np.abs(params))
    scale = alpha / upper_bound
    if scale == 0:
        # All parameters are zero: 0 / 0 would produce NaN. Any positive
        # scale maps them to level 0, so fall back to a unit step.
        scale = 1.0
    # Quantize the parameters
    quantized = clamp(np.round(params / scale), lower_bound, upper_bound).astype(np.int32)
    return quantized, scale

In [19]:
def symmetric_dequantize(params_q: np.array, scale: float) -> np.array:
    """Map symmetrically quantized values back to the real-valued domain.

    There is no zero-point in the symmetric scheme, so the reconstruction is
    simply the quantized value multiplied by ``scale``.
    """
    restored = params_q * scale
    return restored


In [20]:
def quantization_error(params: np.array, params_q: np.array):
    """Return the mean squared error between ``params`` and ``params_q``.

    The element-wise difference is squared and averaged over all elements.
    """
    residual = params - params_q
    return np.mean(np.square(residual))

In [21]:
# Quantize the same parameters to 8 bits with both schemes
(asymmetric_q, asymmetric_scale, asymmetric_zero) = asymmetric_quantization(params, 8)
(symmetric_q, symmetric_scale) = symmetric_quantization(params, 8)

In [22]:
# Show the original values, then each quantized array together with its
# scale (and, for the asymmetric scheme, its zero-point)
print(f'Original:')
print(np.round(params, 2))
print('')
print(f'Asymmetric scale: {asymmetric_scale}, zero: {asymmetric_zero}')
print(asymmetric_q)
print('')
print(f'Symmetric scale: {symmetric_scale}')
print(symmetric_q)

Original:
[143.78 -39.62   0.    41.06  62.82  64.5  -27.52 114.18 126.33 -14.02
  75.06  71.51 115.   119.66 -30.98 130.7   99.07 -36.8   14.73 141.78]

Asymmetric scale: 0.7192156862745098, zero: 55.0
[255   0  55 112 142 145  17 214 231  36 159 154 215 221  12 237 193   4
  75 252]

Symmetric scale: 1.1321259842519684
[127 -35   0  36  55  57 -24 101 112 -12  66  63 102 106 -27 115  88 -33
  13 125]


In [23]:
# Dequantize the integer parameters back to floating-point values
# (int32 values combined with the float scale give a floating-point result)
params_deq_asymmetric = asymmetric_dequantize(asymmetric_q, asymmetric_scale, asymmetric_zero)
params_deq_symmetric = symmetric_dequantize(symmetric_q, symmetric_scale)

# Print the original next to both reconstructions, all rounded to 2 decimals
print(f'Original:')
print(np.round(params, 2))
print('')
print(f'Dequantize Asymmetric:')
print(np.round(params_deq_asymmetric,2))
print('')
print(f'Dequantize Symmetric:')
print(np.round(params_deq_symmetric, 2))

Original:
[143.78 -39.62   0.    41.06  62.82  64.5  -27.52 114.18 126.33 -14.02
  75.06  71.51 115.   119.66 -30.98 130.7   99.07 -36.8   14.73 141.78]

Dequantize Asymmetric:
[143.84 -39.56   0.    41.    62.57  64.73 -27.33 114.36 126.58 -13.67
  74.8   71.2  115.07 119.39 -30.93 130.9   99.25 -36.68  14.38 141.69]

Dequantize Symmetric:
[143.78 -39.62   0.    40.76  62.27  64.53 -27.17 114.34 126.8  -13.59
  74.72  71.32 115.48 120.01 -30.57 130.19  99.63 -37.36  14.72 141.52]


In [24]:
# Calculate the quantization error: mean squared error between the original
# values and each dequantized reconstruction, rounded to 2 decimals.
# The labels are right-aligned to 20 characters so the numbers line up.
print(f'{"Asymmetric error: ":>20}{np.round(quantization_error(params, params_deq_asymmetric), 2)}')
print(f'{"Symmetric error: ":>20}{np.round(quantization_error(params, params_deq_symmetric), 2)}')

  Asymmetric error: 0.04
   Symmetric error: 0.13
