<a href="https://colab.research.google.com/github/kihoon71/quantization_code/blob/main/quantization_vector_wise_quantization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import math

## vector-wise quantization
class VectorWiseQuantization:
  """Vector-wise absmax int8 quantization of the matrix product ``X @ W``.

  Every row of ``X`` and every column of ``W`` gets its own scaling constant
  (its largest absolute value). Both matrices are quantized to int8, multiplied
  with int32 accumulation, and the integer product is rescaled back to floats.

  Attributes set by ``__init__``:
    X, W: the inputs as NumPy arrays.
    C_x: per-row absmax of ``X`` (length ``X.shape[0]``).
    C_w: per-column absmax of ``W`` (length ``W.shape[1]``).
    q_x, q_w: int8 quantized versions of ``X`` and ``W``.
    quantized_matrix_multiplication: int32 product ``q_x @ q_w``. On an
      instance this attribute shadows the method of the same name; the name
      is kept so existing readers of the attribute keep working.
    dequantized_matrix: float approximation of ``X @ W``.
  """

  def __init__(self, X, W):
    """Quantize ``X`` and ``W``, multiply them, and dequantize the result.

    Args:
      X: 2-D array-like of shape (n, k).
      W: 2-D array-like of shape (k, m).

    Raises:
      ValueError: if either input is not 2-D or the inner dimensions differ.
    """
    X = np.asarray(X)
    W = np.asarray(W)
    if X.ndim != 2 or W.ndim != 2:
      raise ValueError(
          f"X and W must be 2-D, got shapes {X.shape} and {W.shape}")
    if X.shape[1] != W.shape[0]:
      raise ValueError(
          f"inner dimensions do not match: {X.shape} vs {W.shape}")

    self.X = X
    self.W = W

    # scaling factor vectors
    self.C_x = self.get_abs_max(self.X, axis=1)  # by row
    self.C_w = self.get_abs_max(self.W, axis=0)  # by column

    # quantized_x, quantized_w
    self.q_x = self.absmax_quantization_x()
    self.q_w = self.absmax_quantization_w()

    # quantized_matrix
    # Look the method up on the class: the attribute assigned here shadows it
    # on the instance, so going through ``self`` would break if ``__init__``
    # ran a second time on the same object.
    matmul = type(self).quantized_matrix_multiplication
    self.quantized_matrix_multiplication = matmul(self, self.q_x, self.q_w)

    # dequantized_matrix
    self.dequantized_matrix = self.dequantization()

  def get_abs_max(self, matrix, axis=0):
    """Return the largest absolute value of ``matrix`` along ``axis``."""
    return np.max(np.abs(matrix), axis=axis)

  def get_range_data_type(self, d_type='int8'):
    """Return the largest representable value of integer ``d_type`` as a float."""
    return float(np.iinfo(d_type).max)

  def get_absmax_scale(self, absmax, dtype='int8'):
    """Return the quantization scale ``max(dtype) / absmax`` element-wise.

    The scale is kept in float64. A float16 scale overflows to ``inf`` once
    ``absmax`` drops below roughly ``127 / 65504``, and its rounding does not
    match the exact ``absmax / max(dtype)`` factor that ``dequantization``
    applies. Entries whose ``absmax`` is zero get a scale of 0 so that an
    all-zero vector quantizes to zeros instead of producing ``inf``/``nan``.
    """
    absmax = np.asarray(absmax, dtype='float64')
    q_max = self.get_range_data_type(dtype)
    scale = np.zeros_like(absmax)
    # ``where`` skips the zero entries, leaving their pre-filled 0 in place.
    np.divide(q_max, absmax, out=scale, where=absmax > 0)
    return scale

  def _to_int8(self, scaled):
    """Round ``scaled`` and store it as int8, clipping to the symmetric range."""
    q_max = self.get_range_data_type('int8')
    return np.clip(np.round(scaled), -q_max, q_max).astype('int8')

  def absmax_quantization_x(self):
    """Quantize ``X`` to int8 using one scale per row."""
    scale = self.get_absmax_scale(self.C_x)
    return self._to_int8(self.X * scale[:, np.newaxis])

  def absmax_quantization_w(self):
    """Quantize ``W`` to int8 using one scale per column."""
    scale = self.get_absmax_scale(self.C_w)
    return self._to_int8(self.W * scale[np.newaxis, :])

  def quantized_matrix_multiplication(self, x, w):
    """Return ``x @ w`` computed with int32 accumulation."""
    # Without widening first, the int8 products and sums would overflow.
    x_32 = x.astype('int32')
    w_32 = w.astype('int32')
    return np.dot(x_32, w_32)

  def dequantization(self):
    """Rescale the integer product back to an approximation of ``X @ W``.

    Entry (i, j) of the integer product carries the factor
    ``(q_max / C_x[i]) * (q_max / C_w[j])``, so it is multiplied by
    ``C_x[i] * C_w[j]`` and divided by ``q_max ** 2``.
    """
    outer_product = np.outer(self.C_x, self.C_w)
    matrix_ = self.quantized_matrix_multiplication * outer_product
    matrix_ = matrix_ / (self.get_range_data_type() ** 2)

    return matrix_



In [None]:
# Demo: quantize two random 5x5 matrices and compare the dequantized product
# with the exact floating-point product. The seed is fixed so the run repeats.
np.random.seed(0)
a = np.random.random((5, 5))
b = np.random.random((5, 5))
c = VectorWiseQuantization(a, b)

# (label, value) pairs, printed in this order.
report = (
    ('X :\n', a),
    ('W :\n', b),
    ("C_x :", c.C_x),
    ("C_w :", c.C_w),
    ("Quantized_X :\n", c.q_x),
    ("Quantized_W :\n", c.q_w),
    ("Quantized_multiplicated_matrix :\n", c.quantized_matrix_multiplication),
    ('outer product of c_x, c_w :\n', np.outer(c.C_x, c.C_w)),
)
for label, value in report:
    print(label, value)

# Dequantized approximation first, then the exact product for comparison.
print('=' * 50)
print(c.dequantized_matrix)
print(np.dot(a, b))



X :
 [[0.5488135  0.71518937 0.60276338 0.54488318 0.4236548 ]
 [0.64589411 0.43758721 0.891773   0.96366276 0.38344152]
 [0.79172504 0.52889492 0.56804456 0.92559664 0.07103606]
 [0.0871293  0.0202184  0.83261985 0.77815675 0.87001215]
 [0.97861834 0.79915856 0.46147936 0.78052918 0.11827443]]
W :
 [[0.63992102 0.14335329 0.94466892 0.52184832 0.41466194]
 [0.26455561 0.77423369 0.45615033 0.56843395 0.0187898 ]
 [0.6176355  0.61209572 0.616934   0.94374808 0.6818203 ]
 [0.3595079  0.43703195 0.6976312  0.06022547 0.66676672]
 [0.67063787 0.21038256 0.1289263  0.31542835 0.36371077]]
C_x : [0.71518937 0.96366276 0.92559664 0.87001215 0.97861834]
C_w : [0.67063787 0.77423369 0.94466892 0.94374808 0.6818203 ]
Quantized_X :
 [[ 97 127 107  97  75]
 [ 85  58 117 127  51]
 [109  73  78 127  10]
 [ 13   3 122 114 127]
 [127 104  60 101  15]]
Quantized_W :
 [[121  24 127  70  77]
 [ 50 127  61  77   3]
 [117 100  83 127 127]
 [ 68  72  94   8 124]
 [127  35  17  42  68]]
Quantized_multiplica

In [None]:
# Manual check: multiply the quantized X and W matrices from the previous
# cell's output by hand.
qa = np.array([
    [97, 127, 107, 97, 75],
    [85, 58, 117, 127, 51],
    [109, 73, 78, 127, 10],
    [13, 3, 122, 114, 127],
    [127, 104, 60, 101, 15],
])

qb = np.array([
    [121, 24, 127, 70, 77],
    [50, 127, 61, 77, 3],
    [117, 100, 83, 127, 127],
    [68, 72, 94, 8, 124],
    [127, 35, 17, 42, 68],
])

# Integer matrix product of the two quantized matrices.
ab = qa @ qb
print(ab)

[[46727 38766 39340 34084 38567]
 [41987 32035 36849 28433 40794]
 [35871 29181 36878 24593 34946]
 [39878 25546 24835 22881 39276]
 [36360 30053 37202 25956 31255]]
