In [2]:
import matplotlib.pyplot as plt
import numpy as np
import pprint
import re
import sys

#For tensorflow lite work
import logging
logging.getLogger("tensorflow").setLevel(logging.DEBUG)
import tensorflow as tf
from tensorflow import keras
import pathlib
import pprint
import re
import sys

In [3]:
weights = np.random.randn(256, 256)

In [4]:
def quantizeAndReconstruct(weights):
    #Scale the fp32 (floating point) values to int-8 (byte) integer values
    
    #First compute the range of weights
    max_weight = np.max(weights)
    min_weight = np.min(weights)
    range = max_weight - min_weight
    max_int8 = 2**8

    #Compute the scale
    scale = range / max_int8

    #Compute the midpoint
    midpoint = np.mean([max_weight, min_weight])

    #Next thing is to map the real fp32 with integer (int-8). By diving the weight 
    # matrix with the scale, the weight matrix has a range between (-128, 127). 
    # Now, we can simply round the full precision numbers to the closest integers.

    centered_weights = weights - midpoint
    quantized_weights = np.rint(centered_weights / scale)

    #Now reconstruct back to the fp32
    reconstructed_weights = scale * quantized_weights + midpoint
    return reconstructed_weights

In [5]:
reconstructed_weights = quantizeAndReconstruct(weights)
print("Original weight matrix \n", weights)
print("Weighted matrix after reconstruction \n", reconstructed_weights)
errors = reconstructed_weights - weights
max_error = np.max(errors)
print("Max error: ", max_error)
reconstructed_weights.shape

Original weight matrix 
 [[ 1.75334486 -1.20007517 -0.27848418 ... -0.70478553 -0.21130858
   0.99372629]
 [ 1.04166994 -1.28944703 -0.68784436 ...  1.15126735  1.80079889
   0.04714907]
 [-0.44399388 -0.10396778  1.62242267 ... -0.49975811 -1.33477914
  -0.93284698]
 ...
 [ 0.99860362 -0.61488108  0.59464445 ...  1.3878707  -0.49331756
   1.20545287]
 [-0.56413876  0.56237023 -0.18829901 ... -0.54172951 -1.08052405
   1.14641319]
 [-1.22615241  0.42857581  0.92450901 ... -1.08723349  0.24530115
  -0.30038353]]
Weighted matrix after reconstruction 
 [[ 1.75546721 -1.19364473 -0.26776075 ... -0.71355674 -0.19917675
   1.00104323]
 [ 1.03533523 -1.29652073 -0.67926474 ...  1.13821122  1.78975921
   0.04086724]
 [-0.43922075 -0.09630075  1.61829922 ... -0.50780474 -1.33081273
  -0.91930874]
 ...
 [ 1.00104323 -0.61068074  0.58953923 ...  1.37825522 -0.50780474
   1.20679522]
 [-0.57638874  0.55524724 -0.19917675 ... -0.54209674 -1.09076873
   1.13821122]
 [-1.22793673  0.41807924  0.93245

(256, 256)

In [6]:
np.unique(quantizeAndReconstruct(weights)).shape

(229,)