<a href="https://colab.research.google.com/github/swha815/Paper_List-GPGPU/blob/master/quant_02_example_classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### IV. Example

Let's classify the following image.

- Network: InceptionV3
- Platform: Keras on TensorFlow

![image](https://raw.githubusercontent.com/swha815/colab/main/ILSVRC2012_val_00000002.JPEG)


In [None]:
import urllib
import urllib.request

import cv2
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import tensorflow.keras.applications as keras_app
import tensorflow.keras.preprocessing as keras_prep


def print_score(pred):
  """Print a two-column "Class / Score" table for decoded predictions.

  Args:
    pred: Iterable of entries where element 1 is the class label and
      element 2 is the numeric score (element 0 is not printed).
  """
  header = f"{'Class':20}{'Score':5}"
  divider = '-' * 25
  rows = [f'{entry[1]:20}{entry[2]:5.3f}' for entry in pred]

  # One print call, one output line per item.
  print(header, divider, *rows, sep='\n')

In [None]:
# Prepare model for ImageNet classification
model = keras_app.InceptionV3(weights='imagenet')
img_size = (299, 299)

# Download the image; the context manager closes the HTTP response.
with urllib.request.urlopen('https://raw.githubusercontent.com/swha815/colab/main/ILSVRC2012_val_00000002.JPEG') as req:
  arr = np.asarray(bytearray(req.read()), dtype=np.uint8)

# IMREAD_COLOR always yields a 3-channel image (IMREAD_UNCHANGED could return
# 1 or 4 channels, which the network cannot consume).
org_img = cv2.imdecode(arr, cv2.IMREAD_COLOR)
if org_img is None:
  raise ValueError('Failed to decode the downloaded image.')

# OpenCV decodes to BGR channel order, while the Keras ImageNet models expect
# RGB input, so swap the channels before pre-processing.
img = cv2.cvtColor(org_img, cv2.COLOR_BGR2RGB)
img = cv2.resize(img, img_size)
img = np.expand_dims(img, axis=0)  # add the batch dimension
img = keras_app.inception_v3.preprocess_input(img)

# Predict
pred = model.predict(img)
pred = keras_app.imagenet_utils.decode_predictions(pred, top=5)

# Score prediction
print_score(pred[0])

Class               Score
-------------------------
ski                 0.803
alp                 0.070
ski_mask            0.005
mountain_tent       0.003
shovel              0.001


### V. Weight Quantization

Empirically, weights are best quantized with a scale factor derived from the maximum magnitude of their values.

#### Profile and Analyze

In [None]:
# For every Conv2D layer, record the max and min of its first weight array,
# reduced over axes 0 and 1, keyed by layer name.
weight_prof_dict = {}

for layer in model.layers:
  if isinstance(layer, tf.keras.layers.Conv2D):
    kernel = layer.get_weights()[0]
    weight_prof_dict[layer.name] = (
        np.amax(kernel, axis=(0, 1)),
        np.amin(kernel, axis=(0, 1)),
    )

#### Decide and Simulate

In [None]:
# Quantization settings used by the cells below.
bits = 8  # bit width passed to get_int_range / compress_model_param
signed = True  # True: signed integer range, False: unsigned range
verbose = False  # when enabled, the per-layer quantization loss is printed

In [None]:
def get_int_range(bits, signed):
  """Return the integer range representable with the given bit width.

  Args:
    bits: Positive `int` bit width.
    signed: `True` for a two's-complement range, `False` for an unsigned one.

  Returns:
    Tuple `(int_max, int_min)`.

  Raises:
    TypeError: If `bits` is not an `int` or `signed` is not a `bool`.
    ValueError: If `bits` is not positive.
  """
  # Check the type before comparing: `bits <= 0` on a non-numeric value would
  # raise an unrelated TypeError before the intended message is produced.
  if not isinstance(bits, int):
    raise TypeError('Invalid bits specification.')

  if bits <= 0:
    raise ValueError('Invalid bits specification.')

  if not isinstance(signed, bool):
    raise TypeError('Invalid signed specification.')

  if signed:
    half = 2 ** (bits - 1)
    return (half - 1, -half)

  return (2 ** bits - 1, 0)


def _as_float_array(values):
  """Return `values` as an ndarray, upcasting non-floating dtypes to float."""
  arr = np.asarray(values)
  return arr if np.issubdtype(arr.dtype, np.floating) else arr.astype(float)


def _abs_scale(int_bound, real_bound, zero_fill):
  """Element-wise |int_bound / real_bound|, with `zero_fill` where the bound is 0."""
  ratio = np.divide(int_bound, real_bound,
      out=np.full_like(real_bound, zero_fill), where=(real_bound != 0))
  return np.abs(ratio)


def get_sf(signed, int_max, int_min, real_max, real_min):
  """Compute element-wise scale factors mapping real ranges to an integer range.

  Args:
    signed: `True` to fit both `real_max` and `real_min` into
      `[int_min, int_max]`; `False` to scale by `real_max` only.
    int_max: Largest representable integer.
    int_min: Smallest representable integer.
    real_max: Array (or scalar) of per-element maxima of the real values.
    real_min: Array (or scalar) of per-element minima, same shape as
      `real_max`.

  Returns:
    Array of non-negative scale factors with the shape of `real_max`. An
    element whose range is entirely zero gets a scale factor of 1.

  Raises:
    TypeError: If `signed` is not a `bool`.
    ValueError: If the shapes differ or any maximum is below its minimum.
  """
  if not isinstance(signed, bool):
    raise TypeError('Invalid signed specification.')

  real_max = _as_float_array(real_max)
  real_min = _as_float_array(real_min)

  # Compare full shapes first: `len()` only looks at the first axis, and the
  # ordering check below would otherwise fail with a broadcasting error.
  if real_max.shape != real_min.shape:
    raise ValueError('real_max and real_min must be of equal lenghts.')

  if np.any(real_max < real_min):
    raise ValueError('Max is smaller than min.')

  if not signed:
    return _abs_scale(int_max, real_max, 1)

  # A zero bound imposes no constraint on the scale, so it is filled with +inf
  # rather than 1; filling with 1 would cap the result at 1 through the
  # minimum below and flatten small-valued ranges to zero.
  sf = np.minimum(_abs_scale(int_max, real_max, np.inf),
                  _abs_scale(int_min, real_min, np.inf))

  # Only a range that is zero on both sides has no usable scale; use 1 there.
  both_zero = (real_max == 0) & (real_min == 0)
  return np.where(both_zero, np.ones_like(sf), sf)


def quantize_numpy(org_vals, scale_factor, int_max, int_min):
  """Simulate integer quantization of `org_vals` and return the de-quantized values.

  The values are scaled by `scale_factor`, limited to `[int_min, int_max]`,
  rounded, and scaled back by the same factor.

  Args:
    org_vals: Values to quantize.
    scale_factor: Multiplier applied before rounding (and divided out after).
    int_max: Upper limit applied to the scaled values.
    int_min: Lower limit applied to the scaled values.

  Returns:
    The quantized values expressed back in the original scale.
  """
  scaled = np.multiply(org_vals, scale_factor)
  limited = np.maximum(int_min, np.minimum(int_max, scaled))
  return np.divide(np.round(limited), scale_factor)


def compress_model_param(model, bits, signed, weight_profile=None):
  """Quantize the first weight array of every profiled layer of `model`.

  NOTE: the layers are updated in place via `layer.set_weights`, so the
  returned model is the very same object as the `model` argument.

  Args:
    model: Model whose `layers` are inspected; each layer must provide
      `name`, `get_weights()` and `set_weights()`.
    bits: Bit width of the target integer representation.
    signed: Whether the target integer representation is signed.
    weight_profile: Optional mapping `layer name -> (w_max, w_min)`. Defaults
      to the notebook-level `weight_prof_dict`.

  Returns:
    Tuple `(model, log)` where `log` holds one
    `[layer name, bits, signed, quantization loss]` entry per quantized layer;
    the loss is the sum of squared differences between the original and the
    quantized weights.
  """
  if weight_profile is None:
    weight_profile = weight_prof_dict

  # The integer range depends only on (bits, signed), so compute it once
  # instead of once per layer.
  int_max, int_min = get_int_range(bits, signed)

  log = []

  for layer in model.layers:
    if layer.name not in weight_profile:
      continue

    w = layer.get_weights()
    w_max = weight_profile[layer.name][0]
    w_min = weight_profile[layer.name][1]

    # calculate scale factor
    sf = get_sf(signed, int_max, int_min, w_max, w_min)

    # quantize weights with given scale factor
    qvals = quantize_numpy(w[0], sf, int_max, int_min)
    quant_loss = np.sum((w[0] - qvals) ** 2)

    # store quantized weights (only the first array is replaced)
    w[0] = qvals
    layer.set_weights(w)

    log.append([layer.name, bits, signed, quant_loss])

  return (model, log)

In [None]:
# Quantize weights
# NOTE: compress_model_param updates the layers in place and returns the model
# it was given, so `qmodel` and `model` refer to the same object.
qmodel, qlog = compress_model_param(model, bits, signed)

if verbose:
  print('Layer Parameter Loss')

  for layer_name, layer_bits, layer_signed, layer_loss in qlog:
    print('  {} [{}b-{}] loss: {:.3f}'.format(
        layer_name, layer_bits, layer_signed, layer_loss))

# Sum the per-layer losses directly: converting the mixed-type log to an array
# would round-trip the numbers through strings and fail on an empty log.
total_loss = np.sum([entry[3] for entry in qlog], dtype=float)
print('Compressed {} layers (total loss: {:.3f})\n'.format(len(qlog), total_loss))

# Predict
qpred = qmodel.predict(img)
qpred = keras_app.imagenet_utils.decode_predictions(qpred, top=5)

# Score prediction
print_score(qpred[0])

Compressed 94 layers (total loss: 0.000)

Class               Score
-------------------------
ski                 0.804
alp                 0.073
ski_mask            0.005
mountain_tent       0.003
shovel              0.001
