<a href="https://colab.research.google.com/github/kathy-lee/Lena_MNIST_Network/blob/master/MNIST_Task3_quantization_aware_training_tf2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Network quantization in weights, in tensorflow2
from __future__ import absolute_import, division, print_function, unicode_literals

try:
  # %tensorflow_version only exists in Colab.
  %tensorflow_version 2.x
except Exception:
  pass

import tensorflow as tf
from keras.layers import Dense, Conv2D, MaxPool2D, Flatten
from keras.models import Sequential
from keras import layers
from keras import callbacks
from keras.datasets import mnist
from keras.utils import to_categorical
from keras.callbacks import TensorBoard
from sklearn.model_selection import train_test_split
import time
import seaborn as sns
import scipy
from scipy.optimize import curve_fit
from scipy.stats import norm, t
import matplotlib.pyplot as plt
import numpy as np


TensorFlow 2.x selected.


Using TensorFlow backend.


In [2]:
# load mnist dataset
(trainX, trainY), (testX, testY) = mnist.load_data()

# reshape dataset
trainX = trainX.reshape((trainX.shape[0], 28, 28, 1))
testX = testX.reshape((testX.shape[0], 28, 28, 1))
# one hot encode target values
train_Y = to_categorical(trainY)
test_Y = to_categorical(testY)

def preprocess_image(train, test):
	# convert from integers to floats
	train_norm = train.astype('float32')
	test_norm = test.astype('float32')
	# normalize to range 0-1
	train_norm = train_norm / 255.0
	test_norm = test_norm / 255.0
	
	return train_norm, test_norm
	 
# scale dataset
train_X, test_X = preprocess_image(trainX, testX)
print('Raw train dataset size:')
print(train_X.shape)
print(train_Y.shape)


Raw train dataset size:
(60000, 28, 28, 1)
(60000, 10)


In [3]:
# split validation dataset from train dataset
trainXX, ValX, trainYY, ValY = train_test_split(train_X, train_Y, test_size=0.1, random_state=42)
print('Split out validation set:')
print('New train dataset size:')
print(trainXX.shape)
print(trainYY.shape)
print('Validation dataset size:')
print(ValX.shape)
print(ValY.shape)

Split out validation set:
New train dataset size:
(54000, 28, 28, 1)
(54000, 10)
Validation dataset size:
(6000, 28, 28, 1)
(6000, 10)


In [0]:
# Create model with regularization
def create_model_with_l2_regularization():
  model = tf.keras.models.Sequential()
  model.add(tf.keras.layers.Conv2D(6, [5, 5], input_shape=[28, 28, 1], activation='relu',
                                  kernel_regularizer=tf.keras.regularizers.l2(0.001), name='conv_1'))
  model.add(tf.keras.layers.MaxPool2D())
  model.add(tf.keras.layers.Conv2D(16, [5, 5], activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001),name='conv_2'))
  model.add(tf.keras.layers.MaxPool2D())
  model.add(tf.keras.layers.Flatten())
  # add regularization to layers
  model.add(tf.keras.layers.Dense(120, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001), use_bias = False, name='dense_1'))
  model.add(tf.keras.layers.Dense(84, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001), use_bias = False, name='dense_2'))
  model.add(tf.keras.layers.Dense(10, activation='softmax', kernel_regularizer=tf.keras.regularizers.l2(0.01), use_bias = False, name='dense_3'))
  
  model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
  
  return model

model = create_model_with_l2_regularization()

In [18]:
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir='./', histogram_freq=1,
                         write_graph=True,
                         write_images=True)
# train model
results = model.fit(trainXX,
                    trainYY,
                    epochs=5,
                    batch_size=128,
                    verbose=2,
                    validation_data=(ValX, ValY),
                    callbacks=[tensorboard_callback])

Train on 54000 samples, validate on 6000 samples
Epoch 1/5
54000/54000 - 4s - loss: 0.7024 - accuracy: 0.8785 - val_loss: 0.3864 - val_accuracy: 0.9672
Epoch 2/5
54000/54000 - 3s - loss: 0.3460 - accuracy: 0.9663 - val_loss: 0.3055 - val_accuracy: 0.9713
Epoch 3/5
54000/54000 - 3s - loss: 0.2747 - accuracy: 0.9737 - val_loss: 0.2568 - val_accuracy: 0.9737
Epoch 4/5
54000/54000 - 3s - loss: 0.2342 - accuracy: 0.9766 - val_loss: 0.2141 - val_accuracy: 0.9783
Epoch 5/5
54000/54000 - 3s - loss: 0.2060 - accuracy: 0.9791 - val_loss: 0.1913 - val_accuracy: 0.9828


In [7]:
# evaluate model
t_start = time.clock()
_,acc = model.evaluate(ValX, ValY, batch_size=128, verbose=2)
t = time.clock() - t_start
print(f"Test Accuracy: {acc:.4f}, Inference time: {t:.2f}s")

model.save('old_model.h5')

6000/6000 - 0s - loss: 0.2113 - accuracy: 0.9758
Test Accuracy: 0.9758, Inference time: 0.28s


In [0]:
def from_float_to_Q(float_np_arr, m_bits, n_bits):
   f = (1 << n_bits)
   q_min = -(1 << (m_bits - 1))
   q_max = (1 << (m_bits - 1))*1.0 - (1.0 / f)
   q = np.round(float_np_arr * f) * (1.0 / f)
   q[q > q_max] = q_max
   q[q < q_min] = q_min
   return q

In [0]:
# quantize the weights of dense layers using the same Q notation
def quantize_model_with_Qnotation(old_model, N_fraction_bits, scale):
  quantized_model = tf.keras.models.Sequential()
  for no,layer in enumerate(old_model.layers):
    weights = layer.get_weights()
    if no == 7 or no == 6 or no == 5: # quantize only dense_2 and dense_3 layers
      scale_weights = scale*weights[0]
      quan_weights = from_float_to_Q(scale_weights,6-N_fraction_bits, N_fraction_bits)
      quan_weights = quan_weights/scale
      new_layer = tf.keras.layers.Dense(quan_weights.shape[1], activation='softmax',use_bias=False)
      quantized_model.add(new_layer)
      new_layer.set_weights([quan_weights])
      print(f'quan_weights:{quan_weights[0,0:20]}')
    else:
      quantized_model.add(layer)
      quantized_model.layers[no].set_weights(weights)
      
  return quantized_model

In [0]:
# NEW  quantize the weights of dense layers using the same Q notation
def quantize_model_with_Qnotation_2(model, N_fraction_bits, scale):
  quantized_model = tf.keras.models.Sequential()
  for no,layer in enumerate(model.layers):
    weights = layer.get_weights()
    if no == 7 or no == 6 or no == 5: # quantize only dense_2 and dense_3 layers
      scale_weights = scale*weights[0]
      quan_weights = from_float_to_Q(scale_weights,6-N_fraction_bits, N_fraction_bits)
      quan_weights = quan_weights/scale
      #new_layer = tf.keras.layers.Dense(quan_weights.shape[1], activation='softmax',use_bias=False)
      #quantized_model.add(new_layer)
      #new_layer.set_weights([quan_weights])
      print(f'quan_weights:{quan_weights[0,0:20]}')
      model.layers[no].set_weights([quan_weights])

  return model

In [0]:
def variance_fit(weights):
  mu_esti, std_esti = norm.fit(weights)
  #print(f'estimated mu/std: {mu_esti},{std_esti}')
  Q_max = mu_esti + std_esti
  Q_min = mu_esti - std_esti
  #print(f'estimated variance interval: [{Q_min}, {Q_max}]')
  return Q_min, Q_max

def quantize_loss(weights, quantized_weights, scale):
  w = quantized_weights/scale
  loss = np.linalg.norm(weights-w)
  loss = loss / np.linalg.norm(weights)

  return loss

def find_Qformat_and_scale(weights):
  w = weights.flatten()
  max_weights_abs = max(w)
  #print(f'max/min of weights:{max(w)}, {min(w)}')
  range_min, range_max = variance_fit(w)
  range_weights_abs = max([range_max,range_min])
  #print(f'variance of weights:{range_weights_abs}')

  M = 10
  loss_min = 100000
  scale_opt = 0
  N_bits_opt = 0
  range_opt = 0

  for r in np.linspace(range_weights_abs, max_weights_abs, M):
    for k in range(0,6):
      scale = (np.power(2,5-k) - 1.0/np.power(2,k))/r
      #print(f'r:{r}, k:{k}')
      s_weights = scale * weights
      quantized_weights = from_float_to_Q(s_weights, 6-k, k)
      loss = quantize_loss(weights, quantized_weights, scale)
      #print(f'loss:{loss}\n')
      if loss < loss_min:
        loss_min = loss
        scale_opt = scale
        N_bits_opt = k
        range_opt = r

  return N_bits_opt, scale_opt,range_opt

# quantize the weights of dense layers using optimized Q notation
def quantize_model(old_model):
  quantized_model = tf.keras.models.Sequential()
  for no,layer in enumerate(old_model.layers):
    weights = layer.get_weights()
    if no == 7 or no == 6 or no == 5: # quantize only dense_2 and dense_3 layers
      print(f"\n{no} th layer:")
      N_fraction_bits, scale, range_r = find_Qformat_and_scale(weights[0])
      print(f'optimal format: Q{6-N_fraction_bits}.{N_fraction_bits}, scale: {scale}, range:{range_r}')
      scaled_weights = scale * weights[0]
      quan_weights = from_float_to_Q(scaled_weights, 6-N_fraction_bits, N_fraction_bits)
      quan_weights = quan_weights/scale
      new_layer = tf.keras.layers.Dense(quan_weights.shape[1], activation='softmax',use_bias=False)
      #new_layer.set_weights([quan_weights])
      quantized_model.add(new_layer)
      quantized_model.layers[no].set_weights([quan_weights])    
    else:
      quantized_model.add(layer)
      quantized_model.layers[no].set_weights(weights)

  return quantized_model

In [14]:
# quatize the model 
quantized_model_1 = quantize_model_with_Qnotation(model, 4, 10)
quantized_model_2 = quantize_model_with_Qnotation_2(model, 4, 10)
#quantized_model = quantize_model(model)
quantized_model_1.save('quantized_model_1.h5')
quantized_model_1.save('quantized_model_2.h5')

quan_weights:[ 0.       0.      -0.00625  0.04375  0.0375   0.       0.04375  0.
  0.01875  0.025   -0.025    0.08125  0.00625 -0.      -0.08125 -0.025
 -0.       0.0375  -0.      -0.025  ]
quan_weights:[ 0. -0. -0. -0.  0.  0. -0.  0.  0. -0.  0.  0.  0.  0. -0. -0.  0.  0.
 -0. -0.]
quan_weights:[-0.00625  0.06875 -0.01875 -0.01875 -0.01875  0.025   -0.00625 -0.01875
 -0.0125   0.     ]
quan_weights:[ 0.       0.      -0.00625  0.04375  0.0375   0.       0.04375  0.
  0.01875  0.025   -0.025    0.08125  0.00625 -0.      -0.08125 -0.025
 -0.       0.0375  -0.      -0.025  ]
quan_weights:[ 0. -0. -0. -0.  0.  0. -0.  0.  0. -0.  0.  0.  0.  0. -0. -0.  0.  0.
 -0. -0.]
quan_weights:[-0.00625  0.06875 -0.01875 -0.01875 -0.01875  0.025   -0.00625 -0.01875
 -0.0125   0.     ]


In [15]:
# compile model
quantized_model_1.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# inference
t = time.clock()
l,a = quantized_model_1.evaluate(ValX, ValY, batch_size=128, verbose=2)
t = time.clock() - t
print(f"Test Accuracy: {a:.4f}, Inference time: {t:.2f}s")

# inference
t = time.clock()
l,a = quantized_model_2.evaluate(ValX, ValY, batch_size=128, verbose=2)
t = time.clock() - t
print(f"Test Accuracy: {a:.4f}, Inference time: {t:.2f}s")

6000/6000 - 0s - loss: 2.3261 - accuracy: 0.1208
Test Accuracy: 0.1208, Inference time: 0.39s
6000/6000 - 0s - loss: 0.2119 - accuracy: 0.9755
Test Accuracy: 0.9755, Inference time: 0.28s


In [20]:
# quantization aware training
def quantization_aware_training(model, train_data, train_label, N_fraction_bits, iterations):

  # split the dataset into mulitple batches
  mini_batch = 128
  num_of_batches = train_data.shape[0]//mini_batch
  print(f're-split trainging data to {num_of_batches} batches: \n')
  trainX = train_data[0:mini_batch*num_of_batches,:,:,:]
  trainY = train_label[0:mini_batch*num_of_batches,:]
  print(trainX.shape, trainY.shape)
  trainX_in_batches = tf.split(trainX, num_of_batches, axis=0)
  trainY_in_batches = tf.split(trainY, num_of_batches, axis=0)
  print(trainX_in_batches[0].shape)
  print(trainY_in_batches[0].shape)
  print('finish data split.')

  print('quantization aware training:\n')
  for i in range(num_of_batches):
    print(f'\n{i}th mini batch:')
    # quantize the weights of dense layers
    for no,layer in enumerate(model.layers):
      weights = layer.get_weights()
      
      if 'dense' in layer.name:
        print(f'\n{layer.name}:')
        print(f'old weights:{weights[0][0,0:20]}')
        #N_fraction_bits, scale, range_r = find_Qformat_and_scale(weights[0])
        #print(f'optimal format: Q{6-N_fraction_bits}.{N_fraction_bits}, scale: {scale}, range:{range_r}')
        scale=10
        N_fraction_bits=4
        scaled_weights = scale * weights[0]
        quan_weights = from_float_to_Q(scaled_weights, 6-N_fraction_bits, N_fraction_bits)
        quan_weights = quan_weights/scale
        print(f'quantized weights:{quan_weights[0,0:20]}')
        layer.set_weights([quan_weights])

    # test directly on the quantized model
    _,acc = model.evaluate(ValX, ValY, batch_size=128, verbose=2)
    print(f"Test Accuracy directly on quantized model: {acc:.4f}")

    # re-train the quantized model once with a mini_batch
    model.fit(trainX_in_batches[i],trainY_in_batches[i], epochs=1, batch_size=mini_batch, verbose=2)

  print('one epoch training finished.')
  # evaluate model
  t_start = time.clock()
  _,acc = model.evaluate(ValX, ValY, batch_size=128, verbose=2)
  t = time.clock() - t_start
  print(f"\n\nTest performance:\n Test Accuracy: {acc:.4f}, Inference time: {t:.2f}s")


quantization_aware_training(model, trainXX, trainYY, 0, 1)

re-split trainging data to 421 batches: 

(53888, 28, 28, 1) (53888, 10)
(128, 28, 28, 1)
(128, 10)
finish data split.
quantization aware training:


0th mini batch:

dense_1:
old weights:[ 1.52355572e-22  4.36946973e-02 -2.50855312e-02  6.47369027e-03
  1.24506401e-02  1.42243443e-04 -6.06684311e-21 -1.86290983e-02
  1.85766313e-02  2.45001935e-10 -1.86811369e-02 -4.49444256e-28
 -2.50187106e-02  7.89597825e-13 -1.23875039e-02  0.00000000e+00
 -1.25505235e-02  6.05624914e-03  3.75601128e-02  6.39986806e-03]
quantized weights:[ 0.       0.04375 -0.025    0.00625  0.0125   0.      -0.      -0.01875
  0.01875  0.      -0.01875 -0.      -0.025    0.      -0.0125   0.
 -0.0125   0.00625  0.0375   0.00625]

dense_2:
old weights:[-8.1304867e-22  1.4298556e-22  4.4372022e-23 -3.2323978e-22
 -4.6382022e-22 -2.5403626e-22 -7.9085268e-23 -2.6236174e-23
 -8.1118586e-23 -2.6012392e-22  8.1014728e-23  6.2297747e-23
 -7.8635675e-23 -1.1101946e-22  3.5761226e-22 -2.0811329e-23
 -5.8001800e-23 -3.0925