In [2]:
! pip install -q tensorflow-model-optimization

In [4]:
import numpy as np
import tensorflow as tf
import tensorflow 
import time
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import re

from tensorflow import keras
from tensorflow.keras import regularizers
from tensorflow.keras.datasets import mnist
from tensorflow.keras import models, layers
from tensorflow.keras.models import Sequential
from tensorflow.keras import optimizers
from tensorflow.keras import losses
from tensorflow_model_optimization.sparsity import keras as sparsity

# Custom Model on Mnist

In [5]:
#Load dataset as train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [6]:
x_train = x_train.reshape(60000, 784)
x_test = x_test.reshape(10000, 784)
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')
num_classes = 10
# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

60000 train samples
10000 test samples


In [7]:
from keras.models import Sequential
from keras import models, layers
from keras import regularizers
model = keras.Sequential()
model.add(keras.layers.Dropout(0.2,input_shape=(784,)))
model.add(keras.layers.Dense(1000,
                        kernel_regularizer = regularizers.l2(0.01),
                        activation='relu'))
model.add(keras.layers.Dropout(0.5))
model.add(keras.layers.Dense(1000,
                        kernel_regularizer = regularizers.l2(0.01),
                        activation='relu'))
model.add(keras.layers.Dropout(0.5))
model.add(keras.layers.Dense(10,  activation='softmax'))
#display the model summary
model.summary()

Using TensorFlow backend.


Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dropout (Dropout)            (None, 784)               0         
_________________________________________________________________
dense (Dense)                (None, 1000)              785000    
_________________________________________________________________
dropout_1 (Dropout)          (None, 1000)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 1000)              1001000   
_________________________________________________________________
dropout_2 (Dropout)          (None, 1000)              0         
_________________________________________________________________
dense_2 (Dense)              (None, 10)                10010     
Total params: 1,796,010
Trainable params: 1,796,010
Non-trainable params: 0
______________________________________________

## (A) Post Training Quantization

In [8]:
model.compile(loss=keras.losses.categorical_crossentropy, 
              optimizer='adam', 
              metrics=['accuracy'])

In [9]:
hist = model.fit(x_train, y_train,
                        batch_size=128,
                        epochs=10,
                        verbose=1,
                        validation_data=(x_test,y_test))

Train on 60000 samples, validate on 10000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [10]:
score = model.evaluate(x_test, y_test, verbose=1)
print("Test loss {:.4f}, accuracy {:.2f}%".format(score[0], score[1] * 100))

Test loss 0.5148, accuracy 94.92%


In [11]:
#Save the entire model in model.h5 file
model.save("model.h5")
print("Saved model to disk")

Saved model to disk


In [12]:
model = tf.keras.models.load_model('model.h5')
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()
#saving converted model in "converted_model.tflite" file
open("converted_model.tflite", "wb").write(tflite_model)

7185624

In [13]:
model = tf.keras.models.load_model('model.h5')
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_quant_model = converter.convert()
#saving converted model in "converted_quant_model.tflite" file
open("converted_quant_model.tflite", "wb").write(tflite_quant_model)

1803744

In [16]:
import os
print("Float model in Mb:", os.path.getsize('converted_model.tflite') / float(2**20))
print("Quantized model in Mb:", os.path.getsize('converted_quant_model.tflite') / float(2**20))
print("Compression ratio:", os.path.getsize('converted_model.tflite')/os.path.getsize('converted_quant_model.tflite'))

Float model in Mb: 6.852745056152344
Quantized model in Mb: 1.720184326171875
Compression ratio: 3.983727180797275


In [17]:
# Load TFLite model and allocate tensors.
interpreter = \
tf.lite.Interpreter(model_path="converted_quant_model.tflite")
interpreter.allocate_tensors()
# Get input and output tensors.
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
# Test model on some input data.
input_shape = input_details[0]['shape']
acc=0
for i in range(len(x_test)):
    input_data = x_test[i].reshape(input_shape)
    interpreter.set_tensor(input_details[0]['index'], input_data)
    interpreter.invoke()
    output_data = interpreter.get_tensor(output_details[0]['index'])
    if(np.argmax(output_data) == np.argmax(y_test[i])):
        acc+=1
acc = acc/len(x_test)
print(acc*100)

95.6


## (B) Quantization Aware Training 

### Quantize full model

In [31]:
from keras.models import Sequential
from keras import models, layers
from keras import regularizers
model = keras.Sequential()
model.add(keras.layers.Dropout(0.2,input_shape=(784,)))
model.add(keras.layers.Dense(1000,
                        kernel_regularizer = regularizers.l2(0.01),
                        activation='relu'))
model.add(keras.layers.Dropout(0.5))
model.add(keras.layers.Dense(1000,
                        kernel_regularizer = regularizers.l2(0.01),
                        activation='relu'))
model.add(keras.layers.Dropout(0.5))
model.add(keras.layers.Dense(10,  activation='softmax'))
#display the model summary
model.summary()

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dropout_12 (Dropout)         (None, 784)               0         
_________________________________________________________________
dense_12 (Dense)             (None, 1000)              785000    
_________________________________________________________________
dropout_13 (Dropout)         (None, 1000)              0         
_________________________________________________________________
dense_13 (Dense)             (None, 1000)              1001000   
_________________________________________________________________
dropout_14 (Dropout)         (None, 1000)              0         
_________________________________________________________________
dense_14 (Dense)             (None, 10)                10010     
Total params: 1,796,010
Trainable params: 1,796,010
Non-trainable params: 0
____________________________________________

In [32]:
model.compile(loss=keras.losses.categorical_crossentropy, 
              optimizer='adam', 
              metrics=['accuracy'])

In [33]:
hist = model.fit(x_train, y_train,
                        batch_size=128,
                        epochs=100,
                        verbose=1,
                        validation_split = 0.1)

Train on 54000 samples, validate on 6000 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/10

In [35]:
import tensorflow_model_optimization as tfmot

quantize_model = tfmot.quantization.keras.quantize_model

# q_aware stands for for quantization aware.
q_aware_model = quantize_model(model)

# `quantize_model` requires a recompile.
q_aware_model.compile(loss=keras.losses.categorical_crossentropy, 
              optimizer='adam', 
              metrics=['accuracy'])

q_aware_model.summary()

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: expected an indented block (<unknown>, line 14)
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: expected an indented block (<unknown>, line 14)
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: expected an indented block (<unknown>, line 14)
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: expected an indented block (<unknown>, line 14)
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and atta

In [40]:
histq = q_aware_model.fit(x_train, y_train,
                        batch_size=128,
                        epochs=100,
                        verbose=1,
                        validation_split = 0.1)

Train on 54000 samples, validate on 6000 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/10

In [41]:
_, baseline_model_accuracy = model.evaluate(
    x_test, y_test, verbose=1)

_, q_aware_model_accuracy = q_aware_model.evaluate(
    x_test, y_test, verbose=1)

print('Baseline test accuracy:', baseline_model_accuracy*100)
print('Quant test accuracy:', q_aware_model_accuracy*100)

Baseline test accuracy: 95.6499993801117
Quant test accuracy: 95.59000134468079


In [36]:
#### fine tune with QAT on a subset of the training data.

In [37]:
histq= q_aware_model.fit(x_train[:1000], y_train[:1000],
                        batch_size=128,
                        epochs=100,
                        verbose=1,
                        validation_split = 0.1)

Train on 900 samples, validate on 100 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
E

In [39]:
_, baseline_model_accuracy = model.evaluate(
    x_test, y_test, verbose=1)

_, q_aware_model_accuracy = q_aware_model.evaluate(
    x_test, y_test, verbose=1)

print('Baseline test accuracy:', baseline_model_accuracy*100)
print('Quant test accuracy:', q_aware_model_accuracy*100)

Baseline test accuracy: 95.6499993801117
Quant test accuracy: 90.52000045776367


### Quantize some layers

In [14]:
import tensorflow_model_optimization as tfmot
annotate = tfmot.quantization.keras.quantize_annotate_layer

In [15]:
from keras.models import Sequential
from keras import models, layers
from keras import regularizers
model = keras.Sequential()
model.add(keras.layers.Dropout(0.2,input_shape=(784,)))
model.add(keras.layers.Dense(1000,
                        kernel_regularizer = regularizers.l2(0.01),
                        activation='relu'))
model.add(keras.layers.Dropout(0.5))
model.add(annotate(keras.layers.Dense(1000,
                        kernel_regularizer = regularizers.l2(0.01),
                        activation='relu')))
model.add(keras.layers.Dropout(0.5))
model.add(keras.layers.Dense(10,  activation='softmax'))
#display the model summary
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dropout_3 (Dropout)          (None, 784)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 1000)              785000    
_________________________________________________________________
dropout_4 (Dropout)          (None, 1000)              0         
_________________________________________________________________
quantize_annotate_1 (Quantiz (None, 1000)              1001000   
_________________________________________________________________
dropout_5 (Dropout)          (None, 1000)              0         
_________________________________________________________________
dense_5 (Dense)              (None, 10)                10010     
Total params: 1,796,010
Trainable params: 1,796,010
Non-trainable params: 0
____________________________________________

In [16]:
# Use `quantize_apply` to actually make the model quantization aware.
quant_aware_model = tfmot.quantization.keras.quantize_apply(model)

quant_aware_model.summary()

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: expected an indented block (<unknown>, line 14)
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: expected an indented block (<unknown>, line 14)
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dropout_3 (Dropout)          (None, 784)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 1000)              785000    
_________________________________________________________________
dropout_4 (Dropout)          (None, 1000)              0         
_________________________________________________________________
quant_dense_4 (Quant

In [18]:
quant_aware_model.compile(loss=keras.losses.categorical_crossentropy, 
              optimizer='adam', 
              metrics=['accuracy'])


In [22]:
histq = quant_aware_model.fit(x_train, y_train,
                        batch_size=128,
                        epochs=100,
                        verbose=1,
                        validation_split = 0.1)

Train on 54000 samples, validate on 6000 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/10

In [24]:
_, quant_aware_model_accuracy = quant_aware_model.evaluate(
    x_test, y_test, verbose=1)

print('Quant test accuracy:', quant_aware_model_accuracy*100)

Quant test accuracy: 95.63000202178955


# VGG on CIFAR10

In [5]:
from keras.datasets import cifar10
(x_train,y_train),(x_test,y_test)=cifar10.load_data()

Using TensorFlow backend.


Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz


In [6]:
vgg_model = tf.keras.applications.VGG16(input_shape=x_train[0].shape, include_top=False, weights=None)
vgg_model.trainable = True
global_average_layer = tf.keras.layers.GlobalAveragePooling2D()

model = keras.Sequential()
model.add(vgg_model)
model.add(global_average_layer)
model.add(keras.layers.Dense(1024, activation='relu'))
#model.add(keras.layers.BatchNormalization())
#model.add(keras.layers.Dropout(0.5))
model.add(keras.layers.Dense(10, activation='softmax'))
print(model.summary())

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vgg16 (Model)                (None, 1, 1, 512)         14714688  
_________________________________________________________________
global_average_pooling2d (Gl (None, 512)               0         
_________________________________________________________________
dense (Dense)                (None, 1024)              525312    
_________________________________________________________________
dense_1 (Dense)              (None, 10)                10250     
Total params: 15,250,250
Trainable params: 535,562
Non-trainable params: 14,714,688
_________________________________________________________________
None


In [7]:
from tensorflow.keras import losses
#sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(optimizer='adam', 
              loss=losses.sparse_categorical_crossentropy,
              metrics=["accuracy"])

In [8]:
hist = model.fit(x_train, y_train,
                        batch_size=128,
                        epochs=20,
                        verbose=1,
                        validation_split=0.1)

Train on 45000 samples, validate on 5000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [9]:
score = model.evaluate(x_test, y_test, verbose=0)
print("Test loss {:.4f}, accuracy {:.2f}%".format(score[0], score[1] * 100))

Test loss 1.5641, accuracy 44.35%


In [10]:
model.save("vgg_model.h5")
print("Saved model to disk")

Saved model to disk


In [11]:
model = tf.keras.models.load_model('vgg_model.h5')
model.compile(optimizer='adam', 
              loss=losses.sparse_categorical_crossentropy,
              metrics=["accuracy"])
hist = model.fit(x_train, y_train,
                        batch_size=128,
                        epochs=50,
                        verbose=1,
                        validation_split=0.1)

Train on 45000 samples, validate on 5000 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [12]:
model = tf.keras.models.load_model('vgg_model.h5')
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_quant_model = converter.convert()
open("vgg_quant_model.tflite", "wb").write(tflite_quant_model)

15259960

In [13]:
x_test = x_test.astype('float32')

# Load TFLite model and allocate tensors.
interpreter = tf.lite.Interpreter(model_path="vgg_quant_model.tflite")
interpreter.allocate_tensors()

# Get input and output tensors.
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Test model on some input data.
input_shape = input_details[0]['shape']
acc=0
for i in range(len(x_test)):
    input_data = x_test[i].reshape(input_shape)
    interpreter.set_tensor(input_details[0]['index'], input_data)
    interpreter.invoke()
    output_data = interpreter.get_tensor(output_details[0]['index'])
    if(np.argmax(output_data) == y_test[i]):
        acc+=1
acc = acc/len(x_test)
print(acc*100)

75.64


In [15]:
import os
import tempfile

# Create float TFLite model.
float_converter = tf.lite.TFLiteConverter.from_keras_model(model)
float_tflite_model = float_converter.convert()

# Measure sizes of models.
_, float_file = tempfile.mkstemp('.tflite')
_, quant_file = tempfile.mkstemp('.tflite')

with open(quant_file, 'wb') as f:
  f.write(tflite_quant_model)

with open(float_file, 'wb') as f:
  f.write(float_tflite_model)

print("Float model in Mb:", os.path.getsize(float_file) / float(2**20))
print("Quantized model in Mb:", os.path.getsize(quant_file) / float(2**20))

Float model in Mb: 58.182395935058594
Quantized model in Mb: 14.566337585449219


## QAT

### Quantize full model

In [51]:
import tensorflow as tf

In [54]:
vgg_model = tf.keras.applications.VGG16(input_shape=(32, 32, 3), include_top=False, weights=None)
vgg_model.trainable = True
global_average_layer = tf.keras.layers.GlobalAveragePooling2D()

#https://github.com/tensorflow/model-optimization/issues/40    
model = tf.keras.Sequential()
#Had to add input layer, otherwise ran into trouble when printing model summary as well as converting to tf-lite
model.add(layers.Input(shape=(32, 32, 3)))
input_layer = True
for layer in vgg_model.layers:
    model.add(layer)
model.add(layers.Flatten())
model.add(layers.Dense(1024, activation='relu'))
#model.add(layers.BatchNormalization())
#model.add(layers.Dropout(0.5))
model.add(layers.Dense(10, activation='softmax'))
print(model.summary())

Model: "sequential_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_14 (InputLayer)        multiple                  0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 32, 32, 64)        1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 32, 32, 64)        36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 16, 16, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 16, 16, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 16, 16, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 8, 8, 128)        

In [55]:
model.compile(loss=losses.sparse_categorical_crossentropy, 
              optimizer='adam', 
              metrics=['accuracy'])

hist= model.fit(x_train, y_train,
                        batch_size=128,
                        epochs=20,
                        verbose=1,
                        validation_split = 0.1)

Train on 45000 samples, validate on 5000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [56]:
model.save("vgg_true_model.h5")
print("Saved model to disk")

Saved model to disk


In [57]:
import tensorflow_model_optimization as tfmot
#model = tf.keras.models.load_model('vgg_model.h5')
quantize_model = tfmot.quantization.keras.quantize_model

# q_aware stands for for quantization aware.
q_aware_model = quantize_model(model)

# `quantize_model` requires a recompile.
q_aware_model.compile(loss=losses.sparse_categorical_crossentropy, 
              optimizer='adam', 
              metrics=['accuracy'])

model.compile(loss=losses.sparse_categorical_crossentropy, 
              optimizer='adam', 
              metrics=['accuracy'])

q_aware_model.summary()

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: expected an indented block (<unknown>, line 14)
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: expected an indented block (<unknown>, line 14)
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: expected an indented block (<unknown>, line 14)
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: expected an indented block (<unknown>, line 14)
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and atta

In [58]:
histq= q_aware_model.fit(x_train[:1000], y_train[:1000],
                        batch_size=128,
                        epochs=10,
                        verbose=1,
                        validation_split = 0.1)

Train on 900 samples, validate on 100 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [59]:
_, baseline_model_accuracy = model.evaluate(
    x_test, y_test, verbose=1)

_, q_aware_model_accuracy = q_aware_model.evaluate(
    x_test, y_test, verbose=1)

print('Baseline test accuracy:', baseline_model_accuracy*100)
print('Quant test accuracy:', q_aware_model_accuracy*100)

Baseline test accuracy: 74.27999973297119
Quant test accuracy: 64.0500009059906


In [60]:
q_aware_model.save("vgg_qat_full_model.h5")
print("Saved model to disk")


Saved model to disk


In [61]:
converter = tf.lite.TFLiteConverter.from_keras_model(q_aware_model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_quant_model = converter.convert()
open("vgg_qat_full_model.tflite", "wb").write(tflite_quant_model)

15277856

In [65]:
print("Float complete model in Mb:", os.stat('vgg_true_model.h5').st_size/ float(2**20))
print("QAT model in Mb:", os.stat('vgg_qat_full_model.h5').st_size/ float(2**20))
print("QAT compressed model in Mb:", os.stat('vgg_qat_full_model.tflite').st_size/ float(2**20))

Float complete model in Mb: 174.66024017333984
QAT model in Mb: 174.7544708251953
QAT compressed model in Mb: 14.570098876953125


In [67]:
import os
import tempfile

# Create float TFLite model.
float_converter = tf.lite.TFLiteConverter.from_keras_model(model)
float_tflite_model = float_converter.convert()

# Measure sizes of models.
_, float_file = tempfile.mkstemp('.tflite')
_, quant_file = tempfile.mkstemp('.tflite')

with open(quant_file, 'wb') as f:
  f.write(tflite_quant_model)

with open(float_file, 'wb') as f:
  f.write(float_tflite_model)

print("Float tflite model in Mb:", os.path.getsize(float_file) / float(2**20))
print("Quantized model in Mb:", os.path.getsize(quant_file) / float(2**20))

Float tflite model in Mb: 58.18195343017578
Quantized model in Mb: 14.570098876953125


In [68]:
x_test = x_test.astype('float32')

# Load TFLite model and allocate tensors.
interpreter = tf.lite.Interpreter(model_path="vgg_qat_full_model.tflite")
interpreter.allocate_tensors()

# Get input and output tensors.
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Test model on some input data.
input_shape = input_details[0]['shape']
acc=0
for i in range(len(x_test)):
    input_data = x_test[i].reshape(input_shape)
    interpreter.set_tensor(input_details[0]['index'], input_data)
    interpreter.invoke()
    output_data = interpreter.get_tensor(output_details[0]['index'])
    if(np.argmax(output_data) == y_test[i]):
        acc+=1
acc = acc/len(x_test)
print(acc*100)

64.25


### Quantize some layer

In [19]:
annotate = tfmot.quantization.keras.quantize_annotate_layer

In [20]:
vgg_model = tf.keras.applications.VGG16(input_shape=(32, 32, 3), include_top=False, weights=None)
vgg_model.trainable = True
global_average_layer = tf.keras.layers.GlobalAveragePooling2D()

#https://github.com/tensorflow/model-optimization/issues/40    
model = tf.keras.Sequential()
#Had to add input layer, otherwise ran into trouble when printing model summary as well as converting to tf-lite
model.add(layers.Input(shape=(32, 32, 3)))
input_layer = True
for layer in vgg_model.layers:
    model.add(layer)
model.add(layers.Flatten())
model.add(layers.Dense(1024, activation='relu'))
#model.add(layers.BatchNormalization())
model.add(layers.Dropout(0.5))
model.add(annotate(layers.Dense(10, activation='softmax')))
print(model.summary())

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_4 (InputLayer)         multiple                  0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 32, 32, 64)        1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 32, 32, 64)        36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 16, 16, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 16, 16, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 16, 16, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 8, 8, 128)        

In [21]:
# Use `quantize_apply` to actually make the model quantization aware.
quant_aware_model = tfmot.quantization.keras.quantize_apply(model)

quant_aware_model.summary()

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: expected an indented block (<unknown>, line 14)
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: expected an indented block (<unknown>, line 14)
Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
block1_conv1 (Conv2D)        (None, 32, 32, 64)        1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 32, 32, 64)        36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 16, 16, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D

In [22]:
histq= q_aware_model.fit(x_train[:1000], y_train[:1000],
                        batch_size=128,
                        epochs=10,
                        verbose=1,
                        validation_split = 0.1)

Train on 900 samples, validate on 100 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [18]:
_, baseline_model_accuracy = model.evaluate(
    x_test, y_test, verbose=1)

_, q_aware_model_accuracy = q_aware_model.evaluate(
    x_test, y_test, verbose=1)

print('Baseline test accuracy:', baseline_model_accuracy*100)
print('Quant test accuracy:', q_aware_model_accuracy*100)

Baseline test accuracy: 73.94000291824341
Quant test accuracy: 66.51999950408936


In [27]:
converter = tf.lite.TFLiteConverter.from_keras_model(q_aware_model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_quant_model = converter.convert()
open("vgg_qat_model.tflite", "wb").write(tflite_quant_model)

15277840

In [28]:
import os
import tempfile

# Create float TFLite model.
float_converter = tf.lite.TFLiteConverter.from_keras_model(model)
float_tflite_model = float_converter.convert()

# Measure sizes of models.
_, float_file = tempfile.mkstemp('.tflite')
_, quant_file = tempfile.mkstemp('.tflite')

with open(quant_file, 'wb') as f:
  f.write(tflite_quant_model)

with open(float_file, 'wb') as f:
  f.write(float_tflite_model)

print("Float model in Mb:", os.path.getsize(float_file) / float(2**20))
print("Quantized model in Mb:", os.path.getsize(quant_file) / float(2**20))

Float model in Mb: 58.168704986572266
Quantized model in Mb: 14.570083618164062


In [39]:
print("Float complete model in Mb:", os.stat('vgg_model.h5').st_size/ float(2**20))
print("QAT compressed model in Mb:", os.stat('vgg_qat_model.tflite').st_size/ float(2**20))

Float complete model in Mb: 62.32586669921875
QAT compressed model in Mb: 14.570083618164062


In [29]:
x_test = x_test.astype('float32')

# Load TFLite model and allocate tensors.
interpreter = tf.lite.Interpreter(model_path="vgg_qat_model.tflite")
interpreter.allocate_tensors()

# Get input and output tensors.
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Test model on some input data.
input_shape = input_details[0]['shape']
acc=0
for i in range(len(x_test)):
    input_data = x_test[i].reshape(input_shape)
    interpreter.set_tensor(input_details[0]['index'], input_data)
    interpreter.invoke()
    output_data = interpreter.get_tensor(output_details[0]['index'])
    if(np.argmax(output_data) == y_test[i]):
        acc+=1
acc = acc/len(x_test)
print(acc*100)

65.42
