# Import libraries

In [14]:
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import seaborn as sns

# Neural network model for handwritten digit classification

In [15]:
# Load the MNIST dataset bundled with Keras; load_data() returns a
# (train, test) pair, each of which unpacks into (images, labels).
(X_train, Y_train), (X_test, Y_test) = keras.datasets.mnist.load_data()

In [16]:
# Flatten every 28x28 image into a single row of 784 values so the samples
# can be fed to a Dense input layer. The raw arrays are left untouched.
X_train_flattened = X_train.reshape((X_train.shape[0], 28 * 28))
X_test_flattened = X_test.reshape((X_test.shape[0], 28 * 28))

In [17]:
# Scale the flattened pixel values by 1/255 (MNIST pixels are 8-bit, 0-255).
# The scaled arrays are derived from the raw X_train / X_test rather than from
# X_*_flattened itself, so re-running this cell is idempotent: it can never
# divide already-scaled data by 255 a second time.
X_train_flattened = X_train.reshape(len(X_train), 28 * 28) / 255
X_test_flattened = X_test.reshape(len(X_test), 28 * 28) / 255

In [18]:
# Baseline classifier: 784 inputs -> 100 ReLU units -> 10 class probabilities.
# The output layer uses softmax (not sigmoid) because
# 'sparse_categorical_crossentropy' with its default from_logits=False expects
# each output row to be a probability distribution over the 10 classes.
model = keras.Sequential([
    keras.layers.Dense(100, input_shape=(784,), activation='relu'),
    keras.layers.Dense(10, activation='softmax'),
])

# `metrics` is passed as a list, which is the documented form; a bare string
# is not accepted by every Keras version.
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Train on the flattened, scaled images with integer class labels.
model.fit(X_train_flattened, Y_train, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x258608b2a90>

In [19]:
# Score the trained model on the held-out test split; the displayed result is
# the [loss, accuracy] pair for the metrics configured in compile().
model.evaluate(x=X_test_flattened, y=Y_test)



[0.0809302031993866, 0.9750000238418579]

In [20]:
# Persist the trained model to the "model" directory; the TFLite converter
# in the post-training quantization section reads it back from there.
model.save(filepath="model")

INFO:tensorflow:Assets written to: model\assets


INFO:tensorflow:Assets written to: model\assets


# Post training quantization

In [21]:
# Build a TFLite converter from the SavedModel directory written above and
# convert without any optimizations: this is the unquantized baseline.
convertor = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir="model")
tf_lite_model = convertor.convert()



In [22]:
# Length of the converted baseline model (convert() returns the serialized
# model, so this is its size in bytes).
len(tf_lite_model)

319484

## Weights Quantization

In [23]:
# Post-training quantization of the saved baseline model.
# A fresh converter is created here instead of mutating the one left over from
# an earlier cell: `convertor` is rebound to the quantization-aware model later
# in the notebook, so relying on the leftover object would silently quantize
# the wrong model if this cell were re-run out of order.
convertor = tf.lite.TFLiteConverter.from_saved_model("model")
convertor.optimizations = [tf.lite.Optimize.DEFAULT]
tf_lite_quant_model = convertor.convert()



In [24]:
# Length of the weight-quantized model, for comparison with the baseline
# length displayed above.
len(tf_lite_quant_model)

84432

In [25]:
# Reminder only: this notebook converts with Optimize.DEFAULT and does not
# go on to quantize activations; the message points the reader to the TF docs.
print("Check TF Documentation for Quantizing Activations")

Check TF Documentation for Quantizing Activations


In [26]:
# Write both converted models to disk, one .tflite file per model.
for filename, payload in (
    ("tf_lite_model.tflite", tf_lite_model),
    ("tf_lite_quant_model.tflite", tf_lite_quant_model),
):
    with open(filename, "wb") as f:
        f.write(payload)

# Quantization aware training

In [27]:
import tensorflow_model_optimization as tfmot

In [28]:
# Keep `quant_model` as a short alias for tfmot's quantize_model function.
quant_model = tfmot.quantization.keras.quantize_model

# Wrap the already-trained baseline model for quantization-aware training.
quant_aware_model = tfmot.quantization.keras.quantize_model(to_annotate=model)

In [29]:
# The model returned by quantize_model must be compiled before training.
# `metrics` is passed as a list, which is the documented form; a bare string
# is not accepted by every Keras version.
quant_aware_model.compile(optimizer='adam',
                          loss='sparse_categorical_crossentropy',
                          metrics=['accuracy'])

# Train for one epoch with the quantization-aware wrapper in place.
quant_aware_model.fit(X_train_flattened, Y_train, epochs=1)



<keras.callbacks.History at 0x25832964a90>

In [30]:
# Convert the quantization-aware model straight from the in-memory Keras
# object (no SavedModel directory involved), with default optimizations on.
convertor = tf.lite.TFLiteConverter.from_keras_model(model=quant_aware_model)
convertor.optimizations = [tf.lite.Optimize.DEFAULT]

tf_lite_quant_aware_model = convertor.convert()



INFO:tensorflow:Assets written to: C:\Users\I301903\AppData\Local\Temp\tmpi9fd44ta\assets


INFO:tensorflow:Assets written to: C:\Users\I301903\AppData\Local\Temp\tmpi9fd44ta\assets


In [31]:
# Save the quantization-aware TFLite model. The payload must be
# tf_lite_quant_aware_model; writing tf_lite_model here would store the
# unquantized baseline under the quant-aware filename.
with open("tf_lite_quant_aware_model.tflite", "wb") as f:
    f.write(tf_lite_quant_aware_model)