In [4]:
# Install tensorflow_model_optimization
!pip install -q tensorflow-model-optimization
# Import necessary libraries
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
import numpy as np
import tensorflow_model_optimization as tfmot

# Tutorial banner: title, underline, a one-line summary of what quantization
# is, and the three steps this notebook walks through. Everything goes out in
# a single print call; the trailing "\n" item plus print's own line ending
# reproduces the two blank lines that follow the step list.
print(
    "Quantization Tutorial",
    "=====================",
    "Quantization is a technique to downsize a trained model so that you can deploy it on EDGE devices. In this tutorial, we will:",
    "1. Train a hand-written digits model",
    "2. Export it to disk and check the size of the model",
    "3. Use two techniques for quantization: Post-training quantization and Quantization aware training",
    "\n",
    sep="\n",
)

# Fetch the MNIST digits through Keras; the loader hands back the train and
# test splits as (images, labels) pairs.
(X_train, y_train), (X_test, y_test) = keras.datasets.mnist.load_data()

# Report how many samples each split contains.
print(f"Number of training samples: {len(X_train)}")
print(f"Number of test samples: {len(X_test)}")

# Scale both image arrays by 1/255 in one step.
# NOTE(review): assumes pixel intensities are in 0..255, so the result lies
# in [0, 1] — confirm against the Keras MNIST loader documentation.
X_train, X_test = X_train / 255.0, X_test / 255.0

# Baseline classifier: 28x28 image -> flatten -> 100 ReLU units -> 10 outputs.
# The Flatten layer sits inside the model so that we don't have to call
# .reshape on the input dataset ourselves.
#
# The output layer uses softmax so the ten outputs form a probability
# distribution over the digit classes, which is what the multi-class loss
# 'sparse_categorical_crossentropy' (default from_logits=False) expects.
# The previous sigmoid activation produced ten independent scores that do
# not sum to 1 and is meant for binary / multi-label targets instead.
model = keras.Sequential([
    keras.layers.Flatten(input_shape=(28, 28)),
    keras.layers.Dense(100, activation='relu'),
    keras.layers.Dense(10, activation='softmax'),
])

# Compile with Adam and the sparse (integer-label) cross-entropy loss;
# accuracy is tracked as the reported metric.
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Fit the baseline model for five passes over the training split.
model.fit(x=X_train, y=y_train, epochs=5)

# Measure loss/accuracy on the held-out test split.
model.evaluate(x=X_test, y=y_test)

# Persist the trained model under ./saved_model/ so the TFLite converter
# can load it from disk in the next section.
model.save("./saved_model/")

# (1) Post-training quantization
# Section header: a leading blank line, the title, and its underline.
print("\nPost-training Quantization", "--------------------------", sep="\n")

# Build one converter from the model saved on disk and use it twice:
# first as-is, to get the plain (non-quantized) TensorFlow Lite flatbuffer ...
converter = tf.lite.TFLiteConverter.from_saved_model("./saved_model")
tflite_model = converter.convert()

# ... then again with the default optimization flag switched on, to get the
# quantized flatbuffer.
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_quant_model = converter.convert()

# Both conversions return the serialized model, so len() gives the size to
# compare.
print(f"Size of non-quantized model: {len(tflite_model)}")
print(f"Size of quantized model: {len(tflite_quant_model)}")

# (2) Quantization aware training
# Section header: a leading blank line, the title, and its underline.
print("\nQuantization Aware Training", "---------------------------", sep="\n")

# Wrap the already-trained baseline model so it becomes quantization aware.
quantize_model = tfmot.quantization.keras.quantize_model
q_aware_model = quantize_model(model)

# `quantize_model` returns a new model that has to be compiled again; the
# optimizer, loss and metric mirror the baseline model's settings.
q_aware_model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Show the layer structure of the quantization aware model.
q_aware_model.summary()

# Fine-tune for a single epoch on the training split, then score the result
# on the test split.
q_aware_model.fit(x=X_train, y=y_train, epochs=1)
q_aware_model.evaluate(x=X_test, y=y_test)

# Turn the in-memory quantization aware Keras model into a TensorFlow Lite
# flatbuffer, with the default optimization flag enabled.
converter = tf.lite.TFLiteConverter.from_keras_model(q_aware_model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_qaware_model = converter.convert()

# Report the serialized size of the quantization aware model.
print(f"Size of quantization aware model: {len(tflite_qaware_model)}")


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 242.5/242.5 kB 3.6 MB/s eta 0:00:00
Quantization Tutorial
=====================
Quantization is a technique to downsize a trained model so that you can deploy it on EDGE devices. In this tutorial, we will:
1. Train a hand-written digits model
2. Export it to disk and check the size of the model
3. Use two techniques for quantization: Post-training quantization and Quantization aware training


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
Number of training samples: 60000
Number of test samples: 10000
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5

Post-training Quantization
--------------------------
Size of non-quant



Size of quantization aware model: 82736
