# Post-training dynamic range quantization

### Build an MNIST model

#### Setup

In [1]:
import logging
# Set TensorFlow's Python logger to DEBUG so that its INFO-level messages
# (e.g. the converter's "Assets written to: ..." lines) show up in cell output.
logging.getLogger("tensorflow").setLevel(logging.DEBUG)

import tensorflow as tf
from tensorflow import keras
import numpy as np
import pathlib


2024-01-18 06:53:49.293949: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


### Train a TensorFlow model

In [4]:
# Load MNIST dataset
# load_data() fetches mnist.npz on first use (see the download message in the
# cell output) and returns a (train, test) pair of (images, labels) tuples.
mnist = keras.datasets.mnist
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [5]:
# Normalize the input images so that each pixel value is between 0 and 1.
# The arrays are rebound to the same names, so without a guard re-running this
# cell would divide by 255 a second time and silently shrink every pixel.
# Only rescale data that is still on the raw 0-255 scale (max > 1).
if train_images.max() > 1.0:
    train_images = train_images / 255.0
if test_images.max() > 1.0:
    test_images = test_images / 255.0

In [11]:
# Define the model architecture: a small CNN mapping a 28x28 image to 10 logits.
model = keras.Sequential(
    layers=[
        keras.layers.InputLayer(input_shape=(28, 28)),
        # Add an explicit single-channel axis so Conv2D receives (28, 28, 1) inputs.
        keras.layers.Reshape(target_shape=(28, 28, 1)),
        keras.layers.Conv2D(filters=12, kernel_size=(3, 3), activation=tf.nn.relu),
        keras.layers.MaxPooling2D(pool_size=(2, 2)),
        keras.layers.Flatten(),
        # No activation here: the layer emits one raw score (logit) per digit class.
        keras.layers.Dense(10),
    ]
)

In [12]:
# Print each layer's output shape and parameter count as a quick sanity check.
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 reshape_1 (Reshape)         (None, 28, 28, 1)         0         
                                                                 
 conv2d_1 (Conv2D)           (None, 26, 26, 12)        120       
                                                                 
 max_pooling2d (MaxPooling2  (None, 13, 13, 12)        0         
 D)                                                              
                                                                 
 flatten (Flatten)           (None, 2028)              0         
                                                                 
 dense (Dense)               (None, 10)                20290     
                                                                 
Total params: 20410 (79.73 KB)
Trainable params: 20410 (79.73 KB)
Non-trainable params: 0 (0.00 Byte)
__________________

In [15]:
# Configure training for the digit classification model: Adam optimizer,
# sparse categorical cross-entropy computed on raw logits (from_logits=True),
# and accuracy reported as the metric.
model.compile(
    optimizer='adam',
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)



In [17]:
# Train for 10 epochs, passing the test split as validation data.
# The call is the cell's last expression, so the returned History object is
# displayed as the cell output.
model.fit(
    x=train_images,
    y=train_labels,
    epochs=10,
    validation_data=(test_images, test_labels),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7fe4b16a9d30>

# Convert to a TensorFlow Lite model
Using the TensorFlow Lite converter, we can now convert the trained Keras model into a TensorFlow Lite model.

In [18]:
# Build a TFLite converter directly from the in-memory Keras model.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
# No optimizations are set on the converter yet, so this is the baseline
# conversion; the result is written to disk with write_bytes() below.
tflite_model = converter.convert()

INFO:tensorflow:Assets written to: /tmp/sraj/tmplmq37yue/assets


INFO:tensorflow:Assets written to: /tmp/sraj/tmplmq37yue/assets
2024-01-17 22:36:51.611880: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:364] Ignored output_format.
2024-01-17 22:36:51.611920: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:367] Ignored drop_control_dependency.
2024-01-17 22:36:51.667523: I tensorflow/cc/saved_model/reader.cc:45] Reading SavedModel from: /tmp/sraj/tmplmq37yue
2024-01-17 22:36:51.668406: I tensorflow/cc/saved_model/reader.cc:91] Reading meta graph with tags { serve }
2024-01-17 22:36:51.668426: I tensorflow/cc/saved_model/reader.cc:132] Reading SavedModel debug info (if present) from: /tmp/sraj/tmplmq37yue
2024-01-17 22:36:51.670901: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:375] MLIR V1 optimization pass is not enabled
2024-01-17 22:36:51.671547: I tensorflow/cc/saved_model/loader.cc:231] Restoring SavedModel bundle.
2024-01-17 22:36:51.812041: I tensorflow/cc/saved_model/loader.cc:215] Runnin

Write it out to a tflite file:



In [19]:
# Output directory for the exported .tflite files. Create it together with any
# missing parent directories; an already-existing directory is not an error.
tflite_models_dir = pathlib.Path("/tmp/mnist_tflite_models/")
tflite_models_dir.mkdir(parents=True, exist_ok=True)

In [20]:
# Save the baseline model. write_bytes() returns the number of bytes written,
# which is displayed as the cell output.
tflite_model_file = tflite_models_dir.joinpath("mnist_model.tflite")
tflite_model_file.write_bytes(tflite_model)

84888

To quantize the model on export, set the optimizations flag to optimize for size:



In [21]:
# Turn on the default optimization set on the existing converter and convert
# the same Keras model again to obtain the quantized variant.
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_quant_model = converter.convert()

# Save it next to the baseline model; the byte count returned by write_bytes()
# is the cell output, which allows a direct size comparison.
tflite_model_quant_file = tflite_models_dir.joinpath("mnist_model_quant.tflite")
tflite_model_quant_file.write_bytes(tflite_quant_model)

INFO:tensorflow:Assets written to: /tmp/sraj/tmpndmoc9t5/assets


INFO:tensorflow:Assets written to: /tmp/sraj/tmpndmoc9t5/assets
2024-01-17 22:39:30.149314: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:364] Ignored output_format.
2024-01-17 22:39:30.149352: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:367] Ignored drop_control_dependency.
2024-01-17 22:39:30.149530: I tensorflow/cc/saved_model/reader.cc:45] Reading SavedModel from: /tmp/sraj/tmpndmoc9t5
2024-01-17 22:39:30.150337: I tensorflow/cc/saved_model/reader.cc:91] Reading meta graph with tags { serve }
2024-01-17 22:39:30.150353: I tensorflow/cc/saved_model/reader.cc:132] Reading SavedModel debug info (if present) from: /tmp/sraj/tmpndmoc9t5
2024-01-17 22:39:30.152421: I tensorflow/cc/saved_model/loader.cc:231] Restoring SavedModel bundle.
2024-01-17 22:39:30.178984: I tensorflow/cc/saved_model/loader.cc:215] Running initialization op on SavedModel bundle at path: /tmp/sraj/tmpndmoc9t5
2024-01-17 22:39:30.187972: I tensorflow/cc/saved_model/loade

24136