# Quantize

This notebook quantizes the trained model to reduce the size of the file that has to be deployed. It then evaluates the quantized model on the test data and compares the model metrics before and after quantization.

In [1]:
# Import libraries

import tensorflow as tf
from tensorflow.keras.models import load_model

import os

In [2]:
# Notebook configuration

# Parent directory of the current working directory; the saved model is read from here.
ROOT_DIR = os.path.split(os.getcwd())[0]
# Base name (without extension) of the saved Keras model file.
MODEL_NAME = "model"

In [3]:
# Load the saved Keras model from <ROOT_DIR>/<MODEL_NAME>.h5

model_path = os.path.join(ROOT_DIR, '{}.h5'.format(MODEL_NAME))
model = load_model(model_path)



In [4]:

# Build a TensorFlow Lite converter from the in-memory Keras model
converter = tf.lite.TFLiteConverter.from_keras_model(model)

# Request post-training quantization via the default optimization set
default_optimizations = [tf.lite.Optimize.DEFAULT]
converter.optimizations = default_optimizations

# Run the conversion; the result is the quantized TensorFlow Lite model,
# which a later cell writes to disk in binary mode
quantized_model = converter.convert()

INFO:tensorflow:Assets written to: /var/folders/3h/stbwvzgs2pg2db9fz55n3yg80000gn/T/tmpsrrha_mq/assets


INFO:tensorflow:Assets written to: /var/folders/3h/stbwvzgs2pg2db9fz55n3yg80000gn/T/tmpsrrha_mq/assets
2023-10-09 16:03:05.896835: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:378] Ignored output_format.
2023-10-09 16:03:05.896848: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:381] Ignored drop_control_dependency.
2023-10-09 16:03:05.897601: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: /var/folders/3h/stbwvzgs2pg2db9fz55n3yg80000gn/T/tmpsrrha_mq
2023-10-09 16:03:05.899957: I tensorflow/cc/saved_model/reader.cc:51] Reading meta graph with tags { serve }
2023-10-09 16:03:05.899963: I tensorflow/cc/saved_model/reader.cc:146] Reading SavedModel debug info (if present) from: /var/folders/3h/stbwvzgs2pg2db9fz55n3yg80000gn/T/tmpsrrha_mq
2023-10-09 16:03:05.904268: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:382] MLIR V1 optimization pass is not enabled
2023-10-09 16:03:05.906658: I tensorflow/cc/saved_model/load

In [5]:
# Persist the quantized model as a .tflite file
# (the relative path is resolved against the current working directory)

with open('quantized_model.tflite', mode='wb') as tflite_file:
    tflite_file.write(quantized_model)

In [None]:
# Open the test set stored as a TFRecord file
# NOTE(review): TFRecordDataset yields raw serialized records, while the inference
# cell unpacks every sample into (input, label). A parsing `.map(...)` step is
# presumably still required here -- confirm against the schema used to write the file.

test_records_path = 'test_data.tfrecord'
test_dataset = tf.data.TFRecordDataset(filenames=test_records_path)

In [None]:
# Run the quantized model over the test set and collect its raw outputs.
#
# `quantized_model` is the serialized model produced by `converter.convert()`,
# not a runnable object, so it has to be wrapped in a TFLite interpreter (and its
# tensors allocated) before anything can be fed through it.
# (`tf` comes from the import cell at the top of the notebook.)
interpreter = tf.lite.Interpreter(model_content=quantized_model)
interpreter.allocate_tensors()

# Assumes a single-input / single-output model -- TODO confirm for this model.
input_details = interpreter.get_input_details()[0]
output_details = interpreter.get_output_details()[0]
input_index = input_details['index']
output_index = output_details['index']

predictions = []
true_labels = []  # kept alongside the predictions so metrics can be computed later

# NOTE(review): assumes every element of `test_dataset` is an (input, label) pair.
# A raw TFRecordDataset yields serialized records instead, so the dataset must be
# parsed before this loop can unpack it -- confirm the parsing step exists.
for sample in test_dataset:
    input_data, true_label = sample

    # set_tensor requires an array whose dtype matches the model input.
    # NOTE(review): the shape must match too (including any batch dimension);
    # add a leading axis here if the dataset elements are unbatched.
    input_array = tf.cast(input_data, input_details['dtype']).numpy()
    interpreter.set_tensor(input_index, input_array)

    interpreter.invoke()

    # get_tensor returns a copy, so it is safe to keep across iterations.
    output_data = interpreter.get_tensor(output_index)

    predictions.append(output_data)
    true_labels.append(true_label)
