# TensorFlow Lite: Model Optimization for On-Device Machine Learning

## Base Model Training
Import the necessary libraries and packages.

In [None]:
import os
import tempfile
import numpy as np
import tensorflow as tf
from tensorflow import keras
import tensorflow_datasets as tfds
from tensorflow.keras.models import Model
import tensorflow_model_optimization as tfmot
from tensorflow.keras.layers import Dropout, Dense, BatchNormalization
%load_ext tensorboard

## Load Dataset

We can directly import the dataset from **TensorFlow Datasets (tfds)**. We will split the dataset into training, validation and testing sets with a split ratio of 0.7:0.2:0.1. The as_supervised parameter is set to True because we need the labels of the images for classification.


In [None]:
# Load the Cats vs Dogs dataset, slicing the single 'train' split into
# 70% / 20% / 10% portions for training, validation and testing.
split_spec = ['train[:70%]', 'train[70%:90%]', 'train[90%:]']
datasets, info = tfds.load(
    'cats_vs_dogs',
    split=split_spec,
    shuffle_files=True,
    as_supervised=True,
    with_info=True,
)
train_ds, val_ds, test_ds = datasets

Let us now have a look at the dataset information held in the ***info*** object returned by ***tfds.load()***. The dataset has two classes, labelled ‘cat’ and ‘dog’, with 16283 training, 4653 validation and 2326 testing images.

In [None]:
# Report the label metadata stored in the dataset info object.
label_feature = info.features['label']
print(f"Number of  Classes: {label_feature.num_classes}")
print(f"Classes : {label_feature.names}")

# Number of examples in each split, converted from tensors to plain numbers.
NUM_TRAIN_IMAGES = tf.data.experimental.cardinality(train_ds).numpy()
NUM_VAL_IMAGES = tf.data.experimental.cardinality(val_ds).numpy()
NUM_TEST_IMAGES = tf.data.experimental.cardinality(test_ds).numpy()

print(f"Training Images: {NUM_TRAIN_IMAGES}")
print(f"Validation Images: {NUM_VAL_IMAGES}")
print(f"Testing Images: {NUM_TEST_IMAGES}")

Let us now have a look at a few images and their corresponding labels in the training dataset.  The ***tfds.visualization.show_examples()*** function enables us to do so in a single line of code!

In [None]:
# Display a few training images together with their labels.
vis = tfds.visualization.show_examples(ds=train_ds, ds_info=info)

We have chosen 16 as the batch size and 224x224 as the image size so that the dataset can be processed effectively and efficiently. We will then resize the images in the dataset and use buffered prefetching to yield data from the disk.

In [None]:
# Batch size and the (height, width) every image is resized to.
batch_size = 16
img_size = [224, 224]


def _resize_example(image, label):
  """Resize one image to `img_size`, passing its label through unchanged."""
  return tf.image.resize(image, img_size), label


def _cache_batch_prefetch(dataset):
  """Cache the dataset, group it into batches and prefetch up to 10 batches."""
  return dataset.cache().batch(batch_size).prefetch(buffer_size=10)


# Resize every split first, then cache / batch / prefetch it.
train_ds = _cache_batch_prefetch(train_ds.map(_resize_example))
val_ds = _cache_batch_prefetch(val_ds.map(_resize_example))
test_ds = _cache_batch_prefetch(test_ds.map(_resize_example))


In order to feed images to the TF Lite model, we need to extract the test images and their labels. We will store them in variables and feed them to the TF Lite model for evaluation.

In [None]:
# Flatten the batched test dataset back into individual (image, label) pairs
# and keep them in plain Python lists for the TF Lite evaluation loop.
test_images, test_labels = [], []
unbatched_test_ds = test_ds.take(len(test_ds)).unbatch()
for single_image, single_label in unbatched_test_ds:
  test_images.append(single_image)
  test_labels.append(single_label)

We have chosen the EfficientNet B0 model pre-trained on the ImageNet dataset for image classification. EfficientNets are state-of-the-art image classification models that significantly outperform other ConvNets.

Let’s import the model now. We have set the input image size to 224x224 pixels and the pooling layer to GlobalMaxPooling2D. We have unfrozen all the layers of the model so that they are trainable. We have added Dense layers on top of the pre-trained model, along with Dropout and BatchNormalization layers to reduce overfitting.


In [None]:
# Defining the model architecture: an EfficientNetB0 backbone with ImageNet
# weights, no classification head, and global max pooling on its output.
# NOTE: the variable keeps its existing name `resnet` although the backbone is
# EfficientNetB0.
resnet = tf.keras.applications.EfficientNetB0(
    include_top=False,
    weights='imagenet',
    input_shape=(224, 224, 3),
    pooling='max',
)

# Unfreezing all the layers of the model. The flag must be set on each layer
# object; assigning to a standalone variable inside the loop has no effect.
for layer in resnet.layers:
  layer.trainable = True

# Adding Dense, BatchNormalization, and Dropout layers to the base model.
x = Dense(512, activation='relu')(resnet.output)
x = BatchNormalization()(x)
x = Dense(64, activation='relu')(x)
x = Dropout(0.2)(x)
# Two-way softmax output, one unit per class.
predictions = Dense(2, activation='softmax')(x)

# Define the input and output layers of the model.
model = Model(inputs=resnet.input, outputs=predictions)
# The labels are integer class ids and the model outputs probabilities, hence
# sparse categorical cross-entropy with from_logits=False.
model.compile(
    optimizer=tf.keras.optimizers.Adam(0.0001),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    metrics=["accuracy"],
)
model.summary()

We are using Model Saving Callback and the Reduce LR Callback.


1.   Model Saving Callback saves model with best validation accuracy
2.   Reduce LR Callback reduces the learning rate by a factor of 0.1 if validation loss remains the same for 3 consecutive epochs.

In [None]:
# Defining file path.
filepath = '/content/EfnetB0/model.h5'
# Create the output directory up front so that every later write into it
# (checkpoints, saved model, TF Lite files) finds it in place.
os.makedirs(os.path.dirname(filepath), exist_ok=True)

# Defining model Save Callback and Reduce Learning Rate Callback for achieving better results.
# The checkpoint keeps only the full model with the highest validation accuracy.
model_save = tf.keras.callbacks.ModelCheckpoint(
    filepath,
    monitor="val_accuracy",
    verbose=0,
    save_best_only=True,
    save_weights_only=False,
    mode="max",
    save_freq="epoch")

# Multiply the learning rate by 0.1 when the validation loss has not improved
# by at least min_delta for 3 consecutive epochs. The validation loss is
# monitored (rather than the training loss) to match the behaviour described
# in the notebook text.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss',
                                                 factor=0.1,
                                                 patience=3,
                                                 verbose=1,
                                                 min_delta=5*1e-3,
                                                 min_lr=5*1e-9)

callback = [reduce_lr, model_save]

We will now train the model using the ***model.fit()*** method. We will pass the training dataset and validation dataset and train the model for 15 epochs.

In [None]:
# Training the model for 15 epochs.
# train_ds and val_ds are already batched, so the step counts are left for
# Keras to infer from the datasets. Dividing the number of batches by
# batch_size again would restrict every epoch to a small fraction of the data.
# The `shuffle` argument is omitted because Keras ignores it for tf.data inputs.
model.fit(
    train_ds,
    epochs=15,
    validation_data=val_ds,
    callbacks=callback,
)

Let’s check the model’s performance on the test set.

In [None]:
# Score the trained Keras model on the test dataset. With a single metric
# compiled in, evaluate() returns [loss, accuracy]; only the accuracy is kept.
baseline_metrics = model.evaluate(test_ds, verbose=0)
baseline_model_accuracy = baseline_metrics[1]
print('Baseline test accuracy:', baseline_model_accuracy)

In [None]:
# Persist the trained Keras model (architecture and weights) as an HDF5 file.
baseline_model_path = '/content/EfnetB0/efnetb0_saved_model.h5'
model.save(baseline_model_path)

In [None]:
# Function for evaluating TFLite model over test images.
def evaluate(interpreter, images=None, labels=None):
  """Return the classification accuracy of a TF Lite interpreter.

  Each image is fed to the interpreter on its own (batch of one) and the index
  of the largest output value is taken as the predicted class.

  Args:
    interpreter: A TF Lite interpreter whose tensors are already allocated.
    images: Iterable of single images without a batch dimension. Defaults to
      the notebook-level `test_images`.
    labels: Ground-truth class ids aligned with `images`. Defaults to the
      notebook-level `test_labels`.

  Returns:
    The fraction of images whose predicted class equals its label.
  """
  if images is None:
    images = test_images
  if labels is None:
    labels = test_labels

  input_details = interpreter.get_input_details()[0]
  input_index = input_details["index"]
  # The cast target must be the dtype of the *input* tensor; reading it from
  # the output details only works when both tensors happen to share a dtype.
  input_dtype = input_details["dtype"]
  output_index = interpreter.get_output_details()[0]["index"]

  # Integer-input models expect quantized values: q = real / scale + zero_point.
  # A scale of 0 means the tensor carries no quantization parameters.
  scale, zero_point = input_details.get("quantization", (0.0, 0))
  quantize_input = bool(scale) and np.issubdtype(input_dtype, np.integer)
  if quantize_input:
    limits = np.iinfo(input_dtype)

  predictions = []
  for i, image in enumerate(images):
    if i % 100 == 0:
      print('Evaluated on {n} results so far.'.format(n=i))

    # Add the batch dimension the model expects.
    batch = np.expand_dims(image, axis=0)
    if quantize_input:
      batch = np.clip(np.round(batch / scale + zero_point), limits.min, limits.max)
    interpreter.set_tensor(input_index, batch.astype(input_dtype))

    # Run inference.
    interpreter.invoke()
    scores = interpreter.get_tensor(output_index)[0]
    predictions.append(np.argmax(scores))

  print('\n')

  # Comparing prediction results with ground truth labels to calculate accuracy.
  return (np.array(predictions) == np.asarray(labels)).mean()

## Quantization

Quantization works by reducing the precision of the numbers used to represent a model's parameters, which by default are 32-bit floating-point numbers. This results in a smaller model size and faster computation.

### Float 16 Quantization


In Float-16 quantization, weights are converted to 16-bit floating-point values. This results in a 2x reduction in model size. There is a significant reduction in model size in exchange for minimal impacts to latency and accuracy.

In [None]:
# Destination of the float16-quantized TF Lite flatbuffer.
fp16_model_path = '/content/EfnetB0/fp_16_model.tflite'

# Build the converter from the trained Keras model.
converter = tf.lite.TFLiteConverter.from_keras_model(model)

# Float16 quantization: default optimizations with float16 as the supported type.
converter.target_spec.supported_types = [tf.float16]
converter.optimizations = [tf.lite.Optimize.DEFAULT]

# Run the conversion; the result is the serialized model as bytes.
tflite_fp16_model = converter.convert()

# Write the serialized model to disk.
with open(fp16_model_path, 'wb') as fp16_file:
  fp16_file.write(tflite_fp16_model)

We have passed the float16 type to ***converter.target_spec.supported_types*** to specify the type of quantization. The rest of the code is the same as the general procedure for converting a model to TF Lite.


Let’s check this Float 16 quantized TF Lite’s model performance on the Test Set.


In [None]:
# Load the float16 TF Lite model into an interpreter and allocate its tensors.
interpreter = tf.lite.Interpreter(model_path='/content/EfnetB0/fp_16_model.tflite')
interpreter.allocate_tensors()

# Measure accuracy on the test images and print it next to the Keras baseline,
# both as percentages.
test_accuracy = evaluate(interpreter)
for caption, score in (
    ('Float 16 Quantized TFLite Model Test Accuracy:', test_accuracy),
    ('Baseline Keras Model Test Accuracy:', baseline_model_accuracy),
):
  print(caption, score*100)

### Integer Quantization

Integer quantization is an optimization strategy that converts 32-bit floating-point numbers (such as weights and activation outputs) to the nearest 8-bit fixed-point numbers. This results in a smaller model and increased inferencing speed.

The integer quantization requires a representative dataset, i.e. a few images from the training dataset, for the conversion to happen.


In [None]:
# Reload the Keras model from the checkpoint file and print its architecture.
checkpoint_path = '/content/EfnetB0/model.h5'
model = tf.keras.models.load_model(filepath=checkpoint_path)
model.summary()

In [None]:
# Passing the baseline Keras model to TFLite converter.
converter = tf.lite.TFLiteConverter.from_keras_model(model)

# Defining the representative dataset from training images.
def representative_data_gen():
  """Yield 100 single-image batches from the training set for calibration.

  The calibration samples come from `train_ds`, not from the test images, so
  the data used to measure accuracy later is never seen during quantization.
  """
  # train_ds is batched; unbatch it and re-batch to one image per sample.
  for image, _ in train_ds.unbatch().batch(1).take(100):
    yield [image]

converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_data_gen

# Using integer quantization.
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]

# Setting the input and output tensors to uint8 (APIs added in r2.3).
converter.inference_input_type = tf.uint8
converter.inference_output_type = tf.uint8

# Converting the model.
int_quant_model = converter.convert()

# Saving the integer quantized TFLite model.
with open('/content/EfnetB0/int_quant_model.tflite', 'wb') as f:
  f.write(int_quant_model)

Let’s evaluate the obtained Integer Quantized TF Lite model on Test Dataset.

In [None]:
# Load the integer-quantized TF Lite model and allocate its tensors.
int_quant_model_path = '/content/EfnetB0/int_quant_model.tflite'
interpreter = tf.lite.Interpreter(model_path=int_quant_model_path)
interpreter.allocate_tensors()

# Accuracy on the test images, reported as a percentage alongside the Keras baseline.
test_accuracy = evaluate(interpreter)
int_quant_accuracy_pct = test_accuracy*100
baseline_accuracy_pct = baseline_model_accuracy*100
print('Integer Quantized TFLite Model Test Accuracy:', int_quant_accuracy_pct)
print('Baseline Keras Model Test Accuracy:', baseline_accuracy_pct)

### Dynamic Range Quantization

In Dynamic Range Quantization, weights are converted to 8-bit precision values. Dynamic range quantization achieves a 4x reduction in the model size.

In [None]:
# Destination of the dynamic-range-quantized TF Lite flatbuffer.
dynamic_model_path = '/content/EfnetB0/dynamic_quant_model.tflite'

# Build the converter from the Keras model. Enabling only the default
# optimizations, with no further settings, requests dynamic range quantization.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]

# Convert, then write the serialized model to disk.
tflite_quant_model = converter.convert()
with open(dynamic_model_path, 'wb') as dynamic_file:
  dynamic_file.write(tflite_quant_model)

Let’s evaluate this TF Lite model on the test dataset.

In [None]:
# Load the dynamic-range-quantized TF Lite model and allocate its tensors.
interpreter = tf.lite.Interpreter(model_path='/content/EfnetB0/dynamic_quant_model.tflite')
interpreter.allocate_tensors()

# Accuracy on the test images, printed as a percentage next to the Keras baseline.
test_accuracy = evaluate(interpreter)
accuracy_report = (
    ('Dynamically  Quantized TFLite Model Test Accuracy:', test_accuracy),
    ('Baseline Keras Model Test Accuracy:', baseline_model_accuracy),
)
for report_label, report_value in accuracy_report:
  print(report_label, report_value*100)