# SETUP

## is GPU on?

In [None]:
# from tensorflow.python.client import device_lib
# print(device_lib.list_local_devices())

## boilerplate

In [1]:
from tensorflow.keras.callbacks import Callback, EarlyStopping
from numpy import arange

MODEL_NAME = 'mobilenet'
IMG_SIZE = (224, 224)
INPUT_SHAPE=(224, 224, 3)
CLASSES = 2
FT_BLOCK = 10 # FROM feature extractor TO fine tuning scratch
BATCH_SIZE = 64 # these data points will be passed as a batch at one time to the network
# PR_RATIO = 50
tag = 'mobilenet_BS64_FT100_PR50'

INPUT_MODEL = f'../../models/PR/{tag}'
MODEL_DIR = '../../models/QT/'
MODEL_FILE_TFLITE = MODEL_DIR+f'{tag}.tflite'
MODEL_FILE_TFLITE_FT16 = MODEL_DIR+f'{tag}_ft16.tflite' 
MODEL_FILE_TFLITE_INT16 = MODEL_DIR+f'{tag}_int16.tflite' 
MODEL_FILE_TFLITE_INT8 = MODEL_DIR+f'{tag}_int8.tflite'

## helper functions

In [2]:
from timeit import default_timer as timer
from json import dump
from tensorflow.keras.preprocessing import image_dataset_from_directory
import shutil
from keras.models import load_model
import tempfile
import tensorflow_model_optimization as tfmot
import tensorflow as tf

def get_gzipped_model_size(file):
    # Returns size of gzipped model, in bytes.
    import os
    from zipfile import ZipFile, ZIP_DEFLATED
    import tempfile
    
    _, zipped_file = tempfile.mkstemp('.zip')
    with ZipFile(zipped_file, 'w', compression=ZIP_DEFLATED) as f:
        f.write(file)

    return os.path.getsize(zipped_file) / float(2**20)

def get_file_size(file):
    from os import stat
    return stat(file).st_size / float(2**20)


def unzip_model(target_dir, model_file):
    from os import remove
    from zipfile import ZipFile
    
    with ZipFile(model_file, 'r') as f:
        f.extractall(target_dir)
        target_name = f.namelist()[0]
        tmp = target_dir+'/'+target_name
        pruned_model = load_model(tmp)
        remove(tmp)
        pruned_model.compile(loss='binary_crossentropy', metrics='accuracy', optimizer = 'adam')
    return pruned_model

# LOAD THE PRUNED MODEL

In [3]:
input_model_zip = f'{INPUT_MODEL}.zip'
input_model_dir = f'../../models/PR'

pruned_keras_file = unzip_model(input_model_dir, input_model_zip)
# pruned_keras_file.evaluate(test_ds)

model_for_export = tfmot.sparsity.keras.strip_pruning(pruned_keras_file)

_, pruned_keras_file = tempfile.mkstemp('.h5')
tf.keras.models.save_model(model_for_export, pruned_keras_file, include_optimizer=False)
print('Saved pruned Keras model to:', pruned_keras_file)

target_file = f'{INPUT_MODEL}.h5'
shutil.move(pruned_keras_file, target_file)
print("Size of pruned Keras model: %.2f MB" % (get_file_size(target_file)))
print("Size of gzipped pruned Keras model: %.2f MB" % (get_gzipped_model_size(target_file)))

Saved pruned Keras model to: /tmp/tmpvy6ftjad.h5
Size of pruned Keras model: 12.57 MB
Size of gzipped pruned Keras model: 7.29 MB


# QUANTIZATION

In [4]:
test_dir = '../../data/test/'
test_ds = image_dataset_from_directory(
    directory=test_dir,
    label_mode='binary',
    batch_size=1000,
    image_size=IMG_SIZE)

test_images, test_labels = next(iter(test_ds))

# A helper function to evaluate the TF Lite model using "test" dataset.
def evaluate_model(interpreter):
    import numpy as np
        
    input_index = interpreter.get_input_details()[0]["index"]
    output_index = interpreter.get_output_details()[0]["index"]

    # Run predictions on every image in the "test" dataset.
    prediction = []
    test_time = 0
    for test_image in test_images:
        # Pre-processing: add batch dimension and convert to float32 to match with
        # the model's input data format.
        test_image = np.expand_dims(test_image, axis=0).astype(np.float32)
        interpreter.set_tensor(input_index, test_image)

        # Run inference.
        starttime = timer()
        interpreter.invoke()
        test_time += (timer()-starttime)

        # Post-processing: remove batch dimension and 
        # find the digit with highest probability.
        output = interpreter.tensor(output_index)
        digit = np.argmax(output()[0])
        prediction.append(digit)

    # Compare prediction results with ground truth labels to calculate accuracy.
    accurate_count = 0
    for index in range(len(prediction)):
        if prediction[index] == test_labels[index]:
            accurate_count += 1
    accuracy = accurate_count * 1.0 / len(prediction)

    return accuracy, test_time

Found 1000 files belonging to 2 classes.


## float32

In [5]:
model_for_export = load_model(target_file)
converter = tf.lite.TFLiteConverter.from_keras_model(model_for_export)
pruned_tflite_model = converter.convert()

_, pruned_tflite_file = tempfile.mkstemp('tflite')
with open(pruned_tflite_file, 'wb') as f:
    f.write(pruned_tflite_model)
print("Size of gzipped pruned TFlite model: %.2f MB" % (get_gzipped_model_size(pruned_tflite_file)))

target_file = MODEL_FILE_TFLITE
shutil.move(pruned_tflite_file, target_file)

INFO:tensorflow:Assets written to: /tmp/tmprd53yvru/assets
Size of gzipped pruned TFlite model: 7.15 MB


'/tf/kim/gramstain/model/QT/mobilenet_BS64_FT100_PR50.tflite'

In [126]:
interpreter = tf.lite.Interpreter(model_path=str(MODEL_FILE_TFLITE))
interpreter.allocate_tensors()

acc, e_time = evaluate_model(interpreter)
print(acc*100, e_time)

90.5 36.33864077180624


## float16

In [6]:
converter = tf.lite.TFLiteConverter.from_keras_model(model_for_export)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_types = [tf.float16]
quantized_and_pruned_tflite_model = converter.convert()

_, quantized_and_pruned_tflite_file = tempfile.mkstemp('.tflite')

with open(quantized_and_pruned_tflite_file, 'wb') as f:
  f.write(quantized_and_pruned_tflite_model)
print("Size of gzipped pruned and quantized TFlite (float 16) model: %.2f MB" % (get_gzipped_model_size(quantized_and_pruned_tflite_file)))

target_file = MODEL_FILE_TFLITE_FT16
shutil.move(quantized_and_pruned_tflite_file, target_file)





INFO:tensorflow:Assets written to: /tmp/tmpi6j9fpv7/assets


INFO:tensorflow:Assets written to: /tmp/tmpi6j9fpv7/assets


Size of gzipped pruned and quantized TFlite (float 16) model: 4.02 MB


'/tf/kim/gramstain/model/QT/mobilenet_BS64_FT100_PR50_ft16.tflite'

In [137]:
interpreter = tf.lite.Interpreter(model_path=str(MODEL_FILE_TFLITE_FT16))
interpreter.allocate_tensors()
acc, e_time = evaluate_model(interpreter)
print(acc*100, e_time)

90.4 36.44199927896261


## int16

In [7]:
converter = tf.lite.TFLiteConverter.from_keras_model(model_for_export)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_ops = [tf.lite.OpsSet.EXPERIMENTAL_TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8]
quantized_and_pruned_tflite_model = converter.convert()

_, quantized_and_pruned_tflite_file = tempfile.mkstemp('.tflite')

with open(quantized_and_pruned_tflite_file, 'wb') as f:
  f.write(quantized_and_pruned_tflite_model)
print("Size of gzipped pruned and quantized TFlite (int 16) model: %.2f MB" % (get_gzipped_model_size(quantized_and_pruned_tflite_file)))

target_file = MODEL_FILE_TFLITE_INT16
shutil.move(quantized_and_pruned_tflite_file, target_file)





INFO:tensorflow:Assets written to: /tmp/tmpbb0bb6ow/assets


INFO:tensorflow:Assets written to: /tmp/tmpbb0bb6ow/assets


Size of gzipped pruned and quantized TFlite (int 16) model: 2.22 MB


'/tf/kim/gramstain/model/QT/mobilenet_BS64_FT100_PR50_int16.tflite'

In [141]:
interpreter = tf.lite.Interpreter(model_path=str(MODEL_FILE_TFLITE_INT16))
interpreter.allocate_tensors()
acc, e_time = evaluate_model(interpreter)
print(acc*100, e_time)

90.10000000000001 2437.689823202789


## int8

In [8]:
train_dir = '../../data/train/'
train_ds = image_dataset_from_directory(
    directory=train_dir,
    label_mode='binary',
    batch_size=BATCH_SIZE,
    image_size=IMG_SIZE,
    seed=0,
    validation_split=0.1,
    subset='training')


train_images, train_labels = next(iter(train_ds))

Found 9980 files belonging to 2 classes.
Using 8982 files for training.


In [9]:
def representative_data_gen():
    for input_value in tf.data.Dataset.from_tensor_slices(train_images).batch(1).take(100):
    # Model has only one input so each data point has one element.
        yield [input_value]

In [10]:
converter = tf.lite.TFLiteConverter.from_keras_model(model_for_export)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_data_gen
# Ensure that if any ops can't be quantized, the converter throws an error
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
# Set the input and output tensors to uint8 (APIs added in r2.3)
converter.inference_input_type = tf.uint8
converter.inference_output_type = tf.uint8

_, quantized_and_pruned_tflite_file = tempfile.mkstemp('.tflite')

with open(quantized_and_pruned_tflite_file, 'wb') as f:
  f.write(quantized_and_pruned_tflite_model)
print("Size of gzipped pruned and quantized TFlite (int 8) model: %.2f MB" % (get_gzipped_model_size(quantized_and_pruned_tflite_file)))

target_file = MODEL_FILE_TFLITE_INT8
shutil.move(quantized_and_pruned_tflite_file, target_file)

Size of gzipped pruned and quantized TFlite (int 8) model: 2.22 MB


'/tf/kim/gramstain/model/QT/mobilenet_BS64_FT100_PR50_int8.tflite'

In [286]:
test_dir = '../../data/test/'
test_ds = image_dataset_from_directory(
    directory=test_dir,
    label_mode='binary',
    batch_size=1000,
    image_size=IMG_SIZE)

test_images, test_labels = next(iter(test_ds))

# Helper function to evaluate a TFLite model on all images
def evaluate_model(tflite_file, model_type):

    test_image_indices = range(test_images.shape[0])
    predictions, test_time = run_tflite_model(tflite_file, test_image_indices)

    # Compare prediction results with ground truth labels to calculate accuracy.
    accurate_count = 0
    for index in range(len(predictions)):
        if predictions[index] == test_labels[index]:
            accurate_count += 1
    accuracy = accurate_count * 100 / len(predictions)

    print(f'{model_type} model accuracy is {accuracy} (Number of test samples={len(test_images)})')
    print('execution time: ', test_time)

Found 1000 files belonging to 2 classes.


In [288]:
# Helper function to run inference on a TFLite model
def run_tflite_model(tflite_file, test_image_indices):
    global test_images

    # Initialize the interpreter
    interpreter = tf.lite.Interpreter(model_path=str(tflite_file))
    interpreter.allocate_tensors()

    input_details = interpreter.get_input_details()[0]
#     print('input: ', input_details)
    output_details = interpreter.get_output_details()[0]
#     print('output: ', output_details)

    predictions = []
    test_time = 0
    for i, test_image_index in enumerate(test_image_indices):
        test_image = test_images[test_image_index]
        test_label = test_labels[test_image_index]

        # Check if the input type is quantized, then rescale input data to uint8
        if input_details['dtype'] == np.uint8:
            input_scale, input_zero_point = input_details["quantization"]
            test_image = test_image / input_scale + input_zero_point

        test_image = np.expand_dims(test_image, axis=0).astype(input_details["dtype"])
        interpreter.set_tensor(input_details["index"], test_image)

        starttime = timer()
        interpreter.invoke()
        test_time += (timer()-starttime)

        output = interpreter.get_tensor(output_details["index"])[0]
        predictions.append(output.argmax())
    return predictions, test_time

In [289]:
evaluate_model(MODEL_FILE_TFLITE_INT8, model_type="Quantized")

Quantized model accuracy is 89.4 (Number of test samples=1000)
execution time:  2881.4320004060864
