In [32]:
import tensorflow as tf
import os
import tensorflow_model_optimization as tfmot
from tensorflow.keras.preprocessing import image_dataset_from_directory
import numpy as np
import tempfile
import matplotlib.pyplot as plotter_lib
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Flatten
from keras.layers.core import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing import image_dataset_from_directory

**TL;DR**
No significant change in model size while pruning; Quantized TFlite model could be worth it, but requires big change in pipeline architecture caused by the TFlite architecture


- Size of gzipped baseline Keras model: 89477336.00 bytes
- Size of gzipped pruned Keras model: 89477336.00 bytes
- Size of gzipped pruned TFlite model: 89287269.00 bytes
- Size of gzipped pruned and quantized TFlite model: 20650452.00 bytes

In [None]:
test_ds = image_dataset_from_directory(
        directory = "../../data/test",
        seed = 1324,
        label_mode = 'categorical',
        image_size = (350, 350),
        batch_size=32
)

train_ds = image_dataset_from_directory(
        directory = "../../data/train",
        seed = 1324,
        label_mode = 'categorical',
        image_size = (350, 350),
        batch_size=32
    )

valid_ds = image_dataset_from_directory(
        directory = "../../data/valid",
        seed = 1324,
        label_mode = 'categorical',
        image_size = (350, 350),
        batch_size=32
    )

In [33]:
# initialize model
model = Sequential()

# import pretrained model
resnet_model = tf.keras.applications.ResNet50(
    include_top=False,
    input_shape=(350,350,3),
    pooling='avg',
    weights='imagenet'
)

# exclude pretrained model weights from being recalculated
for layer in resnet_model.layers:
    layer.trainable = False

# add pretrained ResNet50 model to sequential model
model.add(resnet_model)

# add additional layers to model
model.add(Flatten())
model.add(Dense(512, tf.nn.relu))
model.add(Dense(2, tf.nn.softmax))

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 resnet50 (Functional)       (None, 2048)              23587712  
                                                                 
 flatten (Flatten)           (None, 2048)              0         
                                                                 
 dense (Dense)               (None, 512)               1049088   
                                                                 
 dense_1 (Dense)             (None, 2)                 1026      
                                                                 
Total params: 24,637,826
Trainable params: 1,050,114
Non-trainable params: 23,587,712
_________________________________________________________________


In [34]:
epochs = 1
model.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])
model.fit(train_ds, validation_data=valid_ds, epochs=epochs)



<keras.callbacks.History at 0x7f3d44c2ed00>

In [35]:
_, baseline_model_accuracy = model.evaluate(test_ds)

print('Baseline test accuracy:', baseline_model_accuracy)

_, keras_file = tempfile.mkstemp('.h5')
tf.keras.models.save_model(model, keras_file, include_optimizer=False)
print('Saved baseline model to:', keras_file)

Baseline test accuracy: 0.9863448739051819
Saved baseline model to: /tmp/tmpjw4q6k4j.h5


In [38]:
prune_low_magnitude = tfmot.sparsity.keras.prune_low_magnitude

# Compute end step to finish pruning after 2 epochs.
batch_size = 32
epochs = 5
validation_split = 0 # 10% of training set will be used for validation set. 

num_images = 30247  * (1 - validation_split)
end_step = np.ceil(num_images / batch_size).astype(np.int32) * epochs


# Define the pruning parameters
pruning_params = {
    'pruning_schedule': tfmot.sparsity.keras.PolynomialDecay(
        initial_sparsity=0.50,
        final_sparsity=0.80,
        begin_step=0,
        end_step=5000
    )
}

model_for_pruning = prune_low_magnitude(model, **pruning_params)

# `prune_low_magnitude` requires a recompile.
model_for_pruning.compile(optimizer=tf.keras.optimizers.Adam(), loss='binary_crossentropy', metrics=['accuracy'])

model_for_pruning.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 resnet50 (Functional)       (None, 2048)              47042852  
                                                                 
 prune_low_magnitude_flatten  (None, 2048)             1         
  (PruneLowMagnitude)                                            
                                                                 
 prune_low_magnitude_dense (  (None, 512)              2097666   
 PruneLowMagnitude)                                              
                                                                 
 prune_low_magnitude_dense_1  (None, 2)                2052      
  (PruneLowMagnitude)                                            
                                                                 
Total params: 49,142,571
Trainable params: 1,050,114
Non-trainable params: 48,092,457
____________________________________

In [39]:
model_for_pruning.evaluate(test_ds)



[0.044921599328517914, 0.9863448739051819]

In [None]:
tf.keras.models.save_model(model_for_pruning, "models/pruned_model/", include_optimizer=False)

In [44]:
model_for_export = tfmot.sparsity.keras.strip_pruning(model_for_pruning)
model_for_export.compile(optimizer=tf.keras.optimizers.Adam(), loss='binary_crossentropy', metrics=['accuracy'])

In [45]:
model_for_export.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 resnet50 (Functional)       (None, 2048)              23587712  
                                                                 
 flatten (Flatten)           (None, 2048)              0         
                                                                 
 dense (Dense)               (None, 512)               1049088   
                                                                 
 dense_1 (Dense)             (None, 2)                 1026      
                                                                 
Total params: 24,637,826
Trainable params: 1,050,114
Non-trainable params: 23,587,712
_________________________________________________________________


In [46]:
model_for_export.evaluate(test_ds)



[0.04492153227329254, 0.9863448739051819]

In [None]:

tf.keras.models.save_model(model_for_export, "models/stripped_model/", include_optimizer=False)

In [47]:
converter = tf.lite.TFLiteConverter.from_keras_model(model_for_export)
pruned_tflite_model = converter.convert()

_, pruned_tflite_file = tempfile.mkstemp('.tflite')

with open(pruned_tflite_file, 'wb') as f:
  f.write(pruned_tflite_model)

print('Saved pruned TFLite model to:', pruned_tflite_file)

INFO:tensorflow:Assets written to: /tmp/tmpvoq752je/assets


INFO:tensorflow:Assets written to: /tmp/tmpvoq752je/assets


Saved pruned TFLite model to: /tmp/tmpqcchwqyn.tflite


2023-07-15 14:58:31.997766: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:357] Ignored output_format.
2023-07-15 14:58:31.997817: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:360] Ignored drop_control_dependency.


In [25]:
def get_gzipped_model_size(file):
  # Returns size of gzipped model, in bytes.
  import os
  import zipfile

  _, zipped_file = tempfile.mkstemp('.zip')
  with zipfile.ZipFile(zipped_file, 'w', compression=zipfile.ZIP_DEFLATED) as f:
    f.write(file)

  return os.path.getsize(zipped_file)


In [12]:
print("Size of gzipped baseline Keras model: %.2f bytes" % (get_gzipped_model_size(keras_file)))
print("Size of gzipped pruned Keras model: %.2f bytes" % (get_gzipped_model_size(pruned_keras_file)))
print("Size of gzipped pruned TFlite model: %.2f bytes" % (get_gzipped_model_size(pruned_tflite_file)))

Size of gzipped baseline Keras model: 89477336.00 bytes
Size of gzipped pruned Keras model: 89477336.00 bytes
Size of gzipped pruned TFlite model: 89287269.00 bytes


In [50]:
converter = tf.lite.TFLiteConverter.from_keras_model(model_for_export)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
quantized_and_pruned_tflite_model = converter.convert()

_, quantized_and_pruned_tflite_file = tempfile.mkstemp('.tflite')

with open(quantized_and_pruned_tflite_file, 'wb') as f:
  f.write(quantized_and_pruned_tflite_model)

print('Saved quantized and pruned TFLite model to:', quantized_and_pruned_tflite_file)

print("Size of gzipped baseline Keras model: %.2f bytes" % (get_gzipped_model_size(keras_file)))
print("Size of gzipped pruned and quantized TFlite model: %.2f bytes" % (get_gzipped_model_size(quantized_and_pruned_tflite_file)))


INFO:tensorflow:Assets written to: /tmp/tmpb4vs2tug/assets


INFO:tensorflow:Assets written to: /tmp/tmpb4vs2tug/assets
2023-07-15 14:59:35.977494: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:357] Ignored output_format.
2023-07-15 14:59:35.977552: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:360] Ignored drop_control_dependency.


Saved quantized and pruned TFLite model to: /tmp/tmpbha3y0ls.tflite
Size of gzipped baseline Keras model: 91415683.00 bytes
Size of gzipped pruned and quantized TFlite model: 21231324.00 bytes
