<a href="https://colab.research.google.com/github/lmEshoo/pruning/blob/master/mobilenetv2_pruning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **MobileNetV2 model pruning**

**Build Model**

In [0]:
# Environment setup: replace whatever TensorFlow build the runtime ships with
# by the pinned 2.1.0 release, then install the model-optimization toolkit
# that provides the pruning API used later in this notebook.
# NOTE(review): TensorFlow is imported on the next line *before* it is
# uninstalled/reinstalled below; presumably the runtime has to be restarted
# after this cell for the pinned build to be the one in use -- TODO confirm.
import tensorflow as tf
# NOTE(review): not the last expression of the cell, so this value is not displayed.
tf.__version__
# !pip uninstall tensorflow -y
! pip uninstall -y tensorflow
! pip uninstall -y tf-nightly
!pip install tensorflow==2.1.0
# ! pip install -q -U tensorflow-gpu==1.14.0

# NOTE(review): unpinned install; consider pinning a release known to work with TF 2.1.0.
! pip install -q tensorflow-model-optimization

Uninstalling tensorflow-1.15.0:
  Successfully uninstalled tensorflow-1.15.0
Collecting tensorflow==2.1.0
[?25l  Downloading https://files.pythonhosted.org/packages/85/d4/c0cd1057b331bc38b65478302114194bd8e1b9c2bbc06e300935c0e93d90/tensorflow-2.1.0-cp36-cp36m-manylinux2010_x86_64.whl (421.8MB)
[K     |████████████████████████████████| 421.8MB 38kB/s 
Collecting tensorboard<2.2.0,>=2.1.0
[?25l  Downloading https://files.pythonhosted.org/packages/d9/41/bbf49b61370e4f4d245d4c6051dfb6db80cec672605c91b1652ac8cc3d38/tensorboard-2.1.1-py3-none-any.whl (3.8MB)
[K     |████████████████████████████████| 3.9MB 61.7MB/s 
Collecting tensorflow-estimator<2.2.0,>=2.1.0rc0
[?25l  Downloading https://files.pythonhosted.org/packages/18/90/b77c328a1304437ab1310b463e533fa7689f4bfc41549593056d812fab8e/tensorflow_estimator-2.1.0-py2.py3-none-any.whl (448kB)
[K     |████████████████████████████████| 450kB 53.2MB/s 
Installing collected packages: tensorboard, tensorflow-estimator, tensorflow
  Found exi

[?25l[K     |███▌                            | 10kB 39.0MB/s eta 0:00:01[K     |███████                         | 20kB 5.4MB/s eta 0:00:01[K     |██████████▌                     | 30kB 7.8MB/s eta 0:00:01[K     |██████████████                  | 40kB 5.3MB/s eta 0:00:01[K     |█████████████████▌              | 51kB 6.5MB/s eta 0:00:01[K     |█████████████████████           | 61kB 7.7MB/s eta 0:00:01[K     |████████████████████████▌       | 71kB 8.8MB/s eta 0:00:01[K     |████████████████████████████    | 81kB 9.9MB/s eta 0:00:01[K     |███████████████████████████████▌| 92kB 11.0MB/s eta 0:00:01[K     |████████████████████████████████| 102kB 7.4MB/s 
[?25h

In [0]:
"""
# Reference
- [Inverted Residuals and Linear Bottlenecks Mobile Networks for
   Classification, Detection and Segmentation]
   (https://arxiv.org/abs/1801.04381)
"""

import tensorflow as tf

# Define ReLU6 activation (ReLU with its output capped at 6).
# This is a single layer instance: _conv_block and _bottleneck both call it,
# so every ReLU6 in the network is the same Keras layer object.
relu6 = tf.keras.layers.ReLU(6.)

def _conv_block(inputs, filters, kernel, strides):
    """Conv2D -> BatchNormalization -> ReLU6.

    # Arguments
        inputs: Tensor fed to the convolution.
        filters: Integer, number of output channels of the convolution.
        kernel: Integer or tuple/list of 2 integers, size of the
            convolution window.
        strides: Integer or tuple/list of 2 integers, convolution strides
            (a single integer applies to both spatial dimensions).

    # Returns
        Output tensor of the ReLU6 activation.
    """
    convolution = tf.keras.layers.Conv2D(
        filters, kernel, padding='same', strides=strides)
    normalization = tf.keras.layers.BatchNormalization()

    # relu6 is the module-level shared activation layer.
    return relu6(normalization(convolution(inputs)))


def _bottleneck(inputs, filters, kernel, t, s, r=False):
    """Bottleneck unit: 1x1 expansion, depthwise conv, linear 1x1 projection.

    # Arguments
        inputs: Tensor fed to the unit.
        filters: Integer, number of output channels of the projection.
        kernel: Integer or tuple/list of 2 integers, size of the depthwise
            convolution window.
        t: Integer, expansion factor; the expansion convolution produces
            ``t`` times the number of input channels.
        s: Integer, stride of the depthwise convolution, applied to both
            spatial dimensions.
        r: Boolean, whether to add ``inputs`` back onto the result
            (residual connection).

    # Returns
        Output tensor.
    """
    expanded_channels = inputs.shape[-1] * t

    # Expansion: 1x1 convolution + BN + ReLU6.
    x = _conv_block(inputs, expanded_channels, (1, 1), (1, 1))

    # Depthwise stage: spatial filtering per channel, then BN + ReLU6.
    depthwise_stage = (
        tf.keras.layers.DepthwiseConv2D(
            kernel, strides=(s, s), depth_multiplier=1, padding='same'),
        tf.keras.layers.BatchNormalization(),
        relu6,
    )
    for layer in depthwise_stage:
        x = layer(x)

    # Projection: 1x1 convolution + BN, with no activation afterwards.
    projection_stage = (
        tf.keras.layers.Conv2D(
            filters, (1, 1), strides=(1, 1), padding='same'),
        tf.keras.layers.BatchNormalization(),
    )
    for layer in projection_stage:
        x = layer(x)

    return tf.keras.layers.add([x, inputs]) if r else x


def _inverted_residual_block(inputs, filters, kernel, t, strides, n):
    """Stack ``n`` bottleneck units.

    The first unit uses ``strides`` and no residual connection; each of the
    remaining ``n - 1`` units uses stride 1 with the residual connection
    enabled.

    # Arguments
        inputs: Tensor fed to the first bottleneck.
        filters: Integer, number of output channels of every bottleneck.
        kernel: Integer or tuple/list of 2 integers, size of the depthwise
            convolution window.
        t: Integer, expansion factor passed to every bottleneck.
        strides: Integer, stride of the first bottleneck.
        n: Integer, total number of bottleneck units in the block.

    # Returns
        Output tensor.
    """
    x = _bottleneck(inputs, filters, kernel, t, strides)

    for _ in range(n - 1):
        x = _bottleneck(x, filters, kernel, t, s=1, r=True)

    return x


def MobileNetV2(input_shape, k, plot_model=False):
    """Build the MobileNetV2 classifier.

    The model summary is printed as a side effect of building the model.

    # Arguments
        input_shape: Tuple/list of 3 integers, shape of one input image.
        k: Integer, number of classes.
        plot_model: Boolean, whether to also write the architecture
            diagram to ``model.png``.

    # Returns
        The (uncompiled) MobileNetV2 Keras model.
    """
    # (filters, expansion factor t, stride of first unit, repeat count n)
    stage_configs = (
        (16, 1, 1, 1),
        (24, 6, 2, 2),
        (32, 6, 2, 3),
        (64, 6, 2, 4),
        (96, 6, 1, 3),
        (160, 6, 2, 3),
        (320, 6, 1, 1),
    )

    inputs = tf.keras.layers.Input(shape=input_shape, name='input')
    x = _conv_block(inputs, 32, (3, 3), strides=(2, 2))

    for stage_filters, expansion, stage_stride, repeats in stage_configs:
        x = _inverted_residual_block(
            x, stage_filters, (3, 3),
            t=expansion, strides=stage_stride, n=repeats)

    # Classification head: 1x1 conv to 1280 channels, global pooling, then a
    # 1x1 convolution producing one value per class followed by softmax.
    x = _conv_block(x, 1280, (1, 1), strides=(1, 1))
    x = tf.keras.layers.GlobalAveragePooling2D()(x)
    x = tf.keras.layers.Reshape((1, 1, 1280))(x)
    x = tf.keras.layers.Dropout(0.3, name='Dropout')(x)
    x = tf.keras.layers.Conv2D(k, (1, 1), padding='same')(x)
    x = tf.keras.layers.Activation('softmax', name='final_activation')(x)
    output = tf.keras.layers.Reshape((k,), name='output')(x)

    model = tf.keras.models.Model(inputs, output)
    model.summary()
    if plot_model:
        tf.keras.utils.plot_model(model, to_file='model.png', show_shapes=True)

    return model


In [0]:
import cv2
import os
from keras.datasets import cifar100



train_dir = 'train'
val_dir = 'validation'

(X_train, y_train), (X_test, y_test) = cifar100.load_data(label_mode='fine')


def _export_images(images, labels, root, size=(224, 224)):
    """Write every image to ``<root>/<label>/<index>.jpg``.

    # Arguments
        images: Array of RGB images, indexed along the first axis.
        labels: Array of labels aligned with ``images``; ``labels[i][0]``
            is used as the class sub-directory name.
        root: Directory under which the per-class folders are created.
        size: Tuple of 2 integers, target (width, height) passed to
            ``cv2.resize``.
    """
    for index, (image, label) in enumerate(zip(images, labels)):
        class_dir = os.path.join(root, str(label[0]))
        os.makedirs(class_dir, exist_ok=True)

        resized = cv2.resize(image, size, interpolation=cv2.INTER_CUBIC)
        # The dataset arrays are RGB, but cv2.imwrite interprets a 3-channel
        # array as BGR. Convert first, otherwise the saved JPEGs have their
        # red and blue channels swapped.
        cv2.imwrite(os.path.join(class_dir, str(index) + '.jpg'),
                    cv2.cvtColor(resized, cv2.COLOR_RGB2BGR))


_export_images(X_train, y_train, train_dir)
_export_images(X_test, y_test, val_dir)


Using TensorFlow backend.


Downloading data from https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz


In [0]:
# Sanity check: display the shape of the raw training image array.
X_train.shape

(50000, 32, 32, 3)

In [0]:
"""
Train the MobileNet V2 model (https://github.com/xiaochus/MobileNetV2)
"""
import os
import sys
import argparse
import pandas as pd
import tensorflow as tf

from keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import EarlyStopping
from keras.layers import Conv2D, Reshape, Activation
from keras.models import Model

def generate(batch, size):
    """Build the training and validation image generators.

    Images are read from ``./train`` and ``./validation``. Training images
    are augmented; validation images are only rescaled.

    # Arguments
        batch: Integer, batch size.
        size: Integer, images are loaded as ``size`` x ``size``.

    # Returns
        train_generator: generator over the augmented training set.
        validation_generator: generator over the validation set.
        count1: Integer, number of files found under the training directory.
        count2: Integer, number of files found under the validation directory.
    """
    train_dir = './train'
    val_dir = './validation'

    # Augmentation is applied to the training data only.
    augmenting_datagen = ImageDataGenerator(
        rescale=1. / 255,
        shear_range=0.2,
        zoom_range=0.2,
        rotation_range=90,
        width_shift_range=0.2,
        height_shift_range=0.2,
        horizontal_flip=True)
    plain_datagen = ImageDataGenerator(rescale=1. / 255)

    flow_options = dict(
        target_size=(size, size),
        batch_size=batch,
        class_mode='categorical')

    train_generator = augmenting_datagen.flow_from_directory(
        train_dir, **flow_options)
    validation_generator = plain_datagen.flow_from_directory(
        val_dir, **flow_options)

    def _count_files(top):
        # Every file in the tree is counted, whatever its type.
        return sum(len(files) for _, _, files in os.walk(top))

    count1 = _count_files(train_dir)
    count2 = _count_files(val_dir)

    return train_generator, validation_generator, count1, count2


def train(batch=64, epochs=20, num_classes=100, size=32):
    """Train a fresh MobileNetV2 on the exported image folders.

    The training history is written to ``model/hist.csv`` and the trained
    weights to ``model/weights.h5``.

    # Arguments
        batch: Integer, batch size.
        epochs: Integer, maximum number of training epochs.
        num_classes: Integer, number of output classes of the model.
        size: Integer, side length of the square input images.

    # Returns
        Tuple ``(model, train_generator, validation_generator, count1,
        count2, batch)``: the trained model, both data generators, the
        train/validation file counts and the batch size that was used.
    """
    train_generator, validation_generator, count1, count2 = generate(batch, size)

    print("{} classes found".format(num_classes))

    model = MobileNetV2((size, size, 3), num_classes, plot_model=True)

    optimizer = tf.keras.optimizers.Adam()
    stop_early = tf.keras.callbacks.EarlyStopping(
        monitor='val_acc', patience=30, verbose=1, mode='auto')
    model.compile(
        loss='categorical_crossentropy',
        optimizer=optimizer,
        metrics=['acc'])

    history = model.fit_generator(
        train_generator,
        validation_data=validation_generator,
        steps_per_epoch=count1 // batch,
        validation_steps=count2 // batch,
        epochs=epochs,
        callbacks=[stop_early])

    output_dir = 'model'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    history_frame = pd.DataFrame.from_dict(history.history)
    history_frame.to_csv('model/hist.csv', encoding='utf-8', index=False)
    model.save_weights('model/weights.h5')

    return model, train_generator, validation_generator, count1, count2, batch

# if __name__ == '__main__':
#     main(sys.argv)

In [0]:
# Train the baseline model with train()'s default arguments. `count1` and
# `batch` are reused by the pruning cells further down.
initial_model, train_generator, validation_generator, count1, count2, batch = train()


Found 50000 images belonging to 100 classes.
Found 10000 images belonging to 100 classes.
100 classes found
Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input (InputLayer)              [(None, 32, 32, 3)]  0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 16, 16, 32)   896         input[0][0]                      
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 16, 16, 32)   128         conv2d[0][0]                     
__________________________________________________________________________________________________
re_lu (ReLU)                    multiple             0           batch_normalization[

In [0]:
# from tensorflow.keras.utils import plot_model
# print(initial_model.summary())
# plot_model(initial_model, to_file='model.png', show_shapes=True)

# **Pruning**

In [0]:
import numpy as np
from tensorflow_model_optimization.sparsity import keras as sparsity


# Debug output of a private Keras attribute.
# NOTE(review): presumably checks that the model is a functional (graph)
# model before it is handed to prune_low_magnitude -- TODO confirm.
print(initial_model._is_graph_network)

print('Pruning')

# The pruning fine-tune is pinned to the first GPU below, so fail early with
# a readable message when none is available.
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
  print(
      '\n\nThis error most likely means that this notebook is not '
      'configured to use a GPU.  Change this in Notebook Settings via the '
      'command palette (cmd/ctrl-shift-P) or the Edit menu.\n\n')
  raise SystemError('GPU device not found')

print('Found GPU at: {}'.format(device_name))

with tf.device('/device:GPU:0'):
  epochs = 4
  # Last step of the sparsity schedule: ceil(samples / batch) steps per epoch
  # times the number of fine-tuning epochs. `batch` is the value returned by
  # train() and is also the batch_size passed to fit(), so the schedule stays
  # in sync with the actual number of training steps.
  end_step = np.ceil(1.0 * count1 / batch).astype(np.int32) * epochs
  print(end_step)

  # Sparsity ramps from 50% to 90% between step 0 and end_step; the schedule
  # is applied every 100 steps.
  new_pruning_params = {
        'pruning_schedule': sparsity.PolynomialDecay(initial_sparsity=0.50,
                                                    final_sparsity=0.90,
                                                    begin_step=0,
                                                    end_step=end_step,
                                                    frequency=100)
  }

  new_pruned_model = sparsity.prune_low_magnitude(initial_model, **new_pruning_params)
  new_pruned_model.summary()

  # Sparse loss: the pruned model is fitted on the integer labels in y_train
  # rather than on one-hot vectors.
  new_pruned_model.compile(
      loss=tf.keras.losses.sparse_categorical_crossentropy,
      optimizer='adam',
      metrics=['accuracy'])


logdir = './'
print('Writing training logs to ' + logdir)

# UpdatePruningStep drives the pruning schedule during fit();
# PruningSummaries writes pruning summaries to `logdir`.
callbacks = [
    sparsity.UpdatePruningStep(),
    sparsity.PruningSummaries(log_dir=logdir, profile_batch=0)
]



True
Pruning
Found GPU at: /device:GPU:0
3128
Instructions for updating:
Please use `layer.add_weight` method instead.
Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input (InputLayer)              [(None, 32, 32, 3)]  0                                            
__________________________________________________________________________________________________
prune_low_magnitude_conv2d (Pru (None, 16, 16, 32)   1762        input[0][0]                      
__________________________________________________________________________________________________
prune_low_magnitude_batch_norma (None, 16, 16, 32)   129         prune_low_magnitude_conv2d[0][0] 
__________________________________________________________________________________________________
prune_low_magnitude_re_lu (Prun multiple             1           prune_low

In [0]:
# The baseline model was trained on generator output rescaled by 1/255 (see
# generate()), so the pruning fine-tune and its evaluation must see inputs in
# the same [0, 1] range instead of the raw 0-255 pixel arrays.
X_train_scaled = X_train.astype('float32') / 255.
X_test_scaled = X_test.astype('float32') / 255.

new_pruned_model.fit(X_train_scaled, y_train,
          batch_size=batch,
          epochs=epochs,
          verbose=1,
          callbacks=callbacks,
          validation_data=(X_test_scaled, y_test))

score = new_pruned_model.evaluate(X_test_scaled, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

# Remove the pruning wrappers so only the underlying (now sparse) layers remain.
final_model = sparsity.strip_pruning(new_pruned_model)
final_model.summary()

Train on 50000 samples, validate on 10000 samples
Epoch 1/4
INFO:tensorflow:Summary name prune_low_magnitude_conv2d_1/mask:0/sparsity is illegal; using prune_low_magnitude_conv2d_1/mask_0/sparsity instead.
INFO:tensorflow:Summary name prune_low_magnitude_conv2d_2/mask:0/sparsity is illegal; using prune_low_magnitude_conv2d_2/mask_0/sparsity instead.
INFO:tensorflow:Summary name prune_low_magnitude_conv2d_3/mask:0/sparsity is illegal; using prune_low_magnitude_conv2d_3/mask_0/sparsity instead.
INFO:tensorflow:Summary name prune_low_magnitude_conv2d_4/mask:0/sparsity is illegal; using prune_low_magnitude_conv2d_4/mask_0/sparsity instead.
INFO:tensorflow:Summary name prune_low_magnitude_conv2d_5/mask:0/sparsity is illegal; using prune_low_magnitude_conv2d_5/mask_0/sparsity instead.
INFO:tensorflow:Summary name prune_low_magnitude_conv2d_6/mask:0/sparsity is illegal; using prune_low_magnitude_conv2d_6/mask_0/sparsity instead.
INFO:tensorflow:Summary name prune_low_magnitude_conv2d_7/mask:0

In [0]:

import numpy as np
# Per-weight sparsity report. Tensors that contain at least one zero are also
# collected for the bar chart: `names` (weight name), `params` (element count)
# and `percent` (number of zero elements -- a count, despite the name).
names, params, percent = [], [], []
dash = '-' * 80
header = '{:<20s}{:>40s}{:>12s}'.format('Name','Total Parameters','    Pruned %')
for line in (dash, header, dash):
    print(line)

row_template = '{0:<40} {1:>10} {2:>18.3f}'
for variable, w in zip(final_model.weights, final_model.get_weights()):
    zero_count = np.sum(w == 0)
    print(row_template.format(variable.name, w.size, zero_count / w.size * 100))

    if zero_count > 0:
        names.append(variable.name)
        params.append(w.size)
        percent.append(zero_count)

print(params[0])
print(percent[0])

--------------------------------------------------------------------------------
Name                                        Total Parameters    Pruned %
--------------------------------------------------------------------------------
conv2d/kernel:0                                 864             90.046
conv2d/bias:0                                    32              0.000
batch_normalization/gamma:0                      32              0.000
batch_normalization/beta:0                       32              0.000
batch_normalization/moving_mean:0                32              0.000
batch_normalization/moving_variance:0            32              0.000
conv2d_1/kernel:0                              1024             90.039
conv2d_1/bias:0                                  32              0.000
batch_normalization_1/gamma:0                    32              0.000
batch_normalization_1/beta:0                     32              0.000
batch_normalization_1/moving_mean:0              32    

In [0]:
import plotly.graph_objects as go

# Grouped bar chart: total vs. zeroed parameter count for every weight tensor
# collected in the sparsity report.
fig = go.Figure()
bar_series = (
    ('Total Parameters', params, 'rgb(55, 83, 109)'),
    ('Pruned Parameters', percent, 'rgb(26, 118, 255)'),
)
for series_name, series_values, series_color in bar_series:
    fig.add_trace(go.Bar(x=names,
                         y=series_values,
                         name=series_name,
                         marker_color=series_color))

y_axis_style = dict(
    title='Parameters',
    titlefont_size=16,
    tickfont_size=14,
)
# Transparent legend box anchored at the top-left corner of the plot.
legend_style = dict(
    x=0,
    y=1.0,
    bgcolor='rgba(255, 255, 255, 0)',
    bordercolor='rgba(255, 255, 255, 0)'
)
fig.update_layout(
    title='MobilenetV2 Pruned Parameters',
    xaxis_tickfont_size=14,
    yaxis=y_axis_style,
    legend=legend_style,
    barmode='group',
    bargap=0.15,  # gap between bars of adjacent location coordinates.
    bargroupgap=0.1  # gap between bars of the same location coordinate.
)
fig.show()

Size of the original model after compression: 2.26 Mb
Size of the pruned model after compression: 2.25 Mb


**Quantization**

In [0]:

# Convert the stripped Keras model to a TensorFlow Lite flatbuffer.
converter = tf.lite.TFLiteConverter.from_keras_model(final_model)

# Optimize.DEFAULT enables post-training quantization. The previously used
# Optimize.OPTIMIZE_FOR_SIZE is a deprecated alias with the same effect.
converter.optimizations = [tf.lite.Optimize.DEFAULT]

tflite_quant_model = converter.convert()

# `tflite_quant_model_file` is read again by the size-comparison cell.
tflite_quant_model_file = '/tmp/sparse_mnist_quant.tflite'
with open(tflite_quant_model_file, 'wb') as f:
  f.write(tflite_quant_model)

In [0]:
import os
import tempfile
import zipfile


def _size_mb(path):
  """Return the size of the file at `path` in MiB (bytes / 2**20)."""
  return os.path.getsize(path) / float(2**20)


def _zip_single_file(src_path, zip_path):
  """Write `src_path` into a new DEFLATE-compressed archive at `zip_path`."""
  with zipfile.ZipFile(zip_path, 'w', compression=zipfile.ZIP_DEFLATED) as archive:
    archive.write(src_path)


# Each entry is [model, label]; the label is also used as the file stem.
models = [[final_model,'final_model']]
# Sizes in MiB, one entry per model followed by one for the TFLite file.
# Both lists are consumed by the bar chart in the next cell.
size_before = []
size_after = []
for model, label in models:
  print(label)

  h5_path = './'+label+'.h5'
  zip_path = './'+label+'.zip'

  tf.keras.models.save_model(model, h5_path,
                          include_optimizer=False)

  h5_mb = _size_mb(h5_path)
  print("Size of the pruned model before compression: %.4f Mb"
        % h5_mb)
  size_before.append(h5_mb)

  _zip_single_file(h5_path, zip_path)

  zip_mb = _size_mb(zip_path)
  print("Size of the pruned model after compression: %.4f Mb"
        % zip_mb)
  size_after.append(zip_mb)


# mkstemp returns an already-open OS-level file descriptor along with the
# path. Only the path is needed, so close the descriptor instead of leaking it.
tmp_fd, zip_tflite = tempfile.mkstemp('.zip')
os.close(tmp_fd)
_zip_single_file(tflite_quant_model_file, zip_tflite)

tflite_mb = _size_mb(tflite_quant_model_file)
print("Size of the tflite quantized model before compression: %.2f Mb"
      % tflite_mb)
size_before.append(tflite_mb)

tflite_zip_mb = _size_mb(zip_tflite)
print("Size of the tflite quantized model after compression: %.2f Mb"
      % tflite_zip_mb)
size_after.append(tflite_zip_mb)

final_model
Size of the pruned model before compression: 9.5953 Mb
Size of the pruned model after compression: 2.2219 Mb
Size of the tflite quantized model before compression: 2.31 Mb
Size of the tflite quantized model after compression: 0.52 Mb


In [0]:
import plotly.graph_objects as go

# Grouped bar chart of file sizes before and after zip compression, one group
# per model variant.
variant_labels = ['Pruned', 'Quantized']

fig = go.Figure()
size_series = (
    ('Before', size_before, 'indianred'),
    ('After', size_after, 'lightsalmon'),
)
for series_name, series_values, series_color in size_series:
    fig.add_trace(go.Bar(
        x=variant_labels,
        y=series_values,
        name=series_name,
        marker_color=series_color
    ))

y_axis_style = dict(
    title='Size (MB)',
    titlefont_size=16,
    tickfont_size=14,
)
# xaxis_tickangle rotates the x-axis labels.
fig.update_layout(
    yaxis=y_axis_style,
    barmode='group',
    xaxis_tickangle=-45)
fig.show()

Size of the tflite model before compression: 2.31 Mb
Size of the tflite model after compression: 0.52 Mb
