## Accelerate Inference: Neural Network Pruning

In [None]:
import os
import numpy as np
import cv2
import matplotlib.pyplot as plt
import pickle

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import datasets, layers, models, regularizers
from tensorflow.keras.layers import *

print(tf.version.VERSION)

2.9.2


In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# untar
!tar -xvzf "/content/drive/My Drive/10605-ProjA/dataset.tar.gz"
#!tar -xvzf "/content/drive/My Drive/Dataset.tar.gz"   #ztt path 

# load train
train_images = pickle.load(open('train_images.pkl', 'rb'))
train_labels = pickle.load(open('train_labels.pkl', 'rb'))
# load val
val_images = pickle.load(open('val_images.pkl', 'rb'))
val_labels = pickle.load(open('val_labels.pkl', 'rb'))

train_images.pkl
train_labels.pkl
val_images.pkl
val_labels.pkl


In [None]:
# Define the neural network architecture (don't change this)

model = models.Sequential()
model.add(Conv2D(32, (3, 3), padding='same', kernel_regularizer=regularizers.l2(1e-5), input_shape=(25,25,3)))
model.add(Activation('relu'))
model.add(Conv2D(32, (3, 3), kernel_regularizer=regularizers.l2(1e-5)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(64, (3, 3), padding='same', kernel_regularizer=regularizers.l2(1e-5)))
model.add(Activation('relu'))
model.add(Conv2D(64, (3, 3), kernel_regularizer=regularizers.l2(1e-5)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(5))
model.add(Activation('softmax'))

In [None]:
print(model.summary())

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 25, 25, 32)        896       
                                                                 
 activation (Activation)     (None, 25, 25, 32)        0         
                                                                 
 conv2d_1 (Conv2D)           (None, 23, 23, 32)        9248      
                                                                 
 activation_1 (Activation)   (None, 23, 23, 32)        0         
                                                                 
 max_pooling2d (MaxPooling2D  (None, 11, 11, 32)       0         
 )                                                               
                                                                 
 dropout (Dropout)           (None, 11, 11, 32)        0         
                                                        

In [None]:
# you can use the default hyper-parameters for training, 
# and val accuracy ~59% after 25 epochs and > 63% after 50 epochs

model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.0001, decay=1e-6),
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

history = model.fit(train_images, train_labels, batch_size=32, verbose = 0, epochs=180, 
                    validation_data=(val_images, val_labels)) # train for 180 epochs, with batch size 32

In [None]:
# evaluate baseline model
loss, val_accuracy = model.evaluate(val_images, val_labels, batch_size=128)

print("Baseline model, accuracy: {:5.2f}%".format(100 * val_accuracy))

Baseline model, accuracy: 74.53%


In [None]:
# create dir to save model & accuracy on google drive
!mkdir -p "/content/drive/My Drive/10605-ProjA/saved_model/"

In [None]:
# save model
model.save('/content/drive/My Drive/10605-ProjA/saved_model/baseline_model')



In [None]:
# save baseline_model val_accuracy
acc_base_model = {"acc":val_accuracy}
import numpy as np
np.save('/content/drive/My Drive/10605-ProjA/saved_model/baseline_model/acc_base_model_score.npy', acc_base_model) 

In [None]:
np.load('/content/drive/My Drive/10605-ProjA/saved_model/baseline_model/acc_base_model_score.npy',allow_pickle=True)

array({'acc': 0.7453465461730957}, dtype=object)

In [None]:
#reload model
re_model = tf.keras.models.load_model('/content/drive/My Drive/10605-ProjA/saved_model/baseline_model')

# Check its architecture
re_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 25, 25, 32)        896       
                                                                 
 activation (Activation)     (None, 25, 25, 32)        0         
                                                                 
 conv2d_1 (Conv2D)           (None, 23, 23, 32)        9248      
                                                                 
 activation_1 (Activation)   (None, 23, 23, 32)        0         
                                                                 
 max_pooling2d (MaxPooling2D  (None, 11, 11, 32)       0         
 )                                                               
                                                                 
 dropout (Dropout)           (None, 11, 11, 32)        0         
                                                        

In [None]:
#recompile model
re_model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.0001, decay=1e-6),
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

# verify the saved model
loss, val_accuracy = re_model.evaluate(val_images, val_labels, batch_size=128)

print("Baseline model, accuracy: {:5.2f}%".format(100 * val_accuracy))


 1/20 [>.............................] - ETA: 4s - loss: 1.4206 - accuracy: 0.6172

  return dispatch_target(*args, **kwargs)


Baseline model, accuracy: 74.53%


## **Method 1: Magnitude-based pruning**

In [None]:
#reload model
model_1 = tf.keras.models.load_model('/content/drive/My Drive/10605-ProjA/saved_model/baseline_model')

#recompile model
model_1.compile(optimizer=keras.optimizers.Adam(learning_rate=0.0001, decay=1e-6),
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

model_1.fit(train_images, train_labels, batch_size=32, verbose = 0, epochs=1, 
                    validation_data=(val_images, val_labels)) # train for 1 epoch, with batch size 32

# verify the saved model
loss_1, val_accuracy_1 = model_1.evaluate(val_images, val_labels, batch_size=128)

print(val_accuracy_1)


0.7485148310661316


In [None]:
# pruning function
from copy import deepcopy

def prune_weights(weight_list,pruning_rate):
    for i in range(weight_list.shape[-1]):
        copy_weight= deepcopy(weight_list[...,i])
        std=np.std(copy_weight)
        threshold= std*pruning_rate
        weight_list[...,i][np.abs(weight_list[...,i]) < threshold]=0
    return weight_list



In [None]:
def print_nonzeros(model):
    nonzero = total = 0
    for i in range(len(model.variables)):
      if "kernel" in model.variables[i].name:
        name=model.variables[i].name
        tensor=model.variables[i].numpy()
        nz_count=np.count_nonzero(tensor)
        total_params = np.prod(tensor.shape)
        nonzero += nz_count
        total += total_params
        print(f'{name:20} | nonzeros = {nz_count:7} / {total_params:7} ({100 * nz_count / total_params:6.2f}%) | total_pruned = {total_params - nz_count :7} | shape = {tensor.shape}')
    print(f'Active: {nonzero}, Pruned : {total - nonzero}, Total: {total}, Compression rate : {total/nonzero:10.2f}x  ({100 * (total-nonzero) / total:6.2f}% pruned)')
 

In [None]:
# weight pruning
pretrained_model= model_1  
quality_parameter = 1.0      #sensitivity factor to calculate threshold

#Pruning
for layerid in range(len(pretrained_model.layers)):
  layer=pretrained_model.layers[layerid]
  weight=layer.get_weights()
  if len(weight) > 0:
          temp_weight=deepcopy(weight)
          updated_weights = prune_weights(temp_weight[0],quality_parameter) #function call to prune weight based on threshold
          temp_weight[0]= updated_weights
          layer.set_weights(temp_weight)   #set layers weights with pruned weight

#retrain the model
pretrained_model.fit(train_images, train_labels, batch_size=32, verbose = 0, epochs=50, 
                  validation_data=(val_images, val_labels)) # train for 50 epochs, with batch size 32

pruned_loss,pruned_accuracy = pretrained_model.evaluate(val_images, val_labels, batch_size=128, verbose=0) 

print('Accuracy before Pruning:',val_accuracy_1)
print('Accuracy after Pruning:',pruned_accuracy)

print_nonzeros(pretrained_model)   #function to measure pruned weights and compression rate

Accuracy before Pruning: 0.7485148310661316
Accuracy after Pruning: 0.7556435465812683
conv2d/kernel:0      | nonzeros =     864 /     864 (100.00%) | total_pruned =       0 | shape = (3, 3, 3, 32)
conv2d_1/kernel:0    | nonzeros =    9216 /    9216 (100.00%) | total_pruned =       0 | shape = (3, 3, 32, 32)
conv2d_2/kernel:0    | nonzeros =   18432 /   18432 (100.00%) | total_pruned =       0 | shape = (3, 3, 32, 64)
conv2d_3/kernel:0    | nonzeros =   36864 /   36864 (100.00%) | total_pruned =       0 | shape = (3, 3, 64, 64)
dense/kernel:0       | nonzeros =  522037 /  524288 ( 99.57%) | total_pruned =    2251 | shape = (1024, 512)
dense_1/kernel:0     | nonzeros =    2555 /    2560 ( 99.80%) | total_pruned =       5 | shape = (512, 5)
Active: 589968, Pruned : 2256, Total: 592224, Compression rate :       1.00x  (  0.38% pruned)


### **Save weights**

In [None]:
# you need to save the model's weights, naming it 'my_model_weights.h5'
pretrained_model.save_weights("my_model_weights_1.h5")

# running this cell will immediately download a file called 'my_model_weights.h5'
from google.colab import files
files.download("my_model_weights_1.h5")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## **Method 2: SNIP（Single-shot Network Pruning）**

In [None]:
#reload model
model_2 = tf.keras.models.load_model('/content/drive/My Drive/10605-ProjA/saved_model/baseline_model')

#recompile model
model_2.compile(optimizer=keras.optimizers.Adam(learning_rate=0.0001, decay=1e-6),
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

In [None]:
import tensorflow as tf

class Newcallback(tf.keras.callbacks.Callback):
  def __init__(self, masks):
    super(Newcallback, self).__init__()
    self.masks = masks

  def _prune(self, model) :
    for var, mask in list(zip(self.model.trainable_variables, self.masks)):
      var.assign(tf.math.multiply(var.read_value(), mask))

  def on_train_batch_begin(self, batch, logs=None):
    self._prune(self.model)

  def on_train_batch_end(self, batch, logs=None):
    self._prune(self.model)


def makeCallback(model, sparsity, x, y) :
  with tf.GradientTape() as tape:
    y_pred  = model(x)
    loss = model.compiled_loss(y,y_pred)    

  grads = tape.gradient(loss,model.trainable_variables)
  saliences = [tf.abs(grad*weight) for weight, grad in zip(model.trainable_variables, grads)]
  saliences_flat = tf.concat([tf.reshape(x,-1) for x in saliences], 0)
  
  k = tf.dtypes.cast(
          tf.math.round(
              tf.dtypes.cast(tf.size(saliences_flat), tf.float32) *
              (1 - sparsity)), tf.int32)
  values,_ = tf.math.top_k(saliences_flat, k=tf.size(saliences_flat))
  current_threshold = tf.gather(values, k-1)
  masks = [tf.cast(tf.greater_equal(s,current_threshold),dtype=s.dtype) for s in saliences]

  return Newcallback(masks)

In [None]:
def calc_sparsity(model):
    total_weights = np.concatenate([x.numpy().flatten() for x in model.trainable_variables])
    sparsity = 1 - np.count_nonzero(total_weights)/total_weights.size
    return sparsity

In [None]:
from tensorflow.keras.callbacks import ModelCheckpoint, LearningRateScheduler
from tensorflow.keras.callbacks import ReduceLROnPlateau

def lr_schedule(epoch):
    lr = 1e-3
    if epoch > 180:
        lr *= 0.5e-3
    elif epoch > 160:
        lr *= 1e-3
    elif epoch > 120:
        lr *= 1e-2
    elif epoch > 80:
        lr *= 1e-1
    return lr

lr_scheduler = LearningRateScheduler(lr_schedule)
lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1),cooldown=0,patience=5,min_lr=0.5e-6)

In [None]:
sparsity = 0.4

pc = makeCallback(
    model_2,
    sparsity,
    tf.convert_to_tensor(train_images[1:10]),
    tf.convert_to_tensor(train_labels[1:10])
    )

callbacks = [pc,lr_reducer,lr_scheduler]

In [None]:
model_2.fit(train_images, train_labels, batch_size=32, 
            verbose=0, epochs=20, 
            validation_data=(val_images, val_labels),
            callbacks=callbacks
          )

<keras.callbacks.History at 0x7fd2e740f340>

In [None]:
print("Sparsity after pruning:")
print(calc_sparsity(model_2))
print("Loss and Accuracy after pruning:")
print(model_2.evaluate(val_images,val_labels))

Sparsity after pruning:
0.47933240349246875
Loss and Accuracy after pruning:
[0.7645334005355835, 0.7635643482208252]


### **Save weights**

In [None]:
model_2.save_weights("my_model_weights_2.h5")

In [None]:
from google.colab import files
files.download("my_model_weights_2.h5")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## **Method 3: Neuron Pruning**

In [None]:
#reload model
model_3 = tf.keras.models.load_model('/content/drive/My Drive/10605-ProjA/saved_model/baseline_model')   # ztt path

#recompile model
model_3.compile(optimizer=keras.optimizers.Adam(learning_rate=0.0001, decay=1e-6),
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])


In [None]:
model_3.layers

[<keras.layers.convolutional.conv2d.Conv2D at 0x7ffb6016c510>,
 <keras.layers.core.activation.Activation at 0x7ffaf37de9d0>,
 <keras.layers.convolutional.conv2d.Conv2D at 0x7ffaf37de190>,
 <keras.layers.core.activation.Activation at 0x7ffaf37dfe10>,
 <keras.layers.pooling.max_pooling2d.MaxPooling2D at 0x7ffaf37e68d0>,
 <keras.layers.regularization.dropout.Dropout at 0x7ffaf37e53d0>,
 <keras.layers.convolutional.conv2d.Conv2D at 0x7ffaf37e59d0>,
 <keras.layers.core.activation.Activation at 0x7ffaf37e8b50>,
 <keras.layers.convolutional.conv2d.Conv2D at 0x7ffaf37ed550>,
 <keras.layers.core.activation.Activation at 0x7ffb0a386a50>,
 <keras.layers.pooling.max_pooling2d.MaxPooling2D at 0x7ffb0a2eee10>,
 <keras.layers.regularization.dropout.Dropout at 0x7ffb0a2dfcd0>,
 <keras.layers.reshaping.flatten.Flatten at 0x7ffb0a2e4390>,
 <keras.layers.core.dense.Dense at 0x7ffb0a354710>,
 <keras.layers.core.activation.Activation at 0x7ffaf37edf50>,
 <keras.layers.regularization.dropout.Dropout at 0x7f

In [None]:
total_layers = len(model_3.layers)
total_layers

18

In [None]:
len(model_3.get_weights())

12

In [None]:
# pruning function
from numpy import linalg as LA
def unit_prune(k_weights, b_weights, k_sparsity):
    """
    Takes in matrices of kernel and bias weights (for a dense
      layer) and returns the unit-pruned versions of each
    Args:
      k_weights: matrix of the 
      b_weights: matrix of the biases of a dense layer
      k_sparsity: percentage of weights to set to 0
    Returns:
      kernel_weights: sparse matrix with same shape as the original
        kernel weight matrix
      bias_weights: sparse array with same shape as the original
        bias array
    """

    # Copy the kernel weights and get ranked indeces of the
    # column-wise L2 Norms
    kernel_weights = np.copy(k_weights)
    
    ind = np.argsort(LA.norm(kernel_weights, axis=0))
    
    # Number of indexes to set to 0
    cutoff = int(len(ind)*k_sparsity)
    # The indexes in the kernel weight matrix to set to 0
    sparse_cutoff_inds = ind[0:cutoff]
    
    kernel_weights[:, sparse_cutoff_inds] = 0.
        
    # Copy the bias weights and get ranked indeces of the abs
    bias_weights = np.copy(b_weights)
    # The indexes in the 1D bias weight matrix to set to 0
    # Equal to the indexes of the columns that were removed in this case
    #sparse_cutoff_inds
    bias_weights[sparse_cutoff_inds] = 0.
    
    return kernel_weights, bias_weights



In [None]:
def sparsify_model(model, k_sparsity):
    """
    Takes in a model made of dense layers and prunes the weights
    Args:
      model: Keras model
      k_sparsity: target sparsity of the model
    Returns:
      sparse_model: sparsified copy of the previous model
    """
    # Copying a temporary sparse model from our original
    sparse_model = tf.keras.models.clone_model(model)
    
    # Getting a list of the names of each weights of each layer
    names = [weight.name for layer in sparse_model.layers for weight in layer.weights]
    # Getting the list of the weights for each of each layer
    weights = sparse_model.get_weights()
    
    # Initializing list that will contain the new sparse weights
    newWeightList = []

    # Iterate over all but the final 1 layer
    for i in range(0, len(weights)-2, 2):
        kernel_weights, bias_weights = unit_prune(weights[i],weights[i+1],k_sparsity)
        
        # Append the new weight list with our sparsified kernel weights
        newWeightList.append(kernel_weights)
        
        # Append the new weight list with our sparsified bias weights
        newWeightList.append(bias_weights)
    # Add the last layer weight     
    for i in range(len(weights)-2, len(weights)):
        unmodified_weight = np.copy(weights[i])
        newWeightList.append(unmodified_weight)
    
    # Setting the weights of our model to the new ones
    sparse_model.set_weights(newWeightList)
    
    # Re-compiling the Keras model
    sparse_model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.0001, decay=1e-6),
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])
    
    return sparse_model
 

In [None]:
k_sparsities = [0.2]

for k in k_sparsities:
    sparse_model = sparsify_model(model_3, k_sparsity=k)
    sparse_model.fit(train_images, train_labels, batch_size=32, verbose = 0, epochs=50, 
                    validation_data=(val_images, val_labels)) # retrain for 50 epochs, with batch size 32
    print("sparsity" + str(k))
    print("Loss and Accuracy after pruning:")
    print(sparse_model.evaluate(val_images,val_labels))
    


sparsity0.2
Loss and Accuracy after pruning:
[0.7296556234359741, 0.7192079424858093]


In [None]:
def calc_sparsity(model):
    total_weights = np.concatenate([x.numpy().flatten() for x in model.trainable_variables]  )
    sparsity = 1 - np.count_nonzero(total_weights)/total_weights.size
    return sparsity

In [None]:
calc_sparsity(sparse_model)

0.17632683625299994

### save weights

In [None]:
# you need to save the model's weights, naming it 'my_model_weights.h5'
sparse_model.save_weights("my_model_weights_3.h5")

# running this cell will immediately download a file called 'my_model_weights.h5'
from google.colab import files
files.download("my_model_weights_3.h5")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>