In this notebook, we build a CNN with 2 Conv2D layers, 1 Flatten layer and 1 Dense layer. We turn off training for the convolutional layers at the very beginning, then train on the training set and evaluate on the test set. After that, we identify the 10 least relevant filters of each convolutional layer, ranking the filters by the sum of their activations over three axes (0, 1, 2): axis 0 runs over all samples in the dataset, axis 1 over the height, and axis 2 over the width of the feature map. After removing these filters, we train and evaluate again to compare the accuracy and the time spent on training.

In [80]:
#defining imports
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import time
from tensorflow.keras import datasets
from keras.layers import Dense
from keras.layers import Conv2D, Layer
import tensorflow_model_optimization as tfmot

In [81]:
# Download (or read from the local Keras cache) the Fashion-MNIST dataset
# and unpack its train and test splits into images and labels.
from keras.datasets import fashion_mnist
train_split, test_split = fashion_mnist.load_data()
fashion_train_img, fashion_train_labels = train_split
fashion_test_img, fashion_test_labels = test_split

In [82]:
def scale_to_unit_nhwc(images):
    """Return `images` shaped (N, 28, 28, 1) with pixel values scaled by 1/255.

    The batch size is inferred from the data instead of being hard-coded, and
    the division is applied only while the array still has an integer dtype.
    That makes the cell safe to re-run: a second execution finds a float
    array and leaves the values untouched instead of dividing by 255 again.

    Note: a float array that still holds 0..255 values is NOT rescaled.
    """
    # Trailing axis of size 1 marks the images as single-channel (grayscale).
    images = np.asarray(images).reshape((-1, 28, 28, 1))
    if np.issubdtype(images.dtype, np.integer):
        images = images / 255.0
    return images


fashion_train_img = scale_to_unit_nhwc(fashion_train_img)
fashion_test_img = scale_to_unit_nhwc(fashion_test_img)

Defining our model

In [83]:
# Baseline CNN: two 3x3 ReLU convolutions with 32 filters each, followed by a
# flatten layer and a 10-unit softmax classifier, for 28x28x1 inputs.
# Layer names are fixed because later cells look the layers up by name.
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1), name='conv2d_1'))
model.add(tf.keras.layers.Conv2D(32, (3, 3), activation='relu', name='conv2d_2'))
model.add(tf.keras.layers.Flatten(name='flatten'))
model.add(tf.keras.layers.Dense(10, activation='softmax', name='output'))

Turning off Backprop for Conv2D layers

In [14]:
# Freeze every convolutional layer so that only the dense classifier is
# updated during training. Layers are selected by type rather than by
# position, so the loop keeps matching the Conv2D layers if the architecture
# changes (the previous `[:3]` slice also caught the weight-less Flatten layer).
# This cell must run after the model is defined and before `model.compile()`:
# Keras only honours a changed `trainable` flag at the next compile.
for layer in model.layers:
    if isinstance(layer, tf.keras.layers.Conv2D):
        layer.trainable = False


In [84]:
# Configure training for the baseline model: Adam (legacy implementation)
# with a 1e-3 learning rate, integer-label cross-entropy, accuracy metric.
opt = tf.keras.optimizers.legacy.Adam(learning_rate=0.001)
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=opt,
    metrics=['accuracy'],
)

In [85]:
# Train the baseline model for one epoch and time the call.
# `time.perf_counter()` is a monotonic, high-resolution clock, so the measured
# duration is not affected by system clock adjustments (unlike `time.time()`).
# The measured interval also covers the validation pass on the test set,
# because `validation_data` is evaluated inside `fit`.
start_time = time.perf_counter()
history = model.fit(fashion_train_img, fashion_train_labels, epochs=1, validation_data=(fashion_test_img, fashion_test_labels))
end_time = time.perf_counter()



In [86]:
# Score the baseline model on the held-out test set and report the accuracy
# together with the duration measured around the preceding `fit` call.
test_loss, test_acc = model.evaluate(fashion_test_img, fashion_test_labels, verbose=2)
baseline_elapsed = end_time - start_time
print('Test accuracy:', test_acc)
print('Time elapsed: ', baseline_elapsed)

313/313 - 1s - loss: 0.3276 - accuracy: 0.8862 - 1s/epoch - 4ms/step
Test accuracy: 0.8862000107765198
Time elapsed:  18.401186227798462


Filter Pruning for the 1st Convolutional Layer

In [87]:
# Prune the least active filters of the first convolutional layer and start a
# new model whose first layer holds only the surviving filters.
layer_name = 'conv2d_1'
num_filters_to_prune = 10

# Get the activations of the convolutional layer for the whole training set.
activation_model = tf.keras.models.Model(inputs=model.input, outputs=model.get_layer(layer_name).output)
activations = activation_model.predict(fashion_train_img)

# Score each filter by its total activation: summing over axes (0, 1, 2)
# collapses samples, height and width and leaves one value per filter.
filter_activations = np.sum(activations, axis=(0, 1, 2))
# argsort is ascending, so the first entries are the least active filters.
# `indices_to_prune` is read again by the conv2d_2 pruning cell, which must
# drop the matching input channels.
indices_to_prune = np.argsort(filter_activations)[:num_filters_to_prune]

# Delete the selected filters. The kernel's last axis (3) indexes the
# output filters; the bias has one entry per filter.
layer = model.get_layer(layer_name)
weights, biases = layer.get_weights()
pruned_weights = np.delete(weights, indices_to_prune, axis=3)
pruned_biases = np.delete(biases, indices_to_prune, axis=0)

# Build the first layer of the pruned model. Its filter count is derived from
# the pruned kernel instead of being hard-coded, so it stays consistent when
# `num_filters_to_prune` changes. The flatten and dense layers are appended
# in a later cell.
num_kept_filters = pruned_weights.shape[3]
new_model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(num_kept_filters, (3, 3), activation='relu', input_shape=(28, 28, 1), name='new_conv2d_1'),
])

# Copy the weights and biases of the remaining filters into the new layer.
new_layer = new_model.get_layer('new_conv2d_1')
new_weights = pruned_weights
new_biases = pruned_biases
new_layer.set_weights([new_weights, new_biases])




Filter Pruning for the 2nd Convolutional Layer

In [88]:
# Prune the least active filters of the second convolutional layer and append
# the slimmed layer to the pruned model.
layer_name = 'conv2d_2'
num_filters_to_prune = 10

# At this point `indices_to_prune` still holds the filters removed from
# conv2d_1 by the previous cell. Capture it before it is overwritten below:
# conv2d_2's kernel must lose exactly those input channels.
conv1_pruned_indices = np.asarray(indices_to_prune)

# Guard against stale state (e.g. this cell being re-run): removing these
# indices from conv2d_1's kernel must reproduce the kernel that was copied
# into new_conv2d_1. This holds while neither model has been trained since
# the conv2d_1 pruning cell ran.
expected_conv1_kernel = np.delete(model.get_layer('conv2d_1').get_weights()[0], conv1_pruned_indices, axis=3)
actual_conv1_kernel = new_model.get_layer('new_conv2d_1').get_weights()[0]
assert (
    expected_conv1_kernel.shape == actual_conv1_kernel.shape
    and np.array_equal(expected_conv1_kernel, actual_conv1_kernel)
), "indices_to_prune does not match new_conv2d_1; re-run the conv2d_1 pruning cell first"

# Get the activations of the convolutional layer for the whole training set.
activation_model = tf.keras.models.Model(inputs=model.layers[0].input, outputs=model.get_layer(layer_name).output)
activations = activation_model.predict(fashion_train_img)

# Determine the filter indices with the least total activation
# (sum over samples, height and width; argsort is ascending).
filter_activations = np.sum(activations, axis=(0, 1, 2))
indices_to_prune = np.argsort(filter_activations)[:num_filters_to_prune]

# Delete the irrelevant filters. The kernel's axis 3 indexes this layer's
# output filters and axis 2 indexes its input channels. Input channel i is
# the feature map of conv2d_1's filter i, so the channels of the filters
# pruned from conv2d_1 must be removed as well. Slicing the first N channels
# instead would pair the kept kernels with the wrong feature maps.
layer = model.get_layer(layer_name)
weights, biases = layer.get_weights()
pruned_weights = np.delete(weights, indices_to_prune, axis=3)
pruned_weights = np.delete(pruned_weights, conv1_pruned_indices, axis=2)
pruned_biases = np.delete(biases, indices_to_prune, axis=0)

# Append the slimmed layer (filter count derived from the pruned kernel) and
# copy the weights and biases of the remaining filters into it.
new_model.add(tf.keras.layers.Conv2D(pruned_weights.shape[3], (3, 3), activation='relu', name='new_conv2d_2'))
new_layer = new_model.get_layer('new_conv2d_2')
new_layer_weights = pruned_weights
new_layer_biases = pruned_biases
new_layer.set_weights([new_layer_weights, new_layer_biases])




Adding a Flatten and Dense Layer

In [89]:
# Finish the pruned model: reuse the baseline model's flatten layer object
# and put a newly created 10-unit softmax classifier on top of it.
shared_flatten = model.get_layer('flatten')
new_classifier = tf.keras.layers.Dense(10, activation='softmax', name='new_output')
new_model.add(shared_flatten)
new_model.add(new_classifier)

In [92]:
# Configure training for the pruned model with the same settings as the
# baseline: Adam (legacy implementation) at 1e-3, integer-label cross-entropy,
# accuracy metric.
# NOTE(review): the new_conv2d_* layers are created trainable, while the
# baseline notebook cell sets trainable=False on the original conv layers.
# Confirm this asymmetry is intended when comparing accuracy and time.
opt = tf.keras.optimizers.legacy.Adam(learning_rate=0.001)
new_model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=opt,
    metrics=['accuracy'],
)

In [93]:
# Train the pruned model for one epoch and time the call.
# `time.perf_counter()` is a monotonic, high-resolution clock, so the measured
# duration is not affected by system clock adjustments (unlike `time.time()`).
# The measured interval also covers the validation pass on the test set,
# because `validation_data` is evaluated inside `fit`.
start_time = time.perf_counter()
history = new_model.fit(fashion_train_img, fashion_train_labels, epochs=1, validation_data=(fashion_test_img, fashion_test_labels))
end_time = time.perf_counter()



In [94]:
# Score the pruned model on the held-out test set (loss is discarded) and
# report its accuracy with the duration measured around the preceding `fit`.
_, pruned_accuracy = new_model.evaluate(fashion_test_img, fashion_test_labels, verbose=0)
pruned_elapsed = end_time - start_time
print('Pruned accuracy:', pruned_accuracy)
print('Time elapsed: ', pruned_elapsed)


Pruned accuracy: 0.8838000297546387
Time elapsed:  15.876929998397827
