In this notebook, we build a CNN model with three Conv2D layers, one Flatten layer and one Dense layer. We turn off training at the very beginning, then train the model and evaluate it on the training and test datasets. After that, we identify the 10 least relevant filters of each convolutional layer, based on their activations summed over three axes (0, 1, 2): axis 0 runs over all samples in the dataset, axis 1 over the height, and axis 2 over the width. After removing these filters, we train and evaluate the pruned model again to compare its accuracy and the time spent on training.

In [2]:
#defining imports
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import time
from tensorflow.keras import datasets
from keras.layers import Dense
from keras.layers import Conv2D, Layer
import tensorflow_model_optimization as tfmot

In [3]:
# Load the Fashion-MNIST train/test splits through the `datasets` module that is
# already imported from tensorflow.keras at the top of the notebook. This avoids
# a mid-notebook import and avoids mixing the standalone `keras` package with
# `tf.keras`, which the rest of the notebook uses to build the models.
(fashion_train_img, fashion_train_labels), (fashion_test_img, fashion_test_labels) = (
    datasets.fashion_mnist.load_data()
)

In [4]:
# Add a trailing channel axis of size 1 (grayscale) and scale the pixel values
# by 1/255. Using -1 lets reshape infer the number of samples instead of
# hard-coding 60000 / 10000, so the cell also works on a subset of the data.
# NOTE: this cell rebinds its own inputs; re-running it without reloading the
# data first divides the images by 255 a second time.
fashion_train_img = fashion_train_img.reshape((-1, 28, 28, 1)) / 255.0
fashion_test_img = fashion_test_img.reshape((-1, 28, 28, 1)) / 255.0

Defining our model

In [5]:
# Baseline CNN: three 3x3 convolutions with 32 filters each, followed by a
# Flatten layer and a 10-way softmax classifier.
# Only the first layer declares input_shape; the later layers infer their input
# from the layer before them, so repeating (28, 28, 1) there had no effect and
# misstated their real input.
model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1), name='conv2d_1'),
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu', name='conv2d_2'),
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu', name='conv2d_3'),
    tf.keras.layers.Flatten(name='flatten'),
    tf.keras.layers.Dense(10, activation='softmax', name='output')
])

Compiling the model (note: no layer is frozen in this notebook, so backprop stays enabled for the Conv2D layers)

In [6]:
# Configure the baseline model for training: Adam optimizer (learning rate 1e-3),
# sparse categorical cross-entropy loss, and accuracy as the reported metric.
opt = tf.keras.optimizers.legacy.Adam(learning_rate=0.001)
model.compile(
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
    optimizer=opt,
)

In [7]:
# Train the baseline model for 5 epochs, using the test split as validation
# data, and time the whole fit() call.
# time.perf_counter() is a monotonic clock intended for measuring elapsed time;
# unlike time.time() it is not affected by system clock adjustments.
start_time = time.perf_counter()
history = model.fit(fashion_train_img, fashion_train_labels, epochs=5, validation_data=(fashion_test_img, fashion_test_labels))
end_time = time.perf_counter()

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [8]:
# Score the trained baseline on the test split, then report its accuracy
# together with the training time measured in the previous cell.
elapsed_seconds = end_time - start_time
test_loss, test_acc = model.evaluate(
    x=fashion_test_img,
    y=fashion_test_labels,
    verbose=2,
)
print('Test accuracy:', test_acc)
print('Time elapsed: ', elapsed_seconds)

313/313 - 2s - loss: 0.2607 - accuracy: 0.9141 - 2s/epoch - 5ms/step
Test accuracy: 0.9140999913215637
Time elapsed:  137.08772587776184


Filter Pruning for the 1st Convolutional Layer

In [9]:
# Prune the first convolutional layer: rank its filters by their total
# activation over the training set, drop the 10 weakest, and start a new
# Sequential model whose first layer holds the surviving filters.
layer_name = 'conv2d_1'
layer = model.get_layer(layer_name)

# Sub-model that maps the input images to this layer's feature maps.
activation_model = tf.keras.models.Model(inputs=model.input, outputs=layer.output)
activations = activation_model.predict(fashion_train_img)

# Sum over samples (axis 0), height (axis 1) and width (axis 2), leaving one
# score per filter; the 10 smallest scores mark the filters to prune.
filter_activations = np.sum(activations, axis=(0, 1, 2))
indices_to_prune = np.argsort(filter_activations)[:10]

# Remove the selected filters: last axis of the kernel, only axis of the bias.
weights, biases = layer.get_weights()
pruned_weights = np.delete(weights, indices_to_prune, axis=3)
pruned_biases = np.delete(biases, indices_to_prune, axis=0)

# Size the new layer from the pruned kernel instead of hard-coding the filter
# count, so it always matches the weights copied into it.
new_model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(pruned_weights.shape[-1], (3, 3), activation='relu',
                           input_shape=(28, 28, 1), name='new_conv2d_1'),
])

# Copy the weights and biases of the remaining filters into the new layer.
new_layer = new_model.get_layer('new_conv2d_1')
new_layer.set_weights([pruned_weights, pruned_biases])




Filter Pruning for the 2nd Convolutional Layer

In [10]:
# Prune the second convolutional layer and append its slimmed-down copy to
# new_model.
layer_conv2 = model.get_layer('conv2d_2')

# Feature maps of 'conv2d_2' for the training images, computed directly from
# the original model's input (no separate pass through 'conv2d_1' is needed).
activation_model_conv2 = tf.keras.models.Model(inputs=model.input, outputs=layer_conv2.output)
activations_conv2 = activation_model_conv2.predict(fashion_train_img)

# One score per filter: activations summed over samples, height and width.
filter_activations_conv2 = np.sum(activations_conv2, axis=(0, 1, 2))
indices_to_prune_conv2 = np.argsort(filter_activations_conv2)[:10]  # 10 weakest filters

# Drop the selected output filters (kernel axis 3, bias axis 0).
weights_conv2, biases_conv2 = layer_conv2.get_weights()
pruned_weights_conv2 = np.delete(weights_conv2, indices_to_prune_conv2, axis=3)
pruned_biases_conv2 = np.delete(biases_conv2, indices_to_prune_conv2, axis=0)

# The new first layer no longer emits the channels listed in indices_to_prune,
# so the matching input channels (kernel axis 2) must be removed here as well.
# Slicing [:22] would keep the first 22 input channels regardless of which ones
# were pruned and misalign the weights with the incoming feature maps.
new_weights_conv2 = np.delete(pruned_weights_conv2, indices_to_prune, axis=2)
new_biases_conv2 = pruned_biases_conv2

# Add the pruned layer to the new model and load the surviving weights.
new_model.add(tf.keras.layers.Conv2D(new_weights_conv2.shape[-1], (3, 3), activation='relu', name='new_conv2d_2'))
new_layer_conv2 = new_model.get_layer('new_conv2d_2')
new_layer_conv2.set_weights([new_weights_conv2, new_biases_conv2])





Filter Pruning for the 3rd Convolutional Layer

In [11]:
# Prune the third convolutional layer and append its slimmed-down copy to
# new_model.
layer_conv3 = model.get_layer('conv2d_3')

# Feature maps of 'conv2d_3' for the training images, taken from the original
# model. The earlier version built a 'conv2d_3' sub-model but then called the
# 'conv2d_2' sub-model by mistake, so the ranking below was not based on
# 'conv2d_3' activations.
activation_model_conv3 = tf.keras.models.Model(inputs=model.input, outputs=layer_conv3.output)
activations_conv3 = activation_model_conv3.predict(fashion_train_img)

# One score per filter of 'conv2d_3': activations summed over samples, height
# and width.
filter_activations_conv3 = np.sum(activations_conv3, axis=(0, 1, 2))
indices_to_prune_conv3 = np.argsort(filter_activations_conv3)[:10]  # 10 weakest filters

# Drop the selected output filters of 'conv2d_3' (kernel axis 3, bias axis 0).
weights_conv3, biases_conv3 = layer_conv3.get_weights()
pruned_weights_conv3 = np.delete(weights_conv3, indices_to_prune_conv3, axis=3)
pruned_biases_conv3 = np.delete(biases_conv3, indices_to_prune_conv3, axis=0)

# The preceding new layer no longer emits the channels listed in
# indices_to_prune_conv2, so remove the matching input channels (kernel axis 2)
# instead of simply keeping the first 22.
new_weights_conv3 = np.delete(pruned_weights_conv3, indices_to_prune_conv2, axis=2)
new_biases_conv3 = pruned_biases_conv3

# Add the pruned 'new_conv2d_3' layer and load the surviving weights.
new_model.add(tf.keras.layers.Conv2D(new_weights_conv3.shape[-1], (3, 3), activation='relu', name='new_conv2d_3'))
new_layer_conv3 = new_model.get_layer('new_conv2d_3')
new_layer_conv3.set_weights([new_weights_conv3, new_biases_conv3])





Adding a Flatten and Dense Layer

In [25]:
# Append the classifier head to the pruned model.
# A fresh Flatten layer is used instead of re-using the original model's
# 'flatten' instance, so the two models do not share a layer object. Flatten has
# no weights, so nothing is lost.
# The guard makes the cell safe to re-run: without it a second execution would
# try to add the head again.
if 'new_output' not in [existing.name for existing in new_model.layers]:
    new_model.add(tf.keras.layers.Flatten(name='new_flatten'))
    new_model.add(tf.keras.layers.Dense(10, activation='softmax', name='new_output'))

In [13]:
# Configure the pruned model with the same training setup as the baseline:
# Adam optimizer (learning rate 1e-3), sparse categorical cross-entropy loss,
# and accuracy as the reported metric.
opt = tf.keras.optimizers.legacy.Adam(learning_rate=0.001)
new_model.compile(
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
    optimizer=opt,
)

In [14]:
# Train the pruned model for 5 epochs, using the test split as validation data,
# and time the whole fit() call.
# time.perf_counter() is a monotonic clock intended for measuring elapsed time;
# unlike time.time() it is not affected by system clock adjustments.
start_time = time.perf_counter()
history = new_model.fit(fashion_train_img, fashion_train_labels, epochs=5, validation_data=(fashion_test_img, fashion_test_labels))
end_time = time.perf_counter()

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [15]:
# Score the pruned model on the test split, then report its accuracy together
# with the training time measured in the previous cell.
elapsed_seconds = end_time - start_time
_, pruned_accuracy = new_model.evaluate(
    x=fashion_test_img,
    y=fashion_test_labels,
    verbose=0,
)
print('Pruned accuracy:', pruned_accuracy)
print('Time elapsed: ', elapsed_seconds)


Pruned accuracy: 0.910099983215332
Time elapsed:  98.7699089050293
