*Adapted from: https://keras.io/examples/vision/mnist_convnet/*

# Setup

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow.keras as keras
# Take `layers` from the same package that `keras` is bound to above, so the
# model is never assembled from two different Keras installations.
from tensorflow.keras import layers

# Report which Keras version and backend this run is using.
print(keras.__version__)
keras.backend.backend()

# Load the data

In [None]:
# MNIST comes back as two (images, labels) pairs: the first is the training
# split, the second the test split.
train_split, test_split = keras.datasets.mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

# Display the data

In [None]:
# Show the first nine training digits in a 3x3 grid, each titled with its label.
plt.clf()
for position, (image, label) in enumerate(zip(x_train[:9], y_train[:9]), start=1):
    # plt.subplot makes the selected cell the current axes, so the pyplot
    # calls below draw into it; the returned Axes object is not needed.
    plt.subplot(3, 3, position)
    plt.imshow(image, cmap="gray")
    plt.title(str(label))
    plt.axis("off")

plt.show()

# Prepare the data

In [None]:
# NOTE: this cell rebinds x_train / x_test / y_train / y_test from their own
# previous values, so re-running it without reloading the data applies the
# scaling, the extra axis and the one-hot encoding a second time.

# Convert pixels to float32, append a trailing channel axis so every image has
# shape (28, 28, 1), and scale the values to the [0, 1] range.
x_train = x_train.astype("float32")[..., np.newaxis] / 255
x_test = x_test.astype("float32")[..., np.newaxis] / 255
print("x_train shape:", x_train.shape)
print(len(x_train), "train samples")
print(len(x_test), "test samples")

# Model / data parameters
num_classes = 10
# Per-sample shape: everything after the leading sample axis.
input_shape = x_train.shape[1:]

# One-hot encode the integer class labels.
y_train, y_test = (
    keras.utils.to_categorical(labels, num_classes) for labels in (y_train, y_test)
)

# Build the model

A convolutional layer expects input whose spatial dimensions match the dimensionality of its kernel (2D in this case), so we keep each image as a 28x28 grid, with a trailing channel axis, instead of flattening it. The model uses a total of 2 convolutional layers, each followed by a pooling layer that downsamples the output of the convolutional layer.

After the convolutional and pooling layers, the output is flattened to 1D and a dropout layer is applied before the output layer. Dropout helps prevent overfitting and reduces the network's reliance on a small set of neurons, making the resulting network more generalizable.

In [None]:
# Every layer gets an explicit name so that later lookups through
# model.get_layer(...) do not depend on Keras' auto-generated names, which
# vary with the Keras version and with how many layers were created earlier
# in the session.
model = keras.Sequential(
    [
        # keras.Input(shape=...) replaces InputLayer(input_shape=...), whose
        # `input_shape` argument is deprecated in Keras 3.
        keras.Input(shape=input_shape),
        layers.Conv2D(32, kernel_size=(3, 3), activation="relu", name="conv2d_1"),
        layers.MaxPooling2D(pool_size=(2, 2), name="max_pooling2d_1"),
        layers.Conv2D(64, kernel_size=(3, 3), activation="relu", name="conv2d_2"),
        layers.MaxPooling2D(pool_size=(2, 2), name="max_pooling2d_2"),
        layers.Flatten(name="flatten_1"),
        # Dropout sits between the flattened features and the classifier.
        layers.Dropout(0.5, name="dropout_1"),
        layers.Dense(num_classes, activation="softmax", name="dense_1"),
    ]
)

model.summary()

# Train the model

As with the basic NN model, we train for 15 epochs with a batch size of 128, using the same loss and optimizer settings. Training will probably take longer than for the basic NN model because there are more operations to perform.

In [None]:
# Training hyper-parameters.
batch_size = 128
epochs = 15

# Categorical cross-entropy matches the one-hot labels and the softmax output.
model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

# validation_split=0.1 asks Keras to hold back 10% of the training data for
# validation instead of training on it.
model.fit(
    x_train,
    y_train,
    epochs=epochs,
    batch_size=batch_size,
    validation_split=0.1,
)

# Evaluate the trained model

We expect a test accuracy of about 99% here.

In [None]:
# evaluate() returns the loss followed by the metrics passed to compile()
# (here: accuracy), measured on the held-out test set.
score = model.evaluate(x_test, y_test, verbose=0)
test_loss, test_accuracy = score
print("Test loss:", test_loss)
print("Test accuracy:", test_accuracy)

In [None]:
# Select the first convolution by type rather than by an auto-generated name
# such as "conv2d_1": which layer that name refers to depends on the Keras
# version and on how many layers were created earlier in the session.
first_conv = next(
    candidate for candidate in model.layers if isinstance(candidate, layers.Conv2D)
)
# Conv2D kernels are stored as (kernel_h, kernel_w, in_channels, filters).
# The first convolution sees a single input channel, so squeezing leaves
# (kernel_h, kernel_w, filters) and W[:, :, i] is one 2-D filter.
kernels = first_conv.get_weights()[0]
W = np.squeeze(kernels)

# Draw the first 25 filters in a 5x5 grid, each titled with its index.
fig, axs = plt.subplots(5, 5, figsize=(8, 8))
fig.subplots_adjust(hspace=.5, wspace=.001)
for i, ax in enumerate(axs.ravel()):
    ax.imshow(W[:, :, i], cmap="gray")
    ax.set_title(str(i))

plt.show()

In [None]:
def visualize_conv_layer(model, layer_name, image=None, row_size=4, col_size=8):
    """Plot the feature maps that one layer produces for a single image.

    Args:
        model: Keras model that contains the layer.
        layer_name: Name of the layer whose output is plotted. The output is
            indexed as (batch, height, width, channel).
        image: Input sample holding 28 * 28 values. Defaults to ``x_train[5]``
            from the notebook's global scope.
        row_size: Number of rows in the plot grid.
        col_size: Number of columns in the plot grid.

    At most ``row_size * col_size`` channels are drawn. Grid cells without a
    matching channel are left blank instead of raising ``IndexError``.
    """
    if image is None:
        image = x_train[5]

    # Sub-model that stops at the requested layer and returns its activations.
    layer_output = model.get_layer(layer_name).output
    intermediate_model = keras.models.Model(inputs=model.input, outputs=layer_output)

    # Add the batch axis expected by predict().
    intermediate_prediction = intermediate_model.predict(
        np.asarray(image).reshape(1, 28, 28, 1)
    )

    print(layer_name, np.shape(intermediate_prediction))

    num_channels = intermediate_prediction.shape[-1]
    # squeeze=False keeps `axes` two-dimensional even for a single row or column.
    _, axes = plt.subplots(row_size, col_size, figsize=(10, 8), squeeze=False)

    # ravel() walks the grid row by row, so channel k lands in cell k.
    for img_index, ax in enumerate(axes.ravel()):
        if img_index < num_channels:
            ax.imshow(intermediate_prediction[0, :, :, img_index], cmap="gray")
        else:
            ax.axis("off")

    plt.show()
    
def visualize_flat_layer(model, layer_name, image=None):
    """Plot the output of a flat (1-D per sample) layer for a single image.

    Args:
        model: Keras model that contains the layer.
        layer_name: Name of the layer whose output is plotted.
        image: Input sample holding 28 * 28 values. Defaults to ``x_train[1]``
            from the notebook's global scope.

    The activations are shown as a square image when their count is a perfect
    square (1600 values give the 40x40 picture), and as a single-row strip
    otherwise.
    """
    if image is None:
        image = x_train[1]

    # Sub-model that stops at the requested layer and returns its activations.
    layer_output = model.get_layer(layer_name).output
    intermediate_model = keras.models.Model(inputs=model.input, outputs=layer_output)
    intermediate_prediction = intermediate_model.predict(
        np.asarray(image).reshape(1, 28, 28, 1)
    )

    print(layer_name, np.shape(intermediate_prediction))

    flat = intermediate_prediction.reshape(-1)
    side = int(round(np.sqrt(flat.size)))
    if side * side == flat.size:
        plt.imshow(flat.reshape((side, side)), cmap="gray")
    else:
        # No square layout exists; stretch a one-row strip so it stays visible.
        plt.imshow(flat.reshape((1, -1)), cmap="gray", aspect="auto")

    plt.show()

# Plot the feature maps produced by the layer named "conv2d_1". The name has
# to match one of the layer names listed by model.summary().
visualize_conv_layer(model, "conv2d_1")
# Optional: uncomment any of the calls below to inspect further layers.
# visualize_conv_layer(model, "max_pooling2d_1")
# visualize_conv_layer(model, "conv2d_2")
# visualize_conv_layer(model, "max_pooling2d_2")
# visualize_flat_layer(model, "flatten_1")