# CNN Keras lab

Read and run the code.  Answer every question in the notebook, and fill in code where you see # YOUR CODE HERE.

In [None]:
from tensorflow.keras import models, layers
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical
import matplotlib.pyplot as plt

In [None]:
def plot_metric(history, metric='loss'):
    """Plot the per-epoch training and validation curves for one metric.

    Parameters
    ----------
    history : object with a ``history`` mapping
        Typically the value returned by ``model.fit``; ``history.history``
        maps a metric name to its list of per-epoch values.
    metric : str, default 'loss'
        Name of the training metric to plot.  The matching validation
        series is looked up under ``'val_' + metric``.

    Raises
    ------
    KeyError
        If ``metric`` itself is not present in ``history.history``.
    """
    curves = history.history
    val_metric = 'val_' + metric

    plt.plot(curves[metric])
    labels = ['train']

    # Keras only records val_<metric> when validation data was supplied to
    # fit(); draw the second curve only if it exists instead of raising
    # KeyError halfway through building the figure.
    if val_metric in curves:
        plt.plot(curves[val_metric])
        labels.append('test')

    plt.title('model ' + metric)
    plt.ylabel(metric)
    plt.xlabel('epoch')
    plt.legend(labels)
    plt.show()

### Original model

This is a very standard, simple convolutional model.

In [None]:
# Two convolution/pooling stages on 28x28 single-channel images, then a
# small dense head ending in a 10-way softmax.
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    # classification part
    layers.Dense(10, activation='softmax'),
])

Basic CNN network

In [None]:
# Print the layer-by-layer summary of the original model; the experiments
# below compare their model sizes against this one.
model.summary()

Prepare the MNIST data

In [None]:
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Add the trailing channel axis required by the model's input_shape of
# (28, 28, 1) and rescale the pixel values by 1/255.  Using -1 lets NumPy
# infer the sample count instead of hard-coding 60000 / 10000.
train_images = train_images.reshape((-1, 28, 28, 1)).astype('float32') / 255
test_images = test_images.reshape((-1, 28, 28, 1)).astype('float32') / 255

# Encode the labels for the categorical cross-entropy loss.  num_classes is
# pinned to 10 to match the 10-unit output layer rather than being inferred
# from the largest label present in each array.
train_labels = to_categorical(train_labels, num_classes=10)
test_labels = to_categorical(test_labels, num_classes=10)

Create a subset of the data set to make training faster.

In [None]:
# Sizes of the reduced training and test sets.
num_train = 5000
num_test = 1000

# Keep only the leading examples of each split; images and labels are cut
# with the same bound so they stay aligned.
train_images, train_labels = train_images[:num_train], train_labels[:num_train]
test_images, test_labels = test_images[:num_test], test_labels[:num_test]

Compile and train

In [None]:
# Configure training: RMSprop optimizer, categorical cross-entropy loss,
# accuracy reported alongside the loss.
model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)

# Train for 5 epochs in mini-batches of 64.  The test subset is passed as
# validation data so that `history` can be plotted with plot_metric.
history = model.fit(
    train_images,
    train_labels,
    batch_size=64,
    epochs=5,
    validation_data=(test_images, test_labels),
)

#### Question: Are we experiencing overfitting?

In [None]:
# Loss curves (plot_metric defaults to metric='loss') for the run above.
plot_metric(history)

In [None]:
# Accuracy curves for the same training run.
plot_metric(history, metric='accuracy')

### Experiment 1:

Simplify the model by reducing the size of the dense layer from 64 to 32.  Also, reduce the number of channels in the second convolutional layer from 64 to 32.

In [None]:
# YOUR CODE HERE
# (Rebind `model` to the new architecture: the cells below summarize,
# compile, and train whatever `model` currently refers to.)

In [None]:
# Summary of the Experiment 1 model, for comparison with the original
# model's summary.
model.summary()

#### Question: What is the new model size as a fraction of the original model size?

Compile and train the model.

In [None]:
# Same training configuration as for the original model.
model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)

# 5 epochs, batch size 64, validated on the test subset after each epoch.
history = model.fit(
    train_images,
    train_labels,
    batch_size=64,
    epochs=5,
    validation_data=(test_images, test_labels),
)

In [None]:
# Accuracy curves for the Experiment 1 training run.
plot_metric(history, metric='accuracy')

#### Question: How were the training and test accuracy affected?  Explain why.

#### Question: Do we still have significant overfitting?

### Experiment 2:

Simplify the original model by eliminating the second convolution/pooling layer pair.

In [None]:
# YOUR CODE HERE
# (Rebind `model` to the new architecture: the next cell summarizes
# whatever `model` currently refers to.)

In [None]:
# Summary of the Experiment 2 model, for comparison with the original
# model's summary.
model.summary()

#### Question: How much smaller is the new model?  Explain.

### Experiment 3:

Figure out how to modify the original model by adding one more convolution and one more pooling layer pair so that the output shape before flattening is (None, 1, 1, 128).


In [None]:
# YOUR CODE HERE
# (Rebind `model` to the new architecture: the cells below summarize,
# compile, and train whatever `model` currently refers to.)

In [None]:
# Check in the summary that the output shape just before flattening is
# (None, 1, 1, 128), as the exercise requires.
model.summary()

In [None]:
# Same training configuration as for the original model.
model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)

# 5 epochs, batch size 64, validated on the test subset after each epoch.
history = model.fit(
    train_images,
    train_labels,
    batch_size=64,
    epochs=5,
    validation_data=(test_images, test_labels),
)

#### Question: How do the performance and size of this model compare to the original model?

### Experiment 4

Modify the original model by removing the next-to-last layer (the 64-unit dense layer), so that the flattened features feed the softmax output layer directly.

In [None]:
# YOUR CODE HERE
# (Rebind `model` to the new architecture: the cells below summarize,
# compile, and train whatever `model` currently refers to.)

In [None]:
# Summary of the Experiment 4 model, for comparison with the original
# model's summary.
model.summary()

In [None]:
# Same training configuration as for the original model.
model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)

# 5 epochs, batch size 64, validated on the test subset after each epoch.
history = model.fit(
    train_images,
    train_labels,
    batch_size=64,
    epochs=5,
    validation_data=(test_images, test_labels),
)

#### Question: How do the performance and size of this model compare to the original model?

#### If you still have time, create a variant of the original model in which the first convolutional layer is followed by another convolutional layer.