# Exercise Sheet 5

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.datasets import mnist  # MNIST data
from tensorflow.python.keras.utils import to_categorical
from tensorflow.python.keras.losses import categorical_crossentropy #loss function
from tensorflow.python.keras.optimizers import Adam, SGD #optimisers

## [1] MNIST - Conv2D

The digits in the MNIST dataset are centred in the image. Here we will undo this centring and compare the performance of convolutional neural networks vs fully connected networks.  
* Create a new dataset of images of size 50x50 where you place each handwritten digit at a random position within the image.
* Now train a neural network with a single hidden dense layer (as on the original MNIST dataset in the lectures).
* Now try to improve your performance in comparison to your previous layout by using an architecture involving convolutional layers (Conv2D).
* $\star$ We have mentioned various options to improve the performance of networks. Check whether methods like Dropout, BatchNormalization, Pooling layers can improve your results. Try to fine-tune the performance (you may also try deeper architectures, i.e. with more hidden layers.)

### Solution

For a very accessible explanation of CNNs, see e.g. [link](https://ujjwalkarn.me/2016/08/11/intuitive-explanation-convnets/).  

You can have a look at some architectures in the literature:  
[LeNet5](http://yann.lecun.com/exdb/publis/pdf/lecun-01a.pdf)  
[GoogLeNet](https://arxiv.org/pdf/1409.4842.pdf)

See also: [Why is training loss higher than testing loss?](https://keras.io/getting-started/faq/#why-is-the-training-loss-much-higher-than-the-testing-loss)  
Very useful when training takes long: [Keras Callbacks](https://keras.io/callbacks/)

Fun stuff about fruit flies: [link](https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0205043)

Copy code from lecture

In [None]:
# Default training settings (individual training cells may rebind these)
batch_size = 128
num_classes = 10
epochs = 20

# size of the images as delivered by MNIST
img_rows, img_cols = 28, 28

# fetch the train/test split provided by Keras
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# cast to float32 and divide by 255
# (presumably maps 8-bit pixel intensities into [0, 1])
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# one-hot encode the integer class labels
y_train, y_test = (to_categorical(labels, num_classes)
                   for labels in (y_train, y_test))

Set up some code for placing a digit at a random position in a 50x50 image initialized with zeros

In [None]:
# Size of a single digit image (pic*) and of the larger canvas (bigpic*)
picx = 28
picy = 28
bigpicx = 50
bigpicy = 50

def embed(pic, pos):
    """Return a zero-filled (bigpicx, bigpicy) array with pic written at pos.

    pos is a tuple of slice objects (e.g. from randpos() or np.s_) selecting
    the region of the canvas that receives pic.
    """
    bigpic = np.zeros((bigpicx, bigpicy))
    bigpic[pos] = pic
    return bigpic

def randpos():
    """Return a random placement of a (picx, picy) patch inside the canvas.

    The result is a tuple of two slice objects that can be passed to embed().
    Every offset from 0 up to and including bigpic - pic can be drawn, so the
    patch may touch any edge of the canvas.
    """
    # np.random.randint excludes its upper bound, hence the "+ 1": without it
    # the last valid offset (patch flush with the far edge) is never chosen
    xpos = np.random.randint(0, bigpicx - picx + 1)
    ypos = np.random.randint(0, bigpicy - picy + 1)
    # we use np.s_ to return a tuple of slice objects
    return np.s_[xpos:xpos+picx, ypos:ypos+picy]

Create the dataset

In [None]:
# Keep references to the original-size images before x_train/x_test are rebound
x_train_original, x_test_original = x_train, x_test

# Paste every digit at its own random offset into a larger zero canvas
x_train = np.array([embed(digit, randpos()) for digit in x_train])
x_test = np.array([embed(digit, randpos()) for digit in x_test])

# Conv2D layers expect a trailing channel axis, so append one of length 1
x_train_conv = x_train[..., np.newaxis]
x_test_conv = x_test[..., np.newaxis]
x_train_original_conv = x_train_original[..., np.newaxis]
x_test_original_conv = x_test_original[..., np.newaxis]

#### Try with only dense layers:

In [None]:
# Dense-only baseline: flatten the image, one hidden ReLU layer, softmax over 10 classes
model1 = tf.keras.Sequential([
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(250, activation='relu'),
    tf.keras.layers.Dense(10, activation='softmax'),
])
model1.compile(
    loss=categorical_crossentropy,
    optimizer=Adam(),
    metrics=['accuracy'],
)

In [None]:
batch_size = 128
epochs = 3

# Train the dense model; the test set is passed as validation data
hist1 = model1.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          validation_data=(x_test, y_test))

# Older Keras releases log the accuracy metric as 'acc', newer ones as
# 'accuracy'; pick whichever key this run actually produced
acc_key = 'accuracy' if 'accuracy' in hist1.history else 'acc'

# summarize history for accuracy
plt.plot(hist1.history[acc_key])
plt.plot(hist1.history['val_' + acc_key])
plt.ylabel('model accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='best')
plt.show()

# summarize history for loss
plt.plot(hist1.history['loss'])
plt.plot(hist1.history['val_loss'])
plt.ylabel('model loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='best')
plt.show()

The result is not bad, training is fast, but it is hard to go significantly beyond 90% accuracy on the test data

#### Now also add convolutional layers

In [None]:
model_conv1 = tf.keras.Sequential()

# Convolutional preprocessing
# Only the first layer declares the input shape (50x50 images, 1 channel)
model_conv1.add(tf.keras.layers.Conv2D(16, (5,5), input_shape=(50,50,1), activation='relu'))
model_conv1.add(tf.keras.layers.MaxPooling2D(pool_size=2))

# Later layers infer their input shape from the previous layer's output, so no
# input_shape is given here (a (50,50,1) hint would not match what this layer
# actually receives)
model_conv1.add(tf.keras.layers.Conv2D(16, (5,5), activation='relu'))
model_conv1.add(tf.keras.layers.MaxPooling2D(pool_size=2))

# Flatten to prepare for dense layers
model_conv1.add(tf.keras.layers.Flatten())

# Dense postprocessing
model_conv1.add(tf.keras.layers.Dense(648, activation='relu'))

# Output layer: softmax over the 10 classes
model_conv1.add(tf.keras.layers.Dense(10, activation='softmax'))

model_conv1.compile(loss=categorical_crossentropy, optimizer=Adam(), metrics=['accuracy'])

model_conv1.summary()

In [None]:
batch_size = 128
epochs = 3

# Train on the channel-expanded images; the test set is passed as validation data
hist_conv1 = model_conv1.fit(x_train_conv, y_train,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          validation_data=(x_test_conv, y_test))

# Older Keras releases log the accuracy metric as 'acc', newer ones as
# 'accuracy'; pick whichever key this run actually produced
acc_key = 'accuracy' if 'accuracy' in hist_conv1.history else 'acc'

# summarize history for accuracy
plt.plot(hist_conv1.history[acc_key])
plt.plot(hist_conv1.history['val_' + acc_key])
plt.ylabel('model accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='best')
plt.show()

# summarize history for loss
plt.plot(hist_conv1.history['loss'])
plt.plot(hist_conv1.history['val_loss'])
plt.ylabel('model loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='best')
plt.show()

Try another architecture

In [None]:
# Three Conv2D + MaxPooling2D stages with a growing number of filters,
# followed by a small dense head
model_conv2 = tf.keras.Sequential([
    # Convolutional preprocessing
    tf.keras.layers.Conv2D(64, (7,7), input_shape=(50,50,1), activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=3),
    tf.keras.layers.Conv2D(128, (3,3), activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=3),
    tf.keras.layers.Conv2D(192, (3,3), activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=2),

    # Flatten to prepare for dense layers
    tf.keras.layers.Flatten(),

    # Dense postprocessing and softmax output over the 10 classes
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(10, activation='softmax'),
])

model_conv2.compile(
    loss=categorical_crossentropy,
    optimizer=Adam(),
    metrics=['accuracy'],
)

model_conv2.summary()

In [None]:
batch_size = 128
epochs = 3

# Train on the channel-expanded images; the test set is passed as validation data
hist_conv2 = model_conv2.fit(x_train_conv, y_train,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          validation_data=(x_test_conv, y_test))

# Older Keras releases log the accuracy metric as 'acc', newer ones as
# 'accuracy'; pick whichever key this run actually produced
acc_key = 'accuracy' if 'accuracy' in hist_conv2.history else 'acc'

# summarize history for accuracy
plt.plot(hist_conv2.history[acc_key])
plt.plot(hist_conv2.history['val_' + acc_key])
plt.ylabel('model accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='best')
plt.show()

# summarize history for loss
plt.plot(hist_conv2.history['loss'])
plt.plot(hist_conv2.history['val_loss'])
plt.ylabel('model loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='best')
plt.show()

Add batch normalization and dropout.

In [None]:
# Conv/Dense layers are created without bias and without activation; each is
# followed by BatchNormalization and then a separate ReLU Activation layer.
# Dropout is applied once, after the last convolutional stage.
model_conv3 = tf.keras.Sequential([
    # Convolutional preprocessing, stage 1
    tf.keras.layers.Conv2D(64, (7,7), input_shape=(50,50,1), use_bias=False),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Activation('relu'),
    tf.keras.layers.MaxPooling2D(pool_size=3),

    # stage 2
    tf.keras.layers.Conv2D(128, (3,3), use_bias=False),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Activation('relu'),
    tf.keras.layers.MaxPooling2D(pool_size=3),

    # stage 3, with dropout ahead of the final pooling
    tf.keras.layers.Conv2D(192, (3,3), use_bias=False),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Activation('relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.MaxPooling2D(pool_size=2),

    # Flatten to prepare for dense layers
    tf.keras.layers.Flatten(),

    # Dense postprocessing
    tf.keras.layers.Dense(128, use_bias=False),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Activation('relu'),

    # softmax output over the 10 classes
    tf.keras.layers.Dense(10, activation='softmax'),
])

model_conv3.compile(
    loss=categorical_crossentropy,
    optimizer=Adam(),
    metrics=['accuracy'],
)

model_conv3.summary()

In [None]:
# Settings for this run: smaller batches, two epochs
batch_size = 64
epochs = 2

# Fit on the channel-expanded images; the test set is passed as validation data
hist_conv3 = model_conv3.fit(
    x=x_train_conv,
    y=y_train,
    validation_data=(x_test_conv, y_test),
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
)

In [None]:

# Older Keras releases log the accuracy metric as 'acc', newer ones as
# 'accuracy'; pick whichever key this run actually produced
acc_key = 'accuracy' if 'accuracy' in hist_conv3.history else 'acc'

# summarize history for accuracy
plt.plot(hist_conv3.history[acc_key])
plt.plot(hist_conv3.history['val_' + acc_key])
plt.ylabel('model accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='best')
plt.show()

# summarize history for loss
plt.plot(hist_conv3.history['loss'])
plt.plot(hist_conv3.history['val_loss'])
plt.ylabel('model loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='best')
plt.show()

## [2] Network Analysis

The aim of this exercise is to familiarise yourself with tools for analysing trained neural networks, i.e. opening the black box. In Keras you can access the values of the weights using `get_weights()`. Here we analyse the network we have discussed in the lectures; you can find a link to the pre-trained weights on the course website.  
* In a first step, identify examples where the network is not yet performing well, i.e. which are incorrectly classified.
* In a second step, visualise the average activation of several hidden layers and in particular different hidden filters in the convolutional layers. Try to identify the role of some of the hidden filters. It might be useful to consider the activations for a particular class of the dataset.

### Solution

Have a look at what Google does!  
[DeepDream Blog Post](https://ai.googleblog.com/2015/06/inceptionism-going-deeper-into-neural.html)  
[DeepDream Github](https://github.com/google/deepdream/blob/master/dream.ipynb)  
Also have a look at this [link](https://towardsdatascience.com/how-to-visualize-convolutional-features-in-40-lines-of-code-70b7d87b0030) for maximizing activation of feature maps

In [None]:
# import CIFAR dataset
from tensorflow.keras.datasets import cifar10

In [None]:
# Restore the pre-trained network from the downloaded HDF5 file
# (relative path, so the file is looked up in the current working directory)
model2 = tf.keras.models.load_model(filepath='keras_cifar10_trained_model.h5')
model2.summary()

In [None]:
# Data preprocessing

n_classes = 10

# NOTE: this rebinds x_train/y_train/x_test/y_test from the MNIST part above
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# NOTE(review): the images are used exactly as loaded, without any rescaling —
# confirm that this matches the preprocessing the loaded model was trained with

# one-hot encode the integer class labels
y_train, y_test = (tf.keras.utils.to_categorical(labels, n_classes)
                   for labels in (y_train, y_test))

In [None]:
# take only a few
n_examples = 100

# Sequential.predict_classes() no longer exists in recent Keras releases;
# the argmax over the predicted class probabilities yields the same labels
predictions = np.argmax(model2.predict(x_test[:n_examples]), axis=-1)
# y_test is one-hot encoded, so the argmax of each row is its integer label
correct_classes = np.argmax(y_test[:n_examples], axis=1)

# boolean mask selecting the misclassified examples (computed once, used thrice)
is_wrong = predictions != correct_classes
wrong_pred = x_test[:n_examples][is_wrong]
wrong_pred_predicted_classes = predictions[is_wrong]
wrong_pred_correct_classes = correct_classes[is_wrong]
wrong_pred.shape

In [None]:
# Plot one example:

# index into the misclassified examples
examplenum = 0

plt.imshow(wrong_pred[examplenum], interpolation='nearest')
# Both labels are looked up with examplenum so the title always belongs to the
# image shown (the true label used to be hard-wired to example 0)
plt.title("predicted: " + str(wrong_pred_predicted_classes[examplenum]) + "   " + "should have been: " + str(wrong_pred_correct_classes[examplenum]));

In [None]:
# Collect the output tensor of every layer of model2, in layer order
layer_outputs = [lyr.output for lyr in model2.layers]

# Wrap them in a model that shares model2's input and returns all of those
# per-layer outputs at once
model_out = tf.keras.models.Model(
    inputs=model2.input,
    outputs=layer_outputs,
)

In [None]:
# Run the selected misclassified image through model_out; the reshape adds the
# leading batch axis the model expects
activations = model_out.predict(np.reshape(wrong_pred[examplenum], (-1, 32, 32, 3)))

In [None]:
# Output of the first layer of model2 for the single image evaluated above
first_layer_activation = activations[0]
# have a look at different feature maps
# (index 0 picks the only image in the batch, the last index picks the feature
# map; each matshow call opens its own figure)
plt.matshow(first_layer_activation[0, :, :, 0], cmap='inferno')
plt.matshow(first_layer_activation[0, :, :, 1], cmap='inferno')
plt.matshow(first_layer_activation[0, :, :, 2], cmap='inferno')
plt.matshow(first_layer_activation[0, :, :, 3], cmap='inferno')
plt.matshow(first_layer_activation[0, :, :, 4], cmap='inferno')

In [None]:
def _activation_grid(layer_activation, images_per_row):
    """Tile the feature maps of one layer activation into a single 2D image.

    layer_activation is indexed as (1, size, size, n_features); the maps are
    assumed to be square, as in the original presentation code. Each map is
    standardised and rescaled to the 0..255 range for display.
    """
    n_features = layer_activation.shape[-1]
    size = layer_activation.shape[1]
    # ceil division: a trailing, partially filled row is still displayed
    n_rows = -(-n_features // images_per_row)
    display_grid = np.zeros((size * n_rows, images_per_row * size))
    for idx in range(n_features):
        # work on a copy so the activation array itself is left untouched
        channel_image = layer_activation[0, :, :, idx].copy()
        # post processing: zero mean, unit variance, then map to 0..255
        channel_image -= channel_image.mean()
        std = channel_image.std()
        if std > 0:
            # a constant map has std 0; skipping the division avoids NaNs
            channel_image /= std
        channel_image *= 64
        channel_image += 128
        channel_image = np.clip(channel_image, 0, 255).astype('uint8')
        # place into grid
        row, col = divmod(idx, images_per_row)
        display_grid[row*size:(row+1)*size, col*size:(col+1)*size] = channel_image
    return display_grid


# plot all layer activations in the convolutional part of the net
def plot_layers(examplenum = 0):
    """Plot the feature maps of every 4D layer output for one misclassified image.

    examplenum indexes into wrong_pred. Every layer of model2 whose
    activation has four dimensions gets its own figure, titled with the
    layer name, showing its feature maps tiled 16 per row. Layers with
    other output ranks (e.g. dense layers) are skipped.
    """
    activations = model_out.predict(wrong_pred[examplenum].reshape(-1,32,32,3))

    # Layer names for plot titles
    layer_names = [layer.name for layer in model2.layers]

    images_per_row = 16

    # The essence of how the activations are extracted is the predict() call
    # above; everything below is only about presentation and plotting.
    for layer_name, layer_activation in zip(layer_names, activations):
        if len(layer_activation.shape) != 4:     # ignore dense layers
            continue
        display_grid = _activation_grid(layer_activation, images_per_row)
        # one inch per feature map in the figure
        scale = 1. / layer_activation.shape[1]
        plt.figure(figsize=(scale*display_grid.shape[1], scale*display_grid.shape[0]))
        plt.title(layer_name)
        plt.grid(False)
        plt.imshow(display_grid, aspect='auto', cmap='inferno')

In [None]:
# Show the layer activations for the first misclassified example
plot_layers(0)

In [None]:
# Show the layer activations for the misclassified example at index 3
# (requires wrong_pred to hold at least four examples)
plot_layers(3)