# Exercise 9
Try using a denoising autoencoder to pretrain an image classifier. You can use MNIST (the simplest option), or a more complex image dataset such as CIFAR10 if you want a bigger challenge. Regardless of the dataset you’re using, follow these steps:

* Split the dataset into a training set and a test set.
* Train a deep denoising autoencoder on the full training set.
* Check that the images are fairly well reconstructed.
* Visualize the images that most activate each neuron in the coding layer.
* Build a classification DNN, reusing the lower layers of the autoencoder.
* Train it using only 500 images from the training set. Does it perform better with or without pretraining?

## Load Dependencies

In [57]:
from functools import partial
from tensorflow import keras

import collections
import numpy as np
import pandas as pd

## Get CIFAR 10 data

In [32]:
# Load the CIFAR-10 training and test splits (images and integer labels) via Keras.
(X_train, y_train), (X_test, y_test) = keras.datasets.cifar10.load_data()

In [33]:
# Rescale pixel values to [0, 1] as float32.
# The dtype guard makes this cell idempotent: it overwrites X_train / X_test in
# place, so without the guard a second execution would divide by 255 again.
# (keras.datasets.cifar10.load_data() returns uint8 image arrays.)
if X_train.dtype == np.uint8:
    X_train = (X_train / 255.).astype('float32')
if X_test.dtype == np.uint8:
    X_test = (X_test / 255.).astype('float32')

In [74]:
# Seed NumPy's global RNG so the same subset is drawn on every run, then pick
# n distinct row indices (sampling without replacement) from the training set.
np.random.seed(42)
n = 512
idx = np.random.choice(X_train.shape[0], n, replace=False)

In [75]:
# Count how many sampled images fall into each class label, as a quick check
# that the random subset is reasonably balanced.
collections.Counter(y_train[idx].squeeze())

Counter({7: 62, 8: 53, 0: 42, 6: 47, 1: 50, 5: 49, 2: 46, 4: 52, 3: 57, 9: 54})

In [76]:
# Reduced training set: the sampled images together with their labels.
X_train_small, y_train_small = X_train[idx], y_train[idx]

## Train CNN on reduced data

In [77]:
# Factory for the convolution layers used throughout the notebook: 3x3 kernels,
# 'same' padding and SELU activation; callers only supply `filters`.
cnn_layer = partial(keras.layers.Conv2D, kernel_size=3, padding='same', activation='selu')

In [78]:
# Baseline classifier trained from scratch: three conv + max-pool stages
# (16, 32, 64 filters), then a 100-unit dense layer and a 10-way softmax.
model = keras.models.Sequential()
model.add(keras.layers.Input(shape=X_train.shape[1:]))
for n_filters in (16, 32, 64):
    model.add(cnn_layer(filters=n_filters))
    model.add(keras.layers.MaxPooling2D())
model.add(keras.layers.Flatten())
model.add(keras.layers.Dense(100, activation='selu'))
model.add(keras.layers.Dense(10, activation='softmax'))

In [79]:
# Nadam optimizer with learning rate 1e-3.
# NOTE(review): this single instance is passed to several compile() calls further
# down; consider one optimizer instance per compiled model.
optimizer = keras.optimizers.Nadam(learning_rate=0.001)

In [80]:
# Integer class labels -> sparse categorical cross-entropy; track accuracy.
model.compile(
    optimizer=optimizer,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [81]:
# Train the baseline on the small subset only; validate on the full test set.
history = model.fit(
    X_train_small,
    y_train_small,
    batch_size=32,
    epochs=1,
    validation_data=(X_test, y_test),
)

Train on 512 samples, validate on 10000 samples


In [None]:
# Plot the baseline classifier's per-epoch training/validation metrics.
# DataFrame.plot() returns the Matplotlib Axes, so the figure is labelled through
# that object; the previous `plt.show()` raised NameError because `plt` is never
# imported in this notebook. The inline backend renders the figure at cell end,
# and the trailing semicolon suppresses the repr of the last expression.
ax = pd.DataFrame(history.history).plot()
ax.set(title='Baseline CNN training history', xlabel='epoch');

## Train Autoencoder

In [34]:
# Sanity check: full training set dimensions (samples, height, width, channels).
X_train.shape

(50000, 32, 32, 3)

In [36]:
# Encoder half of the autoencoder: the same three conv + max-pool stages
# (16, 32, 64 filters) as the baseline classifier, without the dense head.
encoder = keras.models.Sequential()
encoder.add(keras.layers.Input(shape=X_train.shape[1:]))
for n_filters in (16, 32, 64):
    encoder.add(cnn_layer(filters=n_filters))
    encoder.add(keras.layers.MaxPooling2D())

In [37]:
# Shape of the coding produced by the encoder; the decoder input is built from it.
encoder.output_shape

(None, 4, 4, 64)

In [None]:
# Factory for the decoder's transposed convolutions: 3x3 kernels, 'same' padding,
# SELU activation and stride 2; callers only supply `filters`.
cnn_t_layer = partial(keras.layers.Conv2DTranspose, kernel_size=3, padding='same', activation='selu', strides=2)

In [38]:
# Decoder: three stride-2 transposed convolutions that upsample the coding back
# to the input image shape.
# The output layer overrides the partial's SELU with a sigmoid: the autoencoder
# is trained with binary cross-entropy against pixels scaled to [0, 1], which
# requires predictions in (0, 1). SELU is unbounded (and can be negative).
decoder = keras.models.Sequential([
    keras.layers.Input(shape=encoder.output_shape[1:]),
    cnn_t_layer(filters=32),
    cnn_t_layer(filters=16),
    cnn_t_layer(filters=3, activation='sigmoid')
])

In [39]:
# Confirm the decoder reconstructs the original image dimensions.
decoder.output_shape

(None, 32, 32, 3)

In [40]:
# Denoising autoencoder: Gaussian noise corrupts the input before it reaches the
# encoder, while the training target remains the clean image. GaussianNoise is
# only active during training, so inference/validation see uncorrupted inputs.
# The noise layer sits outside `encoder`, so the encoder's own layer list (and
# anything cloned from it) is unchanged.
autoencoder = keras.models.Sequential([
    keras.layers.Input(shape=X_train.shape[1:]),
    keras.layers.GaussianNoise(0.1),
    encoder,
    decoder,
])

In [43]:
# Use a dedicated optimizer instance: the shared `optimizer` object is also passed
# to the classifier's compile() call, and an optimizer's internal state (step
# counter, moment estimates) belongs to the model it was first used with.
# Pixel-wise binary cross-entropy is the reconstruction loss; MSE is reported too.
autoencoder.compile(
    optimizer=keras.optimizers.Nadam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=['mse'],
)

In [44]:
# Unsupervised pretraining on the full training set: the images serve as both
# input and reconstruction target. Note this rebinds `history`.
history = autoencoder.fit(
    X_train,
    X_train,
    batch_size=32,
    epochs=1,
    validation_data=(X_test, X_test),
)

Train on 50000 samples, validate on 10000 samples


## Apply transfer learning

In [89]:
# clone_model() rebuilds the architecture with newly initialised weights; it does
# NOT carry over the trained values. Copy the encoder's weights explicitly,
# otherwise the "pretrained" classifier would start from random weights.
transfer_model = keras.models.clone_model(encoder)
transfer_model.set_weights(encoder.get_weights())

In [96]:
# Append the classification head (same layout as the baseline classifier's head)
# on top of the cloned encoder layers.
classifier_head = [
    keras.layers.Flatten(),
    keras.layers.Dense(100, activation='selu'),
    keras.layers.Dense(10, activation='softmax'),
]
for head_layer in classifier_head:
    transfer_model.add(head_layer)

Freeze pre-trained conv layers for initial training

In [97]:
# Inspect the layer stack (and auto-generated layer names) before freezing.
transfer_model.summary()

Model: "sequential_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_12 (Conv2D)           (None, 32, 32, 16)        448       
_________________________________________________________________
max_pooling2d_12 (MaxPooling (None, 16, 16, 16)        0         
_________________________________________________________________
conv2d_13 (Conv2D)           (None, 16, 16, 32)        4640      
_________________________________________________________________
max_pooling2d_13 (MaxPooling (None, 8, 8, 32)          0         
_________________________________________________________________
conv2d_14 (Conv2D)           (None, 8, 8, 64)          18496     
_________________________________________________________________
max_pooling2d_14 (MaxPooling (None, 4, 4, 64)          0         
_________________________________________________________________
flatten_4 (Flatten)          (None, 1024)             

In [98]:
# Freeze every layer whose name contains 'conv2d' so that only the remaining
# layers are updated during the first training round.
conv_layers = [layer for layer in transfer_model.layers if 'conv2d' in layer.name]
for layer in conv_layers:
    layer.trainable = False

In [99]:
# Inspect the model again after the convolutional layers were marked non-trainable.
transfer_model.summary()

Model: "sequential_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_12 (Conv2D)           (None, 32, 32, 16)        448       
_________________________________________________________________
max_pooling2d_12 (MaxPooling (None, 16, 16, 16)        0         
_________________________________________________________________
conv2d_13 (Conv2D)           (None, 16, 16, 32)        4640      
_________________________________________________________________
max_pooling2d_13 (MaxPooling (None, 8, 8, 32)          0         
_________________________________________________________________
conv2d_14 (Conv2D)           (None, 8, 8, 64)          18496     
_________________________________________________________________
max_pooling2d_14 (MaxPooling (None, 4, 4, 64)          0         
_________________________________________________________________
flatten_4 (Flatten)          (None, 1024)             

In [91]:
# NOTE(review): leftover inspection handle on the first layer; `l` is not used by
# any later cell visible in this notebook and can probably be removed.
l = transfer_model.layers[0]

In [100]:
# Compile after freezing so the trainable flags take effect. A fresh optimizer is
# created here instead of reusing the shared `optimizer` object, which is also
# passed to other models' compile() calls and would carry their state.
transfer_model.compile(
    optimizer=keras.optimizers.Nadam(learning_rate=0.001),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [101]:
# Round 1: train on the small labelled subset with the conv layers frozen.
transfer_history = transfer_model.fit(
    X_train_small,
    y_train_small,
    batch_size=32,
    epochs=1,
    validation_data=(X_test, y_test),
)

Train on 512 samples, validate on 10000 samples


Unfreeze the last convolutional layer

In [104]:
# layers[4] is the third (last) convolutional layer. Changing `trainable` only
# takes effect after compile(). A new optimizer is used because the set of
# trainable variables changed; an optimizer already used for training is tied
# to the variables it was first applied to.
transfer_model.layers[4].trainable = True
transfer_model.compile(
    optimizer=keras.optimizers.Nadam(learning_rate=0.001),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [107]:
# Round 2: continue training with the last convolutional layer unfrozen.
transfer_history_2 = transfer_model.fit(
    X_train_small,
    y_train_small,
    batch_size=32,
    epochs=1,
    validation_data=(X_test, y_test),
)

Train on 512 samples, validate on 10000 samples


In [108]:
# layers[2] is the second convolutional layer. Recompile so the new trainable
# flag takes effect, with a fresh optimizer because the trainable variable set
# changed since the previous compile().
transfer_model.layers[2].trainable = True
transfer_model.compile(
    optimizer=keras.optimizers.Nadam(learning_rate=0.001),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [109]:
# Round 3: continue training with the second convolutional layer unfrozen as well.
transfer_history_3 = transfer_model.fit(
    X_train_small,
    y_train_small,
    batch_size=32,
    epochs=1,
    validation_data=(X_test, y_test),
)

Train on 512 samples, validate on 10000 samples


In [110]:
# layers[0] is the first convolutional layer; setting it trainable here makes
# this index trainable again. Recompile with a fresh optimizer because the
# trainable variable set changed since the previous compile().
transfer_model.layers[0].trainable = True
transfer_model.compile(
    optimizer=keras.optimizers.Nadam(learning_rate=0.001),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [111]:
# Round 4: final fine-tuning pass after the first convolutional layer is unfrozen.
transfer_history_4 = transfer_model.fit(
    X_train_small,
    y_train_small,
    batch_size=32,
    epochs=1,
    validation_data=(X_test, y_test),
)

Train on 512 samples, validate on 10000 samples
