# Autoencoder Using CNN

## Preparations
### Load libraries

In [None]:
import numpy as np 
import pandas as pd
import pickle

import matplotlib.pyplot as plt 
import seaborn as sns
%matplotlib inline

from sklearn.model_selection import train_test_split

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout
from tensorflow.keras.layers import Conv2D, Flatten, Input, MaxPool2D
from tensorflow.keras.optimizers import Adam, RMSprop
from tensorflow.keras.utils import to_categorical, plot_model
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard
from tensorflow.keras import regularizers

In [None]:
import tensorflow as tf
# Fix the global TensorFlow and NumPy seeds so that repeated runs of the notebook
# start from the same random state.
tf.random.set_seed(123)
np.random.seed(123)

In [None]:
# Using 10 epochs will make this notebook run rather quickly.
# If you have time and are willing to wait a bit longer for better results, increase this value to e.g., 50.
nEpochs = 10

### Prepare data

In [None]:
# Load data: Keras returns a (train, test) pair of (images, labels) tuples.
mnist = tf.keras.datasets.mnist
(train_val_images, train_val_labels), (test_images, test_labels) = mnist.load_data()

# Scale image data by dividing by 255.
# NOTE(review): this assumes raw pixel intensities lie in [0, 255], giving values in [0, 1]
# that match the sigmoid output of the decoders defined later.
train_val_images = train_val_images / 255.0
test_images = test_images / 255.0

# Split into training / validation (80 % / 20 %, fixed random_state for a reproducible split)
train_images, val_images, train_labels, val_labels = train_test_split(train_val_images, train_val_labels,
                                                                      test_size=0.20, random_state=42)

In [None]:
# Label distribution of the combined training + validation pool
# (value_counts lists the most frequent label first).
train_val_label_df = pd.DataFrame({'label': train_val_labels})
train_val_label_df['label'].value_counts()

In [None]:
# Label distribution of the validation split, listed in ascending label order.
val_label_df = pd.DataFrame(val_labels)
val_label_df.columns = ['label']
# `ascending` has no effect in value_counts() when sort=False, so the rows are
# ordered explicitly by label with sort_index() instead.
val_label_df['label'].value_counts().sort_index()

## The autoencoder
Every autoencoder consists of two parts: an encoder and a decoder.

* The **encoder** receives the original data (in our case, the black and white images) as input and generates a lower-dimensional code from it.
* The **decoder** receives the code and decodes it into original data (e.g. the images) in the same format as the encoder's inputs.

### Model definition

The two parts put together form the autoencoder:

**Comments on activation functions:**

***Why ReLU?***

* Simplicity and Efficiency: ReLU (Rectified Linear Unit) is computationally efficient because it involves simple thresholding at zero. This makes it faster to compute compared to other activation functions.
* Sparse Activation: ReLU promotes sparsity in the network by setting negative values to zero, which can help in learning more robust features.
* Gradient Propagation: ReLU helps mitigate the vanishing gradient problem, allowing gradients to propagate more effectively during backpropagation.

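***Sigmoid*** is used in the last layer of the decoder to get an output between 0 and 1, i.e. the same range as the scaled input pixels. (Note that the hidden convolutional layers of the decoder below use SELU rather than ReLU.)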

In [None]:
# Encoder: two convolution + max-pooling stages halve the spatial size twice
# (28 -> 14 -> 7); a final convolution produces the 7 x 7 x 64 code.
CNNmnist_encoder = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(28, 28, 1)),
    tf.keras.layers.Conv2D(filters=16, kernel_size=3, padding="same", activation="relu"),
    tf.keras.layers.MaxPool2D(pool_size=2),  # -> 14 x 14 x 16
    tf.keras.layers.Conv2D(filters=32, kernel_size=3, padding="same", activation="relu"),
    tf.keras.layers.MaxPool2D(pool_size=2),  # -> 7 x 7 x 32
    tf.keras.layers.Conv2D(filters=64, kernel_size=3, padding="same", activation="relu"),  # -> 7 x 7 x 64
])

# Decoder: mirrors the encoder, doubling the spatial size twice (7 -> 14 -> 28);
# the final single-filter sigmoid convolution yields a 28 x 28 x 1 image in [0, 1].
CNNmnist_decoder = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(7, 7, 64)),
    tf.keras.layers.Conv2D(filters=32, kernel_size=3, padding="same", activation="selu"),
    tf.keras.layers.UpSampling2D(size=2),  # -> 14 x 14 x 32
    tf.keras.layers.Conv2D(filters=16, kernel_size=3, padding="same", activation="selu"),
    tf.keras.layers.UpSampling2D(size=2),  # -> 28 x 28 x 16
    tf.keras.layers.Conv2D(filters=1, kernel_size=3, padding="same", activation="sigmoid"),
])

# Full autoencoder: encoder followed by decoder (the sub-models are shared, not copied).
CNNmnist_ae = tf.keras.Sequential([CNNmnist_encoder, CNNmnist_decoder])

### Training
The autoencoder uses unsupervised learning, i.e. we do not pass any predefined labels or similar. Instead, the output should be as similar as possible to the input. We use the squared error as a measure of the quality of the reconstruction.

For a start, we allow a maximum of 10 epochs to train - as with the other convolutional networks (and with deep neural networks in general), you should allow significantly more epochs for real applications.

The training takes a little longer here... With the following code you can save and reload the learned weights - just set `train_from_scratch` as needed.
Please note that only the weights, not the models themselves, are saved and loaded again. The model definition is made in the code and managed as such.

Loading the weights only works if the model definition is exactly the same.

In [None]:
# Re-seed TensorFlow and compile the model with mean squared error as reconstruction loss
# (the actual fitting happens in the next cell).
tf.random.set_seed(42)
CNNmnist_ae.compile(loss="mse", optimizer="nadam")

In [None]:
# Patience of the early-stopping callback (epochs without val_loss improvement).
# NOTE: this is larger than the default nEpochs (10), so with the default settings
# training is never actually cut short.
nPatience = 50

# define paths:
encoder_weights_path_10 = './CNNmnist_encoder_10.weights.h5'
decoder_weights_path_10 = './CNNmnist_decoder_10.weights.h5'
# NOTE: despite the ".h5" suffix this file is written with pickle, not HDF5.
history_path = './CNNmnist_ae.history.h5'

# Set to False to reuse the weights / history stored by a previous run instead of training.
train_from_scratch = True
if train_from_scratch:
    history_ae = CNNmnist_ae.fit(train_images, train_images, epochs=nEpochs, validation_data=(val_images, val_images),
                                 callbacks=[EarlyStopping(monitor='val_loss', patience=nPatience,
                                                          verbose=False, restore_best_weights=True)])
    # Save the weights:
    CNNmnist_encoder.save_weights(encoder_weights_path_10)
    CNNmnist_decoder.save_weights(decoder_weights_path_10)

    # Save training history. Only the plain dict of per-epoch metrics is pickled:
    # the History callback itself references the trained model, so pickling the callback
    # would pull the whole model into the file (or fail, depending on the Keras version).
    with open(history_path, 'wb') as f:
        pickle.dump(history_ae.history, f)
else:
    # load previously computed weights
    CNNmnist_encoder.load_weights(encoder_weights_path_10)
    CNNmnist_decoder.load_weights(decoder_weights_path_10)
    # connect
    CNNmnist_ae = tf.keras.Sequential([CNNmnist_encoder, CNNmnist_decoder])
    CNNmnist_ae.build(input_shape=(None, 28, 28, 1))

    # load history (pickle is only safe for files written by this notebook itself):
    with open(history_path, 'rb') as f:
        stored_history = pickle.load(f)
    if isinstance(stored_history, dict):
        # Re-wrap the metrics dict so that `history_ae.history` works exactly as after `fit`.
        history_ae = tf.keras.callbacks.History()
        history_ae.history = stored_history
    else:
        # File written by an earlier version of this cell, which pickled the History object itself.
        history_ae = stored_history

Please note that only the weights, not the models themselves, are saved and loaded again. The model definition is made in the code and managed as such.

Loading the weights only works if the model definition is exactly the same.

In [None]:
def plot_history(history):
    """
    Plot the training and validation loss per epoch.

    Args:
    - history: dict of per-epoch metrics containing the keys 'loss' and 'val_loss'
      (i.e. the `.history` attribute of the object returned by `model.fit`).
      An object exposing such a dict as its `.history` attribute is accepted as well.

    Returns:
    None
    """
    # Accept either the metrics dict itself or the History object wrapping it.
    metrics = getattr(history, 'history', history)

    plt.plot(metrics['loss'], label='Training')
    plt.plot(metrics['val_loss'], label='Validation')
    plt.legend()
    plt.title('Loss history')
    # Each list holds one value per epoch, so the x-axis counts epochs.
    plt.xlabel('Epoch')
    plt.ylabel('Loss')

    plt.show()

In [None]:
plot_history(history_ae.history)

### Model Summary
The model has already been compiled and trained above, so we can now print a summary of the full autoencoder and of its two parts:

In [None]:
CNNmnist_ae.summary()

In [None]:
CNNmnist_encoder.summary()

In [None]:
CNNmnist_decoder.summary()

This CNN-based model has less than a tenth of the parameters of the network without the convolution layers!

### Looking at the reconstructions
Let's look at some of the reconstructed images:

In [None]:
def plot_image(image):
    """
    Draw `image` with the 'binary' colormap and hide the axis decorations.

    Uses the pyplot state machine, i.e. it draws into whichever axes is current
    (the caller selects it, e.g. via `plt.subplot`).

    Args:
    - image: image data accepted by `plt.imshow`.

    Returns:
    None
    """
    plt.imshow(image, cmap="binary")
    plt.axis("off")
    
def show_reconstructions(model, images=test_images, n_images=5):
    """
    Show the first `n_images` inputs (top row) above their reconstructions (bottom row).

    Args:
    - model: autoencoder exposing `predict`; it is applied to `images[:n_images]`.
    - images: indexable collection of input images; defaults to `test_images`
      as it was bound when this cell was executed.
    - n_images: number of images to show; clamped to the number of available images.

    Returns:
    None
    """
    # Never index past the end of `images` when fewer than `n_images` are supplied.
    n_images = min(n_images, len(images))
    reconstructions = model.predict(images[:n_images])
    plt.figure(figsize=(n_images * 1.5, 3))
    for image_index in range(n_images):
        # Top row: original image.
        plt.subplot(2, n_images, 1 + image_index)
        plot_image(images[image_index])
        # Bottom row: reconstruction of the same image.
        plt.subplot(2, n_images, 1 + n_images + image_index)
        plot_image(reconstructions[image_index])
    plt.show()

In [None]:
show_reconstructions(CNNmnist_ae)

In [None]:
plot_history(history_ae.history)

## Low-dimensional bottleneck autoencoders


### Encoding Dimension 30

In [None]:
# Encoder: same convolutional front end as before (28 -> 14 -> 7), then the 7 x 7 x 16
# feature map is flattened and squeezed through dense layers into a 30-dimensional code.
CNNmnist_encoder_30 = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(28, 28, 1)),
    tf.keras.layers.Conv2D(filters=16, kernel_size=3, padding="same", activation="relu"),
    tf.keras.layers.MaxPool2D(pool_size=2),  # -> 14 x 14 x 16
    tf.keras.layers.Conv2D(filters=32, kernel_size=3, padding="same", activation="relu"),
    tf.keras.layers.MaxPool2D(pool_size=2),  # -> 7 x 7 x 32
    tf.keras.layers.Conv2D(filters=16, kernel_size=3, padding="same", activation="relu"),  # -> 7 x 7 x 16
    tf.keras.layers.Flatten(),  # -> 784
    tf.keras.layers.Dense(units=100, activation="relu"),
    tf.keras.layers.Dense(units=30, activation="relu"),  # -> 30 (the code)
])

# Decoder: dense layers expand the code back to 7 x 7 x 16, then two upsampling stages
# restore the 28 x 28 resolution. No Input layer is declared here, so this model is only
# built on first use or through an explicit build() call.
CNNmnist_decoder_30 = tf.keras.Sequential([
    tf.keras.layers.Dense(units=100, activation="relu"),
    tf.keras.layers.Dense(units=7 * 7 * 16, activation="relu"),
    tf.keras.layers.Reshape(target_shape=(7, 7, 16)),
    tf.keras.layers.Conv2D(filters=32, kernel_size=3, padding="same", activation="selu"),
    tf.keras.layers.UpSampling2D(size=2),  # -> 14 x 14 x 32
    tf.keras.layers.Conv2D(filters=16, kernel_size=3, padding="same", activation="selu"),
    tf.keras.layers.UpSampling2D(size=2),  # -> 28 x 28 x 16
    tf.keras.layers.Conv2D(filters=1, kernel_size=3, padding="same", activation="sigmoid"),
])

# Full autoencoder: encoder followed by decoder (the sub-models are shared, not copied).
CNNmnist_ae_30 = tf.keras.Sequential([CNNmnist_encoder_30, CNNmnist_decoder_30])

In [None]:
# Re-seed TensorFlow and compile the 30-d autoencoder with mean squared error as
# reconstruction loss (the actual fitting happens in the next cell).
tf.random.set_seed(42)
CNNmnist_ae_30.compile(loss="mse", optimizer="nadam")

In [None]:
# define paths:
mnist_encoder_30_path = './CNNmnist_encoder_30.weights.h5'
mnist_decoder_30_path = './CNNmnist_decoder_30.weights.h5'
# NOTE: despite the ".h5" suffix this file is written with pickle, not HDF5.
history_path = './CNNmnist_decoder_30.history.h5'

if train_from_scratch:
    history30 = CNNmnist_ae_30.fit(train_images, train_images, epochs=nEpochs, validation_data=(val_images, val_images),
                                         callbacks=[EarlyStopping(monitor='val_loss', patience=nPatience,
                                                                  verbose=False, restore_best_weights=True)])

    # Save the weights:
    CNNmnist_encoder_30.save_weights(mnist_encoder_30_path)
    CNNmnist_decoder_30.save_weights(mnist_decoder_30_path)

    # Save training history. Only the plain dict of per-epoch metrics is pickled:
    # the History callback itself references the trained model, so pickling the callback
    # would pull the whole model into the file (or fail, depending on the Keras version).
    with open(history_path, 'wb') as f:
        pickle.dump(history30.history, f)

else:
    # load previously computed weights
    # (both sub-models are built explicitly first so that their variables exist)
    CNNmnist_encoder_30.build(input_shape=(None, 28, 28, 1))
    CNNmnist_decoder_30.build(input_shape=(None, 30))
    CNNmnist_encoder_30.load_weights(mnist_encoder_30_path)
    CNNmnist_decoder_30.load_weights(mnist_decoder_30_path)

    # load history (pickle is only safe for files written by this notebook itself):
    with open(history_path, 'rb') as f:
        stored_history = pickle.load(f)
    if isinstance(stored_history, dict):
        # Re-wrap the metrics dict so that `history30.history` works exactly as after `fit`.
        history30 = tf.keras.callbacks.History()
        history30.history = stored_history
    else:
        # File written by an earlier version of this cell, which pickled the History object itself.
        history30 = stored_history

In [None]:
plot_history(history30.history)

In [None]:
show_reconstructions(CNNmnist_ae_30)

In [None]:
CNNmnist_encoder_30.summary()

In [None]:
CNNmnist_decoder_30.summary()

### Encoding Dimension 2

**TODO**: Implement an autoencoder with an encoding dimension of 2. Follow the above example with an encoding dimension of 30.

In [None]:
# Model definition
# CNNmnist_encoder_2 = ...
# 
# CNNmnist_decoder_2 = ...
# 
# CNNmnist_ae_2 = ...

# NOTE: as long as the three placeholders above are still commented out, no cell shown in
# this notebook defines CNNmnist_ae_2, and the compile call below raises a NameError.
# Complete the TODO before running this cell and the cells that follow.
tf.random.set_seed(42)
CNNmnist_ae_2.compile(loss="mse", optimizer="nadam")

In [None]:
# define paths:
mnist_encoder_2_path = './CNNmnist_encoder_2.weights.h5'
mnist_decoder_2_path = './CNNmnist_decoder_2.weights.h5'
# NOTE: despite the ".h5" suffix this file is written with pickle, not HDF5.
history_path = './CNNmnist_decoder_2.history.h5'

if train_from_scratch:
    history2 = CNNmnist_ae_2.fit(train_images, train_images, epochs=nEpochs, validation_data=(val_images, val_images),
                                       callbacks=[EarlyStopping(monitor='val_loss', patience=nPatience,
                                                                verbose=False, restore_best_weights=True)])

    # Save the weights:
    CNNmnist_encoder_2.save_weights(mnist_encoder_2_path)
    CNNmnist_decoder_2.save_weights(mnist_decoder_2_path)

    # Save training history. Only the plain dict of per-epoch metrics is pickled:
    # the History callback itself references the trained model, so pickling the callback
    # would pull the whole model into the file (or fail, depending on the Keras version).
    with open(history_path, 'wb') as f:
        pickle.dump(history2.history, f)

else:
    # load previously computed weights
    # (both sub-models are built explicitly first so that their variables exist)
    CNNmnist_encoder_2.build(input_shape=(None, 28, 28, 1))
    CNNmnist_decoder_2.build(input_shape=(None, 2))
    CNNmnist_encoder_2.load_weights(mnist_encoder_2_path)
    CNNmnist_decoder_2.load_weights(mnist_decoder_2_path)

    # load history (pickle is only safe for files written by this notebook itself):
    with open(history_path, 'rb') as f:
        stored_history = pickle.load(f)
    if isinstance(stored_history, dict):
        # Re-wrap the metrics dict so that `history2.history` works exactly as after `fit`.
        history2 = tf.keras.callbacks.History()
        history2.history = stored_history
    else:
        # File written by an earlier version of this cell, which pickled the History object itself.
        history2 = stored_history

In [None]:
plot_history(history2.history)

In [None]:
show_reconstructions(CNNmnist_ae_2)

### Visualizing Decodings
In the following, we will see how a few chosen codes will be decoded into an image:

In [None]:
# Decode the four corner codes of the unit square: panel (ID1, ID2) shows the image
# obtained from the code [ID1, ID2].
fig, axs = plt.subplots(2, 2, figsize=(6, 6))

for ID1 in range(2):
    for ID2 in range(2):
        # Use the grid position itself as the code, so that each of [0, 0], [0, 1],
        # [1, 0] and [1, 1] is decoded exactly once. (Setting code[0, ID1] and
        # code[0, ID2] to 1 would decode [1, 1] twice and never [0, 0].)
        code = np.array([[ID1, ID2]], dtype=float)
        coding = np.squeeze(CNNmnist_decoder_2(code))
        im = axs[ID1, ID2].imshow(coding, vmin=0, vmax=1)
        axs[ID1, ID2].axis('off')
        axs[ID1, ID2].set_title(code)

# Reserve space on the right for one colorbar shared by all panels
# (all panels use the same vmin/vmax, so the last image handle is representative).
fig.subplots_adjust(right=0.9)
cbar_ax = fig.add_axes([0.95, 0.15, 0.025, 0.7])
fig.colorbar(im, cax=cbar_ax)

plt.show()

In [None]:
# Decode a 6 x 6 grid of codes: grid index 0 maps to the value 0 and index k > 0 to
# 2**(k-1), i.e. each code component runs through 0, 1, 2, 4, 8, 16.
fig, axs = plt.subplots(6, 6, figsize=(12, 12))

for val1 in range(6):
    for val2 in range(6):
        code = np.array([[
            2.0 ** (val1 - 1) if val1 > 0 else 0.0,
            2.0 ** (val2 - 1) if val2 > 0 else 0.0,
        ]])
        coding = np.squeeze(CNNmnist_decoder_2(code))
        im = axs[val1, val2].imshow(coding, vmin=0, vmax=1)
        axs[val1, val2].axis('off')
        axs[val1, val2].set_title(code)

# One colorbar shared by all panels, placed in the space freed on the right.
fig.subplots_adjust(right=0.9)
cbar_ax = fig.add_axes([0.95, 0.15, 0.025, 0.7])
fig.colorbar(im, cax=cbar_ax)

plt.show()

### Visualizing Encodings
Having only 2 dimensions is a very good starting point for visualization: we can look at where in the 2D code space the images of the individual digits are projected to:

In [None]:
val_encodings2 = CNNmnist_encoder_2.predict(val_images, batch_size=256)

In [None]:
val_encodings2.shape

In [None]:
val_encodings2[:10,:]

In [None]:
# Table with one row per validation image: its two code components plus the true label.
# (Despite the variable name, the codes come from CNNmnist_encoder_2.)
sparseAE_embedding = pd.DataFrame(val_encodings2, columns=['Dimension 1', 'Dimension 2'])
sparseAE_embedding['label'] = val_labels

In [None]:
sparseAE_embedding.shape

In [None]:
val_labels.shape

In [None]:
sns.scatterplot(sparseAE_embedding, x='Dimension 1', y='Dimension 2', hue='label', legend='full', palette='deep')
plt.show()

In [None]:
# Same scatter plot as above, but with logarithmic x and y axes.
# NOTE(review): log axes cannot display non-positive values, so points with a code
# component of exactly 0 (possible if the encoder ends in a ReLU, as in the 30-d
# example) will not appear in this plot - confirm that this is acceptable.
sns.scatterplot(sparseAE_embedding, x='Dimension 1', y='Dimension 2', hue='label', legend='full', palette='deep')
plt.xscale('log')
plt.yscale('log')
plt.show()

**TODO:** Given the two scatter plots above, can you find a code that will be decoded into an image representing the digit "0"? Use the plots as the basis for your guess, and adapt the code cell below to verify your finding.

In [None]:
code = np.zeros(shape=[1, 2])
# modify the values of the first and second dimension
# code[0, 0] = ...
# code[0, 1] = ...
coding = np.squeeze(CNNmnist_decoder_2(code))
plt.imshow(coding, vmin=0, vmax=1)
plt.axis('off')
plt.title(code)
plt.show()