# Algorithms for Big Data - Exercise 4
This lecture focuses on a more detailed understanding of convolutional neural networks.

The visualization and the response of the CNN layers will be investigated, and autoencoders will be introduced.

We will use the MNIST dataset, but others may be used as well.


[Open in Google colab](https://colab.research.google.com/github/jplatos/VSB-FEI-Deep-Learning/blob/master/dl_04_autoencoder.ipynb)
[Download from Github](https://github.com/jplatos/VSB-FEI-Deep-Learning/blob/master/dl_04_autoencoder.ipynb)

In [None]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import matplotlib.pyplot as plt # plotting
import matplotlib.image as mpimg # images
import numpy as np #numpy
import tensorflow.compat.v2 as tf #use tensorflow v2 as a main 
import tensorflow.keras as keras # required for high level applications
from sklearn.model_selection import train_test_split # split for validation sets
from sklearn.preprocessing import normalize # normalization of the matrix
from scipy.signal import convolve2d # convolutionof the 2D signals
import scipy
import datetime, os

tf.version.VERSION

In [None]:
%load_ext tensorboard

In [None]:
def show_history(history):
    """Plot every series recorded in a Keras training history on one figure.

    Args:
        history: Object returned by ``model.fit``. Its ``history`` dict
            supplies one curve per key and ``epoch`` supplies the x values.
    """
    plt.figure()
    epoch_axis = history.epoch
    for series_name, series_values in history.history.items():
        plt.plot(epoch_axis, series_values, label=series_name)
    plt.legend()
    plt.tight_layout()

def show_example(train_x, train_y, class_names):
    """Show up to the first 25 images in a 5x5 grid, labelled by class name.

    Args:
        train_x: Indexable collection of images; every item must be
            reshapeable to 28x28 (for example shape ``(28, 28, 1)``).
        train_y: Integer class index for each image in ``train_x``.
        class_names: Sequence mapping a class index to its display name.
    """
    # Never index past the end when fewer than 25 samples are supplied.
    shown = min(25, len(train_x), len(train_y))
    plt.figure(figsize=(10,10))
    for i in range(shown):
        plt.subplot(5,5,i+1)
        # hide ticks and grid so that only the picture and its label remain
        plt.xticks([])
        plt.yticks([])
        plt.grid(False)
        plt.imshow(train_x[i].reshape(28,28), cmap=plt.cm.binary)
        plt.xlabel(class_names[train_y[i]])
    plt.show()

In [None]:
class Mish(keras.layers.Activation):
    '''
    Activation layer wrapper used to expose the Mish function by name.

    .. math::
        mish(x) = x * tanh(softplus(x)) = x * tanh(ln(1 + e^{x}))

    Shape:
        - Input: Arbitrary. Use the keyword argument `input_shape`
        (tuple of integers, does not include the samples axis)
        when using this layer as the first layer in a model.
        - Output: Same shape as the input.
    Examples:
        This file registers an instance under the key ``'mish'``:

        >>> X = keras.layers.Activation('mish', name="conv1_act")(X_input)
    '''

    def __init__(self, activation, **kwargs):
        super().__init__(activation, **kwargs)
        # NOTE(review): presumably set so that Keras can treat the instance
        # like a named activation function - confirm.
        self.__name__ = 'Mish'


def mish(inputs):
    """Apply Mish element-wise: ``inputs * tanh(softplus(inputs))``."""
    gate = tf.math.tanh(tf.math.softplus(inputs))
    return inputs * gate


# register the wrapped function so that layers can request it as activation='mish'
keras.utils.get_custom_objects().update(mish=Mish(mish))

In [None]:
# MNIST is the basic dataset for image classification (handwritten digits 0-9)
dataset = tf.keras.datasets.mnist

# every Keras dataset is loaded through its load_data() function
(train_x, train_y), (test_x, test_y) = dataset.load_data()

# append a trailing channel axis so that the images fit the Conv2D input (28, 28, 1)
train_x = np.expand_dims(train_x, axis=-1)
test_x = np.expand_dims(test_x, axis=-1)

# rescale the pixel intensities from 0-255 to 0-1
train_x = train_x / 255.0
test_x = test_x / 255.0

# hold out 20 % of the training data for validation; the fixed seed keeps the split repeatable
train_x, valid_x, train_y, valid_y = train_test_split(
    train_x, train_y, test_size=0.2, random_state=42
)

# at this point the images are 28x28x1 with values in the 0-1 range
print('Train data shape: ', train_x.shape, train_y.shape)
print('Validation data shape: ', valid_x.shape, valid_y.shape)
print('Test data shape:  ', test_x.shape, test_y.shape)

# Label sets for the alternative datasets are kept for reference:
# class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
class_names = list(map(str, range(10)))
# class_names = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']
class_count = len(class_names)
print('Class count:', class_count, class_names)


#### Show example images of the dataset

In [None]:
# preview the first training images together with their digit labels
show_example(train_x, train_y, class_names)

### Create a well defined model 

The model is able to achieve more than 99% accuracy on the validation as well as the testing set.

In [None]:
# Classifier: three Conv2D + MaxPooling2D stages followed by a dense head.
# All layers use padding='same', so only the pooling layers shrink the image: 28 -> 14 -> 7 -> 4.
# Later cells address the layers by position (0, 2 and 4 are the Conv2D layers).
model = keras.Sequential([
    keras.layers.Conv2D(64, (3,3), activation='relu', padding='same', input_shape=(28,28,1)),
    keras.layers.MaxPooling2D((2, 2), padding='same'),
    keras.layers.Conv2D(64, (3,3), padding='same', activation='relu'),
    keras.layers.MaxPooling2D((2, 2), padding='same'),
    keras.layers.Conv2D(16, (3,3), padding='same', activation='relu'),    
    keras.layers.MaxPooling2D((2, 2), padding='same'),
    
    # dense classification head with dropout between the fully connected layers
    keras.layers.Flatten(),
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dropout(0.25),
    keras.layers.Dense(32, activation='relu'),
    keras.layers.Dropout(0.25),
    # one softmax output per digit class
    keras.layers.Dense(10, activation='softmax'),
])

model.summary()

# the labels are integer class indices, hence the sparse variant of the cross-entropy
model.compile(loss='sparse_categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

#### Fit the model on the train data.
Let's train the model on the training data and use the EarlyStopping callback to keep the best model available and achievable.

In [None]:
# stop once val_loss has not improved for 7 epochs and restore the best weights seen
# NOTE(review): with epochs=10 below, patience=7 can only trigger in the last few epochs.
es = keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=7, restore_best_weights=True)
# every run logs into its own timestamped sub-directory of ./logs
logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)

# alternative setting with more epochs:
# batch_size = 32
# epochs = 50

batch_size = 32
epochs = 10
history = model.fit(train_x, train_y, validation_data=(valid_x, valid_y), callbacks=[es, tensorboard_callback], epochs=epochs, batch_size=batch_size)

show_history(history)

# final check on the test split, which was not used during fitting
test_loss, test_acc = model.evaluate(test_x, test_y)
print('Test accuracy: ', test_acc)

## Let's try the tensorboard plugin

In [None]:
%tensorboard --logdir logs

## Visualize the layers
Let's see what the network was able to learn from the training data. For that, we need to prepare a new model and look at the outputs of the layers.

In [None]:
# get the outputs from all layers in the model
layer_outputs = [layer.output for layer in model.layers]
# Create a model that has a single input and returns the outputs of all layers.
# Because the layers are connected, the output of the first layer is propagated into the second layer, whose output is computed from it, and so on.
activation_model = keras.models.Model(inputs=model.input, outputs=layer_outputs)

# get all the layer outputs for the training sample at index 10 (reshaped to a batch of one)
activations = activation_model.predict(train_x[10].reshape(1,28,28,1))
 
# this function shows the output of each filter of one layer
def display_activation(activations, col_size, row_size, act_index):
    """Plot the channels of one layer's activation as a grid of grayscale images.

    Args:
        activations: Sequence of per-layer outputs; the selected entry is
            indexed as ``[0, :, :, channel]``, i.e. the first sample of a
            4-D array.
        col_size: Number of grid columns.
        row_size: Number of grid rows.
        act_index: Index of the layer output to display.
    """
    activation = activations[act_index]
    channel_count = activation.shape[-1]
    # figsize is (width, height): the width follows the columns and the
    # height follows the rows. squeeze=False keeps `ax` two-dimensional even
    # for a single row or column.
    fig, ax = plt.subplots(row_size, col_size,
                           figsize=(col_size*2.5, row_size*1.5),
                           squeeze=False)
    # ax.flat walks the grid row by row, one channel per cell
    for activation_index, cell in enumerate(ax.flat):
        if activation_index < channel_count:
            cell.imshow(activation[0, :, :, activation_index], cmap='gray')
        else:
            # more grid cells than channels: leave the surplus cells blank
            cell.axis('off')

In [None]:
# show the input image (training sample at index 10, channel 0)
plt.imshow(train_x[10][:,:,0]);

In [None]:
# show the output of the first layer (index 0, Conv2D with 64 filters) on an 8x8 grid
display_activation(activations, 8, 8, 0)

In [None]:
# show the second convolution layer (index 2, 64 filters) on an 8x8 grid
display_activation(activations, 8, 8, 2)

In [None]:
# show the third convolution layer (index 4, 16 filters) on a 4x4 grid
display_activation(activations, 4, 4, 4)

## The weights of each layer
The weights can be extracted from a layer as a tuple of weights and biases.

In [None]:
# the first layer is a Conv2D, so get_weights() yields its kernels and its biases
filters, biases = model.layers[0].get_weights()
print(filters.shape, biases.shape)

In [None]:
#### The weights may be normalized into the 0-1 interval

In [None]:
# min-max scale all kernel values into [0, 1] so that they can be drawn as grayscale images
f_min, f_max = filters.min(), filters.max()
filters = (filters - f_min) / (f_max - f_min)

In [None]:
plt.figure(figsize=(10,10))

# the first convolution layer has 64 filters; draw them on an 8x8 grid
n_filters = 64
for i in range(n_filters):
    # take input channel 0 of filter i (the kernels are indexed [row, col, channel, filter])
    kernel = filters[:, :, 0, i]
    # one tick-free subplot per filter
    ax = plt.subplot(8, 8, i+1)
    ax.set_xticks([])
    ax.set_yticks([])
    # draw the kernel in grayscale
    plt.imshow(kernel, cmap='gray')

# show the figure
plt.show()

## Autoencoder
The autoencoder is a special type of neural network that is able to learn without the classes just from the input data. It is equivalent to the feature extraction from the data.

It's worth a mention that we are using binary crossentropy loss, thus we compare images on per-pixel basis.
- You can view [this link](https://towardsdatascience.com/understanding-binary-cross-entropy-log-loss-a-visual-explanation-a3ac6025181a) for more information about Bin. CE

In [None]:
# Convolutional autoencoder: the encoder shrinks 28x28x1 down to 4x4x8, the decoder grows it back.
autoencoder = keras.Sequential([
    # --- encoder: padding='same' everywhere, pooling gives 28 -> 14 -> 7 -> 4 ---
    keras.layers.Conv2D(64, (3,3), activation='relu', padding='same', input_shape=(28,28,1)),
    keras.layers.MaxPooling2D((2, 2), padding='same'),
    keras.layers.Conv2D(32, (3,3), padding='same', activation='relu'),
    keras.layers.MaxPooling2D((2, 2), padding='same'),
    keras.layers.Conv2D(8, (3,3), padding='same', activation='relu'),    
    keras.layers.MaxPooling2D((2, 2), padding='same'),
    # the bottleneck: 4 * 4 * 8 = 128 values of the minimized knowledge / features
    # --- decoder: upsampling gives 4 -> 8 -> 16, then see the note below ---
    keras.layers.Conv2D(8, (3,3), padding='same', activation='relu'),
    keras.layers.UpSampling2D((2,2)),
    keras.layers.Conv2D(32, (3,3), padding='same', activation='relu'),
    keras.layers.UpSampling2D((2,2)),
    # No padding argument here: with the default padding the 3x3 convolution
    # trims 16x16 to 14x14, so the last upsampling lands exactly on 28x28.
    keras.layers.Conv2D(64, (3,3), activation='relu'),
    keras.layers.UpSampling2D((2,2)),
    
    # single-channel output squashed to 0-1, matching the scaled input pixels
    keras.layers.Conv2D(1, (3,3), activation='sigmoid', padding='same')
])

autoencoder.summary()

# per-pixel binary cross-entropy between the input image and its reconstruction
autoencoder.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

### Fit the model
The model may be trained for as many epochs as you like; it converges, but slowly.

In [None]:
# the images serve as both the input and the target; no labels are involved
history = autoencoder.fit(train_x, train_x, validation_data=(valid_x, valid_x), epochs=10, batch_size=128)

show_history(history)

### Generate original and reconstructed images
The autoencoder fits on the original data on input as well as on output, therefore it is possible to generate reconstructed images.

In [None]:
predicted = autoencoder.predict(test_x)

n = 10
plt.figure(figsize=(20, 4))
for i in range(n):
    # top row: the original test image, bottom row: its reconstruction
    for row_offset, image in ((0, test_x[i]), (n, predicted[i])):
        ax = plt.subplot(2, n, i + 1 + row_offset)
        plt.imshow(image.reshape(28, 28))
        plt.gray()
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
plt.show()

### Visualize the encoded vectors
The vectors that are generated by the encoder may be visualized.

In [None]:
# Stand-alone copy of the encoder half of the autoencoder (same six layers, same order).
encoder = keras.Sequential([
    keras.layers.Conv2D(64, (3,3), activation='relu', padding='same', input_shape=(28,28,1)),
    keras.layers.MaxPooling2D((2, 2), padding='same'),
    keras.layers.Conv2D(32, (3,3), padding='same', activation='relu'),
    keras.layers.MaxPooling2D((2, 2), padding='same'),
    keras.layers.Conv2D(8, (3,3), padding='same', activation='relu'),    
    keras.layers.MaxPooling2D((2, 2), padding='same'),
])

# encoder.compile(optimizer='adam', loss='binary_crossentropy')
# Copy the trained weights: the first 6 arrays of the autoencoder are taken,
# presumably kernel + bias of its three encoder Conv2D layers - confirm.
encoder.set_weights(autoencoder.get_weights()[:6])

# freeze the copied layers so that later training does not change them
for layer in encoder.layers:
  layer.trainable = False

encoder.summary()

In [None]:
encoded = encoder.predict(test_x)

n = 10
plt.figure(figsize=(20, 8))
for i in range(n):
    panels = (
        # top row: the 128 encoded values laid out as a 16x8 picture
        (i + 1, encoded[i].reshape(8, 16).T),
        # bottom row: the test image the code was computed from
        (n + i + 1, test_x[i].reshape(28, 28)),
    )
    for position, picture in panels:
        ax = plt.subplot(2, n, position)
        plt.imshow(picture)
        plt.gray()
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
plt.show()

### Is the encoder-based classifier better than the previous one above?
The encoder trained as part of an autoencoder generates a compressed representation of the input. The inner vector with 128 values is much smaller than the input, and the goal of the autoencoder is different from the goal of the classifier; therefore, the representation learned by training the classifier directly is usually better for classification.
Some variants of encoder are able to generate better representation - a sparse autoencoders that generate sparse representation for example.

### Is it possible to control how sparse our representation will be? If it's possible, what method would you use? [API](https://keras.io/api/layers/regularizers/)

### Why is it usually better to use classifier directly and not train encoder for feature extraction first and then put classification layers on top of it? 

In [None]:

# put a dense classification head on top of the encoder
classifier_head = [
    keras.layers.Flatten(),
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dropout(0.25),
    keras.layers.Dense(32, activation='relu'),
    keras.layers.Dropout(0.25),
    keras.layers.Dense(10, activation='softmax'),
]
for head_layer in classifier_head:
    encoder.add(head_layer)




In [None]:
# The layers copied from the autoencoder were set to trainable=False when the
# encoder was built; the dense layers added afterwards were not.
encoder.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics = ['accuracy'])
encoder.summary()

In [None]:
# a fresh early-stopping callback with the same setting as for the first classifier
es = keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=7, restore_best_weights=True)

history = encoder.fit(train_x, train_y, validation_data=(valid_x, valid_y), callbacks = [es], epochs=10)

show_history(history)

# evaluate on the test split so the result can be compared with the first classifier
test_loss, test_acc = encoder.evaluate(test_x, test_y)
print('Test accuracy: ', test_acc)

## Outlier analysis
Let us use the autoencoder as an outlier analyzer. 

First, prepare the testing set. Then create outlier data and compare the reconstruction error on both.

In [None]:
# the first 25 test images: exactly enough to fill the 5x5 grid of show_example
sample_x = test_x[:25]
sample_y = test_y[:25]

show_example(sample_x, sample_y, class_names)

In [None]:
# "outliers": the same images plus Gaussian noise (standard deviation 0.1), clipped back into 0-1
# NOTE(review): np.random is not seeded, so the noise differs on every run.
modified_x = np.clip(sample_x + 0.1 * np.random.normal(loc=0.0, scale=1.0, size=sample_x.shape), 0., 1.) 
modified_y = sample_y
show_example(modified_x, modified_y, class_names)

In [None]:
# reconstructions of the clean samples
sample_predicted = autoencoder.predict(sample_x)
show_example(sample_predicted, sample_y, class_names)

In [None]:
# reconstructions of the noisy samples
modified_pred = autoencoder.predict(modified_x)
show_example(modified_pred, modified_y, class_names)

In [None]:
# https://numpy.org/doc/stable/reference/generated/numpy.linalg.norm.html
# For a 2-D input np.linalg.norm defaults to the Frobenius norm,
# i.e. the root of the summed squared per-pixel errors.

# reconstruction error of every clean image
sample_norms = [
    np.linalg.norm(original.reshape(28,28) - restored.reshape(28,28))
    for original, restored in zip(sample_x, sample_predicted)
]
# reconstruction error of every noisy image
modified_norms = [
    np.linalg.norm(original.reshape(28,28) - restored.reshape(28,28))
    for original, restored in zip(modified_x, modified_pred)
]

# summary statistics of both error populations
sample_mean = np.mean(sample_norms)
sample_std = np.std(sample_norms)
modified_mean = np.mean(modified_norms)
modified_std = np.std(modified_norms)

In [None]:
# `import scipy` alone does not guarantee that the stats submodule is loaded,
# so import it explicitly before use.
from scipy import stats

plt.figure(figsize=(16,9))
# common x range: 4 standard deviations around both means
xmin = np.min([sample_mean-4*sample_std, modified_mean-4*modified_std])
xmax = np.max([sample_mean+4*sample_std, modified_mean+4*modified_std])

x = np.linspace(xmin, xmax, 100)

# normal density fitted to the errors of the clean images
y = stats.norm.pdf(x,sample_mean,sample_std)

plt.plot(x,y, label='clean samples (fitted normal)')

# normal density fitted to the errors of the noisy images
y = stats.norm.pdf(x,modified_mean,modified_std)

plt.plot(x,y, label='noisy samples (fitted normal)')

# one thin bar per image marks its individual reconstruction error;
# each bar list is sized from its own data
plt.bar(x=sample_norms, height=[0.6] * len(sample_norms), width=0.01, label='clean samples')
plt.bar(x=modified_norms, height=[0.6] * len(modified_norms), width=0.01, label='noisy samples')
plt.xlabel('reconstruction error (Frobenius norm)')
plt.legend()
plt.show()

## Denoising-autoencoder
The denoising autoencoder is an autoencoder that learns how to remove random noise from the images. 

First, noisy images have to be generated. 

Then the autoencoder needs to be created and trained. 

Then the denoised images may be reconstructed.

In [None]:
noise_factor = 0.5


def _add_gaussian_noise(images, factor):
    """Return `images` plus `factor`-scaled standard normal noise, clipped to 0-1."""
    noise = np.random.normal(loc=0.0, scale=1.0, size=images.shape)
    return np.clip(images + factor * noise, 0., 1.)


# noisy copies of all three splits (generated in the order train, valid, test)
noisy_train_x = _add_gaussian_noise(train_x, noise_factor)
noisy_valid_x = _add_gaussian_noise(valid_x, noise_factor)
noisy_test_x = _add_gaussian_noise(test_x, noise_factor)


n = 10
plt.figure(figsize=(2*n, 4))
for i in range(n):
    # top row: the clean training image, bottom row: its noisy counterpart
    for position, picture in ((i + 1, train_x[i]), (n + i + 1, noisy_train_x[i])):
        ax = plt.subplot(2, n, position)
        plt.imshow(picture.reshape(28,28))
        plt.gray()
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
plt.show()

## Tasks for Lecture / Exercise
1. Implement an autoencoder that does something useful, like denoising, and apply it to the data defined above.
2. Keras contains stored models that may be used for classification. The pretrained models may be used effectively to classify data, e.g. images, using state-of-the-art models. Try to investigate the architecture of the stored models and use them for classification of sample data downloaded from the internet.
  1. Try the VGG16 model and investigate its architecture.
  2. Try ResNet model architecture.
- See [Keras Functional API](https://keras.io/guides/functional_api/)
- See [this for VGG16 and ResNet fine tuning](https://keras.io/api/applications/), chapter Fine-tune InceptionV3 on a new set of classes

In [None]:
from tensorflow.keras.applications.vgg19 import VGG19
from tensorflow.keras.applications.resnet50 import ResNet50
import cv2

In [None]:
def grayscale_to_rgb(images, channel_axis=-1):
    """Repeat a batch of images three times along the channel axis.

    Args:
        images: 4-dimensional batch (B, H, W, C); the tiling list below is
            fixed to four entries.
        channel_axis: Axis that is tiled three times (the last axis by default).

    Returns:
        The result of ``keras.backend.tile`` applied to ``images``.
    """
    multiples = [1, 1, 1, 1]    # one entry per dimension: B, H, W, C
    multiples[channel_axis] = 3
    return keras.backend.tile(images, multiples)

In [None]:
# side length in pixels to which the images are resized below
size = 32

In [None]:
# resize every image of each split to size x size with OpenCV
# NOTE(review): cv2.resize appears to drop the trailing single-channel axis,
# which would explain the reshape in the next cell - confirm.
train_x_res = np.array([cv2.resize(x, (size, size)) for x in train_x])
test_x_res = np.array([cv2.resize(x, (size, size)) for x in test_x])
valid_x_res = np.array([cv2.resize(x, (size, size)) for x in valid_x])

In [None]:
# give the resized images an explicit single channel axis and cast them to float32
train_x_r = train_x_res.reshape(-1, size, size, 1).astype('float32')
test_x_r = test_x_res.reshape(-1, size, size, 1).astype('float32')
valid_x_r = valid_x_res.reshape(-1, size, size, 1).astype('float32')

In [None]:
# replicate the single channel three times
# NOTE(review): presumably because the pretrained VGG/ResNet models imported above expect 3-channel input - confirm.
train_x_r = grayscale_to_rgb(train_x_r)
test_x_r = grayscale_to_rgb(test_x_r)
valid_x_r = grayscale_to_rgb(valid_x_r)