## Assignment 1 - Exploring CNNs on CIFAR-10 dataset
### Equipe:
- Kaleb Roncatti de Souza
- Nelson Gomes Brasil Junior

Todas as atividades são iniciadas em código por:
```
#################################################################
## Atividade X. Activity description
```

In [None]:
!pip3 install visualkeras

In [None]:
import tensorflow as tf
import numpy as np
from typing import Tuple
import matplotlib.pyplot as plt
from IPython import display
%matplotlib inline
from tensorflow.keras import layers
from tensorflow.keras import regularizers
from tensorflow.keras import models
from sklearn.utils.class_weight import compute_class_weight
import visualkeras
import os
import random as rn
from keras import callbacks
from keras.activations import leaky_relu, relu, sigmoid


# Single seed shared by every random number generator used in this notebook,
# so that repeated runs are reproducible.
rs = 321

# Seed the Python, NumPy and TensorFlow generators with the same value.
rn.seed(rs)
np.random.seed(rs)
tf.random.set_seed(rs)
# NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up, so
# setting it here presumably only affects child processes -- confirm.
os.environ['PYTHONHASHSEED'] = str(rs)

In [None]:
# Device that will be used to train the deep learning models
# (switch to '/cpu:0' if you don't have a GPU)
device = '/gpu:0'

# CIFAR-10 train and test sets as shipped with Keras
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()

In [None]:
# Function to shuffle
def shuffle_tensor(x: np.ndarray , y: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """Shuffle ``x`` and ``y`` along their first axis with one shared permutation.

    A random permutation of the row indices of ``x`` is drawn (using the
    notebook-wide seed ``rs``) and applied to both inputs, so that sample
    ``i`` of the shuffled ``x`` still matches sample ``i`` of the shuffled
    ``y``.

    Args:
        x: samples, indexed along the first axis.
        y: labels, indexed along the first axis like ``x``.

    Returns:
        The pair ``(x, y)`` reordered by the same permutation, as produced by
        ``tf.gather``.
    """
    n_rows = tf.shape(x)[0]
    permutation = tf.random.shuffle(
        tf.range(start=0, limit=n_rows, dtype=tf.int32),
        seed=rs,
    )
    shuffled_x = tf.gather(x, permutation)
    shuffled_y = tf.gather(y, permutation)
    return shuffled_x, shuffled_y

# Function to split dataset
def split_dataset(x: np.ndarray, y: np.ndarray, percentage: float) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """Shuffle a dataset and cut it into two consecutive parts.

    Args:
        x: samples.
        y: labels; must have the same length as ``x``.
        percentage: fraction of the samples that goes into the first part.

    Returns:
        ``(x_first, y_first, x_second, y_second)`` where the first part holds
        ``int(percentage * len(y))`` shuffled samples and the second part
        holds the remaining ones.

    Raises:
        ValueError: if ``x`` and ``y`` differ in length.
    """
    if len(x) != len(y):
        raise ValueError("array x and y must have the same length")

    # Shuffle before cutting so that both parts are random samples
    shuffled_x, shuffled_y = shuffle_tensor(x=x, y=y)

    # Index of the first sample that belongs to the second part
    cut = int(percentage * len(shuffled_y))
    first_part = (shuffled_x[:cut], shuffled_y[:cut])
    second_part = (shuffled_x[cut:], shuffled_y[cut:])
    return (*first_part, *second_part)


In [None]:
# Function to plot accuracy and loss (train/validation)
def plot_train_val_acc_loss(model: tf.keras.Sequential, model_name: str) -> None:
    """Plot the training and validation curves recorded by the last ``fit``.

    Two figures are shown, one for the accuracy and one for the loss. Each
    figure contains the training curve and the validation curve, with the
    epoch on the horizontal axis.

    Args:
        model: a model whose ``history.history`` dictionary holds the keys
            ``accuracy``, ``val_accuracy``, ``loss`` and ``val_loss``.
        model_name: label appended to the title of both figures.
    """
    history = model.history.history
    # Same figure layout for both metrics; only the metric name changes
    for metric in ('accuracy', 'loss'):
        plt.plot(history[metric])
        plt.plot(history[f'val_{metric}'])
        plt.title(f'model {metric} ({model_name})')
        plt.ylabel(metric)
        plt.xlabel('epoch')
        plt.legend(['train', 'val'], loc='upper left')
        plt.show()

In [None]:
#################################################################
## Atividade 1. Splitting training and validation
# Keep 80% of the original training data for training and hold out the rest
# for validation. NOTE: x_train / y_train are overwritten here, so running
# this cell a second time splits the already-reduced training set again.
x_train, y_train, x_val, y_val = split_dataset(
    x=x_train,
    y=y_train,
    percentage=0.8,
)

# Report the fraction that was actually kept and the resulting shapes
print(
    "We splitted the training dataset using the following percentage: "
    f"{len(x_train)/(len(x_train) + len(x_val))}"
)
print(
    "Training dataset has the following shape: "
    f"x ==> {x_train.shape}, y ==> {y_train.shape}"
)
print(
    "Validation dataset has the following shape: "
    f"x ==> {x_val.shape}, y ==> {y_val.shape}"
)

In [None]:
# Normalize data between [0, 1]: cast to float32, then divide by 255
# (assumes 8-bit pixel values -- TODO confirm)
x_train = tf.cast(x_train, dtype=tf.float32) / 255.0
x_val = tf.cast(x_val, dtype=tf.float32) / 255.0
x_test = tf.cast(x_test, dtype=tf.float32) / 255.0


# One hot encoding on labels; the number of classes is taken from the
# distinct labels present in the training set
num_classes = len(np.unique(y_train))
y_train_oh = tf.keras.utils.to_categorical(y_train, num_classes=num_classes)
y_val_oh = tf.keras.utils.to_categorical(y_val, num_classes=num_classes)
y_test_oh = tf.keras.utils.to_categorical(y_test, num_classes=num_classes)
print(x_train.shape, y_train_oh.shape)

In [None]:
# Checking if we have a balanced dataset:
# count the samples of each class, first for the training set and then for
# the validation set
for header, labels in (("Train ---> ", y_train), ("Validation ---> ", y_val)):
    unique, counts = np.unique(labels, return_counts=True)
    print(header, dict(zip(unique, counts)))

The class counts above show that the dataset is approximately balanced. We could weight each class according to its proportion, but for simplicity we proceed as if the dataset were perfectly balanced.

In [None]:
# Shape of a single training sample (everything after the sample axis)
print(x_train.shape[1:])


In [None]:
#################################################################
## Atividade 2. Creating a very simple convolutional neural net

def create_baseline_model(input_shape=x_train[0].shape, activation=relu) -> tf.keras.Sequential:
    """Build the baseline CNN: one convolution, one max pooling, one dense head.

    Args:
        input_shape: shape of a single input sample, forwarded to the
            convolutional layer. Defaults to the shape of the first training
            image (evaluated once, when the function is defined).
        activation: activation applied by the convolutional layer.

    Returns:
        An uncompiled ``Sequential`` model ending in ``Dense(10)`` with no
        activation argument.
    """
    model = tf.keras.Sequential()

    # Convolutional layer with 32 filters, 3x3 each.
    # The caller-supplied input_shape is used here (it was previously ignored
    # in favour of the global x_train).
    model.add(
        layers.Conv2D(
            filters=32,
            kernel_size=(3, 3),
            padding='valid',
            activation=activation,
            input_shape=input_shape))
    # Max pooling of size 2x2
    model.add(layers.MaxPooling2D(pool_size=(2, 2)))
    # Flattening
    model.add(layers.Flatten())
    # Output layer
    model.add(layers.Dense(10))

    return model

# Build the baseline network with the default input shape and activation
model = create_baseline_model()
# Print the layer-by-layer summary of the network
model.summary()
# Taking a look at the neural net
# (kept as the last bare expression of the cell so the notebook renders it)
visualkeras.layered_view(model)


In [None]:
# Compiling the model: Adam with a learning rate of 0.001 and categorical
# cross-entropy computed on raw logits (from_logits=True), tracking accuracy
model.compile(
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    metrics=['accuracy'],
)


In [None]:
# Training the baseline model and saving it
with tf.device(device):
    # Abort training if the loss becomes NaN, and write the checkpoint file
    # only when the monitored quantity improves (save_best_only=True)
    baseline_callbacks = [
        callbacks.TerminateOnNaN(),
        callbacks.ModelCheckpoint(
            'weights_baseline.h5',
            verbose=1,
            save_best_only=True,
        ),
    ]
    model.fit(
        x=x_train,
        y=y_train_oh,
        validation_data=(x_val, y_val_oh),
        batch_size=64,
        epochs=30,
        callbacks=baseline_callbacks,
        verbose=1,
    )

In [None]:
# Accuracy and loss curves (train/validation) of the baseline model
plot_train_val_acc_loss(model_name="Baseline", model=model)

In [None]:
#################################################################
## Atividade 3. Exploring two OTHER activation types, since we already played with ReLU

# Two more copies of the baseline architecture, differing only in the
# activation of the convolutional layer (sigmoid is created first)
model_sigmoid = create_baseline_model(activation=sigmoid)
model_leaky_relu = create_baseline_model(activation=leaky_relu)

for candidate in (model_sigmoid, model_leaky_relu):
    candidate.summary()
    # Compiling the model
    candidate.compile(
        optimizer=tf.keras.optimizers.Adam(0.001),
        loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
        metrics=['accuracy'],
    )

In [None]:
# Training the baseline model and saving it with OTHER ACTIVATIONS
# (Sigmoid first, then Leaky ReLU), each with its own checkpoint file
for net, checkpoint_path in (
    (model_sigmoid, 'weights_baseline_sigmoid.h5'),
    (model_leaky_relu, 'weights_baseline_leaky_relu.h5'),
):
    with tf.device(device):
        net.fit(
            x_train, y_train_oh,
            epochs=30,
            batch_size=64,
            validation_data=(x_val, y_val_oh),
            verbose=1,
            callbacks=[
                callbacks.TerminateOnNaN(),
                callbacks.ModelCheckpoint(
                    checkpoint_path,
                    save_best_only=True,
                    verbose=1),
            ],
        )

In [None]:
# Accuracy and loss curves (train/validation) of the SIGMOID baseline
plot_train_val_acc_loss(model_name="Baseline Sigmoid", model=model_sigmoid)

In [None]:
# Accuracy and loss curves (train/validation) of the LeakyReLU baseline
plot_train_val_acc_loss(model_name="Baseline LeakyReLU", model=model_leaky_relu)

In [None]:
#################################################################
## Atividade 4. Exploring more complex CNNs to improve our model

# First, deeper variant of the baseline: two stacked convolutions before the
# pooling layer instead of one
model_complex = tf.keras.Sequential()

# First convolution: 32 filters of 3x3, receives the input images
model_complex.add(
    layers.Conv2D(
        filters=32, 
        kernel_size=(3, 3), 
        padding='valid', 
        activation=leaky_relu, 
        input_shape=x_train[0].shape))
# Second convolution: 32 filters of 3x3 applied to the first one's output
model_complex.add(
    layers.Conv2D(
        filters=32, 
        kernel_size=(3, 3), 
        padding='valid', 
        activation=leaky_relu))

# Max pooling of size 2x2
model_complex.add(layers.MaxPooling2D(pool_size=(2, 2)))
# Flattening
model_complex.add(layers.Flatten())
# Output layer
model_complex.add(layers.Dense(10))

model_complex.summary()
# Compiling the model (the loss is computed on raw logits: from_logits=True)
model_complex.compile(optimizer=tf.keras.optimizers.Adam(0.001),
              loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

# Taking a look at the neural net
# (kept as the last bare expression of the cell so the notebook renders it)
visualkeras.layered_view(model_complex)

In [None]:
# Training the first complex model and saving it
with tf.device(device):
    # Abort training if the loss becomes NaN, and write the checkpoint file
    # only when the monitored quantity improves (save_best_only=True)
    complex_callbacks = [
        callbacks.TerminateOnNaN(),
        callbacks.ModelCheckpoint(
            'weights_complex.h5',
            verbose=1,
            save_best_only=True,
        ),
    ]
    model_complex.fit(
        x=x_train,
        y=y_train_oh,
        validation_data=(x_val, y_val_oh),
        batch_size=64,
        epochs=30,
        callbacks=complex_callbacks,
        verbose=1,
    )

In [None]:
# Accuracy and loss curves (train/validation) of model complex 0
plot_train_val_acc_loss(model_name="Model Complex 0", model=model_complex)

In [None]:
# Second, deeper variant: two convolution/convolution/pooling stages followed
# by the dense output layer
model_complex_1 = tf.keras.Sequential()

# Stage 1: two 3x3 convolutions with 32 filters each, then 2x2 max pooling.
# Only this first layer declares the input shape.
model_complex_1.add(
    layers.Conv2D(
        filters=32,
        kernel_size=(3, 3),
        padding='valid',
        activation=leaky_relu,
        input_shape=x_train[0].shape))
model_complex_1.add(
    layers.Conv2D(
        filters=32,
        kernel_size=(3, 3),
        padding='valid',
        activation=leaky_relu))

model_complex_1.add(layers.MaxPooling2D(pool_size=(2, 2)))

# Stage 2: two 3x3 convolutions with 16 filters each, then 2x2 max pooling.
# No input_shape here: this layer consumes the pooled feature maps of stage 1,
# not the raw images, so the argument that used to be passed was misleading.
model_complex_1.add(
    layers.Conv2D(
        filters=16,
        kernel_size=(3, 3),
        padding='valid',
        activation=leaky_relu))
model_complex_1.add(
    layers.Conv2D(
        filters=16,
        kernel_size=(3, 3),
        padding='valid',
        activation=leaky_relu))

model_complex_1.add(layers.MaxPooling2D(pool_size=(2, 2)))

# Flattening
model_complex_1.add(layers.Flatten())
# Output layer
model_complex_1.add(layers.Dense(10))

model_complex_1.summary()
# Compiling the model (the loss is computed on raw logits: from_logits=True)
model_complex_1.compile(optimizer=tf.keras.optimizers.Adam(0.001),
              loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

# Taking a look at the neural net
# (kept as the last bare expression of the cell so the notebook renders it)
visualkeras.layered_view(model_complex_1)

In [None]:
# Training the second complex model and saving it
with tf.device(device):
    # Abort training if the loss becomes NaN, and write the checkpoint file
    # only when the monitored quantity improves (save_best_only=True)
    complex_1_callbacks = [
        callbacks.TerminateOnNaN(),
        callbacks.ModelCheckpoint(
            'weights_complex_1.h5',
            verbose=1,
            save_best_only=True,
        ),
    ]
    model_complex_1.fit(
        x=x_train,
        y=y_train_oh,
        validation_data=(x_val, y_val_oh),
        batch_size=64,
        epochs=30,
        callbacks=complex_1_callbacks,
        verbose=1,
    )

In [None]:
# Accuracy and loss curves (train/validation) of model complex 1
plot_train_val_acc_loss(model_name="Model Complex 1", model=model_complex_1)

In [None]:
#################################################################
## Atividade 5. Exploring different types of initialization and regularization

## Varying the initialization ##
# We are going to use the same architecture as model_complex_1.
# Since glorot_uniform is the default, we are going to test
# ["random_normal", "he_normal"]


inits = ["random_normal", "he_normal"]
models_init = []
for initialization in inits:

    model_tmp = tf.keras.Sequential()

    # Stage 1: two 3x3 convolutions with 32 filters each, then 2x2 max pooling.
    # Only this first layer declares the input shape.
    model_tmp.add(
        layers.Conv2D(
            filters=32,
            kernel_size=(3, 3),
            padding='valid',
            activation=leaky_relu,
            input_shape=x_train[0].shape,
            kernel_initializer=initialization))
    model_tmp.add(
        layers.Conv2D(
            filters=32,
            kernel_size=(3, 3),
            padding='valid',
            activation=leaky_relu,
            kernel_initializer=initialization))

    model_tmp.add(layers.MaxPooling2D(pool_size=(2, 2)))

    # Stage 2: two 3x3 convolutions with 16 filters each, then 2x2 max pooling.
    # No input_shape here: this layer consumes the pooled feature maps of
    # stage 1, not the raw images.
    model_tmp.add(
        layers.Conv2D(
            filters=16,
            kernel_size=(3, 3),
            padding='valid',
            activation=leaky_relu,
            kernel_initializer=initialization))
    model_tmp.add(
        layers.Conv2D(
            filters=16,
            kernel_size=(3, 3),
            padding='valid',
            activation=leaky_relu,
            kernel_initializer=initialization))

    model_tmp.add(layers.MaxPooling2D(pool_size=(2, 2)))

    # Flattening
    model_tmp.add(layers.Flatten())
    # Output layer
    model_tmp.add(layers.Dense(10, kernel_initializer=initialization))

    model_tmp.summary()
    # Compiling the model (the loss is computed on raw logits)
    model_tmp.compile(optimizer=tf.keras.optimizers.Adam(0.001),
                loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
                metrics=['accuracy'])

    # Taking a look at the neural net. Inside a loop a bare expression is not
    # rendered by the notebook, so the returned image is displayed explicitly.
    display.display(visualkeras.layered_view(model_tmp))

    # Training model; one checkpoint file per initializer
    with tf.device(device):
        model_tmp.fit(
            x_train, y_train_oh,
            epochs=30,
            batch_size=64,
            validation_data=(x_val, y_val_oh),
            verbose=1,
            callbacks=[
                callbacks.TerminateOnNaN(),
                callbacks.ModelCheckpoint(
                    f'weights_complex_1_init-{initialization}.h5',
                    save_best_only=True,
                    verbose=1),
            ]
            )

    models_init.append(model_tmp)
    # Observing results
    plot_train_val_acc_loss(model=model_tmp, model_name=f"Model Complex 1 Init - {initialization}")

In [None]:
## Varying the regularization using the default initializer (Glorot/Bengio: glorot_uniform) ##
# We are going to use the same architecture as model_complex_1


regs = [regularizers.l2(0.001), regularizers.l2(0.01)]
# Human-readable name of each regularizer, used in file names and plot titles
regs_str = ["l2-0.001", "l2-0.01"]
models_reg = []
# Walk the regularizers and their names in lockstep instead of keeping a
# manual index into regs_str
for regularization, reg_name in zip(regs, regs_str):

    model_tmp = tf.keras.Sequential()

    # Stage 1: two 3x3 convolutions with 32 filters each, then 2x2 max pooling.
    # Only this first layer declares the input shape.
    model_tmp.add(
        layers.Conv2D(
            filters=32,
            kernel_size=(3, 3),
            padding='valid',
            activation=leaky_relu,
            input_shape=x_train[0].shape,
            kernel_regularizer=regularization))
    model_tmp.add(
        layers.Conv2D(
            filters=32,
            kernel_size=(3, 3),
            padding='valid',
            activation=leaky_relu,
            kernel_regularizer=regularization))

    model_tmp.add(layers.MaxPooling2D(pool_size=(2, 2)))

    # Stage 2: two 3x3 convolutions with 16 filters each, then 2x2 max pooling.
    # No input_shape here: this layer consumes the pooled feature maps of
    # stage 1, not the raw images.
    model_tmp.add(
        layers.Conv2D(
            filters=16,
            kernel_size=(3, 3),
            padding='valid',
            activation=leaky_relu,
            kernel_regularizer=regularization))
    model_tmp.add(
        layers.Conv2D(
            filters=16,
            kernel_size=(3, 3),
            padding='valid',
            activation=leaky_relu,
            kernel_regularizer=regularization))

    model_tmp.add(layers.MaxPooling2D(pool_size=(2, 2)))

    # Flattening
    model_tmp.add(layers.Flatten())
    # Output layer
    model_tmp.add(layers.Dense(10, kernel_regularizer=regularization))

    model_tmp.summary()
    # Compiling the model (the loss is computed on raw logits)
    model_tmp.compile(optimizer=tf.keras.optimizers.Adam(0.001),
                loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
                metrics=['accuracy'])

    # Taking a look at the neural net. Inside a loop a bare expression is not
    # rendered by the notebook, so the returned image is displayed explicitly.
    display.display(visualkeras.layered_view(model_tmp))

    # Training model; one checkpoint file per regularizer
    with tf.device(device):
        model_tmp.fit(
            x_train, y_train_oh,
            epochs=30,
            batch_size=64,
            validation_data=(x_val, y_val_oh),
            verbose=1,
            callbacks=[
                callbacks.TerminateOnNaN(),
                callbacks.ModelCheckpoint(
                    f'weights_complex_1_reg-{reg_name}.h5',
                    save_best_only=True,
                    verbose=1),
            ]
            )

    models_reg.append(model_tmp)

    # Observing results
    plot_train_val_acc_loss(model=model_tmp, model_name=f"Model Complex 1 Reg - {reg_name}")

In [None]:
#################################################################
## Atividade 6. Exploring Dropout
# Using it before the fully connected (dense) layer as suggested by Hinton (2012)
# https://arxiv.org/pdf/1207.0580.pdf

# Same architecture as model_complex_1, with a dropout layer inserted between
# the flattened features and the dense output layer
model_complex_1_dropout = tf.keras.Sequential()

# Stage 1: two 3x3 convolutions with 32 filters each, then 2x2 max pooling.
# Only this first layer declares the input shape.
model_complex_1_dropout.add(
    layers.Conv2D(
        filters=32,
        kernel_size=(3, 3),
        padding='valid',
        activation=leaky_relu,
        input_shape=x_train[0].shape))
model_complex_1_dropout.add(
    layers.Conv2D(
        filters=32,
        kernel_size=(3, 3),
        padding='valid',
        activation=leaky_relu))

model_complex_1_dropout.add(layers.MaxPooling2D(pool_size=(2, 2)))

# Stage 2: two 3x3 convolutions with 16 filters each, then 2x2 max pooling.
# No input_shape here: this layer consumes the pooled feature maps of stage 1,
# not the raw images.
model_complex_1_dropout.add(
    layers.Conv2D(
        filters=16,
        kernel_size=(3, 3),
        padding='valid',
        activation=leaky_relu))
model_complex_1_dropout.add(
    layers.Conv2D(
        filters=16,
        kernel_size=(3, 3),
        padding='valid',
        activation=leaky_relu))

model_complex_1_dropout.add(layers.MaxPooling2D(pool_size=(2, 2)))

# Flattening
model_complex_1_dropout.add(layers.Flatten())

# Dropout layer (rate 0.2), placed right before the dense layer
model_complex_1_dropout.add(layers.Dropout(0.2))

# Output layer
model_complex_1_dropout.add(layers.Dense(10))

model_complex_1_dropout.summary()
# Compiling the model (the loss is computed on raw logits: from_logits=True)
model_complex_1_dropout.compile(optimizer=tf.keras.optimizers.Adam(0.001),
              loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

# Taking a look at the neural net. This is not the last expression of the
# cell, so the returned image is displayed explicitly.
display.display(visualkeras.layered_view(model_complex_1_dropout))

# Training the dropout model and saving it
with tf.device(device):
    model_complex_1_dropout.fit(
        x_train, y_train_oh,
        epochs=30,
        batch_size=64,
        validation_data=(x_val, y_val_oh),
        verbose=1,
        callbacks=[
            callbacks.TerminateOnNaN(),
            callbacks.ModelCheckpoint(
                'weights_complex_1_dropout.h5',
                save_best_only=True,
                verbose=1),
        ]
        )

# Observing the model accuracy
plot_train_val_acc_loss(model=model_complex_1_dropout, model_name="Model Complex 1 Dropout")

In [None]:
#################################################################
## Atividade 7. Plotting Loss VS Epochs
