 # Homework 5 - opti 0 (original), 1 (L1), 2 (L2), 3 (data augmentation), 4 (dropout), 5 (batchnorm)
 ## Group 10

## Assignment 1 - Reviews

We review the homeworks for groups 15 and 32.

## Assignment 2 - CIFAR-10 Classification

In [1]:
# the necessary imports

import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds
import matplotlib.pyplot as plt
import datetime as datetime
import tqdm
import keras_cv # install keras_cv with pip for data augmentation

  from .autonotebook import tqdm as notebook_tqdm


### 2.1 Prepare the Dataset

In [49]:
def preprocess(dataset):

    # convert data from uint8 to float32
    dataset = dataset.map(lambda img, target: (tf.cast(img, tf.float32), target))

    # flatten the images into vectors - we don't do this step for CNN, CNN layers expect standard image format input
    # dataset = dataset.map(lambda img, target: (tf.reshape(img, (-1,)), target))

    # input normalization, just bringing image values from range [0, 255] to [-1, 1]
    dataset = dataset.map(lambda img, target: ((img / 128.) - 1., target))

    # create one-hot targets with depth 10 since cifar 10 has 10 classes
    dataset = dataset.map(lambda img, target: (img, tf.one_hot(target, depth=10)))

    # cache
    dataset = dataset.cache()

    # shuffle, batch, prefetch
    dataset = dataset.shuffle(1000)
    dataset = dataset.batch(32)
    dataset = dataset.prefetch(tf.data.AUTOTUNE) 

    # return preprocessed dataset
    return dataset

In [50]:
def preprocess_aug(dataset, augmentation):

    # convert data from uint8 to float32
    dataset = dataset.map(lambda img, target: (tf.cast(img, tf.float32), target))

    # flatten the images into vectors - we don't do this step for CNN, CNN layers expect standard image format input
    # dataset = dataset.map(lambda img, target: (tf.reshape(img, (-1,)), target))

    # input normalization, just bringing image values from range [0, 255] to [-1, 1]
    dataset = dataset.map(lambda img, target: ((img / 128.) - 1., target))

    # create one-hot targets with depth 10 since cifar 10 has 10 classes
    dataset = dataset.map(lambda img, target: (img, tf.one_hot(target, depth=10)))

    # cache
    dataset = dataset.cache()

    # shuffle, batch, prefetch
    dataset = dataset.shuffle(1000)
    dataset = dataset.batch(32)

    if augmentation:
        dataset = dataset.map(lambda x, y : (augmentation_model(x), y),num_parallel_calls=tf.data.AUTOTUNE)

    dataset = dataset.prefetch(tf.data.AUTOTUNE) 

    # return preprocessed dataset
    return dataset

In [51]:
(train_ds, test_ds), ds_info = tfds.load ('cifar10', split =['train', 'test'], as_supervised = True, with_info = True)

# print("ds_info: \n", ds_info)

# visualize a sample of the dataset
# tfds.show_examples(train_ds, ds_info)

In [52]:
train_dataset = preprocess(train_ds)
train_dataset_aug = preprocess_aug(train_ds)
test_dataset = preprocess(test_ds)

In [53]:
class ConvModel(tf.keras.Model):
    def __init__(self, L1_reg=0, L2_reg=0, dropout_rate=0, batch_norm=False):
        super().__init__()
        
        if L2_reg >= 0: # L2 is the default
            kernel_regularizer=tf.keras.regularizers.L2(L2_reg)
        elif L1_reg > 0:
            kernel_regularizer=tf.keras.regularizers.L2(L1_reg)
        else:
            None

        self.dropout_rate = dropout_rate
        if self.dropout_rate:
            self.dropout_layer = tf.keras.layers.Dropout(dropout_rate)
            
        self.layer_list = [tf.keras.layers.Conv2D(32, 3, activation="relu", kernel_regularizer=kernel_regularizer), 
            tf.keras.layers.Conv2D(32, 3,activation="relu", kernel_regularizer=kernel_regularizer),
            tf.keras.layers.Conv2D(32, 3, activation="relu", kernel_regularizer=kernel_regularizer),
            tf.keras.layers.Conv2D(32, 3, activation="relu", kernel_regularizer=kernel_regularizer),
            tf.keras.layers.Flatten(),
            tf.keras.layers.Dense(10, activation="softmax", kernel_regularizer=kernel_regularizer)]
        
        if batch_norm:    
                self.layer_list = [tf.keras.layers.Conv2D(32, 3, activation="relu", kernel_regularizer=kernel_regularizer), 
                                   tf.keras.layers.BatchNormalization(),
                                    tf.keras.layers.Conv2D(32, 3,activation="relu", kernel_regularizer=kernel_regularizer),
                                   tf.keras.layers.BatchNormalization(),
                                    tf.keras.layers.Conv2D(32, 3, activation="relu", kernel_regularizer=kernel_regularizer),
                                   tf.keras.layers.BatchNormalization(),
                                    tf.keras.layers.Conv2D(32, 3, activation="relu", kernel_regularizer=kernel_regularizer),
                                   tf.keras.layers.BatchNormalization(),
                                    tf.keras.layers.Flatten(),
                                    tf.keras.layers.BatchNormalization(), # why was this commented?
                                    tf.keras.layers.Dense(10, activation="softmax", kernel_regularizer=kernel_regularizer)]
        # metrics to update
        self.frobenius_metric = tf.keras.metrics.Mean(name="total_frobenius_norm")
        self.loss_metric = tf.keras.metrics.Mean(name="loss")
        self.accuracy_metric = tf.keras.metrics.CategoricalAccuracy(name="accuracy")
        
    def call(self, x, training=False):
        for layer in self.layer_list[:-1]:
            x = layer(x)
            if self.dropout_rate:
                x = self.dropout_layer(x, training)
        
        return self.layer_list[-1](x)
    @property
    def metrics(self):
        return self.metrics_list

    def reset_metrics(self):
        for metric in self.metrics:
            metric.reset_states()
            
    def compute_frobenius(self):
        frobenius_norm = tf.zeros((1,))
        for var in self.trainable_variables:
            frobenius_norm += tf.norm(var, ord="euclidean")
        return frobenius_norm
    
    @tf.function
    def train_step(self, data):
        x, target = data
        with tf.GradientTape() as tape:
            prediction = self(x, training=True)
            loss = self.compiled_loss(target, prediction, regularization_losses=self.losses)
        gradients = tape.gradient(loss, self.trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))
        
        self.frobenius_metric.update_state(self.compute_frobenius())
        self.loss_metric.update_state(loss)
        self.accuracy_metric.update_state(target, prediction)
        
        return {metric.name: metric.result() for metric in self.metrics}
    
    @tf.function
    def test_step(self, data):
        x, target = data
        prediction = self(x, training=False)
        loss = self.compiled_loss(target, prediction, regularization_losses=self.losses)
        
        self.frobenius_metric.update_state(self.compute_frobenius())
        self.loss_metric.update_state(loss)
        self.accuracy_metric.update_state(target, prediction)
        
        return {metric.name: metric.result() for metric in self.metrics}

In [54]:
# Initiate the logs and metrics
config_name= "HW06"
current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

train_log_path = f"logs/{config_name}/{current_time}/train"
val_log_path = f"logs/{config_name}/{current_time}/val"

# log writer for training metrics
train_summary_writer = tf.summary.create_file_writer(train_log_path)

# log writer for validation metrics
val_summary_writer = tf.summary.create_file_writer(val_log_path)

# Initiate epochs and learning rate as global variables
epochs = 15
learning_rate = 0.05

# Define arrays for saving values for later visualization
train_losses = []
train_accuracies = []
val_losses = []
val_accuracies = []

In [55]:
def training(x: int):

    if x == 0: # original
        network = ConvModel()
    elif x == 1: # L1
        network = ConvModel(L1_reg=0.001)
    elif x == 2: # L2
        network = ConvModel(L2_reg=0.01)
    elif x == 3: # data aug
        augmentation_model = tf.keras.Sequential([keras_cv.layers.RandAugment(value_range=[0,1],magnitude=0.1)])
        network = ConvModel()
    elif x == 4: # dropout
        network = ConvModel(dropout_rate=0.5)
    elif x == 5: # batchnorm
        network = ConvModel(batch_norm=True)
    else:
        print(f"Choose a number between 0 - 5.")

    # train the model
    for epoch in range(epochs):
        print(f"Epoch: {epoch}, optimizer: {network.optimizer}")

        for data in tqdm.tqdm(train_dataset, position=0, leave=True):
            metrics = network.train_step(data)

            with train_summary_writer.as_default():
                for metric in network.metrics:
                    tf.summary.scalar(f"{metric.name}", metric.result(), step=epoch)

        # print the end acc and loss
        print([f"train_{key}: {value.numpy()}" for (key, value) in metrics.items()])

        # make a list of losses and accuracies
        for (key, value) in metrics.items():
            if key == "loss":
                train_losses.append(value.numpy())
            elif key == "acc":
                train_accuracies.append(value.numpy())

        # reset metrics for next round
        network.reset_metrics()

        # Testing
        for data in test_dataset:
            metrics = network.test_step(data)

            # log the accs and losses
            with val_summary_writer.as_default():
                for metric in network.metrics:
                    tf.summary.scalar(f"{metric.name}", metric.result(), step = epoch)

        # print the end acc and loss
        print([f"val_{key}: {value.numpy()}" for (key, value) in metrics.items()])

        # make a list of losses and accuracies
        for (key, value) in metrics.items():
            if key == "loss":
                val_losses.append(value.numpy())
            elif key == "acc":
                val_accuracies.append(value.numpy())

        # reset all metrics
        network.reset_metrics()

In [56]:
training(1)

Epoch: 0, optimizer: None


  0%|          | 0/1563 [00:00<?, ?it/s]


TypeError: in user code:

    File "C:\Users\prizl\AppData\Local\Temp\ipykernel_8924\706703681.py", line 63, in train_step  *
        loss = self.compiled_loss(target, prediction, regularization_losses=self.losses)

    TypeError: 'NoneType' object is not callable


## 0. Training the model without any extra regularization (original)

In [None]:
model = ConvModel()
model.compile(loss=tf.keras.losses.CategoricalCrossentropy(), optimizer="adam")
original = model.fit(train_dataset, validation_data=test_dataset, epochs=15)

## 1. Training the same model with L1 regularization (lassoReg)

In [None]:
model = ConvModel(L1_reg=0.001)
model.compile(loss=tf.keras.losses.CategoricalCrossentropy(), optimizer="adam")
lassoReg = model.fit(train_dataset, validation_data=test_dataset, epochs=15)

## 2. Training the same model with L2 regularization (ridgeReg)

In [None]:
model = ConvModel(L2_reg=0.001)
model.compile(loss=tf.keras.losses.CategoricalCrossentropy(), optimizer="adam")
ridgeReg = model.fit(train_dataset, validation_data=test_dataset, epochs=15)

## 3. Training the same model with only data augmentation --> not done (aug)

In [None]:
augmentation_model = tf.keras.Sequential([keras_cv.layers.RandAugment(value_range=[0,1],magnitude=0.1)])

In [None]:
# new training dataset for data augmentation since we defined two different preprocess methods: one with, the other without augmentation
train_dataset_aug = preprocess_aug(train_ds, augmentation=augmentation_model)

model = ConvModel()
model.compile(loss=tf.keras.losses.CategoricalCrossentropy(), optimizer="adam")
augment = model.fit(train_dataset_aug, validation_data=test_dataset, epochs=15)

## 4. Training the same model with only dropout between layers (dropout)

In [None]:
model = ConvModel(dropout_rate=0.5)
model.compile(loss=tf.keras.losses.CategoricalCrossentropy(), optimizer="adam")
dropout = model.fit(train_dataset, validation_data=test_dataset, epochs=15)

## 5. Training the same model with only batch normalization (batchnorm)

In [None]:
model = ConvModel(batch_norm=True)
model.compile(loss=tf.keras.losses.CategoricalCrossentropy(), optimizer="adam")
batchnorm = model.fit(train_dataset, validation_data=test_dataset, epochs=15)

In [None]:
# plotting
fig, (ax0, ax1, ax2, ax3, ax4, ax5) = plt.subplots(6, 1, figsize=(8, 10))

ax0.set_title("original")
ax0.plot(original.history["total_frobenius_norm"]/np.max(original.history["total_frobenius_norm"]) * np.max(original.history["val_loss"]))
ax0.plot(original.history["val_loss"])
ax0.plot(original.history["loss"])
ax0.legend(labels=["Total Frobenius Norm", "Validation Loss", "Loss"])
# ax0.savefig("convnet_original")
# ax0.show()

ax1.set_title("L1")
ax1.plot(lassoReg.history["total_frobenius_norm"]/np.max(lassoReg.history["total_frobenius_norm"]) * np.max(lassoReg.history["val_loss"]))
ax1.plot(lassoReg.history["val_loss"])
ax1.plot(lassoReg.history["loss"])
# ax1.legend(labels=["Total Frobenius Norm", "Validation Loss", "Loss"])
# ax1.savefig("convnet_L1")
# ax1.show()

ax2.set_title("L2")
ax2.plot(ridgeReg.history["total_frobenius_norm"]/np.max(ridgeReg.history["total_frobenius_norm"]) * np.max(ridgeReg.history["val_loss"]))
ax2.plot(ridgeReg.history["val_loss"])
ax2.plot(ridgeReg.history["loss"])
# ax2.legend(labels=["Total Frobenius Norm", "Validation Loss", "Loss"])
# ax2.savefig("convnet_L2")
# ax2.show()

ax3.set_title("L2")
ax3.plot(augment.history["total_frobenius_norm"]/np.max(augment.history["total_frobenius_norm"]) * np.max(augment.history["val_loss"]))
ax3.plot(augment.history["val_loss"])
ax3.plot(augment.history["loss"])
# ax3.legend(labels=["Total Frobenius Norm", "Validation Loss", "Loss"])
# ax3.savefig("convnet_augment")
# ax3.show()

ax4.set_title("Dropout")
ax4.plot(dropout.history["total_frobenius_norm"]/np.max(dropout.history["total_frobenius_norm"]) * np.max(dropout.history["val_loss"]))
ax4.plot(dropout.history["val_loss"])
ax4.plot(dropout.history["loss"])
# ax4.legend(labels=["Total Frobenius Norm", "Validation Loss", "Loss"])
# ax4.savefig("convnet_dropout")
# ax4.show()

ax5.set_title("Batch Norm")
ax5.plot(batchnorm.history["total_frobenius_norm"]/np.max(batchnorm.history["total_frobenius_norm"]) * np.max(batchnorm.history["val_loss"]))
ax5.plot(batchnorm.history["val_loss"])
ax5.plot(batchnorm.history["loss"])
# ax5.legend(labels=["Total Frobenius Norm", "Validation Loss", "Loss"])
# ax5.savefig("convnet_batchnorm")
# ax5.show()

ax5.set_xlabel("Epochs")
# ax5.set_ylabel("Loss")

plt.legend()
fig.savefig("5 optimization methods comparison (e=15)")

fig.tight_layout(pad=0.1)
plt.show()

revised until here (9.12 Wooki)

In [None]:
# Initiate the logs and metrics
config_name= "config_name"
current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

train_log_path = f"logs/{config_name}/{current_time}/train"
val_log_path = f"logs/{config_name}/{current_time}/val"

# log writer for training metrics
train_summary_writer = tf.summary.create_file_writer(train_log_path)

# log writer for validation metrics
val_summary_writer = tf.summary.create_file_writer(val_log_path)

# Initiate epochs and learning rate as global variables
epochs = 15
learning_rate = 0.05

# Define arrays for saving values for later visualization
train_losses = []
train_accuracies = []
val_losses = []
val_accuracies = []

### 2.3 Training

In [None]:
# define the training loop

def training():

    # Select the model to use - the original or the modified one for optimization
    # network = BasicConv()
    network = ConvModel()

    # Train the model
    for epoch in range(epochs):
        print(f"Epoch: {epoch}, optimizer: {network.optimizer}")

        for data in tqdm.tqdm(train_dataset, position=0, leave=True):
            metrics = network.train_step(data)

            with train_summary_writer.as_default():
                for metric in network.metrics:
                    tf.summary.scalar(f"{metric.name}", metric.result(), step=epoch)

        # print the end acc and loss
        print([f"train_{key}: {value.numpy()}" for (key, value) in metrics.items()])

        # make a list of losses and accuracies
        for (key, value) in metrics.items():
            if key == "loss":
                train_losses.append(value.numpy())
            elif key == "acc":
                train_accuracies.append(value.numpy())

        # reset metrics for next round
        network.reset_metrics()

        # Testing
        for data in test_dataset:
            metrics = network.test_step(data)

            # log the accs and losses
            with val_summary_writer.as_default():
                for metric in network.metrics:
                    tf.summary.scalar(f"{metric.name}", metric.result(), step = epoch)

        # print the end acc and loss
        print([f"val_{key}: {value.numpy()}" for (key, value) in metrics.items()])

        # make a list of losses and accuracies
        for (key, value) in metrics.items():
            if key == "loss":
                val_losses.append(value.numpy())
            elif key == "acc":
                val_accuracies.append(value.numpy())

        # reset all metrics
        network.reset_metrics()

In [None]:
# prepare the tensorboard ahead of training

%load_ext tensorboard
%tensorboard --logdir logs/fit

In [None]:
training()

### 2.4 - Visualization

In [None]:
# visualize the results

fig = plt.figure()
line1, = plt.plot(train_losses)
line2, = plt.plot(val_losses)
line3, = plt.plot(train_accuracies)
line4, = plt.plot(val_accuracies)
plt.xlabel("Epochs")
plt.ylabel("Loss/Accuracy")
plt.legend((line1,line2,line3,line4),("Training Loss","Test Loss","Training Accuracy","Test Accuracy"))
fig.savefig("CNN Performance CIFAR-10")
plt.show()

## Report re Overfitting

Our original model overfit the data, as evidenced by the plateau in validation (testing) performance versus training. This discrepancy indicates it began to use features in the training data which did not help it generalize when it saw the new data in the test batch.

## Report re Optimization

We attempted the following optimization techniques and report on our reasoning for them and the results we obtained with them below:

0. original (no optimization applied)

1. L2 Regularization

2. Augmentation

3. Dropout

