# Notebook Description

This notebook shows the final state of our code after cumulative additions. More detailed code for each experiment is available at https://wandb.ai/takim/CIFAR-10_Classification 

# Common Data Loading Steps

In [None]:
import pickle
import numpy as np
from tensorflow.keras.utils import to_categorical

def load_cifar10_batch(file_path):
    """Unpickle one CIFAR-10 batch file and return the stored object.

    The file is opened in binary mode and decoded with ``encoding='bytes'``,
    so string keys written by Python 2 come back as ``bytes`` (the callers in
    this notebook index the result with ``b'data'`` / ``b'labels'``).

    NOTE: ``pickle.load`` can execute arbitrary code; only use this on batch
    files obtained from a trusted source.
    """
    with open(file_path, 'rb') as handle:
        return pickle.load(handle, encoding='bytes')

def load_cifar10_data(folder_path):
    """Read the five training batches and the test batch from ``folder_path``.

    Returns:
        ``(train_data, train_labels, test_data, test_labels)`` where the
        training rows of ``data_batch_1`` .. ``data_batch_5`` are stacked in
        file order, both label sequences are converted to NumPy arrays, and
        ``test_data`` is returned exactly as stored in ``test_batch``.
    """
    # Read every file first (training batches 1..5, then the test batch).
    train_batches = [
        load_cifar10_batch(f"{folder_path}/data_batch_{index}")
        for index in range(1, 6)
    ]
    held_out = load_cifar10_batch(f"{folder_path}/test_batch")

    # Stack the per-batch pixel rows and flatten the per-batch label lists.
    train_data = np.vstack([batch[b'data'] for batch in train_batches])
    train_labels = np.array(
        [label for batch in train_batches for label in batch[b'labels']]
    )

    test_data = held_out[b'data']
    test_labels = np.array(held_out[b'labels'])

    return train_data, train_labels, test_data, test_labels

def preprocess_data(train_data, train_labels, test_data, test_labels, num_classes=None):
    """Reshape flat CIFAR-10 rows into images and one-hot encode the labels.

    Args:
        train_data, test_data: arrays whose rows are reshaped to
            ``(-1, 3, 32, 32)`` and then transposed to channels-last
            ``(-1, 32, 32, 3)``.
        train_labels, test_labels: integer class labels.
        num_classes: width of the one-hot encoding. When ``None`` (default)
            it is inferred once, from the largest label found in *either*
            split, so the train and test label matrices always have the same
            number of columns even if one split lacks the highest class.

    Returns:
        ``(train_images, train_labels_onehot, test_images, test_labels_onehot)``.
    """
    # Channel-first rows -> channels-last image tensors.
    train_data = train_data.reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1)
    test_data = test_data.reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1)

    if num_classes is None:
        # Share one class count between both splits instead of letting
        # to_categorical infer a (possibly different) width for each.
        num_classes = int(max(np.max(train_labels), np.max(test_labels))) + 1

    train_labels_onehot = to_categorical(train_labels, num_classes=num_classes)
    test_labels_onehot = to_categorical(test_labels, num_classes=num_classes)

    return train_data, train_labels_onehot, test_data, test_labels_onehot

# Folder holding the CIFAR-10 batch files (data_batch_1..5 and test_batch),
# given relative to the notebook's working directory.
cifar10_folder = 'cifar-10-batches-py'

# Raw arrays exactly as stored in the batch files (flat rows, integer labels).
train_data, train_labels, test_data, test_labels = load_cifar10_data(cifar10_folder)

# Channels-last image tensors and one-hot labels; these four names are the
# inputs used by the EDA and training cells further down.
x_train, y_train, x_test, y_test = preprocess_data(
    train_data, train_labels, test_data, test_labels
)

# Quick shape check of the preprocessed arrays.
print("Train Data Shape:", x_train.shape)
print("Train Labels Shape:", y_train.shape)
print("Test Data Shape:", x_test.shape)
print("Test Labels Shape:", y_test.shape)

# EDA

In [None]:
import matplotlib.pyplot as plt

# Human-readable class names; plot_sample_images looks a name up by integer
# label, so position i is the title used for label i.
class_labels = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']

# Function to plot sample images
def plot_sample_images(images, labels):
    """Show the first ten images in a 2x5 grid, each titled with its class name.

    Args:
        images: indexable collection of images accepted by ``imshow``; items
            0..9 are drawn.
        labels: indexable collection of integer labels; ``labels[i]`` is used
            to index the module-level ``class_labels`` list.
    """
    _, grid = plt.subplots(2, 5, figsize=(12, 6))

    # The flattened grid has exactly ten axes, one per displayed sample.
    for idx, ax in enumerate(grid.ravel()):
        ax.imshow(images[idx])
        ax.set_title(class_labels[labels[idx]])
        ax.axis('off')

    plt.tight_layout()
    plt.show()

# Plot sample images; argmax over axis 1 turns each one-hot row of y_train
# back into the integer label that plot_sample_images expects.
plot_sample_images(x_train, np.argmax(y_train, axis=1))

In [None]:
import seaborn as sns

def _plot_class_distribution(onehot_labels, title):
    """Draw a count plot of the classes encoded in ``onehot_labels``.

    The one-hot rows are converted to integer labels with ``argmax`` before
    counting; ``title`` is used verbatim as the figure title.
    """
    sns.countplot(x=np.argmax(onehot_labels, axis=1))
    plt.title(title)
    plt.xlabel('Class')
    plt.ylabel('Count')
    plt.show()


# One figure per split: training data first, then test data.
_plot_class_distribution(y_train, 'Class Distribution - Training Data')
_plot_class_distribution(y_test, 'Class Distribution - Test Data')

In [None]:
import matplotlib.pyplot as plt

# Collapse every training image into one long 1-D array of pixel values so
# that all pixels of all channels share a single histogram.
flattened_data = x_train.flatten()

# Histogram of the pixel values using 256 bins.
plt.hist(flattened_data, bins=256, color='gray')
plt.title('Pixel Value Distribution')
plt.xlabel('Pixel Value')
plt.ylabel('Frequency')
plt.show()

#### Weights & Biases Related Inputs

In [None]:
import wandb
from wandb.keras import WandbMetricsLogger

In [None]:
# Log in to Weights & Biases before any sweep or run is created below.
wandb.login()

# Super Resolution Preprocessing

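Note that the super-resolution images are not used anywhere else in this notebook, since they were not tested on some architectures. However, using them is just a matter of replacing the data variables (`x_train` / `x_test`) with their super-resolution counterparts (`x_train_superres` / `x_test_superres`).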

In [None]:
import tensorflow as tf  # needed here: this cell runs before the model cells that import tf
import tensorflow_hub as hub
from tqdm import tqdm

supres_model = hub.load('https://tfhub.dev/captain-pool/esrgan-tf2/1')


def _super_resolve(images, batch_size=100):
    """Run ``supres_model`` over ``images`` in chunks and return a uint8 array.

    Args:
        images: array of images indexed along axis 0.
        batch_size: number of images sent through the model per call.

    Returns:
        The model outputs for all chunks, concatenated along axis 0 and cast
        to ``uint8``.
    """
    upscaled_batches = []
    for start in tqdm(range(0, images.shape[0], batch_size)):
        batch = tf.cast(images[start:start + batch_size], tf.float32)
        result = supres_model(batch, training=False).numpy()
        # Clip before the cast: float values outside [0, 255] would otherwise
        # wrap around when converted to uint8.
        upscaled_batches.append(np.clip(result, 0, 255).astype('uint8'))

    # Concatenate once at the end; growing the array inside the loop would
    # re-copy everything accumulated so far on every iteration.
    return np.concatenate(upscaled_batches, axis=0)


x_train_superres = _super_resolve(x_train)
x_test_superres = _super_resolve(x_test)

# Persist the results so the expensive upscaling does not have to be repeated.
np.save('x_train_superres.npy', x_train_superres)
np.save('x_test_superres.npy', x_test_superres)

# Model Definitions

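Each of the following sections contains TWO cells for a specific architecture: the sweep configuration and the model definition. Every section redefines `sweep_config` and `create_model`, so run only the configuration and model-definition cells of the architecture you want to train.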

## AlexNet

In [None]:
# Metric the sweep is asked to minimise.
metric = {'name': 'val_loss', 'goal': 'minimize'}

# Only 'augmentation' lists several candidates ('values'); every other
# hyperparameter is pinned to a single 'value'.
parameters_dict = {
    'augmentation': {'values': ['none', 'light', 'heavy']},
    'earlystopping_patience': {'value': 10},
    'epochs': {'value': 100},
    'learning_rate': {'value': 0.000063},
    'batch_size': {'value': 64},
    'dropout': {'value': True},
    'batchnorm': {'value': True},
    'regularization': {'value': False},
    'normalization': {'value': True},
}

# AlexNet sweep definition: grid search over the parameters above.
sweep_config = {
    'method': 'grid',
    'metric': metric,
    'parameters': parameters_dict,
}

In [None]:
import tensorflow as tf

# Define the AlexNet architecture
def create_model(dropout, batchnorm, regularization):
    """Build the AlexNet-style ``Sequential`` classifier for 32x32x3 inputs.

    Args:
        dropout: when truthy, a ``Dropout(0.5)`` follows each 4096-unit dense layer.
        batchnorm: when truthy, a ``BatchNormalization`` layer precedes each
            of the three max-pooling layers.
        regularization: when truthy, every convolution gets an L2(0.001)
            kernel regularizer; otherwise no regularizer is passed.

    Returns:
        An uncompiled ``tf.keras.Sequential`` ending in a 10-way softmax.
    """
    layers = tf.keras.layers

    def conv(filters, kernel, **extra):
        # A fresh L2 penalty object per layer, only when regularization is on.
        if regularization:
            extra['kernel_regularizer'] = tf.keras.regularizers.l2(0.001)
        return layers.Conv2D(filters=filters, kernel_size=kernel, activation='relu', **extra)

    def close_stage(net):
        # Every convolutional stage ends with optional batch norm + 3x3/stride-1 max pooling.
        if batchnorm:
            net.add(layers.BatchNormalization())
        net.add(layers.MaxPooling2D(pool_size=(3, 3), strides=(1, 1)))

    model = tf.keras.Sequential()

    # Stage 1: the only unpadded, strided convolution; it also declares the input shape.
    model.add(conv(96, (11, 11), strides=(2, 2), input_shape=(32, 32, 3)))
    close_stage(model)

    # Stage 2
    model.add(conv(256, (5, 5), strides=(1, 1), padding="same"))
    close_stage(model)

    # Stage 3: three stacked 3x3 convolutions
    for filters in (384, 384, 256):
        model.add(conv(filters, (3, 3), strides=(1, 1), padding="same"))
    close_stage(model)

    # Classifier head
    model.add(layers.Flatten())
    for _ in range(2):
        model.add(layers.Dense(units=4096, activation='relu'))
        if dropout:
            model.add(layers.Dropout(0.5))
    model.add(layers.Dense(10, activation='softmax'))

    return model

## VGG

In [None]:
# Metric the sweep is asked to minimise.
metric = {'name': 'val_loss', 'goal': 'minimize'}

# Only 'augmentation' lists several candidates ('values'); every other
# hyperparameter is pinned to a single 'value'.
parameters_dict = {
    'augmentation': {'values': ['none', 'light', 'heavy']},
    'earlystopping_patience': {'value': 10},
    'epochs': {'value': 100},
    'learning_rate': {'value': 0.00025118864},
    'batch_size': {'value': 64},
    'kernel_size': {'value': (3, 3)},
    'dropout': {'value': True},
    'pooling': {'value': 'max'},
    'batchnorm': {'value': True},
    # The training cell reads this key as config["a_layers"] and passes it as
    # the VGG create_model's n_layers argument.
    'a_layers': {'value': 16},
    'reg_alpha': {'value': 0},
    'normalization': {'value': False},
}

# VGG sweep definition: grid search over the parameters above.
sweep_config = {
    'method': 'grid',
    'metric': metric,
    'parameters': parameters_dict,
}

In [None]:
import tensorflow as tf

def create_model(kernel_size, dropout, pooling, batchnorm, n_layers, reg_alpha):
    """Build the VGG-style ``Sequential`` classifier for 32x32x3 inputs.

    Args:
        kernel_size: kernel size used by every convolution.
        dropout: when truthy, a ``Dropout(0.5)`` follows each 4096-unit dense layer.
        pooling: ``'max'`` selects 2x2 max pooling; any other value selects
            2x2 average pooling.
        batchnorm: when truthy, a ``BatchNormalization`` layer is added after
            the convolutions of each stage.
        n_layers: when equal to 19, three extra 512-filter convolutions (plus
            optional batch norm, no pooling) are appended after the last
            pooled stage; any other value leaves the five-stage network as is.
        reg_alpha: L2 factor applied to every convolution and to both
            4096-unit dense layers (not to the output layer).

    Returns:
        An uncompiled ``tf.keras.Sequential`` ending in a 10-way softmax.
    """
    layers = tf.keras.layers

    def conv(filters, **extra):
        return layers.Conv2D(
            filters, kernel_size, activation='relu', padding='same',
            kernel_regularizer=tf.keras.regularizers.l2(reg_alpha), **extra
        )

    pool_layer = layers.MaxPooling2D if pooling == 'max' else layers.AveragePooling2D

    model = tf.keras.Sequential()

    # (filters, number of convolutions) for each pooled stage, in order.
    stages = ((64, 2), (128, 2), (256, 3), (512, 3), (512, 3))
    for stage_index, (filters, depth) in enumerate(stages):
        for conv_index in range(depth):
            if stage_index == 0 and conv_index == 0:
                # The very first layer declares the input shape.
                model.add(conv(filters, input_shape=(32, 32, 3)))
            else:
                model.add(conv(filters))
        if batchnorm:
            model.add(layers.BatchNormalization())
        model.add(pool_layer((2, 2)))

    if n_layers == 19:
        # Extra unpooled convolution group for the 19-layer variant.
        for _ in range(3):
            model.add(conv(512))
        if batchnorm:
            model.add(layers.BatchNormalization())

    model.add(layers.Flatten())

    # Two regularized fully connected layers, each optionally followed by dropout.
    for _ in range(2):
        model.add(layers.Dense(units=4096, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(reg_alpha)))
        if dropout:
            model.add(layers.Dropout(0.5))

    model.add(layers.Dense(10, activation='softmax'))

    return model

## GoogLeNet

In [None]:
# Metric the sweep is asked to minimise.
metric = {'name': 'val_loss', 'goal': 'minimize'}

# Only 'augmentation' lists several candidates ('values'); every other
# hyperparameter is pinned to a single 'value'.
parameters_dict = {
    'augmentation': {'values': ['none', 'light', 'heavy']},
    'earlystopping_patience': {'value': 10},
    # NOTE(review): epochs is 1 here, while the other architecture configs in
    # this notebook use 100 -- confirm this is intended.
    'epochs': {'value': 1},
    'learning_rate': {'value': 0.000016},
    'batch_size': {'value': 128},
    'reg_alpha': {'value': 0.00001},
    'del_5x5': {'value': False},
    'add_7x7': {'value': False},
    'normalization': {'value': True},
}

# GoogLeNet sweep definition: grid search over the parameters above.
sweep_config = {
    'method': 'grid',
    'metric': metric,
    'parameters': parameters_dict,
}

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, BatchNormalization, ReLU, Lambda, Add, Input, GlobalAveragePooling2D, Flatten, Dense, Softmax, MaxPooling2D, AveragePooling2D, Dropout
from tensorflow.keras import Model
from tensorflow.keras.layers import concatenate
from tensorflow.keras.regularizers import l2


def InceptionBlock(x, f1, f3_reduce, f3, f5_reduce, f5, pool_reduce, reg_alpha, del_5x5, add_7x7):
    """Apply one Inception module to ``x`` and return the concatenated branches.

    Branches, in concatenation order along the last axis:
      1. 1x1 convolution (``f1`` filters);
      2. 1x1 reduce (``f3_reduce``) followed by 3x3 (``f3``);
      3. 1x1 reduce (``f5_reduce``) followed by 5x5 (``f5``) -- skipped when
         ``del_5x5`` is truthy;
      4. 1x1 reduce followed by 7x7 -- only when ``add_7x7`` is truthy; it
         reuses the 5x5 branch widths (``f5_reduce`` / ``f5``);
      5. 3x3 stride-1 max pooling followed by a 1x1 convolution (``pool_reduce``).

    Every convolution uses stride 1, 'same' padding, an L2(``reg_alpha``)
    kernel regularizer and is followed by a ReLU.
    """
    def conv_relu(tensor, filters, kernel):
        tensor = Conv2D(filters=filters, kernel_size=kernel, strides=(1, 1), padding='same', kernel_regularizer=l2(reg_alpha))(tensor)
        return ReLU()(tensor)

    branches = [
        conv_relu(x, f1, (1, 1)),
        conv_relu(conv_relu(x, f3_reduce, (1, 1)), f3, (3, 3)),
    ]

    if not del_5x5:
        branches.append(conv_relu(conv_relu(x, f5_reduce, (1, 1)), f5, (5, 5)))

    if add_7x7:
        # The 7x7 branch borrows the filter counts of the 5x5 branch.
        branches.append(conv_relu(conv_relu(x, f5_reduce, (1, 1)), f5, (7, 7)))

    pooled = MaxPooling2D(pool_size=(3, 3), strides=(1, 1), padding='same')(x)
    branches.append(conv_relu(pooled, pool_reduce, (1, 1)))

    return concatenate(inputs=branches, axis=-1)


# Define the GoogLeNet architecture
def create_model(config):
    """Build the GoogLeNet-style functional model for 32x32x3 inputs.

    Args:
        config: mapping providing ``'reg_alpha'`` (L2 factor), ``'del_5x5'``
            and ``'add_7x7'`` (both forwarded to every ``InceptionBlock``).

    Returns:
        An uncompiled ``Model`` named ``'GoogLeNet'`` with three softmax
        outputs, in this order: ``aux1_out``, ``aux2_out`` (auxiliary
        classifiers) and ``og_out`` (main classifier).
    """
    reg_alpha, del_5x5, add_7x7 = config['reg_alpha'], config['del_5x5'], config['add_7x7']

    def conv_relu(tensor, filters, kernel, stride=(1, 1)):
        tensor = Conv2D(filters=filters, kernel_size=kernel, strides=stride, padding='same', kernel_regularizer=l2(reg_alpha))(tensor)
        return ReLU()(tensor)

    def downsample(tensor):
        return MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding='same')(tensor)

    def inception(tensor, *widths):
        return InceptionBlock(tensor, *widths, reg_alpha, del_5x5, add_7x7)

    def aux_head(tensor, out_name):
        # Auxiliary classifier: pool -> 1x1 conv -> dense(1024) -> dropout -> 10-way softmax.
        tensor = AveragePooling2D(pool_size=(5, 5), strides=(3, 3), padding='same')(tensor)
        tensor = conv_relu(tensor, 128, (1, 1))
        tensor = Flatten()(tensor)
        tensor = Dense(1024, kernel_regularizer=l2(reg_alpha))(tensor)
        tensor = ReLU()(tensor)
        tensor = Dropout(0.7)(tensor)
        tensor = Dense(10)(tensor)
        return Softmax(name=out_name)(tensor)

    inputs = Input(shape=(32, 32, 3))

    # Stem
    x = conv_relu(inputs, 64, (7, 7), stride=(2, 2))
    x = downsample(x)
    x = conv_relu(x, 64, (1, 1))
    x = conv_relu(x, 192, (3, 3))
    x = downsample(x)

    x = inception(x, 64, 96, 128, 16, 32, 32)
    x = inception(x, 128, 128, 192, 32, 96, 64)
    x = downsample(x)

    x = inception(x, 192, 96, 208, 16, 48, 64)
    x_aux1 = aux_head(x, 'aux1_out')  # first auxiliary loss output

    x = inception(x, 160, 112, 224, 24, 64, 64)
    x = inception(x, 128, 128, 256, 24, 64, 64)
    x = inception(x, 112, 144, 288, 32, 64, 64)
    x_aux2 = aux_head(x, 'aux2_out')  # second auxiliary loss output

    x = inception(x, 256, 160, 320, 32, 128, 128)
    x = downsample(x)

    x = inception(x, 256, 160, 320, 32, 128, 128)
    x = inception(x, 384, 192, 384, 48, 128, 128)

    # Main classifier
    x = GlobalAveragePooling2D()(x)
    x = Dropout(0.4)(x)
    x = Dense(10)(x)
    x = Softmax(name='og_out')(x)

    return Model(inputs, [x_aux1, x_aux2, x], name='GoogLeNet')

## ResNet

In [None]:
# Metric the sweep is asked to minimise.
metric = {'name': 'val_loss', 'goal': 'minimize'}

# Only 'augmentation' lists several candidates ('values'); every other
# hyperparameter is pinned to a single 'value'.
parameters_dict = {
    'augmentation': {'values': ['none', 'light', 'heavy']},
    'earlystopping_patience': {'value': 10},
    'epochs': {'value': 100},
    'learning_rate': {'value': 0.001},
    'batch_size': {'value': 64},
    # The ResNet create_model maps '3x3' to (3, 3) and anything else to (5, 5).
    'kernel_size': {'value': '5x5'},
    'net_filter_size': {'value': 32},
    'net_n': {'value': 3},
    'reg_alpha': {'value': 0.0001},
    'normalization': {'value': False},
}

# ResNet sweep definition: grid search over the parameters above.
sweep_config = {
    'method': 'grid',
    'metric': metric,
    'parameters': parameters_dict,
}

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, BatchNormalization, ReLU, Lambda, Add, Input, GlobalAveragePooling2D, Flatten, Dense, Softmax
from tensorflow.keras import Model
from tensorflow.keras.regularizers import l2


def ResidualBlock(x, filter_size, is_switch_block, kernel_size, reg_alpha):
    """Apply one two-convolution residual block to ``x``.

    Main path: Conv -> BatchNorm -> ReLU -> Conv -> BatchNorm, every
    convolution with ``filter_size`` filters, 'same' padding and an
    L2(``reg_alpha``) kernel regularizer. The result is added to the shortcut
    and passed through a final ReLU.

    Args:
        x: input tensor (channels-last).
        filter_size: number of filters produced by the block.
        is_switch_block: when true, the first convolution uses stride 2, so
            the output no longer matches the input. The shortcut is then
            adapted without any weights: every second row/column is kept and
            the channel axis is zero-padded up to ``filter_size``.
        kernel_size: kernel size of both convolutions.
        reg_alpha: L2 regularization factor.

    Returns:
        The block's output tensor.
    """
    x_skip = x  # save original input to the block

    first_stride = (2, 2) if is_switch_block else (1, 1)
    x = Conv2D(filter_size, kernel_size=kernel_size, strides=first_stride, padding='same', kernel_regularizer=l2(reg_alpha))(x)
    x = BatchNormalization()(x)
    x = ReLU()(x)

    x = Conv2D(filter_size, kernel_size=kernel_size, strides=(1, 1), padding='same', kernel_regularizer=l2(reg_alpha))(x)
    x = BatchNormalization()(x)

    if is_switch_block:
        in_channels = x_skip.shape[-1]
        if in_channels is None:
            # Channel count unknown at build time: fall back to the fixed
            # quarter-width padding on each side.
            pad_front = pad_back = filter_size // 4
        else:
            # Pad by the actual channel gap so the shortcut always ends up
            # with exactly filter_size channels, even when filter_size is not
            # divisible by 4. When the input has filter_size / 2 channels and
            # filter_size is a multiple of 4 this is filter_size // 4 per side.
            missing = filter_size - int(in_channels)
            pad_front = missing // 2
            pad_back = missing - pad_front

        # Keep every second row/column to halve the spatial size, then
        # zero-pad the channel (last) axis on both sides.
        x_skip = Lambda(
            lambda t: tf.pad(
                t[:, ::2, ::2, :],
                [[0, 0], [0, 0], [0, 0], [pad_front, pad_back]],
                mode="CONSTANT",
            )
        )(x_skip)

    x = Add()([x, x_skip])
    x = ReLU()(x)

    return x

def ResidualBlocks(x, filter_size, n, kernel_size, reg_alpha):
    """Stack three groups of ``n`` residual blocks on top of ``x``.

    The first group keeps ``filter_size`` filters. The first block of the
    second and of the third group doubles the filter count and is built as a
    switch block, so the three groups use ``filter_size``, ``2 * filter_size``
    and ``4 * filter_size`` filters respectively.
    """
    for stage in range(3):
        for position in range(n):
            # Only the opening block of stages 2 and 3 changes the width.
            opens_new_stage = stage > 0 and position == 0
            if opens_new_stage:
                filter_size *= 2

            x = ResidualBlock(x, filter_size, opens_new_stage, kernel_size, reg_alpha)

    return x

# Define the ResNet architecture
def create_model(config):
    """Build the ResNet-style functional model for 32x32x3 inputs.

    Args:
        config: mapping providing ``'net_filter_size'`` (filters of the stem
            and first residual group), ``'net_n'`` (blocks per group),
            ``'kernel_size'`` (``'3x3'`` selects 3x3 kernels, any other value
            selects 5x5) and ``'reg_alpha'`` (L2 factor).

    Returns:
        An uncompiled ``Model`` named ``ResNet-<6n+2>`` with a 10-way softmax output.
    """
    width = config['net_filter_size']
    blocks_per_group = config['net_n']
    if config['kernel_size'] == '3x3':
        kernel = (3, 3)
    else:
        kernel = (5, 5)

    weight_decay = config['reg_alpha']

    image = Input(shape=(32, 32, 3))

    # Stem: a single convolution with batch norm and ReLU.
    stem = Conv2D(width, kernel_size=kernel, strides=(1, 1), padding='same', kernel_regularizer=l2(weight_decay))(image)
    stem = ReLU()(BatchNormalization()(stem))

    features = ResidualBlocks(stem, width, blocks_per_group, kernel, weight_decay)

    # Head: global pooling, flatten, then a 10-way softmax classifier.
    pooled = Flatten()(GlobalAveragePooling2D()(features))
    probabilities = Softmax()(Dense(10)(pooled))

    return Model(image, probabilities, name=f"ResNet-{blocks_per_group*6+2}")

#### W&B creating sweep (for the grid search)

In [None]:
import pprint

# Show whichever sweep_config was defined last, i.e. the architecture section
# that was run most recently.
pprint.pprint(sweep_config)

In [None]:
# Register the sweep with W&B under the given project; the returned id is
# passed to wandb.agent in the last cell.
sweep_id = wandb.sweep(sweep_config, project="CIFAR-10_Classification")

# Training Code

This training code is written for VGG; the code for the other architectures is similar.

In [None]:
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split

def train(config = None):
    """Run one W&B sweep trial: build, train and evaluate the VGG model.

    Hyperparameters are read from ``wandb.config`` after ``wandb.init``. The
    function relies on the notebook globals ``x_train``, ``y_train``,
    ``x_test``, ``y_test`` and on the currently defined ``create_model``
    (called here with the VGG signature).

    Steps:
      1. Optionally scale the pixel values to [0, 1] (``normalization``).
      2. Build and compile the model (Adam, categorical cross-entropy,
         accuracy and top-3 accuracy).
      3. Hold out 10% of the training data with a fixed random split. The
         same split is used for every ``augmentation`` setting so that the
         sweep metric ``val_loss`` is measured on identical validation data
         in all trials.
      4. Fit on the raw arrays (``'none'``) or on an ``ImageDataGenerator``
         stream (``'light'`` / any other value = heavy), with early stopping
         on ``val_loss`` and best-weight restoration.
      5. Log the test loss/accuracy and the validation loss/accuracy of the
         epoch with the lowest ``val_loss``.

    Args:
        config: optional default configuration forwarded to ``wandb.init``.
    """
    with wandb.init(config=config):

        config = wandb.config

        do_normalization = config['normalization']
        augmentation = config['augmentation']

        x_train_to_use = (x_train.astype('float32') / 255) if do_normalization else x_train
        x_test_to_use = (x_test.astype('float32') / 255) if do_normalization else x_test

        tf.keras.backend.clear_session()
        model = create_model(config["kernel_size"], config["dropout"], config["pooling"], config["batchnorm"], config["a_layers"], config["reg_alpha"])
        model.compile(
            optimizer = Adam(learning_rate=config["learning_rate"]),
            loss = "categorical_crossentropy",
            metrics = ["accuracy", tf.keras.metrics.TopKCategoricalAccuracy(k=3, name='top@3_accuracy')]
        )

        early_stopping = EarlyStopping(monitor='val_loss',
                                    patience=config["earlystopping_patience"],
                                    restore_best_weights=True)

        # One fixed hold-out split shared by all augmentation settings.
        x_tr, x_vl, y_tr, y_vl = train_test_split(x_train_to_use, y_train, test_size=0.1, random_state=42)

        if augmentation == 'none':
            train_input, train_targets = x_tr, y_tr
            fit_batch_size = config["batch_size"]
        else:
            if augmentation == 'light':
                datagen = ImageDataGenerator(
                    rotation_range=20,
                    horizontal_flip=True,
                    width_shift_range=0.1,
                    height_shift_range=0.1,
                    fill_mode='nearest'
                )
            else:
                datagen = ImageDataGenerator(
                    rotation_range=40,
                    horizontal_flip=True,
                    width_shift_range=0.2,
                    height_shift_range=0.2,
                    shear_range=0.1,
                    zoom_range=0.1,
                    fill_mode='nearest'
                )

            # The generator already yields (images, labels) batches of the
            # configured size, so neither targets nor batch_size are passed
            # to fit() for this input type.
            train_input = datagen.flow(x_tr, y_tr, batch_size=config["batch_size"])
            train_targets = None
            fit_batch_size = None

        history = model.fit(train_input, train_targets,
                            epochs=config["epochs"],
                            batch_size=fit_batch_size,
                            validation_data=(x_vl, y_vl),
                            callbacks=[
                                WandbMetricsLogger(log_freq='epoch'),
                                early_stopping
                            ], verbose=1
                            )

        # evaluate() returns [loss, accuracy, top@3_accuracy] in compile order.
        test_stats = model.evaluate(x_test_to_use, y_test)

        val_loss_history = history.history['val_loss']
        val_acc_history = history.history['val_accuracy']

        # Best epoch = first epoch reaching the minimum validation loss. This
        # holds for any epoch count or patience, and whether or not early
        # stopping actually triggered.
        best_epoch_num = val_loss_history.index(min(val_loss_history))

        wandb.log({
            "test_loss": test_stats[0],
            "test_acc": test_stats[1],
            "best_val_loss": val_loss_history[best_epoch_num],
            "best_val_acc": val_acc_history[best_epoch_num],
        })

#### Starting the grid search

In [None]:
# Start a sweep agent for sweep_id; it calls train() for the trials it runs.
wandb.agent(sweep_id, train)