# CIFAR10 Classifier: Optuna Edition


Author: Katnoria | Created: 18-Oct-2020

# 1. Imports & Setup 

In [1]:
import pickle
from time import time
from datetime import datetime
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPool2D, Dropout
from tensorflow.keras.layers import BatchNormalization, Input, GlobalAveragePooling2D
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras import Model
import IPython
import optuna
from optuna.integration.tensorboard import TensorBoardCallback

In [2]:
def version_info(cls):
    """Print ``<name>: <version>`` for the given module.

    Args:
        cls: Any object exposing ``__name__`` and ``__version__`` attributes
            (the notebook passes imported modules).
    """
    name, version = cls.__name__, cls.__version__
    print(f"{name}: {version}")

In [3]:
# Report the versions of the key libraries this notebook was run with.
print("Version Used in this Notebook:")
for module in (tf, tfds, optuna):
    version_info(module)

Version Used in this Notebook:
tensorflow: 2.3.0
tensorflow_datasets: 3.2.1
optuna: 2.2.0


In [4]:
# Show how many GPUs TensorFlow can see.
print(
    "Num GPUs Available: ",
    len(tf.config.experimental.list_physical_devices(device_type='GPU')),
)

Num GPUs Available:  1


In [5]:
# Training / data configuration shared by the cells below.
EPOCHS = 25        # passes over the training split per trial
BATCH_SIZE = 128   # examples per batch
IMG_SIZE = 32      # image height and width in pixels
NUM_CLASSES = 10   # number of output classes

# 2. Dataset

TensorFlow Datasets already provides CIFAR-10 in a format that we can use out of the box.

https://github.com/optuna/optuna/blob/master/examples/tensorflow_eager_simple.py

In [6]:
def get_dataset(batch_size=None, shuffle_buffer=10_000):
    """Load CIFAR-10 from TensorFlow Datasets as batched train/test pipelines.

    Args:
        batch_size: Number of examples per batch. When ``None`` the
            notebook-level ``BATCH_SIZE`` constant is used.
        shuffle_buffer: Size of the shuffle buffer applied to the training
            split.

    Returns:
        A ``(train_ds, test_ds)`` tuple of datasets yielding
        ``(images, labels)`` batches.
    """
    if batch_size is None:
        # Resolve lazily so that re-running the config cell is picked up.
        batch_size = BATCH_SIZE

    (ds_train, ds_test), _ = tfds.load(
        'cifar10', split=['train', 'test'], shuffle_files=True,
        with_info=True, as_supervised=True
    )

    # Cache first, then shuffle, so the training order differs between passes
    # over the dataset while decoding is still only done once.
    train_ds = ds_train \
        .cache() \
        .shuffle(shuffle_buffer) \
        .batch(batch_size) \
        .prefetch(tf.data.experimental.AUTOTUNE)

    # No shuffling for evaluation; keep the final partial batch so every test
    # example contributes to the reported metrics.
    test_ds = ds_test \
        .cache() \
        .batch(batch_size) \
        .prefetch(tf.data.experimental.AUTOTUNE)
    return (train_ds, test_ds)

# 3. Build Model

In [7]:
# Augmentation pipeline placed in front of the backbone in `create_model`.
transforms = tf.keras.Sequential(
    layers=[
        # Random horizontal flip.
        tf.keras.layers.experimental.preprocessing.RandomFlip(mode='horizontal'),
        # Random rotation with factor 0.2.
        tf.keras.layers.experimental.preprocessing.RandomRotation(factor=0.2),
    ]
)

In [8]:
## Create Model
def create_model(trial):
    """Build a ResNet50-based classifier whose head is chosen by ``trial``.

    Hyperparameters suggested through ``trial`` (in this order):
        use_gap: whether to pool with GlobalAveragePooling2D or Flatten.
        dense_1: units in the hidden Dense layer (32-512, log scale).
        activation: hidden activation, one of ``relu``/``selu``/``elu``.
        drop_rate: dropout rate in ``[0.0, 0.8]``.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        A ``tf.keras.Model`` mapping images to ``NUM_CLASSES`` outputs
        (the final Dense layer has no activation).
    """
    image_shape = (IMG_SIZE, IMG_SIZE, 3)
    inputs = Input(shape=image_shape)

    # Augment, then apply the ResNet-specific input preprocessing.
    augmented = transforms(inputs)
    preprocessed = tf.keras.applications.resnet.preprocess_input(augmented)

    # Backbone without its classification top, called with training=False.
    backbone = tf.keras.applications.ResNet50(input_shape=image_shape, include_top=False)
    features = backbone(preprocessed, training=False)

    # Flatten or GAP
    pooling_cls = (
        GlobalAveragePooling2D
        if trial.suggest_categorical('use_gap', [True, False])
        else Flatten
    )
    pooled = pooling_cls()(features)

    # Hidden dense layer: width first, then activation (sampling order matters).
    hidden_units = trial.suggest_int('dense_1', 32, 512, log=True)
    hidden_activation = trial.suggest_categorical('activation', ['relu', 'selu', 'elu'])
    hidden = Dense(hidden_units, activation=hidden_activation)(pooled)

    # Dropout before the output layer.
    dropout_rate = trial.suggest_float('drop_rate', 0.0, 0.8)
    regularized = Dropout(dropout_rate)(hidden)

    outputs = Dense(NUM_CLASSES)(regularized)
    return tf.keras.Model(inputs, outputs)

In [9]:
## Create Optimizer
def create_optimizer(trial):
    """Create an Adam optimizer with a learning rate sampled from ``trial``.

    Args:
        trial: Optuna trial; ``learning_rate`` is suggested on a log scale
            between 1e-5 and 1e-1.

    Returns:
        A ``tf.keras.optimizers.Adam`` instance.
    """
    return tf.keras.optimizers.Adam(
        learning_rate=trial.suggest_float('learning_rate', 1e-5, 1e-1, log=True)
    )

In [10]:
## Define Objective
def train(model, optimizer, dataset, mode="eval"):
    """Run one pass over ``dataset``, optionally updating the model.

    Args:
        model: Callable Keras model returning logits for a batch of images.
        optimizer: Optimizer used to apply gradients when ``mode == "train"``.
        dataset: Iterable yielding ``(images, labels)`` batches.
        mode: ``"train"`` to compute gradients and update the weights; any
            other value (default ``"eval"``) only evaluates.

    Returns:
        ``(accuracy, mean_loss)``: the metric objects accumulated over the
        pass. Call ``.result()`` on them to read the values.
    """
    is_training = mode == "train"
    loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name="accuracy")
    mean_loss = tf.keras.metrics.Mean(name="loss")

    for images, labels in dataset:
        if is_training:
            # Record only the forward pass and the loss on the tape.
            with tf.GradientTape() as tape:
                predictions = model(images, training=True)
                loss = loss_object(labels, predictions)
            # Compute and apply gradients after leaving the tape context so
            # the backward pass itself is not recorded.
            gradients = tape.gradient(loss, model.trainable_variables)
            optimizer.apply_gradients(zip(gradients, model.trainable_variables))
        else:
            # Evaluation needs no gradients, so no tape is created.
            predictions = model(images, training=False)
            loss = loss_object(labels, predictions)

        accuracy(labels, predictions)
        mean_loss(loss)
    return accuracy, mean_loss

# 4. Run Trials

Set up the objective function

In [11]:
def objective(trial):
    """Optuna objective: train a sampled model and return its test accuracy.

    Args:
        trial: Optuna trial used to sample the model and optimizer
            hyperparameters.

    Returns:
        Test accuracy after the final epoch as a Python ``float``.

    Raises:
        ValueError: If ``EPOCHS`` is less than 1, since no evaluation would
            be performed.
    """
    if EPOCHS < 1:
        raise ValueError("EPOCHS must be at least 1")

    # dataset
    train_ds, test_ds = get_dataset()
    # model
    model = create_model(trial)
    # optimizer
    optimizer = create_optimizer(trial)
    # train, then evaluate on the test split after every epoch
    for _ in range(EPOCHS):
        train(model, optimizer, train_ds, "train")
        test_acc, _ = train(model, optimizer, test_ds, "eval")
    # Hand Optuna a plain float rather than a TensorFlow tensor.
    return float(test_acc.result())

Create study and execute

In [19]:
# Track using Tensorboard
tensorboard_cb = TensorBoardCallback("logs/", metric_name="accuracy")


# Run
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=25, callbacks=[tensorboard_cb])

[32m[I 2020-10-20 08:20:10,569][0m A new study created in memory with name: no-name-87d4ab73-fc4f-47b2-91d0-a2bb0aa13255[0m


KeyboardInterrupt: 

**The best test accuracy is 68.67%, which is slightly better than our baseline**

Keras Tuner also supports Bayesian optimization to search for the best model (BayesianOptimization Tuner). You could give it a try too.

# 5. Conclusion

We saw that the best architecture does not use any image augmentations 😂, and `selu` seems to be the activation that keeps showing up.

Here are a few things that we could try:
- additional image augmentation search
- search the pooling options (Global Average, Global Max Pooling)
- add batchnorm?
- add more layers?
