# CIFAR10 Classfier: Optuna Edition


Author: Katnoria | Created: 18-Oct-2020

# 1. Imports & Setup 

In [1]:
import pickle
from time import time
from datetime import datetime
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPool2D, Dropout
from tensorflow.keras.layers import BatchNormalization, Input, GlobalAveragePooling2D
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras import Model
import IPython
import optuna

In [2]:
def version_info(cls):
    print(f"{cls.__name__}: {cls.__version__}")

In [3]:
print("Version Used in this Notebook:")
version_info(tf)
version_info(tfds)
version_info(optuna)

Version Used in this Notebook:
tensorflow: 2.3.0
tensorflow_datasets: 3.2.1
optuna: 2.2.0


In [4]:
print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))

Num GPUs Available:  1


In [14]:
EPOCHS = 25
BATCH_SIZE=128
IMG_SIZE=32
NUM_CLASSES=10

# 2. Dataset

Tensorflow Datasets already provides this dataset in a format that we can use out of the box.

https://github.com/optuna/optuna/blob/master/examples/tensorflow_eager_simple.py

In [9]:
def get_dataset():
    (ds_train, ds_test), metadata = tfds.load(
        'cifar10', split=['train', 'test'], shuffle_files=True, 
        with_info=True, as_supervised=True
    )
    
    train_ds = ds_train \
        .cache() \
        .batch(1, drop_remainder=True) \
        .prefetch(tf.data.experimental.AUTOTUNE)
    
    test_ds = ds_test \
        .cache() \
        .batch(1, drop_remainder=True) \
        .prefetch(tf.data.experimental.AUTOTUNE)
    return (train_ds, test_ds)

# Build Model

In [16]:
transforms = tf.keras.Sequential([
    tf.keras.layers.experimental.preprocessing.RandomFlip('horizontal'),
    tf.keras.layers.experimental.preprocessing.RandomRotation(0.2),
])

In [23]:
## Create Model
def create_model(trial):
    inputs = Input(shape=(IMG_SIZE, IMG_SIZE, 3))
    x = transforms(inputs)
    x = tf.keras.applications.resnet.preprocess_input(x)
    x = tf.keras.applications.ResNet50(input_shape=(IMG_SIZE, IMG_SIZE, 3), include_top=False)(x, training=False)
    # Flatten or GAP
    use_gap = trial.suggest_categorical('use_gap', [True, False])
    if use_gap:
        x = GlobalAveragePooling2D()(x)
    else:
        x = Flatten()(x)
    # Dense Layer Units
    num_hidden = trial.suggest_int('dense_1', 32, 512, log=True)
    # Activation
    activation = trial.suggest_categorical('activation', ['relu', 'selu', 'elu'])
    x = Dense(num_hidden, activation=activation)(x)
    # Dropout rate    
    drop_rate = trial.suggest_float('drop_rate', 0.0, 0.8)
    x = Dropout(drop_rate)(x)
    outputs = Dense(NUM_CLASSES)(x)
    model = tf.keras.Model(inputs, outputs)
    return model

In [19]:
## Create Optimizer
def create_optimizer(trial):
    # LR
    lr = trial.suggest_float('learning_rate', 1e-5, 1e-1, log=True)
    optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
    return lr

In [20]:
## Define Objective
def train(model, optimizer, dataset, mode="eval"):
    loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)    
    accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name="accuracy")
    mean_loss = tf.keras.metrics.Mean(name="loss")
    for images, labels in dataset:
        with tf.GradientTape() as tape:
            predictions = model(images, training=(mode=='train'))
            loss = loss_object(labels, predictions)            
            if mode == "train":
                gradients = tape.gradients(loss, model.trainable_variables)
                optimizer.apply_gradients(zip(gradients, model.trainable_variables))
            accuracy(labels, predictions)
            loss(loss)
    return accuracy, loss

# Run Trials

In [25]:
def objective(trial):
    # dataset
    train_ds, test_ds = get_dataset()
    # model
    model = create_model(trial)
    # optimizer
    optimizer = create_optimizer(trial)
    # train
    for _ in range(EPOCHS):
        train_acc, train_loss = train(model, optimizer, train_ds, "train")
        test_acc, test_loss = train(model, optimizer, test_ds, "eval")
    return test_acc.result()

In [26]:
# Run
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=40)

[32m[I 2020-10-19 19:27:49,593][0m A new study created in memory with name: no-name-43d5c55a-eab8-455a-babd-326088d1f35a[0m
[33m[W 2020-10-19 19:27:53,207][0m Trial 0 failed because of the following error: AttributeError("'GradientTape' object has no attribute 'gradients'",)
Traceback (most recent call last):
  File "/home/ashish/miniconda3/envs/tf2_3/lib/python3.6/site-packages/optuna/study.py", line 799, in _run_trial
    result = func(trial)
  File "<ipython-input-25-effd22e538c1>", line 10, in objective
    train_acc, train_loss = train(model, optimizer, train_ds, "train")
  File "<ipython-input-20-6ad9b766c09e>", line 11, in train
    gradients = tape.gradients(loss, model.trainable_variables)
AttributeError: 'GradientTape' object has no attribute 'gradients'[0m


AttributeError: 'GradientTape' object has no attribute 'gradients'

**The best test accuracy is 68.67%, which is slightly better than our baseline**

Keras-Tuner also supports bayesian optimization to search the best model (BayesianOptimization Tuner). You could give it a try too.

# 5. Conclusion

We saw that best architecture does not use any image augmentations 😂 and `selu` seems to be the activation that keeps showing up.

Here are a few things that we could try:
- additional image augmentation search
- search the pooling options (Global Average, Global Max Pooling)
- add batchnorm?
- add more layers?
