<a href="https://colab.research.google.com/github/robagby/Computer-Vision/blob/main/Image_Classification_with_ResNetv2_on_CIFAR10.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
%tensorflow_version 2.x

# install tensorflow 2 and tensorflow datasets on a personal machine
# !pip install tensorflow-gpu
# !pip install tensorflow-datasets

import tensorflow as tf
from tensorflow import keras
import tensorflow_datasets as tfds

from keras.layers import Input, Activation, Conv2D, Dense, Dropout, BatchNormalization, ReLU, DepthwiseConv2D, GlobalAveragePooling2D, GlobalMaxPooling2D, Add
from keras.models import Model
from keras import regularizers

import math
import numpy as np
import matplotlib.pyplot as plt

import os
import warnings

In [3]:
# data
DATA_NUM_CLASSES         = 10
DATA_CHANNELS            = 3
DATA_ROWS                = 32
DATA_COLS                = 32
DATA_CROP_ROWS           = 28
DATA_CROP_COLS           = 28
# per-channel statistics, shaped (1, 1, 3) so they broadcast over an image's rows and columns
DATA_MEAN                = np.array([[[125.30691805, 122.95039414, 113.86538318]]]) # CIFAR10
DATA_STD_DEV             = np.array([[[ 62.99321928,  62.08870764,  66.70489964]]]) # CIFAR10

# model (number of bottleneck blocks per encoder level)
MODEL_LEVEL_0_BLOCKS     = 4
MODEL_LEVEL_1_BLOCKS     = 6
MODEL_LEVEL_2_BLOCKS     = 3

# training
TRAINING_BATCH_SIZE      = 32
TRAINING_SHUFFLE_BUFFER  = 5000
TRAINING_BN_MOMENTUM     = 0.99
TRAINING_BN_EPSILON      = 0.001
TRAINING_LR_MAX          = 0.001
# staircase schedule parameters (only used by the commented-out staircase option in lr_schedule)
# TRAINING_LR_SCALE        = 0.1
# TRAINING_LR_EPOCHS       = 2
TRAINING_LR_INIT_SCALE   = 0.01
TRAINING_LR_INIT_EPOCHS  = 5
TRAINING_LR_FINAL_SCALE  = 0.01
TRAINING_LR_FINAL_EPOCHS = 55

# training (derived)
TRAINING_NUM_EPOCHS = TRAINING_LR_INIT_EPOCHS + TRAINING_LR_FINAL_EPOCHS
TRAINING_LR_INIT    = TRAINING_LR_MAX*TRAINING_LR_INIT_SCALE
TRAINING_LR_FINAL   = TRAINING_LR_MAX*TRAINING_LR_FINAL_SCALE

# saving
SAVE_MODEL_PATH = './save/model/'
# create the checkpoint directory from Python (no shell escape) so the cell
# also works outside a POSIX shell; exist_ok makes re-running the cell safe
os.makedirs(SAVE_MODEL_PATH, exist_ok=True)

In [4]:
def pre_processing_train(example):
    """Normalize and augment a single training example.

    Args:
        example: mapping with an "image" entry and a "label" entry.

    Returns:
        (image, label): the image cast to float32, standardized with
        DATA_MEAN / DATA_STD_DEV, randomly flipped left-right and randomly
        cropped to DATA_CROP_ROWS x DATA_CROP_COLS x DATA_CHANNELS; the label
        cast to int32.
    """
    image = example["image"]
    label = example["label"]

    # standardize with the per-channel dataset statistics
    image = tf.math.divide(tf.math.subtract(tf.dtypes.cast(image, tf.float32), DATA_MEAN), DATA_STD_DEV)

    # augmentation: random horizontal flip, then random crop
    image = tf.image.random_flip_left_right(image)
    # channel count comes from the shared constant so it cannot drift from the model input
    image = tf.image.random_crop(image, size=[DATA_CROP_ROWS, DATA_CROP_COLS, DATA_CHANNELS])

    label = tf.dtypes.cast(label, tf.int32)

    return image, label


def pre_processing_test(example):
    """Normalize and center-crop a single test example (no augmentation).

    Args:
        example: mapping with an "image" entry and a "label" entry.

    Returns:
        (image, label): the image cast to float32, standardized with
        DATA_MEAN / DATA_STD_DEV and cropped to the central
        DATA_CROP_ROWS x DATA_CROP_COLS window; the label cast to int32.
    """
    raw_image = example["image"]
    raw_label = example["label"]

    # top-left corner of the centered crop window
    offset_rows = (DATA_ROWS - DATA_CROP_ROWS) // 2
    offset_cols = (DATA_COLS - DATA_CROP_COLS) // 2

    # standardize with the per-channel dataset statistics
    as_float     = tf.dtypes.cast(raw_image, tf.float32)
    centered     = tf.math.subtract(as_float, DATA_MEAN)
    standardized = tf.math.divide(centered, DATA_STD_DEV)

    cropped = tf.image.crop_to_bounding_box(standardized, offset_rows, offset_cols, DATA_CROP_ROWS, DATA_CROP_COLS)

    return cropped, tf.dtypes.cast(raw_label, tf.int32)

In [5]:
# download (on first use) and load the CIFAR10 training and testing splits
raw_train, info = tfds.load("cifar10", split=tfds.Split.TRAIN, with_info=True)
raw_test,  info = tfds.load("cifar10", split=tfds.Split.TEST,  with_info=True)

# training pipeline: normalize + augment -> shuffle -> batch -> prefetch
dataset_train = (
    raw_train
    .map(pre_processing_train, num_parallel_calls=4)
    .shuffle(buffer_size=TRAINING_SHUFFLE_BUFFER)
    .batch(TRAINING_BATCH_SIZE)
    .prefetch(buffer_size=1)
)

# testing pipeline: normalize + center crop -> batch -> prefetch (no shuffling)
dataset_test = (
    raw_test
    .map(pre_processing_test, num_parallel_calls=4)
    .batch(TRAINING_BATCH_SIZE)
    .prefetch(buffer_size=1)
)

[1mDownloading and preparing dataset cifar10/3.0.2 (download: 162.17 MiB, generated: 132.40 MiB, total: 294.58 MiB) to /root/tensorflow_datasets/cifar10/3.0.2...[0m


Dl Completed...: 0 url [00:00, ? url/s]

Dl Size...: 0 MiB [00:00, ? MiB/s]

Extraction completed...: 0 file [00:00, ? file/s]






0 examples [00:00, ? examples/s]

Shuffling and writing examples to /root/tensorflow_datasets/cifar10/3.0.2.incompleteN622FB/cifar10-train.tfrecord


  0%|          | 0/50000 [00:00<?, ? examples/s]

0 examples [00:00, ? examples/s]

Shuffling and writing examples to /root/tensorflow_datasets/cifar10/3.0.2.incompleteN622FB/cifar10-test.tfrecord


  0%|          | 0/10000 [00:00<?, ? examples/s]

[1mDataset cifar10 downloaded and prepared to /root/tensorflow_datasets/cifar10/3.0.2. Subsequent calls will reuse this data.[0m


In [7]:
# create and compile model
def _bn_relu(x):
    """Apply batch normalization (shared TRAINING_BN_* settings) followed by ReLU."""
    x = keras.layers.BatchNormalization(axis=-1, momentum=TRAINING_BN_MOMENTUM, epsilon=TRAINING_BN_EPSILON, center=True, scale=True)(x)
    return keras.layers.ReLU()(x)


def _conv(x, filters, kernel_size, strides=1):
    """Apply a bias-free, activation-free convolution with 'same' padding."""
    return keras.layers.Conv2D(filters, kernel_size, strides=strides, padding='same', activation=None, use_bias=False)(x)


def _bottleneck_block(x, bottleneck_filters, output_filters, strides=1, project=False):
    """Pre-activation bottleneck block: (BN, ReLU, conv) x 3 on the residual path, then Add.

    Args:
        x: input tensor of the block.
        bottleneck_filters: filters of the 1x1 reduction and the 3x3 convolution.
        output_filters: filters of the final 1x1 convolution (block output channels).
        strides: stride of the first 1x1 convolution on the residual path and,
            when project is True, of the 1x1 convolution on the main path.
        project: when True the main path is a 1x1 convolution to output_filters
            (used where the channel count and/or resolution changes); when
            False the main path is the identity.

    Returns:
        The sum of the main path and the residual path.
    """
    # residual path: 1x1 (optionally strided) -> 3x3 -> 1x1
    residual = _bn_relu(x)
    residual = _conv(residual, bottleneck_filters, 1, strides=strides)
    residual = _bn_relu(residual)
    residual = _conv(residual, bottleneck_filters, 3)
    residual = _bn_relu(residual)
    residual = _conv(residual, output_filters, 1)

    # main path: projection applied to the raw block input, or identity
    # (created after the residual path so layers are built in the same order as before)
    if project:
        x = _conv(x, output_filters, 1, strides=strides)

    return keras.layers.Add()([x, residual])


def create_model(rows, cols, channels, level_0_blocks, level_1_blocks, level_2_blocks, num_classes, lr_initial):
    """Build and compile the ResNetV2-style bottleneck classifier.

    Encoder layout (tensor sizes shown for a 28 x 28 input):
        tail    : 3x3 conv, 16 filters                         -> 28 x 28 x 16
        level 0 : bottleneck 16 -> 64,  stride 1 projection    -> 28 x 28 x 64
        level 1 : bottleneck 32 -> 128, stride 2 projection    -> 14 x 14 x 128
        level 2 : bottleneck 64 -> 256, stride 2 projection    ->  7 x  7 x 256
        final   : batch norm + ReLU
    Decoder: global average pooling followed by a softmax dense layer.

    Each level starts with one projection block and is followed by
    (level_N_blocks - 1) identity blocks, so a level always contains at
    least one block.

    Args:
        rows, cols, channels: shape of the input image.
        level_0_blocks, level_1_blocks, level_2_blocks: number of bottleneck
            blocks at each encoder level.
        num_classes: number of units of the softmax output layer.
        lr_initial: learning rate passed to the Adam optimizer.

    Returns:
        The compiled keras.Model named 'resnetv2_model' (sparse categorical
        cross-entropy loss, accuracy metric).
    """
    # encoder - input
    model_input = keras.Input(shape=(rows, cols, channels), name='input_image')

    # encoder - tail
    x = _conv(model_input, 16, 3)

    # encoder - levels: (number of blocks, bottleneck filters, output filters, stride of first block)
    level_specs = (
        (level_0_blocks, 16,  64, 1),
        (level_1_blocks, 32, 128, 2),
        (level_2_blocks, 64, 256, 2),
    )
    for num_blocks, bottleneck_filters, output_filters, first_stride in level_specs:
        # special / down sampling bottleneck - repeat 1x
        x = _bottleneck_block(x, bottleneck_filters, output_filters, strides=first_stride, project=True)

        # standard bottleneck - repeat (num_blocks - 1)x
        for _ in range(num_blocks - 1):
            x = _bottleneck_block(x, bottleneck_filters, output_filters)

    # encoder - complete the last pre-activation block with batch norm and ReLU
    encoder_output = _bn_relu(x)

    # decoder
    y              = keras.layers.GlobalAveragePooling2D()(encoder_output)
    decoder_output = keras.layers.Dense(num_classes, activation='softmax')(y)

    # forward path
    model = keras.Model(inputs=model_input, outputs=decoder_output, name='resnetv2_model')

    # loss, backward path (implicit) and weight update
    model.compile(optimizer=tf.keras.optimizers.Adam(lr_initial), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    # return model
    return model

In [None]:
# build the compiled classifier for the cropped input size
model = create_model(
    rows=DATA_CROP_ROWS,
    cols=DATA_CROP_COLS,
    channels=DATA_CHANNELS,
    level_0_blocks=MODEL_LEVEL_0_BLOCKS,
    level_1_blocks=MODEL_LEVEL_1_BLOCKS,
    level_2_blocks=MODEL_LEVEL_2_BLOCKS,
    num_classes=DATA_NUM_CLASSES,
    lr_initial=TRAINING_LR_MAX,
)

# print the layer summary and render the graph (with tensor shapes) to a PNG
model.summary()
keras.utils.plot_model(model, to_file='cifar_model.png', show_shapes=True)

In [10]:
def lr_schedule(epoch):
    """Return the learning rate for an epoch: linear warmup, then cosine decay.

    For epoch < TRAINING_LR_INIT_EPOCHS the rate rises linearly from
    TRAINING_LR_INIT towards TRAINING_LR_MAX. Afterwards it follows a quarter
    cosine from TRAINING_LR_MAX down to TRAINING_LR_FINAL, reached
    TRAINING_LR_FINAL_EPOCHS - 1 epochs after the warmup ends, and stays at
    TRAINING_LR_FINAL for any later epoch.

    Args:
        epoch: zero-based epoch index.

    Returns:
        The learning rate as a float.
    """
    # staircase
    # lr = TRAINING_LR_MAX*math.pow(TRAINING_LR_SCALE, math.floor(epoch/TRAINING_LR_EPOCHS))

    # Linear warmup followed by cosine decay.
    if epoch < TRAINING_LR_INIT_EPOCHS:
        lr = (TRAINING_LR_MAX - TRAINING_LR_INIT)*(float(epoch)/TRAINING_LR_INIT_EPOCHS) + TRAINING_LR_INIT
    else:
        decay_span = TRAINING_LR_FINAL_EPOCHS - 1.0
        if decay_span > 0.0:
            # clamp so the rate stays at its final value past the end of the schedule
            progress = min(1.0, (float(epoch) - TRAINING_LR_INIT_EPOCHS)/decay_span)
        else:
            # a decay phase of at most one epoch has no span to interpolate over
            # (the plain formula would divide by zero); use the start of the decay
            progress = 0.0
        lr = (TRAINING_LR_MAX - TRAINING_LR_FINAL)*max(0.0, math.cos(progress*(math.pi/2.0))) + TRAINING_LR_FINAL

    return lr

# plot training accuracy and loss curves
def plot_training_curves(history):
    """Plot training/validation accuracy (top) and loss (bottom) per epoch.

    Args:
        history: object whose ``history`` attribute maps 'accuracy',
            'val_accuracy', 'loss' and 'val_loss' to per-epoch sequences.
    """
    # fetch every series up front so a missing metric fails before a figure is created
    logs = history.history
    train_acc, valid_acc   = logs['accuracy'], logs['val_accuracy']
    train_loss, valid_loss = logs['loss'], logs['val_loss']

    plt.figure(figsize=(8, 8))

    # upper panel: accuracy, y-axis capped at 1
    plt.subplot(2, 1, 1)
    for series, caption in ((train_acc, 'Training Accuracy'), (valid_acc, 'Validation Accuracy')):
        plt.plot(series, label=caption)
    plt.legend(loc='lower right')
    plt.ylabel('Accuracy')
    lower_limit = min(plt.ylim())
    plt.ylim([lower_limit, 1])
    plt.title('Training and Validation Accuracy')

    # lower panel: cross-entropy loss, y-axis fixed to [0, 2]
    plt.subplot(2, 1, 2)
    for series, caption in ((train_loss, 'Training Loss'), (valid_loss, 'Validation Loss')):
        plt.plot(series, label=caption)
    plt.legend(loc='upper right')
    plt.ylabel('Cross Entropy')
    plt.ylim([0, 2.0])
    plt.title('Training and Validation Loss')
    plt.xlabel('epoch')

    plt.show()

In [None]:
# callbacks: per-epoch learning rate from lr_schedule, and a checkpoint
# written only when the monitored val_loss improves
lr_callback = keras.callbacks.LearningRateScheduler(lr_schedule)
checkpoint_callback = keras.callbacks.ModelCheckpoint(
    filepath=SAVE_MODEL_PATH+'model_{epoch}.h5',
    monitor='val_loss',
    save_best_only=True,
    verbose=1)
callbacks = [lr_callback, checkpoint_callback]

# training (the test split doubles as validation data)
initial_epoch_num = 0
history = model.fit(
    x=dataset_train,
    validation_data=dataset_test,
    epochs=TRAINING_NUM_EPOCHS,
    initial_epoch=initial_epoch_num,
    callbacks=callbacks,
    verbose=1)


In [None]:
# show the per-epoch accuracy and loss curves recorded during training
plot_training_curves(history)

# final metrics on the test pipeline
test_loss, test_accuracy = model.evaluate(x=dataset_test)
for caption, value in (('Test loss:     ', test_loss), ('Test accuracy: ', test_accuracy)):
    print(caption, value)

In [None]:
# take the first batch of the test pipeline for display
dataset_display                = dataset_test.take(1)
it                             = iter(dataset_display)
display_images, display_labels = next(it)

# predict pmf and labels for this dataset
predict_labels_pmf = model.predict(x=dataset_display)
predict_labels     = np.argmax(predict_labels_pmf, axis=1)

# for display undo the normalization and rescale to [0, 1]; DATA_STD_DEV and
# DATA_MEAN are (1, 1, 3) and broadcast over the batch. The float round trip
# can land marginally outside [0, 1], so clip explicitly instead of relying
# on imshow to clip (and warn).
display_images = np.clip((display_images.numpy()*DATA_STD_DEV + DATA_MEAN)/255.0, 0.0, 1.0)
display_labels = display_labels.numpy()

# cycle through the images in the batch
for image_index in range(predict_labels.size):
    # display the predicted label, actual label and image
    print('Predicted label: {0:1d} and actual label: {1:1d}'.format(predict_labels[image_index], display_labels[image_index]))
    plt.imshow(display_images[image_index])
    plt.show()