##### Copyright 2020 The TensorFlow Authors.



In [None]:
#@title Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Only BatchNorm Experiments: ResNet for TinyImageNet (TF 2.3)


<table class="tfo-notebook-buttons" align="left">
  <td>
    <a target="_blank" href="https://colab.research.google.com/github/tensorflow/examples/blob/master/template/notebook.ipynb"><img src="https://www.tensorflow.org/images/colab_logo_32px.png" />Run in Google Colab</a>
  </td>
  <td>
    <a target="_blank" href="https://github.com/tensorflow/examples/blob/master/template/notebook.ipynb"><img src="https://www.tensorflow.org/images/GitHub-Mark-32px.png" />View source on GitHub</a>
  </td>
</table>

## Overview
The ImageNet challenge (ILSVRC) is the most common benchmark for evaluating image classification and localization models. Although ImageNet is a good dataset for large-scale evaluation, the resources required to build and train a model become a limiting factor even with a good GPU. Tiny ImageNet is a dataset made famous by Stanford University which consists of 200 classes and 100,000 training images. It is a good subset of the actual ImageNet dataset that still manages to capture the variety and 'hardness' of the ILSVRC, albeit at a smaller scale.
#  
As a researcher with resource constraints, it becomes imperative to adopt smart training strategies and efficiency tactics to effectively solve a hard problem. This notebook shows how building a model with 10x fewer parameters and using good training strategies (with TensorFlow 2.3) can enable one to build a good model with the available resources.
#  
This notebook will show the following:
- Training with image augmentation on the fly with tf.keras' ImageDataGenerator class
- Building an efficient custom ResNet model with 10x fewer parameters
- Progressive resizing for the model to learn scene-dependent features from different image sizes
  
    
**Note**: The top accuracies for this dataset found on Kaggle use pre-trained models with frozen-layer training.
The top scorers for the 1st Stanford challenge can be found at this link: [Tiny ImageNet Visual Recognition Challenge](https://tiny-imagenet.herokuapp.com/)

## Setup

In [None]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf
# Stop early unless the installed TensorFlow version string starts with "2.3".
assert tf.__version__.startswith('2.3')

print(f'{tf.__version__}')

In [None]:
from tensorflow.keras.layers import BatchNormalization, Conv2D, AveragePooling2D, MaxPooling2D
from tensorflow.keras.layers import ZeroPadding2D, Activation, Flatten, add
from tensorflow.keras.layers import GlobalAveragePooling2D, SeparableConv2D
from tensorflow.keras.callbacks import LearningRateScheduler, ModelCheckpoint
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2

In [None]:
# Download the Tiny ImageNet archive and derive the dataset paths

import os
import pathlib

download_path = os.getcwd()

# The working directory is passed as `cache_subdir`; presumably this makes
# get_file store and extract the archive there -- confirm against the Keras docs.
path = tf.keras.utils.get_file(
    'tiny-imagenet-200.zip',
    origin='http://cs231n.stanford.edu/tiny-imagenet-200.zip',
    cache_subdir=download_path,
    extract=True,
)

# Dropping the ".zip" suffix gives the path used as the dataset root below.
data_dir = pathlib.Path(path).with_suffix('')

TRAIN = data_dir / "train"
VAL = data_dir / "val/images"
VAL_ANNOT = data_dir / 'val/val_annotations.txt'

## Image augmentation and image generators
- The function below returns the generators for the ImageDataGenerator objects we will use to train and validate our ResNet model.


In [None]:
import pandas as pd
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Read the tab-separated validation annotations and keep only the 'File' and
# 'Class' columns; the other four columns are not used by the generators.
val_data = pd.read_csv(
    VAL_ANNOT,
    sep='\t',
    names=['File', 'Class', 'X', 'Y', 'H', 'W'],
).drop(columns=['X', 'Y', 'H', 'W'])


def train_val_gen(train_target=64, train_batch=64, val_target=64, val_batch=64):
    """Create the training and validation image iterators.

    Training images are read from the ``TRAIN`` directory with random
    augmentation applied; validation images are the files listed in
    ``val_data``, read from ``VAL`` with rescaling only and without shuffling.

    Args:
        train_target: Side length (in pixels) training images are resized to.
        train_batch: Batch size of the training iterator.
        val_target: Side length (in pixels) validation images are resized to.
        val_batch: Batch size of the validation iterator.

    Returns:
        A ``(train_generator, val_generator)`` tuple.
    """
    augmentation = dict(
        rescale=1./255,
        rotation_range=18,
        zoom_range=0.15,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.15,
        horizontal_flip=True,
        fill_mode="reflect",  # Fills empty with reflections
        brightness_range=[0.4, 1.6],
    )
    train_generator = ImageDataGenerator(**augmentation).flow_from_directory(
        TRAIN,
        target_size=(train_target, train_target),
        batch_size=train_batch,
        class_mode='categorical',
    )

    # Validation data is only rescaled, never augmented.
    val_generator = ImageDataGenerator(rescale=1./255).flow_from_dataframe(
        val_data,
        directory=VAL,
        x_col='File',
        y_col='Class',
        target_size=(val_target, val_target),
        color_mode='rgb',
        class_mode='categorical',
        batch_size=val_batch,
        shuffle=False,
        seed=42,
    )

    return train_generator, val_generator

## Defining callbacks to employ different training strategies

In [None]:
# Creating a custom callback to save the model after every 5 epochs

class EpochCheckpoint(tf.keras.callbacks.Callback):
    """Keras callback that saves the model every ``every`` epochs.

    The callback keeps its own epoch counter instead of relying on the
    ``epoch`` argument Keras passes in, so the count keeps increasing when
    the same instance is handed to several consecutive training calls.

    Args:
        outputPath: Directory the checkpoint files are written to.
        every: Save a checkpoint each time this many epochs have completed.
        startAt: Initial value of the internal epoch counter (number of
            epochs considered already completed).
    """

    def __init__(self, outputPath, every=5, startAt=0):
        super(EpochCheckpoint, self).__init__()

        self.outputPath = outputPath
        self.every = every
        self.intEpoch = startAt

    def on_epoch_end(self, epoch, log=None):
        """Save the model when the internal epoch count hits a multiple of ``every``.

        Args:
            epoch: Epoch index supplied by Keras (unused; see class docstring).
            log: Metrics dict supplied by Keras (unused).
        """
        completed = self.intEpoch + 1

        if completed % self.every == 0:
            # The epoch number is part of the file name so that each
            # checkpoint is kept instead of overwriting the previous one
            # (the original format string had no placeholder).
            path = os.path.join(self.outputPath,
                                "custom_resnet_{}.hdf5".format(completed))
            self.model.save(path, overwrite=True)

        self.intEpoch = completed

## Learning rate decay function to enable the model to converge better

In [None]:
from tensorflow.keras.callbacks import LearningRateScheduler

NUM_EPOCHS = 30
INIT_LR = 0.01

def poly_decay(epoch):
    """Polynomial learning-rate decay from ``INIT_LR`` down to 0.

    Computes ``INIT_LR * (1 - epoch / NUM_EPOCHS) ** power`` with
    ``power = 1.0`` (i.e. a linear decay).

    Args:
        epoch: Zero-based epoch index, as passed on by LearningRateScheduler.

    Returns:
        The learning rate for ``epoch``. For ``epoch >= NUM_EPOCHS`` the
        result is 0.0 rather than a negative value.
    """
    max_epochs = NUM_EPOCHS
    base_lr = INIT_LR
    power = 1.0

    # Clamp the remaining fraction at 0 so that running past NUM_EPOCHS can
    # never produce a negative learning rate.
    remaining = max(0.0, 1 - (epoch / float(max_epochs)))

    return base_lr * remaining ** power

## Custom ResNet that uses Pre-Activation and BottleNeck Blocks with SeparableConv2D

---


- We use 1x1 convolutions to increase the number of channels to create a wider model with a minimal increase in trainable parameters.
- This [research paper](https://arxiv.org/abs/1812.01187) documents improved accuracy with AveragePooling2D in the shortcut connection. This model showed a performance drop and hence was replaced with a 1x1 convolution.
- Uses SeparableConv2D rather than vanilla Conv2D to reduce the number of parameters and make the model feasible to train in constrained environments like Google Colab.

In [None]:
class ResNet:
    """Builder for a pre-activation, bottleneck ResNet that uses SeparableConv2D.

    Both methods are static; use ``ResNet.build(...)`` to obtain a Keras model.
    """

    @staticmethod
    def residual_module(data, K, stride, chanDim, red=False, reg=0.0001, bnEps=2e-5, bnMom=0.9):
        """Apply one pre-activation bottleneck residual module to ``data``.

        The main branch is three BN -> ReLU -> convolution steps: a 1x1
        convolution to ``int(K * 0.25)`` channels, a 3x3 separable convolution
        (also ``int(K * 0.25)`` channels) that applies ``stride``, and a 1x1
        convolution to ``K`` channels. The shortcut branch always goes through
        a 1x1 convolution to ``K`` channels before being added.

        Args:
            data: Input tensor.
            K: Number of output channels of the module.
            stride: Strides of the 3x3 separable convolution. Must be the
                tuple ``(2, 2)`` for the down-sampling shortcut to trigger.
            chanDim: Channel axis handed to BatchNormalization.
            red: When True and ``stride == (2, 2)``, the shortcut is taken
                from the first BN output, average-pooled with a 2x2 window.
            reg: L2 factor for the 1x1 convolution kernels and the depthwise
                kernel of the separable convolution.
            bnEps: BatchNormalization epsilon.
            bnMom: BatchNormalization momentum.

        Returns:
            The output tensor (main branch + shortcut).
        """
        shortcut = data

        bn1 = BatchNormalization(axis=chanDim, epsilon=bnEps, momentum=bnMom, beta_initializer="zeros", gamma_initializer="ones")(data)
        act1 = Activation("relu")(bn1)
        conv1 = Conv2D(int(K * 0.25), (1, 1), use_bias=False, kernel_regularizer=l2(reg))(act1)

        bn2 = BatchNormalization(axis=chanDim, epsilon=bnEps, momentum=bnMom, beta_initializer="zeros", gamma_initializer="ones")(conv1)
        act2 = Activation("relu")(bn2)
        conv2 = SeparableConv2D(int(K * 0.25), (3, 3), strides=stride, padding="same", use_bias=False, depthwise_regularizer=l2(reg), depthwise_initializer='glorot_uniform')(act2)

        bn3 = BatchNormalization(axis=chanDim, epsilon=bnEps, momentum=bnMom, beta_initializer="zeros", gamma_initializer="ones")(conv2)
        act3 = Activation("relu")(bn3)
        conv3 = Conv2D(K, (1, 1), use_bias=False, kernel_regularizer=l2(reg))(act3)

        # Down-sample the shortcut so its spatial size matches the strided
        # main branch.
        if red and stride == (2, 2):
            shortcut = AveragePooling2D((2, 2))(bn1)

        # The shortcut is always projected to K channels with a 1x1 convolution.
        shortcut = Conv2D(K, (1, 1))(shortcut)
        x = add([conv3, shortcut])

        return x

    @staticmethod
    def build(width, height, depth, classes, stages, filters, reg=0.0001, bnEps=2e-5, bnMom=0.9):
        """Assemble the full network and return it as a ``tf.keras.Model``.

        Args:
            width: Input width (may be None for variable-size inputs).
            height: Input height (may be None for variable-size inputs).
            depth: Number of input channels.
            classes: Number of output classes (channels of the final 1x1
                convolution that feeds the global average pooling).
            stages: Sequence giving the number of residual modules per stage.
            filters: Sequence of output channel counts; ``filters[i]`` is
                used for stage ``i``. Entries beyond ``len(stages)`` are
                not used.
            reg: L2 regularization factor, applied in the stem, in every
                residual module and in the classification convolution.
            bnEps: BatchNormalization epsilon.
            bnMom: BatchNormalization momentum.

        Returns:
            A Keras model named "resnet" with a softmax output over ``classes``.
        """
        inputShape = (height, width, depth)
        chanDim = -1

        inputs = tf.keras.Input(shape=inputShape)

        # Stem: BN -> ReLU -> three separable convolutions -> BN -> ReLU -> max pool.
        x = BatchNormalization(axis=chanDim, epsilon=bnEps, momentum=bnMom, beta_initializer="zeros", gamma_initializer="ones")(inputs)
        x = Activation("relu")(x)
        x = SeparableConv2D(64, (3, 3), use_bias=False, padding="same", depthwise_regularizer=l2(reg), depthwise_initializer='glorot_uniform')(x)
        x = SeparableConv2D(128, (3, 3), use_bias=False, padding="same", depthwise_regularizer=l2(reg), depthwise_initializer='glorot_uniform')(x)
        x = SeparableConv2D(256, (3, 3), use_bias=False, padding="same", depthwise_regularizer=l2(reg), depthwise_initializer='glorot_uniform')(x)
        x = BatchNormalization(axis=chanDim, epsilon=bnEps, momentum=bnMom, beta_initializer="zeros", gamma_initializer="ones")(x)
        x = Activation("relu")(x)
        x = ZeroPadding2D((1, 1))(x)
        x = MaxPooling2D((3, 3), strides=(2, 2))(x)

        for i, num_modules in enumerate(stages):
            # Only the first stage keeps the spatial resolution; every later
            # stage starts with a stride-2 module.
            stride = (1, 1) if i == 0 else (2, 2)
            # `reg` is forwarded explicitly: previously it was omitted here, so
            # the residual modules always used their 0.0001 default.
            x = ResNet.residual_module(x, filters[i], stride, chanDim, red=True, reg=reg, bnEps=bnEps, bnMom=bnMom)

            for _ in range(num_modules - 1):
                x = ResNet.residual_module(x, filters[i], (1, 1), chanDim, reg=reg, bnEps=bnEps, bnMom=bnMom)

        # Classification head: 1x1 convolution to `classes` channels followed
        # by global average pooling and softmax.
        x = BatchNormalization(axis=chanDim, epsilon=bnEps, momentum=bnMom)(x)
        x = Activation("relu")(x)
        x = Conv2D(classes, (1, 1), kernel_regularizer=l2(reg))(x)
        x = GlobalAveragePooling2D(data_format='channels_last')(x)
        x = Activation("softmax")(x)

        model = tf.keras.Model(inputs, x, name="resnet")

        return model

## Build the model
---
We pass in (None, None, 3) for the shape of the image to allow the model to accept any image size we provide. This way, we can employ progressive resizing and feed in images of different sizes so that the model learns scene-dependent features as well.

In [None]:
# Width and height are left as None so the model accepts any image size.
model = ResNet.build(
    width=None,
    height=None,
    depth=3,
    classes=200,
    stages=(3, 4, 6),
    filters=(64, 128, 256, 512),
    reg=0.0005,
)

In [None]:
# Checkpoint every 5 epochs and apply the polynomial learning-rate schedule.
checkpoint_cb = EpochCheckpoint("/content/", every=5)
lr_schedule_cb = LearningRateScheduler(poly_decay)

callbacks = [checkpoint_cb, lr_schedule_cb]

In [None]:
# Print the layer-by-layer summary of the model.
model.summary()

## Compile the model

In [None]:
# NOTE(review): when `callbacks` (which contains LearningRateScheduler(poly_decay))
# is passed to training, the scheduler presumably replaces this initial
# learning rate with the poly_decay value each epoch -- confirm.
opt = Adam(
    learning_rate=0.1,
    beta_1=0.9,
    beta_2=0.999,
    epsilon=0.1,
    amsgrad=False,
)

model.compile(
    optimizer=opt,
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

## Training the model with data generators
- ImageDataGenerator is best suited for augmenting images on the fly and training the model. Custom image augmentation functions can also be used.


In [None]:
# Generators for the first phase: 64x64 training and validation images, batch size 64.
train_gen, val_gen = train_val_gen(train_target=64, train_batch=64, val_target=64, val_batch=64)

## Training the model on 64x64 sized images
- First train the model with 64x64 sized images with a batch size of 64.
- **Note**: Although training with higher batch sizes is recommended, ensure that Colab provides 25.51 GB of RAM rather than 12.72 GB. Colab offers this upgrade after a session crashes due to RAM usage: it prompts the user to allow the RAM increase.
- **Note**: Every epoch should take around 9.5 minutes with Colab's GPU hardware accelerator. Please ensure that the Hardware Accelerator in the Runtime settings is appropriately set.

In [None]:
# `Model.fit` accepts generators directly; `Model.fit_generator` is deprecated
# in TF 2.x and takes the same arguments used here.
model.fit(
    train_gen,
    steps_per_epoch=100000 // 64,   # 100,000 training images, batch size 64
    validation_data=val_gen,
    validation_steps=10000 // 64,   # presumably 10,000 validation images -- confirm
    epochs=20,
    max_queue_size=64 * 2,
    callbacks=callbacks,
    verbose=1
)

In [None]:
# Write the model, including its optimizer, to an HDF5 file
filepath = "/content/epoch_20.hdf5"
model.save(filepath, overwrite=True, include_optimizer=True)

# Read it back (compiled) to continue training
model = tf.keras.models.load_model(filepath, custom_objects=None, compile=True)

## Training with 32x32 sized images
- Train the model for 20 epochs with 32x32 sized images to enable the model to learn semantic scene dependent features for the same layers.

In [None]:
# Training images are resized to 32x32; validation stays at 64x64.
train_gen, val_gen = train_val_gen(train_target=32, train_batch=64, val_target=64, val_batch=64)

# `Model.fit` accepts generators directly; `Model.fit_generator` is deprecated
# in TF 2.x and takes the same arguments used here.
model.fit(
    train_gen,
    steps_per_epoch=100000 // 64,
    validation_data=val_gen,
    validation_steps=10000 // 64,
    epochs=20,
    max_queue_size=128,
    callbacks=callbacks,
    verbose=1
)

In [None]:
# Write the model, including its optimizer, to an HDF5 file
filepath = "/content/epoch_40.hdf5"
model.save(filepath, overwrite=True, include_optimizer=True)

# Read it back (compiled) to continue training
model = tf.keras.models.load_model(filepath, custom_objects=None, compile=True)

## Training with 16x16 sized images
- Train the model for 20 epochs with 16x16 sized images to enable the model to learn semantic scene-dependent features for the same layers.

In [None]:
# Training images are resized to 16x16; validation stays at 64x64.
train_gen, val_gen = train_val_gen(train_target=16, train_batch=64, val_target=64, val_batch=64)

In [None]:
# `Model.fit` accepts generators directly; `Model.fit_generator` is deprecated
# in TF 2.x and takes the same arguments used here.
model.fit(
    train_gen,
    steps_per_epoch=100000 // 64,
    validation_data=val_gen,
    validation_steps=10000 // 64,
    epochs=20,
    max_queue_size=64,
    callbacks=callbacks,
    verbose=1
)

In [None]:
# Write the model, including its optimizer, to an HDF5 file
filepath = "/content/epoch_60.hdf5"
model.save(filepath, overwrite=True, include_optimizer=True)

In [None]:
# Read the saved model back (compiled) to continue training
model = tf.keras.models.load_model(filepath, custom_objects=None, compile=True)

## Back to training with 32x32 sized images


In [None]:
# Training images are resized to 32x32; validation stays at 64x64.
train_gen, val_gen = train_val_gen(train_target=32, train_batch=64, val_target=64, val_batch=64)

# `Model.fit` accepts generators directly; `Model.fit_generator` is deprecated
# in TF 2.x and takes the same arguments used here.
# NOTE(review): unlike the earlier phases, no `callbacks` are passed here, so
# this phase runs without checkpointing or the LR schedule -- confirm intended.
model.fit(
    train_gen,
    steps_per_epoch=100000 // 64,
    validation_data=val_gen,
    validation_steps=10000 // 64,
    epochs=20,
    max_queue_size=64,
    verbose=1
)

In [None]:
# Write the model, including its optimizer, to an HDF5 file
filepath = "/content/epoch_80.hdf5"
model.save(filepath, overwrite=True, include_optimizer=True)

## Back to training with 64x64 sized images


In [None]:
# Load it again to continue training
model = tf.keras.models.load_model(
    filepath,
    custom_objects=None,
    compile=True
)

# Back to 64x64 training images.
train_gen, val_gen = train_val_gen(train_target=64, train_batch=64, val_target=64, val_batch=64)

# `Model.fit` accepts generators directly; `Model.fit_generator` is deprecated
# in TF 2.x and takes the same arguments used here.
# NOTE(review): unlike the earlier phases, no `callbacks` are passed here, so
# this phase runs without checkpointing or the LR schedule -- confirm intended.
model.fit(
    train_gen,
    steps_per_epoch=100000 // 64,
    validation_data=val_gen,
    validation_steps=10000 // 64,
    epochs=20,
    max_queue_size=64,
    verbose=1
)

## List of references for easy lookup

---

1. Building blocks of interpretability: [Link](https://distill.pub/2018/building-blocks/) (Holy Grail of Intuition!)
2. Deep Residual Learning for image classification: [Link](https://arxiv.org/abs/1512.03385) (Resnet Paper)
3. Bag of tricks for image classification: [Link](https://arxiv.org/abs/1812.01187) (Tweaks and tricks to Resnet for increased performance paper)
4. Imbalanced Deep Learning by Minority Class Incremental Rectification: [Link](https://arxiv.org/pdf/1804.10851.pdf) (Selectively Sampling Data paper)
5. Improved Regularization of Convolutional Neural Networks with Cutout: [Link](https://arxiv.org/pdf/1708.04552.pdf) (Cutout/Occlusion Augmentation paper)
6. Survey of resampling techniques for improving classification performance in unbalanced datasets: [Link](https://arxiv.org/pdf/1608.06048v1.pdf) (Resampling paper)