### CNN on CIFAR-10 Assignment:

1.  Please visit [this](https://drive.google.com/file/d/1NGQjke72AS93IOpNcnE9diQEg78-sU3C/view?usp=sharing) link to access the state-of-the-art DenseNet code for reference - DenseNet - cifar10 notebook link
2.  You need to create a copy of this and "retrain" this model to achieve 90+ test accuracy. 
3.  You cannot use DropOut layers.
4.  You MUST use Image Augmentation Techniques.
5.  You cannot use an already trained model as a starting point; you have to initialize the weights yourself.
6.  You cannot run the program for more than 300 epochs, and it should be clear from your log that you have used at most 300 epochs.
7.  You cannot use test images for training the model.
8.  You cannot change the general architecture of DenseNet (which means you must use Dense Block, Transition and Output blocks as mentioned in the code)
9.  You are free to change Convolution types (e.g. from 3x3 normal convolution to Depthwise Separable, etc)
10. You cannot have more than 1 Million parameters in total
11. You are free to move the code from Keras to Tensorflow, Pytorch, MXNET etc. 
12. You can use any optimization algorithm you need. 
13. You can checkpoint your model and resume training from that checkpoint, so that you do not need to start from scratch if training is interrupted at some epoch. You can load the saved model directly and continue training from that epoch.

In [1]:
# import keras
# from keras.datasets import cifar10
# from keras.models import Model, Sequential
# from keras.layers import Dense, Dropout, Flatten, Input, AveragePooling2D, merge, Activation
# from keras.layers import Conv2D, MaxPooling2D, BatchNormalization
# from keras.layers import Concatenate
# from keras.optimizers import Adam
from tensorflow.keras import models, layers
from tensorflow.keras.models import Model
from tensorflow.keras.layers import BatchNormalization, Activation, Flatten
from tensorflow.keras.optimizers import Adam

In [2]:
import tensorflow as tf
from tensorflow.python.client import device_lib

# Limit TensorFlow to the first physical GPU when at least one is present.
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        tf.config.set_visible_devices(gpus[0], 'GPU')
        logical_gpus = tf.config.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPU")
    except RuntimeError as e:
        # Visible devices must be set before GPUs have been initialized
        print(e)

# Display every device TensorFlow can see (CPU and GPU) as the cell output.
device_lib.list_local_devices()

1 Physical GPUs, 1 Logical GPU


[name: "/device:CPU:0"
 device_type: "CPU"
 memory_limit: 268435456
 locality {
 }
 incarnation: 13943713944571600737,
 name: "/device:GPU:0"
 device_type: "GPU"
 memory_limit: 15685569792
 locality {
   bus_id: 1
   links {
   }
 }
 incarnation: 11733522451806359857
 physical_device_desc: "device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0"]

In [3]:
# Load CIFAR10 Data
# The Keras loader returns the predefined (train, test) split as arrays of images and labels.
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.cifar10.load_data()

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz


In [4]:
X_train.shape, y_train.shape

((50000, 32, 32, 3), (50000, 1))

In [5]:
X_test.shape, y_test.shape

((10000, 32, 32, 3), (10000, 1))

### Creating a validation split, held out from the training data, to monitor DenseNet training

In [6]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the training images as a validation set, stratified by class label,
# with a fixed random_state so the split is reproducible.
# NOTE(review): X_train / y_train are rebound here, so re-running this cell without
# reloading the data splits the already-reduced training set again.
X_train, X_cv, y_train, y_cv = train_test_split(X_train, y_train, test_size=0.20, stratify=y_train, random_state=42)

In [7]:
X_train.shape, y_train.shape

((40000, 32, 32, 3), (40000, 1))

In [8]:
X_cv.shape, y_cv.shape

((10000, 32, 32, 3), (10000, 1))

In [9]:
# Input dimensions are taken from the training array (axis 0 is the sample axis).
img_height, img_width, channel = X_train.shape[1],X_train.shape[2],X_train.shape[3]
num_classes = 10
# Convert the integer class labels to one-hot vectors of length num_classes.
# NOTE(review): the label arrays are rebound in place, so this cell should be run only
# once per data load; running it again would encode the already one-hot labels.
y_train = tf.keras.utils.to_categorical(y_train, num_classes)
y_cv = tf.keras.utils.to_categorical(y_cv, num_classes) 
y_test = tf.keras.utils.to_categorical(y_test, num_classes) 

### Image augmentation with `ImageDataGenerator`

In [10]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Random augmentations applied on the fly to every training batch:
# rotations up to 15 degrees, zoom and horizontal/vertical shifts up to 20%,
# and random horizontal flips.
#
# No feature-wise centering, feature-wise std normalisation or ZCA whitening is
# enabled, so the generator has no data-dependent statistics to learn: the
# `zca_epsilon` argument and the `datagen.fit(X_train)` call were no-ops (the
# latter only made a float copy of the whole training set) and are omitted.
datagen = ImageDataGenerator(
        rotation_range=15,
        zoom_range=0.20,
        width_shift_range=0.20,
        height_shift_range=0.20,
        horizontal_flip=True)

In [11]:
# Dense Block
def denseblock(input, num_filter = 12, dropout_rate = 0.2, num_layers = None):
    """Stack densely connected BN -> ReLU -> 3x3 Conv units on top of `input`.

    Each unit produces ``int(num_filter * compression)`` feature maps (bias-free,
    'same' padding) and its output is concatenated along the channel axis with
    everything the block has produced so far, so the channel count grows with
    every unit.

    Args:
        input: Keras tensor the block is applied to.
        num_filter: base filter count; scaled by the notebook-level `compression`.
        dropout_rate: when > 0, a Dropout layer with this rate follows every
            convolution; 0 creates no Dropout layers at all.
        num_layers: number of units in the block. ``None`` (default) falls back
            to the notebook-level hyperparameter `l`, which keeps existing
            three-argument calls working unchanged.

    Returns:
        The concatenated output tensor of the block.
    """
    if num_layers is None:
        # Backward-compatible default: depth comes from the notebook global `l`.
        num_layers = l
    growth = int(num_filter * compression)

    temp = input
    for _ in range(num_layers):
        BatchNorm = layers.BatchNormalization()(temp)
        relu = layers.Activation('relu')(BatchNorm)
        Conv2D_3_3 = layers.Conv2D(growth, (3,3), use_bias=False, padding='same')(relu)
        if dropout_rate > 0:
            Conv2D_3_3 = layers.Dropout(dropout_rate)(Conv2D_3_3)
        # Dense connectivity: new feature maps are appended to all previous ones.
        temp = layers.Concatenate(axis=-1)([temp, Conv2D_3_3])

    return temp

## Transition block
def transition(input, num_filter = 12, dropout_rate = 0.2):
    """Apply BN -> ReLU -> 1x1 Conv (-> optional Dropout) -> 2x2 average pooling.

    The 1x1 convolution outputs ``int(num_filter * compression)`` channels
    (`compression` is read from the notebook-level hyperparameters). A Dropout
    layer is inserted only when `dropout_rate` is greater than 0.

    Returns:
        The pooled output tensor.
    """
    x = layers.BatchNormalization()(input)
    x = layers.Activation('relu')(x)
    x = layers.Conv2D(int(num_filter*compression), (1,1), use_bias=False, padding='same')(x)
    if dropout_rate > 0:
        x = layers.Dropout(dropout_rate)(x)
    return layers.AveragePooling2D(pool_size=(2,2))(x)

# Output (classification) block
def output_layer(input):
    """Build the classification head: BN -> ReLU -> 2x2 AvgPool -> Flatten -> Dense softmax.

    The Dense layer has `num_classes` units (read from the notebook-level
    variable of that name).

    Returns:
        The softmax output tensor.
    """
    x = layers.BatchNormalization()(input)
    x = layers.Activation('relu')(x)
    x = layers.AveragePooling2D(pool_size=(2,2))(x)
    x = layers.Flatten()(x)
    return layers.Dense(num_classes, activation='softmax')(x)

In [12]:
# Hyperparameters
batch_size = 128      # batch size of the augmented training generator
l = 18                # units per dense block (read as a global inside denseblock)
num_filter = 14       # filters of the first convolution; base filter count passed to every block
compression = 0.9     # multiplier applied to num_filter inside dense and transition blocks
dropout_rate = 0.0    # 0 disables Dropout: the blocks only add Dropout when the rate is > 0

In [13]:
from tensorflow.keras.regularizers import l1_l2
# Adamax optimizer handed to model.compile.
# NOTE(review): the learning rate of 0.1 given here does not survive into training:
# CustomCallback.on_train_begin assigns its own `lr` (0.001 in this notebook) to
# model.optimizer.lr, and the training log shows 0.001 from the first epoch on.
adamax = tf.keras.optimizers.Adamax(
    learning_rate=0.1,
    name='Adamax',)

In [14]:
# Network input: one image of shape (img_height, img_width, channel).
# NOTE(review): the name `input` shadows the Python builtin of the same name.
input = layers.Input(shape=(img_height, img_width, channel,))
# Stem: bias-free 3x3 convolution with num_filter filters, ReLU activation,
# he_uniform initialisation and L1/L2 kernel regularisation.
First_Conv2D = layers.Conv2D(num_filter, (3,3),activation='relu', kernel_initializer='he_uniform', kernel_regularizer=l1_l2(l1=1e-5, l2=1e-4),use_bias=False ,padding='same')(input)

# Three dense blocks, each followed by a transition block (1x1 conv + 2x2 average pooling).
First_Block = denseblock(First_Conv2D, num_filter, dropout_rate)
First_Transition = transition(First_Block, num_filter, dropout_rate)

Second_Block = denseblock(First_Transition, num_filter, dropout_rate)
Second_Transition = transition(Second_Block, num_filter, dropout_rate)

Third_Block = denseblock(Second_Transition, num_filter, dropout_rate)
Third_Transition = transition(Third_Block, num_filter, dropout_rate)

# The fourth dense block feeds the classification head directly (no transition after it).
Last_Block = denseblock(Third_Transition,  num_filter, dropout_rate)
output = output_layer(Last_Block)

model = Model(inputs=[input], outputs=[output])

print(f"Number of layers in model: {len(model.layers)}")

# Categorical cross-entropy matches the one-hot labels; the Adamax instance created
# earlier is the optimizer and accuracy is the tracked metric.
model.compile(loss='categorical_crossentropy',optimizer=adamax, metrics=['accuracy'])


Number of layers in model: 307


In [15]:
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 32, 32, 3)]  0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 32, 32, 14)   378         input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 32, 32, 14)   56          conv2d[0][0]                     
__________________________________________________________________________________________________
activation (Activation)         (None, 32, 32, 14)   0           batch_normalization[0][0]        
______________________________________________________________________________________________

In [16]:
# https://stackoverflow.com/questions/39779710/setting-up-a-learningratescheduler-in-keras

from sklearn.metrics import roc_auc_score, f1_score
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import LearningRateScheduler
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.callbacks import Callback
import numpy as np

class CustomCallback(Callback):
    """Keras callback that logs per-epoch metrics and decides when to stop training.

    Responsibilities:
      * set the optimizer's learning rate to `lr` when training begins;
      * record epoch index, loss, val_loss, accuracy and val_accuracy per epoch
        in ``self.history``;
      * stop training when the training loss becomes NaN or infinite;
      * stop training once validation accuracy exceeds 0.90 (checked only for
        epoch indices greater than 5) and keep a copy of the weights at that
        point in ``self.best_weights``.

    The history dict also carries 'f1_score', 'val_f1_score', 'auc', 'val_auc'
    and 'learning_rate' keys; this callback does not populate them. The
    train/validation arrays passed to the constructor are stored but not read
    by any method here.
    """

    def __init__(self,train_data, train_label, cv_data, cv_label, lr):
        """Store the data references and the learning rate to apply at train start."""
        # Let the Keras base class initialise its own attributes (model, etc.).
        super().__init__()
        self.x_train = train_data
        self.y_train = train_label
        self.x_cv = cv_data
        self.y_cv = cv_label
        self.lrs = lr
        self.best_weights = None

    def on_train_begin(self, logs=None):
        """Reset the history dict and overwrite the optimizer's learning rate."""
        self.history = {'loss':[], 'f1_score':[],'val_loss':[], 'val_f1_score':[], 'auc':[], 'val_auc':[], 'epoch':[],
                       'accuracy':[], 'val_accuracy':[], 'learning_rate':[]}
        # Replaces whatever learning rate the optimizer was constructed with.
        self.model.optimizer.lr = self.lrs

    def on_epoch_end(self, epoch, logs=None):
        """Record this epoch's metrics and apply the two stopping rules."""
        # `None` default avoids sharing one mutable dict across calls.
        logs = logs or {}

        self.history['epoch'].append(epoch)
        for key in ('loss', 'val_loss', 'accuracy', 'val_accuracy'):
            # Missing metrics are stored as None, keeping all lists aligned.
            self.history[key].append(logs.get(key))

        # Rule 1: abort on a diverged (NaN / inf) training loss.
        loss = logs.get('loss')
        if loss is not None and (np.isnan(loss) or np.isinf(loss)):
            print("Invalid loss and terminated at epoch {}".format(epoch))
            self.model.stop_training = True

        # Rule 2: stop once the validation-accuracy target is reached.
        val_accuracy = logs.get('val_accuracy')
        if epoch > 5 and val_accuracy is not None and val_accuracy > 0.90:
            print('\n')
            # Message now matches the condition actually tested (val_accuracy only).
            print('val accuracy reached above 90%. Cancelling training!')
            self.model.stop_training = True
            # Keep a copy of the weights that reached the target. The model
            # already holds these weights, so no set_weights call is needed.
            self.best_weights = self.model.get_weights()
# Learning rate that CustomCallback writes to the optimizer when training begins.
lr =0.001

# The train/validation arrays are stored on the callback instance together with `lr`.
callbacks_ = CustomCallback(X_train, y_train, X_cv, y_cv, lr)

In [17]:
# Step-wise learning-rate schedule keyed on fixed fractions of a 300-epoch run
def lr_scheduler(epoch, lr):
    """Return the learning rate to use for `epoch`.

    At the epoch index equal to 10%, 30%, 50%, 70% and 90% of 300 epochs
    (i.e. 30, 90, 150, 210, 270) a fixed rate is returned; for every other
    epoch the incoming `lr` is passed through unchanged.

    NOTE(review): the 70% milestone repeats the 50% value (1e-4), so on its own
    it does not change the rate - confirm that this is intended.

    Args:
        epoch: zero-based epoch index supplied by the scheduler callback.
        lr: learning rate currently in use.

    Returns:
        The learning rate for this epoch.
    """
    total_epochs = 300
    # (fraction of the run, rate returned at exactly that epoch)
    milestones = (
        (0.1, 0.01),
        (0.3, 0.001),
        (0.5, 0.0001),
        (0.7, 1e-4),
        (0.9, 1e-5),
    )
    for fraction, milestone_lr in milestones:
        if epoch == int(total_epochs * fraction):
            return milestone_lr
    return lr

# Wrap the schedule function in a Keras callback; with verbose=1 the rate chosen for
# each epoch is printed in the training log.
learning_rate_scheduler = tf.keras.callbacks.LearningRateScheduler(lr_scheduler, verbose=1)

In [18]:
# Write a checkpoint file each time validation accuracy improves; the file name
# embeds the epoch number and the validation accuracy reached.
model_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath='model_weights.{epoch:02d}-{val_accuracy:.3f}.hdf5',
    monitor='val_accuracy',
    verbose=1,
    save_best_only=True,
    mode='auto',
)

In [19]:
# Stream augmented training batches; validation uses the un-augmented hold-out split.
train_flow = datagen.flow(X_train, y_train, batch_size=batch_size)

# `model.fit` accepts generators directly (`fit_generator` is deprecated).
# `len(train_flow)` is the whole number of batches per pass over the training
# data, replacing the fractional `len(X_train) / batch_size`.
# The returned History object is kept so the training curves can be inspected later.
history = model.fit(
    train_flow,
    epochs=300,
    verbose=1,
    validation_data=(X_cv, y_cv),
    steps_per_epoch=len(train_flow),
    callbacks=[learning_rate_scheduler, model_checkpoint, callbacks_],
)



Epoch 1/300

Epoch 00001: LearningRateScheduler reducing learning rate to 0.0010000000474974513.

Epoch 00001: val_accuracy improved from -inf to 0.37710, saving model to model_weights.01-0.377.hdf5
Epoch 2/300

Epoch 00002: LearningRateScheduler reducing learning rate to 0.0010000000474974513.

Epoch 00002: val_accuracy improved from 0.37710 to 0.44360, saving model to model_weights.02-0.444.hdf5
Epoch 3/300

Epoch 00003: LearningRateScheduler reducing learning rate to 0.0010000000474974513.

Epoch 00003: val_accuracy improved from 0.44360 to 0.53220, saving model to model_weights.03-0.532.hdf5
Epoch 4/300

Epoch 00004: LearningRateScheduler reducing learning rate to 0.0010000000474974513.

Epoch 00004: val_accuracy improved from 0.53220 to 0.57330, saving model to model_weights.04-0.573.hdf5
Epoch 5/300

Epoch 00005: LearningRateScheduler reducing learning rate to 0.0010000000474974513.

Epoch 00005: val_accuracy improved from 0.57330 to 0.58840, saving model to model_weights.05-0.58

<tensorflow.python.keras.callbacks.History at 0x7fb40f331650>

In [20]:
# Evaluate the trained model on the held-out test split.
# `score` holds the loss followed by the compiled metric (accuracy).
score = model.evaluate(X_test, y_test, verbose=1)
test_loss, test_accuracy = score[0], score[1]
print(f'Test loss: {test_loss}')
print(f'Test accuracy: {test_accuracy}')

Test loss: 0.2952300012111664
Test accuracy: 0.9106000065803528


In [21]:
# Persist the trained weights to an .h5 file and report where they were written.
weights_path = "DenseNet_model.h5"
model.save_weights(weights_path)
print(f"model saved as {weights_path}")

model saved as DenseNet_model.h5


### References
1. https://arxiv.org/pdf/1608.06993.pdf