# CIFAR-10 by DenseNet Implementation
## CIFAR-10 is a popular image-classification dataset, available at https://www.cs.toronto.edu/~kriz/cifar.html
## We plan to solve this problem using the DenseNet architecture. An excellent ResNet-based solution to the same problem is available on the Keras website: https://keras.io/examples/cifar10_resnet/
## But why not Transfer Learning? <br> The pretrained ResNet and DenseNet weights were trained on ImageNet, which consists of 224x224 images, whereas CIFAR-10 images are only 32x32, so upsampling them that far is not practical. Instead, we will use the DenseNet architecture described in https://arxiv.org/pdf/1608.06993.pdf and try to get as much accuracy as possible on this dataset.

### P.S. We are using Google Colab for training.


In [1]:
from keras.preprocessing import image
from keras.utils import to_categorical
from keras.models import Sequential, Model
from keras.layers import Conv2D
from keras.layers import MaxPooling2D, Dropout, BatchNormalization, Activation, Concatenate
from keras.layers import Dense, Flatten, GlobalAveragePooling2D, Input, AveragePooling2D
from keras.preprocessing.image import ImageDataGenerator

from keras.optimizers import SGD, Adam
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from keras.datasets import cifar10
from sklearn.metrics import confusion_matrix

import matplotlib.pyplot as plt
import seaborn as sns

import tensorflow as tf
from keras import backend as k




Using TensorFlow backend.


In [0]:
# Allocate GPU memory as needed instead of reserving it all up front.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
# A ConfigProto has no effect on its own: it must be handed to a session.
# Register a session built from it as the one the Keras backend uses.
k.set_session(tf.Session(config=config))


In [0]:
# Hyperparameters

# Training loop settings.
epochs = 200
batch_size = 64
num_classes = 10

# Network settings. Note that num_filter and dropout_rate are assigned again
# in the cell that assembles the model.
compression = 0.8
num_filter = 20
dropout_rate = 0.3

In [0]:
# Random augmentation settings used for the training images.
augmentation_options = dict(
    rotation_range=90,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)
train_datagen = ImageDataGenerator(**augmentation_options)


In [4]:
# Fetch CIFAR-10 through the Keras dataset helper, then unpack each split
# into its images and labels.
train_split, test_split = cifar10.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz


In [0]:
# Axes 1..3 of the training array give the per-image input shape used by the model's Input layer.
img_height, img_width, channel = x_train.shape[1:4]

In [0]:
# Convert the labels to one-hot encoding.
# The shape guard makes this cell safe to re-run: without it, a second run
# would pass the already-encoded labels through to_categorical again and
# corrupt their shape.
# NOTE(review): assumes the raw label arrays do not already have
# num_classes entries on their last axis - confirm against load_data().
if y_train.shape[-1] != num_classes:
    y_train = to_categorical(y_train, num_classes)
if y_test.shape[-1] != num_classes:
    y_test = to_categorical(y_test, num_classes)

In [0]:
# Generator created without any augmentation options.
# NOTE(review): no other cell visible in this notebook references it
# (training passes the raw test arrays as validation_data) - confirm whether it is still needed.
validation_datagen = ImageDataGenerator()

In [0]:
def add_denseblock(input, num_filter = 12, dropout_rate = 0.2, num_layers = None):
    """Append a dense block to `input` and return the resulting tensor.

    Each layer applies BatchNormalization -> ReLU -> Conv2D (optionally
    followed by Dropout) and concatenates its output with everything that
    came before it along the last axis.

    Args:
        input: tensor the block is attached to.
        num_filter: base filter count; each convolution uses
            int(num_filter * compression) filters, where `compression` is
            the notebook-level setting.
        dropout_rate: dropout applied after each convolution; values <= 0
            disable dropout.
        num_layers: number of layers in the block. When None, the
            notebook-level variable `l` is used, which keeps existing calls
            working but requires `l` to be defined before the call.

    Returns:
        The tensor produced by the last concatenation.
    """
    if num_layers is None:
        # Backward-compatible fallback to the global the original relied on.
        num_layers = l

    filters_per_layer = int(num_filter * compression)
    features = input
    for _ in range(num_layers):
        normalized = BatchNormalization()(features)
        activated = Activation('relu')(normalized)
        # NOTE(review): the kernel is (1,3), not the 3x3 used in the DenseNet
        # paper - kept as-is because it may be deliberate; confirm.
        new_features = Conv2D(filters_per_layer, (1,3), use_bias=False, padding='same')(activated)
        if dropout_rate > 0:
            new_features = Dropout(dropout_rate)(new_features)
        features = Concatenate(axis=-1)([features, new_features])

    return features


In [0]:
def add_transition(input, num_filter = 12, dropout_rate = 0.2):
    """Append a transition stage to `input` and return the pooled tensor.

    The stage is BatchNormalization -> ReLU -> 1x1 Conv2D with
    int(num_filter * compression) filters -> optional Dropout -> 2x2
    average pooling. `compression` is the notebook-level setting.
    """
    x = BatchNormalization()(input)
    x = Activation('relu')(x)
    x = Conv2D(int(num_filter*compression), (1,1), use_bias=False, padding='same')(x)
    # Dropout is skipped entirely for non-positive rates.
    if dropout_rate > 0:
        x = Dropout(dropout_rate)(x)
    return AveragePooling2D(pool_size=(2,2))(x)



In [0]:
def output_layer(input):
    """Attach the classification head to `input` and return its output tensor.

    The head is BatchNormalization -> ReLU -> 2x2 average pooling -> Flatten
    -> Dense softmax with `num_classes` units (notebook-level setting).
    """
    x = BatchNormalization()(input)
    x = Activation('relu')(x)
    x = AveragePooling2D(pool_size=(2,2))(x)
    x = Flatten()(x)
    return Dense(num_classes, activation='softmax')(x)


In [0]:
# Lets give learning rate according to the paper

def lr_schedule(epoch):
    """Return the learning rate to use for the given epoch index.

    The rate starts at 0.1, drops to 0.01 after epoch 90 and to 0.001 after
    epoch 135. The chosen value is also printed.

    Args:
        epoch: epoch index passed in by the caller.

    Returns:
        The learning rate as a float.
    """
    # Test the larger threshold first: checking `epoch > 90` first would
    # shadow the `epoch > 135` case and 0.001 would never be returned.
    if epoch > 135:
        lr = 0.001
    elif epoch > 90:
        lr = 0.01
    else:
        lr = 0.1

    print('Learning rate: ', lr)
    return lr

In [0]:
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, ReduceLROnPlateau

# File that receives the best checkpoint.
filepath="weights.best.hdf5"

# Applies the value returned by lr_schedule.
lr_scheduler = LearningRateScheduler(lr_schedule)

# Writes to `filepath`, keeping only the best model as judged by 'val_acc'.
checkpoint = ModelCheckpoint(
    filepath=filepath,
    monitor='val_acc',
    save_best_only=True,
    verbose=1,
)

# Plateau-based learning-rate reduction by a factor of sqrt(0.1).
lr_reducer = ReduceLROnPlateau(
    factor=np.sqrt(0.1),
    patience=5,
    cooldown=0,
    min_lr=0.5e-6,
)

In [13]:
# !pip install -q tensorboardcolab
# from tensorboardcolab import *
# tbc=TensorBoardColab()

Wait for 8 seconds...
TensorBoard link:
https://adaff676.ngrok.io


In [15]:
# Architecture settings for this run. num_filter and dropout_rate replace the
# values from the hyperparameter cell; `l` is the per-block layer count that
# add_denseblock reads from the notebook namespace.
l = 20
num_filter = 20
dropout_rate = 0.15

# Arguments shared by every dense block and transition below.
stage_args = (num_filter, dropout_rate)

# Stem: a single 3x3 convolution on the raw image.
input = Input(shape=(img_height, img_width, channel))
First_Conv2D = Conv2D(num_filter, (3,3), use_bias=False, padding='same')(input)

# Three dense-block / transition pairs.
First_Block = add_denseblock(First_Conv2D, *stage_args)
First_Transition = add_transition(First_Block, *stage_args)

Second_Block = add_denseblock(First_Transition, *stage_args)
Second_Transition = add_transition(Second_Block, *stage_args)

Third_Block = add_denseblock(Second_Transition, *stage_args)
Third_Transition = add_transition(Third_Block, *stage_args)

# Final dense block feeds the classification head directly.
Last_Block = add_denseblock(Third_Transition, *stage_args)
output = output_layer(Last_Block)

model = Model(inputs=[input], outputs=[output])
model.summary()












Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 32, 32, 3)    0                                            
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 32, 32, 20)   540         input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization_1 (BatchNor (None, 32, 32, 20)   80          conv2d_1[0][0]                   
__________________________________________________________________________________________________
activation_1 (Activation)       (None, 32, 32, 20)   0          

In [16]:
# Seed the optimizer with the schedule's value for epoch 0.
learning_rate = lr_schedule(0)
sgd = SGD(lr=learning_rate, momentum=0.9, nesterov=False)

model.compile(
    optimizer=sgd,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)


Learning rate:  0.1




In [0]:
from datetime import datetime

# Whole number of batches needed to cover the training set once; the plain
# division used before produced a fractional step count.
steps_per_epoch = int(np.ceil(len(x_train) / batch_size))

start = datetime.now()
history = model.fit_generator(
  # flow() must be given the configured batch size explicitly. Left at its
  # default, the generator's batches would not match the batch_size that
  # steps_per_epoch is derived from, and an epoch would not cover the
  # whole training set.
  train_datagen.flow(x_train, y_train, batch_size=batch_size),
  steps_per_epoch = steps_per_epoch,
  epochs = epochs,
  verbose=1, validation_data=(x_test, y_test),
  # NOTE(review): lr_scheduler assigns the learning rate from lr_schedule
  # every epoch, so reductions made by lr_reducer presumably do not
  # persist - confirm whether both callbacks are intended.
  callbacks = [lr_reducer, checkpoint,
               lr_scheduler]
)

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Epoch 1/200
Learning rate:  0.1

Epoch 00001: val_acc improved from -inf to 0.18680, saving model to weights.best.hdf5
Epoch 2/200
Learning rate:  0.1

Epoch 00002: val_acc improved from 0.18680 to 0.20450, saving model to weights.best.hdf5
Epoch 3/200
Learning rate:  0.1

Epoch 00003: val_acc improved from 0.20450 to 0.26460, saving model to weights.best.hdf5
Epoch 4/200
Learning rate:  0.1

Epoch 00004: val_acc improved from 0.26460 to 0.28050, saving model to weights.best.hdf5
Epoch 5/200
Learning rate:  0.1

Epoch 00005: val_acc improved from 0.28050 to 0.29580, saving model to weights.best.hdf5
Epoch 6/200
Learning rate:  0.1

Epoch 00006: val_acc improved from 0.29580 to 0.30260, saving model to weights.best.hdf5
Epoch 7/200
Learning rate:  0.1

Epoch 00007: val_acc did not improve from 0.30260
Epoch 8/200
Learning rate:  0.1

Epoch 00008: val_acc improved from 0.30260 to 0.32540, savi

In [0]:
# Wall-clock time elapsed since `start` was recorded.
elapsed = datetime.now() - start
print("Time for training was : ", elapsed)

In [0]:
# Evaluate on the test arrays; the first two entries of the result are
# reported as loss and accuracy.
score = model.evaluate(x_test, y_test, verbose=1)
test_loss, test_accuracy = score[0], score[1]
print('Test loss:', test_loss)
print('Test accuracy:', test_accuracy)

In [0]:
# Write the model's weights to a local file.
weights_file = "DNST_model.h5"
model.save_weights(weights_file)
print("Saved model to disk")

# Hand the saved file to the Colab download helper.
from google.colab import files

files.download(weights_file)

In [0]:
def visualize(x, y, history):
  """Plot the accuracy and loss curves stored in `history.history`.

  Two figures are shown in turn: 'acc' against 'val_acc', then 'loss'
  against 'val_loss'. The `x` and `y` arguments are accepted but not used.
  """
  # (title, y-axis label, training key, validation key) for each figure.
  panels = (
    ('Model accuracy', 'Accuracy', 'acc', 'val_acc'),
    ('Model loss', 'Loss', 'loss', 'val_loss'),
  )
  for title, y_label, train_key, val_key in panels:
    plt.plot(history.history[train_key])
    plt.plot(history.history[val_key])
    plt.title(title)
    plt.ylabel(y_label)
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Test'], loc='upper left')
    plt.show()

visualize(x_test, y_test, history)

In [0]:
def plot_conf_matrix(model, testX, testY):
    """Draw a confusion-matrix heatmap for `model` on the given test data.

    Args:
        model: object with a `predict` method returning one score per class
            for each sample.
        testX: inputs passed to `model.predict`.
        testY: one-hot encoded true labels (argmax over axis 1 is taken).
    """
    class_names=['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
    # predict_classes is only available on Sequential models; taking the
    # argmax of predict() also works for models built with the functional API.
    y_pred=np.argmax(model.predict(testX), axis=1)
    y_true=np.argmax(testY,axis=1)
    # Fix the label order so rows and columns always line up with class_names,
    # even if some class never occurs in y_true or y_pred.
    matrix = confusion_matrix(y_true, y_pred, labels=list(range(len(class_names))))
    sns.heatmap(matrix, xticklabels=class_names, yticklabels=class_names)