**First, We import the necessary python libraries.**

In [0]:
import os
import numpy as np
from keras.datasets import cifar10
from keras import Sequential
from keras.utils import np_utils
from keras.optimizers import Adagrad, Adam
from keras.layers import Conv2D, MaxPooling2D, Dropout, Dense, Activation, Flatten, BatchNormalization
from keras import regularizers
from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import KFold
from keras.models import model_from_json


**Next, We load the CIFAR10 dataset. It will be downloaded first which might take couple of seconds.**

In [0]:
## Load the CIFAR10 dataset  
(train_images_1, train_labels_1),(test_images, test_labels) = cifar10.load_data()

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz


**After that, we need to create the validation set using 20% of the training samples**

In [0]:
## Get the training set length
training_len = np.shape(train_images_1)[0]
validation_len = np.int32(0.2 * training_len)

## Get the indices for training and validation split
indices = np.random.permutation(len(train_images_1))
val_indices = indices[0:validation_len]
train_indices = indices[validation_len:]

## Separate the validation set from training dataset
val_images, val_labels = train_images_1[val_indices], train_labels_1[val_indices]
train_images, train_labels = train_images_1[train_indices], train_labels_1[train_indices]

**We can normalize the train, test and validation dataset. The labels are also set for categorical cross entropy loss**

In [0]:
## Normalizing the train, test and validation sets
X_train = (train_images/255).astype('float32')
y_train = np_utils.to_categorical(train_labels, 10)

X_val = (val_images/255).astype('float32')
y_val = np_utils.to_categorical(val_labels, 10)

X_test = (test_images/255).astype('float32')
y_test = np_utils.to_categorical(test_labels, 10)

**Addidtional information for training**

In [0]:
# Keras Parameters
batch_size = 64
nb_classes = 10
nb_epochs = 50

## Weight deacy for kernel regularizer
weight_decay = 1e-4


**Next, We create the model with Keras. We have used 6 convolution layers, one hidden FC layer and one output layer. **

In [36]:
## Build the CNN Model with keras
model = Sequential()
model.add(Conv2D(32, (3,3), strides = (1,1), padding='same', activation ='relu', input_shape =(32,32,3),kernel_initializer='he_normal', kernel_regularizer=regularizers.l2(weight_decay)))
model.add(Conv2D(48, (3,3), strides = (1,1), padding='same', activation='relu', kernel_initializer='he_normal', kernel_regularizer=regularizers.l2(weight_decay)))

model.add(Conv2D(64, (3,3), strides= (1,1),padding='same', activation='relu', kernel_initializer='he_normal', kernel_regularizer=regularizers.l2(weight_decay)))
model.add(MaxPooling2D(pool_size=(2,2), padding= 'same'))
 
model.add(Conv2D(128, (3,3), strides= (1,1), padding='same', activation = 'relu',kernel_initializer='he_normal', kernel_regularizer=regularizers.l2(weight_decay)))
model.add(MaxPooling2D(pool_size=(2,2), padding='same'))

model.add(Conv2D(256, (3,3), strides= (1,1), padding='same', activation = 'relu', kernel_initializer='he_normal', kernel_regularizer=regularizers.l2(weight_decay)))
model.add(MaxPooling2D(pool_size=(2,2), padding='same'))

model.add(Conv2D(512, (3,3), strides= (1,1), padding='same', activation = 'relu', kernel_initializer='he_normal', kernel_regularizer=regularizers.l2(weight_decay)))
model.add(MaxPooling2D(pool_size=(2,2), padding='same'))
 
  
## Add Fully Connected Layer here
model.add(Flatten())
model.add(Dense(256, activation= 'relu', kernel_regularizer= regularizers.l2(weight_decay)))
model.add(Dense(10, activation='softmax'))

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_37 (Conv2D)           (None, 32, 32, 32)        896       
_________________________________________________________________
conv2d_38 (Conv2D)           (None, 32, 32, 48)        13872     
_________________________________________________________________
conv2d_39 (Conv2D)           (None, 32, 32, 64)        27712     
_________________________________________________________________
max_pooling2d_25 (MaxPooling (None, 16, 16, 64)        0         
_________________________________________________________________
conv2d_40 (Conv2D)           (None, 16, 16, 128)       73856     
_________________________________________________________________
max_pooling2d_26 (MaxPooling (None, 8, 8, 128)         0         
_________________________________________________________________
conv2d_41 (Conv2D)           (None, 8, 8, 256)         295168    
__________

**Next step is for training and validation. I have used ADAgrad Optimizer here. This Section  has been simulated without Dropout.**

In [37]:
## Initializing Adagrad Optimizer
model.compile(loss='categorical_crossentropy',
              optimizer='adagrad',
              metrics=['accuracy'])

## Train the model and get the training and validation loss
history = model.fit(X_train, y_train, batch_size= batch_size, epochs= nb_epochs,
                    verbose=1, validation_data=(X_val, y_val))

## Create the saving directory for different configured trained model
save_dir = os.path.join(os.getcwd(), 'saved_models')
if not os.path.isdir(save_dir):
    os.makedirs(save_dir)

## Serialize model to JSON
model_json = model.to_json()
with open("model_without_dropout.json", "w") as json_file:
    json_file.write(model_json)


## Serialize weights to HDF5
model_for_weight = 'Keras_Model_Without Dropout.h5'
model_path_1 = os.path.join(save_dir, model_for_weight) 
model.save_weights(model_path_1)
print("Saved model to disk")

Train on 40000 samples, validate on 10000 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Saved model to disk


**Second part is for the model with Dropout. We have added this to avoid Overfitting (As we can see from previous architecture, training accuracy gets saturated while validation accuracy is stuck to a certain value. It means that the model is overfitted).**


In [38]:
## Build the CNN Model with keras
model = Sequential()
model.add(Conv2D(32, (3,3), strides = (1,1), padding='same', activation ='relu', input_shape =(32,32,3),kernel_initializer='he_normal', kernel_regularizer=regularizers.l2(weight_decay)))
model.add(Conv2D(48, (3,3), strides = (1,1), padding='same', activation='relu', kernel_initializer='he_normal', kernel_regularizer=regularizers.l2(weight_decay)))

model.add(Conv2D(64, (3,3), strides= (1,1),padding='same', activation='relu', kernel_initializer='he_normal', kernel_regularizer=regularizers.l2(weight_decay)))
model.add(MaxPooling2D(pool_size=(2,2), padding= 'same'))
model.add(Dropout(0.2))                                      #adding dropout layer

 
model.add(Conv2D(128, (3,3), strides= (1,1), padding='same', activation = 'relu',kernel_initializer='he_normal', kernel_regularizer=regularizers.l2(weight_decay)))
model.add(MaxPooling2D(pool_size=(2,2), padding='same'))
model.add(Dropout(0.3))                                      #adding dropout layer


model.add(Conv2D(256, (3,3), strides= (1,1), padding='same', activation = 'relu', kernel_initializer='he_normal', kernel_regularizer=regularizers.l2(weight_decay)))
model.add(MaxPooling2D(pool_size=(2,2), padding='same'))
model.add(Dropout(0.4))                                      #adding dropout layer

model.add(Conv2D(512, (3,3), strides= (1,1), padding='same', activation = 'relu', kernel_initializer='he_normal', kernel_regularizer=regularizers.l2(weight_decay)))
model.add(MaxPooling2D(pool_size=(2,2), padding='same'))
model.add(Dropout(0.5))                                      #adding dropout layer
 
  
## Add Fully Connected Layer here
model.add(Flatten())
model.add(Dense(256, activation= 'relu', kernel_regularizer= regularizers.l2(weight_decay)))
model.add(Dense(10, activation='softmax'))

model.summary()

          
          
## Initializing Adagrad Optimizer
model.compile(loss='categorical_crossentropy',
              optimizer='adagrad',
              metrics=['accuracy'])

## Train the model and get the training and validation loss
history = model.fit(X_train, y_train, batch_size= batch_size, epochs= nb_epochs,
                    verbose=1, validation_data=(X_val, y_val))

## Create the saving directory for different configured trained model
save_dir = os.path.join(os.getcwd(), 'saved_models')
if not os.path.isdir(save_dir):
    os.makedirs(save_dir)

## Serialize model to JSON
model_json = model.to_json()
with open("model_with_dropout.json", "w") as json_file:
    json_file.write(model_json)


## Serialize weights to HDF5
model_for_weight = 'Keras_Model_With Dropout.h5'
model_path_2 = os.path.join(save_dir, model_for_weight) 
model.save_weights(model_path_2)
print("Saved model to disk")

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_43 (Conv2D)           (None, 32, 32, 32)        896       
_________________________________________________________________
conv2d_44 (Conv2D)           (None, 32, 32, 48)        13872     
_________________________________________________________________
conv2d_45 (Conv2D)           (None, 32, 32, 64)        27712     
_________________________________________________________________
max_pooling2d_29 (MaxPooling (None, 16, 16, 64)        0         
_________________________________________________________________
dropout_17 (Dropout)         (None, 16, 16, 64)        0         
_________________________________________________________________
conv2d_46 (Conv2D)           (None, 16, 16, 128)       73856     
_________________________________________________________________
max_pooling2d_30 (MaxPooling (None, 8, 8, 128)         0         
__________

**Next we do the Data Augmentation.**

In [0]:
#data augmentation
datagen = ImageDataGenerator(
        rotation_range=20,
        width_shift_range=0.15,
        height_shift_range=0.15,
        shear_range=0.15,
        zoom_range=0.15,
        horizontal_flip=True,
        fill_mode='nearest')
datagen.fit(X_train)

** First, we do the training with dropout layer.**

In [33]:
## Build the CNN Model with keras
model = Sequential()
model.add(Conv2D(32, (3,3), strides = (1,1), padding='same', activation ='relu', input_shape =(32,32,3),kernel_initializer='he_normal', kernel_regularizer=regularizers.l2(weight_decay)))
model.add(Conv2D(48, (3,3), strides = (1,1), padding='same', activation='relu', kernel_initializer='he_normal', kernel_regularizer=regularizers.l2(weight_decay)))

model.add(Conv2D(64, (3,3), strides= (1,1),padding='same', activation='relu', kernel_initializer='he_normal', kernel_regularizer=regularizers.l2(weight_decay)))
model.add(MaxPooling2D(pool_size=(2,2), padding= 'same'))
model.add(Dropout(0.2))                                      #adding dropout layer

 
model.add(Conv2D(128, (3,3), strides= (1,1), padding='same', activation = 'relu',kernel_initializer='he_normal', kernel_regularizer=regularizers.l2(weight_decay)))
model.add(MaxPooling2D(pool_size=(2,2), padding='same'))
model.add(Dropout(0.3))                                      #adding dropout layer


model.add(Conv2D(256, (3,3), strides= (1,1), padding='same', activation = 'relu', kernel_initializer='he_normal', kernel_regularizer=regularizers.l2(weight_decay)))
model.add(MaxPooling2D(pool_size=(2,2), padding='same'))
model.add(Dropout(0.4))                                      #adding dropout layer

model.add(Conv2D(512, (3,3), strides= (1,1), padding='same', activation = 'relu', kernel_initializer='he_normal', kernel_regularizer=regularizers.l2(weight_decay)))
model.add(MaxPooling2D(pool_size=(2,2), padding='same'))
model.add(Dropout(0.5))                                      #adding dropout layer
 
  
## Add Fully Connected Layer here
model.add(Flatten())
model.add(Dense(256, activation= 'relu', kernel_regularizer= regularizers.l2(weight_decay)))
model.add(Dense(10, activation='softmax'))

model.summary()

# Initiate Adagrad/adam optimizer
model.compile(loss='categorical_crossentropy', optimizer='adagrad', metrics=['accuracy'])


## Train the model and get the training and validation loss
model.fit_generator(datagen.flow(X_train, y_train, batch_size=40),\
                    steps_per_epoch=train_images.shape[0] // batch_size,epochs=120,\
                    verbose=1,validation_data=(X_val,y_val))
          
## Create the saving directory for different configured trained model
save_dir = os.path.join(os.getcwd(), 'saved_models')
if not os.path.isdir(save_dir):
    os.makedirs(save_dir)

## Serialize model to JSON
model_json = model.to_json()
with open("model_Data_augmentation_with_dropout.json", "w") as json_file:
    json_file.write(model_json)


## Serialize weights to HDF5
model_for_weight = 'Keras_Model_Data_augmentation_With Dropout.h5'
model_path_3 = os.path.join(save_dir, model_for_weight) 
model.save_weights(model_path_3)
print("Saved model to disk")

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_25 (Conv2D)           (None, 32, 32, 32)        896       
_________________________________________________________________
conv2d_26 (Conv2D)           (None, 32, 32, 48)        13872     
_________________________________________________________________
conv2d_27 (Conv2D)           (None, 32, 32, 64)        27712     
_________________________________________________________________
max_pooling2d_17 (MaxPooling (None, 16, 16, 64)        0         
_________________________________________________________________
dropout_13 (Dropout)         (None, 16, 16, 64)        0         
_________________________________________________________________
conv2d_28 (Conv2D)           (None, 16, 16, 128)       73856     
_________________________________________________________________
max_pooling2d_18 (MaxPooling (None, 8, 8, 128)         0         
__________

**Then We do it without the Dropout. **

In [34]:
## Build the CNN Model with keras
model = Sequential()
model.add(Conv2D(32, (3,3), strides = (1,1), padding='same', activation ='relu', input_shape =(32,32,3),kernel_initializer='he_normal', kernel_regularizer=regularizers.l2(weight_decay)))
model.add(Conv2D(48, (3,3), strides = (1,1), padding='same', activation='relu', kernel_initializer='he_normal', kernel_regularizer=regularizers.l2(weight_decay)))

model.add(Conv2D(64, (3,3), strides= (1,1),padding='same', activation='relu', kernel_initializer='he_normal', kernel_regularizer=regularizers.l2(weight_decay)))
model.add(MaxPooling2D(pool_size=(2,2), padding= 'same'))
 
model.add(Conv2D(128, (3,3), strides= (1,1), padding='same', activation = 'relu',kernel_initializer='he_normal', kernel_regularizer=regularizers.l2(weight_decay)))
model.add(MaxPooling2D(pool_size=(2,2), padding='same'))

model.add(Conv2D(256, (3,3), strides= (1,1), padding='same', activation = 'relu', kernel_initializer='he_normal', kernel_regularizer=regularizers.l2(weight_decay)))
model.add(MaxPooling2D(pool_size=(2,2), padding='same'))

model.add(Conv2D(512, (3,3), strides= (1,1), padding='same', activation = 'relu', kernel_initializer='he_normal', kernel_regularizer=regularizers.l2(weight_decay)))
model.add(MaxPooling2D(pool_size=(2,2), padding='same'))
 
  
## Add Fully Connected Layer here
model.add(Flatten())
model.add(Dense(256, activation= 'relu', kernel_regularizer= regularizers.l2(weight_decay)))
model.add(Dense(10, activation='softmax'))

model.summary()

# Initiate Adagrad/adam optimizer
model.compile(loss='categorical_crossentropy', optimizer='adagrad', metrics=['accuracy'])



## Train the model and get the training and validation loss
model.fit_generator(datagen.flow(X_train, y_train, batch_size=batch_size),\
                    steps_per_epoch=train_images.shape[0] // batch_size,epochs=120,\
                    verbose=1,validation_data=(X_val,y_val))
          
## Create the saving directory for different configured trained model
save_dir = os.path.join(os.getcwd(), 'saved_models')
if not os.path.isdir(save_dir):
    os.makedirs(save_dir)

## Serialize model to JSON
model_json = model.to_json()
with open("model_Data_augmentation_without_dropout.json", "w") as json_file:
    json_file.write(model_json)


## Serialize weights to HDF5
model_for_weight = 'Keras_Model_Data_augmentation_Without Dropout.h5'
model_path_4 = os.path.join(save_dir, model_for_weight) 
model.save_weights(model_path_4)
print("Saved model to disk")

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_31 (Conv2D)           (None, 32, 32, 32)        896       
_________________________________________________________________
conv2d_32 (Conv2D)           (None, 32, 32, 48)        13872     
_________________________________________________________________
conv2d_33 (Conv2D)           (None, 32, 32, 64)        27712     
_________________________________________________________________
max_pooling2d_21 (MaxPooling (None, 16, 16, 64)        0         
_________________________________________________________________
conv2d_34 (Conv2D)           (None, 16, 16, 128)       73856     
_________________________________________________________________
max_pooling2d_22 (MaxPooling (None, 8, 8, 128)         0         
_________________________________________________________________
conv2d_35 (Conv2D)           (None, 8, 8, 256)         295168    
__________

**In the penultimate step, We evaluate our trained models on the test dataset. We choose the best architecture out of the four architectures. **

In [56]:
######  General CNN without dropout #######

print('General CNN without dropout')
# Load json and create model
json_file = open("model_without_dropout.json", 'r')            
loaded_model_json = json_file.read()
json_file.close()
loaded_model_0 = model_from_json(loaded_model_json)

# Load weights into new model
loaded_model_0.load_weights(model_path_1)                                          
print("Loaded model from disk")

## Score the loaded model on the test dataset
loaded_model_0.compile(loss='categorical_crossentropy', optimizer='adagrad', metrics=['accuracy'])
scores = loaded_model_0.evaluate(X_test, y_test, verbose=1)
print('Loss:', scores[0])
print('Accuracy of CNN without dropout on test data:', scores[1])


######  General CNN with dropout #######

print('General CNN with dropout')
# Load json and create model
json_file = open("model_with_dropout.json", 'r')            
loaded_model_json = json_file.read()
json_file.close()
loaded_model = model_from_json(loaded_model_json)

# Load weights into new model
loaded_model.load_weights(model_path_2)                                          
print("Loaded model from disk")

## Score the loaded model on the test dataset
loaded_model.compile(loss='categorical_crossentropy', optimizer='adagrad', metrics=['accuracy'])
scores = loaded_model.evaluate(X_test, y_test, verbose=1)
print('Loss:', scores[0])
print('Accuracy of CNN with dropout on test data:', scores[1])


######  For Data Augmentation with dropout #######

print('For Data Augmentation with dropout')
# Load json and create model
json_file = open("model_Data_augmentation_with_dropout.json", 'r')            
loaded_model_json = json_file.read()
json_file.close()
loaded_model_1 = model_from_json(loaded_model_json)

# Load weights into new model
loaded_model_1.load_weights(model_path_3)                                          
print("Loaded model from disk")

## Score the loaded model on the test dataset
loaded_model_1.compile(loss='categorical_crossentropy', optimizer='adagrad', metrics=['accuracy'])
scores = loaded_model_1.evaluate(X_test, y_test, verbose=1)
print('Loss:', scores[0])
print('Accuracy of model with data augmentation and dropout on test data:', scores[1])


######  For Data Augmentation with No dropout #######

print('For Data Augmentation without the dropout')
# Load json and create model
json_file = open("model_Data_augmentation_without_dropout.json", 'r')            
loaded_model_json = json_file.read()
json_file.close()
loaded_model_2 = model_from_json(loaded_model_json)

# Load weights into new model
loaded_model_2.load_weights(model_path_4)                                          
print("Loaded model from disk")

## Score the loaded model on the test dataset
loaded_model_2.compile(loss='categorical_crossentropy', optimizer='adagrad', metrics=['accuracy'])
scores = loaded_model_2.evaluate(X_test, y_test, verbose=1)
print('Loss:', scores[0])
print('Accuracy of model with data augmentation and NO dropout on test data:', scores[1])

General CNN without dropout
Loaded model from disk
Loss: 1.7323800757408143
Accuracy of CNN without dropout on test data: 0.7592
General CNN with dropout
Loaded model from disk
Loss: 0.6480929811477661
Accuracy of CNN with dropout on test data: 0.8415
For Data Augmentation with dropout
Loaded model from disk
Loss: 0.6004882764339448
Accuracy of model with data augmentation and dropout on test data: 0.8326
For Data Augmentation without the dropout
Loaded model from disk
Loss: 0.5773086317539216
Accuracy of model with data augmentation and NO dropout on test data: 0.8525


**Finally, We do the K-fold cross-validation of the best model.**

In [55]:
## Get the training dataset again
X_train = train_images_1.reshape(train_images_1.shape[0], 32, 32, 3)
y_train_Kfold = np_utils.to_categorical(train_labels_1, nb_classes)
X_train_Kfold = (X_train/255).astype('float32')


cvscores = []
cv = KFold(n_splits=5, random_state=42, shuffle=False)     ## we set K=5
for train_index, test_index in cv.split(X_train_Kfold):
  
    ## K-fold Split of the dataset(where K=5, 4 of the folds are for training and 1 of them is for validating)
    X_train, X_val, y_train, y_val = X_train_Kfold[train_index], X_train_Kfold[test_index], y_train_Kfold[train_index], y_train_Kfold[test_index]

    ## Compile the optimizer and train 
    print("New fold:")
    loaded_model_2.compile(loss='categorical_crossentropy', optimizer='adagrad', metrics=['accuracy'])
    loaded_model_2.fit(X_train, y_train, epochs=10, batch_size=40)

    ## Get the score on the test dataset
    scores = loaded_model_2.evaluate(X_test, y_test)
    print("%s: %.2f%%" % (loaded_model_2.metrics_names[1], scores[1]*100))
    cvscores.append(scores[1] * 100)
    
print("K-fold cross validation is DONE...")
print("Cross-validation Accuracy: %.2f%% (+/- %.2f%%)" % (np.mean(cvscores), np.std(cvscores)))

New fold:
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
acc: 79.50%
New fold:
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
acc: 79.76%
New fold:
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
acc: 79.79%
New fold:
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
acc: 79.51%
New fold:
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
acc: 79.54%
K-fold cross validation is DONE...
Cross-validation Accuracy: 79.62% (+/- 0.13%)
