# CIFAR 10

Experiment - I: Using dropouts after conv and FC layers

In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.layers import Dense,Dropout,Flatten,MaxPool2D,Conv2D,Activation,BatchNormalization
from tensorflow.keras.regularizers import l2 
from tensorflow.keras import Sequential
import matplotlib.pyplot as plt
%matplotlib inline

In [4]:
# Shared training configuration used by the experiments below.
batch_size: int = 32   # passed to model.fit as the mini-batch size
# NOTE: the fit calls visible in this notebook pass epochs=10 literally
# and do not read this value.
epochs: int = 50
num_classes: int = 10  # width of the final softmax layer

In [5]:
# Load the CIFAR-10 image/label arrays for the train and test splits via Keras.
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

In [6]:
# Display the array shapes as ((train images, train labels), (test images, test labels)).
(
    (x_train.shape, y_train.shape),
    (x_test.shape, y_test.shape),
)

(((50000, 32, 32, 3), (50000, 1)), ((10000, 32, 32, 3), (10000, 1)))

In [7]:
# Make the data shapes suitable for the network. x_train and x_test are already in the required shape; we need to convert y_train and y_test to (50000, 10) and (10000, 10).


In [8]:
# One-hot encode the integer class labels: (N, 1) -> (N, num_classes).
# The width comes from the shared `num_classes` constant rather than a literal,
# and each conversion is guarded on the last axis so that re-running this cell
# does not one-hot encode labels that are already encoded.
if y_train.shape[-1] != num_classes:
    y_train = tf.keras.utils.to_categorical(y_train, num_classes)
if y_test.shape[-1] != num_classes:
    y_test = tf.keras.utils.to_categorical(y_test, num_classes)

In [15]:
# Per-image shape (everything after the leading sample axis); this is the
# input_shape given to the first Conv2D layer in the models below.
x_train[0].shape

(32, 32, 3)

In [21]:
# Experiment I network: dropout after each conv block and after the dense layer.
model = Sequential([
    # Conv block 1: 32 filters, ReLU, max-pool (pool size 3, stride 3), dropout.
    Conv2D(32, (3, 3), padding='same', input_shape=(32, 32, 3)),
    Activation('relu'),
    MaxPool2D(3, 3),
    Dropout(0.25),

    # Conv block 2: two 64-filter convolutions, then pooling and dropout.
    Conv2D(64, (3, 3), padding='same'),
    Activation('relu'),
    Conv2D(64, (3, 3), padding='same'),
    Activation('relu'),
    MaxPool2D(3, 3),
    Dropout(0.25),

    # Classifier head: 512-unit dense layer with dropout, then softmax over the classes.
    Flatten(),
    Dense(512),
    Activation('relu'),
    Dropout(0.5),
    Dense(num_classes),
    Activation('softmax'),
])


In [22]:
# Print the per-layer output shapes and parameter counts of the Experiment I model.
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 32, 32, 32)        896       
_________________________________________________________________
activation_1 (Activation)    (None, 32, 32, 32)        0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 10, 10, 32)        0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 10, 10, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 10, 10, 64)        18496     
_________________________________________________________________
activation_2 (Activation)    (None, 10, 10, 64)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 10, 10, 64)       

In [23]:
# Compile with categorical cross-entropy (the labels were one-hot encoded
# earlier in the notebook) and the default-configured SGD optimizer.
model.compile(loss='categorical_crossentropy',
              optimizer='sgd',
              metrics=['accuracy'])

# Convert to float32 and scale the pixels to [0, 1].
# The division is guarded on the current value range so this cell is
# idempotent: re-running it (or running it after another cell has already
# normalised the data) does not divide by 255 a second time.
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
if x_train.max() > 1.0:
    x_train /= 255
if x_test.max() > 1.0:
    x_test /= 255

In [25]:
# Train for 10 epochs, using the test split as validation data.
model.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=10,
    validation_data=(x_test, y_test),
    shuffle=True,
)

Train on 50000 samples, validate on 10000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x18022814048>

Experiment - II: Remove the dropouts after the convolutional layers (but retain them in the FC layer). Also, use batch normalization after every convolutional layer.

In [7]:
# Experiment II network: batch normalisation after every conv activation,
# no dropout after the conv blocks, dropout kept in the dense head.
model = Sequential([
    # Conv block 1: 32 filters, ReLU, batch norm, max-pool (pool size 3, stride 3).
    Conv2D(32, (3, 3), padding='same', input_shape=(32, 32, 3)),
    Activation('relu'),
    BatchNormalization(),
    MaxPool2D(3, 3),
    # (no Dropout(0.25) here in this experiment)

    # Conv block 2: two 64-filter convolutions, each followed by ReLU + batch norm.
    Conv2D(64, (3, 3), padding='same'),
    Activation('relu'),
    BatchNormalization(),
    Conv2D(64, (3, 3), padding='same'),
    Activation('relu'),
    BatchNormalization(),
    MaxPool2D(3, 3),
    # (no Dropout(0.25) here in this experiment)

    # Classifier head: 512-unit dense layer with dropout, then softmax over the classes.
    Flatten(),
    Dense(512),
    Activation('relu'),
    Dropout(0.5),
    Dense(num_classes),
    Activation('softmax'),
])



In [28]:
# Print the per-layer output shapes and parameter counts of the Experiment II model.
model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_4 (Conv2D)            (None, 32, 32, 32)        896       
_________________________________________________________________
activation_6 (Activation)    (None, 32, 32, 32)        0         
_________________________________________________________________
batch_normalization (BatchNo (None, 32, 32, 32)        128       
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 10, 10, 32)        0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 10, 10, 64)        18496     
_________________________________________________________________
activation_7 (Activation)    (None, 10, 10, 64)        0         
_________________________________________________________________
batch_normalization_1 (Batch (None, 10, 10, 64)       

In [29]:
# Compile with categorical cross-entropy (one-hot labels) and SGD.
model.compile(loss='categorical_crossentropy',
              optimizer='sgd',
              metrics=['accuracy'])

# Convert to float32 and scale the pixels to [0, 1].
# An unconditional `/= 255` here would shrink data that an earlier experiment
# cell already normalised, so the division only happens while the values are
# still on the raw 0-255 scale.
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
if x_train.max() > 1.0:
    x_train /= 255
if x_test.max() > 1.0:
    x_test /= 255

# Train for 10 epochs, using the test split as validation data.
model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=10,
          validation_data=(x_test, y_test),
          shuffle=True)

Train on 50000 samples, validate on 10000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x1803129eb88>

Here we can observe overfitting. This is because we have removed the dropouts after the convolutional layers. We removed the dropouts to make the model less complex.

Experiment - III: Use batch normalization and dropouts after every convolutional layer. Also, retain the dropouts in the FC layer.

In [8]:
# Experiment III network: batch normalisation and dropout after every conv
# block, with dropout also kept in the dense head.
model = Sequential([
    # Conv block 1: 32 filters, ReLU, batch norm, max-pool (pool size 3, stride 3), dropout.
    Conv2D(32, (3, 3), padding='same', input_shape=(32, 32, 3)),
    Activation('relu'),
    BatchNormalization(),
    MaxPool2D(3, 3),
    Dropout(0.25),

    # Conv block 2: two 64-filter convolutions, each followed by ReLU + batch norm.
    Conv2D(64, (3, 3), padding='same'),
    Activation('relu'),
    BatchNormalization(),
    Conv2D(64, (3, 3), padding='same'),
    Activation('relu'),
    BatchNormalization(),
    MaxPool2D(3, 3),
    Dropout(0.25),

    # Classifier head: 512-unit dense layer with dropout, then softmax over the classes.
    Flatten(),
    Dense(512),
    Activation('relu'),
    Dropout(0.5),
    Dense(num_classes),
    Activation('softmax'),
])




In [9]:
# Print the per-layer output shapes and parameter counts of the Experiment III model.
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_3 (Conv2D)            (None, 32, 32, 32)        896       
_________________________________________________________________
activation_5 (Activation)    (None, 32, 32, 32)        0         
_________________________________________________________________
batch_normalization_3 (Batch (None, 32, 32, 32)        128       
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 10, 10, 32)        0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 10, 10, 32)        0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 10, 10, 64)        18496     
_________________________________________________________________
activation_6 (Activation)    (None, 10, 10, 64)       

In [10]:
# Compile with categorical cross-entropy (one-hot labels) and SGD.
model.compile(loss='categorical_crossentropy',
              optimizer='sgd',
              metrics=['accuracy'])

# Convert to float32 and scale the pixels to [0, 1].
# An unconditional `/= 255` here would shrink data that an earlier experiment
# cell already normalised, so the division only happens while the values are
# still on the raw 0-255 scale.
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
if x_train.max() > 1.0:
    x_train /= 255
if x_test.max() > 1.0:
    x_test /= 255

# Train for 10 epochs, using the test split as validation data.
model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=10,
          validation_data=(x_test, y_test),
          shuffle=True)

Train on 50000 samples, validate on 10000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x1e44b9c0108>

Now we get a significant improvement. The training accuracy goes down because we have included dropouts, but there is no overfitting. Dropout is basically a regularization technique.

Experiment - IV: Remove the dropouts after the convolutional layers and use L2 regularization in the FC layer. Retain the dropouts in FC.

Now let's replace the dropouts after the convolutional layers with L2 regularization in the FC layer (the dropout in the FC layer is retained).

In [17]:
# Experiment IV network: batch normalisation after every conv activation,
# no dropout after the conv blocks, L2-regularised dense layer with dropout.
model = Sequential([
    # Conv block 1: 32 filters, ReLU, batch norm, max-pool (pool size 3, stride 3).
    Conv2D(32, (3, 3), padding='same', input_shape=(32, 32, 3)),
    Activation('relu'),
    BatchNormalization(),
    MaxPool2D(3, 3),
    # (no Dropout(0.25) here in this experiment)

    # Conv block 2: two 64-filter convolutions, each followed by ReLU + batch norm.
    Conv2D(64, (3, 3), padding='same'),
    Activation('relu'),
    BatchNormalization(),
    Conv2D(64, (3, 3), padding='same'),
    Activation('relu'),
    BatchNormalization(),
    MaxPool2D(3, 3),
    # (no Dropout(0.25) here in this experiment)

    # Classifier head: the 512-unit dense kernel carries an L2 penalty (factor 0.01).
    Flatten(),
    Dense(512, kernel_regularizer=l2(0.01)),
    Activation('relu'),
    Dropout(0.5),
    Dense(num_classes),
    Activation('softmax'),
])




In [18]:
# Print the per-layer output shapes and parameter counts of the Experiment IV model.
model.summary()

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_15 (Conv2D)           (None, 32, 32, 32)        896       
_________________________________________________________________
activation_19 (Activation)   (None, 32, 32, 32)        0         
_________________________________________________________________
batch_normalization_15 (Batc (None, 32, 32, 32)        128       
_________________________________________________________________
max_pooling2d_10 (MaxPooling (None, 10, 10, 32)        0         
_________________________________________________________________
conv2d_16 (Conv2D)           (None, 10, 10, 64)        18496     
_________________________________________________________________
activation_20 (Activation)   (None, 10, 10, 64)        0         
_________________________________________________________________
batch_normalization_16 (Batc (None, 10, 10, 64)       

In [19]:
# Compile with categorical cross-entropy (one-hot labels) and SGD.
model.compile(loss='categorical_crossentropy',
              optimizer='sgd',
              metrics=['accuracy'])

# Convert to float32 and scale the pixels to [0, 1].
# An unconditional `/= 255` here would shrink data that an earlier experiment
# cell already normalised, so the division only happens while the values are
# still on the raw 0-255 scale.
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
if x_train.max() > 1.0:
    x_train /= 255
if x_test.max() > 1.0:
    x_test /= 255

# Train for 10 epochs, using the test split as validation data.
model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=10,
          validation_data=(x_test, y_test),
          shuffle=True)

Train on 50000 samples, validate on 10000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x1e48362a3c8>

In [None]:
# We can see overfitting here. L2 regularization did not work well, while dropout works well.
# L2 regularization tries to keep the weights small, while dropout tries to throw out redundant weights.

Experiment-V: Dropouts after conv layer, L2 in FC, use BN after convolutional layer

In [9]:
# Experiment V network: batch normalisation and dropout after every conv
# block, plus an L2-regularised dense layer with dropout.
model = Sequential([
    # Conv block 1: 32 filters, ReLU, batch norm, max-pool (pool size 3, stride 3), dropout.
    Conv2D(32, (3, 3), padding='same', input_shape=(32, 32, 3)),
    Activation('relu'),
    BatchNormalization(),
    MaxPool2D(3, 3),
    Dropout(0.25),

    # Conv block 2: two 64-filter convolutions, each followed by ReLU + batch norm.
    Conv2D(64, (3, 3), padding='same'),
    Activation('relu'),
    BatchNormalization(),
    Conv2D(64, (3, 3), padding='same'),
    Activation('relu'),
    BatchNormalization(),
    MaxPool2D(3, 3),
    Dropout(0.25),

    # Classifier head: the 512-unit dense kernel carries an L2 penalty (factor 0.01).
    Flatten(),
    Dense(512, kernel_regularizer=l2(0.01)),
    Activation('relu'),
    Dropout(0.5),
    Dense(num_classes),
    Activation('softmax'),
])


In [10]:
# Print the per-layer output shapes and parameter counts of the Experiment V model.
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_3 (Conv2D)            (None, 32, 32, 32)        896       
_________________________________________________________________
activation_4 (Activation)    (None, 32, 32, 32)        0         
_________________________________________________________________
batch_normalization_3 (Batch (None, 32, 32, 32)        128       
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 10, 10, 32)        0         
_________________________________________________________________
dropout_3 (Dropout)          (None, 10, 10, 32)        0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 10, 10, 64)        18496     
_________________________________________________________________
activation_5 (Activation)    (None, 10, 10, 64)       

In [11]:
# Compile with categorical cross-entropy (one-hot labels) and SGD.
model.compile(loss='categorical_crossentropy',
              optimizer='sgd',
              metrics=['accuracy'])

# Convert to float32 and scale the pixels to [0, 1].
# An unconditional `/= 255` here would shrink data that an earlier experiment
# cell already normalised, so the division only happens while the values are
# still on the raw 0-255 scale.
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
if x_train.max() > 1.0:
    x_train /= 255
if x_test.max() > 1.0:
    x_test /= 255

# Train for 10 epochs, using the test split as validation data.
model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=10,
          validation_data=(x_test, y_test),
          shuffle=True)

Train on 50000 samples, validate on 10000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x26875334688>

Experiment-VI: Add a new convolutional layer to the network. Note that by a 'convolutional layer', the professor is referring to a convolutional unit with two sets of Conv2D layers with 128 filters each (we are abusing the terminology a bit here). The code for the additional conv layer is shown below.

In [19]:
# Experiment VI network: the Experiment V recipe (BN + dropout after each conv
# block, L2 on the dense layer) with 2x2 pooling and an extra 128-filter conv block.
model = Sequential([
    # Conv block 1: 32 filters, ReLU, batch norm, max-pool (pool size 2, stride 2), dropout.
    Conv2D(32, (3, 3), padding='same', input_shape=(32, 32, 3)),
    Activation('relu'),
    BatchNormalization(),
    MaxPool2D(2, 2),
    Dropout(0.25),

    # Conv block 2: two 64-filter convolutions, each followed by ReLU + batch norm.
    Conv2D(64, (3, 3), padding='same'),
    Activation('relu'),
    BatchNormalization(),
    Conv2D(64, (3, 3), padding='same'),
    Activation('relu'),
    BatchNormalization(),
    MaxPool2D(2, 2),
    Dropout(0.25),

    # Conv block 3 (new): two 128-filter convolutions. The second one is given
    # no padding argument, so it uses the Conv2D default rather than 'same'.
    Conv2D(128, (3, 3), padding='same'),
    Activation('relu'),
    BatchNormalization(),
    Conv2D(128, (3, 3)),
    Activation('relu'),
    BatchNormalization(),
    MaxPool2D(2, 2),
    Dropout(0.25),

    # Classifier head: the 512-unit dense kernel carries an L2 penalty (factor 0.01).
    Flatten(),
    Dense(512, kernel_regularizer=l2(0.01)),
    Activation('relu'),
    Dropout(0.5),
    Dense(num_classes),
    Activation('softmax'),
])


In [20]:
# Print the per-layer output shapes and parameter counts of the Experiment VI model.
model.summary()

Model: "sequential_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_27 (Conv2D)           (None, 32, 32, 32)        896       
_________________________________________________________________
activation_28 (Activation)   (None, 32, 32, 32)        0         
_________________________________________________________________
batch_normalization_25 (Batc (None, 32, 32, 32)        128       
_________________________________________________________________
max_pooling2d_15 (MaxPooling (None, 16, 16, 32)        0         
_________________________________________________________________
dropout_16 (Dropout)         (None, 16, 16, 32)        0         
_________________________________________________________________
conv2d_28 (Conv2D)           (None, 16, 16, 64)        18496     
_________________________________________________________________
activation_29 (Activation)   (None, 16, 16, 64)       

In [25]:
# Compile with categorical cross-entropy (one-hot labels) and SGD.
model.compile(loss='categorical_crossentropy',
              optimizer='sgd',
              metrics=['accuracy'])

# Convert to float32 and scale the pixels to [0, 1].
# Fixes two problems in the previous version of this cell:
#   * the scaled test images were assigned to a misspelled name (`x_xest`),
#     so `x_test` itself was never normalised here and validation could run
#     on a different pixel scale from training;
#   * `x_train` was divided by 255 unconditionally, shrinking data that an
#     earlier cell had already normalised.
# The division now happens only while the values are still on the raw
# 0-255 scale, which makes the cell safe to re-run.
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
if x_train.max() > 1.0:
    x_train /= 255
if x_test.max() > 1.0:
    x_test /= 255

# Train for 10 epochs, using the test split as validation data.
model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=10,
          validation_data=(x_test, y_test),
          shuffle=True)

Train on 50000 samples, validate on 10000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x2687108e1c8>

We can see here that dropouts play a key role in regularization in neural networks. We get a marginal improvement after adding one more conv layer, though it takes a little more time to compute.

Experiment - VII: Add more feature maps to the conv layers: from 32 to 64 and 64 to 128.

In [26]:
# Experiment VII network: two conv blocks with doubled feature maps
# (64 and 128 filters), BN + dropout after each block, L2 on the dense layer.
model = Sequential([
    # Conv block 1: 64 filters, ReLU, batch norm, max-pool (pool size 2, stride 2), dropout.
    Conv2D(64, (3, 3), padding='same', input_shape=(32, 32, 3)),
    Activation('relu'),
    BatchNormalization(),
    MaxPool2D(2, 2),
    Dropout(0.25),

    # Conv block 2: two 128-filter convolutions, each followed by ReLU + batch norm.
    Conv2D(128, (3, 3), padding='same'),
    Activation('relu'),
    BatchNormalization(),
    Conv2D(128, (3, 3), padding='same'),
    Activation('relu'),
    BatchNormalization(),
    MaxPool2D(2, 2),
    Dropout(0.25),

    # Classifier head: the 512-unit dense kernel carries an L2 penalty (factor 0.01).
    Flatten(),
    Dense(512, kernel_regularizer=l2(0.01)),
    Activation('relu'),
    Dropout(0.5),
    Dense(num_classes),
    Activation('softmax'),
])


In [27]:
# Print the per-layer output shapes and parameter counts of the Experiment VII model.
model.summary()

Model: "sequential_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_32 (Conv2D)           (None, 32, 32, 64)        1792      
_________________________________________________________________
activation_35 (Activation)   (None, 32, 32, 64)        0         
_________________________________________________________________
batch_normalization_30 (Batc (None, 32, 32, 64)        256       
_________________________________________________________________
max_pooling2d_18 (MaxPooling (None, 16, 16, 64)        0         
_________________________________________________________________
dropout_20 (Dropout)         (None, 16, 16, 64)        0         
_________________________________________________________________
conv2d_33 (Conv2D)           (None, 16, 16, 128)       73856     
_________________________________________________________________
activation_36 (Activation)   (None, 16, 16, 128)      

In [28]:
# Compile with categorical cross-entropy (one-hot labels) and SGD.
model.compile(loss='categorical_crossentropy',
              optimizer='sgd',
              metrics=['accuracy'])

# Convert to float32 and scale the pixels to [0, 1].
# Fixes two problems in the previous version of this cell:
#   * the scaled test images were assigned to a misspelled name (`x_xest`),
#     so `x_test` itself was never normalised here and validation could run
#     on a different pixel scale from training;
#   * `x_train` was divided by 255 unconditionally, shrinking data that an
#     earlier cell had already normalised.
# The division now happens only while the values are still on the raw
# 0-255 scale, which makes the cell safe to re-run.
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
if x_train.max() > 1.0:
    x_train /= 255
if x_test.max() > 1.0:
    x_test /= 255

# Train for 10 epochs, using the test split as validation data.
model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=10,
          validation_data=(x_test, y_test),
          shuffle=True)

Train on 50000 samples, validate on 10000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x26873e461c8>

Based on these experiments, we saw that the performance of CNNs depends heavily on multiple hyperparameters - the number of layers, the number of feature maps in each layer, the use of dropouts, batch normalisation, etc. Thus, it is advisable to first fine-tune your model hyperparameters by conducting lots of experiments. Only when you are convinced that you have found the right set of hyperparameters should you train the model with a larger number of epochs (since almost always the amount of time and computing power you have is limited).