In [1]:
from tensorflow.keras.datasets import cifar10
# Load the CIFAR-10 train/test split; each split is an (images, labels) pair.
train_split, test_split = cifar10.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

# Report the array shapes and peek at the first training label.
print('Train samples: ', x_train.shape, y_train.shape)
print('Test samples: ', x_test.shape, y_test.shape)
print(y_train[0])

Train samples:  (50000, 32, 32, 3) (50000, 1)
Test samples:  (10000, 32, 32, 3) (10000, 1)
[6]


In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import BatchNormalization, Conv2D, MaxPooling2D, Flatten, Dense, Activation, Dropout, ZeroPadding2D, Add, LeakyReLU
from tensorflow.keras import utils, Input, initializers
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.initializers import glorot_uniform

NUM_CLASSES = 10
# Human-readable class names, indexed by the integer label stored in y_train / y_test.
cifar10_classes = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse','ship', 'truck']

# Show a rows x cols grid of randomly chosen training images, each titled with its class name.
cols = 8
rows = 2
fig = plt.figure(figsize=(2 * cols - 1, 2.5 * rows - 1))
for i in range(cols):
    for j in range(rows):
        random_index = np.random.randint(0, len(y_train))
        # Subplot positions are 1-based; i * rows + j + 1 visits every slot 1..rows*cols once.
        ax = fig.add_subplot(rows, cols, i * rows + j + 1)
        # grid() expects a boolean: a non-empty string such as 'off' is truthy.
        ax.grid(False)
        ax.axis('off')
        ax.imshow(x_train[random_index, :])
        # y_train has shape (N, 1), so column 0 holds the integer class id.
        ax.set_title(cifar10_classes[y_train[random_index, 0]])
# Render once, after every subplot has been added. Calling plt.show() inside the
# loop displayed the figure while it still contained a single Axes.
plt.show()

<Figure size 1500x400 with 1 Axes>

In [3]:
# Cast the images to float32 and rescale them by 1/255.
X_train = x_train.astype('float32') / 255
X_test = x_test.astype('float32') / 255

# One-hot encode the integer labels, one column per CIFAR-10 class.
Y_train = utils.to_categorical(y_train, num_classes=len(cifar10_classes))
Y_test = utils.to_categorical(y_test, num_classes=len(cifar10_classes))

# Hold out the first 10,000 training samples for validation; train on the rest.
x_val, partial_x_train = X_train[:10000], X_train[10000:]
y_val, partial_y_train = Y_train[:10000], Y_train[10000:]

# Augment training batches only; validation batches pass through unchanged.
gen = ImageDataGenerator(
    rotation_range=8,
    shear_range=0.3,
    zoom_range=0.08,
    width_shift_range=0.08,
    height_shift_range=0.08,
)
val_gen = ImageDataGenerator()

train_generator = gen.flow(partial_x_train, partial_y_train, batch_size=64)
val_generator = val_gen.flow(x_val, y_val, batch_size=64)


In [4]:
def build_resnet50(dropout=0.0):
    """Build the input and output tensors of a small residual CNN for 32x32 RGB images.

    Despite the name, the graph is a convolutional stem (7x7 conv, batch norm,
    ReLU, max pooling) followed by a single identity residual block of two
    3x3 convolutions, then a flatten / dropout / softmax classification head.

    Args:
        dropout: Rate passed to the Dropout layer placed before the final Dense
            layer. The default of 0.0 makes that layer a no-op.

    Returns:
        A tuple ``(X_input, X)`` holding the Keras input tensor and the softmax
        output tensor, ready to be wrapped in ``Model(inputs=..., outputs=...)``.
    """

    def pad_conv_bn(tensor, pad, kernel, stride, conv_name, bn_name):
        """Zero-pad, apply a 64-filter convolution, then batch-normalise over axis 3."""
        padded = ZeroPadding2D(padding=(pad, pad), data_format=None)(tensor)
        convolved = Conv2D(
            64,
            (kernel, kernel),
            strides=(stride, stride),
            name=conv_name,
            kernel_initializer=glorot_uniform(seed=0),
        )(padded)
        return BatchNormalization(axis=3, name=bn_name)(convolved)

    X_input = Input(shape=(32, 32, 3))

    # Stem: strided 7x7 convolution followed by overlapping max pooling.
    stem = pad_conv_bn(X_input, 3, 7, 2, 'conv1', 'bn_conv1')
    stem = Activation('relu')(stem)
    stem = MaxPooling2D((3, 3), strides=(2, 2))(stem)

    # Residual block: the stem output is the identity shortcut. Both 3x3
    # convolutions use padding 1 and stride 1, so the main path keeps the
    # shortcut's shape and the two can be summed element-wise.
    main_path = pad_conv_bn(stem, 1, 3, 1, 'conv2', 'bn_conv2')
    main_path = Activation('relu')(main_path)
    main_path = pad_conv_bn(main_path, 1, 3, 1, 'conv3', 'bn_conv3')

    # The shortcut is added before the block's final activation.
    merged = Add()([main_path, stem])
    merged = Activation('relu')(merged)

    # Classification head.
    features = Flatten()(merged)
    features = Dropout(dropout)(features)
    probabilities = Dense(
        NUM_CLASSES,
        activation='softmax',
        name='fc' + str(NUM_CLASSES),
        kernel_initializer=glorot_uniform(seed=0),
    )(features)

    return X_input, probabilities

Create the ResNet without dropout.

In [5]:
# Baseline variant: the dropout rate is left at the builder's default of 0.0.
X_input1, X1 = build_resnet50()
model1 = Model(name='ResNet50', inputs=X_input1, outputs=X1)
model1.summary()

Model: "ResNet50"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 32, 32, 3)]  0                                            
__________________________________________________________________________________________________
zero_padding2d (ZeroPadding2D)  (None, 38, 38, 3)    0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 16, 16, 64)   9472        zero_padding2d[0][0]             
__________________________________________________________________________________________________
bn_conv1 (BatchNormalization)   (None, 16, 16, 64)   256         conv1[0][0]                      
___________________________________________________________________________________________

In [6]:
# Adam with default settings; accuracy is the only extra metric tracked.
model1.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])

# Steps per epoch are derived from the batch size of 64 used by the generators.
# Floor division means a trailing partial batch is not counted.
train_steps = len(partial_x_train) // 64
val_steps = len(x_val) // 64

model1.fit(
    train_generator,
    epochs=10,
    steps_per_epoch=train_steps,
    validation_data=val_generator,
    validation_steps=val_steps,
)

# evaluate() returns [loss, accuracy] because 'accuracy' is the single compiled metric.
score = model1.evaluate(X_test, Y_test)
print()
print('Test accuracy: ', score[1])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

Test accuracy:  0.6991000175476074


The resulting ResNet reaches a training accuracy of 74.13% and a validation accuracy of 70.05% after 10 epochs. The gap of about 4 percentage points between the two suggests the model is slightly overfit.

In [7]:
# Regularised variant: same architecture with a dropout rate of 0.5 before the classifier.
X_input2, X2 = build_resnet50(dropout=0.5)
model2 = Model(name='ResNet50Dropout', inputs=X_input2, outputs=X2)
model2.summary()

Model: "ResNet50Dropout"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, 32, 32, 3)]  0                                            
__________________________________________________________________________________________________
zero_padding2d_3 (ZeroPadding2D (None, 38, 38, 3)    0           input_2[0][0]                    
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 16, 16, 64)   9472        zero_padding2d_3[0][0]           
__________________________________________________________________________________________________
bn_conv1 (BatchNormalization)   (None, 16, 16, 64)   256         conv1[0][0]                      
____________________________________________________________________________________

In [8]:
# Adam with default settings; accuracy is the only extra metric tracked.
model2.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])

# Steps per epoch are derived from the batch size of 64 used by the generators.
# Floor division means a trailing partial batch is not counted.
train_steps = len(partial_x_train) // 64
val_steps = len(x_val) // 64

model2.fit(
    train_generator,
    epochs=10,
    steps_per_epoch=train_steps,
    validation_data=val_generator,
    validation_steps=val_steps,
)

# evaluate() returns [loss, accuracy] because 'accuracy' is the single compiled metric.
score = model2.evaluate(X_test, Y_test)
print()
print('Test accuracy: ', score[1])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

Test accuracy:  0.6741999983787537


The resulting ResNet with 50% dropout reaches a training accuracy of 69.44% and a validation accuracy of 68.29%. The model is not overfit; however, it performs slightly worse than the ResNet without dropout.